From 392160f6fce3058a8a5d020afa72c45a5241e171 Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Fri, 8 Mar 2024 23:09:10 +0000
Subject: [PATCH] hash with properties instead of file itself

---
 deepface/commons/package_utils.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/deepface/commons/package_utils.py b/deepface/commons/package_utils.py
index 2226e07..b360567 100644
--- a/deepface/commons/package_utils.py
+++ b/deepface/commons/package_utils.py
@@ -1,5 +1,7 @@
 # built-in dependencies
+import os
 import hashlib
+import platform
 
 # 3rd party dependencies
 import tensorflow as tf
@@ -21,12 +23,22 @@ def get_tf_major_version() -> int:
 
 def find_hash_of_file(file_path: str) -> str:
     """
-    Find hash of image file
+    Find the hash of given image file with its properties
+        finding the hash of image content is costly operation
     Args:
         file_path (str): exact image path
     Returns:
         hash (str): digest with sha1 algorithm
     """
-    with open(file_path, "rb") as f:
-        digest = hashlib.sha1(f.read()).hexdigest()
-    return digest
+    file_stats = os.stat(file_path)
+
+    # some properties
+    file_size = file_stats.st_size
+    creation_time = file_stats.st_ctime
+    modification_time = file_stats.st_mtime
+
+    properties = f"{file_size}-{creation_time}-{modification_time}"
+
+    hasher = hashlib.sha1()
+    hasher.update(properties.encode("utf-8"))
+    return hasher.hexdigest()