Merge pull request #1074 from serengil/feat-task-0803-hash-approach

hash with properties instead of file itself
This commit is contained in:
Sefik Ilkin Serengil 2024-03-08 23:24:31 +00:00 committed by GitHub
commit 3da81ce549
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 5 deletions

View File

@ -1,4 +1,5 @@
# built-in dependencies
import os
import hashlib
# 3rd party dependencies
@ -21,12 +22,22 @@ def get_tf_major_version() -> int:
def find_hash_of_file(file_path: str) -> str:
    """
    Find the hash of the given image file from its properties.

    Hashing the image content itself is a costly operation, so the digest is
    built from the file's metadata (size, st_ctime, st_mtime) instead of its
    bytes. The file is never opened or read.

    Args:
        file_path (str): exact image path
    Returns:
        hash (str): digest with sha1 algorithm
    Raises:
        OSError: if file_path cannot be stat'ed (e.g. it does not exist)
    """
    file_stats = os.stat(file_path)

    # some properties
    file_size = file_stats.st_size
    # st_ctime is the creation time on Windows, but the time of the most
    # recent metadata change on most Unix platforms
    creation_time = file_stats.st_ctime
    modification_time = file_stats.st_mtime

    properties = f"{file_size}-{creation_time}-{modification_time}"

    hasher = hashlib.sha1()
    hasher.update(properties.encode("utf-8"))
    return hasher.hexdigest()

View File

@ -96,7 +96,7 @@ def analyze(
- 'white': Confidence score for White ethnicity.
"""
# validate actions
# if actions is passed as a single item in parentheses without a trailing comma, e.g. ("age"), it is a str here, not a tuple
if isinstance(actions, str):
actions = (actions,)