hash with properties instead of file itself

This commit is contained in:
Sefik Ilkin Serengil 2024-03-08 23:09:10 +00:00
parent 4b3473e023
commit 392160f6fc

View File

@ -1,5 +1,7 @@
# built-in dependencies
import os
import hashlib
import platform
# 3rd party dependencies
import tensorflow as tf
@ -21,12 +23,22 @@ def get_tf_major_version() -> int:
def find_hash_of_file(file_path: str) -> str:
    """
    Find the hash of given image file from its properties.

    Hashing the image content requires reading the whole file, which is a
    costly operation. Instead, the digest is computed over the file's size,
    ctime and mtime as reported by os.stat, so the file itself is never opened.
    Two files with identical content but different timestamps therefore
    produce different hashes.

    Args:
        file_path (str): exact image path
    Returns:
        hash (str): sha1 hex digest of the "{size}-{ctime}-{mtime}" string
    Raises:
        OSError: if os.stat fails for file_path (e.g. FileNotFoundError
            when the path does not exist)
    """
    file_stats = os.stat(file_path)

    # NOTE: st_ctime is the creation time on Windows, but on Unix it is the
    # time of the most recent metadata change, not the creation time.
    file_size = file_stats.st_size
    change_time = file_stats.st_ctime
    modification_time = file_stats.st_mtime

    # The layout of this string defines the hash value; keep it unchanged so
    # that hashes computed earlier for the same file still match.
    properties = f"{file_size}-{change_time}-{modification_time}"

    return hashlib.sha1(properties.encode("utf-8")).hexdigest()