Merge pull request #1235 from BrunoMarinhoM/refresh_data_base_feature

ISSUE #1233: Feature refresh_database argument added to method find
Sefik Ilkin Serengil committed on 2024-05-17 21:55:25 +01:00 (via GitHub)
commit 676f2793d7
2 changed files with 38 additions and 16 deletions
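
For context, a minimal usage sketch of the new flag (the image and database paths below are placeholders, not taken from this PR): passing refresh_database=False reuses the existing representations pkl and ignores any changes made to db_path since the last sync.

    from deepface import DeepFace

    # First call scans db_path and builds/updates the representations pkl file.
    results = DeepFace.find(img_path="query.jpg", db_path="my_faces_db")

    # Later calls can skip the pkl/directory synchronization for speed, at the
    # cost of ignoring images added, removed, or replaced since the last sync.
    results = DeepFace.find(img_path="query.jpg", db_path="my_faces_db", refresh_database=False)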

File 1 of 2:

@@ -260,6 +260,7 @@ def find(
     threshold: Optional[float] = None,
     normalization: str = "base",
     silent: bool = False,
+    refresh_database: bool = True,
 ) -> List[pd.DataFrame]:
     """
     Identify individuals in a database
@@ -299,6 +300,10 @@ def find(
         silent (boolean): Suppress or allow some log messages for a quieter analysis process
             (default is False).
 
+        refresh_database (boolean): Synchronizes the images representation (pkl) file with the
+            directory/db files, if set to false, it will ignore any file changes inside the db_path
+            (default is True).
+
     Returns:
         results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
             to the identity information for an individual detected in the source image.
@@ -329,6 +334,7 @@ def find(
         threshold=threshold,
         normalization=normalization,
         silent=silent,
+        refresh_database=refresh_database,
     )

File 2 of 2:

@@ -29,6 +29,7 @@ def find(
     threshold: Optional[float] = None,
     normalization: str = "base",
     silent: bool = False,
+    refresh_database: bool = True,
 ) -> List[pd.DataFrame]:
     """
     Identify individuals in a database
@@ -67,6 +68,11 @@ def find(
         silent (boolean): Suppress or allow some log messages for a quieter analysis process.
 
+        refresh_database (boolean): Synchronizes the images representation (pkl) file with the
+            directory/db files, if set to false, it will ignore any file changes inside the db_path
+            directory (default is True).
+
     Returns:
         results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
             to the identity information for an individual detected in the source image.
@@ -145,25 +151,35 @@ def find(
     # Get the list of images on storage
     storage_images = image_utils.list_images(path=db_path)
 
-    if len(storage_images) == 0:
-        raise ValueError(f"No item found in {db_path}")
+    must_save_pickle = False
+    new_images = []
+    old_images = []
+    replaced_images = []
+
+    if not refresh_database:
+        logger.info(f"There could be changes in {db_path} not tracked. Set refresh_database to true to assure that any changes will be tracked.")
 
     # Enforce data consistency amongst on disk images and pickle file
-    must_save_pickle = False
-    new_images = list(set(storage_images) - set(pickled_images))  # images added to storage
-    old_images = list(set(pickled_images) - set(storage_images))  # images removed from storage
-
-    # detect replaced images
-    replaced_images = []
-    for current_representation in representations:
-        identity = current_representation["identity"]
-        if identity in old_images:
-            continue
-        alpha_hash = current_representation["hash"]
-        beta_hash = image_utils.find_image_hash(identity)
-        if alpha_hash != beta_hash:
-            logger.debug(f"Even though {identity} represented before, it's replaced later.")
-            replaced_images.append(identity)
+    if refresh_database:
+        if len(storage_images) == 0:
+            raise ValueError(f"No item found in {db_path}")
+
+        new_images = list(set(storage_images) - set(pickled_images))  # images added to storage
+        old_images = list(set(pickled_images) - set(storage_images))  # images removed from storage
+
+        # detect replaced images
+        for current_representation in representations:
+            identity = current_representation["identity"]
+            if identity in old_images:
+                continue
+            alpha_hash = current_representation["hash"]
+            beta_hash = image_utils.find_image_hash(identity)
+            if alpha_hash != beta_hash:
+                logger.debug(f"Even though {identity} represented before, it's replaced later.")
+                replaced_images.append(identity)
 
     if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
         logger.info(
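
The restructured block above wraps the existing consistency check in the new refresh_database guard: set differences classify images added to or removed from storage, and a per-image hash comparison flags files replaced in place. As a rough standalone illustration of that pattern (not the library's code; the file listing and hashing helpers here are simplified assumptions), the same classification can be sketched as:

    import hashlib
    import os
    from typing import Dict, List, Tuple


    def file_hash(path: str) -> str:
        # Content hash of a file; DeepFace uses image_utils.find_image_hash for this purpose.
        with open(path, "rb") as f:
            return hashlib.sha1(f.read()).hexdigest()


    def diff_database(
        db_path: str, pickled: Dict[str, str], refresh_database: bool = True
    ) -> Tuple[List[str], List[str], List[str]]:
        """pickled maps an image path ("identity") to the hash stored in the pkl file."""
        if not refresh_database:
            # Mirror the new behaviour: skip the sync and trust the pkl as-is.
            return [], [], []

        # List images currently on disk (simplified stand-in for image_utils.list_images).
        storage_images = [
            os.path.join(root, name)
            for root, _, names in os.walk(db_path)
            for name in names
            if name.lower().endswith((".jpg", ".jpeg", ".png"))
        ]

        new_images = list(set(storage_images) - set(pickled))  # added to storage
        old_images = list(set(pickled) - set(storage_images))  # removed from storage
        replaced_images = [
            identity
            for identity, stored_hash in pickled.items()
            if identity not in old_images and file_hash(identity) != stored_hash
        ]
        return new_images, old_images, replaced_images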