ISSUE #1233: Feature refresh_database argument added to method find

This commit is contained in:
BrunoMarinhoM 2024-05-17 15:54:33 -04:00
parent 9b5fadd88c
commit 3a6ab4820b
2 changed files with 39 additions and 16 deletions

View File

@ -260,6 +260,7 @@ def find(
threshold: Optional[float] = None, threshold: Optional[float] = None,
normalization: str = "base", normalization: str = "base",
silent: bool = False, silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]: ) -> List[pd.DataFrame]:
""" """
Identify individuals in a database Identify individuals in a database
@ -299,6 +300,10 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False). (default is False).
refresh_database (boolean): Synchronizes the image representations (pkl) file with the
directory/db files. If set to False, any file changes inside db_path are ignored
(default is True).
Returns: Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image. to the identity information for an individual detected in the source image.
@ -329,6 +334,7 @@ def find(
threshold=threshold, threshold=threshold,
normalization=normalization, normalization=normalization,
silent=silent, silent=silent,
refresh_data_base=refresh_database,
) )

View File

@ -29,6 +29,7 @@ def find(
threshold: Optional[float] = None, threshold: Optional[float] = None,
normalization: str = "base", normalization: str = "base",
silent: bool = False, silent: bool = False,
refresh_data_base: bool = True,
) -> List[pd.DataFrame]: ) -> List[pd.DataFrame]:
""" """
Identify individuals in a database Identify individuals in a database
@ -67,6 +68,11 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process. silent (boolean): Suppress or allow some log messages for a quieter analysis process.
refresh_data_base (boolean): Synchronizes the image representations (pkl) file with the
directory/db files. If set to False, any file changes inside the db_path
directory are ignored (default is True).
Returns: Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image. to the identity information for an individual detected in the source image.
@ -145,12 +151,22 @@ def find(
# Get the list of images on storage # Get the list of images on storage
storage_images = image_utils.list_images(path=db_path) storage_images = image_utils.list_images(path=db_path)
must_save_pickle = False
new_images = []
old_images = []
replaced_images = []
if not refresh_data_base:
logger.info(f"There could be changes in {db_path} not tracked. Set refresh_data_base to true to assure that any changes will be tracked.")
# Enforce data consistency amongst on disk images and pickle file
if refresh_data_base:
if len(storage_images) == 0: if len(storage_images) == 0:
raise ValueError(f"No item found in {db_path}") raise ValueError(f"No item found in {db_path}")
# Enforce data consistency amongst on disk images and pickle file
must_save_pickle = False
new_images = list(set(storage_images) - set(pickled_images)) # images added to storage new_images = list(set(storage_images) - set(pickled_images)) # images added to storage
old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage
# detect replaced images # detect replaced images
@ -165,6 +181,7 @@ def find(
logger.debug(f"Even though {identity} represented before, it's replaced later.") logger.debug(f"Even though {identity} represented before, it's replaced later.")
replaced_images.append(identity) replaced_images.append(identity)
if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0): if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
logger.info( logger.info(
f"Found {len(new_images)} newly added image(s)" f"Found {len(new_images)} newly added image(s)"