Merge pull request #1235 from BrunoMarinhoM/refresh_data_base_feature

ISSUE #1233: Feature refresh_database argument added to method find
This commit is contained in:
Sefik Ilkin Serengil 2024-05-17 21:55:25 +01:00 committed by GitHub
commit 676f2793d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 16 deletions

View File

@ -260,6 +260,7 @@ def find(
threshold: Optional[float] = None, threshold: Optional[float] = None,
normalization: str = "base", normalization: str = "base",
silent: bool = False, silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]: ) -> List[pd.DataFrame]:
""" """
Identify individuals in a database Identify individuals in a database
@ -299,6 +300,10 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False). (default is False).
refresh_database (boolean): Synchronizes the image representations (pkl) file with the
files in the db_path directory. If set to False, any file changes inside the db_path
directory are ignored (default is True).
Returns: Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image. to the identity information for an individual detected in the source image.
@ -329,6 +334,7 @@ def find(
threshold=threshold, threshold=threshold,
normalization=normalization, normalization=normalization,
silent=silent, silent=silent,
refresh_database=refresh_database,
) )

View File

@ -29,6 +29,7 @@ def find(
threshold: Optional[float] = None, threshold: Optional[float] = None,
normalization: str = "base", normalization: str = "base",
silent: bool = False, silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]: ) -> List[pd.DataFrame]:
""" """
Identify individuals in a database Identify individuals in a database
@ -67,6 +68,11 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process. silent (boolean): Suppress or allow some log messages for a quieter analysis process.
refresh_database (boolean): Synchronizes the image representations (pkl) file with the
files in the db_path directory. If set to False, any file changes inside the db_path
directory are ignored (default is True).
Returns: Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image. to the identity information for an individual detected in the source image.
@ -145,16 +151,25 @@ def find(
# Get the list of images on storage # Get the list of images on storage
storage_images = image_utils.list_images(path=db_path) storage_images = image_utils.list_images(path=db_path)
must_save_pickle = False
new_images = []
old_images = []
replaced_images = []
if not refresh_database:
logger.info(f"There could be changes in {db_path} not tracked. Set refresh_database to true to assure that any changes will be tracked.")
# Enforce data consistency amongst on disk images and pickle file
if refresh_database:
if len(storage_images) == 0: if len(storage_images) == 0:
raise ValueError(f"No item found in {db_path}") raise ValueError(f"No item found in {db_path}")
# Enforce data consistency amongst on disk images and pickle file
must_save_pickle = False
new_images = list(set(storage_images) - set(pickled_images)) # images added to storage new_images = list(set(storage_images) - set(pickled_images)) # images added to storage
old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage
# detect replaced images # detect replaced images
replaced_images = []
for current_representation in representations: for current_representation in representations:
identity = current_representation["identity"] identity = current_representation["identity"]
if identity in old_images: if identity in old_images:
@ -165,6 +180,7 @@ def find(
logger.debug(f"Even though {identity} represented before, it's replaced later.") logger.debug(f"Even though {identity} represented before, it's replaced later.")
replaced_images.append(identity) replaced_images.append(identity)
if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0): if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
logger.info( logger.info(
f"Found {len(new_images)} newly added image(s)" f"Found {len(new_images)} newly added image(s)"