Merge pull request #1235 from BrunoMarinhoM/refresh_data_base_feature

ISSUE #1233: Feature refresh_database argument added to method find
This commit is contained in:
Sefik Ilkin Serengil 2024-05-17 21:55:25 +01:00 committed by GitHub
commit 676f2793d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 16 deletions

View File

@@ -260,6 +260,7 @@ def find(
threshold: Optional[float] = None,
normalization: str = "base",
silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]:
"""
Identify individuals in a database
@@ -299,6 +300,10 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False).
refresh_database (boolean): Synchronizes the images representation (pkl) file with the
directory/db files, if set to false, it will ignore any file changes inside the db_path
(default is True).
Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image.
@@ -329,6 +334,7 @@ def find(
threshold=threshold,
normalization=normalization,
silent=silent,
refresh_database=refresh_database,
)

View File

@@ -29,6 +29,7 @@ def find(
threshold: Optional[float] = None,
normalization: str = "base",
silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]:
"""
Identify individuals in a database
@@ -67,6 +68,11 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process.
refresh_database (boolean): Synchronizes the images representation (pkl) file with the
directory/db files, if set to false, it will ignore any file changes inside the db_path
directory (default is True).
Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image.
@@ -145,25 +151,35 @@ def find(
# Get the list of images on storage
storage_images = image_utils.list_images(path=db_path)
if len(storage_images) == 0:
raise ValueError(f"No item found in {db_path}")
must_save_pickle = False
new_images = []
old_images = []
replaced_images = []
if not refresh_database:
logger.info(f"There could be changes in {db_path} not tracked. Set refresh_database to true to assure that any changes will be tracked.")
# Enforce data consistency amongst on disk images and pickle file
must_save_pickle = False
new_images = list(set(storage_images) - set(pickled_images)) # images added to storage
old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage
if refresh_database:
if len(storage_images) == 0:
raise ValueError(f"No item found in {db_path}")
new_images = list(set(storage_images) - set(pickled_images)) # images added to storage
old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage
# detect replaced images
for current_representation in representations:
identity = current_representation["identity"]
if identity in old_images:
continue
alpha_hash = current_representation["hash"]
beta_hash = image_utils.find_image_hash(identity)
if alpha_hash != beta_hash:
logger.debug(f"Even though {identity} represented before, it's replaced later.")
replaced_images.append(identity)
# detect replaced images
replaced_images = []
for current_representation in representations:
identity = current_representation["identity"]
if identity in old_images:
continue
alpha_hash = current_representation["hash"]
beta_hash = image_utils.find_image_hash(identity)
if alpha_hash != beta_hash:
logger.debug(f"Even though {identity} represented before, it's replaced later.")
replaced_images.append(identity)
if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
logger.info(