From 411df327bf66b75d9ae020eef28be20e6a320fed Mon Sep 17 00:00:00 2001 From: Andrea Lanfranchi Date: Thu, 22 Feb 2024 14:25:43 +0100 Subject: [PATCH 1/5] Simplify find data initialization --- deepface/modules/recognition.py | 123 +++++++++++++------------------- 1 file changed, 51 insertions(+), 72 deletions(-) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index e8545b1..5c7f6b6 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -100,6 +100,7 @@ def find( file_name = f"representations_{model_name}.pkl" file_name = file_name.replace("-", "_").lower() datastore_path = os.path.join(db_path, file_name) + representations = [] df_cols = [ "identity", @@ -110,77 +111,48 @@ def find( "target_h", ] - if os.path.exists(datastore_path): - with open(datastore_path, "rb") as f: - representations = pickle.load(f) + # Ensure the proper pickle file exists + if not os.path.exists(datastore_path): + with open(datastore_path, "wb") as f: + pickle.dump([], f) + f.close() - if len(representations) > 0 and len(representations[0]) != len(df_cols): - raise ValueError( - f"Seems existing {datastore_path} is out-of-the-date." - "Please delete it and re-run." - ) + # Load the representations from the pickle file + with open(datastore_path, "rb") as f: + representations = pickle.load(f) + f.close() - alpha_employees = __list_images(path=db_path) - beta_employees = [representation[0] for representation in representations] - - newbies = list(set(alpha_employees) - set(beta_employees)) - oldies = list(set(beta_employees) - set(alpha_employees)) - - if newbies: - logger.warn( - f"Items {newbies} were added into {db_path}" - f" just after data source {datastore_path} created!" 
- ) - newbies_representations = __find_bulk_embeddings( - employees=newbies, - model_name=model_name, - target_size=target_size, - detector_backend=detector_backend, - enforce_detection=enforce_detection, - align=align, - normalization=normalization, - silent=silent, - ) - representations = representations + newbies_representations - - if oldies: - logger.warn( - f"Items {oldies} were dropped from {db_path}" - f" just after data source {datastore_path} created!" - ) - representations = [rep for rep in representations if rep[0] not in oldies] - - if newbies or oldies: - if len(representations) == 0: - raise ValueError(f"There is no image in {db_path} anymore!") - - # save new representations - with open(datastore_path, "wb") as f: - pickle.dump(representations, f) - - if not silent: - logger.info( - f"{len(newbies)} new representations are just added" - f" whereas {len(oldies)} represented one(s) are just dropped" - f" in {os.path.join(db_path,file_name)} file." - ) - - if not silent: - logger.info(f"There are {len(representations)} representations found in {file_name}") - - else: # create representation.pkl from scratch - employees = __list_images(path=db_path) - - if len(employees) == 0: + # Check if the representations are out-of-date + if len(representations) > 0: + if len(representations[0]) != len(df_cols): raise ValueError( - f"Could not find any valid image in {db_path} folder!" - "Valid images are .jpg, .jpeg or .png files.", + f"Seems existing {datastore_path} is out-of-the-date." + "Please delete it and re-run." 
) + pickled_images = [representation[0] for representation in representations] + else: + pickled_images = [] - # ------------------------ - # find representations for db images - representations = __find_bulk_embeddings( - employees=employees, + # Get the list of images on storage + storage_images = __list_images(path=db_path) + + # Enforce data consistency amongst on disk images and pickle file + must_save_pickle = False + new_images = list(set(storage_images) - set(pickled_images)) # images added to storage + old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage + + if not silent: + logger.info(f"Found {len(new_images)} new images and {len(old_images)} removed images") + + # remove old images first + if len(old_images)>0: + representations = [rep for rep in representations if rep[0] not in old_images] + must_save_pickle = True + + # find representations for new images + if len(new_images)>0: + representations += __find_bulk_embeddings( + employees=new_images, model_name=model_name, target_size=target_size, detector_backend=detector_backend, @@ -188,15 +160,22 @@ def find( align=align, normalization=normalization, silent=silent, - ) - - # ------------------------------- + ) # add new images + must_save_pickle = True + if must_save_pickle: with open(datastore_path, "wb") as f: pickle.dump(representations, f) - + f.close() if not silent: - logger.info(f"Representations stored in {datastore_path} file.") + logger.info(f"There are now {len(representations)} representations in {file_name}") + + # Should we have no representations bailout + if len(representations) == 0: + toc = time.time() + if not silent: + logger.info(f"find function duration {toc - tic} seconds") + return [] # ---------------------------- # now, we got representations for facial database @@ -290,7 +269,7 @@ def find( toc = time.time() if not silent: - logger.info(f"find function lasts {toc - tic} seconds") + logger.info(f"find function duration {toc - tic} 
seconds") return resp_obj From 7b0500a17d09567e622253317b12d51df9ef138b Mon Sep 17 00:00:00 2001 From: Andrea Lanfranchi Date: Thu, 22 Feb 2024 14:32:16 +0100 Subject: [PATCH 2/5] Compute toc only if needed --- deepface/modules/recognition.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index 5c7f6b6..3b83558 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -172,8 +172,8 @@ def find( # Should we have no representations bailout if len(representations) == 0: - toc = time.time() if not silent: + toc = time.time() logger.info(f"find function duration {toc - tic} seconds") return [] @@ -266,9 +266,8 @@ def find( # ----------------------------------- - toc = time.time() - if not silent: + toc = time.time() logger.info(f"find function duration {toc - tic} seconds") return resp_obj From 1d53ca1bd1feab1be1359d5d31518b31976a8804 Mon Sep 17 00:00:00 2001 From: Andrea Lanfranchi Date: Thu, 22 Feb 2024 14:35:16 +0100 Subject: [PATCH 3/5] Amend test.py for OS-dependent directory separator char It was failing on Windows --- tests/test_find.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_find.py b/tests/test_find.py index fd0902a..5218724 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -1,3 +1,4 @@ +import os import cv2 import pandas as pd from deepface import DeepFace @@ -10,7 +11,7 @@ threshold = verification.find_threshold(model_name="VGG-Face", distance_metric=" def test_find_with_exact_path(): - img_path = "dataset/img1.jpg" + img_path = os.path.join("dataset","img1.jpg") dfs = DeepFace.find(img_path=img_path, db_path="dataset", silent=True) assert len(dfs) > 0 for df in dfs: @@ -30,7 +31,7 @@ def test_find_with_exact_path(): def test_find_with_array_input(): - img_path = "dataset/img1.jpg" + img_path = os.path.join("dataset","img1.jpg") img1 = cv2.imread(img_path) dfs = DeepFace.find(img1, 
db_path="dataset", silent=True) assert len(dfs) > 0 @@ -52,7 +53,7 @@ def test_find_with_array_input(): def test_find_with_extracted_faces(): - img_path = "dataset/img1.jpg" + img_path = os.path.join("dataset","img1.jpg") face_objs = DeepFace.extract_faces(img_path) img = face_objs[0]["face"] dfs = DeepFace.find(img, db_path="dataset", detector_backend="skip", silent=True) From 60c23fbc8e100ed3effdac58282ec37582045aed Mon Sep 17 00:00:00 2001 From: Andrea Lanfranchi Date: Thu, 22 Feb 2024 14:43:01 +0100 Subject: [PATCH 4/5] Remove unnecessary file close --- deepface/modules/recognition.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index 3b83558..e51af14 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -115,12 +115,10 @@ def find( if not os.path.exists(datastore_path): with open(datastore_path, "wb") as f: pickle.dump([], f) - f.close() # Load the representations from the pickle file with open(datastore_path, "rb") as f: representations = pickle.load(f) - f.close() # Check if the representations are out-of-date if len(representations) > 0: From 8472a8b808a5ac416eabfb735860ec13d4ae7c42 Mon Sep 17 00:00:00 2001 From: Andrea Lanfranchi Date: Thu, 22 Feb 2024 14:44:53 +0100 Subject: [PATCH 5/5] Conditional log.info --- deepface/modules/recognition.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index e51af14..fa2b3b7 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -139,7 +139,7 @@ def find( new_images = list(set(storage_images) - set(pickled_images)) # images added to storage old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage - if not silent: + if not silent and (len(new_images) > 0 or len(old_images) > 0): logger.info(f"Found {len(new_images)} new images and {len(old_images)} removed images") # 
remove old images first @@ -164,7 +164,6 @@ def find( if must_save_pickle: with open(datastore_path, "wb") as f: pickle.dump(representations, f) - f.close() if not silent: logger.info(f"There are now {len(representations)} representations in {file_name}")