Mirror of https://github.com/serengil/deepface.git (synced 2025-06-05 19:15:23 +00:00)
Merge pull request #1306 from kremnik/recognition

Update internal db storage

Commit f3d1809a9f
@@ -1,7 +1,7 @@
 # built-in dependencies
 import os
 import pickle
-from typing import List, Union, Optional, Dict, Any
+from typing import List, Union, Optional, Dict, Any, Set
 import time

 # 3rd party dependencies
@@ -141,7 +141,7 @@ def find(

     # check each item of representations list has required keys
     for i, current_representation in enumerate(representations):
-        missing_keys = list(set(df_cols) - set(current_representation.keys()))
+        missing_keys = set(df_cols) - set(current_representation.keys())
         if len(missing_keys) > 0:
             raise ValueError(
                 f"{i}-th item does not have some required keys - {missing_keys}."
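Dropping the list() wrapper changes nothing functionally (len() and truthiness behave the same on a set); only the interpolated error text now renders as a set rather than a list. A standalone sketch with made-up values:

```python
# Illustrative values only; names mirror the patch.
df_cols = ["identity", "hash", "embedding"]
current_representation = {"identity": "dataset/img1.jpg"}

missing_keys = set(df_cols) - set(current_representation.keys())
if len(missing_keys) > 0:
    # prints e.g.: 0-th item does not have some required keys - {'hash', 'embedding'}.
    print(f"0-th item does not have some required keys - {missing_keys}.")
```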
@@ -160,9 +160,7 @@ def find(
         raise ValueError(f"Nothing is found in {datastore_path}")

     must_save_pickle = False
-    new_images = []
-    old_images = []
-    replaced_images = []
+    new_images, old_images, replaced_images = set(), set(), set()

     if not refresh_database:
         logger.info(
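One subtlety in the new initialization: Python has no literal for an empty set, because {} creates an empty dict, so the explicit set() calls are the idiomatic spelling. A quick check:

```python
# {} builds an empty dict, not an empty set; set() is the only empty-set spelling.
print(type({}))     # <class 'dict'>
print(type(set()))  # <class 'set'>

new_images, old_images, replaced_images = set(), set(), set()
print(new_images == old_images == replaced_images == set())  # True
```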
@@ -172,8 +170,8 @@ def find(

     # Enforce data consistency amongst on disk images and pickle file
     if refresh_database:
-        new_images = list(set(storage_images) - set(pickled_images))  # images added to storage
-        old_images = list(set(pickled_images) - set(storage_images))  # images removed from storage
+        new_images = set(storage_images) - set(pickled_images)  # images added to storage
+        old_images = set(pickled_images) - set(storage_images)  # images removed from storage

         # detect replaced images
         for current_representation in representations:
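The set differences above are the core of the consistency check: one difference yields what appeared on disk since the last pickle, the mirrored difference yields what disappeared. A minimal standalone sketch with made-up file names:

```python
# Illustrative file names only; in the patch these come from the datastore
# listing (storage_images) and the cached .pkl contents (pickled_images).
pickled_images = {"dataset/img1.jpg", "dataset/img2.jpg"}
storage_images = {"dataset/img2.jpg", "dataset/img3.jpg"}

new_images = storage_images - pickled_images  # added to storage
old_images = pickled_images - storage_images  # removed from storage

print(sorted(new_images))  # ['dataset/img3.jpg']
print(sorted(old_images))  # ['dataset/img1.jpg']
```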
@@ -184,7 +182,7 @@ def find(
             beta_hash = image_utils.find_image_hash(identity)
             if alpha_hash != beta_hash:
                 logger.debug(f"Even though {identity} represented before, it's replaced later.")
-                replaced_images.append(identity)
+                replaced_images.add(identity)

     if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
         logger.info(
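Replaced files keep their path, so set differences alone cannot catch them; the loop above compares the hash stored with each representation against a fresh hash of the file. A sketch of the idea, assuming a simple content hash (the real image_utils.find_image_hash may hash different inputs, such as file metadata):

```python
import hashlib
import tempfile

def file_hash(path: str) -> str:
    # Hypothetical stand-in for image_utils.find_image_hash: a digest of the raw
    # bytes, so overwriting a file in place changes the value even if the name doesn't.
    with open(path, "rb") as f:
        return hashlib.sha256(f.read()).hexdigest()

with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
    f.write(b"original pixels")
    path = f.name

stored_hash = file_hash(path)      # as recorded at embedding time
with open(path, "wb") as f:
    f.write(b"replaced pixels")    # same path, new content

if stored_hash != file_hash(path):
    print(f"{path} was replaced after it was embedded")
```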
@@ -194,8 +192,8 @@ def find(
         )

     # append replaced images into both old and new images. these will be dropped and re-added.
-    new_images = new_images + replaced_images
-    old_images = old_images + replaced_images
+    new_images.update(replaced_images)
+    old_images.update(replaced_images)

     # remove old images first
     if len(old_images) > 0:
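With sets, folding the replaced images into both collections becomes an in-place update() rather than list concatenation, and duplicates collapse automatically:

```python
new_images = {"a.jpg"}
old_images = {"b.jpg"}
replaced_images = {"c.jpg", "a.jpg"}  # "a.jpg" appears in both on purpose

# update() merges in place; unlike list +, it cannot introduce duplicates,
# so a replaced image is dropped and re-embedded exactly once.
new_images.update(replaced_images)
old_images.update(replaced_images)
print(sorted(new_images))  # ['a.jpg', 'c.jpg']
print(sorted(old_images))  # ['b.jpg', 'c.jpg']
```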
@@ -316,7 +314,7 @@ def find(


 def __find_bulk_embeddings(
-    employees: List[str],
+    employees: Set[str],
     model_name: str = "VGG-Face",
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
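The annotation change keeps the signature honest now that every caller passes a set; Set comes from the typing import widened in the first hunk. A trimmed, hypothetical stub just to show the annotation (only Set[str] is taken from the patch):

```python
from typing import Set

def __find_bulk_embeddings(employees: Set[str]) -> None:
    # Hypothetical stub; sorting gives the otherwise-unordered set a stable order.
    for employee in sorted(employees):
        print(f"would embed {employee}")

__find_bulk_embeddings({"dataset/img3.jpg", "dataset/img1.jpg"})
```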
@@ -101,3 +101,53 @@ def test_filetype_for_find_bulk_embeddings():

     # img47 is webp even though its extension is jpg
     assert "dataset/img47.jpg" not in imgs
+
+
+def test_find_without_refresh_database():
+    import shutil, hashlib
+
+    img_path = os.path.join("dataset", "img1.jpg")
+
+    # 1. Calculate the hash of the .pkl file;
+    # 2. Move a random image into a temporarily created directory;
+    # 3. As a result, the .pkl file and the files on disk will differ;
+    # 4. With refresh_database=False, the .pkl file should not be updated;
+    #    recalculate the hash and compare it with the hash from pt. 1;
+    # 5. After a successful check, move the image back to its original location.
+
+    pkl_path = "dataset/ds_model_vggface_detector_opencv_aligned_normalization_base_expand_0.pkl"
+    with open(pkl_path, "rb") as f:
+        hash_before = hashlib.sha256(f.read())
+
+    image_name = "img28.jpg"
+    tmp_dir = "dataset/temp_image"
+    os.mkdir(tmp_dir)
+    shutil.move(os.path.join("dataset", image_name), os.path.join(tmp_dir, image_name))
+
+    dfs = DeepFace.find(img_path=img_path, db_path="dataset", silent=True, refresh_database=False)
+
+    with open(pkl_path, "rb") as f:
+        hash_after = hashlib.sha256(f.read())
+
+    shutil.move(os.path.join(tmp_dir, image_name), os.path.join("dataset", image_name))
+    os.rmdir(tmp_dir)
+
+    assert hash_before.hexdigest() == hash_after.hexdigest()
+
+    logger.info("✅ .pkl hashes before and after the recognition process are the same")
+
+    assert len(dfs) > 0
+    for df in dfs:
+        assert isinstance(df, pd.DataFrame)
+
+        # one is img1.jpg itself
+        identity_df = df[df["identity"] == img_path]
+        assert identity_df.shape[0] > 0
+
+        # validate reproducibility
+        assert identity_df["distance"].values[0] < threshold
+
+        df = df[df["identity"] != img_path]
+        logger.debug(df.head())
+        assert df.shape[0] > 0
+    logger.info("✅ test find without refresh database done")
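For reference, the refresh_database flag the test exercises is a public DeepFace.find parameter; a minimal usage sketch mirroring the test's own call:

```python
from deepface import DeepFace

# Skip the disk/.pkl consistency pass when the database folder is known
# to be static; arguments mirror the test above.
dfs = DeepFace.find(
    img_path="dataset/img1.jpg",
    db_path="dataset",
    refresh_database=False,
    silent=True,
)
print(len(dfs))
```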