Mirror of https://github.com/serengil/deepface.git
Merge pull request #959 from serengil/feat-task-1303-datastore-update
some improvements
Commit 51bb1808d0
@@ -21,9 +21,16 @@ class _Layer:
 class SFaceModel:
 
     def __init__(self, model_path):
 
-        self.model = cv.FaceRecognizerSF.create(
-            model=model_path, config="", backend_id=0, target_id=0
-        )
+        try:
+            self.model = cv.FaceRecognizerSF.create(
+                model=model_path, config="", backend_id=0, target_id=0
+            )
+        except Exception as err:
+            raise ValueError(
+                "Exception while calling opencv.FaceRecognizerSF module."
+                + "This is an optional dependency."
+                + "You can install it as pip install opencv-contrib-python."
+            ) from err
 
         self.layers = [_Layer()]

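The guard added here matters because cv.FaceRecognizerSF is only present in some OpenCV builds. A minimal sketch, separate from the diff, that probes for the attribute before constructing the model (it assumes only that cv2 is importable):

import cv2 as cv

# hasattr probe: FaceRecognizerSF is missing from builds without the required
# OpenCV face module, which is exactly what the new except branch reports
if hasattr(cv, "FaceRecognizerSF"):
    print("cv.FaceRecognizerSF is available; SFaceModel can be constructed")
else:
    print("cv.FaceRecognizerSF is missing; try: pip install opencv-contrib-python")
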
@@ -90,7 +90,19 @@ def detect_face(detector: dict, img: np.ndarray, align: bool = True) -> list:
     return resp
 
 
-def align_face(eye_detector, img):
+def align_face(eye_detector: Any, img: np.ndarray) -> np.ndarray:
+    """
+    Align a given image with the pre-built eye_detector
+    Args:
+        eye_detector (Any): cascade classifier object
+        img (np.ndarray): given image
+    Returns:
+        aligned_img (np.ndarray)
+    """
+    # if image has unexpectedly 0 dimension then skip alignment
+    if img.shape[0] == 0 or img.shape[1] == 0:
+        return img
+
     detected_face_gray = cv2.cvtColor(
         img, cv2.COLOR_BGR2GRAY
     )  # eye detector expects gray scale image

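The new zero-dimension guard exists because cv2.cvtColor raises on an empty array. A small sketch with an illustrative empty face crop, showing the behaviour the guard avoids:

import numpy as np
import cv2

# illustrative empty crop; a detector can hand back a 0-height region
empty_face = np.empty((0, 0, 3), dtype=np.uint8)

if empty_face.shape[0] == 0 or empty_face.shape[1] == 0:
    aligned = empty_face  # same early return as the new guard in align_face
else:
    # this branch would raise cv2.error on an empty input
    aligned = cv2.cvtColor(empty_face, cv2.COLOR_BGR2GRAY)

print(aligned.shape)  # (0, 0, 3)
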
@@ -130,7 +142,12 @@ def align_face(eye_detector, img):
     return img  # return img anyway
 
 
-def get_opencv_path():
+def get_opencv_path() -> str:
+    """
+    Returns where opencv installed
+    Returns:
+        installation_path (str)
+    """
     opencv_home = cv2.__file__
     folders = opencv_home.split(os.path.sep)[0:-1]
 

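get_opencv_path() derives the install directory from cv2.__file__; deepface later joins it with the bundled haarcascade XML files (that join happens outside this hunk, so treat it as an assumption). A standalone sketch of the computation:

import os
import cv2

# same derivation as get_opencv_path(): drop the file name from cv2.__file__
opencv_home = cv2.__file__
folders = opencv_home.split(os.path.sep)[0:-1]
installation_path = os.path.sep.join(folders)

print(installation_path)  # directory containing the installed cv2 package
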
@@ -43,10 +43,17 @@ def build_model() -> dict:
 
         gdown.download(url, output, quiet=False)
 
-    face_detector = cv2.dnn.readNetFromCaffe(
-        home + "/.deepface/weights/deploy.prototxt",
-        home + "/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel",
-    )
+    try:
+        face_detector = cv2.dnn.readNetFromCaffe(
+            home + "/.deepface/weights/deploy.prototxt",
+            home + "/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel",
+        )
+    except Exception as err:
+        raise ValueError(
+            "Exception while calling opencv.dnn module."
+            + "This is an optional dependency."
+            + "You can install it as pip install opencv-contrib-python."
+        ) from err
 
     eye_detector = OpenCvWrapper.build_cascade("haarcascade_eye")
 

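The try/except above also fires when the downloaded Caffe weights are missing or truncated. A quick, hedged pre-check one can run by hand; it assumes the default weights directory under the user's home, whereas deepface may resolve this path through its own helper:

import os

home = os.path.expanduser("~")  # assumption: default deepface home location
prototxt = home + "/.deepface/weights/deploy.prototxt"
caffemodel = home + "/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel"

for path in (prototxt, caffemodel):
    status = "found" if os.path.isfile(path) else "missing"
    print(f"{status}: {path}")
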
@@ -24,7 +24,17 @@ def build_model() -> Any:
         logger.info(f"{file_name} will be downloaded...")
         output = home + f"/.deepface/weights/{file_name}"
         gdown.download(url, output, quiet=False)
-    face_detector = cv2.FaceDetectorYN_create(home + f"/.deepface/weights/{file_name}", "", (0, 0))
+
+    try:
+        face_detector = cv2.FaceDetectorYN_create(
+            home + f"/.deepface/weights/{file_name}", "", (0, 0)
+        )
+    except Exception as err:
+        raise ValueError(
+            "Exception while calling opencv.FaceDetectorYN_create module."
+            + "This is an optional dependency."
+            + "You can install it as pip install opencv-contrib-python."
+        ) from err
     return face_detector
 
 

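All three wrappers touched by this PR (SFace, the SSD detector, and YuNet) repeat the same wrap-and-rethrow pattern: call the optional cv2 factory, and on failure raise a ValueError with install guidance while chaining the original error via "from err". A generic sketch of that pattern; the helper name is illustrative and not something the diff introduces:

from typing import Any, Callable


def call_optional_cv2(factory: Callable[[], Any], module_name: str) -> Any:
    # mirrors the hunks above: keep the original traceback chained while
    # pointing the user at opencv-contrib-python
    try:
        return factory()
    except Exception as err:
        raise ValueError(
            f"Exception while calling {module_name}."
            "This is an optional dependency."
            "You can install it as pip install opencv-contrib-python."
        ) from err
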
@@ -75,6 +75,7 @@ def find(
 
     file_name = f"representations_{model_name}.pkl"
     file_name = file_name.replace("-", "_").lower()
+    datastore_path = f"{db_path}/{file_name}"
 
     df_cols = [
         "identity",

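With this change the pickle location is computed once and reused throughout find(). For example, model_name "VGG-Face" and db_path "my_db" (illustrative values) resolve as follows:

model_name = "VGG-Face"  # illustrative
db_path = "my_db"        # illustrative

file_name = f"representations_{model_name}.pkl"
file_name = file_name.replace("-", "_").lower()
datastore_path = f"{db_path}/{file_name}"

print(datastore_path)  # my_db/representations_vgg_face.pkl
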
@@ -85,100 +86,93 @@ def find(
         "target_h",
     ]
 
-    if os.path.exists(db_path + "/" + file_name):
-        if not silent:
-            logger.warn(
-                f"Representations for images in {db_path} folder were previously stored"
-                f" in {file_name}. If you added new instances after the creation, then please "
-                "delete this file and call find function again. It will create it again."
-            )
-
-        with open(f"{db_path}/{file_name}", "rb") as f:
+    if os.path.exists(datastore_path):
+        with open(datastore_path, "rb") as f:
             representations = pickle.load(f)
 
         if len(representations) > 0 and len(representations[0]) != len(df_cols):
             raise ValueError(
-                f"Seems existing {db_path}/{file_name} is out-of-the-date."
-                "Delete it and re-run."
+                f"Seems existing {datastore_path} is out-of-the-date."
+                "Please delete it and re-run."
             )
 
+        alpha_employees = __list_images(path=db_path)
+        beta_employees = [representation[0] for representation in representations]
+
+        newbies = list(set(alpha_employees) - set(beta_employees))
+        oldies = list(set(beta_employees) - set(alpha_employees))
+
+        if newbies:
+            logger.warn(
+                f"Items {newbies} were added into {db_path}"
+                f" just after data source {datastore_path} created!"
+            )
+            newbies_representations = __find_bulk_embeddings(
+                employees=newbies,
+                model_name=model_name,
+                target_size=target_size,
+                detector_backend=detector_backend,
+                enforce_detection=enforce_detection,
+                align=align,
+                normalization=normalization,
+                silent=silent,
+            )
+            representations = representations + newbies_representations
+
+        if oldies:
+            logger.warn(
+                f"Items {oldies} were dropped from {db_path}"
+                f" just after data source {datastore_path} created!"
+            )
+            representations = [rep for rep in representations if rep[0] not in oldies]
+
+        if newbies or oldies:
+            if len(representations) == 0:
+                raise ValueError(f"There is no image in {db_path} anymore!")
+
+            # save new representations
+            with open(datastore_path, "wb") as f:
+                pickle.dump(representations, f)
+
+            if not silent:
+                logger.info(
+                    f"{len(newbies)} new representations are just added"
+                    f" whereas {len(oldies)} represented one(s) are just dropped"
+                    f" in {db_path}/{file_name} file."
+                )
+
         if not silent:
             logger.info(f"There are {len(representations)} representations found in {file_name}")
 
     else:  # create representation.pkl from scratch
-        employees = []
-
-        for r, _, f in os.walk(db_path):
-            for file in f:
-                if (
-                    (".jpg" in file.lower())
-                    or (".jpeg" in file.lower())
-                    or (".png" in file.lower())
-                ):
-                    exact_path = r + "/" + file
-                    employees.append(exact_path)
+        employees = __list_images(path=db_path)
 
         if len(employees) == 0:
             raise ValueError(
-                "There is no image in ",
-                db_path,
-                " folder! Validate .jpg or .png files exist in this path.",
+                f"There is no image in {db_path} folder!"
+                "Validate .jpg, .jpeg or .png files exist in this path.",
             )
 
         # ------------------------
         # find representations for db images
-
-        representations = []
-
-        # for employee in employees:
-        pbar = tqdm(
-            range(0, len(employees)),
-            desc="Finding representations",
-            disable=silent,
+        representations = __find_bulk_embeddings(
+            employees=employees,
+            model_name=model_name,
+            target_size=target_size,
+            detector_backend=detector_backend,
+            enforce_detection=enforce_detection,
+            align=align,
+            normalization=normalization,
+            silent=silent,
         )
-        for index in pbar:
-            employee = employees[index]
-
-            img_objs = functions.extract_faces(
-                img=employee,
-                target_size=target_size,
-                detector_backend=detector_backend,
-                grayscale=False,
-                enforce_detection=enforce_detection,
-                align=align,
-            )
-
-            for img_content, img_region, _ in img_objs:
-                embedding_obj = representation.represent(
-                    img_path=img_content,
-                    model_name=model_name,
-                    enforce_detection=enforce_detection,
-                    detector_backend="skip",
-                    align=align,
-                    normalization=normalization,
-                )
-
-                img_representation = embedding_obj[0]["embedding"]
-
-                instance = []
-                instance.append(employee)
-                instance.append(img_representation)
-                instance.append(img_region["x"])
-                instance.append(img_region["y"])
-                instance.append(img_region["w"])
-                instance.append(img_region["h"])
-                representations.append(instance)
 
         # -------------------------------
 
-        with open(f"{db_path}/{file_name}", "wb") as f:
+        with open(datastore_path, "wb") as f:
             pickle.dump(representations, f)
 
         if not silent:
-            logger.info(
-                f"Representations stored in {db_path}/{file_name} file."
-                + "Please delete this file when you add new identities in your database."
-            )
+            logger.info(f"Representations stored in {db_path}/{file_name} file.")
 
     # ----------------------------
     # now, we got representations for facial database

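The core of this hunk is the synchronization between images on disk and identities already stored in the pickle. A standalone sketch of that set-difference logic, with made-up paths and embeddings:

# illustrative data, not taken from the repository
on_disk = {"db/alice.jpg", "db/bob.jpg", "db/carol.png"}
in_pickle = [
    ["db/alice.jpg", [0.1, 0.2], 0, 0, 100, 100],
    ["db/dave.jpg", [0.3, 0.4], 0, 0, 100, 100],
]

stored_identities = {row[0] for row in in_pickle}
newbies = list(on_disk - stored_identities)  # images needing fresh embeddings
oldies = list(stored_identities - on_disk)   # stale rows to drop

# keep rows whose image still exists; newbies get embedded and appended
representations = [row for row in in_pickle if row[0] not in oldies]
print(sorted(newbies), oldies, len(representations))
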
@@ -218,7 +212,7 @@ def find(
         result_df["source_h"] = source_region["h"]
 
         distances = []
-        for index, instance in df.iterrows():
+        for _, instance in df.iterrows():
             source_representation = instance[f"{model_name}_representation"]
 
             target_dims = len(list(target_representation))

@@ -266,3 +260,87 @@ def find(
         logger.info(f"find function lasts {toc - tic} seconds")
 
     return resp_obj
+
+
+def __list_images(path: str) -> list:
+    """
+    List images in a given path
+    Args:
+        path (str): path's location
+    Returns:
+        images (list): list of exact image paths
+    """
+    images = []
+    for r, _, f in os.walk(path):
+        for file in f:
+            if file.lower().endswith((".jpg", ".jpeg", ".png")):
+                exact_path = f"{r}/{file}"
+                images.append(exact_path)
+    return images
+
+
+def __find_bulk_embeddings(
+    employees: List[str],
+    model_name: str = "VGG-Face",
+    target_size: tuple = (224, 224),
+    detector_backend: str = "opencv",
+    enforce_detection: bool = True,
+    align: bool = True,
+    normalization: str = "base",
+    silent: bool = False,
+):
+    """
+    Find embeddings of a list of images
+
+    Args:
+        employees (list): list of exact image paths
+        model_name (str): facial recognition model name
+        target_size (tuple): expected input shape of facial
+            recognition model
+        detector_backend (str): face detector model name
+        enforce_detection (bool): set this to False if you
+            want to proceed when you cannot detect any face
+        align (bool): enable or disable alignment of image
+            before feeding to facial recognition model
+        normalization (bool): normalization technique
+        silent (bool): enable or disable informative logging
+    Returns:
+        representations (list): pivot list of embeddings with
+            image name and detected face area's coordinates
+    """
+    representations = []
+    for employee in tqdm(
+        employees,
+        desc="Finding representations",
+        disable=silent,
+    ):
+        img_objs = functions.extract_faces(
+            img=employee,
+            target_size=target_size,
+            detector_backend=detector_backend,
+            grayscale=False,
+            enforce_detection=enforce_detection,
+            align=align,
+        )
+
+        for img_content, img_region, _ in img_objs:
+            embedding_obj = representation.represent(
+                img_path=img_content,
+                model_name=model_name,
+                enforce_detection=enforce_detection,
+                detector_backend="skip",
+                align=align,
+                normalization=normalization,
+            )
+
+            img_representation = embedding_obj[0]["embedding"]
+
+            instance = []
+            instance.append(employee)
+            instance.append(img_representation)
+            instance.append(img_region["x"])
+            instance.append(img_region["y"])
+            instance.append(img_region["w"])
+            instance.append(img_region["h"])
+            representations.append(instance)
+    return representations

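Each row that __find_bulk_embeddings returns is [identity, embedding, x, y, w, h], and find() loads these rows into a DataFrame. A sketch of that mapping; the embedding numbers are made up, and the column names other than "identity", the f"{model_name}_representation" column used later in the diff, and "target_h" are assumed from context:

import pandas as pd

model_name = "VGG-Face"
df_cols = [
    "identity",
    f"{model_name}_representation",  # column name referenced later in find()
    "target_x",  # assumed
    "target_y",  # assumed
    "target_w",  # assumed
    "target_h",
]

representations = [
    ["dataset/img1.jpg", [0.11, 0.42, 0.07], 34, 52, 120, 120],  # made-up values
]

df = pd.DataFrame(representations, columns=df_cols)
print(df.head())
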
setup.py (4 lines changed)
@@ -8,11 +8,11 @@ with open("requirements.txt", "r", encoding="utf-8") as f:
 
 setuptools.setup(
     name="deepface",
-    version="0.0.81",
+    version="0.0.82",
     author="Sefik Ilkin Serengil",
     author_email="serengil@gmail.com",
     description="A Lightweight Face Recognition and Facial Attribute Analysis Framework (Age, Gender, Emotion, Race) for Python",
-    data_files=[('', ['README.md', 'requirements.txt'])],
+    data_files=[("", ["README.md", "requirements.txt"])],
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/serengil/deepface",

@@ -1,6 +1,7 @@
 import cv2
 import pandas as pd
 from deepface import DeepFace
+from deepface.commons import distance
 from deepface.commons.logger import Logger
 
 logger = Logger("tests/test_find.py")

@@ -41,9 +42,32 @@ def test_find_with_array_input():
         # validate reproducability
         assert identity_df["VGG-Face_cosine"].values[0] == 0
 
         df = df[df["identity"] != img_path]
         logger.debug(df.head())
         assert df.shape[0] > 0
 
     logger.info("✅ test find for array input done")
 
+
+def test_find_with_extracted_faces():
+    img_path = "dataset/img1.jpg"
+    face_objs = DeepFace.extract_faces(img_path)
+    img = face_objs[0]["face"]
+    dfs = DeepFace.find(img, db_path="dataset", detector_backend="skip", silent=True)
+    assert len(dfs) > 0
+    for df in dfs:
+        assert isinstance(df, pd.DataFrame)
+
+        # one is img1.jpg itself
+        identity_df = df[df["identity"] == img_path]
+        assert identity_df.shape[0] > 0
+
+        # validate reproducability
+        assert identity_df["VGG-Face_cosine"].values[0] < (
+            distance.findThreshold(model_name="VGG-Face", distance_metric="cosine")
+        )
+
+        df = df[df["identity"] != img_path]
+        logger.debug(df.head())
+        assert df.shape[0] > 0
+    logger.info("✅ test find for extracted face input done")

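The new test compares against a threshold rather than exact zero: a face that has already been extracted and re-fed through find() is not bit-identical to the original file, so its cosine distance only needs to stay below the model's verification threshold. A short sketch to inspect that threshold, assuming a deepface version where deepface.commons.distance.findThreshold exists, as used in the test above:

from deepface.commons import distance

# model/metric-specific verification threshold used by the new assertion
threshold = distance.findThreshold(model_name="VGG-Face", distance_metric="cosine")
print(f"VGG-Face cosine verification threshold: {threshold}")
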