Merge pull request #1357 from serengil/feat-task-0510-developments

Feat task 0510 developments
2025-07-23 18:30:04 +00:00 · 2024-10-05 22:41:24 +01:00 · 2024-10-05 22:41:24 +01:00 · 5415407b7c
commit 5415407b7c
parent 4f48d97606 532004e7ee
5 changed files with 108 additions and 15 deletions
--- a/deepface/commons/weight_utils.py
+++ b/deepface/commons/weight_utils.py
@ -19,6 +19,40 @@ else:

 logger = Logger()

+# pylint: disable=line-too-long
+# Source urls of the model files, grouped first by task and then by model name,
+# i.e. WEIGHTS[task][model_name] -> url. retrieve_model_source() looks entries up here.
+# Note that some urls point to compressed archives (.zip / .bz2) rather than raw files.
+WEIGHTS = {
+    "facial_recognition": {
+        "VGG-Face": "https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5",
+        "Facenet": "https://github.com/serengil/deepface_models/releases/download/v1.0/facenet_weights.h5",
+        "Facenet512": "https://github.com/serengil/deepface_models/releases/download/v1.0/facenet512_weights.h5",
+        "OpenFace": "https://github.com/serengil/deepface_models/releases/download/v1.0/openface_weights.h5",
+        "FbDeepFace": "https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip",
+        "ArcFace": "https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5",
+        "DeepID": "https://github.com/serengil/deepface_models/releases/download/v1.0/deepid_keras_weights.h5",
+        "SFace": "https://github.com/opencv/opencv_zoo/raw/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx",
+        "GhostFaceNet": "https://github.com/HamadYA/GhostFaceNets/releases/download/v1.2/GhostFaceNet_W1.3_S1_ArcFace.h5",
+        "Dlib": "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2",
+    },
+    "demography": {
+        "Age": "https://github.com/serengil/deepface_models/releases/download/v1.0/age_model_weights.h5",
+        "Gender": "https://github.com/serengil/deepface_models/releases/download/v1.0/gender_model_weights.h5",
+        "Emotion": "https://github.com/serengil/deepface_models/releases/download/v1.0/facial_expression_model_weights.h5",
+        "Race": "https://github.com/serengil/deepface_models/releases/download/v1.0/race_model_single_batch.h5",
+    },
+    "detection": {
+        "ssd_model": "https://github.com/opencv/opencv/raw/3.4.0/samples/dnn/face_detector/deploy.prototxt",
+        "ssd_weights": "https://github.com/opencv/opencv_3rdparty/raw/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel",
+        "yolo": "https://drive.google.com/uc?id=1qcr9DbgsX3ryrz2uU8w4Xm3cOrRywXqb",
+        "yunet": "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx",
+        "dlib": "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2",
+        "centerface": "https://github.com/Star-Clouds/CenterFace/raw/master/models/onnx/centerface.onnx",
+    },
+    "spoofing": {
+        "MiniFASNetV2": "https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/raw/master/resources/anti_spoof_models/2.7_80x80_MiniFASNetV2.pth",
+        "MiniFASNetV1SE": "https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/raw/master/resources/anti_spoof_models/4_0_0_80x80_MiniFASNetV1SE.pth",
+    },
+}
+
 ALLOWED_COMPRESS_TYPES = ["zip", "bz2"]


@ -95,3 +129,20 @@ def load_model_weights(model: Sequential, weight_file: str) -> Sequential:
            "and copying it to the target folder."
        ) from err
    return model
+
+
+def retrieve_model_source(model_name: str, task: str) -> str:
+    """
+    Find the source url of a given model name for a given task
+    Args:
+        model_name (str): given model name; must be a key of WEIGHTS[task]
+        task (str): task the model belongs to; must be a top-level key of WEIGHTS
+            (facial_recognition, demography, detection or spoofing)
+    Returns:
+        weight_url (str): source url of the given model
+    Raises:
+        ValueError: if the task is not implemented, or if no source url is
+            registered for the given model under that task
+    """
+    # WEIGHTS is the single source of truth for supported tasks, so validate against it
+    # instead of a second hard-coded task list that could drift out of sync
+    task_weights = WEIGHTS.get(task)
+    if task_weights is None:
+        raise ValueError(f"unimplemented task - {task}")
+
+    source_url = task_weights.get(model_name)
+    if source_url is None:
+        raise ValueError(f"Source url cannot be found for given model {task}-{model_name}")
+    return source_url
--- a/deepface/models/Detector.py
+++ b/deepface/models/Detector.py
@ -6,7 +6,7 @@ import numpy as np
 # Notice that all facial detector models must be inherited from this class


-# pylint: disable=unnecessary-pass, too-few-public-methods
+# pylint: disable=unnecessary-pass, too-few-public-methods, too-many-instance-attributes
 class Detector(ABC):
    @abstractmethod
    def detect_faces(self, img: np.ndarray) -> List["FacialAreaRegion"]:
@ -45,6 +45,7 @@ class FacialAreaRegion:
        confidence (float, optional): Confidence score associated with the face detection.
            Default is None.
    """
+
    x: int
    y: int
    w: int
@ -52,6 +53,9 @@ class FacialAreaRegion:
    left_eye: Optional[Tuple[int, int]] = None
    right_eye: Optional[Tuple[int, int]] = None
    confidence: Optional[float] = None
+    nose: Optional[Tuple[int, int]] = None
+    mouth_right: Optional[Tuple[int, int]] = None
+    mouth_left: Optional[Tuple[int, int]] = None


@dataclass
@ -63,7 +67,8 @@ class DetectedFace:
        img (np.ndarray): detected face image as numpy array
        facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
        confidence (float): confidence score for face detection
-        """
+    """
+
    img: np.ndarray
    facial_area: FacialAreaRegion
    confidence: float
--- a/deepface/models/face_detection/RetinaFace.py
+++ b/deepface/models/face_detection/RetinaFace.py
@ -42,10 +42,19 @@ class RetinaFaceClient(Detector):
            # retinaface sets left and right eyes with respect to the person
            left_eye = identity["landmarks"]["left_eye"]
            right_eye = identity["landmarks"]["right_eye"]
+            nose = identity["landmarks"].get("nose")
+            mouth_right = identity["landmarks"].get("mouth_right")
+            mouth_left = identity["landmarks"].get("mouth_left")

            # eyes are list of float, need to cast them tuple of int
            left_eye = tuple(int(i) for i in left_eye)
            right_eye = tuple(int(i) for i in right_eye)
+            if nose is not None:
+                nose = tuple(int(i) for i in nose)
+            if mouth_right is not None:
+                mouth_right = tuple(int(i) for i in mouth_right)
+            if mouth_left is not None:
+                mouth_left = tuple(int(i) for i in mouth_left)

            confidence = identity["score"]

@ -57,6 +66,9 @@ class RetinaFaceClient(Detector):
                left_eye=left_eye,
                right_eye=right_eye,
                confidence=confidence,
+                nose=nose,
+                mouth_left=mouth_left,
+                mouth_right=mouth_right,
            )

            resp.append(facial_area)
--- a/deepface/models/facial_recognition/VGGFace.py
+++ b/deepface/models/facial_recognition/VGGFace.py
@ -140,9 +140,7 @@ def load_model(
        file_name="vgg_face_weights.h5", source_url=url
    )

-    model = weight_utils.load_model_weights(
-        model=model, weight_file=weight_file
-    )
+    model = weight_utils.load_model_weights(model=model, weight_file=weight_file)

    # 2622d dimensional model
    # vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)
@ -151,7 +149,6 @@ def load_model(
    # - softmax causes underfitting
    # - added normalization layer to avoid underfitting with euclidean
    # as described here: https://github.com/serengil/deepface/issues/944
-    base_model_output = Sequential()
    base_model_output = Flatten()(model.layers[-5].output)
    # keras backend's l2 normalization layer troubles some gpu users (e.g. issue 957, 966)
    # base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@ -148,16 +148,26 @@ def extract_faces(
        w = min(width - x - 1, int(current_region.w))
        h = min(height - y - 1, int(current_region.h))

+        facial_area = {
+            "x": x,
+            "y": y,
+            "w": w,
+            "h": h,
+            "left_eye": current_region.left_eye,
+            "right_eye": current_region.right_eye,
+        }
+
+        # nose, mouth_left and mouth_right are optional; they are populated only by retinaface
+        if current_region.nose is not None:
+            facial_area["nose"] = current_region.nose
+        if current_region.mouth_left is not None:
+            facial_area["mouth_left"] = current_region.mouth_left
+        if current_region.mouth_right is not None:
+            facial_area["mouth_right"] = current_region.mouth_right
+
        resp_obj = {
            "face": current_img,
-            "facial_area": {
-                "x": x,
-                "y": y,
-                "w": w,
-                "h": h,
-                "left_eye": current_region.left_eye,
-                "right_eye": current_region.right_eye,
-            },
+            "facial_area": facial_area,
            "confidence": round(float(current_region.confidence or 0), 2),
        }

@ -272,6 +282,9 @@ def expand_and_align_face(
    left_eye = facial_area.left_eye
    right_eye = facial_area.right_eye
    confidence = facial_area.confidence
+    nose = facial_area.nose
+    mouth_left = facial_area.mouth_left
+    mouth_right = facial_area.mouth_right

    if expand_percentage > 0:
        # Expand the facial region height and width by the provided percentage
@ -305,11 +318,26 @@ def expand_and_align_face(
            left_eye = (left_eye[0] - width_border, left_eye[1] - height_border)
        if right_eye is not None:
            right_eye = (right_eye[0] - width_border, right_eye[1] - height_border)
+        if nose is not None:
+            nose = (nose[0] - width_border, nose[1] - height_border)
+        if mouth_left is not None:
+            mouth_left = (mouth_left[0] - width_border, mouth_left[1] - height_border)
+        if mouth_right is not None:
+            mouth_right = (mouth_right[0] - width_border, mouth_right[1] - height_border)

    return DetectedFace(
        img=detected_face,
        facial_area=FacialAreaRegion(
-            x=x, y=y, h=h, w=w, confidence=confidence, left_eye=left_eye, right_eye=right_eye
+            x=x,
+            y=y,
+            h=h,
+            w=w,
+            confidence=confidence,
+            left_eye=left_eye,
+            right_eye=right_eye,
+            nose=nose,
+            mouth_left=mouth_left,
+            mouth_right=mouth_right,
        ),
        confidence=confidence,
    )