Merge pull request #962 from serengil/feat-task-2101-interfaces

cosmetic changes about interfaces
2025-06-08 04:25:21 +00:00 · 2024-01-21 18:17:02 +00:00 · 2024-01-21 18:17:02 +00:00 · bc30c904a0
commit bc30c904a0
parent 70b4f1a722 359162aa5e
29 changed files with 508 additions and 257 deletions
--- a/deepface/basemodels/ArcFace.py
+++ b/deepface/basemodels/ArcFace.py
@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@ -43,7 +45,7 @@ else:
    )

 # pylint: disable=too-few-public-methods
-class ArcFace(FacialRecognition):
+class ArcFaceClient(FacialRecognition):
    """
    ArcFace model class
    """
@ -52,6 +54,18 @@ class ArcFace(FacialRecognition):
        self.model = load_model()
        self.model_name = "ArcFace"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with ArcFace model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+

 def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5",
--- a/deepface/basemodels/DeepID.py
+++ b/deepface/basemodels/DeepID.py
@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@ -39,7 +41,7 @@ else:
 # -------------------------------------

 # pylint: disable=too-few-public-methods
-class DeepId(FacialRecognition):
+class DeepIdClient(FacialRecognition):
    """
    DeepId model class
    """
@ -48,6 +50,18 @@ class DeepId(FacialRecognition):
        self.model = load_model()
        self.model_name = "DeepId"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with DeepId model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+

 def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/deepid_keras_weights.h5",
--- a/deepface/basemodels/DlibResNet.py
+++ b/deepface/basemodels/DlibResNet.py
@ -1,3 +1,4 @@
+from typing import List
 import os
 import bz2
 import gdown
@ -11,7 +12,7 @@ logger = Logger(module="basemodels.DlibResNet")
 # pylint: disable=too-few-public-methods


-class Dlib(FacialRecognition):
+class DlibClient(FacialRecognition):
    """
    Dlib model class
    """
@ -20,15 +21,33 @@ class Dlib(FacialRecognition):
        self.model = DlibResNet()
        self.model_name = "Dlib"

-    def find_embeddings(self, img: np.ndarray) -> list:
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
        """
-        Custom find embeddings function of Dlib different than FacialRecognition's one
+        find embeddings with Dlib model - different than regular models
        Args:
-            img (np.ndarray)
-        Retunrs:
-            embeddings (list)
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
        """
-        return self.model.predict(img)[0].tolist()
+        # return self.model.predict(img)[0].tolist()
+
+        # extract_faces returns 4 dimensional images
+        if len(img.shape) == 4:
+            img = img[0]
+
+        # input is BGR; reverse the channel axis to get RGB
+        img = img[:, :, ::-1]
+
+        # img is in scale of [0, 1] but expected [0, 255]
+        if img.max() <= 1:
+            img = img * 255
+
+        img = img.astype(np.uint8)
+
+        img_representation = self.model.model.compute_face_descriptor(img)
+        img_representation = np.array(img_representation)
+        img_representation = np.expand_dims(img_representation, axis=0)
+        return img_representation[0].tolist()


 class DlibResNet:
@ -69,38 +88,12 @@ class DlibResNet:

        # ---------------------

-        model = dlib.face_recognition_model_v1(weight_file)
-        self.__model = model
+        self.model = dlib.face_recognition_model_v1(weight_file)

        # ---------------------

        # return None  # classes must return None

-    def predict(self, img_aligned: np.ndarray) -> np.ndarray:
-
-        # functions.detectFace returns 4 dimensional images
-        if len(img_aligned.shape) == 4:
-            img_aligned = img_aligned[0]
-
-        # functions.detectFace returns bgr images
-        img_aligned = img_aligned[:, :, ::-1]  # bgr to rgb
-
-        # deepface.detectFace returns an array in scale of [0, 1]
-        # but dlib expects in scale of [0, 255]
-        if img_aligned.max() <= 1:
-            img_aligned = img_aligned * 255
-
-        img_aligned = img_aligned.astype(np.uint8)
-
-        model = self.__model
-
-        img_representation = model.compute_face_descriptor(img_aligned)
-
-        img_representation = np.array(img_representation)
-        img_representation = np.expand_dims(img_representation, axis=0)
-
-        return img_representation
-

 class DlibMetaData:
    def __init__(self):
--- a/deepface/basemodels/Facenet.py
+++ b/deepface/basemodels/Facenet.py
@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@ -43,7 +45,7 @@ else:
 # --------------------------------

 # pylint: disable=too-few-public-methods
-class FaceNet128d(FacialRecognition):
+class FaceNet128dClient(FacialRecognition):
    """
    FaceNet-128d model class
    """
@ -52,8 +54,20 @@ class FaceNet128d(FacialRecognition):
        self.model = load_facenet128d_model()
        self.model_name = "FaceNet-128d"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with FaceNet-128d model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()

-class FaceNet512d(FacialRecognition):
+
+class FaceNet512dClient(FacialRecognition):
    """
    FaceNet-1512d model class
    """
@ -62,6 +76,18 @@ class FaceNet512d(FacialRecognition):
        self.model = load_facenet512d_model()
        self.model_name = "FaceNet-512d"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with FaceNet-512d model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+

 def scaling(x, scale):
    return x * scale
--- a/deepface/basemodels/FbDeepFace.py
+++ b/deepface/basemodels/FbDeepFace.py
@ -1,6 +1,8 @@
+from typing import List
 import os
 import zipfile
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@ -36,7 +38,7 @@ else:

 # -------------------------------------
 # pylint: disable=line-too-long, too-few-public-methods
-class DeepFace(FacialRecognition):
+class DeepFaceClient(FacialRecognition):
    """
    Fb's DeepFace model class
    """
@ -45,6 +47,18 @@ class DeepFace(FacialRecognition):
        self.model = load_model()
        self.model_name = "DeepFace"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with DeepFace model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+

 def load_model(
    url="https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip",
--- a/deepface/basemodels/OpenFace.py
+++ b/deepface/basemodels/OpenFace.py
@ -1,6 +1,8 @@
+from typing import List
 import os
 import gdown
 import tensorflow as tf
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@ -26,7 +28,7 @@ else:
 # ---------------------------------------

 # pylint: disable=too-few-public-methods
-class OpenFace(FacialRecognition):
+class OpenFaceClient(FacialRecognition):
    """
    OpenFace model class
    """
@ -35,6 +37,18 @@ class OpenFace(FacialRecognition):
        self.model = load_model()
        self.model_name = "OpenFace"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with OpenFace model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+

 def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/openface_weights.h5",
--- a/deepface/basemodels/SFace.py
+++ b/deepface/basemodels/SFace.py
@ -1,5 +1,5 @@
 import os
-from typing import Any
+from typing import Any, List

 import numpy as np
 import cv2 as cv
@ -14,7 +14,7 @@ logger = Logger(module="basemodels.SFace")
 # pylint: disable=line-too-long, too-few-public-methods


-class SFace(FacialRecognition):
+class SFaceClient(FacialRecognition):
    """
    SFace model class
    """
@ -23,15 +23,22 @@ class SFace(FacialRecognition):
        self.model = load_model()
        self.model_name = "SFace"

-    def find_embeddings(self, img: np.ndarray) -> list:
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
        """
-        Custom find embeddings function of SFace different than FacialRecognition's one
+        find embeddings with SFace model - different than regular models
        Args:
-            img (np.ndarray)
-        Retunrs:
-            embeddings (list)
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
        """
-        return self.model.predict(img)[0].tolist()
+        # return self.model.predict(img)[0].tolist()
+
+        # revert the image to original format and preprocess using the model
+        input_blob = (img[0] * 255).astype(np.uint8)
+
+        embeddings = self.model.model.feature(input_blob)
+
+        return embeddings[0].tolist()


 def load_model(
@ -74,17 +81,6 @@ class SFaceWrapper:

        self.layers = [_Layer()]

-    def predict(self, image: np.ndarray) -> np.ndarray:
-        # Preprocess
-        input_blob = (image[0] * 255).astype(
-            np.uint8
-        )  # revert the image to original format and preprocess using the model
-
-        # Forward
-        embeddings = self.model.feature(input_blob)
-
-        return embeddings
-

 class _Layer:
    input_shape = (None, 112, 112, 3)
--- a/deepface/basemodels/VGGFace.py
+++ b/deepface/basemodels/VGGFace.py
@ -1,5 +1,7 @@
+from typing import List
 import os
 import gdown
+import numpy as np
 from deepface.commons import functions
 from deepface.commons.logger import Logger
 from deepface.models.FacialRecognition import FacialRecognition
@ -37,7 +39,7 @@ else:
 # ---------------------------------------

 # pylint: disable=too-few-public-methods
-class VggFace(FacialRecognition):
+class VggFaceClient(FacialRecognition):
    """
    VGG-Face model class
    """
@ -46,6 +48,18 @@ class VggFace(FacialRecognition):
        self.model = load_model()
        self.model_name = "VGG-Face"

+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        """
+        find embeddings with VGG-Face model
+        Args:
+            img (np.ndarray): pre-loaded image in BGR
+        Returns:
+            embeddings (list): multi-dimensional vector
+        """
+        # model.predict causes memory issue when it is called in a for loop
+        # embedding = model.predict(img, verbose=0)[0].tolist()
+        return self.model(img, training=False).numpy()[0].tolist()
+

 def base_model() -> Sequential:
    """
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@ -1,5 +1,5 @@
 import os
-from typing import Union, Tuple
+from typing import Union, Tuple, List
 import base64
 from pathlib import Path

@ -140,9 +140,9 @@ def extract_faces(
    grayscale: bool = False,
    enforce_detection: bool = True,
    align: bool = True,
-) -> list:
-    """Extract faces from an image.
-
+) -> List[Tuple[np.ndarray, dict, float]]:
+    """
+    Extract faces from an image.
    Args:
        img: a path, url, base64 or numpy array.
        target_size (tuple, optional): the target size of the extracted faces.
@ -157,7 +157,12 @@ def extract_faces(
        ValueError: if face could not be detected and enforce_detection is True.

    Returns:
-        list: a list of extracted faces.
+        results (List[Tuple[np.ndarray, dict, float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (dict): The image region represented as
+                {"x": x, "y": y, "w": w, "h": h}
+            - confidence (float): The confidence score associated with the detected face.
    """

    # this is going to store a list of img itself (numpy), it region and confidence
@ -246,7 +251,7 @@ def extract_faces(
                "h": int(current_region[3]),
            }

-            extracted_face = [img_pixels, region_obj, confidence]
+            extracted_face = (img_pixels, region_obj, confidence)
            extracted_faces.append(extracted_face)

    if len(extracted_faces) == 0 and enforce_detection == True:
--- a/deepface/detectors/DetectorWrapper.py
+++ b/deepface/detectors/DetectorWrapper.py
@ -2,15 +2,15 @@ from typing import Any
 import numpy as np
 from deepface.models.Detector import Detector
 from deepface.detectors import (
-    OpenCvWrapper,
-    SsdWrapper,
-    DlibWrapper,
-    MtcnnWrapper,
-    RetinaFaceWrapper,
-    MediapipeWrapper,
-    YoloWrapper,
-    YunetWrapper,
-    FastMtcnnWrapper,
+    FastMtCnn,
+    MediaPipe,
+    MtCnn,
+    OpenCv,
+    Dlib,
+    RetinaFace,
+    Ssd,
+    Yolo,
+    YuNet,
 )


@ -25,15 +25,15 @@ def build_model(detector_backend: str) -> Any:
    global face_detector_obj  # singleton design pattern

    backends = {
-        "opencv": OpenCvWrapper.OpenCv,
-        "mtcnn": MtcnnWrapper.MtCnn,
-        "ssd": SsdWrapper.Ssd,
-        "dlib": DlibWrapper.Dlib,
-        "retinaface": RetinaFaceWrapper.RetinaFace,
-        "mediapipe": MediapipeWrapper.MediaPipe,
-        "yolov8": YoloWrapper.Yolo,
-        "yunet": YunetWrapper.YuNet,
-        "fastmtcnn": FastMtcnnWrapper.FastMtCnn,
+        "opencv": OpenCv.OpenCvClient,
+        "mtcnn": MtCnn.MtCnnClient,
+        "ssd": Ssd.SsdClient,
+        "dlib": Dlib.DlibClient,
+        "retinaface": RetinaFace.RetinaFaceClient,
+        "mediapipe": MediaPipe.MediaPipeClient,
+        "yolov8": Yolo.YoloClient,
+        "yunet": YuNet.YuNetClient,
+        "fastmtcnn": FastMtCnn.FastMtCnnClient,
    }

    if not "face_detector_obj" in globals():
@ -59,9 +59,20 @@ def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) ->
        detector_backend (str): detector name
        img (np.ndarray): pre-loaded image
        alig (bool): enable or disable alignment after detection
-    Returns
-        result (list): tuple of face (np.ndarray), face region (list)
-            , confidence score (float)
+    Returns:
+        results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+            where each tuple contains:
+            - detected_face (np.ndarray): The detected face as a NumPy array.
+            - face_region (List[float]): The image region represented as
+                a list of floats e.g. [x, y, w, h]
+            - confidence (float): The confidence score associated with the detected face.
+
+    Example:
+        results = [
+            (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+            (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+            (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+        ]
    """
    face_detector: Detector = build_model(detector_backend)
    return face_detector.detect_faces(img=img, align=align)
--- a/deepface/detectors/DlibWrapper.py
+++ b/deepface/detectors/DlibWrapper.py
@ -1,3 +1,4 @@
+from typing import List, Tuple
 import os
 import bz2
 import gdown
@ -9,7 +10,7 @@ from deepface.commons.logger import Logger
 logger = Logger(module="detectors.DlibWrapper")


-class Dlib(Detector):
+class DlibClient(Detector):
    def __init__(self):
        self.model = self.build_model()

@ -55,7 +56,9 @@ class Dlib(Detector):
        detector["sp"] = sp
        return detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with dlib
        Args:
@ -63,7 +66,19 @@ class Dlib(Detector):
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        # this is not a must dependency. do not import it in the global level.
        try:
--- a/deepface/detectors/FastMtcnnWrapper.py
+++ b/deepface/detectors/FastMtcnnWrapper.py
@ -1,24 +1,39 @@
-from typing import Any, Union
+from typing import Any, Union, List, Tuple
 import cv2
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection

 # Link -> https://github.com/timesler/facenet-pytorch
 # Examples https://www.kaggle.com/timesler/guide-to-mtcnn-in-facenet-pytorch


-class FastMtCnn(Detector):
+class FastMtCnnClient(Detector):
    def __init__(self):
        self.model = self.build_model()

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with mtcnn
        Args:
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        resp = []

@ -31,16 +46,16 @@ class FastMtCnn(Detector):
        )  # returns boundingbox, prob, landmark
        if len(detections[0]) > 0:

-            for detection in zip(*detections):
-                x, y, w, h = xyxy_to_xywh(detection[0])
+            for current_detection in zip(*detections):
+                x, y, w, h = xyxy_to_xywh(current_detection[0])
                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
                img_region = [x, y, w, h]
-                confidence = detection[1]
+                confidence = current_detection[1]

                if align:
-                    left_eye = detection[2][0]
-                    right_eye = detection[2][1]
-                    detected_face = self.align_face(
+                    left_eye = current_detection[2][0]
+                    right_eye = current_detection[2][1]
+                    detected_face = detection.align_face(
                        img=detected_face, left_eye=left_eye, right_eye=right_eye
                    )

--- a/deepface/detectors/MediapipeWrapper.py
+++ b/deepface/detectors/MediapipeWrapper.py
@ -1,11 +1,12 @@
-from typing import Any
+from typing import Any, List, Tuple
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection

 # Link - https://google.github.io/mediapipe/solutions/face_detection


-class MediaPipe(Detector):
+class MediaPipeClient(Detector):
    def __init__(self):
        self.model = self.build_model()

@ -28,14 +29,28 @@ class MediaPipe(Detector):
        face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7)
        return face_detection

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with mediapipe
        Args:
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        resp = []

@ -49,11 +64,11 @@ class MediaPipe(Detector):
            return resp

        # Extract the bounding box, the landmarks and the confidence score
-        for detection in results.detections:
-            (confidence,) = detection.score
+        for current_detection in results.detections:
+            (confidence,) = current_detection.score

-            bounding_box = detection.location_data.relative_bounding_box
-            landmarks = detection.location_data.relative_keypoints
+            bounding_box = current_detection.location_data.relative_bounding_box
+            landmarks = current_detection.location_data.relative_keypoints

            x = int(bounding_box.xmin * img_width)
            w = int(bounding_box.width * img_width)
@ -73,7 +88,7 @@ class MediaPipe(Detector):
                img_region = [x, y, w, h]

                if align:
-                    detected_face = self.align_face(
+                    detected_face = detection.align_face(
                        img=detected_face, left_eye=left_eye, right_eye=right_eye
                    )

--- a/deepface/detectors/MtCnn.py
+++ b/deepface/detectors/MtCnn.py
@ -0,0 +1,67 @@
+from typing import List, Tuple
+import cv2
+import numpy as np
+from mtcnn import MTCNN
+from deepface.models.Detector import Detector
+from deepface.modules import detection
+
+# pylint: disable=too-few-public-methods
+class MtCnnClient(Detector):
+    """
+    Class to cover common face detection functionality for MtCnn backend
+    """
+
+    def __init__(self):
+        self.model = MTCNN()
+
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
+        """
+        Detect and align face with mtcnn
+        Args:
+            img (np.ndarray): pre-loaded image
+            align (bool): default is true
+        Returns:
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
+        """
+
+        resp = []
+
+        detected_face = None
+        img_region = [0, 0, img.shape[1], img.shape[0]]
+
+        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # mtcnn expects RGB but OpenCV read BGR
+        detections = self.model.detect_faces(img_rgb)
+
+        if len(detections) > 0:
+
+            for current_detection in detections:
+                x, y, w, h = current_detection["box"]
+                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+                img_region = [x, y, w, h]
+                confidence = current_detection["confidence"]
+
+                if align:
+                    keypoints = current_detection["keypoints"]
+                    left_eye = keypoints["left_eye"]
+                    right_eye = keypoints["right_eye"]
+                    detected_face = detection.align_face(
+                        img=detected_face, left_eye=left_eye, right_eye=right_eye
+                    )
+
+                resp.append((detected_face, img_region, confidence))
+
+        return resp
--- a/deepface/detectors/MtcnnWrapper.py
+++ b/deepface/detectors/MtcnnWrapper.py
@ -1,51 +0,0 @@
-import cv2
-import numpy as np
-from mtcnn import MTCNN
-from deepface.models.Detector import Detector
-
-
-class MtCnn(Detector):
-    """
-    Class to cover common face detection functionalitiy for MtCnn backend
-    """
-
-    def __init__(self):
-        self.model = MTCNN()
-
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
-        """
-        Detect and align face with mtcnn
-        Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
-        Returns:
-            list of detected and aligned faces
-        """
-
-        resp = []
-
-        detected_face = None
-        img_region = [0, 0, img.shape[1], img.shape[0]]
-
-        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # mtcnn expects RGB but OpenCV read BGR
-        detections = self.model.detect_faces(img_rgb)
-
-        if len(detections) > 0:
-
-            for detection in detections:
-                x, y, w, h = detection["box"]
-                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
-                img_region = [x, y, w, h]
-                confidence = detection["confidence"]
-
-                if align:
-                    keypoints = detection["keypoints"]
-                    left_eye = keypoints["left_eye"]
-                    right_eye = keypoints["right_eye"]
-                    detected_face = self.align_face(
-                        img=detected_face, left_eye=left_eye, right_eye=right_eye
-                    )
-
-                resp.append((detected_face, img_region, confidence))
-
-        return resp
--- a/deepface/detectors/OpenCvWrapper.py
+++ b/deepface/detectors/OpenCvWrapper.py
@ -1,11 +1,12 @@
 import os
-from typing import Any
+from typing import Any, List, Tuple
 import cv2
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection


-class OpenCv(Detector):
+class OpenCvClient(Detector):
    """
    Class to cover common face detection functionalitiy for OpenCv backend
    """
@ -24,7 +25,9 @@ class OpenCv(Detector):
        detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
        return detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with opencv
        Args:
@ -32,7 +35,19 @@ class OpenCv(Detector):
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        resp = []

@ -56,7 +71,7 @@ class OpenCv(Detector):

                if align:
                    left_eye, right_eye = self.find_eyes(img=detected_face)
-                    detected_face = self.align_face(detected_face, left_eye, right_eye)
+                    detected_face = detection.align_face(detected_face, left_eye, right_eye)

                img_region = [x, y, w, h]

--- a/deepface/detectors/RetinaFaceWrapper.py
+++ b/deepface/detectors/RetinaFaceWrapper.py
@ -1,21 +1,36 @@
+from typing import List, Tuple
 import numpy as np
 from retinaface import RetinaFace as rf
 from retinaface.commons import postprocess
 from deepface.models.Detector import Detector

-
-class RetinaFace(Detector):
+# pylint: disable=too-few-public-methods
+class RetinaFaceClient(Detector):
    def __init__(self):
        self.model = rf.build_model()

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with retinaface
        Args:
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        resp = []

--- a/deepface/detectors/SsdWrapper.py
+++ b/deepface/detectors/SsdWrapper.py
@ -1,11 +1,13 @@
+from typing import List, Tuple
 import os
 import gdown
 import cv2
 import pandas as pd
 import numpy as np
-from deepface.detectors import OpenCvWrapper
+from deepface.detectors import OpenCv
 from deepface.commons import functions
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 from deepface.commons.logger import Logger

 logger = Logger(module="detectors.SsdWrapper")
@ -13,7 +15,7 @@ logger = Logger(module="detectors.SsdWrapper")
 # pylint: disable=line-too-long


-class Ssd(Detector):
+class SsdClient(Detector):
    def __init__(self):
        self.model = self.build_model()

@ -65,18 +67,32 @@ class Ssd(Detector):

        detector = {}
        detector["face_detector"] = face_detector
-        detector["opencv_module"] = OpenCvWrapper.OpenCv()
+        detector["opencv_module"] = OpenCv.OpenCvClient()

        return detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with ssd
        Args:
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        resp = []

@ -134,9 +150,9 @@ class Ssd(Detector):
                confidence = instance["confidence"]

                if align:
-                    opencv_module: OpenCvWrapper.OpenCv = self.model["opencv_module"]
+                    opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
                    left_eye, right_eye = opencv_module.find_eyes(detected_face)
-                    detected_face = self.align_face(
+                    detected_face = detection.align_face(
                        img=detected_face, left_eye=left_eye, right_eye=right_eye
                    )

--- a/deepface/detectors/YoloWrapper.py
+++ b/deepface/detectors/YoloWrapper.py
@ -1,6 +1,7 @@
-from typing import Any
+from typing import Any, List, Tuple
 import numpy as np
 from deepface.models.Detector import Detector
+from deepface.modules import detection
 from deepface.commons.logger import Logger

 logger = Logger()
@ -16,7 +17,7 @@ WEIGHT_URL = "https://drive.google.com/uc?id=1qcr9DbgsX3ryrz2uU8w4Xm3cOrRywXqb"
 LANDMARKS_CONFIDENCE_THRESHOLD = 0.5


-class Yolo(Detector):
+class YoloClient(Detector):
    def __init__(self):
        self.model = self.build_model()

@ -50,7 +51,9 @@ class Yolo(Detector):
        # Return face_detector
        return YOLO(weight_path)

-    def detect_faces(self, img: np.ndarray, align: bool = False) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = False
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with yolo
        Args:
@ -58,7 +61,19 @@ class Yolo(Detector):
            img (np.ndarray): pre-loaded image
            align (bool): default is false
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        resp = []

@ -85,7 +100,7 @@ class Yolo(Detector):
                    left_eye[1] > LANDMARKS_CONFIDENCE_THRESHOLD
                    and right_eye[1] > LANDMARKS_CONFIDENCE_THRESHOLD
                ):
-                    detected_face = self.align_face(
+                    detected_face = detection.align_face(
                        img=detected_face, left_eye=left_eye[0].cpu(), right_eye=right_eye[0].cpu()
                    )
            resp.append((detected_face, [x, y, w, h], confidence))
--- a/deepface/detectors/YunetWrapper.py
+++ b/deepface/detectors/YunetWrapper.py
@ -1,16 +1,17 @@
 import os
-from typing import Any
+from typing import Any, List, Tuple
 import cv2
 import numpy as np
 import gdown
 from deepface.commons import functions
-from deepface.commons.logger import Logger
 from deepface.models.Detector import Detector
+from deepface.modules import detection
+from deepface.commons.logger import Logger

 logger = Logger(module="detectors.YunetWrapper")


-class YuNet(Detector):
+class YuNetClient(Detector):
    def __init__(self):
        self.model = self.build_model()

@ -41,14 +42,28 @@ class YuNet(Detector):
            ) from err
        return face_detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
        Detect and align face with yunet
        Args:
            img (np.ndarray): pre-loaded image
            align (bool): default is true
        Returns:
-            list of detected and aligned faces
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
        # FaceDetector.detect_faces does not support score_threshold parameter.
        # We can set it via environment variable.
@ -107,6 +122,6 @@ class YuNet(Detector):
            detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
            img_region = [x, y, w, h]
            if align:
-                detected_face = self.align_face(detected_face, (x_re, y_re), (x_le, y_le))
+                detected_face = detection.align_face(detected_face, (x_re, y_re), (x_le, y_le))
            resp.append((detected_face, img_region, confidence))
        return resp
--- a/deepface/extendedmodels/Age.py
+++ b/deepface/extendedmodels/Age.py
@ -23,7 +23,7 @@ else:
 # ----------------------------------------

 # pylint: disable=too-few-public-methods
-class ApparentAge(Demography):
+class ApparentAgeClient(Demography):
    """
    Age model class
    """
--- a/deepface/extendedmodels/Emotion.py
+++ b/deepface/extendedmodels/Emotion.py
@ -33,7 +33,7 @@ else:
 labels = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]

 # pylint: disable=too-few-public-methods
-class FacialExpression(Demography):
+class EmotionClient(Demography):
    """
    Emotion model class
    """
--- a/deepface/extendedmodels/Gender.py
+++ b/deepface/extendedmodels/Gender.py
@ -26,7 +26,7 @@ else:
 labels = ["Woman", "Man"]

 # pylint: disable=too-few-public-methods
-class Gender(Demography):
+class GenderClient(Demography):
    """
    Gender model class
    """
--- a/deepface/extendedmodels/Race.py
+++ b/deepface/extendedmodels/Race.py
@ -25,7 +25,7 @@ else:
 labels = ["asian", "indian", "black", "white", "middle eastern", "latino hispanic"]

 # pylint: disable=too-few-public-methods
-class Race(Demography):
+class RaceClient(Demography):
    """
    Race model class
    """
--- a/deepface/models/Detector.py
+++ b/deepface/models/Detector.py
@ -1,39 +1,34 @@
+from typing import List, Tuple
 from abc import ABC, abstractmethod
-from typing import Union
 import numpy as np
-from PIL import Image

 # Notice that all facial detector models must inherit from this class


+# pylint: disable=unnecessary-pass, too-few-public-methods
 class Detector(ABC):
    @abstractmethod
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
-        pass
-
-    def align_face(
-        self, img: np.ndarray, left_eye: Union[list, tuple], right_eye: Union[list, tuple]
-    ) -> np.ndarray:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True
+    ) -> List[Tuple[np.ndarray, List[float], float]]:
        """
-        Align a given image horizantally with respect to their left and right eye locations
+        Detect faces from a given image
        Args:
-            img (np.ndarray): pre-loaded image with detected face
-            left_eye (list or tuple): coordinates of left eye with respect to the you
-            right_eye(list or tuple): coordinates of right eye with respect to the you
+            img (np.ndarray): pre-loaded image as a NumPy array
+            align (bool): enable or disable alignment after face detection
        Returns:
-            img (np.ndarray): aligned facial image
+            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
+                where each tuple contains:
+                - detected_face (np.ndarray): The detected face as a NumPy array.
+                - face_region (List[float]): The image region represented as
+                    a list of floats e.g. [x, y, w, h]
+                - confidence (float): The confidence score associated with the detected face.
+
+        Example:
+            results = [
+                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
+                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
+                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
+            ]
        """
-        # if eye could not be detected for the given image, return image itself
-        if left_eye is None or right_eye is None:
-            return img
-
-        # sometimes unexpectedly detected images come with nil dimensions
-        if img.shape[0] == 0 or img.shape[1] == 0:
-            return img
-
-        angle = float(
-            np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]))
-        )
-        img = Image.fromarray(img)
-        img = np.array(img.rotate(angle))
-        return img
+        pass
--- a/deepface/models/FacialRecognition.py
+++ b/deepface/models/FacialRecognition.py
@ -1,5 +1,5 @@
-from abc import ABC
-from typing import Any, Union
+from abc import ABC, abstractmethod
+from typing import Any, Union, List
 import numpy as np
 from deepface.commons import functions

@ -16,13 +16,6 @@ class FacialRecognition(ABC):
    model: Union[Model, Any]
    model_name: str

-    def find_embeddings(self, img: np.ndarray) -> list:
-        if not isinstance(self.model, Model):
-            raise ValueError(
-                "If a facial recognition model is not type of (tf.)keras.models.Model,"
-                "Then its find_embeddings method must be implemented its own module."
-                f"However {self.model_name}'s model type is {type(self.model)}"
-            )
-        # model.predict causes memory issue when it is called in a for loop
-        # embedding = model.predict(img, verbose=0)[0].tolist()
-        return self.model(img, training=False).numpy()[0].tolist()
+    @abstractmethod
+    def find_embeddings(self, img: np.ndarray) -> List[float]:
+        pass
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@ -3,6 +3,7 @@ from typing import Any, Dict, List, Tuple, Union

 # 3rd party dependencies
 import numpy as np
+from PIL import Image

 # project dependencies
 from deepface.commons import functions
@ -40,8 +41,11 @@ def extract_faces(
            grayscale (boolean): extracting faces in rgb or gray scale

    Returns:
-            list of dictionaries. Each dictionary will have facial image itself (RGB),
-            extracted area from the original image and confidence score.
+        results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
+        - "face" (np.ndarray): The detected face as a NumPy array.
+        - "facial_area" (List[float]): The detected face's regions represented as a list of floats.
+        - "confidence" (float): The confidence score associated with the detected face.
+

    """

@ -70,3 +74,31 @@ def extract_faces(
        resp_objs.append(resp_obj)

    return resp_objs
+
+
+def align_face(
+    img: np.ndarray,
+    left_eye: Union[list, tuple],
+    right_eye: Union[list, tuple],
+) -> np.ndarray:
+    """
+    Align a given image horizontally with respect to its left and right eye locations
+    Args:
+        img (np.ndarray): pre-loaded image with detected face
+        left_eye (list or tuple): coordinates of left eye with respect to you (the viewer)
+        right_eye (list or tuple): coordinates of right eye with respect to you (the viewer)
+    Returns:
+        img (np.ndarray): aligned facial image
+    """
+    # if eye could not be detected for the given image, return image itself
+    if left_eye is None or right_eye is None:
+        return img
+
+    # sometimes unexpectedly detected images come with nil dimensions
+    if img.shape[0] == 0 or img.shape[1] == 0:
+        return img
+
+    angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
+    img = Image.fromarray(img)
+    img = np.array(img.rotate(angle))
+    return img
--- a/deepface/modules/modeling.py
+++ b/deepface/modules/modeling.py
@ -2,16 +2,7 @@
 from typing import Any

 # project dependencies
-from deepface.basemodels import (
-    VGGFace,
-    OpenFace,
-    Facenet,
-    FbDeepFace,
-    DeepID,
-    DlibResNet,
-    ArcFace,
-    SFace,
-)
+from deepface.basemodels import VGGFace, OpenFace, FbDeepFace, DeepID, ArcFace, SFace, Dlib, Facenet
 from deepface.extendedmodels import Age, Gender, Race, Emotion


@ -31,19 +22,19 @@ def build_model(model_name: str) -> Any:
    global model_obj

    models = {
-        "VGG-Face": VGGFace.VggFace,
-        "OpenFace": OpenFace.OpenFace,
-        "Facenet": Facenet.FaceNet128d,
-        "Facenet512": Facenet.FaceNet512d,
-        "DeepFace": FbDeepFace.DeepFace,
-        "DeepID": DeepID.DeepId,
-        "Dlib": DlibResNet.Dlib,
-        "ArcFace": ArcFace.ArcFace,
-        "SFace": SFace.SFace,
-        "Emotion": Emotion.FacialExpression,
-        "Age": Age.ApparentAge,
-        "Gender": Gender.Gender,
-        "Race": Race.Race,
+        "VGG-Face": VGGFace.VggFaceClient,
+        "OpenFace": OpenFace.OpenFaceClient,
+        "Facenet": Facenet.FaceNet128dClient,
+        "Facenet512": Facenet.FaceNet512dClient,
+        "DeepFace": FbDeepFace.DeepFaceClient,
+        "DeepID": DeepID.DeepIdClient,
+        "Dlib": Dlib.DlibClient,
+        "ArcFace": ArcFace.ArcFaceClient,
+        "SFace": SFace.SFaceClient,
+        "Emotion": Emotion.EmotionClient,
+        "Age": Age.ApparentAgeClient,
+        "Gender": Gender.GenderClient,
+        "Race": Race.RaceClient,
    }

    if not "model_obj" in globals():
--- a/tests/visual-test.py
+++ b/tests/visual-test.py
@ -14,11 +14,12 @@ model_names = [
    "Facenet512",
    "OpenFace",
    "DeepFace",
-    "DeepID",
+    # "DeepID",
    "Dlib",
    "ArcFace",
    "SFace",
 ]
+
 detector_backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface"]


@ -44,10 +45,11 @@ dfs = DeepFace.find(
 for df in dfs:
    logger.info(df)

+
 # extract faces
 for detector_backend in detector_backends:
    face_objs = DeepFace.extract_faces(
-        img_path="dataset/img1.jpg", detector_backend=detector_backend
+        img_path="dataset/img11.jpg", detector_backend=detector_backend
    )
    for face_obj in face_objs:
        face = face_obj["face"]