Mirror of https://github.com/serengil/deepface.git (synced 2025-06-07 20:15:21 +00:00)

Merge pull request #990 from serengil/feat-task-2901-cleaning-modules
Feat task 2901 cleaning modules

This commit is contained in: 64d33717a9
.gitignore (vendored, 2 changes)

@@ -11,3 +11,5 @@ tests/dataset/*.pkl
 tests/*.ipynb
 tests/*.csv
 *.pyc
+**/.coverage
+**/.coverage.*
Makefile (3 changes)

@@ -3,3 +3,6 @@ test:

 lint:
 	python -m pylint deepface/ --fail-under=10
+
+coverage:
+	pip install pytest-cov && cd tests && python -m pytest --cov=deepface
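With the new target in place, running "make coverage" installs pytest-cov and executes the suite under coverage measurement from the tests/ directory, alongside the existing test and lint targets.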
deepface/DeepFace.py

@@ -58,6 +58,7 @@ def verify(
     distance_metric: str = "cosine",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
 ) -> Dict[str, Any]:
     """
@@ -83,6 +84,8 @@ def verify(

         align (bool): Flag to enable face alignment (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         normalization (string): Normalize the input image before feeding it to the model.
             Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)

@@ -119,6 +122,7 @@ def verify(
         distance_metric=distance_metric,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
         normalization=normalization,
     )

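The same three-step pattern (accept the keyword, document it, forward it) repeats for analyze, find, represent, and extract_faces below. A minimal usage sketch of the new knob on the public API (the image paths are placeholders, not part of this diff):

    from deepface import DeepFace

    # Grow every detected facial area by 10% on each side before verification.
    result = DeepFace.verify(
        img1_path="img1.jpg",  # hypothetical input files
        img2_path="img2.jpg",
        expand_percentage=10,
    )
    print(result["verified"])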
@@ -129,6 +133,7 @@ def analyze(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     silent: bool = False,
 ) -> List[Dict[str, Any]]:
     """
@@ -152,6 +157,8 @@ def analyze(

         align (boolean): Perform alignment based on the eye positions (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         silent (boolean): Suppress or allow some log messages for a quieter analysis process
             (default is False).

@@ -209,6 +216,7 @@ def analyze(
         enforce_detection=enforce_detection,
         detector_backend=detector_backend,
         align=align,
+        expand_percentage=expand_percentage,
         silent=silent,
     )

@@ -221,6 +229,7 @@ def find(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     threshold: Optional[float] = None,
     normalization: str = "base",
     silent: bool = False,
@@ -249,6 +258,8 @@ def find(

         align (boolean): Perform alignment based on the eye positions (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         threshold (float): Specify a threshold to determine whether a pair represents the same
             person or different individuals. This threshold is used for comparing distances.
             If left unset, default pre-tuned threshold values will be applied based on the specified
@@ -286,6 +297,7 @@ def find(
         enforce_detection=enforce_detection,
         detector_backend=detector_backend,
         align=align,
+        expand_percentage=expand_percentage,
         threshold=threshold,
         normalization=normalization,
         silent=silent,
@@ -298,6 +310,7 @@ def represent(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
 ) -> List[Dict[str, Any]]:
     """
@@ -320,6 +333,8 @@ def represent(

         align (boolean): Perform alignment based on the eye positions (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         normalization (string): Normalize the input image before feeding it to the model.
             Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
             (default is base).
@@ -346,6 +361,7 @@ def represent(
         enforce_detection=enforce_detection,
         detector_backend=detector_backend,
         align=align,
+        expand_percentage=expand_percentage,
         normalization=normalization,
     )

@@ -409,6 +425,7 @@ def extract_faces(
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     grayscale: bool = False,
 ) -> List[Dict[str, Any]]:
     """
@@ -429,6 +446,8 @@ def extract_faces(

         align (bool): Flag to enable face alignment (default is True).

+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).

@@ -448,7 +467,9 @@ def extract_faces(
         detector_backend=detector_backend,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
         grayscale=grayscale,
+        human_readable=True,
     )

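Note the extra human_readable=True forwarded only here: extract_faces is the one public entry point that returns face crops to the caller, so it presumably requests display-oriented output from the internal detection path. A hedged sketch of the public call with the new parameter (file name hypothetical):

    from deepface import DeepFace

    face_objs = DeepFace.extract_faces(
        img_path="photo.jpg",
        detector_backend="opencv",
        expand_percentage=25,  # enlarge each detected area by 25% before padding/resizing
    )
    for face_obj in face_objs:
        print(face_obj["facial_area"], face_obj["confidence"])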
@@ -459,3 +480,49 @@ def cli() -> None:
     import fire

     fire.Fire()
+
+
+# deprecated function(s)
+
+
+def detectFace(
+    img_path: Union[str, np.ndarray],
+    target_size: tuple = (224, 224),
+    detector_backend: str = "opencv",
+    enforce_detection: bool = True,
+    align: bool = True,
+) -> Union[np.ndarray, None]:
+    """
+    Deprecated face detection function. Use extract_faces for same functionality.
+
+    Args:
+        img_path (str or np.ndarray): Path to the first image. Accepts exact image path
+            as a string, numpy array (BGR), or base64 encoded images.
+
+        target_size (tuple): final shape of facial image. black pixels will be
+            added to resize the image (default is (224, 224)).
+
+        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
+            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
+
+        enforce_detection (boolean): If no face is detected in an image, raise an exception.
+            Set to False to avoid the exception for low-resolution images (default is True).
+
+        align (bool): Flag to enable face alignment (default is True).
+
+    Returns:
+        img (np.ndarray): detected (and aligned) facial area image as numpy array
+    """
+    logger.warn("Function detectFace is deprecated. Use extract_faces instead.")
+    face_objs = extract_faces(
+        img_path=img_path,
+        target_size=target_size,
+        detector_backend=detector_backend,
+        enforce_detection=enforce_detection,
+        align=align,
+        grayscale=False,
+    )
+    extracted_face = None
+    if len(face_objs) > 0:
+        extracted_face = face_objs[0]["face"]
+    return extracted_face
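For callers migrating off the deprecated wrapper, the shim above makes the equivalence explicit; a sketch (file name hypothetical):

    from deepface import DeepFace

    # Old, deprecated: returns a single face image or None
    face_img = DeepFace.detectFace(img_path="photo.jpg")

    # New equivalent: detectFace now just takes the first result of extract_faces
    face_objs = DeepFace.extract_faces(img_path="photo.jpg", grayscale=False)
    face_img = face_objs[0]["face"] if len(face_objs) > 0 else None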
deepface/basemodels/ArcFace.py

@@ -53,6 +53,8 @@ class ArcFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "ArcFace"
+        self.input_shape = (112, 112)
+        self.output_shape = 512

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/DeepID.py

@@ -49,6 +49,8 @@ class DeepIdClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "DeepId"
+        self.input_shape = (47, 55)
+        self.output_shape = 160

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/Dlib.py

@@ -20,6 +20,8 @@ class DlibClient(FacialRecognition):
     def __init__(self):
         self.model = DlibResNet()
         self.model_name = "Dlib"
+        self.input_shape = (150, 150)
+        self.output_shape = 128

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/Facenet.py

@@ -53,6 +53,8 @@ class FaceNet128dClient(FacialRecognition):
     def __init__(self):
         self.model = load_facenet128d_model()
         self.model_name = "FaceNet-128d"
+        self.input_shape = (160, 160)
+        self.output_shape = 128

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
@@ -75,6 +77,8 @@ class FaceNet512dClient(FacialRecognition):
     def __init__(self):
         self.model = load_facenet512d_model()
         self.model_name = "FaceNet-512d"
+        self.input_shape = (160, 160)
+        self.output_shape = 512

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/FbDeepFace.py

@@ -46,6 +46,8 @@ class DeepFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "DeepFace"
+        self.input_shape = (152, 152)
+        self.output_shape = 4096

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/OpenFace.py

@@ -36,6 +36,8 @@ class OpenFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "OpenFace"
+        self.input_shape = (96, 96)
+        self.output_shape = 128

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/SFace.py

@@ -22,6 +22,8 @@ class SFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "SFace"
+        self.input_shape = (112, 112)
+        self.output_shape = 128

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
deepface/basemodels/VGGFace.py

@@ -43,6 +43,8 @@ class VggFaceClient(FacialRecognition):
     def __init__(self):
         self.model = load_model()
         self.model_name = "VGG-Face"
+        self.input_shape = (224, 224)
+        self.output_shape = 4096

     def find_embeddings(self, img: np.ndarray) -> List[float]:
         """
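Taken together, the nine recognition clients now carry uniform shape metadata instead of leaving callers to look it up per model. Collected from the hunks above (a summary written for this review, not code from the repository):

    # model_name -> (input_shape, output_shape) as set in the __init__ methods above
    MODEL_SHAPES = {
        "ArcFace": ((112, 112), 512),
        "DeepId": ((47, 55), 160),
        "Dlib": ((150, 150), 128),
        "FaceNet-128d": ((160, 160), 128),
        "FaceNet-512d": ((160, 160), 512),
        "DeepFace": ((152, 152), 4096),
        "OpenFace": ((96, 96), 128),
        "SFace": ((112, 112), 128),
        "VGG-Face": ((224, 224), 4096),
    }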
deepface/commons/distance.py

@@ -2,7 +2,7 @@ from typing import Union
 import numpy as np


-def findCosineDistance(
+def find_cosine_distance(
     source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
 ) -> np.float64:
     if isinstance(source_representation, list):
@@ -17,7 +17,7 @@ def findCosineDistance(
     return 1 - (a / (np.sqrt(b) * np.sqrt(c)))


-def findEuclideanDistance(
+def find_euclidean_distance(
     source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
 ) -> np.float64:
     if isinstance(source_representation, list):
@@ -38,7 +38,7 @@ def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
     return x / np.sqrt(np.sum(np.multiply(x, x)))


-def findThreshold(model_name: str, distance_metric: str) -> float:
+def find_threshold(model_name: str, distance_metric: str) -> float:

     base_threshold = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75}

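Only the names change in this file (camelCase to snake_case); the bodies are untouched. For reference, a self-contained sketch of the cosine distance that find_cosine_distance computes:

    import numpy as np

    def cosine_distance(a: np.ndarray, b: np.ndarray) -> np.float64:
        # 1 - cos(angle between a and b), as in find_cosine_distance above
        return 1 - np.dot(a, b) / (np.sqrt(np.dot(a, a)) * np.sqrt(np.dot(b, b)))

    print(cosine_distance(np.array([1.0, 0.0]), np.array([0.0, 1.0])))  # 1.0, orthogonal vectors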
deepface/commons/functions.py

@@ -1,42 +1,19 @@
 import os
-from typing import Union, Tuple, List
-import base64
 from pathlib import Path

 # 3rd party dependencies
-from PIL import Image
-import requests
-import numpy as np
-import cv2
 import tensorflow as tf

 # package dependencies
-from deepface.detectors import DetectorWrapper
-from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.commons.logger import Logger

 logger = Logger(module="commons.functions")

-# pylint: disable=no-else-raise

-# --------------------------------------------------
-# configurations of dependencies
-
-
 def get_tf_major_version() -> int:
     return int(tf.__version__.split(".", maxsplit=1)[0])


-tf_major_version = get_tf_major_version()
-
-if tf_major_version == 1:
-    from keras.preprocessing import image
-elif tf_major_version == 2:
-    from tensorflow.keras.preprocessing import image
-
-# --------------------------------------------------
-
-
 def initialize_folder() -> None:
     """Initialize the folder for storing weights and models.

@@ -63,293 +40,3 @@ def get_deepface_home() -> str:
         str: the home directory.
     """
     return str(os.getenv("DEEPFACE_HOME", default=str(Path.home())))
-
-
-# --------------------------------------------------
-
-
-def loadBase64Img(uri: str) -> np.ndarray:
-    """Load image from base64 string.
-
-    Args:
-        uri: a base64 string.
-
-    Returns:
-        numpy array: the loaded image.
-    """
-    encoded_data = uri.split(",")[1]
-    nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
-    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-    # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
-    return img_bgr
-
-
-def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
-    """
-    Load image from path, url, base64 or numpy array.
-    Args:
-        img: a path, url, base64 or numpy array.
-    Returns:
-        image (numpy array): the loaded image in BGR format
-        image name (str): image name itself
-    """
-
-    # The image is already a numpy array
-    if isinstance(img, np.ndarray):
-        return img, "numpy array"
-
-    if isinstance(img, Path):
-        img = str(img)
-
-    if not isinstance(img, str):
-        raise ValueError(f"img must be numpy array or str but it is {type(img)}")
-
-    # The image is a base64 string
-    if img.startswith("data:image/"):
-        return loadBase64Img(img), "base64 encoded string"
-
-    # The image is a url
-    if img.startswith("http"):
-        return (
-            np.array(Image.open(requests.get(img, stream=True, timeout=60).raw).convert("BGR")),
-            # return url as image name
-            img,
-        )
-
-    # The image is a path
-    if os.path.isfile(img) is not True:
-        raise ValueError(f"Confirm that {img} exists")
-
-    # image must be a file on the system then
-
-    # image name must have english characters
-    if img.isascii() is False:
-        raise ValueError(f"Input image must not have non-english characters - {img}")
-
-    img_obj_bgr = cv2.imread(img)
-    # img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
-    return img_obj_bgr, img
-
-
-# --------------------------------------------------
-
-
-def extract_faces(
-    img: Union[str, np.ndarray],
-    target_size: tuple = (224, 224),
-    detector_backend: str = "opencv",
-    grayscale: bool = False,
-    enforce_detection: bool = True,
-    align: bool = True,
-) -> List[Tuple[np.ndarray, dict, float]]:
-    """
-    Extract faces from an image.
-    Args:
-        img: a path, url, base64 or numpy array.
-        target_size (tuple, optional): the target size of the extracted faces.
-            Defaults to (224, 224).
-        detector_backend (str, optional): the face detector backend. Defaults to "opencv".
-        grayscale (bool, optional): whether to convert the extracted faces to grayscale.
-            Defaults to False.
-        enforce_detection (bool, optional): whether to enforce face detection. Defaults to True.
-        align (bool, optional): whether to align the extracted faces. Defaults to True.
-
-    Raises:
-        ValueError: if face could not be detected and enforce_detection is True.
-
-    Returns:
-        results (List[Tuple[np.ndarray, dict, float]]): A list of tuples
-            where each tuple contains:
-            - detected_face (np.ndarray): The detected face as a NumPy array.
-            - face_region (dict): The image region represented as
-                {"x": x, "y": y, "w": w, "h": h}
-            - confidence (float): The confidence score associated with the detected face.
-    """
-
-    # this is going to store a list of img itself (numpy), it region and confidence
-    extracted_faces = []
-
-    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
-    img, img_name = load_image(img)
-
-    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
-
-    if detector_backend == "skip":
-        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
-    else:
-        face_objs = DetectorWrapper.detect_faces(detector_backend, img, align)
-
-    # in case of no face found
-    if len(face_objs) == 0 and enforce_detection is True:
-        if img_name is not None:
-            raise ValueError(
-                f"Face could not be detected in {img_name}."
-                "Please confirm that the picture is a face photo "
-                "or consider to set enforce_detection param to False."
-            )
-        else:
-            raise ValueError(
-                "Face could not be detected. Please confirm that the picture is a face photo "
-                "or consider to set enforce_detection param to False."
-            )
-
-    if len(face_objs) == 0 and enforce_detection is False:
-        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
-
-    for face_obj in face_objs:
-        current_img = face_obj.img
-        current_region = face_obj.facial_area
-        confidence = face_obj.confidence
-        if current_img.shape[0] > 0 and current_img.shape[1] > 0:
-            if grayscale is True:
-                current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
-
-            # resize and padding
-            factor_0 = target_size[0] / current_img.shape[0]
-            factor_1 = target_size[1] / current_img.shape[1]
-            factor = min(factor_0, factor_1)
-
-            dsize = (
-                int(current_img.shape[1] * factor),
-                int(current_img.shape[0] * factor),
-            )
-            current_img = cv2.resize(current_img, dsize)
-
-            diff_0 = target_size[0] - current_img.shape[0]
-            diff_1 = target_size[1] - current_img.shape[1]
-            if grayscale is False:
-                # Put the base image in the middle of the padded image
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                        (0, 0),
-                    ),
-                    "constant",
-                )
-            else:
-                current_img = np.pad(
-                    current_img,
-                    (
-                        (diff_0 // 2, diff_0 - diff_0 // 2),
-                        (diff_1 // 2, diff_1 - diff_1 // 2),
-                    ),
-                    "constant",
-                )
-
-            # double check: if target image is not still the same size with target.
-            if current_img.shape[0:2] != target_size:
-                current_img = cv2.resize(current_img, target_size)
-
-            # normalizing the image pixels
-            # what this line doing? must?
-            img_pixels = image.img_to_array(current_img)
-            img_pixels = np.expand_dims(img_pixels, axis=0)
-            img_pixels /= 255  # normalize input in [0, 1]
-
-            # int cast is for the exception - object of type 'float32' is not JSON serializable
-            region_obj = {
-                "x": current_region.x,
-                "y": current_region.y,
-                "w": current_region.w,
-                "h": current_region.h,
-            }
-
-            extracted_face = (img_pixels, region_obj, confidence)
-            extracted_faces.append(extracted_face)
-
-    if len(extracted_faces) == 0 and enforce_detection == True:
-        raise ValueError(
-            f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
-        )
-
-    return extracted_faces
-
-
-def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
-    """Normalize input image.
-
-    Args:
-        img (numpy array): the input image.
-        normalization (str, optional): the normalization technique. Defaults to "base",
-            for no normalization.
-
-    Returns:
-        numpy array: the normalized image.
-    """
-
-    # issue 131 declares that some normalization techniques improves the accuracy
-
-    if normalization == "base":
-        return img
-
-    # @trevorgribble and @davedgd contributed this feature
-    # restore input in scale of [0, 255] because it was normalized in scale of
-    # [0, 1] in preprocess_face
-    img *= 255
-
-    if normalization == "raw":
-        pass  # return just restored pixels
-
-    elif normalization == "Facenet":
-        mean, std = img.mean(), img.std()
-        img = (img - mean) / std
-
-    elif normalization == "Facenet2018":
-        # simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
-        img /= 127.5
-        img -= 1
-
-    elif normalization == "VGGFace":
-        # mean subtraction based on VGGFace1 training data
-        img[..., 0] -= 93.5940
-        img[..., 1] -= 104.7624
-        img[..., 2] -= 129.1863
-
-    elif normalization == "VGGFace2":
-        # mean subtraction based on VGGFace2 training data
-        img[..., 0] -= 91.4953
-        img[..., 1] -= 103.8827
-        img[..., 2] -= 131.0912
-
-    elif normalization == "ArcFace":
-        # Reference study: The faces are cropped and resized to 112×112,
-        # and each pixel (ranged between [0, 255]) in RGB images is normalised
-        # by subtracting 127.5 then divided by 128.
-        img -= 127.5
-        img /= 128
-    else:
-        raise ValueError(f"unimplemented normalization type - {normalization}")
-
-    return img
-
-
-def find_target_size(model_name: str) -> tuple:
-    """Find the target size of the model.
-
-    Args:
-        model_name (str): the model name.
-
-    Returns:
-        tuple: the target size.
-    """
-
-    target_sizes = {
-        "VGG-Face": (224, 224),
-        "Facenet": (160, 160),
-        "Facenet512": (160, 160),
-        "OpenFace": (96, 96),
-        "DeepFace": (152, 152),
-        "DeepID": (47, 55),
-        "Dlib": (150, 150),
-        "ArcFace": (112, 112),
-        "SFace": (112, 112),
-    }
-
-    target_size = target_sizes.get(model_name)
-
-    if target_size == None:
-        raise ValueError(f"unimplemented model name - {model_name}")
-
-    return target_size
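The loading, extraction, normalization, and target-size helpers are deleted here wholesale; given the PR title, they presumably move into dedicated modules rather than disappear. The aspect-preserving resize-and-pad arithmetic they carried is the subtle part; a standalone sketch of it (assumes a 3-channel BGR crop):

    import cv2
    import numpy as np

    def resize_with_padding(face: np.ndarray, target_size: tuple = (224, 224)) -> np.ndarray:
        """Shrink the crop to fit target_size without distortion, then center it on
        black padding, matching the arithmetic in the removed extract_faces body."""
        factor = min(target_size[0] / face.shape[0], target_size[1] / face.shape[1])
        dsize = (int(face.shape[1] * factor), int(face.shape[0] * factor))  # cv2 wants (w, h)
        face = cv2.resize(face, dsize)
        diff_0 = target_size[0] - face.shape[0]
        diff_1 = target_size[1] - face.shape[1]
        return np.pad(
            face,
            ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)),
            "constant",
        )

    padded = resize_with_padding(np.ones((100, 50, 3), dtype=np.uint8))
    print(padded.shape)  # (224, 224, 3)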
deepface/detectors/DetectorWrapper.py

@@ -12,6 +12,9 @@ from deepface.detectors import (
     Yolo,
     YuNet,
 )
+from deepface.commons.logger import Logger
+
+logger = Logger(module="deepface/detectors/DetectorWrapper.py")


 def build_model(detector_backend: str) -> Any:
@@ -52,19 +55,35 @@ def build_model(detector_backend: str) -> Any:
     return face_detector_obj[detector_backend]


-def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+def detect_faces(
+    detector_backend: str, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+) -> List[DetectedFace]:
     """
     Detect face(s) from a given image
     Args:
         detector_backend (str): detector name
+
         img (np.ndarray): pre-loaded image
-        alig (bool): enable or disable alignment after detection
+
+        align (bool): enable or disable alignment after detection
+
+        expand_percentage (int): expand detected facial area with a percentage (default is 0).
+
     Returns:
         results (List[DetectedFace]): A list of DetectedFace objects
             where each object contains:
+
         - img (np.ndarray): The detected face as a NumPy array.
+
        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
        - confidence (float): The confidence score associated with the detected face.
     """
     face_detector: Detector = build_model(detector_backend)
-    return face_detector.detect_faces(img=img, align=align)
+    if expand_percentage < 0:
+        logger.warn(
+            f"Expand percentage cannot be negative but you set it to {expand_percentage}."
+            "Overwritten it to 0."
+        )
+        expand_percentage = 0
+    return face_detector.detect_faces(img=img, align=align, expand_percentage=expand_percentage)
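With validation centralized in the wrapper, the individual backends below can assume a non-negative percentage. A hedged usage sketch (the input frame is synthetic):

    import numpy as np
    from deepface.detectors import DetectorWrapper

    img = np.zeros((480, 640, 3), dtype=np.uint8)  # blank synthetic frame, no faces
    # A negative value is logged and clamped to 0 before reaching the backend.
    faces = DetectorWrapper.detect_faces("opencv", img=img, align=True, expand_percentage=-5)
    print(len(faces))  # 0 for this blank image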
deepface/detectors/Dlib.py

@@ -56,18 +56,27 @@ class DlibClient(Detector):
         detector["sp"] = sp
         return detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with dlib

         Args:
-            face_detector (Any): dlib face detector object
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace objects
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         # this is not a must dependency. do not import it in the global level.
|
|||||||
"Please install using 'pip install dlib' "
|
"Please install using 'pip install dlib' "
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
|
if expand_percentage != 0:
|
||||||
|
logger.warn(
|
||||||
|
f"You set expand_percentage argument to {expand_percentage},"
|
||||||
|
"but dlib hog handles detection by itself"
|
||||||
|
)
|
||||||
|
|
||||||
resp = []
|
resp = []
|
||||||
|
|
||||||
sp = self.model["sp"]
|
sp = self.model["sp"]
|
||||||
|
deepface/detectors/FastMtCnn.py

@@ -12,17 +12,27 @@ class FastMtCnnClient(Detector):
     def __init__(self):
         self.model = self.build_model()

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with mtcnn
+
         Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace objects
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         resp = []
@@ -37,7 +47,16 @@ class FastMtCnnClient(Detector):

         for current_detection in zip(*detections):
             x, y, w, h = xyxy_to_xywh(current_detection[0])
-            detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+
+            # expand the facial area to be extracted and stay within img.shape limits
+            x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+            y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+            w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+            h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+            # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+            detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
+
             img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
             confidence = current_detection[1]

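This expand-and-clip block is stamped, nearly verbatim, into each detector that follows (MediaPipe, MtCnn, OpenCv, RetinaFace, Ssd, Yolo, YuNet). Worked through with illustrative numbers:

    # Hypothetical detection inside a 1000x1000 image, expand_percentage = 10
    img_w, img_h = 1000, 1000
    x, y, w, h = 100, 200, 200, 300

    x2 = max(0, x - int((w * 10) / 100))      # 100 - 20 -> 80   (expand left)
    y2 = max(0, y - int((h * 10) / 100))      # 200 - 30 -> 170  (expand top)
    w2 = min(img_w, w + int((w * 10) / 100))  # 200 + 20 -> 220  (expand right)
    h2 = min(img_h, h + int((h * 10) / 100))  # 300 + 30 -> 330  (expand bottom)

    crop = (slice(y2, y2 + h2), slice(x2, x2 + w2))  # img[170:500, 80:300]
    print(crop)

Note that the min() here caps the expanded width and height themselves rather than the right and bottom edges, so a crop near the border can still reach past the frame; NumPy slicing silently clips that overshoot.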
deepface/detectors/MediaPipe.py

@@ -29,17 +29,27 @@ class MediaPipeClient(Detector):
         face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7)
         return face_detection

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with mediapipe
+
         Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace): A list of DetectedFace objects
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         resp = []
@@ -74,7 +84,16 @@ class MediaPipeClient(Detector):
             # left_ear = (int(landmarks[5].x * img_width), int(landmarks[5].y * img_height))

             if x > 0 and y > 0:
-                detected_face = img[y : y + h, x : x + w]
+
+                # expand the facial area to be extracted and stay within img.shape limits
+                x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+                y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+                w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+                h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+                # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+                detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
+
                 img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)

                 if align:
deepface/detectors/MtCnn.py

@@ -1,5 +1,4 @@
 from typing import List
-import cv2
 import numpy as np
 from mtcnn import MTCNN
 from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
@@ -14,17 +13,27 @@ class MtCnnClient(Detector):
     def __init__(self):
         self.model = MTCNN()

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with mtcnn
+
         Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace objects
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """

|
|||||||
|
|
||||||
detected_face = None
|
detected_face = None
|
||||||
|
|
||||||
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB but OpenCV read BGR
|
# mtcnn expects RGB but OpenCV read BGR
|
||||||
|
# img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
||||||
|
img_rgb = img[:, :, ::-1]
|
||||||
detections = self.model.detect_faces(img_rgb)
|
detections = self.model.detect_faces(img_rgb)
|
||||||
|
|
||||||
if detections is not None and len(detections) > 0:
|
if detections is not None and len(detections) > 0:
|
||||||
|
|
||||||
for current_detection in detections:
|
for current_detection in detections:
|
||||||
x, y, w, h = current_detection["box"]
|
x, y, w, h = current_detection["box"]
|
||||||
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
|
|
||||||
|
# expand the facial area to be extracted and stay within img.shape limits
|
||||||
|
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
|
||||||
|
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
|
||||||
|
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
|
||||||
|
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
|
||||||
|
|
||||||
|
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
|
||||||
|
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
|
||||||
|
|
||||||
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
|
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
|
||||||
confidence = current_detection["confidence"]
|
confidence = current_detection["confidence"]
|
||||||
|
|
||||||
|
deepface/detectors/OpenCv.py

@@ -25,18 +25,27 @@ class OpenCvClient(Detector):
         detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
         return detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with opencv

         Args:
-            face_detector (Any): opencv face detector object
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
             results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         resp = []
@@ -56,7 +65,15 @@ class OpenCvClient(Detector):

         if len(faces) > 0:
             for (x, y, w, h), confidence in zip(faces, scores):
-                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+
+                # expand the facial area to be extracted and stay within img.shape limits
+                x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+                y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+                w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+                h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+                # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+                detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]

                 if align:
                     left_eye, right_eye = self.find_eyes(img=detected_face)
deepface/detectors/RetinaFace.py

@@ -9,17 +9,27 @@ class RetinaFaceClient(Detector):
    def __init__(self):
        self.model = rf.build_model()

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with retinaface
+
         Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace object
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         resp = []
@@ -38,10 +48,14 @@ class RetinaFaceClient(Detector):
             img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
             confidence = identity["score"]

-            # detected_face = img[int(y):int(y+h), int(x):int(x+w)] #opencv
-            detected_face = img[
-                facial_area[1] : facial_area[3], facial_area[0] : facial_area[2]
-            ]
+            # expand the facial area to be extracted and stay within img.shape limits
+            x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+            y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+            w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+            h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+            # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+            detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]

             if align:
                 landmarks = identity["landmarks"]
deepface/detectors/Ssd.py

@@ -71,17 +71,27 @@ class SsdClient(Detector):

         return detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with ssd
+
         Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace object
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         resp = []
@@ -92,16 +102,14 @@ class SsdClient(Detector):

         target_size = (300, 300)

-        base_img = img.copy()  # we will restore base_img to img later
-
         original_size = img.shape

-        img = cv2.resize(img, target_size)
+        current_img = cv2.resize(img, target_size)

         aspect_ratio_x = original_size[1] / target_size[1]
         aspect_ratio_y = original_size[0] / target_size[0]

-        imageBlob = cv2.dnn.blobFromImage(image=img)
+        imageBlob = cv2.dnn.blobFromImage(image=current_img)

         face_detector = self.model["face_detector"]
         face_detector.setInput(imageBlob)
@@ -126,17 +134,21 @@ class SsdClient(Detector):
                 bottom = instance["bottom"]
                 top = instance["top"]

-                detected_face = base_img[
-                    int(top * aspect_ratio_y) : int(bottom * aspect_ratio_y),
-                    int(left * aspect_ratio_x) : int(right * aspect_ratio_x),
-                ]
-
-                face_region = FacialAreaRegion(
-                    x=int(left * aspect_ratio_x),
-                    y=int(top * aspect_ratio_y),
-                    w=int(right * aspect_ratio_x) - int(left * aspect_ratio_x),
-                    h=int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y),
-                )
+                x = int(left * aspect_ratio_x)
+                y = int(top * aspect_ratio_y)
+                w = int(right * aspect_ratio_x) - int(left * aspect_ratio_x)
+                h = int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y)
+
+                # expand the facial area to be extracted and stay within img.shape limits
+                x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+                y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+                w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+                h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+                # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+                detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
+
+                face_region = FacialAreaRegion(x=x, y=y, w=w, h=h)

                 confidence = instance["confidence"]

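SSD runs on a 300x300 resized copy, so the box must be scaled back with the aspect ratios before the expand-and-clip step. A worked example of that mapping (numbers illustrative):

    # Original 900x600 frame (height x width), detector input is 300x300
    original_h, original_w, target = 900, 600, 300
    aspect_ratio_x = original_w / target  # 2.0
    aspect_ratio_y = original_h / target  # 3.0

    # Detector reports box edges in 300x300 space
    left, top, right, bottom = 50, 60, 150, 200

    x = int(left * aspect_ratio_x)       # 100
    y = int(top * aspect_ratio_y)        # 180
    w = int(right * aspect_ratio_x) - x  # 300 - 100 = 200
    h = int(bottom * aspect_ratio_y) - y # 600 - 180 = 420
    print(x, y, w, h)  # 100 180 200 420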
deepface/detectors/Yolo.py

@@ -51,18 +51,27 @@ class YoloClient(Detector):
         # Return face_detector
         return YOLO(weight_path)

-    def detect_faces(self, img: np.ndarray, align: bool = False) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = False, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with yolo

         Args:
-            face_detector (Any): yolo face detector object
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
             results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         resp = []
@@ -78,7 +87,15 @@ class YoloClient(Detector):

             x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
             region = FacialAreaRegion(x=x, y=y, w=w, h=h)
-            detected_face = img[y : y + h, x : x + w].copy()
+
+            # expand the facial area to be extracted and stay within img.shape limits
+            x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+            y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+            w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+            h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+            # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+            detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]

             if align:
                 # Tuple of x,y and confidence for left eye
deepface/detectors/YuNet.py

@@ -49,17 +49,27 @@ class YuNetClient(Detector):
             ) from err
         return face_detector

-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List[DetectedFace]:
         """
         Detect and align face with yunet
+
         Args:
-            img (np.ndarray): pre-loaded image
-            align (bool): default is true
+            img (np.ndarray): pre-loaded image as numpy array
+
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage
+
         Returns:
-            results (List[DetectedFace]): A list of DetectedFace objects
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
+
             - img (np.ndarray): The detected face as a NumPy array.
+
             - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+
             - confidence (float): The confidence score associated with the detected face.
         """
         # FaceDetector.detect_faces does not support score_threshold parameter.
@ -115,7 +125,16 @@ class YuNetClient(Detector):
             )
             confidence = face[-1]
             confidence = f"{confidence:.2f}"
-            detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+
+            # expand the facial area to be extracted and stay within img.shape limits
+            x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
+            y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
+            w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
+            h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+
+            # detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
+            detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]

             img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
             if align:
                 detected_face = detection.align_face(detected_face, (x_re, y_re), (x_le, y_le))
@ -8,18 +8,27 @@ import numpy as np
 # pylint: disable=unnecessary-pass, too-few-public-methods
 class Detector(ABC):
     @abstractmethod
-    def detect_faces(self, img: np.ndarray, align: bool = True) -> List["DetectedFace"]:
+    def detect_faces(
+        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
+    ) -> List["DetectedFace"]:
         """
-        Detect faces from a given image
+        Interface for detect and align face

         Args:
-            img (np.ndarray): pre-loaded image as a NumPy array
-            align (bool): enable or disable alignment after face detection
+            img (np.ndarray): pre-loaded image as numpy array
+            align (bool): flag to enable or disable alignment after detection (default is True)
+
+            expand_percentage (int): expand detected facial area with a percentage

         Returns:
-            results (List[DetectedFace]): A list of DetectedFace object
+            results (List[Tuple[DetectedFace]): A list of DetectedFace objects
                 where each object contains:
-            - face (np.ndarray): The detected face as a NumPy array.
-            - face_region (List[float]): The image region represented as
-                a list of floats e.g. [x, y, w, h]
+            - img (np.ndarray): The detected face as a NumPy array.
+            - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
             - confidence (float): The confidence score associated with the detected face.
         """
         pass
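
To illustrate the contract, a minimal hypothetical backend implementing the updated interface could look as follows (a sketch only; WholeImageClient is not a real deepface backend):

from typing import List
import numpy as np
from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion

class WholeImageClient(Detector):
    """Toy detector that reports the full frame as a single face."""

    def detect_faces(
        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
    ) -> List[DetectedFace]:
        # expand_percentage is a no-op here because the region already spans the image
        region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
        return [DetectedFace(img=img, facial_area=region, confidence=0.0)]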
@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Union, List
+from typing import Any, Union, List, Tuple
 import numpy as np
 from deepface.commons import functions

@ -15,6 +15,9 @@ else:
 class FacialRecognition(ABC):
     model: Union[Model, Any]
     model_name: str
+    input_shape: Tuple[int, int]
+    output_shape: int

     @abstractmethod
     def find_embeddings(self, img: np.ndarray) -> List[float]:
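
Concrete models are expected to populate the two new class attributes; a hypothetical subclass for illustration (not part of the commit):

from typing import List
import numpy as np
from deepface.models.FacialRecognition import FacialRecognition

class ToyRecognizer(FacialRecognition):
    model = None                # a real subclass would hold its network here
    model_name = "Toy"
    input_shape = (224, 224)    # read by callers as model.input_shape
    output_shape = 128          # embedding dimensionality

    def find_embeddings(self, img: np.ndarray) -> List[float]:
        return [0.0] * self.output_shape  # placeholder embedding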
@ -6,8 +6,7 @@ import numpy as np
 from tqdm import tqdm

 # project dependencies
-from deepface.modules import modeling
-from deepface.commons import functions
+from deepface.modules import modeling, detection
 from deepface.extendedmodels import Gender, Race, Emotion

@ -17,6 +16,7 @@ def analyze(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     silent: bool = False,
 ) -> List[Dict[str, Any]]:
     """
@ -41,6 +41,8 @@ def analyze(

         align (boolean): Perform alignment based on the eye positions (default is True).
+
+        expand_percentage (int): expand detected facial area with a percentage (default is 0).

         silent (boolean): Suppress or allow some log messages for a quieter analysis process
             (default is False).

@ -114,16 +116,20 @@ def analyze(
     # ---------------------------------
     resp_objects = []

-    img_objs = functions.extract_faces(
-        img=img_path,
+    img_objs = detection.extract_faces(
+        img_path=img_path,
         target_size=(224, 224),
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

-    for img_content, img_region, img_confidence in img_objs:
+    for img_obj in img_objs:
+        img_content = img_obj["face"]
+        img_region = img_obj["facial_area"]
+        img_confidence = img_obj["confidence"]
         if img_content.shape[0] > 0 and img_content.shape[1] > 0:
             obj = {}
             # facial attribute analysis
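
The new parameter flows straight through the public API; an illustrative call (image path and actions chosen arbitrarily):

from deepface import DeepFace

results = DeepFace.analyze(
    img_path="dataset/img11.jpg",
    actions=("age", "gender", "emotion"),
    expand_percentage=10,  # crop 10% more context around each detected face
)
for result in results:
    print(result["dominant_emotion"])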
@ -3,10 +3,26 @@ from typing import Any, Dict, List, Tuple, Union

 # 3rd part dependencies
 import numpy as np
+import cv2
 from PIL import Image

 # project dependencies
+from deepface.modules import preprocessing
+from deepface.models.Detector import DetectedFace, FacialAreaRegion
+from deepface.detectors import DetectorWrapper
 from deepface.commons import functions
+from deepface.commons.logger import Logger
+
+logger = Logger(module="deepface/modules/detection.py")
+
+# pylint: disable=no-else-raise
+
+tf_major_version = functions.get_tf_major_version()
+if tf_major_version == 1:
+    from keras.preprocessing import image
+elif tf_major_version == 2:
+    from tensorflow.keras.preprocessing import image


 def extract_faces(
@ -15,7 +31,9 @@ def extract_faces(
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     grayscale: bool = False,
+    human_readable=False,
 ) -> List[Dict[str, Any]]:
     """
     Extract faces from a given image
@ -35,9 +53,13 @@ def extract_faces(

         align (bool): Flag to enable face alignment (default is True).
+
+        expand_percentage (int): expand detected facial area with a percentage

         grayscale (boolean): Flag to convert the image to grayscale before
             processing (default is False).
+
+        human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
+            or 4D BGR for ML models (default is False).

     Returns:
         results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
@ -48,27 +70,113 @@ def extract_faces(

     resp_objs = []

-    img_objs = functions.extract_faces(
-        img=img_path,
-        target_size=target_size,
-        detector_backend=detector_backend,
-        grayscale=grayscale,
-        enforce_detection=enforce_detection,
-        align=align,
-    )
+    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
+    img, img_name = preprocessing.load_image(img_path)
+
+    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
+
+    if detector_backend == "skip":
+        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
+    else:
+        face_objs = DetectorWrapper.detect_faces(
+            detector_backend=detector_backend,
+            img=img,
+            align=align,
+            expand_percentage=expand_percentage,
+        )

-    for img, region, confidence in img_objs:
-        resp_obj = {}
+    # in case of no face found
+    if len(face_objs) == 0 and enforce_detection is True:
+        if img_name is not None:
+            raise ValueError(
+                f"Face could not be detected in {img_name}."
+                "Please confirm that the picture is a face photo "
+                "or consider to set enforce_detection param to False."
+            )
+        else:
+            raise ValueError(
+                "Face could not be detected. Please confirm that the picture is a face photo "
+                "or consider to set enforce_detection param to False."
+            )
+
+    if len(face_objs) == 0 and enforce_detection is False:
+        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
+
+    for face_obj in face_objs:
+        current_img = face_obj.img
+        current_region = face_obj.facial_area
+        confidence = face_obj.confidence
+
+        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
+            continue
+
+        if grayscale is True:
+            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
+
+        # resize and padding
+        factor_0 = target_size[0] / current_img.shape[0]
+        factor_1 = target_size[1] / current_img.shape[1]
+        factor = min(factor_0, factor_1)
+
+        dsize = (
+            int(current_img.shape[1] * factor),
+            int(current_img.shape[0] * factor),
+        )
+        current_img = cv2.resize(current_img, dsize)
+
+        diff_0 = target_size[0] - current_img.shape[0]
+        diff_1 = target_size[1] - current_img.shape[1]
+        if grayscale is False:
+            # Put the base image in the middle of the padded image
+            current_img = np.pad(
+                current_img,
+                (
+                    (diff_0 // 2, diff_0 - diff_0 // 2),
+                    (diff_1 // 2, diff_1 - diff_1 // 2),
+                    (0, 0),
+                ),
+                "constant",
+            )
+        else:
+            current_img = np.pad(
+                current_img,
+                (
+                    (diff_0 // 2, diff_0 - diff_0 // 2),
+                    (diff_1 // 2, diff_1 - diff_1 // 2),
+                ),
+                "constant",
+            )
+
+        # double check: if target image is not still the same size with target.
+        if current_img.shape[0:2] != target_size:
+            current_img = cv2.resize(current_img, target_size)
+
+        # normalizing the image pixels
+        # what this line doing? must?
+        img_pixels = image.img_to_array(current_img)
+        img_pixels = np.expand_dims(img_pixels, axis=0)
+        img_pixels /= 255  # normalize input in [0, 1]
+
         # discard expanded dimension
-        if len(img.shape) == 4:
-            img = img[0]
-        # bgr to rgb
-        resp_obj["face"] = img[:, :, ::-1]
-        resp_obj["facial_area"] = region
-        resp_obj["confidence"] = confidence
-        resp_objs.append(resp_obj)
+        if human_readable is True and len(img_pixels.shape) == 4:
+            img_pixels = img_pixels[0]
+
+        resp_objs.append(
+            {
+                "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
+                "facial_area": {
+                    "x": current_region.x,
+                    "y": current_region.y,
+                    "w": current_region.w,
+                    "h": current_region.h,
+                },
+                "confidence": confidence,
+            }
+        )
+
+    if len(resp_objs) == 0 and enforce_detection == True:
+        raise ValueError(
+            f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
+        )

     return resp_objs
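
The resize-and-pad arithmetic above preserves aspect ratio before padding to the target canvas; a worked example with made-up numbers:

# A 120x80 crop fitted into a 224x224 canvas:
target_size = (224, 224)
crop_h, crop_w = 120, 80

factor = min(target_size[0] / crop_h, target_size[1] / crop_w)  # min(1.87, 2.8) = 1.87
new_w, new_h = int(crop_w * factor), int(crop_h * factor)       # (149, 224)

diff_0 = target_size[0] - new_h  # 0 rows of padding needed
diff_1 = target_size[1] - new_w  # 75 columns, split 37 left / 38 right by the // 2 arithmetic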
131
deepface/modules/preprocessing.py
Normal file
@ -0,0 +1,131 @@
+import os
+from typing import Union, Tuple
+import base64
+from pathlib import Path
+
+# 3rd party
+import numpy as np
+import cv2
+from PIL import Image
+import requests
+
+
+def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
+    """
+    Load image from path, url, base64 or numpy array.
+    Args:
+        img: a path, url, base64 or numpy array.
+    Returns:
+        image (numpy array): the loaded image in BGR format
+        image name (str): image name itself
+    """
+
+    # The image is already a numpy array
+    if isinstance(img, np.ndarray):
+        return img, "numpy array"
+
+    if isinstance(img, Path):
+        img = str(img)
+
+    if not isinstance(img, str):
+        raise ValueError(f"img must be numpy array or str but it is {type(img)}")
+
+    # The image is a base64 string
+    if img.startswith("data:image/"):
+        return load_base64(img), "base64 encoded string"
+
+    # The image is a url
+    if img.startswith("http"):
+        return (
+            np.array(Image.open(requests.get(img, stream=True, timeout=60).raw).convert("BGR")),
+            # return url as image name
+            img,
+        )
+
+    # The image is a path
+    if os.path.isfile(img) is not True:
+        raise ValueError(f"Confirm that {img} exists")
+
+    # image must be a file on the system then
+
+    # image name must have english characters
+    if img.isascii() is False:
+        raise ValueError(f"Input image must not have non-english characters - {img}")
+
+    img_obj_bgr = cv2.imread(img)
+    # img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
+    return img_obj_bgr, img
+
+
+def load_base64(uri: str) -> np.ndarray:
+    """Load image from base64 string.
+
+    Args:
+        uri: a base64 string.
+
+    Returns:
+        numpy array: the loaded image.
+    """
+    encoded_data = uri.split(",")[1]
+    nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
+    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+    return img_bgr
+
+
+def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
+    """Normalize input image.
+
+    Args:
+        img (numpy array): the input image.
+        normalization (str, optional): the normalization technique. Defaults to "base",
+        for no normalization.
+
+    Returns:
+        numpy array: the normalized image.
+    """
+
+    # issue 131 declares that some normalization techniques improves the accuracy
+
+    if normalization == "base":
+        return img
+
+    # @trevorgribble and @davedgd contributed this feature
+    # restore input in scale of [0, 255] because it was normalized in scale of
+    # [0, 1] in preprocess_face
+    img *= 255
+
+    if normalization == "raw":
+        pass  # return just restored pixels
+
+    elif normalization == "Facenet":
+        mean, std = img.mean(), img.std()
+        img = (img - mean) / std
+
+    elif normalization == "Facenet2018":
+        # simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
+        img /= 127.5
+        img -= 1
+
+    elif normalization == "VGGFace":
+        # mean subtraction based on VGGFace1 training data
+        img[..., 0] -= 93.5940
+        img[..., 1] -= 104.7624
+        img[..., 2] -= 129.1863
+
+    elif normalization == "VGGFace2":
+        # mean subtraction based on VGGFace2 training data
+        img[..., 0] -= 91.4953
+        img[..., 1] -= 103.8827
+        img[..., 2] -= 131.0912
+
+    elif normalization == "ArcFace":
+        # Reference study: The faces are cropped and resized to 112×112,
+        # and each pixel (ranged between [0, 255]) in RGB images is normalised
+        # by subtracting 127.5 then divided by 128.
+        img -= 127.5
+        img /= 128
+
+    else:
+        raise ValueError(f"unimplemented normalization type - {normalization}")
+
+    return img
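
Typical usage of the new helpers (the path is illustrative):

from deepface.modules import preprocessing

img, name = preprocessing.load_image("dataset/img1.jpg")  # BGR numpy array plus the source name
img = img.astype("float32") / 255                          # normalize_input expects [0, 1] pixels
img = preprocessing.normalize_input(img=img, normalization="Facenet")

One caveat worth noting: np.fromstring is deprecated in recent NumPy releases, and np.frombuffer is the usual replacement.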
@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 import cv2
 from deepface import DeepFace
-from deepface.commons import functions
+from deepface.models.FacialRecognition import FacialRecognition
 from deepface.commons.logger import Logger

 logger = Logger(module="commons.realtime")
@ -32,12 +32,13 @@ def analysis(
     enable_emotion = True
     enable_age_gender = True
     # ------------------------
-    # find custom values for this input set
-    target_size = functions.find_target_size(model_name=model_name)
-    # ------------------------
     # build models once to store them in the memory
     # otherwise, they will be built after cam started and this will cause delays
-    DeepFace.build_model(model_name=model_name)
+    model: FacialRecognition = DeepFace.build_model(model_name=model_name)
+
+    # find custom values for this input set
+    target_size = model.input_shape
+
     logger.info(f"facial recognition model {model_name} is just built")

     if enable_face_analysis:
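
After this change the model object itself reports its expected input size, so callers no longer need find_target_size; a short sketch:

from deepface import DeepFace

model = DeepFace.build_model("VGG-Face")
print(model.input_shape)  # (224, 224) for VGG-Face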
@ -10,9 +10,10 @@ import pandas as pd
 from tqdm import tqdm

 # project dependencies
-from deepface.commons import functions, distance as dst
+from deepface.commons import distance as dst
 from deepface.commons.logger import Logger
-from deepface.modules import representation
+from deepface.modules import representation, detection, modeling
+from deepface.models.FacialRecognition import FacialRecognition

 logger = Logger(module="deepface/modules/recognition.py")

@ -25,6 +26,7 @@ def find(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     threshold: Optional[float] = None,
     normalization: str = "base",
     silent: bool = False,
@ -54,6 +56,8 @@ def find(

         align (boolean): Perform alignment based on the eye positions.
+
+        expand_percentage (int): expand detected facial area with a percentage (default is 0).

         threshold (float): Specify a threshold to determine whether a pair represents the same
             person or different individuals. This threshold is used for comparing distances.
             If left unset, default pre-tuned threshold values will be applied based on the specified
@ -89,7 +93,8 @@ def find(
     if os.path.isdir(db_path) is not True:
         raise ValueError("Passed db_path does not exist!")

-    target_size = functions.find_target_size(model_name=model_name)
+    model: FacialRecognition = modeling.build_model(model_name)
+    target_size = model.input_shape

     # ---------------------------------------

@ -202,18 +207,21 @@ def find(
     )

     # img path might have more than once face
-    source_objs = functions.extract_faces(
-        img=img_path,
+    source_objs = detection.extract_faces(
+        img_path=img_path,
         target_size=target_size,
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

     resp_obj = []

-    for source_img, source_region, _ in source_objs:
+    for source_obj in source_objs:
+        source_img = source_obj["face"]
+        source_region = source_obj["facial_area"]
         target_embedding_obj = representation.represent(
             img_path=source_img,
             model_name=model_name,
@ -245,11 +253,11 @@ def find(
             )

             if distance_metric == "cosine":
-                distance = dst.findCosineDistance(source_representation, target_representation)
+                distance = dst.find_cosine_distance(source_representation, target_representation)
             elif distance_metric == "euclidean":
-                distance = dst.findEuclideanDistance(source_representation, target_representation)
+                distance = dst.find_euclidean_distance(source_representation, target_representation)
             elif distance_metric == "euclidean_l2":
-                distance = dst.findEuclideanDistance(
+                distance = dst.find_euclidean_distance(
                     dst.l2_normalize(source_representation),
                     dst.l2_normalize(target_representation),
                 )
@ -259,7 +267,7 @@ def find(
             distances.append(distance)

     # ---------------------------
-    target_threshold = threshold or dst.findThreshold(model_name, distance_metric)
+    target_threshold = threshold or dst.find_threshold(model_name, distance_metric)

     result_df["threshold"] = target_threshold
     result_df["distance"] = distances
@ -305,6 +313,7 @@ def __find_bulk_embeddings(
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
     silent: bool = False,
 ):
@ -313,15 +322,24 @@ def __find_bulk_embeddings(

     Args:
         employees (list): list of exact image paths

         model_name (str): facial recognition model name
-        target_size (tuple): expected input shape of facial
-            recognition model
+
+        target_size (tuple): expected input shape of facial recognition model
+
         detector_backend (str): face detector model name
+
         enforce_detection (bool): set this to False if you
             want to proceed when you cannot detect any face
+
         align (bool): enable or disable alignment of image
             before feeding to facial recognition model
+
+        expand_percentage (int): expand detected facial area with a
+            percentage (default is 0).
+
         normalization (bool): normalization technique
+
         silent (bool): enable or disable informative logging
     Returns:
         representations (list): pivot list of embeddings with
@ -333,16 +351,19 @@ def __find_bulk_embeddings(
         desc="Finding representations",
         disable=silent,
     ):
-        img_objs = functions.extract_faces(
-            img=employee,
+        img_objs = detection.extract_faces(
+            img_path=employee,
             target_size=target_size,
             detector_backend=detector_backend,
             grayscale=False,
             enforce_detection=enforce_detection,
             align=align,
+            expand_percentage=expand_percentage,
         )

-        for img_content, img_region, _ in img_objs:
+        for img_obj in img_objs:
+            img_content = img_obj["face"]
+            img_region = img_obj["facial_area"]
             embedding_obj = representation.represent(
                 img_path=img_content,
                 model_name=model_name,
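
The renamed distance helpers compute standard metrics; an equivalent NumPy sketch of the formulas (the real implementations live in deepface.commons.distance):

import numpy as np

def cosine_distance(a, b):
    a, b = np.asarray(a), np.asarray(b)
    return 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def euclidean_distance(a, b):
    a, b = np.asarray(a), np.asarray(b)
    return float(np.linalg.norm(a - b))

# "euclidean_l2" is the euclidean distance taken after l2-normalizing both vectors.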
@ -6,8 +6,7 @@ import numpy as np
 import cv2

 # project dependencies
-from deepface.modules import modeling
-from deepface.commons import functions
+from deepface.modules import modeling, detection, preprocessing
 from deepface.models.FacialRecognition import FacialRecognition

@ -17,6 +16,7 @@ def represent(
     enforce_detection: bool = True,
     detector_backend: str = "opencv",
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
 ) -> List[Dict[str, Any]]:
     """
@ -38,6 +38,8 @@ def represent(

         align (boolean): Perform alignment based on the eye positions.
+
+        expand_percentage (int): expand detected facial area with a percentage (default is 0).

         normalization (string): Normalize the input image before feeding it to the model.
             Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace

@ -61,19 +63,20 @@ def represent(

     # ---------------------------------
     # we have run pre-process in verification. so, this can be skipped if it is coming from verify.
-    target_size = functions.find_target_size(model_name=model_name)
+    target_size = model.input_shape
     if detector_backend != "skip":
-        img_objs = functions.extract_faces(
-            img=img_path,
+        img_objs = detection.extract_faces(
+            img_path=img_path,
             target_size=(target_size[1], target_size[0]),
             detector_backend=detector_backend,
             grayscale=False,
             enforce_detection=enforce_detection,
             align=align,
+            expand_percentage=expand_percentage,
         )
     else:  # skip
         # Try load. If load error, will raise exception internal
-        img, _ = functions.load_image(img_path)
+        img, _ = preprocessing.load_image(img_path)
         # --------------------------------
         if len(img.shape) == 4:
             img = img[0]  # e.g. (1, 224, 224, 3) to (224, 224, 3)
@ -85,13 +88,21 @@ def represent(
         img = (img.astype(np.float32) / 255.0).astype(np.float32)
         # --------------------------------
         # make dummy region and confidence to keep compatibility with `extract_faces`
-        img_region = {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]}
-        img_objs = [(img, img_region, 0)]
+        img_objs = [
+            {
+                "face": img,
+                "facial_area": {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]},
+                "confidence": 0,
+            }
+        ]
     # ---------------------------------

-    for img, region, confidence in img_objs:
+    for img_obj in img_objs:
+        img = img_obj["face"]
+        region = img_obj["facial_area"]
+        confidence = img_obj["confidence"]
         # custom normalization
-        img = functions.normalize_input(img=img, normalization=normalization)
+        img = preprocessing.normalize_input(img=img, normalization=normalization)

         embedding = model.find_embeddings(img)
@ -6,8 +6,9 @@ from typing import Any, Dict, Union
 import numpy as np

 # project dependencies
-from deepface.commons import functions, distance as dst
-from deepface.modules import representation
+from deepface.commons import distance as dst
+from deepface.modules import representation, detection, modeling
+from deepface.models.FacialRecognition import FacialRecognition


 def verify(
@ -18,6 +19,7 @@ def verify(
     distance_metric: str = "cosine",
     enforce_detection: bool = True,
     align: bool = True,
+    expand_percentage: int = 0,
     normalization: str = "base",
 ) -> Dict[str, Any]:
     """
@ -48,6 +50,8 @@ def verify(

         align (bool): Flag to enable face alignment (default is True).
+
+        expand_percentage (int): expand detected facial area with a percentage (default is 0).

         normalization (string): Normalize the input image before feeding it to the model.
             Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)

@ -79,32 +83,39 @@ def verify(
     tic = time.time()

     # --------------------------------
-    target_size = functions.find_target_size(model_name=model_name)
+    model: FacialRecognition = modeling.build_model(model_name)
+    target_size = model.input_shape

     # img pairs might have many faces
-    img1_objs = functions.extract_faces(
-        img=img1_path,
+    img1_objs = detection.extract_faces(
+        img_path=img1_path,
         target_size=target_size,
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )

-    img2_objs = functions.extract_faces(
-        img=img2_path,
+    img2_objs = detection.extract_faces(
+        img_path=img2_path,
         target_size=target_size,
         detector_backend=detector_backend,
         grayscale=False,
         enforce_detection=enforce_detection,
         align=align,
+        expand_percentage=expand_percentage,
     )
     # --------------------------------
     distances = []
     regions = []
     # now we will find the face pair with minimum distance
-    for img1_content, img1_region, _ in img1_objs:
-        for img2_content, img2_region, _ in img2_objs:
+    for img1_obj in img1_objs:
+        img1_content = img1_obj["face"]
+        img1_region = img1_obj["facial_area"]
+        for img2_obj in img2_objs:
+            img2_content = img2_obj["face"]
+            img2_region = img2_obj["facial_area"]
             img1_embedding_obj = representation.represent(
                 img_path=img1_content,
                 model_name=model_name,
@ -127,11 +138,11 @@ def verify(
             img2_representation = img2_embedding_obj[0]["embedding"]

             if distance_metric == "cosine":
-                distance = dst.findCosineDistance(img1_representation, img2_representation)
+                distance = dst.find_cosine_distance(img1_representation, img2_representation)
             elif distance_metric == "euclidean":
-                distance = dst.findEuclideanDistance(img1_representation, img2_representation)
+                distance = dst.find_euclidean_distance(img1_representation, img2_representation)
             elif distance_metric == "euclidean_l2":
-                distance = dst.findEuclideanDistance(
+                distance = dst.find_euclidean_distance(
                     dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation)
                 )
             else:
@ -141,7 +152,7 @@ def verify(
             regions.append((img1_region, img2_region))

     # -------------------------------
-    threshold = dst.findThreshold(model_name, distance_metric)
+    threshold = dst.find_threshold(model_name, distance_metric)
     distance = min(distances)  # best distance
     facial_areas = regions[np.argmin(distances)]
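
An illustrative end-to-end call with the new parameter (paths are arbitrary):

from deepface import DeepFace

result = DeepFace.verify(
    img1_path="dataset/img1.jpg",
    img2_path="dataset/img2.jpg",
    distance_metric="cosine",
    expand_percentage=5,  # enlarge both detected faces by 5% before embedding
)
print(result["verified"], result["distance"], result["threshold"])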
@ -1,7 +1,8 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from deepface import DeepFace
-from deepface.commons import functions
+from deepface.commons import distance
+from deepface.models.FacialRecognition import FacialRecognition
 from deepface.commons.logger import Logger

 logger = Logger()
@ -11,9 +12,9 @@ logger = Logger()

 model_name = "VGG-Face"

-model = DeepFace.build_model(model_name=model_name)
+model: FacialRecognition = DeepFace.build_model(model_name=model_name)

-target_size = functions.find_target_size(model_name)
+target_size = model.input_shape

 logger.info(f"target_size: {target_size}")

@ -22,21 +23,34 @@ logger.info(f"target_size: {target_size}")

 img1 = DeepFace.extract_faces(img_path="dataset/img1.jpg", target_size=target_size)[0]["face"]
 img1 = np.expand_dims(img1, axis=0)  # to (1, 224, 224, 3)
-img1_representation = model.predict(img1)[0, :]
+img1_representation = model.find_embeddings(img1)

 img2 = DeepFace.extract_faces(img_path="dataset/img3.jpg", target_size=target_size)[0]["face"]
 img2 = np.expand_dims(img2, axis=0)
-img2_representation = model.predict(img2)[0, :]
+img2_representation = model.find_embeddings(img2)
+
+img1_representation = np.array(img1_representation)
+img2_representation = np.array(img2_representation)

 # ----------------------------------------------
-# distance between two images
+# distance between two images - euclidean distance formula

 distance_vector = np.square(img1_representation - img2_representation)
-logger.debug(distance_vector)
+current_distance = np.sqrt(distance_vector.sum())
+logger.info(f"Euclidean distance: {current_distance}")

-distance = np.sqrt(distance_vector.sum())
-logger.info(f"Euclidean distance: {distance}")
+threshold = distance.find_threshold(model_name=model_name, distance_metric="euclidean")
+logger.info(f"Threshold for {model_name}-euclidean pair is {threshold}")
+
+if current_distance < threshold:
+    logger.info(
+        f"This pair is same person because its distance {current_distance}"
+        f" is less than threshold {threshold}"
+    )
+else:
+    logger.info(
+        f"This pair is different persons because its distance {current_distance}"
+        f" is greater than threshold {threshold}"
+    )
 # ----------------------------------------------
 # expand vectors to be shown better in graph

@ -75,7 +89,7 @@ im = plt.imshow(img2_graph, interpolation="nearest", cmap=plt.cm.ocean)
 plt.colorbar()

 ax5 = fig.add_subplot(3, 2, 5)
-plt.text(0.35, 0, f"Distance: {distance}")
+plt.text(0.35, 0, f"Distance: {current_distance}")
 plt.axis("off")

 ax6 = fig.add_subplot(3, 2, 6)
@ -1,12 +1,14 @@
+import numpy as np
+import pytest
 from deepface import DeepFace
 from deepface.commons.logger import Logger

 logger = Logger("tests/test_extract_faces.py")

-def test_different_detectors():
-    detectors = ["opencv", "mtcnn"]
+detectors = ["opencv", "mtcnn"]
+
+
+def test_different_detectors():
     for detector in detectors:
         img_objs = DeepFace.extract_faces(img_path="dataset/img11.jpg", detector_backend=detector)
         for img_obj in img_objs:
@ -22,3 +24,21 @@ def test_different_detectors():
             img = img_obj["face"]
             assert img.shape[0] > 0 and img.shape[1] > 0
         logger.info(f"✅ extract_faces for {detector} backend test is done")
+
+
+def test_backends_for_enforced_detection_with_non_facial_inputs():
+    black_img = np.zeros([224, 224, 3])
+    for detector in detectors:
+        with pytest.raises(ValueError):
+            _ = DeepFace.extract_faces(img_path=black_img, detector_backend=detector)
+    logger.info("✅ extract_faces for enforced detection and non-facial image test is done")
+
+
+def test_backends_for_not_enforced_detection_with_non_facial_inputs():
+    black_img = np.zeros([224, 224, 3])
+    for detector in detectors:
+        objs = DeepFace.extract_faces(
+            img_path=black_img, detector_backend=detector, enforce_detection=False
+        )
+        assert objs[0]["face"].shape == (224, 224, 3)
+    logger.info("✅ extract_faces for not enforced detection and non-facial image test is done")
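
A condensed sketch of the behavior the two new tests pin down (mirroring the code above):

import numpy as np
from deepface import DeepFace

black_img = np.zeros([224, 224, 3])
# With enforce_detection=False an undetectable image falls back to the whole frame:
faces = DeepFace.extract_faces(
    img_path=black_img, detector_backend="opencv", enforce_detection=False
)
assert faces[0]["face"].shape == (224, 224, 3)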
@ -6,7 +6,7 @@ from deepface.commons.logger import Logger

 logger = Logger("tests/test_find.py")

-threshold = distance.findThreshold(model_name="VGG-Face", distance_metric="cosine")
+threshold = distance.find_threshold(model_name="VGG-Face", distance_metric="cosine")


 def test_find_with_exact_path():