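Change behaviour for the special case of a batched single image: a one-element list input now returns a batched (nested) result instead of being unwrapped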

2025-07-24 19:00:07 +00:00 · 2025-02-23 13:27:18 +00:00 · 2025-02-23 13:27:18 +00:00 · 8b1b465d22
commit 8b1b465d22
parent 93b8af100b
11 changed files with 149 additions and 102 deletions
--- a/deepface/models/face_detection/CenterFace.py
+++ b/deepface/models/face_detection/CenterFace.py
@ -49,10 +49,11 @@ class CenterFaceClient(Detector):
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): 
            A list or a list of lists of FacialAreaRegion objects
        """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            img = [img]
        results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
            return results[0]
        return results

--- a/deepface/models/face_detection/Dlib.py
+++ b/deepface/models/face_detection/Dlib.py
@ -62,10 +62,11 @@ class DlibClient(Detector):
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): 
            A list or a list of lists of FacialAreaRegion objects
        """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            img = [img]
        results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
            return results[0]
        return results

--- a/deepface/models/face_detection/FastMtCnn.py
+++ b/deepface/models/face_detection/FastMtCnn.py
@ -32,10 +32,11 @@ class FastMtCnnClient(Detector):
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): 
            A list or a list of lists of FacialAreaRegion objects
        """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            img = [img]
        results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
            return results[0]
        return results

--- a/deepface/models/face_detection/MediaPipe.py
+++ b/deepface/models/face_detection/MediaPipe.py
@ -58,10 +58,11 @@ class MediaPipeClient(Detector):
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): 
            A list or a list of lists of FacialAreaRegion objects
        """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            img = [img]
        results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
            return results[0]
        return results

--- a/deepface/models/face_detection/MtCnn.py
+++ b/deepface/models/face_detection/MtCnn.py
@ -38,7 +38,8 @@ class MtCnnClient(Detector):
                or a list of lists of FacialAreaRegion objects for each image
        """

-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            img = [img]

        resp = []
@ -76,7 +77,7 @@ class MtCnnClient(Detector):

            resp.append(image_resp)

-        if len(resp) == 1:
+        if not is_batched_input:
            return resp[0]
        return resp

--- a/deepface/models/face_detection/OpenCv.py
+++ b/deepface/models/face_detection/OpenCv.py
@ -1,6 +1,7 @@
 # built-in dependencies
 import os
 from typing import Any, List, Union
+import logging

 # 3rd party dependencies
 import cv2
@ -9,6 +10,7 @@ import numpy as np
 #project dependencies
 from deepface.models.Detector import Detector, FacialAreaRegion

+logger = logging.getLogger(__name__)

 class OpenCvClient(Detector):
    """
@ -17,6 +19,7 @@ class OpenCvClient(Detector):

    def __init__(self):
        self.model = self.build_model()
+        self.supports_batch_detection = self._supports_batch_detection()

    def build_model(self):
        """
@ -28,6 +31,19 @@ class OpenCvClient(Detector):
        detector["face_detector"] = self.__build_cascade("haarcascade")
        detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
        return detector
+    
+    def _supports_batch_detection(self) -> bool:
+        supports_batch_detection = os.getenv(
+            "ENABLE_OPENCV_BATCH_DETECTION", "false"
+        ).lower() == "true"
+        if not supports_batch_detection:
+            logger.warning(
+                "Batch detection is disabled for opencv by default "
+                "since the results are not consistent with single image detection. "
+                "You can force enable it by setting the environment variable "
+                "ENABLE_OPENCV_BATCH_DETECTION to true."
+            )
+        return supports_batch_detection

    def detect_faces(
        self,
@ -46,8 +62,10 @@ class OpenCvClient(Detector):
        """
        if isinstance(img, np.ndarray):
            imgs = [img]
-        else:
+        elif self.supports_batch_detection:
            imgs = img
+        else:
+            return [self.detect_faces(single_img) for single_img in img]

        batch_results = []

--- a/deepface/models/face_detection/RetinaFace.py
+++ b/deepface/models/face_detection/RetinaFace.py
@ -28,7 +28,8 @@ class RetinaFaceClient(Detector):
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): 
            A list or a list of lists of FacialAreaRegion objects
        """
-        if isinstance(img, np.ndarray):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            imgs = [img]
        else:
            imgs = img
@ -81,4 +82,6 @@ class RetinaFaceClient(Detector):

            batch_results.append(resp)

-        return batch_results if len(batch_results) > 1 else batch_results[0]
+        if not is_batched_input:
+            return batch_results[0]
+        return batch_results
--- a/deepface/models/face_detection/Ssd.py
+++ b/deepface/models/face_detection/Ssd.py
@ -69,81 +69,89 @@ class SsdClient(Detector):
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): 
            A list or a list of lists of FacialAreaRegion objects
        """
-        if isinstance(img, np.ndarray):
-            imgs = [img]
-        else:
-            imgs = img
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
+            img = [img]
+        results = [self._process_single_image(single_img) for single_img in img]
+        if not is_batched_input:
+            return results[0]
+        return results

-        batch_results = []
+    def _process_single_image(self, single_img: np.ndarray) -> List[FacialAreaRegion]:
+        """
+        Helper function to detect faces in a single image.

-        for single_img in imgs:
-            # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
-            if single_img.dtype != np.uint8:
-                single_img = single_img.astype(np.uint8)
+        Args:
+            single_img (np.ndarray): Pre-loaded image as numpy array

-            opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
+        Returns:
+            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
+        """
+        # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
+        if single_img.dtype != np.uint8:
+            single_img = single_img.astype(np.uint8)

-            target_size = (300, 300)
-            original_size = single_img.shape
-            current_img = cv2.resize(single_img, target_size)
+        opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]

-            aspect_ratio_x = original_size[1] / target_size[1]
-            aspect_ratio_y = original_size[0] / target_size[0]
+        target_size = (300, 300)
+        original_size = single_img.shape
+        current_img = cv2.resize(single_img, target_size)

-            imageBlob = cv2.dnn.blobFromImage(image=current_img)
+        aspect_ratio_x = original_size[1] / target_size[1]
+        aspect_ratio_y = original_size[0] / target_size[0]

-            face_detector = self.model["face_detector"]
-            face_detector.setInput(imageBlob)
-            detections = face_detector.forward()
+        imageBlob = cv2.dnn.blobFromImage(image=current_img)

-            class ssd_labels(IntEnum):
-                img_id = 0
-                is_face = 1
-                confidence = 2
-                left = 3
-                top = 4
-                right = 5
-                bottom = 6
+        face_detector = self.model["face_detector"]
+        face_detector.setInput(imageBlob)
+        detections = face_detector.forward()

-            faces = detections[0][0]
-            faces = faces[
-                (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
-            ]
-            margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
-            faces[:, margins] = np.int32(faces[:, margins] * 300)
-            faces[:, margins] = np.int32(
-                faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
+        class ssd_labels(IntEnum):
+            img_id = 0
+            is_face = 1
+            confidence = 2
+            left = 3
+            top = 4
+            right = 5
+            bottom = 6
+
+        faces = detections[0][0]
+        faces = faces[
+            (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
+        ]
+        margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
+        faces[:, margins] = np.int32(faces[:, margins] * 300)
+        faces[:, margins] = np.int32(
+            faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
+        )
+        faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
+            :, [ssd_labels.left, ssd_labels.top]
+        ]
+
+        resp = []
+        for face in faces:
+            confidence = float(face[ssd_labels.confidence])
+            x, y, w, h = map(int, face[margins])
+            detected_face = single_img[y : y + h, x : x + w]
+
+            left_eye, right_eye = opencv_module.find_eyes(detected_face)
+
+            # eyes are located within the cropped face rather than the full image,
+            # so the detected face's top-left coordinates must be added to them
+            if left_eye is not None:
+                left_eye = x + int(left_eye[0]), y + int(left_eye[1])
+            if right_eye is not None:
+                right_eye = x + int(right_eye[0]), y + int(right_eye[1])
+
+            facial_area = FacialAreaRegion(
+                x=x,
+                y=y,
+                w=w,
+                h=h,
+                left_eye=left_eye,
+                right_eye=right_eye,
+                confidence=confidence,
            )
-            faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
-                :, [ssd_labels.left, ssd_labels.top]
-            ]
+            resp.append(facial_area)

-            resp = []
-            for face in faces:
-                confidence = float(face[ssd_labels.confidence])
-                x, y, w, h = map(int, face[margins])
-                detected_face = single_img[y : y + h, x : x + w]
-
-                left_eye, right_eye = opencv_module.find_eyes(detected_face)
-
-                # eyes found in the detected face instead image itself
-                # detected face's coordinates should be added
-                if left_eye is not None:
-                    left_eye = x + int(left_eye[0]), y + int(left_eye[1])
-                if right_eye is not None:
-                    right_eye = x + int(right_eye[0]), y + int(right_eye[1])
-
-                facial_area = FacialAreaRegion(
-                    x=x,
-                    y=y,
-                    w=w,
-                    h=h,
-                    left_eye=left_eye,
-                    right_eye=right_eye,
-                    confidence=confidence,
-                )
-                resp.append(facial_area)
-
-            batch_results.append(resp)
-
-        return batch_results if len(batch_results) > 1 else batch_results[0]
+        return resp
--- a/deepface/models/face_detection/YuNet.py
+++ b/deepface/models/face_detection/YuNet.py
@ -67,10 +67,11 @@ class YuNetClient(Detector):
        Returns:
            results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list or a list of lists of FacialAreaRegion objects
        """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
            img = [img]
        results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
            return results[0]
        return results

--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@ -82,15 +82,23 @@ def extract_faces(
            just available in the result only if anti_spoofing is set to True in input arguments.
    """

-    if isinstance(img_path, np.ndarray) and img_path.ndim == 4:
-        img_path = [img_path[i] for i in range(img_path.shape[0])]
-    if not isinstance(img_path, list):
-        img_path = [img_path]
+    batched_input = (
+        (
+            isinstance(img_path, np.ndarray) and 
+            img_path.ndim == 4
+        ) or isinstance(img_path, list)
+    )
+    if not batched_input:
+        imgs_path = [img_path]
+    elif isinstance(img_path, np.ndarray):
+        imgs_path = [img_path[i] for i in range(img_path.shape[0])]
+    else:
+        imgs_path = img_path

    all_images = []
    img_names = []

-    for single_img_path in img_path:
+    for single_img_path in imgs_path:
        # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
        img, img_name = image_utils.load_image(single_img_path)

@ -109,9 +117,6 @@ def extract_faces(
        max_faces=max_faces,
    )

-    if len(all_images) == 1:
-        all_face_objs = [all_face_objs]
-
    all_resp_objs = []

    for img, img_name, face_objs in zip(all_images, img_names, all_face_objs):
@ -203,7 +208,7 @@ def extract_faces(

        all_resp_objs.append(img_resp_objs)

-    if len(all_resp_objs) == 1:
+    if not batched_input:
        return all_resp_objs[0]
    return all_resp_objs

@ -239,8 +244,18 @@ def detect_faces(

        - confidence (float): The confidence score associated with the detected face.
    """
-    if not isinstance(img, list):
-        img = [img]
+    batched_input = (
+        (
+            isinstance(img, np.ndarray) and 
+            img.ndim == 4
+        ) or isinstance(img, list)
+    )
+    if not batched_input:
+        imgs = [img]
+    elif isinstance(img, np.ndarray):
+        imgs = [img[i] for i in range(img.shape[0])]
+    else:
+        imgs = img

    if detector_backend == "skip":
        all_face_objs = [
@ -253,9 +268,9 @@ def detect_faces(
                    confidence=0,
                )
            ]
-            for single_img in img
+            for single_img in imgs
        ]
-        if len(img) == 1:
+        if not batched_input:
            all_face_objs = all_face_objs[0]
        return all_face_objs

@ -266,7 +281,7 @@ def detect_faces(
    preprocessed_images = []
    width_borders = []
    height_borders = []
-    for single_img in img:
+    for single_img in imgs:
        height, width, _ = single_img.shape

        # validate expand percentage score
@ -299,9 +314,6 @@ def detect_faces(
    # Detect faces in all preprocessed images
    all_facial_areas = face_detector.detect_faces(preprocessed_images)

-    if len(preprocessed_images) == 1:
-        all_facial_areas = [all_facial_areas]
-
    all_detected_faces = []
    for (
        single_img,
@ -336,7 +348,7 @@ def detect_faces(

        all_detected_faces.append(detected_faces)

-    if len(all_detected_faces) == 1:
+    if not batched_input:
        return all_detected_faces[0]
    return all_detected_faces

--- a/tests/test_extract_faces.py
+++ b/tests/test_extract_faces.py
@ -242,8 +242,8 @@ def test_batch_extract_faces_single_image():
        img_path=[img_path],
        align=True,
    )
-    assert len(imgs_objs_batch) == 2
-    assert [isinstance(obj, dict) for obj in imgs_objs_batch]
+    assert len(imgs_objs_batch) == 1 and isinstance(imgs_objs_batch[0], list)
+    assert [isinstance(obj, dict) for obj in imgs_objs_batch[0]]


 def test_backends_for_enforced_detection_with_non_facial_inputs():