From 8b1b465d220677113e1d5f9f275ddb218226d739 Mon Sep 17 00:00:00 2001
From: galthran-wq
Date: Sun, 23 Feb 2025 13:27:18 +0000
Subject: [PATCH] change behaviour in special case batched single image

---
 deepface/models/face_detection/CenterFace.py |   5 +-
 deepface/models/face_detection/Dlib.py       |   5 +-
 deepface/models/face_detection/FastMtCnn.py  |   5 +-
 deepface/models/face_detection/MediaPipe.py  |   5 +-
 deepface/models/face_detection/MtCnn.py      |   5 +-
 deepface/models/face_detection/OpenCv.py     |  20 ++-
 deepface/models/face_detection/RetinaFace.py |   7 +-
 deepface/models/face_detection/Ssd.py        | 142 ++++++++++---------
 deepface/models/face_detection/YuNet.py      |   5 +-
 deepface/modules/detection.py                |  48 ++++---
 tests/test_extract_faces.py                  |   4 +-
 11 files changed, 149 insertions(+), 102 deletions(-)

diff --git a/deepface/models/face_detection/CenterFace.py b/deepface/models/face_detection/CenterFace.py
index a947900..523f2bf 100644
--- a/deepface/models/face_detection/CenterFace.py
+++ b/deepface/models/face_detection/CenterFace.py
@@ -49,10 +49,11 @@ class CenterFaceClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/Dlib.py b/deepface/models/face_detection/Dlib.py
index 208b044..254a32b 100644
--- a/deepface/models/face_detection/Dlib.py
+++ b/deepface/models/face_detection/Dlib.py
@@ -62,10 +62,11 @@ class DlibClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/FastMtCnn.py b/deepface/models/face_detection/FastMtCnn.py
index ea2431e..7d5ccf5 100644
--- a/deepface/models/face_detection/FastMtCnn.py
+++ b/deepface/models/face_detection/FastMtCnn.py
@@ -32,10 +32,11 @@ class FastMtCnnClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/MediaPipe.py b/deepface/models/face_detection/MediaPipe.py
index 4716e7c..9fcdbbc 100644
--- a/deepface/models/face_detection/MediaPipe.py
+++ b/deepface/models/face_detection/MediaPipe.py
@@ -58,10 +58,11 @@ class MediaPipeClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/MtCnn.py b/deepface/models/face_detection/MtCnn.py
index 8c42075..2382670 100644
--- a/deepface/models/face_detection/MtCnn.py
+++ b/deepface/models/face_detection/MtCnn.py
@@ -38,7 +38,8 @@ class MtCnnClient(Detector):
             or a list of lists of FacialAreaRegion objects for each image
         """
 
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
 
         resp = []
@@ -76,7 +77,7 @@ class MtCnnClient(Detector):
 
             resp.append(image_resp)
 
-        if len(resp) == 1:
+        if not is_batched_input:
             return resp[0]
 
         return resp
diff --git a/deepface/models/face_detection/OpenCv.py b/deepface/models/face_detection/OpenCv.py
index f97f658..a6d39c9 100644
--- a/deepface/models/face_detection/OpenCv.py
+++ b/deepface/models/face_detection/OpenCv.py
@@ -1,6 +1,7 @@
 # built-in dependencies
 import os
 from typing import Any, List, Union
+import logging
 
 # 3rd party dependencies
 import cv2
@@ -9,6 +10,7 @@ import numpy as np
 #project dependencies
 from deepface.models.Detector import Detector, FacialAreaRegion
 
+logger = logging.getLogger(__name__)
 
 class OpenCvClient(Detector):
     """
@@ -17,6 +19,7 @@ class OpenCvClient(Detector):
 
     def __init__(self):
         self.model = self.build_model()
+        self.supports_batch_detection = self._supports_batch_detection()
 
     def build_model(self):
         """
@@ -28,6 +31,19 @@ class OpenCvClient(Detector):
         detector["face_detector"] = self.__build_cascade("haarcascade")
         detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
         return detector
+
+    def _supports_batch_detection(self) -> bool:
+        supports_batch_detection = os.getenv(
+            "ENABLE_OPENCV_BATCH_DETECTION", "false"
+        ).lower() == "true"
+        if not supports_batch_detection:
+            logger.warning(
+                "Batch detection is disabled for opencv by default "
+                "since the results are not consistent with single image detection. "
+                "You can force enable it by setting the environment variable "
+                "ENABLE_OPENCV_BATCH_DETECTION to true."
+            )
+        return supports_batch_detection
 
     def detect_faces(
         self,
@@ -46,8 +62,10 @@
         """
         if isinstance(img, np.ndarray):
             imgs = [img]
-        else:
+        elif self.supports_batch_detection:
             imgs = img
+        else:
+            return [self.detect_faces(single_img) for single_img in img]
 
         batch_results = []
 
diff --git a/deepface/models/face_detection/RetinaFace.py b/deepface/models/face_detection/RetinaFace.py
index b0d1c94..2722e01 100644
--- a/deepface/models/face_detection/RetinaFace.py
+++ b/deepface/models/face_detection/RetinaFace.py
@@ -28,7 +28,8 @@ class RetinaFaceClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if isinstance(img, np.ndarray):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             imgs = [img]
         else:
             imgs = img
@@ -81,4 +82,6 @@ class RetinaFaceClient(Detector):
 
             batch_results.append(resp)
 
-        return batch_results if len(batch_results) > 1 else batch_results[0]
+        if not is_batched_input:
+            return batch_results[0]
+        return batch_results
diff --git a/deepface/models/face_detection/Ssd.py b/deepface/models/face_detection/Ssd.py
index c0ae2cb..a620f96 100644
--- a/deepface/models/face_detection/Ssd.py
+++ b/deepface/models/face_detection/Ssd.py
@@ -69,81 +69,89 @@ class SsdClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if isinstance(img, np.ndarray):
-            imgs = [img]
-        else:
-            imgs = img
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
+            img = [img]
+        results = [self._process_single_image(single_img) for single_img in img]
+        if not is_batched_input:
+            return results[0]
+        return results
 
-        batch_results = []
+    def _process_single_image(self, single_img: np.ndarray) -> List[FacialAreaRegion]:
+        """
+        Helper function to detect faces in a single image.
 
-        for single_img in imgs:
-            # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
-            if single_img.dtype != np.uint8:
-                single_img = single_img.astype(np.uint8)
+        Args:
+            single_img (np.ndarray): Pre-loaded image as numpy array
 
-            opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
+        Returns:
+            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
+        """
+        # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
+        if single_img.dtype != np.uint8:
+            single_img = single_img.astype(np.uint8)
 
-            target_size = (300, 300)
-            original_size = single_img.shape
-            current_img = cv2.resize(single_img, target_size)
+        opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
 
-            aspect_ratio_x = original_size[1] / target_size[1]
-            aspect_ratio_y = original_size[0] / target_size[0]
+        target_size = (300, 300)
+        original_size = single_img.shape
+        current_img = cv2.resize(single_img, target_size)
 
-            imageBlob = cv2.dnn.blobFromImage(image=current_img)
+        aspect_ratio_x = original_size[1] / target_size[1]
+        aspect_ratio_y = original_size[0] / target_size[0]
 
-            face_detector = self.model["face_detector"]
-            face_detector.setInput(imageBlob)
-            detections = face_detector.forward()
+        imageBlob = cv2.dnn.blobFromImage(image=current_img)
 
-            class ssd_labels(IntEnum):
-                img_id = 0
-                is_face = 1
-                confidence = 2
-                left = 3
-                top = 4
-                right = 5
-                bottom = 6
+        face_detector = self.model["face_detector"]
+        face_detector.setInput(imageBlob)
+        detections = face_detector.forward()
 
-            faces = detections[0][0]
-            faces = faces[
-                (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
-            ]
-            margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
-            faces[:, margins] = np.int32(faces[:, margins] * 300)
-            faces[:, margins] = np.int32(
-                faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
+        class ssd_labels(IntEnum):
+            img_id = 0
+            is_face = 1
+            confidence = 2
+            left = 3
+            top = 4
+            right = 5
+            bottom = 6
+
+        faces = detections[0][0]
+        faces = faces[
+            (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
+        ]
+        margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
+        faces[:, margins] = np.int32(faces[:, margins] * 300)
+        faces[:, margins] = np.int32(
+            faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
+        )
+        faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
+            :, [ssd_labels.left, ssd_labels.top]
+        ]
+
+        resp = []
+        for face in faces:
+            confidence = float(face[ssd_labels.confidence])
+            x, y, w, h = map(int, face[margins])
+            detected_face = single_img[y : y + h, x : x + w]
+
+            left_eye, right_eye = opencv_module.find_eyes(detected_face)
+
+            # eyes found in the detected face instead image itself
+            # detected face's coordinates should be added
+            if left_eye is not None:
+                left_eye = x + int(left_eye[0]), y + int(left_eye[1])
+            if right_eye is not None:
+                right_eye = x + int(right_eye[0]), y + int(right_eye[1])
+
+            facial_area = FacialAreaRegion(
+                x=x,
+                y=y,
+                w=w,
+                h=h,
+                left_eye=left_eye,
+                right_eye=right_eye,
+                confidence=confidence,
             )
-            faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
-                :, [ssd_labels.left, ssd_labels.top]
-            ]
+            resp.append(facial_area)
 
-            resp = []
-            for face in faces:
-                confidence = float(face[ssd_labels.confidence])
-                x, y, w, h = map(int, face[margins])
-                detected_face = single_img[y : y + h, x : x + w]
-
-                left_eye, right_eye = opencv_module.find_eyes(detected_face)
-
-                # eyes found in the detected face instead image itself
-                # detected face's coordinates should be added
-                if left_eye is not None:
-                    left_eye = x + int(left_eye[0]), y + int(left_eye[1])
-                if right_eye is not None:
-                    right_eye = x + int(right_eye[0]), y + int(right_eye[1])
-
-                facial_area = FacialAreaRegion(
-                    x=x,
-                    y=y,
-                    w=w,
-                    h=h,
-                    left_eye=left_eye,
-                    right_eye=right_eye,
-                    confidence=confidence,
-                )
-                resp.append(facial_area)
-
-            batch_results.append(resp)
-
-        return batch_results if len(batch_results) > 1 else batch_results[0]
+        return resp
diff --git a/deepface/models/face_detection/YuNet.py b/deepface/models/face_detection/YuNet.py
index 4086f17..93e65a8 100644
--- a/deepface/models/face_detection/YuNet.py
+++ b/deepface/models/face_detection/YuNet.py
@@ -67,10 +67,11 @@ class YuNetClient(Detector):
         Returns:
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
index 2b14a7c..9977ea3 100644
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@@ -82,15 +82,23 @@ def extract_faces(
             just available in the result only if anti_spoofing is set to True in input arguments.
     """
 
-    if isinstance(img_path, np.ndarray) and img_path.ndim == 4:
-        img_path = [img_path[i] for i in range(img_path.shape[0])]
-    if not isinstance(img_path, list):
-        img_path = [img_path]
+    batched_input = (
+        (
+            isinstance(img_path, np.ndarray) and
+            img_path.ndim == 4
+        ) or isinstance(img_path, list)
+    )
+    if not batched_input:
+        imgs_path = [img_path]
+    elif isinstance(img_path, np.ndarray):
+        imgs_path = [img_path[i] for i in range(img_path.shape[0])]
+    else:
+        imgs_path = img_path
 
     all_images = []
     img_names = []
 
-    for single_img_path in img_path:
+    for single_img_path in imgs_path:
         # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
         img, img_name = image_utils.load_image(single_img_path)
 
@@ -109,9 +117,6 @@ def extract_faces(
         max_faces=max_faces,
     )
 
-    if len(all_images) == 1:
-        all_face_objs = [all_face_objs]
-
     all_resp_objs = []
 
     for img, img_name, face_objs in zip(all_images, img_names, all_face_objs):
@@ -203,7 +208,7 @@ def extract_faces(
 
         all_resp_objs.append(img_resp_objs)
 
-    if len(all_resp_objs) == 1:
+    if not batched_input:
         return all_resp_objs[0]
     return all_resp_objs
 
@@ -239,8 +244,18 @@ def detect_faces(
         - confidence (float): The confidence score associated with the detected face.
""" - if not isinstance(img, list): - img = [img] + batched_input = ( + ( + isinstance(img, np.ndarray) and + img.ndim == 4 + ) or isinstance(img, list) + ) + if not batched_input: + imgs = [img] + elif isinstance(img, np.ndarray): + imgs = [img[i] for i in range(img.shape[0])] + else: + imgs = img if detector_backend == "skip": all_face_objs = [ @@ -253,9 +268,9 @@ def detect_faces( confidence=0, ) ] - for single_img in img + for single_img in imgs ] - if len(img) == 1: + if not batched_input: all_face_objs = all_face_objs[0] return all_face_objs @@ -266,7 +281,7 @@ def detect_faces( preprocessed_images = [] width_borders = [] height_borders = [] - for single_img in img: + for single_img in imgs: height, width, _ = single_img.shape # validate expand percentage score @@ -299,9 +314,6 @@ def detect_faces( # Detect faces in all preprocessed images all_facial_areas = face_detector.detect_faces(preprocessed_images) - if len(preprocessed_images) == 1: - all_facial_areas = [all_facial_areas] - all_detected_faces = [] for ( single_img, @@ -336,7 +348,7 @@ def detect_faces( all_detected_faces.append(detected_faces) - if len(all_detected_faces) == 1: + if not batched_input: return all_detected_faces[0] return all_detected_faces diff --git a/tests/test_extract_faces.py b/tests/test_extract_faces.py index 18ef34f..2814c54 100644 --- a/tests/test_extract_faces.py +++ b/tests/test_extract_faces.py @@ -242,8 +242,8 @@ def test_batch_extract_faces_single_image(): img_path=[img_path], align=True, ) - assert len(imgs_objs_batch) == 2 - assert [isinstance(obj, dict) for obj in imgs_objs_batch] + assert len(imgs_objs_batch) == 1 and isinstance(imgs_objs_batch[0], list) + assert [isinstance(obj, dict) for obj in imgs_objs_batch[0]] def test_backends_for_enforced_detection_with_non_facial_inputs():