yolo detect batched

2025-06-06 11:35:21 +00:00 · 2025-02-12 15:59:13 +00:00 · 2025-02-12 15:59:13 +00:00 · 1bd83356e7
commit 1bd83356e7
parent b2d6178bed
1 changed files with 54 additions and 41 deletions
--- a/deepface/models/face_detection/Yolo.py
+++ b/deepface/models/face_detection/Yolo.py
@ -1,6 +1,6 @@
 # built-in dependencies
 import os
-from typing import List, Any
+from typing import List, Any, Union, Tuple
 from enum import Enum

 # 3rd party dependencies
@ -62,64 +62,77 @@ class YoloDetectorClient(Detector):
        # Return face_detector
        return YOLO(weight_file)

-    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
+    def detect_faces(self, imgs: Union[np.ndarray, List[np.ndarray]]) -> Union[List[List[FacialAreaRegion]], List[FacialAreaRegion]]:
        """
-        Detect and align face with yolo
+        Detect faces in an image or a list of images with yolo

        Args:
-            img (np.ndarray): pre-loaded image as numpy array
+            imgs (Union[np.ndarray, List[np.ndarray]]): pre-loaded image as a numpy array, or a list of such images

        Returns:
-            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
+            results (Union[List[List[FacialAreaRegion]], List[FacialAreaRegion]]):
+                A flat list of FacialAreaRegion objects when exactly one image is processed (including a one-element list); otherwise one such list per image
        """
-        resp = []
+        if not isinstance(imgs, list):
+            imgs = [imgs]

-        # Detect faces
-        results = self.model.predict(
-            img,
+        all_results = []
+
+        # Detect faces for all images
+        results_list = self.model.predict(
+            imgs,
            verbose=False,
            show=False,
            conf=float(os.getenv("YOLO_MIN_DETECTION_CONFIDENCE", "0.25")),
-        )[0]
+        )

-        # For each face, extract the bounding box, the landmarks and confidence
-        for result in results:
+        # Iterate over each image's results
+        for results in results_list:
+            resp = []

-            if result.boxes is None:
-                continue
+            # For each face, extract the bounding box, the landmarks and confidence
+            for result in results:

-            # Extract the bounding box and the confidence
-            x, y, w, h = result.boxes.xywh.tolist()[0]
-            confidence = result.boxes.conf.tolist()[0]
+                if result.boxes is None:
+                    continue

-            right_eye = None
-            left_eye = None
+                # Extract the bounding box and the confidence
+                x, y, w, h = result.boxes.xywh.tolist()[0]
+                confidence = result.boxes.conf.tolist()[0]

-            # yolo-facev8 is detecting eyes through keypoints,
-            # while for v11 keypoints are always None
-            if result.keypoints is not None:
-                # right_eye_conf = result.keypoints.conf[0][0]
-                # left_eye_conf = result.keypoints.conf[0][1]
-                right_eye = result.keypoints.xy[0][0].tolist()
-                left_eye = result.keypoints.xy[0][1].tolist()
+                right_eye = None
+                left_eye = None

-                # eyes are list of float, need to cast them tuple of int
-                left_eye = tuple(int(i) for i in left_eye)
-                right_eye = tuple(int(i) for i in right_eye)
+                # yolo-facev8 is detecting eyes through keypoints,
+                # while for v11 keypoints are always None
+                if result.keypoints is not None:
+                    # right_eye_conf = result.keypoints.conf[0][0]
+                    # left_eye_conf = result.keypoints.conf[0][1]
+                    right_eye = result.keypoints.xy[0][0].tolist()
+                    left_eye = result.keypoints.xy[0][1].tolist()

-            x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
-            facial_area = FacialAreaRegion(
-                x=x,
-                y=y,
-                w=w,
-                h=h,
-                left_eye=left_eye,
-                right_eye=right_eye,
-                confidence=confidence,
-            )
-            resp.append(facial_area)
+                    # eyes are lists of floats; cast them to tuples of ints
+                    # Ensure each eye is a tuple of exactly two integers, or None
+                    left_eye = tuple(map(int, left_eye[:2])) if left_eye and len(left_eye) == 2 else None
+                    right_eye = tuple(map(int, right_eye[:2])) if right_eye and len(right_eye) == 2 else None

-        return resp
+                x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
+                facial_area = FacialAreaRegion(
+                    x=x,
+                    y=y,
+                    w=w,
+                    h=h,
+                    left_eye=left_eye,
+                    right_eye=right_eye,
+                    confidence=confidence,
+                )
+                resp.append(facial_area)
+
+            all_results.append(resp)
+
+        if len(all_results) == 1:
+            return all_results[0]
+        return all_results


 class YoloDetectorClientV8n(YoloDetectorClient):