yolo detect batched

2025-07-23 02:10:02 +00:00 · 2025-02-12 15:59:13 +00:00 · 2025-02-12 15:59:13 +00:00 · 1bd83356e7
commit 1bd83356e7
parent b2d6178bed
1 changed files with 54 additions and 41 deletions
--- a/deepface/models/face_detection/Yolo.py
+++ b/deepface/models/face_detection/Yolo.py
@@ -1,6 +1,6 @@
 # built-in dependencies
 import os
-from typing import List, Any
+from typing import List, Any, Union, Tuple
 from enum import Enum
 # 3rd party dependencies
@@ -62,64 +62,77 @@ class YoloDetectorClient(Detector):
        # Return face_detector
        return YOLO(weight_file)
-    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
+    def detect_faces(self, imgs: Union[np.ndarray, List[np.ndarray]]) -> Union[List[List[FacialAreaRegion]], List[FacialAreaRegion]]:
        """
-        Detect and align face with yolo
+        Detect and align faces in an image or a list of images with yolo
        Args:
-            img (np.ndarray): pre-loaded image as numpy array
+            imgs (Union[np.ndarray, List[np.ndarray]]): pre-loaded image as numpy array or a list of those
        Returns:
-            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
+            results (Union[List[List[FacialAreaRegion]], List[FacialAreaRegion]]): 
                A list of lists of FacialAreaRegion objects for each image or a list of FacialAreaRegion objects
        """
-        resp = []
+        if not isinstance(imgs, list):
            imgs = [imgs]
-        # Detect faces
+        all_results = []
-        results = self.model.predict(
+
-            img,
+        # Detect faces for all images
        results_list = self.model.predict(
            imgs,
            verbose=False,
            show=False,
            conf=float(os.getenv("YOLO_MIN_DETECTION_CONFIDENCE", "0.25")),
-        )[0]
+        )
-        # For each face, extract the bounding box, the landmarks and confidence
+        # Iterate over each image's results
-        for result in results:
+        for results in results_list:
            resp = []
-            if result.boxes is None:
+            # For each face, extract the bounding box, the landmarks and confidence
-                continue
+            for result in results:
-            # Extract the bounding box and the confidence
+                if result.boxes is None:
-            x, y, w, h = result.boxes.xywh.tolist()[0]
+                    continue
            confidence = result.boxes.conf.tolist()[0]
-            right_eye = None
+                # Extract the bounding box and the confidence
-            left_eye = None
+                x, y, w, h = result.boxes.xywh.tolist()[0]
                confidence = result.boxes.conf.tolist()[0]
-            # yolo-facev8 is detecting eyes through keypoints,
+                right_eye = None
-            # while for v11 keypoints are always None
+                left_eye = None
            if result.keypoints is not None:
                # right_eye_conf = result.keypoints.conf[0][0]
                # left_eye_conf = result.keypoints.conf[0][1]
                right_eye = result.keypoints.xy[0][0].tolist()
                left_eye = result.keypoints.xy[0][1].tolist()
-                # eyes are list of float, need to cast them tuple of int
+                # yolo-facev8 is detecting eyes through keypoints,
-                left_eye = tuple(int(i) for i in left_eye)
+                # while for v11 keypoints are always None
-                right_eye = tuple(int(i) for i in right_eye)
+                if result.keypoints is not None:
                    # right_eye_conf = result.keypoints.conf[0][0]
                    # left_eye_conf = result.keypoints.conf[0][1]
                    right_eye = result.keypoints.xy[0][0].tolist()
                    left_eye = result.keypoints.xy[0][1].tolist()
-            x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
+                    # eyes are list of float, need to cast them tuple of int
-            facial_area = FacialAreaRegion(
+                    # Ensure eyes are tuples of exactly two integers or None
-                x=x,
+                    left_eye = tuple(map(int, left_eye[:2])) if left_eye and len(left_eye) == 2 else None
-                y=y,
+                    right_eye = tuple(map(int, right_eye[:2])) if right_eye and len(right_eye) == 2 else None
                w=w,
                h=h,
                left_eye=left_eye,
                right_eye=right_eye,
                confidence=confidence,
            )
            resp.append(facial_area)
-        return resp
+                x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
                facial_area = FacialAreaRegion(
                    x=x,
                    y=y,
                    w=w,
                    h=h,
                    left_eye=left_eye,
                    right_eye=right_eye,
                    confidence=confidence,
                )
                resp.append(facial_area)
            all_results.append(resp)
        if len(all_results) == 1:
            return all_results[0]
        return all_results
 class YoloDetectorClientV8n(YoloDetectorClient):