diff --git a/deepface/models/face_detection/Yolo.py b/deepface/models/face_detection/Yolo.py
index 233f088..34fdb01 100644
--- a/deepface/models/face_detection/Yolo.py
+++ b/deepface/models/face_detection/Yolo.py
@@ -1,6 +1,6 @@
 # built-in dependencies
 import os
-from typing import List, Any
+from typing import List, Any, Union, Tuple
 from enum import Enum
 
 # 3rd party dependencies
@@ -62,64 +62,85 @@ class YoloDetectorClient(Detector):
         # Return face_detector
         return YOLO(weight_file)
 
-    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
+    def detect_faces(
+        self, imgs: Union[np.ndarray, List[np.ndarray]]
+    ) -> Union[List[List[FacialAreaRegion]], List[FacialAreaRegion]]:
         """
-        Detect and align face with yolo
+        Detect and align faces in an image or a list of images with yolo
 
         Args:
-            img (np.ndarray): pre-loaded image as numpy array
+            imgs (Union[np.ndarray, List[np.ndarray]]): pre-loaded image as numpy array
+                or a list of those
 
         Returns:
-            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
+            results (Union[List[List[FacialAreaRegion]], List[FacialAreaRegion]]):
+                A list of FacialAreaRegion objects when a single image is given,
+                or one such list per image when a list of images is given
+                (a list of lists even if the input list holds only one image)
         """
-        resp = []
+        # Remember the input form so that the output shape mirrors it
+        is_batched_input = isinstance(imgs, list)
+        if not is_batched_input:
+            imgs = [imgs]
 
-        # Detect faces
-        results = self.model.predict(
-            img,
+        all_results = []
+
+        # Detect faces for all images
+        results_list = self.model.predict(
+            imgs,
             verbose=False,
             show=False,
             conf=float(os.getenv("YOLO_MIN_DETECTION_CONFIDENCE", "0.25")),
-        )[0]
+        )
 
-        # For each face, extract the bounding box, the landmarks and confidence
-        for result in results:
+        # Iterate over each image's results
+        for results in results_list:
+            resp = []
 
-            if result.boxes is None:
-                continue
+            # For each face, extract the bounding box, the landmarks and confidence
+            for result in results:
 
-            # Extract the bounding box and the confidence
-            x, y, w, h = result.boxes.xywh.tolist()[0]
-            confidence = result.boxes.conf.tolist()[0]
+                if result.boxes is None:
+                    continue
 
-            right_eye = None
-            left_eye = None
+                # Extract the bounding box and the confidence
+                x, y, w, h = result.boxes.xywh.tolist()[0]
+                confidence = result.boxes.conf.tolist()[0]
 
-            # yolo-facev8 is detecting eyes through keypoints,
-            # while for v11 keypoints are always None
-            if result.keypoints is not None:
-                # right_eye_conf = result.keypoints.conf[0][0]
-                # left_eye_conf = result.keypoints.conf[0][1]
-                right_eye = result.keypoints.xy[0][0].tolist()
-                left_eye = result.keypoints.xy[0][1].tolist()
+                right_eye = None
+                left_eye = None
 
-                # eyes are list of float, need to cast them tuple of int
-                left_eye = tuple(int(i) for i in left_eye)
-                right_eye = tuple(int(i) for i in right_eye)
+                # yolo-facev8 is detecting eyes through keypoints,
+                # while for v11 keypoints are always None
+                if result.keypoints is not None:
+                    # right_eye_conf = result.keypoints.conf[0][0]
+                    # left_eye_conf = result.keypoints.conf[0][1]
+                    right_eye = result.keypoints.xy[0][0].tolist()
+                    left_eye = result.keypoints.xy[0][1].tolist()
 
-            x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
-            facial_area = FacialAreaRegion(
-                x=x,
-                y=y,
-                w=w,
-                h=h,
-                left_eye=left_eye,
-                right_eye=right_eye,
-                confidence=confidence,
-            )
-            resp.append(facial_area)
+                    # eyes are list of float, need to cast them tuple of int;
+                    # anything that is not an (x, y) pair is dropped to None
+                    left_eye = tuple(map(int, left_eye)) if len(left_eye) == 2 else None
+                    right_eye = tuple(map(int, right_eye)) if len(right_eye) == 2 else None
 
-        return resp
+                x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
+                facial_area = FacialAreaRegion(
+                    x=x,
+                    y=y,
+                    w=w,
+                    h=h,
+                    left_eye=left_eye,
+                    right_eye=right_eye,
+                    confidence=confidence,
+                )
+                resp.append(facial_area)
+
+            all_results.append(resp)
+
+        # Unwrap only when the caller passed a single image, not a one-element list
+        if not is_batched_input:
+            return all_results[0]
+        return all_results
 
 
 class YoloDetectorClientV8n(YoloDetectorClient):