From 8b1b465d220677113e1d5f9f275ddb218226d739 Mon Sep 17 00:00:00 2001
From: galthran-wq
Date: Sun, 23 Feb 2025 13:27:18 +0000
Subject: [PATCH] change behaviour in special case batched single image

---
 deepface/models/face_detection/CenterFace.py |   5 +-
 deepface/models/face_detection/Dlib.py       |   5 +-
 deepface/models/face_detection/FastMtCnn.py  |   5 +-
 deepface/models/face_detection/MediaPipe.py  |   5 +-
 deepface/models/face_detection/MtCnn.py      |   5 +-
 deepface/models/face_detection/OpenCv.py     |  20 ++-
 deepface/models/face_detection/RetinaFace.py |   7 +-
 deepface/models/face_detection/Ssd.py        | 142 ++++++++++---------
 deepface/models/face_detection/YuNet.py      |   5 +-
 deepface/modules/detection.py                |  48 ++++---
 tests/test_extract_faces.py                  |   4 +-
 11 files changed, 149 insertions(+), 102 deletions(-)

diff --git a/deepface/models/face_detection/CenterFace.py b/deepface/models/face_detection/CenterFace.py
index a947900..523f2bf 100644
--- a/deepface/models/face_detection/CenterFace.py
+++ b/deepface/models/face_detection/CenterFace.py
@@ -49,10 +49,11 @@ class CenterFaceClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/Dlib.py b/deepface/models/face_detection/Dlib.py
index 208b044..254a32b 100644
--- a/deepface/models/face_detection/Dlib.py
+++ b/deepface/models/face_detection/Dlib.py
@@ -62,10 +62,11 @@ class DlibClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/FastMtCnn.py b/deepface/models/face_detection/FastMtCnn.py
index ea2431e..7d5ccf5 100644
--- a/deepface/models/face_detection/FastMtCnn.py
+++ b/deepface/models/face_detection/FastMtCnn.py
@@ -32,10 +32,11 @@ class FastMtCnnClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/MediaPipe.py b/deepface/models/face_detection/MediaPipe.py
index 4716e7c..9fcdbbc 100644
--- a/deepface/models/face_detection/MediaPipe.py
+++ b/deepface/models/face_detection/MediaPipe.py
@@ -58,10 +58,11 @@ class MediaPipeClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
 
diff --git a/deepface/models/face_detection/MtCnn.py b/deepface/models/face_detection/MtCnn.py
index 8c42075..2382670 100644
--- a/deepface/models/face_detection/MtCnn.py
+++ b/deepface/models/face_detection/MtCnn.py
@@ -38,7 +38,8 @@ class MtCnnClient(Detector):
             or a list of lists of FacialAreaRegion objects for each image
         """
 
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
 
         resp = []
@@ -76,7 +77,7 @@ class MtCnnClient(Detector):
 
             resp.append(image_resp)
 
-        if len(resp) == 1:
+        if not is_batched_input:
             return resp[0]
 
         return resp
diff --git a/deepface/models/face_detection/OpenCv.py b/deepface/models/face_detection/OpenCv.py
index f97f658..a6d39c9 100644
--- a/deepface/models/face_detection/OpenCv.py
+++ b/deepface/models/face_detection/OpenCv.py
@@ -1,6 +1,7 @@
 # built-in dependencies
 import os
 from typing import Any, List, Union
+import logging
 
 # 3rd party dependencies
 import cv2
@@ -9,6 +10,7 @@ import numpy as np
 #project dependencies
 from deepface.models.Detector import Detector, FacialAreaRegion
 
+logger = logging.getLogger(__name__)
 
 class OpenCvClient(Detector):
     """
@@ -17,6 +19,7 @@ class OpenCvClient(Detector):
 
     def __init__(self):
         self.model = self.build_model()
+        self.supports_batch_detection = self._supports_batch_detection()
 
     def build_model(self):
         """
@@ -28,6 +31,19 @@ class OpenCvClient(Detector):
         detector["face_detector"] = self.__build_cascade("haarcascade")
         detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
         return detector
+
+    def _supports_batch_detection(self) -> bool:
+        supports_batch_detection = os.getenv(
+            "ENABLE_OPENCV_BATCH_DETECTION", "false"
+        ).lower() == "true"
+        if not supports_batch_detection:
+            logger.warning(
+                "Batch detection is disabled for opencv by default "
+                "since the results are not consistent with single image detection. "
+                "You can force enable it by setting the environment variable "
+                "ENABLE_OPENCV_BATCH_DETECTION to true."
+            )
+        return supports_batch_detection
 
     def detect_faces(
         self,
@@ -46,8 +62,10 @@
         """
         if isinstance(img, np.ndarray):
             imgs = [img]
-        else:
+        elif self.supports_batch_detection:
             imgs = img
+        else:
+            return [self.detect_faces(single_img) for single_img in img]
 
         batch_results = []
 
diff --git a/deepface/models/face_detection/RetinaFace.py b/deepface/models/face_detection/RetinaFace.py
index b0d1c94..2722e01 100644
--- a/deepface/models/face_detection/RetinaFace.py
+++ b/deepface/models/face_detection/RetinaFace.py
@@ -28,7 +28,8 @@ class RetinaFaceClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if isinstance(img, np.ndarray):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             imgs = [img]
         else:
             imgs = img
@@ -81,4 +82,6 @@ class RetinaFaceClient(Detector):
 
             batch_results.append(resp)
 
-        return batch_results if len(batch_results) > 1 else batch_results[0]
+        if not is_batched_input:
+            return batch_results[0]
+        return batch_results
diff --git a/deepface/models/face_detection/Ssd.py b/deepface/models/face_detection/Ssd.py
index c0ae2cb..a620f96 100644
--- a/deepface/models/face_detection/Ssd.py
+++ b/deepface/models/face_detection/Ssd.py
@@ -69,81 +69,89 @@ class SsdClient(Detector):
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if isinstance(img, np.ndarray):
-            imgs = [img]
-        else:
-            imgs = img
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
+            img = [img]
+        results = [self._process_single_image(single_img) for single_img in img]
+        if not is_batched_input:
+            return results[0]
+        return results
 
-        batch_results = []
+    def _process_single_image(self, single_img: np.ndarray) -> List[FacialAreaRegion]:
+        """
+        Helper function to detect faces in a single image.
 
-        for single_img in imgs:
-            # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
-            if single_img.dtype != np.uint8:
-                single_img = single_img.astype(np.uint8)
+        Args:
+            single_img (np.ndarray): Pre-loaded image as numpy array
 
-            opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
+        Returns:
+            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
+        """
+        # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
+        if single_img.dtype != np.uint8:
+            single_img = single_img.astype(np.uint8)
 
-            target_size = (300, 300)
-            original_size = single_img.shape
-            current_img = cv2.resize(single_img, target_size)
+        opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]
 
-            aspect_ratio_x = original_size[1] / target_size[1]
-            aspect_ratio_y = original_size[0] / target_size[0]
+        target_size = (300, 300)
+        original_size = single_img.shape
+        current_img = cv2.resize(single_img, target_size)
 
-            imageBlob = cv2.dnn.blobFromImage(image=current_img)
+        aspect_ratio_x = original_size[1] / target_size[1]
+        aspect_ratio_y = original_size[0] / target_size[0]
 
-            face_detector = self.model["face_detector"]
-            face_detector.setInput(imageBlob)
-            detections = face_detector.forward()
+        imageBlob = cv2.dnn.blobFromImage(image=current_img)
 
-            class ssd_labels(IntEnum):
-                img_id = 0
-                is_face = 1
-                confidence = 2
-                left = 3
-                top = 4
-                right = 5
-                bottom = 6
+        face_detector = self.model["face_detector"]
+        face_detector.setInput(imageBlob)
+        detections = face_detector.forward()
 
-            faces = detections[0][0]
-            faces = faces[
-                (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
-            ]
-            margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
-            faces[:, margins] = np.int32(faces[:, margins] * 300)
-            faces[:, margins] = np.int32(
-                faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
+        class ssd_labels(IntEnum):
+            img_id = 0
+            is_face = 1
+            confidence = 2
+            left = 3
+            top = 4
+            right = 5
+            bottom = 6
+
+        faces = detections[0][0]
+        faces = faces[
+            (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
+        ]
+        margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
+        faces[:, margins] = np.int32(faces[:, margins] * 300)
+        faces[:, margins] = np.int32(
+            faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
+        )
+        faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
+            :, [ssd_labels.left, ssd_labels.top]
+        ]
+
+        resp = []
+        for face in faces:
+            confidence = float(face[ssd_labels.confidence])
+            x, y, w, h = map(int, face[margins])
+            detected_face = single_img[y : y + h, x : x + w]
+
+            left_eye, right_eye = opencv_module.find_eyes(detected_face)
+
+            # eyes found in the detected face instead image itself
+            # detected face's coordinates should be added
+            if left_eye is not None:
+                left_eye = x + int(left_eye[0]), y + int(left_eye[1])
+            if right_eye is not None:
+                right_eye = x + int(right_eye[0]), y + int(right_eye[1])
+
+            facial_area = FacialAreaRegion(
+                x=x,
+                y=y,
+                w=w,
+                h=h,
+                left_eye=left_eye,
+                right_eye=right_eye,
+                confidence=confidence,
             )
-            faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
-                :, [ssd_labels.left, ssd_labels.top]
-            ]
+            resp.append(facial_area)
 
-            resp = []
-            for face in faces:
-                confidence = float(face[ssd_labels.confidence])
-                x, y, w, h = map(int, face[margins])
-                detected_face = single_img[y : y + h, x : x + w]
-
-                left_eye, right_eye = opencv_module.find_eyes(detected_face)
-
-                # eyes found in the detected face instead image itself
-                # detected face's coordinates should be added
-                if left_eye is not None:
-                    left_eye = x + int(left_eye[0]), y + int(left_eye[1])
-                if right_eye is not None:
-                    right_eye = x + int(right_eye[0]), y + int(right_eye[1])
-
-                facial_area = FacialAreaRegion(
-                    x=x,
-                    y=y,
-                    w=w,
-                    h=h,
-                    left_eye=left_eye,
-                    right_eye=right_eye,
-                    confidence=confidence,
-                )
-                resp.append(facial_area)
-
-            batch_results.append(resp)
-
-        return batch_results if len(batch_results) > 1 else batch_results[0]
+        return resp
diff --git a/deepface/models/face_detection/YuNet.py b/deepface/models/face_detection/YuNet.py
index 4086f17..93e65a8 100644
--- a/deepface/models/face_detection/YuNet.py
+++ b/deepface/models/face_detection/YuNet.py
@@ -67,10 +67,11 @@ class YuNetClient(Detector):
         Returns:
             results (Union[List[FacialAreaRegion], List[List[FacialAreaRegion]]]): A list
                 or a list of lists of FacialAreaRegion objects
         """
-        if not isinstance(img, list):
+        is_batched_input = isinstance(img, list)
+        if not is_batched_input:
             img = [img]
         results = [self._process_single_image(single_img) for single_img in img]
-        if len(results) == 1:
+        if not is_batched_input:
             return results[0]
         return results
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
index 2b14a7c..9977ea3 100644
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@@ -82,15 +82,23 @@ def extract_faces(
             just available in the result only if anti_spoofing is set to True in input arguments.
     """
 
-    if isinstance(img_path, np.ndarray) and img_path.ndim == 4:
-        img_path = [img_path[i] for i in range(img_path.shape[0])]
-    if not isinstance(img_path, list):
-        img_path = [img_path]
+    batched_input = (
+        (
+            isinstance(img_path, np.ndarray) and
+            img_path.ndim == 4
+        ) or isinstance(img_path, list)
+    )
+    if not batched_input:
+        imgs_path = [img_path]
+    elif isinstance(img_path, np.ndarray):
+        imgs_path = [img_path[i] for i in range(img_path.shape[0])]
+    else:
+        imgs_path = img_path
 
     all_images = []
     img_names = []
 
-    for single_img_path in img_path:
+    for single_img_path in imgs_path:
         # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
         img, img_name = image_utils.load_image(single_img_path)
 
@@ -109,9 +117,6 @@ def extract_faces(
         max_faces=max_faces,
     )
 
-    if len(all_images) == 1:
-        all_face_objs = [all_face_objs]
-
     all_resp_objs = []
 
     for img, img_name, face_objs in zip(all_images, img_names, all_face_objs):
@@ -203,7 +208,7 @@ def extract_faces(
 
         all_resp_objs.append(img_resp_objs)
 
-    if len(all_resp_objs) == 1:
+    if not batched_input:
         return all_resp_objs[0]
     return all_resp_objs
 
@@ -239,8 +244,18 @@ def detect_faces(
         - confidence (float): The confidence score associated with the detected face.
""" - if not isinstance(img, list): - img = [img] + batched_input = ( + ( + isinstance(img, np.ndarray) and + img.ndim == 4 + ) or isinstance(img, list) + ) + if not batched_input: + imgs = [img] + elif isinstance(img, np.ndarray): + imgs = [img[i] for i in range(img.shape[0])] + else: + imgs = img if detector_backend == "skip": all_face_objs = [ @@ -253,9 +268,9 @@ def detect_faces( confidence=0, ) ] - for single_img in img + for single_img in imgs ] - if len(img) == 1: + if not batched_input: all_face_objs = all_face_objs[0] return all_face_objs @@ -266,7 +281,7 @@ def detect_faces( preprocessed_images = [] width_borders = [] height_borders = [] - for single_img in img: + for single_img in imgs: height, width, _ = single_img.shape # validate expand percentage score @@ -299,9 +314,6 @@ def detect_faces( # Detect faces in all preprocessed images all_facial_areas = face_detector.detect_faces(preprocessed_images) - if len(preprocessed_images) == 1: - all_facial_areas = [all_facial_areas] - all_detected_faces = [] for ( single_img, @@ -336,7 +348,7 @@ def detect_faces( all_detected_faces.append(detected_faces) - if len(all_detected_faces) == 1: + if not batched_input: return all_detected_faces[0] return all_detected_faces diff --git a/tests/test_extract_faces.py b/tests/test_extract_faces.py index 18ef34f..2814c54 100644 --- a/tests/test_extract_faces.py +++ b/tests/test_extract_faces.py @@ -242,8 +242,8 @@ def test_batch_extract_faces_single_image(): img_path=[img_path], align=True, ) - assert len(imgs_objs_batch) == 2 - assert [isinstance(obj, dict) for obj in imgs_objs_batch] + assert len(imgs_objs_batch) == 1 and isinstance(imgs_objs_batch[0], list) + assert [isinstance(obj, dict) for obj in imgs_objs_batch[0]] def test_backends_for_enforced_detection_with_non_facial_inputs():