# built-in dependencies
from typing import Any, Dict, List, Tuple, Union

# 3rd party dependencies
import numpy as np
import cv2
from PIL import Image

# project dependencies
from deepface.modules import preprocessing
from deepface.models.Detector import DetectedFace, FacialAreaRegion
from deepface.detectors import DetectorWrapper
from deepface.commons import package_utils
from deepface.commons.logger import Logger

logger = Logger(module="deepface/modules/detection.py")

# pylint: disable=no-else-raise


tf_major_version = package_utils.get_tf_major_version()
if tf_major_version == 1:
    from keras.preprocessing import image
elif tf_major_version == 2:
    from tensorflow.keras.preprocessing import image


def extract_faces(
    img_path: Union[str, np.ndarray],
    target_size: Tuple[int, int] = (224, 224),
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    grayscale: bool = False,
    human_readable: bool = False,
) -> List[Dict[str, Any]]:
    """
    Extract faces from a given image

    Args:
        img_path (str or np.ndarray): Path to the image. Accepts an exact image path
            as a string, a numpy array (BGR), or a base64 encoded image.

        target_size (tuple): final shape of the facial image. Black pixels will be
            added to resize the image.

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Default is True. Set to False to avoid the exception for low-resolution images.

        align (bool): Flag to enable face alignment (default is True).

        expand_percentage (int): expand the detected facial area by a given percentage
            (default is 0).

        grayscale (boolean): Flag to convert the image to grayscale before
            processing (default is False).

        human_readable (bool): Flag to make the output image human readable: 3D RGB for
            human readability or 4D BGR for ML models (default is False).

    Returns:
        results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:

        - "face" (np.ndarray): The detected face as a NumPy array.

        - "facial_area" (Dict[str, Any]): The detected face's region as a dictionary with
            x, y, w and h keys plus left_eye and right_eye coordinates.

        - "confidence" (float): The confidence score associated with the detected face.
    """

    resp_objs = []

    # img might be a path, a base64 string or a numpy array. Convert it to numpy whatever it is.
    img, img_name = preprocessing.load_image(img_path)

    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0], confidence=0)

    if detector_backend == "skip":
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
    else:
        face_objs = DetectorWrapper.detect_faces(
            detector_backend=detector_backend,
            img=img,
            align=align,
            expand_percentage=expand_percentage,
        )

    # in case no face is found
    if len(face_objs) == 0 and enforce_detection is True:
        if img_name is not None:
            raise ValueError(
                f"Face could not be detected in {img_name}. "
                "Please confirm that the picture is a face photo "
                "or consider setting the enforce_detection param to False."
            )
        else:
            raise ValueError(
                "Face could not be detected. Please confirm that the picture is a face photo "
                "or consider setting the enforce_detection param to False."
            )

    if len(face_objs) == 0 and enforce_detection is False:
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]

    for face_obj in face_objs:
        current_img = face_obj.img
        current_region = face_obj.facial_area

        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
            continue

        if grayscale is True:
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

        # resize and padding
        factor_0 = target_size[0] / current_img.shape[0]
        factor_1 = target_size[1] / current_img.shape[1]
        factor = min(factor_0, factor_1)

        dsize = (
            int(current_img.shape[1] * factor),
            int(current_img.shape[0] * factor),
        )
        current_img = cv2.resize(current_img, dsize)

        diff_0 = target_size[0] - current_img.shape[0]
        diff_1 = target_size[1] - current_img.shape[1]
        if grayscale is False:
            # put the base image in the middle of the padded image
            current_img = np.pad(
                current_img,
                (
                    (diff_0 // 2, diff_0 - diff_0 // 2),
                    (diff_1 // 2, diff_1 - diff_1 // 2),
                    (0, 0),
                ),
                "constant",
            )
        else:
            current_img = np.pad(
                current_img,
                (
                    (diff_0 // 2, diff_0 - diff_0 // 2),
                    (diff_1 // 2, diff_1 - diff_1 // 2),
                ),
                "constant",
            )
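
        # Worked example of the resize-and-pad step (illustrative numbers, not
        # part of the upstream module): for a 100x80 face and a 224x224 target,
        # factor = min(224/100, 224/80) = 2.24, so the face is resized to
        # 224x179; then diff_0 = 0 and diff_1 = 45, so 22 black columns are
        # padded on the left and 23 on the right to centre the crop.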

        # double-check: integer rounding in the resize may leave the padded
        # image a pixel off target_size
        if current_img.shape[0:2] != target_size:
            current_img = cv2.resize(current_img, target_size)

        # normalizing the image pixels
        # img_to_array casts the image to a float32 numpy array
        img_pixels = image.img_to_array(current_img)
        img_pixels = np.expand_dims(img_pixels, axis=0)
        img_pixels /= 255  # normalize input in [0, 1]
        # discard expanded dimension
        if human_readable is True and len(img_pixels.shape) == 4:
            img_pixels = img_pixels[0]
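
        # Shape walk-through (illustrative): a (224, 224, 3) uint8 crop becomes
        # a (1, 224, 224, 3) float32 batch scaled to [0, 1]; with
        # human_readable=True the batch dimension is dropped again, leaving a
        # 3D image.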

        resp_objs.append(
            {
                "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
                "facial_area": {
                    "x": int(current_region.x),
                    "y": int(current_region.y),
                    "w": int(current_region.w),
                    "h": int(current_region.h),
                    "left_eye": current_region.left_eye,
                    "right_eye": current_region.right_eye,
                },
                "confidence": round(current_region.confidence, 2),
            }
        )

    if len(resp_objs) == 0 and enforce_detection is True:
        raise ValueError(
            f"Detected face shape is {img.shape}. Consider setting the enforce_detection arg to False."
        )

    return resp_objs
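

# Usage sketch for extract_faces (illustrative, not part of the module): each
# returned dict holds the face crop plus its region and score. "img.jpg" below
# is a hypothetical input path.
#
#     from deepface.modules import detection
#
#     face_objs = detection.extract_faces(
#         img_path="img.jpg",
#         detector_backend="opencv",
#         human_readable=True,  # 3D RGB crop instead of a 4D BGR batch
#     )
#     for face_obj in face_objs:
#         print(face_obj["facial_area"], face_obj["confidence"])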


def align_face(
    img: np.ndarray,
    left_eye: Union[list, tuple],
    right_eye: Union[list, tuple],
) -> np.ndarray:
    """
    Align a given image horizontally with respect to the left and right eye locations
    Args:
        img (np.ndarray): pre-loaded image with a detected face
        left_eye (list or tuple): coordinates of the left eye with respect to the person itself
        right_eye (list or tuple): coordinates of the right eye with respect to the person itself
    Returns:
        img (np.ndarray): aligned facial image
    """
    # if the eyes could not be detected for the given image, return the image itself
    if left_eye is None or right_eye is None:
        return img

    # sometimes unexpectedly detected images come with nil dimensions
    if img.shape[0] == 0 or img.shape[1] == 0:
        return img

    angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
    img = Image.fromarray(img)
    img = np.array(img.rotate(angle))
    return img
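

# Worked example of the alignment angle (illustrative numbers, not part of the
# module): with left_eye = (30, 40) and right_eye = (70, 48) in image
# coordinates (y grows downward), the eye line is tilted by
# degrees(arctan2(48 - 40, 70 - 30)) ≈ 11.31 degrees, and Image.rotate turns
# the image counterclockwise by that angle, lifting the lower right eye back
# up so the eyes end up level.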