# built-in dependencies
from typing import Any, Dict, IO, List, Tuple, Union, Optional, Sequence

# 3rd party dependencies
from heapq import nlargest
import numpy as np
import cv2

# project dependencies
from deepface.modules import modeling
from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
from deepface.commons import image_utils
from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=no-else-raise

def extract_faces(
    img_path: Union[Sequence[Union[str, np.ndarray, IO[bytes]]], str, np.ndarray, IO[bytes]],
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    grayscale: bool = False,
    color_face: str = "rgb",
    normalize_face: bool = True,
    anti_spoofing: bool = False,
    max_faces: Optional[int] = None,
) -> Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
"""
|
|
Extract faces from a given image or list of images
|
|
|
|
Args:
|
|
img_paths (List[str or np.ndarray or IO[bytes]] or str or np.ndarray or IO[bytes]):
|
|
Path(s) to the image(s) as a string,
|
|
numpy array (BGR), a file object that supports at least `.read` and is
|
|
opened in binary mode, or base64 encoded images.
|
|
|
|
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
|
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'yolov11n', 'yolov11s', 'yolov11m',
|
|
'centerface' or 'skip' (default is opencv)
|
|
|
|
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
|
Default is True. Set to False to avoid the exception for low-resolution images.
|
|
|
|
align (bool): Flag to enable face alignment (default is True).
|
|
|
|
expand_percentage (int): expand detected facial area with a percentage.
|
|
|
|
grayscale (boolean): (Deprecated) Flag to convert the output face image to grayscale
|
|
(default is False).
|
|
|
|
color_face (string): Color to return face image output. Options: 'rgb', 'bgr' or 'gray'
|
|
(default is 'rgb').
|
|
|
|
normalize_face (boolean): Flag to enable normalization (divide by 255) of the output
|
|
face image output face image normalization (default is True).
|
|
|
|
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
|
|
|
Returns:
|
|
results (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]):
|
|
A list or list of lists of dictionaries, where each dictionary contains:
|
|
|
|
- "face" (np.ndarray): The detected face as a NumPy array in RGB format.
|
|
|
|
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
|
|
- keys 'x', 'y', 'w', 'h' with int values
|
|
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
|
|
left eye and right eye are eyes on the left and right respectively with respect
|
|
to the person itself instead of observer.
|
|
|
|
- "confidence" (float): The confidence score associated with the detected face.
|
|
|
|
- "is_real" (boolean): antispoofing analyze result. this key is just available in the
|
|
result only if anti_spoofing is set to True in input arguments.
|
|
|
|
- "antispoof_score" (float): score of antispoofing analyze result. this key is
|
|
just available in the result only if anti_spoofing is set to True in input arguments.
|
|
"""
|
|
|
|
    if not isinstance(img_path, list):
        img_path = [img_path]

    all_images = []
    img_names = []

    for single_img_path in img_path:
        # img might be a path, base64 string or numpy array. Convert it to numpy either way.
        img, img_name = image_utils.load_image(single_img_path)

        if img is None:
            raise ValueError(f"Exception while loading {img_name}")

        all_images.append(img)
        img_names.append(img_name)

    # Run detect_faces for all images at once
    all_face_objs = detect_faces(
        detector_backend=detector_backend,
        img=all_images,
        align=align,
        expand_percentage=expand_percentage,
        max_faces=max_faces,
    )

    if len(all_images) == 1:
        all_face_objs = [all_face_objs]

    all_resp_objs = []

    for img, img_name, face_objs in zip(all_images, img_names, all_face_objs):
        height, width, _ = img.shape

        if len(face_objs) == 0 and enforce_detection is True:
            if img_name is not None:
                raise ValueError(
                    f"Face could not be detected in {img_name}. "
                    "Please confirm that the picture is a face photo "
                    "or consider setting the enforce_detection param to False."
                )
            else:
                raise ValueError(
                    "Face could not be detected. Please confirm that the picture is a face photo "
                    "or consider setting the enforce_detection param to False."
                )

        if len(face_objs) == 0 and enforce_detection is False:
            base_region = FacialAreaRegion(x=0, y=0, w=width, h=height, confidence=0)
            face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]

        img_resp_objs = []
        for face_obj in face_objs:
            current_img = face_obj.img
            current_region = face_obj.facial_area

            if current_img.shape[0] == 0 or current_img.shape[1] == 0:
                continue

            if grayscale is True:
                logger.warn("Parameter grayscale is deprecated. Use color_face instead.")
                current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
            else:
                if color_face == "rgb":
                    current_img = current_img[:, :, ::-1]
                elif color_face == "bgr":
                    pass  # image is in BGR
                elif color_face == "gray":
                    current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
                else:
                    raise ValueError(
                        f"The color_face can be rgb, bgr or gray, but it is {color_face}."
                    )

            if normalize_face:
                current_img = current_img / 255  # normalize input in [0, 1]

            # cast to int for flask, and do final checks for borders
            x = max(0, int(current_region.x))
            y = max(0, int(current_region.y))
            w = min(width - x - 1, int(current_region.w))
            h = min(height - y - 1, int(current_region.h))

            facial_area = {
                "x": x,
                "y": y,
                "w": w,
                "h": h,
                "left_eye": current_region.left_eye,
                "right_eye": current_region.right_eye,
            }

            # optional nose, mouth_left and mouth_right fields come only with retinaface
            if current_region.nose is not None:
                facial_area["nose"] = current_region.nose
            if current_region.mouth_left is not None:
                facial_area["mouth_left"] = current_region.mouth_left
            if current_region.mouth_right is not None:
                facial_area["mouth_right"] = current_region.mouth_right

            resp_obj = {
                "face": current_img,
                "facial_area": facial_area,
                "confidence": round(float(current_region.confidence or 0), 2),
            }

            if anti_spoofing is True:
                antispoof_model = modeling.build_model(task="spoofing", model_name="Fasnet")
                is_real, antispoof_score = antispoof_model.analyze(
                    img=img, facial_area=(x, y, w, h)
                )
                resp_obj["is_real"] = is_real
                resp_obj["antispoof_score"] = antispoof_score

            img_resp_objs.append(resp_obj)

        all_resp_objs.append(img_resp_objs)

    if len(all_resp_objs) == 1:
        return all_resp_objs[0]
    return all_resp_objs
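
# Illustrative usage of extract_faces — a minimal sketch, not part of the module's
# public surface. "couple.jpg" is a hypothetical path; any input accepted by
# image_utils.load_image would do.
#
#   face_objs = extract_faces(img_path="couple.jpg", detector_backend="opencv")
#   for face_obj in face_objs:
#       area = face_obj["facial_area"]
#       print(area["x"], area["y"], area["w"], area["h"], face_obj["confidence"])
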
def detect_faces(
    detector_backend: str,
    img: Union[np.ndarray, List[np.ndarray]],
    align: bool = True,
    expand_percentage: int = 0,
    max_faces: Optional[int] = None,
) -> Union[List[List[DetectedFace]], List[DetectedFace]]:
"""
|
|
Detect face(s) from a given image or list of images
|
|
Args:
|
|
detector_backend (str): detector name
|
|
|
|
img (np.ndarray or List[np.ndarray]): pre-loaded image or list of images
|
|
|
|
align (bool): enable or disable alignment after detection
|
|
|
|
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
|
|
|
Returns:
|
|
results (Union[List[List[DetectedFace]], List[DetectedFace]]):
|
|
A list of lists of DetectedFace objects or a list of DetectedFace objects
|
|
where each object contains:
|
|
|
|
- img (np.ndarray): The detected face as a NumPy array.
|
|
|
|
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
|
|
left_eye and right eye. left eye and right eye are eyes on the left and right
|
|
with respect to the person instead of observer.
|
|
|
|
- confidence (float): The confidence score associated with the detected face.
|
|
"""
|
|
    if not isinstance(img, list):
        img = [img]

    if detector_backend == "skip":
        all_face_objs = [
            [
                DetectedFace(
                    img=single_img,
                    facial_area=FacialAreaRegion(
                        x=0, y=0, w=single_img.shape[1], h=single_img.shape[0]
                    ),
                    confidence=0,
                )
            ]
            for single_img in img
        ]
        if len(img) == 1:
            all_face_objs = all_face_objs[0]
        return all_face_objs

    face_detector: Detector = modeling.build_model(
        task="face_detector", model_name=detector_backend
    )

    preprocessed_images = []
    width_borders = []
    height_borders = []
    for single_img in img:
        height, width, _ = single_img.shape

        # validate expand percentage score
        if expand_percentage < 0:
            logger.warn(
                f"Expand percentage cannot be negative but you set it to {expand_percentage}. "
                "Overwriting it to 0."
            )
            expand_percentage = 0

        # If faces are close to the upper boundary, alignment can move them outside of the image.
        # Add a black border around the image to avoid this.
        height_border = int(0.5 * height)
        width_border = int(0.5 * width)
        if align is True:
            single_img = cv2.copyMakeBorder(
                single_img,
                height_border,
                height_border,
                width_border,
                width_border,
                cv2.BORDER_CONSTANT,
                value=[0, 0, 0],  # Color of the border (black)
            )

        preprocessed_images.append(single_img)
        width_borders.append(width_border)
        height_borders.append(height_border)

    # Detect faces in all preprocessed images
    all_facial_areas = face_detector.detect_faces(preprocessed_images)

    if len(preprocessed_images) == 1:
        all_facial_areas = [all_facial_areas]

    all_detected_faces = []
    for single_img, facial_areas, width_border, height_border in zip(
        preprocessed_images, all_facial_areas, width_borders, height_borders
    ):
        if not isinstance(facial_areas, list):
            facial_areas = [facial_areas]

        if max_faces is not None and max_faces < len(facial_areas):
            facial_areas = nlargest(
                max_faces, facial_areas, key=lambda facial_area: facial_area.w * facial_area.h
            )

        detected_faces = [
            extract_face(
                facial_area=facial_area,
                img=single_img,
                align=align,
                expand_percentage=expand_percentage,
                width_border=width_border,
                height_border=height_border,
            )
            for facial_area in facial_areas
            if isinstance(facial_area, FacialAreaRegion)
        ]

        all_detected_faces.append(detected_faces)

    if len(all_detected_faces) == 1:
        return all_detected_faces[0]
    return all_detected_faces
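
# Illustrative sketch of calling detect_faces directly with a pre-loaded BGR image
# (the "img.jpg" path is hypothetical):
#
#   img = cv2.imread("img.jpg")  # BGR numpy array
#   faces = detect_faces(detector_backend="opencv", img=img, align=True, max_faces=2)
#   for face in faces:
#       print(face.facial_area.x, face.facial_area.y, face.confidence)
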
def extract_face(
    facial_area: FacialAreaRegion,
    img: np.ndarray,
    align: bool,
    expand_percentage: int,
    width_border: int,
    height_border: int,
) -> DetectedFace:
    """
    Extract a single face from a given image with optional expansion and alignment.

    Args:
        facial_area (FacialAreaRegion): pre-detected facial area
        img (np.ndarray): pre-loaded image (possibly with a black border added)
        align (bool): flag to enable alignment with respect to the eyes
        expand_percentage (int): expand the detected facial area with a percentage
        width_border (int): size of the black border added to the image's width
        height_border (int): size of the black border added to the image's height

    Returns:
        detected_face (DetectedFace): detected face object with the cropped (and
            optionally aligned) face image and its facial area region
    """
    x = facial_area.x
    y = facial_area.y
    w = facial_area.w
    h = facial_area.h
    left_eye = facial_area.left_eye
    right_eye = facial_area.right_eye
    confidence = facial_area.confidence
    nose = facial_area.nose
    mouth_left = facial_area.mouth_left
    mouth_right = facial_area.mouth_right

    if expand_percentage > 0:
        # Expand the facial region height and width by the provided percentage,
        # ensuring that the expanded region stays within img.shape limits
        expanded_w = w + int(w * expand_percentage / 100)
        expanded_h = h + int(h * expand_percentage / 100)

        x = max(0, x - int((expanded_w - w) / 2))
        y = max(0, y - int((expanded_h - h) / 2))
        w = min(img.shape[1] - x, expanded_w)
        h = min(img.shape[0] - y, expanded_h)

    # extract the detected face without alignment
    detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]

    # align the facial area, then find the projection of the detected face after alignment
    if align is True:
        # Aligning the original image comes with an extra cost; instead, focus on
        # the facial area with a margin and align that sub-image to decrease the cost.
        sub_img, relative_x, relative_y = extract_sub_image(img=img, facial_area=(x, y, w, h))

        aligned_sub_img, angle = align_img_wrt_eyes(
            img=sub_img, left_eye=left_eye, right_eye=right_eye
        )

        rotated_x1, rotated_y1, rotated_x2, rotated_y2 = project_facial_area(
            facial_area=(
                relative_x,
                relative_y,
                relative_x + w,
                relative_y + h,
            ),
            angle=angle,
            size=(sub_img.shape[0], sub_img.shape[1]),
        )
        detected_face = aligned_sub_img[
            int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
        ]

        # do not spend memory on these temporary variables anymore
        del aligned_sub_img, sub_img

    # restore x, y, eye and mouth coordinates to the space before the border was added
    x = x - width_border
    y = y - height_border
    # w and h will not change
    if left_eye is not None:
        left_eye = (left_eye[0] - width_border, left_eye[1] - height_border)
    if right_eye is not None:
        right_eye = (right_eye[0] - width_border, right_eye[1] - height_border)
    if nose is not None:
        nose = (nose[0] - width_border, nose[1] - height_border)
    if mouth_left is not None:
        mouth_left = (mouth_left[0] - width_border, mouth_left[1] - height_border)
    if mouth_right is not None:
        mouth_right = (mouth_right[0] - width_border, mouth_right[1] - height_border)

    return DetectedFace(
        img=detected_face,
        facial_area=FacialAreaRegion(
            x=x,
            y=y,
            h=h,
            w=w,
            confidence=confidence,
            left_eye=left_eye,
            right_eye=right_eye,
            nose=nose,
            mouth_left=mouth_left,
            mouth_right=mouth_right,
        ),
        confidence=confidence or 0,
    )
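
# Worked example of the expand_percentage arithmetic in extract_face above
# (illustrative numbers): for a region with w=100, h=80 and expand_percentage=20,
#   expanded_w = 100 + int(100 * 20 / 100) = 120
#   expanded_h = 80 + int(80 * 20 / 100) = 96
# so x shifts left by (120 - 100) / 2 = 10 and y shifts up by (96 - 80) / 2 = 8,
# clamped to the image boundaries.
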
def extract_sub_image(
    img: np.ndarray, facial_area: Tuple[int, int, int, int]
) -> Tuple[np.ndarray, int, int]:
"""
|
|
Get the sub image with given facial area while expanding the facial region
|
|
to ensure alignment does not shift the face outside the image.
|
|
|
|
This function doubles the height and width of the face region,
|
|
and adds black pixels if necessary.
|
|
|
|
Args:
|
|
- img (np.ndarray): pre-loaded image with detected face
|
|
- facial_area (tuple of int): Representing the (x, y, w, h) of the facial area.
|
|
|
|
Returns:
|
|
- extracted_face (np.ndarray): expanded facial image
|
|
- relative_x (int): adjusted x-coordinates relative to the expanded region
|
|
- relative_y (int): adjusted y-coordinates relative to the expanded region
|
|
"""
|
|
    x, y, w, h = facial_area
    relative_x = int(0.5 * w)
    relative_y = int(0.5 * h)

    # calculate expanded coordinates
    x1, y1 = x - relative_x, y - relative_y
    x2, y2 = x + w + relative_x, y + h + relative_y

    # most of the time, the expanded region fits inside the image
    if x1 >= 0 and y1 >= 0 and x2 <= img.shape[1] and y2 <= img.shape[0]:
        return img[y1:y2, x1:x2], relative_x, relative_y

    # but sometimes, we need to add black pixels
    # ensure the coordinates are within bounds
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
    cropped_region = img[y1:y2, x1:x2]

    # create a black image
    extracted_face = np.zeros(
        (h + 2 * relative_y, w + 2 * relative_x, img.shape[2]), dtype=img.dtype
    )

    # map the cropped region
    start_x = max(0, relative_x - x)
    start_y = max(0, relative_y - y)
    extracted_face[
        start_y : start_y + cropped_region.shape[0], start_x : start_x + cropped_region.shape[1]
    ] = cropped_region

    return extracted_face, relative_x, relative_y
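
# Worked example for extract_sub_image (illustrative numbers): a face at
# (x=10, y=20, w=100, h=80) gives relative_x=50 and relative_y=40, so the expanded
# region spans x1=-40..x2=160 and y1=-20..y2=140. The negative parts fall outside
# the image, so the in-bounds crop img[0:140, 0:160] is pasted into a 160x200
# black canvas at start_x = max(0, 50 - 10) = 40, start_y = max(0, 40 - 20) = 20.
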
def align_img_wrt_eyes(
    img: np.ndarray,
    left_eye: Optional[Union[list, tuple]],
    right_eye: Optional[Union[list, tuple]],
) -> Tuple[np.ndarray, float]:
"""
|
|
Align a given image horizantally with respect to their left and right eye locations
|
|
Args:
|
|
img (np.ndarray): pre-loaded image with detected face
|
|
left_eye (list or tuple): coordinates of left eye with respect to the person itself
|
|
right_eye(list or tuple): coordinates of right eye with respect to the person itself
|
|
Returns:
|
|
img (np.ndarray): aligned facial image
|
|
"""
|
|
    # if the eyes could not be detected for the given image, return the image itself
    if left_eye is None or right_eye is None:
        return img, 0

    # sometimes detected images unexpectedly come with nil dimensions
    if img.shape[0] == 0 or img.shape[1] == 0:
        return img, 0

    angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))

    (h, w) = img.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    img = cv2.warpAffine(
        img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0)
    )

    return img, angle
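
# Worked example for align_img_wrt_eyes (illustrative coordinates): with
# left_eye=(70, 60) and right_eye=(30, 50), the roll angle is
# degrees(arctan2(60 - 50, 70 - 30)) = degrees(arctan2(10, 40)) ≈ 14.04,
# and warpAffine rotates the image by that angle around its center to level the eyes.
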
def project_facial_area(
    facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
) -> Tuple[int, int, int, int]:
"""
|
|
Update pre-calculated facial area coordinates after image itself
|
|
rotated with respect to the eyes.
|
|
Inspried from the work of @UmutDeniz26 - github.com/serengil/retinaface/pull/80
|
|
|
|
Args:
|
|
facial_area (tuple of int): Representing the (x1, y1, x2, y2) of the facial area.
|
|
x2 is equal to x1 + w1, and y2 is equal to y1 + h1
|
|
angle (float): Angle of rotation in degrees. Its sign determines the direction of rotation.
|
|
Note that angles > 360 degrees are normalized to the range [0, 360).
|
|
size (tuple of int): Tuple representing the size of the image (width, height).
|
|
|
|
Returns:
|
|
rotated_coordinates (tuple of int): Representing the new coordinates
|
|
(x1, y1, x2, y2) or (x1, y1, x1+w1, y1+h1) of the rotated facial area.
|
|
"""
|
|
|
|
    # Normalize the angle so we don't have to worry about rotations
    # greater than 360 degrees. We work around the quirky behavior of
    # the modulo operator for negative angle values by tracking the
    # direction of rotation separately.
    direction = 1 if angle >= 0 else -1
    angle = abs(angle) % 360
    if angle == 0:
        return facial_area

    # Angle in radians
    angle = angle * np.pi / 180

    height, width = size

    # Translate the facial area to the center of the image
    x = (facial_area[0] + facial_area[2]) / 2 - width / 2
    y = (facial_area[1] + facial_area[3]) / 2 - height / 2

    # Rotate the facial area
    x_new = x * np.cos(angle) + y * direction * np.sin(angle)
    y_new = -x * direction * np.sin(angle) + y * np.cos(angle)

    # Translate the facial area back to the original position
    x_new = x_new + width / 2
    y_new = y_new + height / 2

    # Calculate projected coordinates after alignment
    x1 = x_new - (facial_area[2] - facial_area[0]) / 2
    y1 = y_new - (facial_area[3] - facial_area[1]) / 2
    x2 = x_new + (facial_area[2] - facial_area[0]) / 2
    y2 = y_new + (facial_area[3] - facial_area[1]) / 2

    # validate that the projected coordinates are within the image's boundaries
    x1 = max(int(x1), 0)
    y1 = max(int(y1), 0)
    x2 = min(int(x2), width)
    y2 = min(int(y2), height)

    return (x1, y1, x2, y2)
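
# Worked example for project_facial_area (illustrative numbers): in a 100x100
# image (size=(100, 100)), facial_area=(10, 10, 30, 30) has its center at (20, 20),
# i.e. (-30, -30) relative to the image center. For angle=90, direction=1 and the
# rotation maps it to (x_new, y_new) = (-30, 30), i.e. (20, 80) after translating
# back, so the projected box becomes (10, 70, 30, 90).
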