deepface/deepface/detectors/YunetWrapper.py

113 lines
4.5 KiB
Python

import os
from typing import Any
import cv2
import numpy as np
import gdown
from deepface.commons import functions
from deepface.commons.logger import Logger
from deepface.models.Detector import Detector
logger = Logger(module="detectors.YunetWrapper")
class YuNet(Detector):
def __init__(self):
self.model = self.build_model()
def build_model(self) -> Any:
"""
Build a yunet detector model
Returns:
model (Any)
"""
# pylint: disable=C0301
url = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"
file_name = "face_detection_yunet_2023mar.onnx"
home = functions.get_deepface_home()
if os.path.isfile(home + f"/.deepface/weights/{file_name}") is False:
logger.info(f"{file_name} will be downloaded...")
output = home + f"/.deepface/weights/{file_name}"
gdown.download(url, output, quiet=False)
try:
face_detector = cv2.FaceDetectorYN_create(
home + f"/.deepface/weights/{file_name}", "", (0, 0)
)
except Exception as err:
raise ValueError(
"Exception while calling opencv.FaceDetectorYN_create module."
+ "This is an optional dependency."
+ "You can install it as pip install opencv-contrib-python."
) from err
return face_detector
def detect_faces(self, img: np.ndarray, align: bool = True) -> list:
"""
Detect and align face with yunet
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
Returns:
list of detected and aligned faces
"""
# FaceDetector.detect_faces does not support score_threshold parameter.
# We can set it via environment variable.
score_threshold = os.environ.get("yunet_score_threshold", "0.9")
resp = []
detected_face = None
img_region = [0, 0, img.shape[1], img.shape[0]]
faces = []
height, width = img.shape[0], img.shape[1]
# resize image if it is too large (Yunet fails to detect faces on large input sometimes)
# I picked 640 as a threshold because it is the default value of max_size in Yunet.
resized = False
if height > 640 or width > 640:
r = 640.0 / max(height, width)
original_image = img.copy()
img = cv2.resize(img, (int(width * r), int(height * r)))
height, width = img.shape[0], img.shape[1]
resized = True
self.model.setInputSize((width, height))
self.model.setScoreThreshold(score_threshold)
_, faces = self.model.detect(img)
if faces is None:
return resp
for face in faces:
# pylint: disable=W0105
"""
The detection output faces is a two-dimension array of type CV_32F,
whose rows are the detected face instances, columns are the location
of a face and 5 facial landmarks.
The format of each row is as follows:
x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt,
x_rcm, y_rcm, x_lcm, y_lcm,
where x1, y1, w, h are the top-left coordinates, width and height of
the face bounding box,
{x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
left eye, nose tip, the right corner and left corner of the mouth respectively.
"""
(x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
# Yunet returns negative coordinates if it thinks part of
# the detected face is outside the frame.
# We set the coordinate to 0 if they are negative.
x = max(x, 0)
y = max(y, 0)
if resized:
img = original_image
x, y, w, h = int(x / r), int(y / r), int(w / r), int(h / r)
x_re, y_re, x_le, y_le = (
int(x_re / r),
int(y_re / r),
int(x_le / r),
int(y_le / r),
)
confidence = face[-1]
confidence = f"{confidence:.2f}"
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
img_region = [x, y, w, h]
if align:
detected_face = self.align_face(detected_face, (x_re, y_re), (x_le, y_le))
resp.append((detected_face, img_region, confidence))
return resp