deepface/deepface/detectors/YunetWrapper.py

import os
from typing import Any
import cv2
import numpy as np
import gdown
from deepface.detectors import FaceDetector
from deepface.commons import functions
from deepface.commons.logger import Logger

logger = Logger(module="detectors.YunetWrapper")


def build_model() -> Any:
    """
    Create the yunet face detector, fetching its ONNX weights first if the
    weights file is not already present under the deepface home folder.
    Returns:
        model (Any): the object returned by cv2.FaceDetectorYN_create
    """
    # pylint: disable=C0301
    weights_name = "face_detection_yunet_2023mar.onnx"
    weights_url = "https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx"

    # Resolve the on-disk location once and reuse it for the existence check,
    # the download target and the detector constructor.
    weights_path = functions.get_deepface_home() + f"/.deepface/weights/{weights_name}"

    if not os.path.isfile(weights_path):
        logger.info(f"{weights_name} will be downloaded...")
        gdown.download(weights_url, weights_path, quiet=False)

    # Empty config string and a (0, 0) input size: the real input size is set
    # per image by detect_face before running detection.
    return cv2.FaceDetectorYN_create(weights_path, "", (0, 0))


def detect_face(
    detector: Any, image: np.ndarray, align: bool = True, score_threshold: float = 0.9
) -> list:
    """
    Detect and align face with yunet
    Args:
        detector (Any): yunet face detector object (see build_model)
        image (np.ndarray): pre-loaded image
        align (bool): default is true. When true, each cropped face is passed
            through FaceDetector.alignment_procedure using the eye landmarks.
        score_threshold (float): minimum detection score, default is 0.9.
            Overridden by the "yunet_score_threshold" environment variable
            when that variable is set.
    Returns:
        list of (detected_face, img_region, confidence) tuples, one per
        detected face. img_region is [x, y, w, h] in coordinates of the
        input image; confidence is the score formatted as a string with two
        decimals. The list is empty when no face is found.
    Raises:
        ValueError: if "yunet_score_threshold" is set to a non-numeric value.
    """
    # FaceDetector.detect_faces does not support score_threshold parameter.
    # We can set it via environment variable.
    # Environment values are always strings, so convert before handing the
    # threshold to the detector.
    env_threshold = os.environ.get("yunet_score_threshold")
    if env_threshold is not None:
        try:
            score_threshold = float(env_threshold)
        except ValueError as err:
            raise ValueError(
                f"yunet_score_threshold must be a number, got {env_threshold!r}"
            ) from err

    resp = []
    # Faces are always cropped from the full-resolution input, even when
    # detection itself runs on a downscaled version. The input is never
    # modified in place here, so no defensive copy is made.
    source_image = image
    height, width = image.shape[0], image.shape[1]

    # resize image if it is too large (Yunet fails to detect faces on large input sometimes)
    # 640 was picked as the limit by the original author, who cites it as the
    # default value of max_size in Yunet.
    r = 1.0
    resized = height > 640 or width > 640
    if resized:
        r = 640.0 / max(height, width)
        image = cv2.resize(image, (int(width * r), int(height * r)))
        height, width = image.shape[0], image.shape[1]

    detector.setInputSize((width, height))
    detector.setScoreThreshold(score_threshold)
    _, faces = detector.detect(image)
    if faces is None:
        return resp

    for face in faces:
        # The detection output faces is a two-dimension array of type CV_32F,
        # whose rows are the detected face instances, columns are the location
        # of a face and 5 facial landmarks.
        # The format of each row is as follows:
        # x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt,
        # x_rcm, y_rcm, x_lcm, y_lcm,
        # where x1, y1, w, h are the top-left coordinates, width and height of
        # the face bounding box,
        # {x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
        # left eye, nose tip, the right corner and left corner of the mouth respectively.
        # The last element of the row is read below as the confidence score.
        x, y, w, h, x_re, y_re, x_le, y_le = (int(value) for value in face[:8])

        # Yunet returns negative coordinates if it thinks part of
        # the detected face is outside the frame.
        # We set the coordinate to 0 if they are negative.
        x = max(x, 0)
        y = max(y, 0)

        if resized:
            # Map box and eye landmarks back to original-image coordinates.
            x, y, w, h = int(x / r), int(y / r), int(w / r), int(h / r)
            x_re, y_re = int(x_re / r), int(y_re / r)
            x_le, y_le = int(x_le / r), int(y_le / r)

        confidence = f"{face[-1]:.2f}"
        detected_face = source_image[y : y + h, x : x + w]
        img_region = [x, y, w, h]
        if align:
            detected_face = FaceDetector.alignment_procedure(
                detected_face, (x_re, y_re), (x_le, y_le)
            )
        resp.append((detected_face, img_region, confidence))
    return resp