align first detect second

2025-07-21 09:20:02 +00:00 · 2024-02-16 17:20:15 +00:00 · 2024-02-16 17:20:15 +00:00 · 16c72cd0f6
commit 16c72cd0f6
parent e529fa5c26
5 changed files with 135 additions and 68 deletions
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@ -423,7 +423,7 @@ def stream(

 def extract_faces(
    img_path: Union[str, np.ndarray],
-    target_size: Tuple[int, int] = (224, 224),
+    target_size: Optional[Tuple[int, int]] = (224, 224),
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
--- a/deepface/detectors/DetectorWrapper.py
+++ b/deepface/detectors/DetectorWrapper.py
@ -1,4 +1,4 @@
-from typing import Any, List
+from typing import Any, List, Tuple
 import numpy as np
 from deepface.modules import detection
 from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
@ -106,17 +106,27 @@ def detect_faces(
        # expand the facial area to be extracted and stay within img.shape limits
        x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
        y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
-        w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
-        h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+        w2 = min(img.shape[1], w + int((w * 2 * expand_percentage) / 100))  # expand right
+        h2 = min(img.shape[0], h + int((h * 2 * expand_percentage) / 100))  # expand bottom

        # extract detected face unaligned
        detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]

-        # align detected face
-        if align is True:
-            detected_face = detection.align_face(
-                img=detected_face, left_eye=left_eye, right_eye=right_eye
+        # aligning detected face causes a lot of black pixels
+        # if align is True:
+        #     detected_face, _ = detection.align_face(
+        #         img=detected_face, left_eye=left_eye, right_eye=right_eye
+        #     )
+
+        # align original image, then find projection of detected face area after alignment
+        if align is True:  # and left_eye is not None and right_eye is not None:
+            aligned_img, angle = detection.align_face(
+                img=img, left_eye=left_eye, right_eye=right_eye
            )
+            x1_new, y1_new, x2_new, y2_new = rotate_facial_area(
+                facial_area=(x2, y2, x2 + w2, y2 + h2), angle=angle, direction=1, size=img.shape
+            )
+            detected_face = aligned_img[int(y1_new) : int(y2_new), int(x1_new) : int(x2_new)]

        result = DetectedFace(
            img=detected_face,
@ -127,3 +137,45 @@ def detect_faces(
        )
        results.append(result)
    return results
+
+
+def rotate_facial_area(
+    facial_area: Tuple[int, int, int, int], angle: float, direction: int, size: Tuple[int, int]
+) -> Tuple[int, int, int, int]:
+    """
+    Find where a facial area ends up after the image is rotated around the image center.
+    The center point of the facial area is rotated around the image center, and a new
+    axis-aligned box with the same width and height is built around the rotated point
+    (the box itself is not rotated).
+    Inspired from the work of @UmutDeniz26 - github.com/serengil/retinaface/pull/80
+
+    Args:
+        facial_area (tuple of int): Representing the (x1, y1, x2, y2) of the facial area.
+            x2 is equal to x1 + w1, and y2 is equal to y1 + h1
+        angle (float): Angle of rotation in degrees.
+        direction (int): Direction of rotation (-1 for clockwise, 1 for counterclockwise).
+            It is used as a sign multiplier on the sine terms of the rotation.
+        size (tuple of int): Shape of the image in (height, width) order: size[0] is used
+            for the y axis and size[1] for the x axis. Any further items (e.g. the channel
+            count when img.shape is passed) are ignored.
+
+    Returns:
+        rotated_coordinates (tuple of int): Representing the new coordinates
+            (x1, y1, x2, y2) or (x1, y1, x1+w1, y1+h1) of the rotated facial area.
+            Values are truncated with int() and are not clamped to the image
+            boundaries, so they may be negative or exceed the image size.
+    """
+    # Angle in radians
+    angle = angle * np.pi / 180
+
+    # Express the center of the facial area relative to the center of the image
+    x = (facial_area[0] + facial_area[2]) / 2 - size[1] / 2
+    y = (facial_area[1] + facial_area[3]) / 2 - size[0] / 2
+
+    # Rotate that center point around the origin (i.e. the image center)
+    x_new = x * np.cos(angle) + y * direction * np.sin(angle)
+    y_new = -x * direction * np.sin(angle) + y * np.cos(angle)
+
+    # Translate the rotated center point back to image coordinates
+    x_new = x_new + size[1] / 2
+    y_new = y_new + size[0] / 2
+
+    # Rebuild a box with the original width and height around the rotated center
+    x1 = x_new - (facial_area[2] - facial_area[0]) / 2
+    y1 = y_new - (facial_area[3] - facial_area[1]) / 2
+    x2 = x_new + (facial_area[2] - facial_area[0]) / 2
+    y2 = y_new + (facial_area[3] - facial_area[1]) / 2
+
+    return (int(x1), int(y1), int(x2), int(y2))
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@ -1,5 +1,5 @@
 # built-in dependencies
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any, Dict, List, Tuple, Union, Optional

 # 3rd party dependencies
 import numpy as np
@ -27,7 +27,7 @@ elif tf_major_version == 2:

 def extract_faces(
    img_path: Union[str, np.ndarray],
-    target_size: Tuple[int, int] = (224, 224),
+    target_size: Optional[Tuple[int, int]] = (224, 224),
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
@ -116,42 +116,43 @@ def extract_faces(
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

        # resize and padding
-        factor_0 = target_size[0] / current_img.shape[0]
-        factor_1 = target_size[1] / current_img.shape[1]
-        factor = min(factor_0, factor_1)
+        if target_size is not None:
+            factor_0 = target_size[0] / current_img.shape[0]
+            factor_1 = target_size[1] / current_img.shape[1]
+            factor = min(factor_0, factor_1)

-        dsize = (
-            int(current_img.shape[1] * factor),
-            int(current_img.shape[0] * factor),
-        )
-        current_img = cv2.resize(current_img, dsize)
-
-        diff_0 = target_size[0] - current_img.shape[0]
-        diff_1 = target_size[1] - current_img.shape[1]
-        if grayscale is False:
-            # Put the base image in the middle of the padded image
-            current_img = np.pad(
-                current_img,
-                (
-                    (diff_0 // 2, diff_0 - diff_0 // 2),
-                    (diff_1 // 2, diff_1 - diff_1 // 2),
-                    (0, 0),
-                ),
-                "constant",
-            )
-        else:
-            current_img = np.pad(
-                current_img,
-                (
-                    (diff_0 // 2, diff_0 - diff_0 // 2),
-                    (diff_1 // 2, diff_1 - diff_1 // 2),
-                ),
-                "constant",
+            dsize = (
+                int(current_img.shape[1] * factor),
+                int(current_img.shape[0] * factor),
            )
+            current_img = cv2.resize(current_img, dsize)

-        # double check: if target image is not still the same size with target.
-        if current_img.shape[0:2] != target_size:
-            current_img = cv2.resize(current_img, target_size)
+            diff_0 = target_size[0] - current_img.shape[0]
+            diff_1 = target_size[1] - current_img.shape[1]
+            if grayscale is False:
+                # Put the base image in the middle of the padded image
+                current_img = np.pad(
+                    current_img,
+                    (
+                        (diff_0 // 2, diff_0 - diff_0 // 2),
+                        (diff_1 // 2, diff_1 - diff_1 // 2),
+                        (0, 0),
+                    ),
+                    "constant",
+                )
+            else:
+                current_img = np.pad(
+                    current_img,
+                    (
+                        (diff_0 // 2, diff_0 - diff_0 // 2),
+                        (diff_1 // 2, diff_1 - diff_1 // 2),
+                    ),
+                    "constant",
+                )
+
+            # double check: if target image is not still the same size with target.
+            if current_img.shape[0:2] != target_size:
+                current_img = cv2.resize(current_img, target_size)

        # normalizing the image pixels
        # what this line doing? must?
@ -189,7 +190,7 @@ def align_face(
    img: np.ndarray,
    left_eye: Union[list, tuple],
    right_eye: Union[list, tuple],
-) -> np.ndarray:
+) -> Tuple[np.ndarray, float]:
    """
    Align a given image horizontally with respect to its left and right eye locations
    Args:
@ -201,13 +202,13 @@ def align_face(
    """
    # if eye could not be detected for the given image, return image itself
    if left_eye is None or right_eye is None:
-        return img
+        return img, 0

    # sometimes unexpectedly detected images come with nil dimensions
    if img.shape[0] == 0 or img.shape[1] == 0:
-        return img
+        return img, 0

    angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
    img = Image.fromarray(img)
    img = np.array(img.rotate(angle))
-    return img
+    return img, angle
--- a/tests/dataset/img11_reflection.jpg
+++ b/tests/dataset/img11_reflection.jpg
--- a/tests/visual-test.py
+++ b/tests/visual-test.py
@ -53,24 +53,38 @@ dfs = DeepFace.find(
 for df in dfs:
    logger.info(df)

-# extract faces
-for detector_backend in detector_backends:
-    face_objs = DeepFace.extract_faces(
-        img_path="dataset/img11.jpg", detector_backend=detector_backend, align=True
-    )
-    for face_obj in face_objs:
-        face = face_obj["face"]
-        logger.info(detector_backend)
-        logger.info(face_obj["facial_area"])
-        logger.info(face_obj["confidence"])
-        assert isinstance(face_obj["facial_area"]["left_eye"], tuple)
-        assert isinstance(face_obj["facial_area"]["right_eye"], tuple)
-        assert isinstance(face_obj["facial_area"]["left_eye"][0], int)
-        assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
-        assert isinstance(face_obj["facial_area"]["left_eye"][1], int)
-        assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
-        assert isinstance(face_obj["confidence"], float)
-        plt.imshow(face)
-        plt.axis("off")
-        plt.show()
-        logger.info("-----------")
+
+# img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg", "dataset/couple.jpg"]
+img_paths = ["dataset/img11.jpg"]
+for img_path in img_paths:
+    # extract faces
+    for detector_backend in detector_backends:
+        face_objs = DeepFace.extract_faces(
+            img_path=img_path,
+            detector_backend=detector_backend,
+            align=True,
+            # expand_percentage=10,
+            # target_size=None,
+        )
+        for face_obj in face_objs:
+            face = face_obj["face"]
+            logger.info(detector_backend)
+            logger.info(face_obj["facial_area"])
+            logger.info(face_obj["confidence"])
+
+            # we know opencv sometimes cannot find eyes
+            if face_obj["facial_area"]["left_eye"] is not None:
+                assert isinstance(face_obj["facial_area"]["left_eye"], tuple)
+                assert isinstance(face_obj["facial_area"]["left_eye"][0], int)
+                assert isinstance(face_obj["facial_area"]["left_eye"][1], int)
+
+            if face_obj["facial_area"]["right_eye"] is not None:
+                assert isinstance(face_obj["facial_area"]["right_eye"], tuple)
+                assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
+                assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
+
+            assert isinstance(face_obj["confidence"], float)
+            plt.imshow(face)
+            plt.axis("off")
+            plt.show()
+            logger.info("-----------")