Merge pull request #1140 from serengil/feat-task-2403-eye-coordinates-in-biology

Feat task 2403 eye coordinates in biology
2025-07-24 10:50:03 +00:00 · 2024-03-24 16:59:48 +00:00 · 2024-03-24 16:59:48 +00:00 · 329606ffe8
commit 329606ffe8
parent ad80a31807 09499c91c3
16 changed files with 86 additions and 39 deletions
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@ -476,7 +476,9 @@ def extract_faces(

        - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
            - keys 'x', 'y', 'w', 'h' with int values
-            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
+            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. Left and right
+                eyes are the person's own left and right eyes, not the left and right as
+                seen by the observer.

        - "confidence" (float): The confidence score associated with the detected face.
    """
--- a/deepface/__init__.py
+++ b/deepface/__init__.py
@ -1 +1 @@
-__version__ = "0.0.89"
+__version__ = "0.0.90"
--- a/deepface/detectors/DetectorWrapper.py
+++ b/deepface/detectors/DetectorWrapper.py
@ -76,7 +76,9 @@ def detect_faces(

        - img (np.ndarray): The detected face as a NumPy array.

-        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
+            left_eye and right_eye. The left and right eyes are the person's own left and
+            right eyes, not the left and right as seen by the observer.

        - confidence (float): The confidence score associated with the detected face.
    """
@ -123,13 +125,11 @@ def detect_faces(
                img=img, left_eye=left_eye, right_eye=right_eye
            )
            rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
-                facial_area=(x, y, x + w, y + h),
-                angle=angle,
-                size=(img.shape[0], img.shape[1])
+                facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
            )
            detected_face = aligned_img[
-                int(rotated_y1) : int(rotated_y2),
-                int(rotated_x1) : int(rotated_x2)]
+                int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
+            ]

        result = DetectedFace(
            img=detected_face,
@ -143,9 +143,7 @@ def detect_faces(


 def rotate_facial_area(
-    facial_area: Tuple[int, int, int, int],
-    angle: float,
-    size: Tuple[int, int]
+    facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
 ) -> Tuple[int, int, int, int]:
    """
    Rotate the facial area around its center.
--- a/deepface/detectors/Dlib.py
+++ b/deepface/detectors/Dlib.py
@ -88,11 +88,11 @@ class DlibClient(Detector):

                shape = self.model["sp"](img, detection)

-                left_eye = (
+                right_eye = (
                    int((shape.part(2).x + shape.part(3).x) // 2),
                    int((shape.part(2).y + shape.part(3).y) // 2),
                )
-                right_eye = (
+                left_eye = (
                    int((shape.part(0).x + shape.part(1).x) // 2),
                    int((shape.part(0).y + shape.part(1).y) // 2),
                )
--- a/deepface/detectors/FastMtCnn.py
+++ b/deepface/detectors/FastMtCnn.py
@ -34,8 +34,8 @@ class FastMtCnnClient(Detector):
        ):
            for regions, confidence, eyes in zip(*detections):
                x, y, w, h = xyxy_to_xywh(regions)
-                left_eye = eyes[0]
-                right_eye = eyes[1]
+                right_eye = eyes[0]
+                left_eye = eyes[1]

                left_eye = tuple(int(i) for i in left_eye)
                right_eye = tuple(int(i) for i in right_eye)
--- a/deepface/detectors/MediaPipe.py
+++ b/deepface/detectors/MediaPipe.py
@ -61,8 +61,8 @@ class MediaPipeClient(Detector):
            y = int(bounding_box.ymin * img_height)
            h = int(bounding_box.height * img_height)

-            left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
-            right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
+            right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
+            left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
            # nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
            # mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
            # right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
--- a/deepface/detectors/MtCnn.py
+++ b/deepface/detectors/MtCnn.py
@ -35,8 +35,10 @@ class MtCnnClient(Detector):
            for current_detection in detections:
                x, y, w, h = current_detection["box"]
                confidence = current_detection["confidence"]
-                left_eye = current_detection["keypoints"]["left_eye"]
-                right_eye = current_detection["keypoints"]["right_eye"]
+                # mtcnn detector assigns left eye with respect to the observer
+                # but we are setting it with respect to the person itself
+                left_eye = current_detection["keypoints"]["right_eye"]
+                right_eye = current_detection["keypoints"]["left_eye"]

                facial_area = FacialAreaRegion(
                    x=x,
--- a/deepface/detectors/OpenCv.py
+++ b/deepface/detectors/OpenCv.py
@ -112,15 +112,18 @@ class OpenCvClient(Detector):
            eye_2 = eyes[1]

            if eye_1[0] < eye_2[0]:
-                left_eye = eye_1
-                right_eye = eye_2
-            else:
-                left_eye = eye_2
                right_eye = eye_1
+                left_eye = eye_2
+            else:
+                right_eye = eye_2
+                left_eye = eye_1

            # -----------------------
            # find center of eyes
-            left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
+            left_eye = (
+                int(left_eye[0] + (left_eye[2] / 2)),
+                int(left_eye[1] + (left_eye[3] / 2)),
+            )
            right_eye = (
                int(right_eye[0] + (right_eye[2] / 2)),
                int(right_eye[1] + (right_eye[3] / 2)),
--- a/deepface/detectors/RetinaFace.py
+++ b/deepface/detectors/RetinaFace.py
@ -34,9 +34,9 @@ class RetinaFaceClient(Detector):
            x = detection[0]
            w = detection[2] - x

-            # notice that these must be inverse for retinaface
-            left_eye = identity["landmarks"]["right_eye"]
-            right_eye = identity["landmarks"]["left_eye"]
+            # retinaface sets left and right eyes with respect to the person
+            left_eye = identity["landmarks"]["left_eye"]
+            right_eye = identity["landmarks"]["right_eye"]

            # eyes are list of float, need to cast them tuple of int
            left_eye = tuple(int(i) for i in left_eye)
--- a/deepface/detectors/Yolo.py
+++ b/deepface/detectors/Yolo.py
@ -81,10 +81,10 @@ class YoloClient(Detector):
            x, y, w, h = result.boxes.xywh.tolist()[0]
            confidence = result.boxes.conf.tolist()[0]

-            # left_eye_conf = result.keypoints.conf[0][0]
-            # right_eye_conf = result.keypoints.conf[0][1]
-            left_eye = result.keypoints.xy[0][0].tolist()
-            right_eye = result.keypoints.xy[0][1].tolist()
+            # right_eye_conf = result.keypoints.conf[0][0]
+            # left_eye_conf = result.keypoints.conf[0][1]
+            right_eye = result.keypoints.xy[0][0].tolist()
+            left_eye = result.keypoints.xy[0][1].tolist()

            # eyes are list of float, need to cast them tuple of int
            left_eye = tuple(int(i) for i in left_eye)
--- a/deepface/detectors/YuNet.py
+++ b/deepface/detectors/YuNet.py
@ -99,7 +99,7 @@ class YuNetClient(Detector):
            {x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
            left eye, nose tip, the right corner and left corner of the mouth respectively.
            """
-            (x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
+            (x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))

            # YuNet returns negative coordinates if it thinks part of the detected face
            # is outside the frame.
--- a/deepface/models/Detector.py
+++ b/deepface/models/Detector.py
@ -20,7 +20,9 @@ class Detector(ABC):
                where each object contains:

            - facial_area (FacialAreaRegion): The facial area region represented
-                as x, y, w, h, left_eye and right_eye
+                as x, y, w, h, left_eye and right_eye. The left and right eyes are
+                the person's own left and right eyes, not the left and right as
+                seen by the observer.
        """
        pass

@ -44,6 +46,21 @@ class FacialAreaRegion:
        right_eye: Optional[Tuple[int, int]] = None,
        confidence: Optional[float] = None,
    ):
+        """
+        Initialize a facial area region object.
+
+        Args:
+            x (int): The x-coordinate of the top-left corner of the bounding box.
+            y (int): The y-coordinate of the top-left corner of the bounding box.
+            w (int): The width of the bounding box.
+            h (int): The height of the bounding box.
+            left_eye (tuple, optional): The coordinates (x, y) of the person's own left
+                eye, not the left eye as seen by the observer. Default is None.
+            right_eye (tuple, optional): The coordinates (x, y) of the person's own right
+                eye, not the right eye as seen by the observer. Default is None.
+            confidence (float, optional): Confidence score associated with the face detection.
+                Default is None.
+        """
        self.x = x
        self.y = y
        self.w = w
@ -59,6 +76,14 @@ class DetectedFace:
    confidence: float

    def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
+        """
+        Initialize detected face object.
+
+        Args:
+            img (np.ndarray): detected face image as numpy array
+            facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
+            confidence (float): confidence score for face detection
+        """
        self.img = img
        self.facial_area = facial_area
        self.confidence = confidence
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@ -68,7 +68,9 @@ def extract_faces(

        - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
            - keys 'x', 'y', 'w', 'h' with int values
-            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
+            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
+                The left and right eyes are the person's own left and right eyes, not the
+                left and right as seen by the observer.

        - "confidence" (float): The confidence score associated with the detected face.
    """
@ -201,8 +203,8 @@ def align_face(
    Align a given image horizontally with respect to their left and right eye locations
    Args:
        img (np.ndarray): pre-loaded image with detected face
-        left_eye (list or tuple): coordinates of left eye with respect to the you
-        right_eye(list or tuple): coordinates of right eye with respect to the you
+        left_eye (list or tuple): coordinates of the person's own left eye
+        right_eye (list or tuple): coordinates of the person's own right eye
    Returns:
        img (np.ndarray), angle (float): aligned facial image and the applied rotation in degrees
    """
@ -214,6 +216,6 @@ def align_face(
    if img.shape[0] == 0 or img.shape[1] == 0:
        return img, 0

-    angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
+    angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
    img = np.array(Image.fromarray(img).rotate(angle))
    return img, angle
--- a/package_info.json
+++ b/package_info.json
@ -1,3 +1,3 @@
 {
-    "version": "0.0.89"
+    "version": "0.0.90"
 }
--- a/tests/test_extract_faces.py
+++ b/tests/test_extract_faces.py
@ -19,6 +19,12 @@ def test_different_detectors():
            assert "y" in img_obj["facial_area"].keys()
            assert "w" in img_obj["facial_area"].keys()
            assert "h" in img_obj["facial_area"].keys()
+            # left eye is set with respect to the person, so it has the larger x coordinate
+            assert "left_eye" in img_obj["facial_area"].keys()
+            assert "right_eye" in img_obj["facial_area"].keys()
+            right_eye = img_obj["facial_area"]["right_eye"]
+            left_eye = img_obj["facial_area"]["left_eye"]
+            assert left_eye[0] > right_eye[0]
            assert "confidence" in img_obj.keys()

            img = img_obj["face"]
--- a/tests/visual-test.py
+++ b/tests/visual-test.py
@ -57,7 +57,7 @@ for df in dfs:
    logger.info(df)


-expand_areas = [0, 25]
+expand_areas = [0]
 img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"]
 for expand_area in expand_areas:
    for img_path in img_paths:
@ -86,6 +86,15 @@ for expand_area in expand_areas:
                    assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
                    assert isinstance(face_obj["facial_area"]["right_eye"][1], int)

+                # left eye is really the left eye of the person
+                if (
+                    face_obj["facial_area"]["left_eye"] is not None
+                    and face_obj["facial_area"]["right_eye"] is not None
+                ):
+                    re_x = face_obj["facial_area"]["right_eye"][0]
+                    le_x = face_obj["facial_area"]["left_eye"][0]
+                    assert re_x < le_x, "right eye must be the right eye of the person"
+
                assert isinstance(face_obj["confidence"], float)
                assert face_obj["confidence"] <= 1