set left and right eye locations with respect to the person instead of observer

Sefik Ilkin Serengil 2024-03-24 16:34:28 +00:00
parent 2b5c139670
commit 745b4b8835
8 changed files with 26 additions and 21 deletions
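
Note on the convention: all detectors are normalized here to label eyes from the detected person's perspective, not the observer's. Since the subject faces the camera, the person's right eye is the eye that appears further left in the image (smaller x). A minimal sketch of the rule, with an illustrative helper name that is not part of this commit:

    def label_eyes_person_frame(eye_a, eye_b):
        # the person faces the camera, so the person's right eye appears
        # further left in the image, i.e. it has the smaller x coordinate
        right_eye, left_eye = sorted([eye_a, eye_b], key=lambda p: p[0])
        return left_eye, right_eye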

View File

@@ -88,11 +88,11 @@ class DlibClient(Detector):
             shape = self.model["sp"](img, detection)
-            left_eye = (
+            right_eye = (
                 int((shape.part(2).x + shape.part(3).x) // 2),
                 int((shape.part(2).y + shape.part(3).y) // 2),
             )
-            right_eye = (
+            left_eye = (
                 int((shape.part(0).x + shape.part(1).x) // 2),
                 int((shape.part(0).y + shape.part(1).y) // 2),
             )
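
Each eye center above is the midpoint of two corner landmarks reported by dlib's shape predictor; which part indices feed which eye follows the swapped assignments in the diff. The midpoint step as a standalone sketch (helper name is illustrative):

    def eye_center(part_a, part_b):
        # midpoint of two dlib landmark points, as integer pixel coordinates
        return (part_a.x + part_b.x) // 2, (part_a.y + part_b.y) // 2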

View File

@@ -34,8 +34,8 @@ class FastMtCnnClient(Detector):
         ):
             for regions, confidence, eyes in zip(*detections):
                 x, y, w, h = xyxy_to_xywh(regions)
-                left_eye = eyes[0]
-                right_eye = eyes[1]
+                right_eye = eyes[0]
+                left_eye = eyes[1]
                 left_eye = tuple(int(i) for i in left_eye)
                 right_eye = tuple(int(i) for i in right_eye)
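
xyxy_to_xywh above converts a corner-format box to x, y, width, height before the eyes are relabeled. The real helper lives elsewhere in the repo; a plausible sketch of what it does:

    def xyxy_to_xywh(region):
        # (x1, y1, x2, y2) corner format -> (x, y, w, h)
        x1, y1, x2, y2 = region
        return x1, y1, x2 - x1, y2 - y1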

View File

@@ -61,8 +61,8 @@ class MediaPipeClient(Detector):
             y = int(bounding_box.ymin * img_height)
             h = int(bounding_box.height * img_height)
-            left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
-            right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
+            right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
+            left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
             # nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
             # mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
             # right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
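
MediaPipe reports keypoints normalized to [0, 1], which is why every coordinate above is scaled by the image width or height before casting to int. The scaling as a small helper (illustrative name, not in the diff):

    def to_pixels(landmark, img_width, img_height):
        # MediaPipe landmark coordinates are normalized to [0, 1];
        # scale by the image size to get integer pixel coordinates
        return int(landmark.x * img_width), int(landmark.y * img_height)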

View File

@@ -35,8 +35,10 @@ class MtCnnClient(Detector):
             for current_detection in detections:
                 x, y, w, h = current_detection["box"]
                 confidence = current_detection["confidence"]
-                left_eye = current_detection["keypoints"]["left_eye"]
-                right_eye = current_detection["keypoints"]["right_eye"]
+                # mtcnn detector assigns left eye with respect to the observer
+                # but we are setting it with respect to the person itself
+                left_eye = current_detection["keypoints"]["right_eye"]
+                right_eye = current_detection["keypoints"]["left_eye"]
                 facial_area = FacialAreaRegion(
                     x=x,

View File

@@ -112,15 +112,18 @@ class OpenCvClient(Detector):
             eye_2 = eyes[1]
             if eye_1[0] < eye_2[0]:
-                left_eye = eye_1
-                right_eye = eye_2
-            else:
-                left_eye = eye_2
                 right_eye = eye_1
+                left_eye = eye_2
+            else:
+                right_eye = eye_2
+                left_eye = eye_1
             # -----------------------
             # find center of eyes
-            left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
+            left_eye = (
+                int(left_eye[0] + (left_eye[2] / 2)),
+                int(left_eye[1] + (left_eye[3] / 2)),
+            )
             right_eye = (
                 int(right_eye[0] + (right_eye[2] / 2)),
                 int(right_eye[1] + (right_eye[3] / 2)),

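The haar cascade returns each eye as an (x, y, w, h) box rather than a point, so once the boxes are labeled, the code above takes each box's center. The same computation as a helper (illustrative name):

    def box_center(box):
        # center of an (x, y, w, h) detection box, in integer pixel coordinates
        x, y, w, h = box
        return int(x + w / 2), int(y + h / 2)
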
View File

@@ -34,9 +34,9 @@ class RetinaFaceClient(Detector):
             x = detection[0]
             w = detection[2] - x
-            # notice that these must be inverse for retinaface
-            left_eye = identity["landmarks"]["right_eye"]
-            right_eye = identity["landmarks"]["left_eye"]
+            # retinaface sets left and right eyes with respect to the person
+            left_eye = identity["landmarks"]["left_eye"]
+            right_eye = identity["landmarks"]["right_eye"]
             # eyes are list of float, need to cast them tuple of int
             left_eye = tuple(int(i) for i in left_eye)

View File

@@ -81,10 +81,10 @@ class YoloClient(Detector):
             x, y, w, h = result.boxes.xywh.tolist()[0]
             confidence = result.boxes.conf.tolist()[0]
-            # left_eye_conf = result.keypoints.conf[0][0]
-            # right_eye_conf = result.keypoints.conf[0][1]
-            left_eye = result.keypoints.xy[0][0].tolist()
-            right_eye = result.keypoints.xy[0][1].tolist()
+            # right_eye_conf = result.keypoints.conf[0][0]
+            # left_eye_conf = result.keypoints.conf[0][1]
+            right_eye = result.keypoints.xy[0][0].tolist()
+            left_eye = result.keypoints.xy[0][1].tolist()
             # eyes are list of float, need to cast them tuple of int
             left_eye = tuple(int(i) for i in left_eye)

View File

@@ -99,7 +99,7 @@ class YuNetClient(Detector):
             {x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
             left eye, nose tip, the right corner and left corner of the mouth respectively.
             """
-            (x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
+            (x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))
             # YuNet returns negative coordinates if it thinks part of the detected face
             # is outside the frame.
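
Per that comment, x and y can come back negative when the detected face extends past the frame edge; the usual guard, and presumably what the surrounding code applies next, is to clamp the corner into the frame:

    # clamp the top-left corner into the frame before building the facial area
    x, y = max(x, 0), max(y, 0)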