diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index c8920d8..a8ebab5 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -476,7 +476,9 @@ def extract_faces( - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing: - keys 'x', 'y', 'w', 'h' with int values - - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values + - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. left and right eyes + are eyes on the left and right respectively with respect to the person itself + rather than the observer. - "confidence" (float): The confidence score associated with the detected face. """ diff --git a/deepface/__init__.py b/deepface/__init__.py index a20d210..69d27b4 100644 --- a/deepface/__init__.py +++ b/deepface/__init__.py @@ -1 +1 @@ -__version__ = "0.0.89" +__version__ = "0.0.90" diff --git a/deepface/detectors/DetectorWrapper.py b/deepface/detectors/DetectorWrapper.py index 9baaffd..5f594e1 100644 --- a/deepface/detectors/DetectorWrapper.py +++ b/deepface/detectors/DetectorWrapper.py @@ -76,7 +76,9 @@ def detect_faces( - img (np.ndarray): The detected face as a NumPy array. - - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h, + left_eye and right_eye. left_eye and right_eye are eyes on the left and right + with respect to the person rather than the observer. - confidence (float): The confidence score associated with the detected face. 
""" @@ -123,13 +125,11 @@ def detect_faces( img=img, left_eye=left_eye, right_eye=right_eye ) rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area( - facial_area=(x, y, x + w, y + h), - angle=angle, - size=(img.shape[0], img.shape[1]) + facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1]) ) detected_face = aligned_img[ - int(rotated_y1) : int(rotated_y2), - int(rotated_x1) : int(rotated_x2)] + int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2) + ] result = DetectedFace( img=detected_face, @@ -143,9 +143,7 @@ def detect_faces( def rotate_facial_area( - facial_area: Tuple[int, int, int, int], - angle: float, - size: Tuple[int, int] + facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int] ) -> Tuple[int, int, int, int]: """ Rotate the facial area around its center. diff --git a/deepface/detectors/Dlib.py b/deepface/detectors/Dlib.py index be1922e..08a1346 100644 --- a/deepface/detectors/Dlib.py +++ b/deepface/detectors/Dlib.py @@ -88,11 +88,11 @@ class DlibClient(Detector): shape = self.model["sp"](img, detection) - left_eye = ( + right_eye = ( int((shape.part(2).x + shape.part(3).x) // 2), int((shape.part(2).y + shape.part(3).y) // 2), ) - right_eye = ( + left_eye = ( int((shape.part(0).x + shape.part(1).x) // 2), int((shape.part(0).y + shape.part(1).y) // 2), ) diff --git a/deepface/detectors/FastMtCnn.py b/deepface/detectors/FastMtCnn.py index 0b97058..acc71f1 100644 --- a/deepface/detectors/FastMtCnn.py +++ b/deepface/detectors/FastMtCnn.py @@ -34,8 +34,8 @@ class FastMtCnnClient(Detector): ): for regions, confidence, eyes in zip(*detections): x, y, w, h = xyxy_to_xywh(regions) - left_eye = eyes[0] - right_eye = eyes[1] + right_eye = eyes[0] + left_eye = eyes[1] left_eye = tuple(int(i) for i in left_eye) right_eye = tuple(int(i) for i in right_eye) diff --git a/deepface/detectors/MediaPipe.py b/deepface/detectors/MediaPipe.py index fd79107..c566483 100644 --- 
a/deepface/detectors/MediaPipe.py +++ b/deepface/detectors/MediaPipe.py @@ -61,8 +61,8 @@ class MediaPipeClient(Detector): y = int(bounding_box.ymin * img_height) h = int(bounding_box.height * img_height) - left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height)) - right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height)) + right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height)) + left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height)) # nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height)) # mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height)) # right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height)) diff --git a/deepface/detectors/MtCnn.py b/deepface/detectors/MtCnn.py index 59ed1aa..527c9e5 100644 --- a/deepface/detectors/MtCnn.py +++ b/deepface/detectors/MtCnn.py @@ -35,8 +35,10 @@ class MtCnnClient(Detector): for current_detection in detections: x, y, w, h = current_detection["box"] confidence = current_detection["confidence"] - left_eye = current_detection["keypoints"]["left_eye"] - right_eye = current_detection["keypoints"]["right_eye"] + # mtcnn detector assigns left eye with respect to the observer + # but we are setting it with respect to the person itself + left_eye = current_detection["keypoints"]["right_eye"] + right_eye = current_detection["keypoints"]["left_eye"] facial_area = FacialAreaRegion( x=x, diff --git a/deepface/detectors/OpenCv.py b/deepface/detectors/OpenCv.py index 93d2492..9b59d7d 100644 --- a/deepface/detectors/OpenCv.py +++ b/deepface/detectors/OpenCv.py @@ -112,15 +112,18 @@ class OpenCvClient(Detector): eye_2 = eyes[1] if eye_1[0] < eye_2[0]: - left_eye = eye_1 - right_eye = eye_2 - else: - left_eye = eye_2 right_eye = eye_1 + left_eye = eye_2 + else: + right_eye = eye_2 + left_eye = eye_1 # ----------------------- # find center of eyes - left_eye = (int(left_eye[0] + 
(left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2))) + left_eye = ( + int(left_eye[0] + (left_eye[2] / 2)), + int(left_eye[1] + (left_eye[3] / 2)), + ) right_eye = ( int(right_eye[0] + (right_eye[2] / 2)), int(right_eye[1] + (right_eye[3] / 2)), diff --git a/deepface/detectors/RetinaFace.py b/deepface/detectors/RetinaFace.py index fde7765..a21a793 100644 --- a/deepface/detectors/RetinaFace.py +++ b/deepface/detectors/RetinaFace.py @@ -34,9 +34,9 @@ class RetinaFaceClient(Detector): x = detection[0] w = detection[2] - x - # notice that these must be inverse for retinaface - left_eye = identity["landmarks"]["right_eye"] - right_eye = identity["landmarks"]["left_eye"] + # retinaface sets left and right eyes with respect to the person + left_eye = identity["landmarks"]["left_eye"] + right_eye = identity["landmarks"]["right_eye"] # eyes are list of float, need to cast them tuple of int left_eye = tuple(int(i) for i in left_eye) diff --git a/deepface/detectors/Yolo.py b/deepface/detectors/Yolo.py index e50e450..87496ea 100644 --- a/deepface/detectors/Yolo.py +++ b/deepface/detectors/Yolo.py @@ -81,10 +81,10 @@ class YoloClient(Detector): x, y, w, h = result.boxes.xywh.tolist()[0] confidence = result.boxes.conf.tolist()[0] - # left_eye_conf = result.keypoints.conf[0][0] - # right_eye_conf = result.keypoints.conf[0][1] - left_eye = result.keypoints.xy[0][0].tolist() - right_eye = result.keypoints.xy[0][1].tolist() + # right_eye_conf = result.keypoints.conf[0][0] + # left_eye_conf = result.keypoints.conf[0][1] + right_eye = result.keypoints.xy[0][0].tolist() + left_eye = result.keypoints.xy[0][1].tolist() # eyes are list of float, need to cast them tuple of int left_eye = tuple(int(i) for i in left_eye) diff --git a/deepface/detectors/YuNet.py b/deepface/detectors/YuNet.py index ec773ad..3f0deba 100644 --- a/deepface/detectors/YuNet.py +++ b/deepface/detectors/YuNet.py @@ -99,7 +99,7 @@ class YuNetClient(Detector): {x, y}_{re, le, nt, rcm, lcm} stands for the 
coordinates of right eye, left eye, nose tip, the right corner and left corner of the mouth respectively. """ - (x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8])) + (x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8])) # YuNet returns negative coordinates if it thinks part of the detected face # is outside the frame. diff --git a/deepface/models/Detector.py b/deepface/models/Detector.py index da724f5..0a1e48d 100644 --- a/deepface/models/Detector.py +++ b/deepface/models/Detector.py @@ -20,7 +20,9 @@ class Detector(ABC): where each object contains: - facial_area (FacialAreaRegion): The facial area region represented - as x, y, w, h, left_eye and right_eye + as x, y, w, h, left_eye and right_eye. left eye and right eye are + eyes on the left and right respectively with respect to the person + rather than the observer. """ pass @@ -44,6 +46,21 @@ class FacialAreaRegion: right_eye: Optional[Tuple[int, int]] = None, confidence: Optional[float] = None, ): + """ + Initialize a FacialAreaRegion object. + + Args: + x (int): The x-coordinate of the top-left corner of the bounding box. + y (int): The y-coordinate of the top-left corner of the bounding box. + w (int): The width of the bounding box. + h (int): The height of the bounding box. + left_eye (tuple): The coordinates (x, y) of the left eye with respect to + the person rather than the observer. Default is None. + right_eye (tuple): The coordinates (x, y) of the right eye with respect to + the person rather than the observer. Default is None. + confidence (float, optional): Confidence score associated with the face detection. + Default is None. + """ self.x = x self.y = y self.w = w @@ -59,6 +76,14 @@ class DetectedFace: confidence: float def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float): + """ + Initialize detected face object. + + Args: + img (np.ndarray): detected face image as numpy array + facial_area (FacialAreaRegion): detected face's metadata (e.g. 
bounding box) + confidence (float): confidence score for face detection + """ self.img = img self.facial_area = facial_area self.confidence = confidence diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py index e3042e4..18bad8f 100644 --- a/deepface/modules/detection.py +++ b/deepface/modules/detection.py @@ -68,7 +68,9 @@ def extract_faces( - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing: - keys 'x', 'y', 'w', 'h' with int values - - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values + - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. + left eye and right eye are eyes on the left and right respectively with respect + to the person itself instead of observer. - "confidence" (float): The confidence score associated with the detected face. """ @@ -201,8 +203,8 @@ def align_face( Align a given image horizantally with respect to their left and right eye locations Args: img (np.ndarray): pre-loaded image with detected face - left_eye (list or tuple): coordinates of left eye with respect to the you - right_eye(list or tuple): coordinates of right eye with respect to the you + left_eye (list or tuple): coordinates of left eye with respect to the person itself + right_eye(list or tuple): coordinates of right eye with respect to the person itself Returns: img (np.ndarray): aligned facial image """ @@ -214,6 +216,6 @@ def align_face( if img.shape[0] == 0 or img.shape[1] == 0: return img, 0 - angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0]))) + angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0]))) img = np.array(Image.fromarray(img).rotate(angle)) return img, angle diff --git a/package_info.json b/package_info.json index 9eac2a2..36c1863 100644 --- a/package_info.json +++ b/package_info.json @@ -1,3 +1,3 @@ { - "version": "0.0.89" + "version": "0.0.90" } \ No newline at end of file diff --git 
a/tests/test_extract_faces.py b/tests/test_extract_faces.py index 41bd2a0..b8119fd 100644 --- a/tests/test_extract_faces.py +++ b/tests/test_extract_faces.py @@ -19,6 +19,12 @@ def test_different_detectors(): assert "y" in img_obj["facial_area"].keys() assert "w" in img_obj["facial_area"].keys() assert "h" in img_obj["facial_area"].keys() + # is left eye set with respect to the person instead of observer + assert "left_eye" in img_obj["facial_area"].keys() + assert "right_eye" in img_obj["facial_area"].keys() + right_eye = img_obj["facial_area"]["right_eye"] + left_eye = img_obj["facial_area"]["left_eye"] + assert left_eye[0] > right_eye[0] assert "confidence" in img_obj.keys() img = img_obj["face"] diff --git a/tests/visual-test.py b/tests/visual-test.py index 99d0a3f..a1c348a 100644 --- a/tests/visual-test.py +++ b/tests/visual-test.py @@ -57,7 +57,7 @@ for df in dfs: logger.info(df) -expand_areas = [0, 25] +expand_areas = [0] img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"] for expand_area in expand_areas: for img_path in img_paths: @@ -86,6 +86,15 @@ for expand_area in expand_areas: assert isinstance(face_obj["facial_area"]["right_eye"][0], int) assert isinstance(face_obj["facial_area"]["right_eye"][1], int) + # left eye is really the left eye of the person + if ( + face_obj["facial_area"]["left_eye"] is not None + and face_obj["facial_area"]["right_eye"] is not None + ): + re_x = face_obj["facial_area"]["right_eye"][0] + le_x = face_obj["facial_area"]["left_eye"][0] + assert re_x < le_x, "right eye must be the right eye of the person" + assert isinstance(face_obj["confidence"], float) assert face_obj["confidence"] <= 1