mirror of
https://github.com/serengil/deepface.git
synced 2025-06-08 20:45:22 +00:00
Merge pull request #1140 from serengil/feat-task-2403-eye-coordinates-in-biology
Feat task 2403 eye coordinates in biology
This commit is contained in:
commit
329606ffe8
@ -476,7 +476,9 @@ def extract_faces(
|
|||||||
|
|
||||||
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
|
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
|
||||||
- keys 'x', 'y', 'w', 'h' with int values
|
- keys 'x', 'y', 'w', 'h' with int values
|
||||||
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
|
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. left and right eyes
|
||||||
|
are eyes on the left and right respectively with respect to the person itself
|
||||||
|
instead of observer.
|
||||||
|
|
||||||
- "confidence" (float): The confidence score associated with the detected face.
|
- "confidence" (float): The confidence score associated with the detected face.
|
||||||
"""
|
"""
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.0.89"
|
__version__ = "0.0.90"
|
||||||
|
@ -76,7 +76,9 @@ def detect_faces(
|
|||||||
|
|
||||||
- img (np.ndarray): The detected face as a NumPy array.
|
- img (np.ndarray): The detected face as a NumPy array.
|
||||||
|
|
||||||
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
|
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
|
||||||
|
left_eye and right_eye. left eye and right eye are eyes on the left and right
|
||||||
|
with respect to the person instead of observer.
|
||||||
|
|
||||||
- confidence (float): The confidence score associated with the detected face.
|
- confidence (float): The confidence score associated with the detected face.
|
||||||
"""
|
"""
|
||||||
@ -123,13 +125,11 @@ def detect_faces(
|
|||||||
img=img, left_eye=left_eye, right_eye=right_eye
|
img=img, left_eye=left_eye, right_eye=right_eye
|
||||||
)
|
)
|
||||||
rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
|
rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
|
||||||
facial_area=(x, y, x + w, y + h),
|
facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
|
||||||
angle=angle,
|
|
||||||
size=(img.shape[0], img.shape[1])
|
|
||||||
)
|
)
|
||||||
detected_face = aligned_img[
|
detected_face = aligned_img[
|
||||||
int(rotated_y1) : int(rotated_y2),
|
int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
|
||||||
int(rotated_x1) : int(rotated_x2)]
|
]
|
||||||
|
|
||||||
result = DetectedFace(
|
result = DetectedFace(
|
||||||
img=detected_face,
|
img=detected_face,
|
||||||
@ -143,9 +143,7 @@ def detect_faces(
|
|||||||
|
|
||||||
|
|
||||||
def rotate_facial_area(
|
def rotate_facial_area(
|
||||||
facial_area: Tuple[int, int, int, int],
|
facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
|
||||||
angle: float,
|
|
||||||
size: Tuple[int, int]
|
|
||||||
) -> Tuple[int, int, int, int]:
|
) -> Tuple[int, int, int, int]:
|
||||||
"""
|
"""
|
||||||
Rotate the facial area around its center.
|
Rotate the facial area around its center.
|
||||||
|
@ -88,11 +88,11 @@ class DlibClient(Detector):
|
|||||||
|
|
||||||
shape = self.model["sp"](img, detection)
|
shape = self.model["sp"](img, detection)
|
||||||
|
|
||||||
left_eye = (
|
right_eye = (
|
||||||
int((shape.part(2).x + shape.part(3).x) // 2),
|
int((shape.part(2).x + shape.part(3).x) // 2),
|
||||||
int((shape.part(2).y + shape.part(3).y) // 2),
|
int((shape.part(2).y + shape.part(3).y) // 2),
|
||||||
)
|
)
|
||||||
right_eye = (
|
left_eye = (
|
||||||
int((shape.part(0).x + shape.part(1).x) // 2),
|
int((shape.part(0).x + shape.part(1).x) // 2),
|
||||||
int((shape.part(0).y + shape.part(1).y) // 2),
|
int((shape.part(0).y + shape.part(1).y) // 2),
|
||||||
)
|
)
|
||||||
|
@ -34,8 +34,8 @@ class FastMtCnnClient(Detector):
|
|||||||
):
|
):
|
||||||
for regions, confidence, eyes in zip(*detections):
|
for regions, confidence, eyes in zip(*detections):
|
||||||
x, y, w, h = xyxy_to_xywh(regions)
|
x, y, w, h = xyxy_to_xywh(regions)
|
||||||
left_eye = eyes[0]
|
right_eye = eyes[0]
|
||||||
right_eye = eyes[1]
|
left_eye = eyes[1]
|
||||||
|
|
||||||
left_eye = tuple(int(i) for i in left_eye)
|
left_eye = tuple(int(i) for i in left_eye)
|
||||||
right_eye = tuple(int(i) for i in right_eye)
|
right_eye = tuple(int(i) for i in right_eye)
|
||||||
|
@ -61,8 +61,8 @@ class MediaPipeClient(Detector):
|
|||||||
y = int(bounding_box.ymin * img_height)
|
y = int(bounding_box.ymin * img_height)
|
||||||
h = int(bounding_box.height * img_height)
|
h = int(bounding_box.height * img_height)
|
||||||
|
|
||||||
left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
|
right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
|
||||||
right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
|
left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
|
||||||
# nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
|
# nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
|
||||||
# mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
|
# mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
|
||||||
# right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
|
# right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
|
||||||
|
@ -35,8 +35,10 @@ class MtCnnClient(Detector):
|
|||||||
for current_detection in detections:
|
for current_detection in detections:
|
||||||
x, y, w, h = current_detection["box"]
|
x, y, w, h = current_detection["box"]
|
||||||
confidence = current_detection["confidence"]
|
confidence = current_detection["confidence"]
|
||||||
left_eye = current_detection["keypoints"]["left_eye"]
|
# mtcnn detector assigns left eye with respect to the observer
|
||||||
right_eye = current_detection["keypoints"]["right_eye"]
|
# but we are setting it with respect to the person itself
|
||||||
|
left_eye = current_detection["keypoints"]["right_eye"]
|
||||||
|
right_eye = current_detection["keypoints"]["left_eye"]
|
||||||
|
|
||||||
facial_area = FacialAreaRegion(
|
facial_area = FacialAreaRegion(
|
||||||
x=x,
|
x=x,
|
||||||
|
@ -112,15 +112,18 @@ class OpenCvClient(Detector):
|
|||||||
eye_2 = eyes[1]
|
eye_2 = eyes[1]
|
||||||
|
|
||||||
if eye_1[0] < eye_2[0]:
|
if eye_1[0] < eye_2[0]:
|
||||||
left_eye = eye_1
|
|
||||||
right_eye = eye_2
|
|
||||||
else:
|
|
||||||
left_eye = eye_2
|
|
||||||
right_eye = eye_1
|
right_eye = eye_1
|
||||||
|
left_eye = eye_2
|
||||||
|
else:
|
||||||
|
right_eye = eye_2
|
||||||
|
left_eye = eye_1
|
||||||
|
|
||||||
# -----------------------
|
# -----------------------
|
||||||
# find center of eyes
|
# find center of eyes
|
||||||
left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
|
left_eye = (
|
||||||
|
int(left_eye[0] + (left_eye[2] / 2)),
|
||||||
|
int(left_eye[1] + (left_eye[3] / 2)),
|
||||||
|
)
|
||||||
right_eye = (
|
right_eye = (
|
||||||
int(right_eye[0] + (right_eye[2] / 2)),
|
int(right_eye[0] + (right_eye[2] / 2)),
|
||||||
int(right_eye[1] + (right_eye[3] / 2)),
|
int(right_eye[1] + (right_eye[3] / 2)),
|
||||||
|
@ -34,9 +34,9 @@ class RetinaFaceClient(Detector):
|
|||||||
x = detection[0]
|
x = detection[0]
|
||||||
w = detection[2] - x
|
w = detection[2] - x
|
||||||
|
|
||||||
# notice that these must be inverse for retinaface
|
# retinaface sets left and right eyes with respect to the person
|
||||||
left_eye = identity["landmarks"]["right_eye"]
|
left_eye = identity["landmarks"]["left_eye"]
|
||||||
right_eye = identity["landmarks"]["left_eye"]
|
right_eye = identity["landmarks"]["right_eye"]
|
||||||
|
|
||||||
# eyes are list of float, need to cast them to tuple of int
|
# eyes are list of float, need to cast them to tuple of int
|
||||||
left_eye = tuple(int(i) for i in left_eye)
|
left_eye = tuple(int(i) for i in left_eye)
|
||||||
|
@ -81,10 +81,10 @@ class YoloClient(Detector):
|
|||||||
x, y, w, h = result.boxes.xywh.tolist()[0]
|
x, y, w, h = result.boxes.xywh.tolist()[0]
|
||||||
confidence = result.boxes.conf.tolist()[0]
|
confidence = result.boxes.conf.tolist()[0]
|
||||||
|
|
||||||
# left_eye_conf = result.keypoints.conf[0][0]
|
# right_eye_conf = result.keypoints.conf[0][0]
|
||||||
# right_eye_conf = result.keypoints.conf[0][1]
|
# left_eye_conf = result.keypoints.conf[0][1]
|
||||||
left_eye = result.keypoints.xy[0][0].tolist()
|
right_eye = result.keypoints.xy[0][0].tolist()
|
||||||
right_eye = result.keypoints.xy[0][1].tolist()
|
left_eye = result.keypoints.xy[0][1].tolist()
|
||||||
|
|
||||||
# eyes are list of float, need to cast them to tuple of int
|
# eyes are list of float, need to cast them to tuple of int
|
||||||
left_eye = tuple(int(i) for i in left_eye)
|
left_eye = tuple(int(i) for i in left_eye)
|
||||||
|
@ -99,7 +99,7 @@ class YuNetClient(Detector):
|
|||||||
{x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
|
{x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
|
||||||
left eye, nose tip, the right corner and left corner of the mouth respectively.
|
left eye, nose tip, the right corner and left corner of the mouth respectively.
|
||||||
"""
|
"""
|
||||||
(x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
|
(x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))
|
||||||
|
|
||||||
# YuNet returns negative coordinates if it thinks part of the detected face
|
# YuNet returns negative coordinates if it thinks part of the detected face
|
||||||
# is outside the frame.
|
# is outside the frame.
|
||||||
|
@ -20,7 +20,9 @@ class Detector(ABC):
|
|||||||
where each object contains:
|
where each object contains:
|
||||||
|
|
||||||
- facial_area (FacialAreaRegion): The facial area region represented
|
- facial_area (FacialAreaRegion): The facial area region represented
|
||||||
as x, y, w, h, left_eye and right_eye
|
as x, y, w, h, left_eye and right_eye. left eye and right eye are
|
||||||
|
eyes on the left and right respectively with respect to the person
|
||||||
|
instead of observer.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -44,6 +46,21 @@ class FacialAreaRegion:
|
|||||||
right_eye: Optional[Tuple[int, int]] = None,
|
right_eye: Optional[Tuple[int, int]] = None,
|
||||||
confidence: Optional[float] = None,
|
confidence: Optional[float] = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Initialize a FacialAreaRegion object.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
x (int): The x-coordinate of the top-left corner of the bounding box.
|
||||||
|
y (int): The y-coordinate of the top-left corner of the bounding box.
|
||||||
|
w (int): The width of the bounding box.
|
||||||
|
h (int): The height of the bounding box.
|
||||||
|
left_eye (tuple): The coordinates (x, y) of the left eye with respect to
|
||||||
|
the person instead of observer. Default is None.
|
||||||
|
right_eye (tuple): The coordinates (x, y) of the right eye with respect to
|
||||||
|
the person instead of observer. Default is None.
|
||||||
|
confidence (float, optional): Confidence score associated with the face detection.
|
||||||
|
Default is None.
|
||||||
|
"""
|
||||||
self.x = x
|
self.x = x
|
||||||
self.y = y
|
self.y = y
|
||||||
self.w = w
|
self.w = w
|
||||||
@ -59,6 +76,14 @@ class DetectedFace:
|
|||||||
confidence: float
|
confidence: float
|
||||||
|
|
||||||
def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
|
def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
|
||||||
|
"""
|
||||||
|
Initialize detected face object.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
img (np.ndarray): detected face image as numpy array
|
||||||
|
facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
|
||||||
|
confidence (float): confidence score for face detection
|
||||||
|
"""
|
||||||
self.img = img
|
self.img = img
|
||||||
self.facial_area = facial_area
|
self.facial_area = facial_area
|
||||||
self.confidence = confidence
|
self.confidence = confidence
|
||||||
|
@ -68,7 +68,9 @@ def extract_faces(
|
|||||||
|
|
||||||
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
|
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
|
||||||
- keys 'x', 'y', 'w', 'h' with int values
|
- keys 'x', 'y', 'w', 'h' with int values
|
||||||
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
|
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
|
||||||
|
left eye and right eye are eyes on the left and right respectively with respect
|
||||||
|
to the person itself instead of observer.
|
||||||
|
|
||||||
- "confidence" (float): The confidence score associated with the detected face.
|
- "confidence" (float): The confidence score associated with the detected face.
|
||||||
"""
|
"""
|
||||||
@ -201,8 +203,8 @@ def align_face(
|
|||||||
Align a given image horizontally with respect to their left and right eye locations
|
Align a given image horizontally with respect to their left and right eye locations
|
||||||
Args:
|
Args:
|
||||||
img (np.ndarray): pre-loaded image with detected face
|
img (np.ndarray): pre-loaded image with detected face
|
||||||
left_eye (list or tuple): coordinates of left eye with respect to the you
|
left_eye (list or tuple): coordinates of left eye with respect to the person itself
|
||||||
right_eye(list or tuple): coordinates of right eye with respect to the you
|
right_eye (list or tuple): coordinates of right eye with respect to the person itself
|
||||||
Returns:
|
Returns:
|
||||||
img (np.ndarray): aligned facial image
|
img (np.ndarray): aligned facial image
|
||||||
"""
|
"""
|
||||||
@ -214,6 +216,6 @@ def align_face(
|
|||||||
if img.shape[0] == 0 or img.shape[1] == 0:
|
if img.shape[0] == 0 or img.shape[1] == 0:
|
||||||
return img, 0
|
return img, 0
|
||||||
|
|
||||||
angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
|
angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
|
||||||
img = np.array(Image.fromarray(img).rotate(angle))
|
img = np.array(Image.fromarray(img).rotate(angle))
|
||||||
return img, angle
|
return img, angle
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"version": "0.0.89"
|
"version": "0.0.90"
|
||||||
}
|
}
|
@ -19,6 +19,12 @@ def test_different_detectors():
|
|||||||
assert "y" in img_obj["facial_area"].keys()
|
assert "y" in img_obj["facial_area"].keys()
|
||||||
assert "w" in img_obj["facial_area"].keys()
|
assert "w" in img_obj["facial_area"].keys()
|
||||||
assert "h" in img_obj["facial_area"].keys()
|
assert "h" in img_obj["facial_area"].keys()
|
||||||
|
# is left eye set with respect to the person instead of observer
|
||||||
|
assert "left_eye" in img_obj["facial_area"].keys()
|
||||||
|
assert "right_eye" in img_obj["facial_area"].keys()
|
||||||
|
right_eye = img_obj["facial_area"]["right_eye"]
|
||||||
|
left_eye = img_obj["facial_area"]["left_eye"]
|
||||||
|
assert left_eye[0] > right_eye[0]
|
||||||
assert "confidence" in img_obj.keys()
|
assert "confidence" in img_obj.keys()
|
||||||
|
|
||||||
img = img_obj["face"]
|
img = img_obj["face"]
|
||||||
|
@ -57,7 +57,7 @@ for df in dfs:
|
|||||||
logger.info(df)
|
logger.info(df)
|
||||||
|
|
||||||
|
|
||||||
expand_areas = [0, 25]
|
expand_areas = [0]
|
||||||
img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"]
|
img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"]
|
||||||
for expand_area in expand_areas:
|
for expand_area in expand_areas:
|
||||||
for img_path in img_paths:
|
for img_path in img_paths:
|
||||||
@ -86,6 +86,15 @@ for expand_area in expand_areas:
|
|||||||
assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
|
assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
|
||||||
assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
|
assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
|
||||||
|
|
||||||
|
# left eye is really the left eye of the person
|
||||||
|
if (
|
||||||
|
face_obj["facial_area"]["left_eye"] is not None
|
||||||
|
and face_obj["facial_area"]["right_eye"] is not None
|
||||||
|
):
|
||||||
|
re_x = face_obj["facial_area"]["right_eye"][0]
|
||||||
|
le_x = face_obj["facial_area"]["left_eye"][0]
|
||||||
|
assert re_x < le_x, "right eye must be the right eye of the person"
|
||||||
|
|
||||||
assert isinstance(face_obj["confidence"], float)
|
assert isinstance(face_obj["confidence"], float)
|
||||||
assert face_obj["confidence"] <= 1
|
assert face_obj["confidence"] <= 1
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user