Merge pull request #1140 from serengil/feat-task-2403-eye-coordinates-in-biology

Feat task 2403 eye coordinates in biology
Sefik Ilkin Serengil 2024-03-24 16:59:48 +00:00 committed by GitHub
commit 329606ffe8
16 changed files with 86 additions and 39 deletions

View File

@@ -476,7 +476,9 @@ def extract_faces(
         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
             - keys 'x', 'y', 'w', 'h' with int values
-            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
+            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. left and right eyes
+                are eyes on the left and right respectively with respect to the person itself
+                instead of observer.
         - "confidence" (float): The confidence score associated with the detected face.
     """

View File

@@ -1 +1 @@
-__version__ = "0.0.89"
+__version__ = "0.0.90"

View File

@@ -76,7 +76,9 @@ def detect_faces(
         - img (np.ndarray): The detected face as a NumPy array.
-        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+        - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
+            left_eye and right eye. left eye and right eye are eyes on the left and right
+            with respect to the person instead of observer.
         - confidence (float): The confidence score associated with the detected face.
     """
@@ -123,13 +125,11 @@ def detect_faces(
                     img=img, left_eye=left_eye, right_eye=right_eye
                 )
                 rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
-                    facial_area=(x, y, x + w, y + h),
-                    angle=angle,
-                    size=(img.shape[0], img.shape[1])
+                    facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
                 )
                 detected_face = aligned_img[
-                    int(rotated_y1) : int(rotated_y2),
-                    int(rotated_x1) : int(rotated_x2)]
+                    int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
+                ]

             result = DetectedFace(
                 img=detected_face,
@@ -143,9 +143,7 @@ def detect_faces(
 def rotate_facial_area(
-    facial_area: Tuple[int, int, int, int],
-    angle: float,
-    size: Tuple[int, int]
+    facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
 ) -> Tuple[int, int, int, int]:
     """
     Rotate the facial area around its center.
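For context on what rotate_facial_area has to compute: Image.rotate turns the image counterclockwise about its center, so each facial-area corner must be spun about that same center, with the y terms sign-flipped because image y grows downward. A standalone sketch of that mapping, not the library's exact implementation:

import numpy as np

def rotate_point(x: float, y: float, angle: float, size: tuple) -> tuple:
    # size is (height, width), matching size=(img.shape[0], img.shape[1]) above
    h, w = size
    cx, cy = w / 2, h / 2
    theta = np.radians(angle)
    # counterclockwise rotation about the image center, y axis pointing down
    nx = cx + (x - cx) * np.cos(theta) + (y - cy) * np.sin(theta)
    ny = cy - (x - cx) * np.sin(theta) + (y - cy) * np.cos(theta)
    return int(nx), int(ny)

# a point near the top center of a 200x200 image lands near the left middle
print(rotate_point(100, 40, 90, size=(200, 200)))  # (40, 100)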

View File

@@ -88,11 +88,11 @@ class DlibClient(Detector):
                 shape = self.model["sp"](img, detection)

-                left_eye = (
+                right_eye = (
                     int((shape.part(2).x + shape.part(3).x) // 2),
                     int((shape.part(2).y + shape.part(3).y) // 2),
                 )
-                right_eye = (
+                left_eye = (
                     int((shape.part(0).x + shape.part(1).x) // 2),
                     int((shape.part(0).y + shape.part(1).y) // 2),
                 )

View File

@@ -34,8 +34,8 @@ class FastMtCnnClient(Detector):
             ):
                 for regions, confidence, eyes in zip(*detections):
                     x, y, w, h = xyxy_to_xywh(regions)
-                    left_eye = eyes[0]
-                    right_eye = eyes[1]
+                    right_eye = eyes[0]
+                    left_eye = eyes[1]
                     left_eye = tuple(int(i) for i in left_eye)
                     right_eye = tuple(int(i) for i in right_eye)
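The xyxy_to_xywh call above converts fast-mtcnn's corner-style regions to the x, y, w, h form used elsewhere; a sketch of what such a helper does, assuming the real one may differ in rounding details:

def xyxy_to_xywh(regions) -> tuple:
    # (x1, y1) is the top-left corner, (x2, y2) the bottom-right one
    x1, y1, x2, y2 = regions
    return int(x1), int(y1), int(x2 - x1), int(y2 - y1)

print(xyxy_to_xywh((120.0, 80.0, 180.0, 150.0)))  # (120, 80, 60, 70)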

View File

@@ -61,8 +61,8 @@ class MediaPipeClient(Detector):
             y = int(bounding_box.ymin * img_height)
             h = int(bounding_box.height * img_height)

-            left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
-            right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
+            right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
+            left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
             # nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
             # mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
             # right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
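MediaPipe landmarks are normalized to [0, 1], hence the scaling by image width and height; a quick worked example with assumed values:

img_width, img_height = 640, 480
landmark_x, landmark_y = 0.25, 0.40  # normalized landmark position
pixel = (int(landmark_x * img_width), int(landmark_y * img_height))
print(pixel)  # (160, 192)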

View File

@@ -35,8 +35,10 @@ class MtCnnClient(Detector):
             for current_detection in detections:
                 x, y, w, h = current_detection["box"]
                 confidence = current_detection["confidence"]
-                left_eye = current_detection["keypoints"]["left_eye"]
-                right_eye = current_detection["keypoints"]["right_eye"]
+                # mtcnn detector assigns left eye with respect to the observer
+                # but we are setting it with respect to the person itself
+                left_eye = current_detection["keypoints"]["right_eye"]
+                right_eye = current_detection["keypoints"]["left_eye"]

                 facial_area = FacialAreaRegion(
                     x=x,
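The relabeling can be sanity-checked with assumed keypoints: mtcnn's "left_eye" is the observer's left (smaller x), which is the person's right eye in a non-mirrored frontal photo:

# assumed mtcnn keypoints for a frontal, non-mirrored photo
keypoints = {"left_eye": (110, 100), "right_eye": (150, 100)}  # observer's labels
left_eye = keypoints["right_eye"]   # person's left eye, image-right
right_eye = keypoints["left_eye"]   # person's right eye, image-left
assert left_eye[0] > right_eye[0]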

View File

@@ -112,15 +112,18 @@ class OpenCvClient(Detector):
                     eye_2 = eyes[1]

                     if eye_1[0] < eye_2[0]:
-                        left_eye = eye_1
-                        right_eye = eye_2
-                    else:
-                        left_eye = eye_2
-                        right_eye = eye_1
+                        right_eye = eye_1
+                        left_eye = eye_2
+                    else:
+                        right_eye = eye_2
+                        left_eye = eye_1

                     # -----------------------
                     # find center of eyes
-                    left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
+                    left_eye = (
+                        int(left_eye[0] + (left_eye[2] / 2)),
+                        int(left_eye[1] + (left_eye[3] / 2)),
+                    )
                     right_eye = (
                         int(right_eye[0] + (right_eye[2] / 2)),
                         int(right_eye[1] + (right_eye[3] / 2)),
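Because the Haar eye cascade returns the two eye boxes in no particular order, the x comparison decides ownership: the box further left in the image is the person's right eye. A worked example with assumed boxes, including the center computation:

# assumed (x, y, w, h) eye boxes from the cascade, in arbitrary order
eye_1 = (110, 60, 30, 18)
eye_2 = (170, 62, 30, 18)
if eye_1[0] < eye_2[0]:
    right_eye, left_eye = eye_1, eye_2  # image-left box is the person's right eye
else:
    right_eye, left_eye = eye_2, eye_1
# box origin plus half the box size gives the eye center
left_eye = (int(left_eye[0] + left_eye[2] / 2), int(left_eye[1] + left_eye[3] / 2))
right_eye = (int(right_eye[0] + right_eye[2] / 2), int(right_eye[1] + right_eye[3] / 2))
print(right_eye, left_eye)  # (125, 69) (185, 71)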

View File

@@ -34,9 +34,9 @@ class RetinaFaceClient(Detector):
             x = detection[0]
             w = detection[2] - x

-            # notice that these must be inverse for retinaface
-            left_eye = identity["landmarks"]["right_eye"]
-            right_eye = identity["landmarks"]["left_eye"]
+            # retinaface sets left and right eyes with respect to the person
+            left_eye = identity["landmarks"]["left_eye"]
+            right_eye = identity["landmarks"]["right_eye"]

             # eyes are list of float, need to cast them tuple of int
             left_eye = tuple(int(i) for i in left_eye)

View File

@@ -81,10 +81,10 @@ class YoloClient(Detector):
             x, y, w, h = result.boxes.xywh.tolist()[0]
             confidence = result.boxes.conf.tolist()[0]

-            # left_eye_conf = result.keypoints.conf[0][0]
-            # right_eye_conf = result.keypoints.conf[0][1]
-            left_eye = result.keypoints.xy[0][0].tolist()
-            right_eye = result.keypoints.xy[0][1].tolist()
+            # right_eye_conf = result.keypoints.conf[0][0]
+            # left_eye_conf = result.keypoints.conf[0][1]
+            right_eye = result.keypoints.xy[0][0].tolist()
+            left_eye = result.keypoints.xy[0][1].tolist()

             # eyes are list of float, need to cast them tuple of int
             left_eye = tuple(int(i) for i in left_eye)

View File

@@ -99,7 +99,7 @@ class YuNetClient(Detector):
             {x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
             left eye, nose tip, the right corner and left corner of the mouth respectively.
             """
-            (x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
+            (x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))

             # YuNet returns negative coordinates if it thinks part of the detected face
             # is outside the frame.

View File

@@ -20,7 +20,9 @@ class Detector(ABC):
             where each object contains:

             - facial_area (FacialAreaRegion): The facial area region represented
-                as x, y, w, h, left_eye and right_eye
+                as x, y, w, h, left_eye and right_eye. left eye and right eye are
+                eyes on the left and right respectively with respect to the person
+                instead of observer.
         """
         pass
@@ -44,6 +46,21 @@ class FacialAreaRegion:
         right_eye: Optional[Tuple[int, int]] = None,
         confidence: Optional[float] = None,
     ):
+        """
+        Initialize a Face object.
+
+        Args:
+            x (int): The x-coordinate of the top-left corner of the bounding box.
+            y (int): The y-coordinate of the top-left corner of the bounding box.
+            w (int): The width of the bounding box.
+            h (int): The height of the bounding box.
+            left_eye (tuple): The coordinates (x, y) of the left eye with respect to
+                the person instead of observer. Default is None.
+            right_eye (tuple): The coordinates (x, y) of the right eye with respect to
+                the person instead of observer. Default is None.
+            confidence (float, optional): Confidence score associated with the face detection.
+                Default is None.
+        """
         self.x = x
         self.y = y
         self.w = w
@@ -59,6 +76,14 @@ class DetectedFace:
     confidence: float

     def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
+        """
+        Initialize detected face object.
+
+        Args:
+            img (np.ndarray): detected face image as numpy array
+            facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
+            confidence (float): confidence score for face detection
+        """
         self.img = img
         self.facial_area = facial_area
         self.confidence = confidence
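Put together, the two constructors can be exercised as below; the import path is an assumption inferred from the class names in this diff:

import numpy as np
from deepface.models.Detector import DetectedFace, FacialAreaRegion  # assumed path

area = FacialAreaRegion(
    x=100, y=80, w=60, h=60,
    left_eye=(150, 100),   # person's left eye (observer's right)
    right_eye=(110, 100),  # person's right eye (observer's left)
    confidence=0.99,
)
face = DetectedFace(
    img=np.zeros((60, 60, 3), dtype=np.uint8),  # stand-in face crop
    facial_area=area,
    confidence=0.99,
)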

View File

@@ -68,7 +68,9 @@ def extract_faces(
         - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
             - keys 'x', 'y', 'w', 'h' with int values
-            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
+            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
+              left eye and right eye are eyes on the left and right respectively with respect
+              to the person itself instead of observer.
         - "confidence" (float): The confidence score associated with the detected face.
     """
@@ -201,8 +203,8 @@ def align_face(
     Align a given image horizantally with respect to their left and right eye locations
     Args:
         img (np.ndarray): pre-loaded image with detected face
-        left_eye (list or tuple): coordinates of left eye with respect to the you
-        right_eye(list or tuple): coordinates of right eye with respect to the you
+        left_eye (list or tuple): coordinates of left eye with respect to the person itself
+        right_eye(list or tuple): coordinates of right eye with respect to the person itself
     Returns:
         img (np.ndarray): aligned facial image
     """
@@ -214,6 +216,6 @@ def align_face(
     if img.shape[0] == 0 or img.shape[1] == 0:
         return img, 0

-    angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
+    angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
     img = np.array(Image.fromarray(img).rotate(angle))
     return img, angle
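The swapped arctan2 arguments keep the rotation direction correct under the person-based eye convention; a numeric check with assumed eye centers (image y grows downward, and Image.rotate turns counterclockwise for positive angles):

import numpy as np

right_eye = (30, 40)  # person's right eye, image-left
left_eye = (70, 48)   # person's left eye, image-right, sitting 8 px lower

angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
print(round(angle, 1))  # 11.3

# rotating the image counterclockwise by ~11.3 degrees raises the image-right
# side, bringing the lower (person's left) eye level with the other eye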

View File

@@ -1,3 +1,3 @@
 {
-    "version": "0.0.89"
+    "version": "0.0.90"
 }

View File

@@ -19,6 +19,12 @@ def test_different_detectors():
             assert "y" in img_obj["facial_area"].keys()
             assert "w" in img_obj["facial_area"].keys()
             assert "h" in img_obj["facial_area"].keys()
+            # is left eye set with respect to the person instead of observer
+            assert "left_eye" in img_obj["facial_area"].keys()
+            assert "right_eye" in img_obj["facial_area"].keys()
+            right_eye = img_obj["facial_area"]["right_eye"]
+            left_eye = img_obj["facial_area"]["left_eye"]
+            assert left_eye[0] > right_eye[0]
             assert "confidence" in img_obj.keys()

             img = img_obj["face"]

View File

@@ -57,7 +57,7 @@ for df in dfs:
     logger.info(df)

-expand_areas = [0, 25]
+expand_areas = [0]
 img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"]
 for expand_area in expand_areas:
     for img_path in img_paths:
@@ -86,6 +86,15 @@ for expand_area in expand_areas:
             assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
             assert isinstance(face_obj["facial_area"]["right_eye"][1], int)

+            # left eye is really the left eye of the person
+            if (
+                face_obj["facial_area"]["left_eye"] is not None
+                and face_obj["facial_area"]["right_eye"] is not None
+            ):
+                re_x = face_obj["facial_area"]["right_eye"][0]
+                le_x = face_obj["facial_area"]["left_eye"][0]
+                assert re_x < le_x, "right eye must be the right eye of the person"
+
             assert isinstance(face_obj["confidence"], float)
             assert face_obj["confidence"] <= 1