Merge pull request #1140 from serengil/feat-task-2403-eye-coordinates-in-biology

Feat task 2403 eye coordinates in biology
This commit is contained in:
Sefik Ilkin Serengil 2024-03-24 16:59:48 +00:00 committed by GitHub
commit 329606ffe8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 86 additions and 39 deletions

View File

@ -476,7 +476,9 @@ def extract_faces(
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
- keys 'x', 'y', 'w', 'h' with int values
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. The left and right
eyes are defined from the perspective of the person in the image, not from
the perspective of the observer.
- "confidence" (float): The confidence score associated with the detected face.
"""

View File

@ -1 +1 @@
__version__ = "0.0.89"
__version__ = "0.0.90"

View File

@ -76,7 +76,9 @@ def detect_faces(
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
left_eye and right_eye. The left and right eyes are defined from the perspective
of the person, not from the perspective of the observer.
- confidence (float): The confidence score associated with the detected face.
"""
@ -123,13 +125,11 @@ def detect_faces(
img=img, left_eye=left_eye, right_eye=right_eye
)
rotated_x1, rotated_y1, rotated_x2, rotated_y2 = rotate_facial_area(
facial_area=(x, y, x + w, y + h),
angle=angle,
size=(img.shape[0], img.shape[1])
facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
)
detected_face = aligned_img[
int(rotated_y1) : int(rotated_y2),
int(rotated_x1) : int(rotated_x2)]
int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
]
result = DetectedFace(
img=detected_face,
@ -143,9 +143,7 @@ def detect_faces(
def rotate_facial_area(
facial_area: Tuple[int, int, int, int],
angle: float,
size: Tuple[int, int]
facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
) -> Tuple[int, int, int, int]:
"""
Rotate the facial area around its center.

View File

@ -88,11 +88,11 @@ class DlibClient(Detector):
shape = self.model["sp"](img, detection)
left_eye = (
right_eye = (
int((shape.part(2).x + shape.part(3).x) // 2),
int((shape.part(2).y + shape.part(3).y) // 2),
)
right_eye = (
left_eye = (
int((shape.part(0).x + shape.part(1).x) // 2),
int((shape.part(0).y + shape.part(1).y) // 2),
)

View File

@ -34,8 +34,8 @@ class FastMtCnnClient(Detector):
):
for regions, confidence, eyes in zip(*detections):
x, y, w, h = xyxy_to_xywh(regions)
left_eye = eyes[0]
right_eye = eyes[1]
right_eye = eyes[0]
left_eye = eyes[1]
left_eye = tuple(int(i) for i in left_eye)
right_eye = tuple(int(i) for i in right_eye)

View File

@ -61,8 +61,8 @@ class MediaPipeClient(Detector):
y = int(bounding_box.ymin * img_height)
h = int(bounding_box.height * img_height)
left_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
right_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
# nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
# mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
# right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))

View File

@ -35,8 +35,10 @@ class MtCnnClient(Detector):
for current_detection in detections:
x, y, w, h = current_detection["box"]
confidence = current_detection["confidence"]
left_eye = current_detection["keypoints"]["left_eye"]
right_eye = current_detection["keypoints"]["right_eye"]
# mtcnn detector assigns left eye with respect to the observer
# but we are setting it with respect to the person itself
left_eye = current_detection["keypoints"]["right_eye"]
right_eye = current_detection["keypoints"]["left_eye"]
facial_area = FacialAreaRegion(
x=x,

View File

@ -112,15 +112,18 @@ class OpenCvClient(Detector):
eye_2 = eyes[1]
if eye_1[0] < eye_2[0]:
left_eye = eye_1
right_eye = eye_2
else:
left_eye = eye_2
right_eye = eye_1
left_eye = eye_2
else:
right_eye = eye_2
left_eye = eye_1
# -----------------------
# find center of eyes
left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
left_eye = (
int(left_eye[0] + (left_eye[2] / 2)),
int(left_eye[1] + (left_eye[3] / 2)),
)
right_eye = (
int(right_eye[0] + (right_eye[2] / 2)),
int(right_eye[1] + (right_eye[3] / 2)),

View File

@ -34,9 +34,9 @@ class RetinaFaceClient(Detector):
x = detection[0]
w = detection[2] - x
# notice that these must be inverse for retinaface
left_eye = identity["landmarks"]["right_eye"]
right_eye = identity["landmarks"]["left_eye"]
# retinaface sets left and right eyes with respect to the person
left_eye = identity["landmarks"]["left_eye"]
right_eye = identity["landmarks"]["right_eye"]
# eyes are list of float, need to cast them tuple of int
left_eye = tuple(int(i) for i in left_eye)

View File

@ -81,10 +81,10 @@ class YoloClient(Detector):
x, y, w, h = result.boxes.xywh.tolist()[0]
confidence = result.boxes.conf.tolist()[0]
# left_eye_conf = result.keypoints.conf[0][0]
# right_eye_conf = result.keypoints.conf[0][1]
left_eye = result.keypoints.xy[0][0].tolist()
right_eye = result.keypoints.xy[0][1].tolist()
# right_eye_conf = result.keypoints.conf[0][0]
# left_eye_conf = result.keypoints.conf[0][1]
right_eye = result.keypoints.xy[0][0].tolist()
left_eye = result.keypoints.xy[0][1].tolist()
# eyes are list of float, need to cast them tuple of int
left_eye = tuple(int(i) for i in left_eye)

View File

@ -99,7 +99,7 @@ class YuNetClient(Detector):
{x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
left eye, nose tip, the right corner and left corner of the mouth respectively.
"""
(x, y, w, h, x_re, y_re, x_le, y_le) = list(map(int, face[:8]))
(x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))
# YuNet returns negative coordinates if it thinks part of the detected face
# is outside the frame.

View File

@ -20,7 +20,9 @@ class Detector(ABC):
where each object contains:
- facial_area (FacialAreaRegion): The facial area region represented
as x, y, w, h, left_eye and right_eye
as x, y, w, h, left_eye and right_eye. The left and right eyes are
defined from the perspective of the person, not from the perspective
of the observer.
"""
pass
@ -44,6 +46,21 @@ class FacialAreaRegion:
right_eye: Optional[Tuple[int, int]] = None,
confidence: Optional[float] = None,
):
"""
Initialize a FacialAreaRegion object.
Args:
x (int): The x-coordinate of the top-left corner of the bounding box.
y (int): The y-coordinate of the top-left corner of the bounding box.
w (int): The width of the bounding box.
h (int): The height of the bounding box.
left_eye (tuple): The coordinates (x, y) of the left eye with respect to
the person instead of observer. Default is None.
right_eye (tuple): The coordinates (x, y) of the right eye with respect to
the person instead of observer. Default is None.
confidence (float, optional): Confidence score associated with the face detection.
Default is None.
"""
self.x = x
self.y = y
self.w = w
@ -59,6 +76,14 @@ class DetectedFace:
confidence: float
def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
"""
Initialize detected face object.
Args:
img (np.ndarray): detected face image as numpy array
facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
confidence (float): confidence score for face detection
"""
self.img = img
self.facial_area = facial_area
self.confidence = confidence

View File

@ -68,7 +68,9 @@ def extract_faces(
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
- keys 'x', 'y', 'w', 'h' with int values
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
The left and right eyes are defined from the perspective of the person
in the image, not from the perspective of the observer.
- "confidence" (float): The confidence score associated with the detected face.
"""
@ -201,8 +203,8 @@ def align_face(
Align a given image horizontally with respect to its left and right eye locations
Args:
img (np.ndarray): pre-loaded image with detected face
left_eye (list or tuple): coordinates of left eye with respect to the you
right_eye(list or tuple): coordinates of right eye with respect to the you
left_eye (list or tuple): coordinates of left eye with respect to the person itself
right_eye (list or tuple): coordinates of right eye with respect to the person itself
Returns:
img (np.ndarray): aligned facial image
"""
@ -214,6 +216,6 @@ def align_face(
if img.shape[0] == 0 or img.shape[1] == 0:
return img, 0
angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
img = np.array(Image.fromarray(img).rotate(angle))
return img, angle

View File

@ -1,3 +1,3 @@
{
"version": "0.0.89"
"version": "0.0.90"
}

View File

@ -19,6 +19,12 @@ def test_different_detectors():
assert "y" in img_obj["facial_area"].keys()
assert "w" in img_obj["facial_area"].keys()
assert "h" in img_obj["facial_area"].keys()
# check that the left eye is set with respect to the person rather than the observer
assert "left_eye" in img_obj["facial_area"].keys()
assert "right_eye" in img_obj["facial_area"].keys()
right_eye = img_obj["facial_area"]["right_eye"]
left_eye = img_obj["facial_area"]["left_eye"]
assert left_eye[0] > right_eye[0]
assert "confidence" in img_obj.keys()
img = img_obj["face"]

View File

@ -57,7 +57,7 @@ for df in dfs:
logger.info(df)
expand_areas = [0, 25]
expand_areas = [0]
img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg"]
for expand_area in expand_areas:
for img_path in img_paths:
@ -86,6 +86,15 @@ for expand_area in expand_areas:
assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
# left eye is really the left eye of the person
if (
face_obj["facial_area"]["left_eye"] is not None
and face_obj["facial_area"]["right_eye"] is not None
):
re_x = face_obj["facial_area"]["right_eye"][0]
le_x = face_obj["facial_area"]["left_eye"][0]
assert re_x < le_x, "right eye must be the right eye of the person"
assert isinstance(face_obj["confidence"], float)
assert face_obj["confidence"] <= 1