Refactor face extraction and alignment logic; enhance bounding box transformation and facial landmark adjustments

This commit is contained in:
Jogendra Dhaka 2024-12-30 23:42:51 +05:30
parent ca6afd98ef
commit 07845a4fae

View File

@ -286,6 +286,7 @@ def extract_face(
mouth_left = facial_area.mouth_left
mouth_right = facial_area.mouth_right
# Expand the facial area if needed
if expand_percentage > 0:
# Expand the facial region height and width by the provided percentage
# ensuring that the expanded region stays within img.shape limits
@ -300,48 +301,53 @@ def extract_face(
# extract detected face unaligned
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
# align original image, then find projection of detected face area after alignment
if align is True: # and left_eye is not None and right_eye is not None:
if align:
# we were aligning the original image before, but this comes with an extra cost
# instead we now focus on the facial area with a margin
# and align it instead of original image to decrese the cost
# and align it instead of original image to decrease the cost
# 1. Extract sub-image (bigger region) around face
sub_img, relative_x, relative_y = extract_sub_image(img=img, facial_area=(x, y, w, h))
aligned_sub_img, angle = align_img_wrt_eyes(
# 2. Align sub_img wrt eyes
aligned_sub_img, angle, M, new_w, new_h = align_img_wrt_eyes(
img=sub_img, left_eye=left_eye, right_eye=right_eye
)
rotated_x1, rotated_y1, rotated_x2, rotated_y2 = project_facial_area(
facial_area=(
relative_x,
relative_y,
relative_x + w,
relative_y + h,
),
angle=angle,
size=(sub_img.shape[0], sub_img.shape[1]),
)
detected_face = aligned_sub_img[
int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
]
# 3. Transform the bounding box (relative_x, relative_y, w, h)
# in sub_img space => bounding box corners => then rotate them
x1_rel = relative_x
y1_rel = relative_y
x2_rel = x1_rel + w
y2_rel = y1_rel + h
# do not spend memory for these temporary variables anymore
(rotated_x1, rotated_y1, rotated_x2, rotated_y2) = transform_bounding_box(
x1_rel, y1_rel, x2_rel, y2_rel, M, new_w, new_h
)
# 4. Crop from rotated image
detected_face = aligned_sub_img[rotated_y1:rotated_y2, rotated_x1:rotated_x2]
# 5. Adjust original facial_area's top-left in case we have added borders
x -= width_border
y -= height_border
# 6. Re-position facial landmarks
def shift_landmark(landmark):
if landmark is not None:
return (landmark[0] - width_border, landmark[1] - height_border)
return None
left_eye = shift_landmark(left_eye)
right_eye = shift_landmark(right_eye)
nose = shift_landmark(nose)
mouth_left = shift_landmark(mouth_left)
mouth_right = shift_landmark(mouth_right)
# free memory from big arrays
del aligned_sub_img, sub_img
# restore x, y, le and re before border added
x = x - width_border
y = y - height_border
# w and h will not change
if left_eye is not None:
left_eye = (left_eye[0] - width_border, left_eye[1] - height_border)
if right_eye is not None:
right_eye = (right_eye[0] - width_border, right_eye[1] - height_border)
if nose is not None:
nose = (nose[0] - width_border, nose[1] - height_border)
if mouth_left is not None:
mouth_left = (mouth_left[0] - width_border, mouth_left[1] - height_border)
if mouth_right is not None:
mouth_right = (mouth_right[0] - width_border, mouth_right[1] - height_border)
# Return final DetectedFace
return DetectedFace(
img=detected_face,
facial_area=FacialAreaRegion(
@ -412,38 +418,94 @@ def extract_sub_image(
return extracted_face, relative_x, relative_y
def transform_bounding_box(
x1: float, y1: float, x2: float, y2: float, M: np.ndarray, new_w: int, new_h: int
) -> Tuple[int, int, int, int]:
"""
Applies the rotation matrix M to the corners of the bounding box,
then returns the bounding box (min_x, min_y, max_x, max_y) in the rotated image space.
All values are clamped to the new image boundaries.
"""
# corners in homogeneous coordinates
corners = np.array(
[[x1, y1, 1.0], [x2, y1, 1.0], [x2, y2, 1.0], [x1, y2, 1.0]], dtype=np.float32
)
# apply transformation
transformed = (M @ corners.T).T # shape (4, 2)
xs = transformed[:, 0]
ys = transformed[:, 1]
min_x, max_x = np.min(xs), np.max(xs)
min_y, max_y = np.min(ys), np.max(ys)
# clamp to boundaries
min_x = max(0, min_x)
min_y = max(0, min_y)
max_x = min(new_w, max_x)
max_y = min(new_h, max_y)
return int(min_x), int(min_y), int(max_x), int(max_y)
def align_img_wrt_eyes(
img: np.ndarray,
left_eye: Optional[Union[list, tuple]],
right_eye: Optional[Union[list, tuple]],
) -> Tuple[np.ndarray, float]:
) -> Tuple[np.ndarray, float, np.ndarray, int, int]:
"""
Align a given image horizantally with respect to their left and right eye locations
Align a given image horizontally with respect to the left and right eye locations.
Returns the rotated image, rotation angle in degrees, rotation matrix M, and final image size.
Args:
img (np.ndarray): pre-loaded image with detected face
left_eye (list or tuple): coordinates of left eye with respect to the person itself
right_eye(list or tuple): coordinates of right eye with respect to the person itself
Returns:
img (np.ndarray): aligned facial image
rotated_img (np.ndarray): The rotated image
angle (float): The rotation angle in degrees
M (np.ndarray): The rotation matrix
new_w (int): The width of the rotated image
new_h (int): The height of the rotated image
"""
# if eye could not be detected for the given image, return image itself
if left_eye is None or right_eye is None:
return img, 0
# if eyes are missing or image has nil dimensions, do nothing
if left_eye is None or right_eye is None or img.shape[0] == 0 or img.shape[1] == 0:
return img, 0.0, np.eye(3, dtype=np.float32), img.shape[1], img.shape[0]
# sometimes unexpectedly detected images come with nil dimensions
if img.shape[0] == 0 or img.shape[1] == 0:
return img, 0
angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
# compute rotation angle
dy = left_eye[1] - right_eye[1]
dx = left_eye[0] - right_eye[0]
angle = float(np.degrees(np.arctan2(dy, dx)))
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
# get initial rotation matrix
M = cv2.getRotationMatrix2D(center, angle, 1.0)
img = cv2.warpAffine(
img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0)
# compute the absolute values of cos and sin
cos_val = np.abs(M[0, 0])
sin_val = np.abs(M[0, 1])
# compute new bounding dimensions after rotation
new_w = int((h * sin_val) + (w * cos_val))
new_h = int((h * cos_val) + (w * sin_val))
# adjust the rotation matrix to account for translation
M[0, 2] += (new_w / 2) - center[0]
M[1, 2] += (new_h / 2) - center[1]
# rotate the image with a resized bounding box
rotated_img = cv2.warpAffine(
img,
M,
(new_w, new_h),
flags=cv2.INTER_CUBIC,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0, 0, 0),
)
return img, angle
return rotated_img, angle, M, new_w, new_h
def project_facial_area(