diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
index b9fe211..6d07c86 100644
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@@ -423,7 +423,7 @@ def stream(
 
 def extract_faces(
     img_path: Union[str, np.ndarray],
-    target_size: Tuple[int, int] = (224, 224),
+    target_size: Optional[Tuple[int, int]] = (224, 224),
     detector_backend: str = "opencv",
     enforce_detection: bool = True,
     align: bool = True,
diff --git a/deepface/detectors/DetectorWrapper.py b/deepface/detectors/DetectorWrapper.py
index 55e26e1..176c06d 100644
--- a/deepface/detectors/DetectorWrapper.py
+++ b/deepface/detectors/DetectorWrapper.py
@@ -1,4 +1,4 @@
-from typing import Any, List
+from typing import Any, List, Tuple
 import numpy as np
 from deepface.modules import detection
 from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
@@ -106,17 +106,27 @@ def detect_faces(
         # expand the facial area to be extracted and stay within img.shape limits
         x2 = max(0, x - int((w * expand_percentage) / 100))  # expand left
         y2 = max(0, y - int((h * expand_percentage) / 100))  # expand top
-        w2 = min(img.shape[1], w + int((w * expand_percentage) / 100))  # expand right
-        h2 = min(img.shape[0], h + int((h * expand_percentage) / 100))  # expand bottom
+        w2 = min(img.shape[1], w + int((w * 2 * expand_percentage) / 100))  # expand right
+        h2 = min(img.shape[0], h + int((h * 2 * expand_percentage) / 100))  # expand bottom
 
         # extract detected face unaligned
         detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
 
-        # align detected face
-        if align is True:
-            detected_face = detection.align_face(
-                img=detected_face, left_eye=left_eye, right_eye=right_eye
+        # aligning detected face causes a lot of black pixels
+        # if align is True:
+        #     detected_face, _ = detection.align_face(
+        #         img=detected_face, left_eye=left_eye, right_eye=right_eye
+        #     )
+
+        # align original image, then find projection of detected face area after alignment
+        if align is True:  # and left_eye is not None and right_eye is not None:
+            aligned_img, angle = detection.align_face(
+                img=img, left_eye=left_eye, right_eye=right_eye
             )
+            x1_new, y1_new, x2_new, y2_new = rotate_facial_area(
+                facial_area=(x2, y2, x2 + w2, y2 + h2), angle=angle, direction=1, size=img.shape
+            )
+            detected_face = aligned_img[int(y1_new) : int(y2_new), int(x1_new) : int(x2_new)]
 
         result = DetectedFace(
             img=detected_face,
@@ -127,3 +137,45 @@ def detect_faces(
         )
         results.append(result)
     return results
+
+
+def rotate_facial_area(
+    facial_area: Tuple[int, int, int, int], angle: float, direction: int, size: Tuple[int, int]
+) -> Tuple[int, int, int, int]:
+    """
+    Rotate the facial area around its center.
+    Inspired by the work of @UmutDeniz26 - github.com/serengil/retinaface/pull/80
+
+    Args:
+        facial_area (tuple of int): Representing the (x1, y1, x2, y2) of the facial area.
+            x2 is equal to x1 + w1, and y2 is equal to y1 + h1
+        angle (float): Angle of rotation in degrees.
+        direction (int): Direction of rotation (-1 for clockwise, 1 for counterclockwise).
+        size (tuple of int): Tuple representing the size of the image (height, width).
+
+    Returns:
+        rotated_coordinates (tuple of int): Representing the new coordinates
+            (x1, y1, x2, y2) or (x1, y1, x1+w1, y1+h1) of the rotated facial area.
+ """ + # Angle in radians + angle = angle * np.pi / 180 + + # Translate the facial area to the center of the image + x = (facial_area[0] + facial_area[2]) / 2 - size[1] / 2 + y = (facial_area[1] + facial_area[3]) / 2 - size[0] / 2 + + # Rotate the facial area + x_new = x * np.cos(angle) + y * direction * np.sin(angle) + y_new = -x * direction * np.sin(angle) + y * np.cos(angle) + + # Translate the facial area back to the original position + x_new = x_new + size[1] / 2 + y_new = y_new + size[0] / 2 + + # Calculate the new facial area + x1 = x_new - (facial_area[2] - facial_area[0]) / 2 + y1 = y_new - (facial_area[3] - facial_area[1]) / 2 + x2 = x_new + (facial_area[2] - facial_area[0]) / 2 + y2 = y_new + (facial_area[3] - facial_area[1]) / 2 + + return (int(x1), int(y1), int(x2), int(y2)) diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py index 03e485f..e3a0748 100644 --- a/deepface/modules/detection.py +++ b/deepface/modules/detection.py @@ -1,5 +1,5 @@ # built-in dependencies -from typing import Any, Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union, Optional # 3rd part dependencies import numpy as np @@ -27,7 +27,7 @@ elif tf_major_version == 2: def extract_faces( img_path: Union[str, np.ndarray], - target_size: Tuple[int, int] = (224, 224), + target_size: Optional[Tuple[int, int]] = (224, 224), detector_backend: str = "opencv", enforce_detection: bool = True, align: bool = True, @@ -116,42 +116,43 @@ def extract_faces( current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY) # resize and padding - factor_0 = target_size[0] / current_img.shape[0] - factor_1 = target_size[1] / current_img.shape[1] - factor = min(factor_0, factor_1) + if target_size is not None: + factor_0 = target_size[0] / current_img.shape[0] + factor_1 = target_size[1] / current_img.shape[1] + factor = min(factor_0, factor_1) - dsize = ( - int(current_img.shape[1] * factor), - int(current_img.shape[0] * factor), - ) - current_img = cv2.resize(current_img, dsize) - - diff_0 = target_size[0] - current_img.shape[0] - diff_1 = target_size[1] - current_img.shape[1] - if grayscale is False: - # Put the base image in the middle of the padded image - current_img = np.pad( - current_img, - ( - (diff_0 // 2, diff_0 - diff_0 // 2), - (diff_1 // 2, diff_1 - diff_1 // 2), - (0, 0), - ), - "constant", - ) - else: - current_img = np.pad( - current_img, - ( - (diff_0 // 2, diff_0 - diff_0 // 2), - (diff_1 // 2, diff_1 - diff_1 // 2), - ), - "constant", + dsize = ( + int(current_img.shape[1] * factor), + int(current_img.shape[0] * factor), ) + current_img = cv2.resize(current_img, dsize) - # double check: if target image is not still the same size with target. - if current_img.shape[0:2] != target_size: - current_img = cv2.resize(current_img, target_size) + diff_0 = target_size[0] - current_img.shape[0] + diff_1 = target_size[1] - current_img.shape[1] + if grayscale is False: + # Put the base image in the middle of the padded image + current_img = np.pad( + current_img, + ( + (diff_0 // 2, diff_0 - diff_0 // 2), + (diff_1 // 2, diff_1 - diff_1 // 2), + (0, 0), + ), + "constant", + ) + else: + current_img = np.pad( + current_img, + ( + (diff_0 // 2, diff_0 - diff_0 // 2), + (diff_1 // 2, diff_1 - diff_1 // 2), + ), + "constant", + ) + + # double check: if target image is not still the same size with target. + if current_img.shape[0:2] != target_size: + current_img = cv2.resize(current_img, target_size) # normalizing the image pixels # what this line doing? must? 
@@ -189,7 +190,7 @@ def align_face(
     img: np.ndarray,
     left_eye: Union[list, tuple],
     right_eye: Union[list, tuple],
-) -> np.ndarray:
+) -> Tuple[np.ndarray, float]:
     """
     Align a given image horizantally with respect to their left and right eye locations
     Args:
@@ -201,13 +202,13 @@
     """
     # if eye could not be detected for the given image, return image itself
     if left_eye is None or right_eye is None:
-        return img
+        return img, 0
 
     # sometimes unexpectedly detected images come with nil dimensions
     if img.shape[0] == 0 or img.shape[1] == 0:
-        return img
+        return img, 0
 
     angle = float(np.degrees(np.arctan2(right_eye[1] - left_eye[1], right_eye[0] - left_eye[0])))
     img = Image.fromarray(img)
     img = np.array(img.rotate(angle))
-    return img
+    return img, angle
diff --git a/tests/dataset/img11_reflection.jpg b/tests/dataset/img11_reflection.jpg
new file mode 100644
index 0000000..24b29f2
Binary files /dev/null and b/tests/dataset/img11_reflection.jpg differ
diff --git a/tests/visual-test.py b/tests/visual-test.py
index 876a9f7..2148c0e 100644
--- a/tests/visual-test.py
+++ b/tests/visual-test.py
@@ -53,24 +53,38 @@ dfs = DeepFace.find(
 for df in dfs:
     logger.info(df)
 
-# extract faces
-for detector_backend in detector_backends:
-    face_objs = DeepFace.extract_faces(
-        img_path="dataset/img11.jpg", detector_backend=detector_backend, align=True
-    )
-    for face_obj in face_objs:
-        face = face_obj["face"]
-        logger.info(detector_backend)
-        logger.info(face_obj["facial_area"])
-        logger.info(face_obj["confidence"])
-        assert isinstance(face_obj["facial_area"]["left_eye"], tuple)
-        assert isinstance(face_obj["facial_area"]["right_eye"], tuple)
-        assert isinstance(face_obj["facial_area"]["left_eye"][0], int)
-        assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
-        assert isinstance(face_obj["facial_area"]["left_eye"][1], int)
-        assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
-        assert isinstance(face_obj["confidence"], float)
-        plt.imshow(face)
-        plt.axis("off")
-        plt.show()
-        logger.info("-----------")
+
+# img_paths = ["dataset/img11.jpg", "dataset/img11_reflection.jpg", "dataset/couple.jpg"]
+img_paths = ["dataset/img11.jpg"]
+for img_path in img_paths:
+    # extract faces
+    for detector_backend in detector_backends:
+        face_objs = DeepFace.extract_faces(
+            img_path=img_path,
+            detector_backend=detector_backend,
+            align=True,
+            # expand_percentage=10,
+            # target_size=None,
+        )
+        for face_obj in face_objs:
+            face = face_obj["face"]
+            logger.info(detector_backend)
+            logger.info(face_obj["facial_area"])
+            logger.info(face_obj["confidence"])
+
+            # we know opencv sometimes cannot find eyes
+            if face_obj["facial_area"]["left_eye"] is not None:
+                assert isinstance(face_obj["facial_area"]["left_eye"], tuple)
+                assert isinstance(face_obj["facial_area"]["left_eye"][0], int)
+                assert isinstance(face_obj["facial_area"]["left_eye"][1], int)
+
+            if face_obj["facial_area"]["right_eye"] is not None:
+                assert isinstance(face_obj["facial_area"]["right_eye"], tuple)
+                assert isinstance(face_obj["facial_area"]["right_eye"][0], int)
+                assert isinstance(face_obj["facial_area"]["right_eye"][1], int)
+
+            assert isinstance(face_obj["confidence"], float)
+            plt.imshow(face)
+            plt.axis("off")
+            plt.show()
+            logger.info("-----------")
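
Usage sketch (not part of the patch): a minimal way to exercise the changed behaviour, assuming this branch is installed and the script is run from the tests/ directory like visual-test.py. With align=True, alignment is now applied to the full input image and the facial area is re-projected through rotate_facial_area before cropping; passing target_size=None skips the resize-and-padding step so the returned crop keeps its detected resolution, and eye coordinates may be None when the backend cannot locate them.

    from deepface import DeepFace

    face_objs = DeepFace.extract_faces(
        img_path="dataset/img11.jpg",
        detector_backend="opencv",
        align=True,          # rotate the whole image, then crop the re-projected facial area
        target_size=None,    # keep the crop at its native size (no resize/padding)
    )
    for face_obj in face_objs:
        # face crop, facial area dict (eyes may be None for some backends), and confidence
        print(face_obj["face"].shape, face_obj["facial_area"], face_obj["confidence"])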