diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index a8ebab5..8a4769b 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -2,7 +2,7 @@ import os import warnings import logging -from typing import Any, Dict, List, Tuple, Union, Optional +from typing import Any, Dict, List, Union, Optional # this has to be set before importing tensorflow os.environ["TF_USE_LEGACY_KERAS"] = "1" @@ -439,7 +439,6 @@ def stream( def extract_faces( img_path: Union[str, np.ndarray], - target_size: Optional[Tuple[int, int]] = (224, 224), detector_backend: str = "opencv", enforce_detection: bool = True, align: bool = True, @@ -453,9 +452,6 @@ def extract_faces( img_path (str or np.ndarray): Path to the first image. Accepts exact image path as a string, numpy array (BGR), or base64 encoded images. - target_size (tuple): final shape of facial image. black pixels will be - added to resize the image (default is (224, 224)). - detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). @@ -485,13 +481,11 @@ def extract_faces( return detection.extract_faces( img_path=img_path, - target_size=target_size, detector_backend=detector_backend, enforce_detection=enforce_detection, align=align, expand_percentage=expand_percentage, grayscale=grayscale, - human_readable=True, ) diff --git a/deepface/modules/demography.py b/deepface/modules/demography.py index fa15c3a..59c1f82 100644 --- a/deepface/modules/demography.py +++ b/deepface/modules/demography.py @@ -6,7 +6,7 @@ import numpy as np from tqdm import tqdm # project dependencies -from deepface.modules import modeling, detection +from deepface.modules import modeling, detection, preprocessing from deepface.extendedmodels import Gender, Race, Emotion @@ -118,7 +118,6 @@ def analyze( img_objs = detection.extract_faces( img_path=img_path, - target_size=(224, 224), detector_backend=detector_backend, grayscale=False, enforce_detection=enforce_detection, @@ -130,60 +129,68 @@ def analyze( img_content = img_obj["face"] img_region = img_obj["facial_area"] img_confidence = img_obj["confidence"] - if img_content.shape[0] > 0 and img_content.shape[1] > 0: - obj = {} - # facial attribute analysis - pbar = tqdm( - range(0, len(actions)), - desc="Finding actions", - disable=silent if len(actions) > 1 else True, - ) - for index in pbar: - action = actions[index] - pbar.set_description(f"Action: {action}") + if img_content.shape[0] == 0 or img_content.shape[1] == 0: + continue - if action == "emotion": - emotion_predictions = modeling.build_model("Emotion").predict(img_content) - sum_of_predictions = emotion_predictions.sum() + # rgb to bgr + img_content = img_content[:, :, ::-1] - obj["emotion"] = {} - for i, emotion_label in enumerate(Emotion.labels): - emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions - obj["emotion"][emotion_label] = emotion_prediction + # resize input image + img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224)) - obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)] + obj = {} + # facial attribute analysis + pbar = tqdm( + range(0, len(actions)), + desc="Finding actions", + disable=silent if len(actions) > 1 else True, + ) + for index in pbar: + action = actions[index] + pbar.set_description(f"Action: {action}") - elif action == "age": - apparent_age = modeling.build_model("Age").predict(img_content) - # int cast is for exception - object of type 'float32' is not JSON serializable - obj["age"] 
= int(apparent_age) + if action == "emotion": + emotion_predictions = modeling.build_model("Emotion").predict(img_content) + sum_of_predictions = emotion_predictions.sum() - elif action == "gender": - gender_predictions = modeling.build_model("Gender").predict(img_content) - obj["gender"] = {} - for i, gender_label in enumerate(Gender.labels): - gender_prediction = 100 * gender_predictions[i] - obj["gender"][gender_label] = gender_prediction + obj["emotion"] = {} + for i, emotion_label in enumerate(Emotion.labels): + emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions + obj["emotion"][emotion_label] = emotion_prediction - obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)] + obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)] - elif action == "race": - race_predictions = modeling.build_model("Race").predict(img_content) - sum_of_predictions = race_predictions.sum() + elif action == "age": + apparent_age = modeling.build_model("Age").predict(img_content) + # int cast is for exception - object of type 'float32' is not JSON serializable + obj["age"] = int(apparent_age) - obj["race"] = {} - for i, race_label in enumerate(Race.labels): - race_prediction = 100 * race_predictions[i] / sum_of_predictions - obj["race"][race_label] = race_prediction + elif action == "gender": + gender_predictions = modeling.build_model("Gender").predict(img_content) + obj["gender"] = {} + for i, gender_label in enumerate(Gender.labels): + gender_prediction = 100 * gender_predictions[i] + obj["gender"][gender_label] = gender_prediction - obj["dominant_race"] = Race.labels[np.argmax(race_predictions)] + obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)] - # ----------------------------- - # mention facial areas - obj["region"] = img_region - # include image confidence - obj["face_confidence"] = img_confidence + elif action == "race": + race_predictions = modeling.build_model("Race").predict(img_content) + sum_of_predictions = race_predictions.sum() - resp_objects.append(obj) + obj["race"] = {} + for i, race_label in enumerate(Race.labels): + race_prediction = 100 * race_predictions[i] / sum_of_predictions + obj["race"][race_label] = race_prediction + + obj["dominant_race"] = Race.labels[np.argmax(race_predictions)] + + # ----------------------------- + # mention facial areas + obj["region"] = img_region + # include image confidence + obj["face_confidence"] = img_confidence + + resp_objects.append(obj) return resp_objects diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py index 18bad8f..edecf1e 100644 --- a/deepface/modules/detection.py +++ b/deepface/modules/detection.py @@ -1,5 +1,5 @@ # built-in dependencies -from typing import Any, Dict, List, Tuple, Union, Optional +from typing import Any, Dict, List, Tuple, Union # 3rd part dependencies import numpy as np @@ -10,7 +10,6 @@ from PIL import Image from deepface.modules import preprocessing from deepface.models.Detector import DetectedFace, FacialAreaRegion from deepface.detectors import DetectorWrapper -from deepface.commons import package_utils from deepface.commons.logger import Logger logger = Logger(module="deepface/modules/detection.py") @@ -18,22 +17,13 @@ logger = Logger(module="deepface/modules/detection.py") # pylint: disable=no-else-raise -tf_major_version = package_utils.get_tf_major_version() -if tf_major_version == 1: - from keras.preprocessing import image -elif tf_major_version == 2: - from tensorflow.keras.preprocessing import image - - def 
extract_faces( img_path: Union[str, np.ndarray], - target_size: Optional[Tuple[int, int]] = (224, 224), detector_backend: str = "opencv", enforce_detection: bool = True, align: bool = True, expand_percentage: int = 0, grayscale: bool = False, - human_readable=False, ) -> List[Dict[str, Any]]: """ Extract faces from a given image @@ -42,9 +32,6 @@ def extract_faces( img_path (str or np.ndarray): Path to the first image. Accepts exact image path as a string, numpy array (BGR), or base64 encoded images. - target_size (tuple): final shape of facial image. black pixels will be - added to resize the image. - detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv) @@ -58,13 +45,10 @@ def extract_faces( grayscale (boolean): Flag to convert the image to grayscale before processing (default is False). - human_readable (bool): Flag to make the image human readable. 3D RGB for human readable - or 4D BGR for ML models (default is False). - Returns: results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains: - - "face" (np.ndarray): The detected face as a NumPy array. + - "face" (np.ndarray): The detected face as a NumPy array in RGB format. - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing: - keys 'x', 'y', 'w', 'h' with int values @@ -122,57 +106,11 @@ def extract_faces( if grayscale is True: current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY) - # resize and padding - if target_size is not None: - factor_0 = target_size[0] / current_img.shape[0] - factor_1 = target_size[1] / current_img.shape[1] - factor = min(factor_0, factor_1) - - dsize = ( - int(current_img.shape[1] * factor), - int(current_img.shape[0] * factor), - ) - current_img = cv2.resize(current_img, dsize) - - diff_0 = target_size[0] - current_img.shape[0] - diff_1 = target_size[1] - current_img.shape[1] - if grayscale is False: - # Put the base image in the middle of the padded image - current_img = np.pad( - current_img, - ( - (diff_0 // 2, diff_0 - diff_0 // 2), - (diff_1 // 2, diff_1 - diff_1 // 2), - (0, 0), - ), - "constant", - ) - else: - current_img = np.pad( - current_img, - ( - (diff_0 // 2, diff_0 - diff_0 // 2), - (diff_1 // 2, diff_1 - diff_1 // 2), - ), - "constant", - ) - - # double check: if target image is not still the same size with target. - if current_img.shape[0:2] != target_size: - current_img = cv2.resize(current_img, target_size) - - # normalizing the image pixels - # what this line doing? must? 
- img_pixels = image.img_to_array(current_img) - img_pixels = np.expand_dims(img_pixels, axis=0) - img_pixels /= 255 # normalize input in [0, 1] - # discard expanded dimension - if human_readable is True and len(img_pixels.shape) == 4: - img_pixels = img_pixels[0] + current_img = current_img / 255 # normalize input in [0, 1] resp_objs.append( { - "face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels, + "face": current_img[:, :, ::-1], "facial_area": { "x": int(current_region.x), "y": int(current_region.y), diff --git a/deepface/modules/preprocessing.py b/deepface/modules/preprocessing.py index 28ff8e7..0ecef6a 100644 --- a/deepface/modules/preprocessing.py +++ b/deepface/modules/preprocessing.py @@ -11,6 +11,16 @@ import cv2 import requests from PIL import Image +# project dependencies +from deepface.commons import package_utils + + +tf_major_version = package_utils.get_tf_major_version() +if tf_major_version == 1: + from keras.preprocessing import image +elif tf_major_version == 2: + from tensorflow.keras.preprocessing import image + def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]: """ @@ -66,8 +76,8 @@ def load_image_from_web(url: str) -> np.ndarray: response = requests.get(url, stream=True, timeout=60) response.raise_for_status() image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8) - image = cv2.imdecode(image_array, cv2.IMREAD_COLOR) - return image + img = cv2.imdecode(image_array, cv2.IMREAD_COLOR) + return img def load_base64(uri: str) -> np.ndarray: @@ -157,3 +167,50 @@ def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray: raise ValueError(f"unimplemented normalization type - {normalization}") return img + + +def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray: + """ + Resize an image to expected size of a ml model with adding black pixels. + Args: + img (np.ndarray): pre-loaded image as numpy array + target_size (tuple): input shape of ml model + Returns: + img (np.ndarray): resized input image + """ + factor_0 = target_size[0] / img.shape[0] + factor_1 = target_size[1] / img.shape[1] + factor = min(factor_0, factor_1) + + dsize = ( + int(img.shape[1] * factor), + int(img.shape[0] * factor), + ) + img = cv2.resize(img, dsize) + + diff_0 = target_size[0] - img.shape[0] + diff_1 = target_size[1] - img.shape[1] + + # Put the base image in the middle of the padded image + img = np.pad( + img, + ( + (diff_0 // 2, diff_0 - diff_0 // 2), + (diff_1 // 2, diff_1 - diff_1 // 2), + (0, 0), + ), + "constant", + ) + + # double check: if target image is not still the same size with target. 
+ if img.shape[0:2] != target_size: + img = cv2.resize(img, target_size) + + # make it 4-dimensional how ML models expect + img = image.img_to_array(img) + img = np.expand_dims(img, axis=0) + + if img.max() > 1: + img = (img.astype(np.float32) / 255.0).astype(np.float32) + + return img diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index ccc850d..76212b4 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -13,8 +13,7 @@ from PIL import Image # project dependencies from deepface.commons.logger import Logger from deepface.commons import package_utils -from deepface.modules import representation, detection, modeling, verification -from deepface.models.FacialRecognition import FacialRecognition +from deepface.modules import representation, detection, verification logger = Logger(module="deepface/modules/recognition.py") @@ -90,15 +89,9 @@ def find( tic = time.time() - # ------------------------------- if os.path.isdir(db_path) is not True: raise ValueError("Passed db_path does not exist!") - model: FacialRecognition = modeling.build_model(model_name) - target_size = model.input_shape - - # --------------------------------------- - file_name = f"ds_{model_name}_{detector_backend}_v2.pkl" file_name = file_name.replace("-", "").lower() datastore_path = os.path.join(db_path, file_name) @@ -180,7 +173,6 @@ def find( representations += __find_bulk_embeddings( employees=new_images, model_name=model_name, - target_size=target_size, detector_backend=detector_backend, enforce_detection=enforce_detection, align=align, @@ -212,7 +204,6 @@ def find( # img path might have more than once face source_objs = detection.extract_faces( img_path=img_path, - target_size=target_size, detector_backend=detector_backend, grayscale=False, enforce_detection=enforce_detection, @@ -314,7 +305,6 @@ def __list_images(path: str) -> List[str]: def __find_bulk_embeddings( employees: List[str], model_name: str = "VGG-Face", - target_size: tuple = (224, 224), detector_backend: str = "opencv", enforce_detection: bool = True, align: bool = True, @@ -362,7 +352,6 @@ def __find_bulk_embeddings( try: img_objs = detection.extract_faces( img_path=employee, - target_size=target_size, detector_backend=detector_backend, grayscale=False, enforce_detection=enforce_detection, diff --git a/deepface/modules/representation.py b/deepface/modules/representation.py index 95c1a1e..6a0d2a2 100644 --- a/deepface/modules/representation.py +++ b/deepface/modules/representation.py @@ -3,7 +3,6 @@ from typing import Any, Dict, List, Union # 3rd party dependencies import numpy as np -import cv2 # project dependencies from deepface.modules import modeling, detection, preprocessing @@ -67,7 +66,6 @@ def represent( if detector_backend != "skip": img_objs = detection.extract_faces( img_path=img_path, - target_size=(target_size[1], target_size[0]), detector_backend=detector_backend, grayscale=False, enforce_detection=enforce_detection, @@ -77,16 +75,10 @@ def represent( else: # skip # Try load. If load error, will raise exception internal img, _ = preprocessing.load_image(img_path) - # -------------------------------- - if len(img.shape) == 4: - img = img[0] # e.g. (1, 224, 224, 3) to (224, 224, 3) - if len(img.shape) == 3: - img = cv2.resize(img, target_size) - img = np.expand_dims(img, axis=0) - # when called from verify, this is already normalized. But needed when user given. 
- if img.max() > 1: - img = (img.astype(np.float32) / 255.0).astype(np.float32) - # -------------------------------- + + if len(img.shape) != 3: + raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}") + # make dummy region and confidence to keep compatibility with `extract_faces` img_objs = [ { @@ -99,8 +91,20 @@ def represent( for img_obj in img_objs: img = img_obj["face"] + + # rgb to bgr + img = img[:, :, ::-1] + region = img_obj["facial_area"] confidence = img_obj["confidence"] + + # resize to expected shape of ml model + img = preprocessing.resize_image( + img=img, + # thanks to DeepId (!) + target_size=(target_size[1], target_size[0]), + ) + # custom normalization img = preprocessing.normalize_input(img=img, normalization=normalization) diff --git a/deepface/modules/streaming.py b/deepface/modules/streaming.py index 577126e..cc28b77 100644 --- a/deepface/modules/streaming.py +++ b/deepface/modules/streaming.py @@ -62,7 +62,7 @@ def analysis( """ # initialize models build_demography_models(enable_face_analysis=enable_face_analysis) - target_size = build_facial_recognition_model(model_name=model_name) + build_facial_recognition_model(model_name=model_name) # call a dummy find function for db_path once to create embeddings before starting webcam _ = search_identity( detected_face=np.zeros([224, 224, 3]), @@ -89,9 +89,7 @@ def analysis( faces_coordinates = [] if freeze is False: - faces_coordinates = grab_facial_areas( - img=img, detector_backend=detector_backend, target_size=target_size - ) + faces_coordinates = grab_facial_areas(img=img, detector_backend=detector_backend) # we will pass img to analyze modules (identity, demography) and add some illustrations # that is why, we will not be able to extract detected face from img clearly @@ -156,7 +154,7 @@ def analysis( cv2.destroyAllWindows() -def build_facial_recognition_model(model_name: str) -> tuple: +def build_facial_recognition_model(model_name: str) -> None: """ Build facial recognition model Args: @@ -165,9 +163,8 @@ def build_facial_recognition_model(model_name: str) -> tuple: Returns input_shape (tuple): input shape of given facial recognitio n model. """ - model: FacialRecognition = DeepFace.build_model(model_name=model_name) + _ = DeepFace.build_model(model_name=model_name) logger.info(f"{model_name} is built") - return model.input_shape def search_identity( @@ -231,7 +228,6 @@ def search_identity( # load found identity image - extracted if possible target_objs = DeepFace.extract_faces( img_path=target_path, - target_size=(IDENTIFIED_IMG_SIZE, IDENTIFIED_IMG_SIZE), detector_backend=detector_backend, enforce_detection=False, align=True, @@ -243,6 +239,7 @@ def search_identity( # extract 1st item directly target_obj = target_objs[0] target_img = target_obj["face"] + target_img = cv2.resize(target_img, (IDENTIFIED_IMG_SIZE, IDENTIFIED_IMG_SIZE)) target_img *= 255 target_img = target_img[:, :, ::-1] else: @@ -346,7 +343,7 @@ def countdown_to_release( def grab_facial_areas( - img: np.ndarray, detector_backend: str, target_size: Tuple[int, int], threshold: int = 130 + img: np.ndarray, detector_backend: str, threshold: int = 130 ) -> List[Tuple[int, int, int, int]]: """ Find facial area coordinates in the given image @@ -354,7 +351,6 @@ def grab_facial_areas( img (np.ndarray): image itself detector_backend (string): face detector backend. Options: 'opencv', 'retinaface', 'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv). - target_size (tuple): input shape of the facial recognition model. 
threshold (int): threshold for facial area, discard smaller ones Returns result (list): list of tuple with x, y, w and h coordinates @@ -363,7 +359,6 @@ def grab_facial_areas( face_objs = DeepFace.extract_faces( img_path=img, detector_backend=detector_backend, - target_size=target_size, # you may consider to extract with larger expanding value expand_percentage=0, ) diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py index ca26dcc..c7d7682 100644 --- a/deepface/modules/verification.py +++ b/deepface/modules/verification.py @@ -223,12 +223,8 @@ def __extract_faces_and_embeddings( embeddings = [] facial_areas = [] - model: FacialRecognition = modeling.build_model(model_name) - target_size = model.input_shape - img_objs = detection.extract_faces( img_path=img_path, - target_size=target_size, detector_backend=detector_backend, grayscale=False, enforce_detection=enforce_detection, diff --git a/tests/visual-test.py b/tests/visual-test.py index a1c348a..cd6de97 100644 --- a/tests/visual-test.py +++ b/tests/visual-test.py @@ -18,6 +18,7 @@ model_names = [ "Dlib", "ArcFace", "SFace", + "GhostFaceNet", ] detector_backends = [
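For reference, a minimal caller-side sketch of the reworked API this patch introduces: extract_faces no longer accepts target_size and now returns RGB faces normalized to [0, 1], so a caller that needs a fixed input shape resizes the returned face itself (the file names below are hypothetical, and this is an illustration rather than part of the patch):

import cv2
import numpy as np
from deepface import DeepFace

# extract_faces returns a list of dicts with "face" (RGB, float, values in [0, 1]),
# "facial_area" and "confidence"; there is no target_size argument anymore.
face_objs = DeepFace.extract_faces(
    img_path="img.jpg",            # hypothetical input image
    detector_backend="opencv",
    enforce_detection=False,
)

for idx, face_obj in enumerate(face_objs):
    face = face_obj["face"]
    # resize explicitly if a fixed shape is needed (formerly handled by target_size)
    face = cv2.resize(face, (224, 224))
    # convert back to BGR uint8 for OpenCV I/O, mirroring what the streaming module now does
    face_bgr = (face[:, :, ::-1] * 255).astype(np.uint8)
    cv2.imwrite(f"face_{idx}.jpg", face_bgr)  # hypothetical output path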