Merge pull request #2 from NatLee/feat/make-Race-and-Emotion-batch

[update] make Race and Emotion batch prediction
Nat 2024-12-31 13:54:54 +08:00 committed by GitHub
commit 05dbc8008c
6 changed files with 186 additions and 94 deletions

View File

@@ -2,7 +2,6 @@
from typing import List, Union
# 3rd party dependencies
import numpy as np
@@ -42,7 +41,7 @@ class ApparentAgeClient(Demography):
self.model = load_model()
self.model_name = "Age"
def predict(self, img: Union[np.ndarray, List[np.ndarray]]) -> Union[np.float64, np.ndarray]:
def predict(self, img: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
"""
Predict apparent age(s) for single or multiple faces
Args:
@@ -50,8 +49,7 @@ class ApparentAgeClient(Demography):
List of images as List[np.ndarray] or
Batch of images as np.ndarray (n, 224, 224, 3)
Returns:
Single age as np.float64 or
Multiple ages as np.ndarray (n,)
np.ndarray (n,)
"""
# Convert to numpy array if input is list
if isinstance(img, list):
@@ -66,9 +64,6 @@ class ApparentAgeClient(Demography):
if len(imgs.shape) == 3:
# Single image - add batch dimension
imgs = np.expand_dims(imgs, axis=0)
is_single = True
else:
is_single = False
# Batch prediction
age_predictions = self.model.predict_on_batch(imgs)
@@ -78,11 +73,9 @@ class ApparentAgeClient(Demography):
[find_apparent_age(age_prediction) for age_prediction in age_predictions]
)
# Return single value for single image
if is_single:
return apparent_ages[0]
return apparent_ages
def predicts(self, imgs: List[np.ndarray]) -> np.ndarray:
"""
Predict apparent ages of multiple faces
@@ -107,6 +100,7 @@ class ApparentAgeClient(Demography):
return apparent_ages
def load_model(
url=WEIGHTS_URL,
) -> Model:
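The net effect of the hunks above: ApparentAgeClient.predict now normalizes any input to a 4-D batch and always returns an array, never a scalar. A minimal usage sketch, assuming the class lives at deepface.models.demography.Age as in the upstream layout (shapes taken from the docstring above):

import numpy as np
from deepface.models.demography.Age import ApparentAgeClient  # path assumed

client = ApparentAgeClient()  # builds the age model; downloads weights on first use

single_face = np.random.rand(224, 224, 3)    # one preprocessed face
face_batch = np.random.rand(4, 224, 224, 3)  # four faces in one array

print(client.predict(single_face).shape)  # (1,) -- the scalar path is gone
print(client.predict(face_batch).shape)   # (4,)

Callers that previously relied on a bare np.float64 for a single face now need to index into the returned array.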

View File

@@ -1,3 +1,6 @@
# stdlib dependencies
from typing import List, Union
# 3rd party dependencies
import numpy as np
import cv2
@@ -43,16 +46,53 @@ class EmotionClient(Demography):
self.model = load_model()
self.model_name = "Emotion"
def predict(self, img: np.ndarray) -> np.ndarray:
img_gray = cv2.cvtColor(img[0], cv2.COLOR_BGR2GRAY)
def _preprocess_image(self, img: np.ndarray) -> np.ndarray:
"""
Preprocess single image for emotion detection
Args:
img: Input image (224, 224, 3)
Returns:
Preprocessed grayscale image (48, 48)
"""
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_gray = cv2.resize(img_gray, (48, 48))
img_gray = np.expand_dims(img_gray, axis=0)
return img_gray
# model.predict causes memory issue when it is called in a for loop
# emotion_predictions = self.model.predict(img_gray, verbose=0)[0, :]
emotion_predictions = self.model(img_gray, training=False).numpy()[0, :]
def predict(self, img: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
"""
Predict emotion probabilities for single or multiple faces
Args:
img: Single image as np.ndarray (224, 224, 3) or
List of images as List[np.ndarray] or
Batch of images as np.ndarray (n, 224, 224, 3)
Returns:
np.ndarray (n, n_emotions)
where n_emotions is the number of emotion categories
"""
# Convert to numpy array if input is list
if isinstance(img, list):
imgs = np.array(img)
else:
imgs = img
return emotion_predictions
# Remove batch dimension if exists
imgs = imgs.squeeze()
# Check input dimension
if len(imgs.shape) == 3:
# Single image - add batch dimension
imgs = np.expand_dims(imgs, axis=0)
# Preprocess each image
processed_imgs = np.array([self._preprocess_image(img) for img in imgs])
# Add channel dimension for grayscale images
processed_imgs = np.expand_dims(processed_imgs, axis=-1)
# Batch prediction
predictions = self.model.predict_on_batch(processed_imgs)
return predictions
def load_model(
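Emotion differs from the other attributes because its CNN consumes 48x48 grayscale crops, which is why the preprocessing step is factored out before batching. A standalone sketch of the same shape flow using only cv2 and numpy (preprocess_batch is an illustrative name, not the library's API):

import cv2
import numpy as np

def preprocess_batch(faces: np.ndarray) -> np.ndarray:
    """(n, 224, 224, 3) BGR batch -> (n, 48, 48, 1) grayscale batch."""
    grays = [
        cv2.resize(cv2.cvtColor(face, cv2.COLOR_BGR2GRAY), (48, 48))
        for face in faces
    ]
    # Stack the crops, then restore the channel axis the model expects.
    return np.expand_dims(np.array(grays), axis=-1)

faces = (np.random.rand(3, 224, 224, 3) * 255).astype(np.uint8)
print(preprocess_batch(faces).shape)  # (3, 48, 48, 1)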

View File

@@ -2,7 +2,6 @@
from typing import List, Union
# 3rd party dependencies
import numpy as np
@@ -42,7 +41,7 @@ class GenderClient(Demography):
self.model = load_model()
self.model_name = "Gender"
def predict(self, img: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, np.ndarray]:
def predict(self, img: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
"""
Predict gender probabilities for single or multiple faces
Args:
@@ -50,8 +49,7 @@ class GenderClient(Demography):
List of images as List[np.ndarray] or
Batch of images as np.ndarray (n, 224, 224, 3)
Returns:
Single prediction as np.ndarray (2,) [female_prob, male_prob] or
Multiple predictions as np.ndarray (n, 2)
np.ndarray (n, 2)
"""
# Convert to numpy array if input is list
if isinstance(img, list):
@@ -66,18 +64,13 @@ class GenderClient(Demography):
if len(imgs.shape) == 3:
# Single image - add batch dimension
imgs = np.expand_dims(imgs, axis=0)
is_single = True
else:
is_single = False
# Batch prediction
predictions = self.model.predict_on_batch(imgs)
# Return single prediction for single image
if is_single:
return predictions[0]
return predictions
def predicts(self, imgs: List[np.ndarray]) -> np.ndarray:
"""
Predict gender probabilities of multiple faces
@@ -97,6 +90,7 @@ class GenderClient(Demography):
return self.model.predict_on_batch(imgs_)
def load_model(
url=WEIGHTS_URL,
) -> Model:
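Downstream code now always receives an (n, 2) array, so per-face interpretation is a plain loop. A sketch of mapping the probabilities onto labels, assuming Gender.labels is ["Woman", "Man"] as elsewhere in the library:

import numpy as np

labels = ["Woman", "Man"]  # assumed to mirror Gender.labels
predictions = np.array([[0.91, 0.09],   # face 0
                        [0.20, 0.80]])  # face 1

for probs in predictions:
    percentages = {label: 100 * p for label, p in zip(labels, probs)}
    print(percentages, "->", labels[int(np.argmax(probs))])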

View File

@@ -1,3 +1,6 @@
# stdlib dependencies
from typing import List, Union
# 3rd party dependencies
import numpy as np
@@ -37,10 +40,35 @@ class RaceClient(Demography):
self.model = load_model()
self.model_name = "Race"
def predict(self, img: np.ndarray) -> np.ndarray:
# model.predict causes memory issue when it is called in a for loop
# return self.model.predict(img, verbose=0)[0, :]
return self.model(img, training=False).numpy()[0, :]
def predict(self, img: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
"""
Predict race probabilities for single or multiple faces
Args:
img: Single image as np.ndarray (224, 224, 3) or
List of images as List[np.ndarray] or
Batch of images as np.ndarray (n, 224, 224, 3)
Returns:
np.ndarray (n, n_races)
where n_races is the number of race categories
"""
# Convert to numpy array if input is list
if isinstance(img, list):
imgs = np.array(img)
else:
imgs = img
# Remove batch dimension if exists
imgs = imgs.squeeze()
# Check input dimension
if len(imgs.shape) == 3:
# Single image - add batch dimension
imgs = np.expand_dims(imgs, axis=0)
# Batch prediction
predictions = self.model.predict_on_batch(imgs)
return predictions
def load_model(
@@ -62,7 +90,7 @@ def load_model(
# --------------------------
race_model = Model(inputs=model.input, outputs=base_model_output)
race_model = Model(inputs=model.inputs, outputs=base_model_output)
# --------------------------
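The model.input -> model.inputs change in the last hunk swaps the single-tensor attribute for the list-of-tensors one; Keras accepts either when rebuilding a functional model, but .input raises an error on multi-input models, so the list form is the more defensive choice. A self-contained Keras sketch of the same pattern (toy layer sizes, not the face-recognition trunk used here):

from tensorflow.keras import layers, Model

inp = layers.Input(shape=(224, 224, 3))
x = layers.GlobalAveragePooling2D()(inp)
features = layers.Dense(16, activation="relu")(x)
base = Model(inputs=inp, outputs=features)

# Attach a new softmax head to the shared trunk.
head = layers.Dense(6, activation="softmax")(base.output)

# base.inputs is a list of input tensors; Model() accepts it directly.
derived = Model(inputs=base.inputs, outputs=head)
print(derived.output_shape)  # (None, 6)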

View File

@@ -9,7 +9,7 @@ from tqdm import tqdm
from deepface.modules import modeling, detection, preprocessing
from deepface.models.demography import Gender, Race, Emotion
# pylint: disable=trailing-whitespace
def analyze(
img_path: Union[str, np.ndarray],
actions: Union[tuple, list] = ("emotion", "age", "gender", "race"),
@@ -130,83 +130,107 @@ def analyze(
anti_spoofing=anti_spoofing,
)
for img_obj in img_objs:
if anti_spoofing is True and img_obj.get("is_real", True) is False:
raise ValueError("Spoof detected in the given image.")
# Anti-spoofing check
if anti_spoofing:
for img_obj in img_objs:
if img_obj.get("is_real", True) is False:
raise ValueError("Spoof detected in the given image.")
# Prepare the input for the model
valid_faces = []
face_regions = []
face_confidences = []
for img_obj in img_objs:
# Extract the face content
img_content = img_obj["face"]
img_region = img_obj["facial_area"]
img_confidence = img_obj["confidence"]
# Check if the face content is empty
if img_content.shape[0] == 0 or img_content.shape[1] == 0:
continue
# rgb to bgr
# Convert the image to RGB format from BGR
img_content = img_content[:, :, ::-1]
# resize input image
# Resize the image to the target size for the model
img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224))
obj = {}
# facial attribute analysis
pbar = tqdm(
range(0, len(actions)),
desc="Finding actions",
disable=silent if len(actions) > 1 else True,
)
for index in pbar:
action = actions[index]
pbar.set_description(f"Action: {action}")
valid_faces.append(img_content)
face_regions.append(img_obj["facial_area"])
face_confidences.append(img_obj["confidence"])
if action == "emotion":
emotion_predictions = modeling.build_model(
task="facial_attribute", model_name="Emotion"
).predict(img_content)
sum_of_predictions = emotion_predictions.sum()
# If no valid faces are found, return an empty list
if not valid_faces:
return []
obj["emotion"] = {}
# Convert the list of valid faces to a numpy array
faces_array = np.array(valid_faces)
resp_objects = [{} for _ in range(len(valid_faces))]
# For each action, predict the corresponding attribute
pbar = tqdm(
range(0, len(actions)),
desc="Finding actions",
disable=silent if len(actions) > 1 else True,
)
for index in pbar:
action = actions[index]
pbar.set_description(f"Action: {action}")
if action == "emotion":
# Build the emotion model
model = modeling.build_model(task="facial_attribute", model_name="Emotion")
emotion_predictions = model.predict(faces_array)
for idx, predictions in enumerate(emotion_predictions):
sum_of_predictions = predictions.sum()
resp_objects[idx]["emotion"] = {}
for i, emotion_label in enumerate(Emotion.labels):
emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
obj["emotion"][emotion_label] = emotion_prediction
emotion_prediction = 100 * predictions[i] / sum_of_predictions
resp_objects[idx]["emotion"][emotion_label] = emotion_prediction
resp_objects[idx]["dominant_emotion"] = Emotion.labels[np.argmax(predictions)]
obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
elif action == "age":
# Build the age model
model = modeling.build_model(task="facial_attribute", model_name="Age")
age_predictions = model.predict(faces_array)
for idx, age in enumerate(age_predictions):
resp_objects[idx]["age"] = int(age)
elif action == "age":
apparent_age = modeling.build_model(
task="facial_attribute", model_name="Age"
).predict(img_content)
# int cast is for exception - object of type 'float32' is not JSON serializable
obj["age"] = int(apparent_age)
elif action == "gender":
gender_predictions = modeling.build_model(
task="facial_attribute", model_name="Gender"
).predict(img_content)
obj["gender"] = {}
elif action == "gender":
# Build the gender model
model = modeling.build_model(task="facial_attribute", model_name="Gender")
gender_predictions = model.predict(faces_array)
for idx, predictions in enumerate(gender_predictions):
resp_objects[idx]["gender"] = {}
for i, gender_label in enumerate(Gender.labels):
gender_prediction = 100 * gender_predictions[i]
obj["gender"][gender_label] = gender_prediction
gender_prediction = 100 * predictions[i]
resp_objects[idx]["gender"][gender_label] = gender_prediction
resp_objects[idx]["dominant_gender"] = Gender.labels[np.argmax(predictions)]
obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
elif action == "race":
race_predictions = modeling.build_model(
task="facial_attribute", model_name="Race"
).predict(img_content)
sum_of_predictions = race_predictions.sum()
obj["race"] = {}
elif action == "race":
# Build the race model
model = modeling.build_model(task="facial_attribute", model_name="Race")
race_predictions = model.predict(faces_array)
for idx, predictions in enumerate(race_predictions):
sum_of_predictions = predictions.sum()
resp_objects[idx]["race"] = {}
for i, race_label in enumerate(Race.labels):
race_prediction = 100 * race_predictions[i] / sum_of_predictions
obj["race"][race_label] = race_prediction
race_prediction = 100 * predictions[i] / sum_of_predictions
resp_objects[idx]["race"][race_label] = race_prediction
resp_objects[idx]["dominant_race"] = Race.labels[np.argmax(predictions)]
obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
# -----------------------------
# mention facial areas
obj["region"] = img_region
# include image confidence
obj["face_confidence"] = img_confidence
resp_objects.append(obj)
# Add the face region and confidence to the response objects
for idx, resp_obj in enumerate(resp_objects):
resp_obj["region"] = face_regions[idx]
resp_obj["face_confidence"] = face_confidences[idx]
return resp_objects
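Distilled, this refactor turns analyze from "for each face, for each action, one model call" into "collect faces once, one batched call per action, scatter results back per face". A schematic of that control flow (function and variable names are illustrative, not the module's API):

import numpy as np

def analyze_batched(faces, models, actions):
    """faces: list of (224, 224, 3) arrays; models: dict mapping action -> model."""
    resp_objects = [{} for _ in faces]
    faces_array = np.array(faces)  # (n, 224, 224, 3)
    for action in actions:
        # One batched forward pass per action instead of one per face.
        predictions = models[action].predict(faces_array)
        for idx, pred in enumerate(predictions):
            resp_objects[idx][action] = pred
    return resp_objects

For k actions and n detected faces this is k model invocations instead of k * n, which is where the speedup comes from.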

View File

@@ -135,3 +135,15 @@ def test_analyze_for_different_detectors():
assert result["gender"]["Man"] > result["gender"]["Woman"]
else:
assert result["gender"]["Man"] < result["gender"]["Woman"]
def test_analyze_for_multiple_faces():
img = "dataset/img4.jpg"
# Copy and combine the same image to create multiple faces
img = cv2.imread(img)
img = cv2.hconcat([img, img])
demography_objs = DeepFace.analyze(img, silent=True)
for demography in demography_objs:
logger.debug(demography)
assert demography["age"] > 20 and demography["age"] < 40
assert demography["dominant_gender"] == "Woman"
logger.info("✅ test analyze for multiple faces done")
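For reference, each element of the list returned by DeepFace.analyze carries the per-action scores plus the face region and confidence. An illustrative example for one face (all values made up; field set based on the assertions above and the library's documented output):

demography = {
    "age": 31,
    "gender": {"Woman": 99.1, "Man": 0.9},
    "dominant_gender": "Woman",
    "emotion": {"angry": 0.1, "disgust": 0.0, "fear": 0.2, "happy": 96.2,
                "sad": 0.4, "surprise": 1.0, "neutral": 2.1},
    "dominant_emotion": "happy",
    "race": {"asian": 1.2, "indian": 2.0, "black": 0.5, "white": 88.4,
             "middle eastern": 4.1, "latino hispanic": 3.8},
    "dominant_race": "white",
    "region": {"x": 120, "y": 60, "w": 180, "h": 180},
    "face_confidence": 0.97,
}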