From 3bb317ac63f87624bd8b77d76e919d42edff13c5 Mon Sep 17 00:00:00 2001
From: kremnik <kremnikov@gmail.com>
Date: Mon, 30 Sep 2024 15:02:58 +0300
Subject: [PATCH] fix l2_normalize and tests

---
 deepface/modules/recognition.py  | 28 +++++++---------------------
 deepface/modules/verification.py | 13 ++++++++-----
 tests/test_find_batched.py       |  8 ++++++--
 3 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py
index 664b1cc..12a368c 100644
--- a/deepface/modules/recognition.py
+++ b/deepface/modules/recognition.py
@@ -453,7 +453,7 @@ def find_batched(
     anti_spoofing: bool = False,
 ) -> List[List[Dict[str, Any]]]:
     """
-    Perform batched face recognition by comparing source face embeddingswith a set of
+    Perform batched face recognition by comparing source face embeddings with a set of
     target embeddings. It calculates pairwise distances between the source and target
     embeddings using the specified distance metric.
     The function uses batch processing for efficient computation of distances.
@@ -549,6 +549,7 @@ def find_batched(
             align=align,
             normalization=normalization,
         )
+        # Indexing [0] is safe here: an already-detected face was passed to the represent function
         target_representation = target_embedding_obj[0]["embedding"]
 
         target_embeddings.append(target_representation)
@@ -566,21 +567,6 @@ def find_batched(
         'source_h': np.array([region['h'] for region in source_regions]),
     }
 
-    def l2_normalize(
-        x: np.ndarray, axis: int = 1, epsilon: float = 1e-10
-    ) -> np.ndarray:
-        """
-        Normalize input vectors along a specified axis using L2 normalization
-        Args:
-            x (np.ndarray): input array
-            axis (int): axis along which to normalize
-            epsilon (float): small value to avoid division by zero
-        Returns:
-            np.ndarray: L2-normalized array of the same shape as input
-        """
-        norm = np.linalg.norm(x, axis=axis, keepdims=True)
-        return x / (norm + epsilon)
-
     def find_cosine_distance_batch(
         embeddings: np.ndarray, target_embeddings: np.ndarray
     ) -> np.ndarray:
@@ -592,8 +578,8 @@ def find_batched(
         Returns:
             np.ndarray: distance matrix of shape (M, N)
         """
-        embeddings_norm = l2_normalize(embeddings, axis=1)
-        target_embeddings_norm = l2_normalize(target_embeddings, axis=1)
+        embeddings_norm = verification.l2_normalize(embeddings, axis=1)
+        target_embeddings_norm = verification.l2_normalize(target_embeddings, axis=1)
         cosine_similarities = np.dot(target_embeddings_norm, embeddings_norm.T)
         cosine_distances = 1 - cosine_similarities
         return cosine_distances
@@ -630,12 +616,12 @@ def find_batched(
         elif distance_metric == "euclidean":
             distances = find_euclidean_distance_batch(embeddings, target_embeddings)
         elif distance_metric == "euclidean_l2":
-            embeddings_norm = l2_normalize(embeddings, axis=1)
-            target_embeddings_norm = l2_normalize(target_embeddings, axis=1)
+            embeddings_norm = verification.l2_normalize(embeddings, axis=1)
+            target_embeddings_norm = verification.l2_normalize(target_embeddings, axis=1)
             distances = find_euclidean_distance_batch(embeddings_norm, target_embeddings_norm)
         else:
             raise ValueError("Invalid distance_metric passed - ", distance_metric)
-        return distances
+        return np.round(distances, 6)
 
     distances = find_distance_batch(embeddings, target_embeddings, distance_metric) # (M, N)
     distances[:, ~valid_mask] = np.inf
diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py
index b6b1002..3756db7 100644
--- a/deepface/modules/verification.py
+++ b/deepface/modules/verification.py
@@ -304,18 +304,21 @@ def find_euclidean_distance(
     return np.linalg.norm(source_representation - test_representation)
 
 
-def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
+def l2_normalize(
+    x: Union[np.ndarray, list], axis: Union[int, None] = None, epsilon: float = 1e-10
+) -> np.ndarray:
     """
     Normalize input vector with l2
     Args:
         x (np.ndarray or list): given vector
+        axis (int or None): axis along which to normalize; None (default) uses the norm of the whole array
     Returns:
-        y (np.ndarray): l2 normalized vector
+        np.ndarray: l2 normalized vector
     """
     if isinstance(x, list):
         x = np.array(x)
-    norm = np.linalg.norm(x)
-    return x if norm == 0 else x / norm
+    norm = np.linalg.norm(x, axis=axis, keepdims=True)
+    return x / (norm + epsilon)
 
 
 def find_distance(
@@ -341,7 +344,7 @@ def find_distance(
         )
     else:
         raise ValueError("Invalid distance_metric passed - ", distance_metric)
-    return distance
+    return np.round(distance, 6)
 
 
 def find_threshold(model_name: str, distance_metric: str) -> float:
diff --git a/tests/test_find_batched.py b/tests/test_find_batched.py
index fa38bc6..7bceee6 100644
--- a/tests/test_find_batched.py
+++ b/tests/test_find_batched.py
@@ -3,12 +3,10 @@ import os
 
 # 3rd party dependencies
 import cv2
-import pandas as pd
 
 # project dependencies
 from deepface import DeepFace
 from deepface.modules import verification
-from deepface.commons import image_utils
 from deepface.commons.logger import Logger
 
 logger = Logger()
@@ -21,12 +19,18 @@ def test_find_with_exact_path():
     img_path = os.path.join("dataset", "img1.jpg")
     results = DeepFace.find(img_path=img_path, db_path="dataset", silent=True, batched=True)
     assert len(results) > 0
+    required_keys = set([
+        "identity", "distance", "threshold", "hash",
+        "target_x", "target_y", "target_w", "target_h",
+        "source_x", "source_y", "source_w", "source_h"
+    ])
     for result in results:
         assert isinstance(result, list)
 
         found_image_itself = False
         for face in result:
             assert isinstance(face, dict)
+            assert set(face.keys()) == required_keys
             if face["identity"] == img_path:
                 # validate reproducability
                 assert face["distance"] < threshold