mirror of
https://github.com/serengil/deepface.git
synced 2025-06-07 12:05:22 +00:00
refactoring distance functions
This commit is contained in:
parent
a93fb63c97
commit
53a96f635a
def find_cosine_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> Union[np.float64, np.ndarray]:
    """
    Find cosine distance between two given vectors or batches of vectors.

    Args:
        source_representation (np.ndarray or list): 1st vector or batch of vectors.
        test_representation (np.ndarray or list): 2nd vector or batch of vectors.

    Returns:
        np.float64 or np.ndarray: Calculated cosine distance(s).
            It returns a np.float64 for single embeddings and np.ndarray for batch embeddings.
            For a batch of N source and M test embeddings the result has shape (M, N).

    Raises:
        ValueError: if the inputs are not both 1D or both 2D.
    """
    # Convert inputs to numpy arrays if necessary
    source_representation = np.asarray(source_representation)
    test_representation = np.asarray(test_representation)

    if source_representation.ndim == 1 and test_representation.ndim == 1:
        # single embedding
        source_norm = np.linalg.norm(source_representation)
        test_norm = np.linalg.norm(test_representation)
        if source_norm == 0 or test_norm == 0:
            # A zero vector has no direction. Dividing by its norm would yield nan
            # (with a RuntimeWarning); the batch branch normalizes through
            # l2_normalize, where a zero vector ends up with similarity 0, so
            # report the same distance of 1 here to keep both branches consistent.
            return np.float64(1.0)
        dot_product = np.dot(source_representation, test_representation)
        distances = 1 - dot_product / (source_norm * test_norm)
    elif source_representation.ndim == 2 and test_representation.ndim == 2:
        # list of embeddings (batch)
        source_normed = l2_normalize(source_representation, axis=1)  # (N, D)
        test_normed = l2_normalize(test_representation, axis=1)  # (M, D)
        cosine_similarities = np.dot(test_normed, source_normed.T)  # (M, N)
        distances = 1 - cosine_similarities
    else:
        raise ValueError(
            "Embeddings must be 1D or 2D, but received "
            f"source shape: {source_representation.shape}, test shape: {test_representation.shape}"
        )
    return distances
|
||||||
|
|
||||||
def find_euclidean_distance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> Union[np.float64, np.ndarray]:
    """
    Find Euclidean distance between two vectors or batches of vectors.

    Args:
        source_representation (np.ndarray or list): 1st vector or batch of vectors.
        test_representation (np.ndarray or list): 2nd vector or batch of vectors.

    Returns:
        np.float64 or np.ndarray: Euclidean distance(s).
            Returns a np.float64 for single embeddings and np.ndarray for batch embeddings.
            For a batch of N source and M test embeddings the result has shape (M, N).

    Raises:
        ValueError: if the inputs are not both 1D or both 2D, or if their
            embedding lengths (last dimension) differ.
    """
    # Convert inputs to numpy arrays if necessary
    source_representation = np.asarray(source_representation)
    test_representation = np.asarray(test_representation)

    same_rank = source_representation.ndim == test_representation.ndim
    if not same_rank or source_representation.ndim not in (1, 2):
        raise ValueError(
            "Embeddings must be 1D or 2D, but received "
            f"source shape: {source_representation.shape}, test shape: {test_representation.shape}"
        )

    # Without this check numpy would silently broadcast a length-1 embedding
    # against a longer one and return a meaningless distance.
    if source_representation.shape[-1] != test_representation.shape[-1]:
        raise ValueError(
            "Embeddings must have the same length, but received "
            f"source shape: {source_representation.shape}, test shape: {test_representation.shape}"
        )

    # Single embedding case (1D arrays)
    if source_representation.ndim == 1:
        return np.linalg.norm(source_representation - test_representation)

    # Batch embeddings case (2D arrays): pairwise differences via broadcasting
    diff = (
        source_representation[None, :, :] - test_representation[:, None, :]
    )  # (1, N, D) - (M, 1, D) = (M, N, D)
    return np.linalg.norm(diff, axis=2)  # (M, N)
|
||||||
|
|
||||||
@ -352,8 +343,8 @@ def l2_normalize(
|
|||||||
Returns:
|
Returns:
|
||||||
np.ndarray: l2 normalized vector
|
np.ndarray: l2 normalized vector
|
||||||
"""
|
"""
|
||||||
if isinstance(x, list):
|
# Convert inputs to numpy arrays if necessary
|
||||||
x = np.array(x)
|
x = np.asarray(x)
|
||||||
norm = np.linalg.norm(x, axis=axis, keepdims=True)
|
norm = np.linalg.norm(x, axis=axis, keepdims=True)
|
||||||
return x / (norm + epsilon)
|
return x / (norm + epsilon)
|
||||||
|
|
||||||
@ -364,40 +355,37 @@ def find_distance(
|
|||||||
distance_metric: str,
|
distance_metric: str,
|
||||||
) -> Union[np.float64, np.ndarray]:
|
) -> Union[np.float64, np.ndarray]:
|
||||||
"""
|
"""
|
||||||
Wrapper to find distance between vectors according to the given distance metric
|
Wrapper to find the distance between vectors based on the specified distance metric.
|
||||||
Args:
|
|
||||||
source_representation (np.ndarray or list): 1st vector
|
|
||||||
test_representation (np.ndarray or list): 2nd vector
|
|
||||||
Returns
|
|
||||||
distance (np.float64 or np.ndarray): calculated cosine distance(s).
|
|
||||||
it is type of np.float64 for given single embeddings
|
|
||||||
or type of np.ndarray for given batch embeddings
|
|
||||||
"""
|
|
||||||
if isinstance(alpha_embedding, list):
|
|
||||||
alpha_embedding = np.array(alpha_embedding)
|
|
||||||
|
|
||||||
if isinstance(beta_embedding, list):
|
Args:
|
||||||
beta_embedding = np.array(beta_embedding)
|
alpha_embedding (np.ndarray or list): 1st vector or batch of vectors.
|
||||||
|
beta_embedding (np.ndarray or list): 2nd vector or batch of vectors.
|
||||||
|
distance_metric (str): The type of distance to compute
|
||||||
|
('cosine', 'euclidean', or 'euclidean_l2').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
np.float64 or np.ndarray: The calculated distance(s).
|
||||||
|
"""
|
||||||
|
# Convert inputs to numpy arrays if necessary
|
||||||
|
alpha_embedding = np.asarray(alpha_embedding)
|
||||||
|
beta_embedding = np.asarray(beta_embedding)
|
||||||
|
|
||||||
|
# Ensure that both embeddings are either 1D or 2D
|
||||||
|
if alpha_embedding.ndim != beta_embedding.ndim or alpha_embedding.ndim not in (1, 2):
|
||||||
|
raise ValueError(
|
||||||
|
f"Both embeddings must be either 1D or 2D, but received "
|
||||||
|
f"alpha shape: {alpha_embedding.shape}, beta shape: {beta_embedding.shape}"
|
||||||
|
)
|
||||||
|
|
||||||
if distance_metric == "cosine":
|
if distance_metric == "cosine":
|
||||||
distance = find_cosine_distance(alpha_embedding, beta_embedding)
|
distance = find_cosine_distance(alpha_embedding, beta_embedding)
|
||||||
elif distance_metric == "euclidean":
|
elif distance_metric == "euclidean":
|
||||||
distance = find_euclidean_distance(alpha_embedding, beta_embedding)
|
distance = find_euclidean_distance(alpha_embedding, beta_embedding)
|
||||||
elif distance_metric == "euclidean_l2":
|
elif distance_metric == "euclidean_l2":
|
||||||
if len(alpha_embedding.shape) == 1 and len(beta_embedding.shape) == 1:
|
axis = None if alpha_embedding.ndim == 1 else 1
|
||||||
# single embedding
|
normalized_alpha = l2_normalize(alpha_embedding, axis=axis)
|
||||||
axis = None
|
normalized_beta = l2_normalize(beta_embedding, axis=axis)
|
||||||
elif len(alpha_embedding.shape) == 2 and len(beta_embedding.shape) == 2:
|
distance = find_euclidean_distance(normalized_alpha, normalized_beta)
|
||||||
# list of embeddings (batch)
|
|
||||||
axis = 1
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
"embeddings can either be 1 or 2 dimensional "
|
|
||||||
f"but it is {len(alpha_embedding.shape)} & {len(beta_embedding.shape)}"
|
|
||||||
)
|
|
||||||
distance = find_euclidean_distance(
|
|
||||||
l2_normalize(alpha_embedding, axis=axis), l2_normalize(beta_embedding, axis=axis)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Invalid distance_metric passed - ", distance_metric)
|
raise ValueError("Invalid distance_metric passed - ", distance_metric)
|
||||||
return np.round(distance, 6)
|
return np.round(distance, 6)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user