Merge pull request #990 from serengil/feat-task-2901-cleaning-modules

Feat task 2901 cleaning modules
This commit is contained in:
Sefik Ilkin Serengil 2024-01-31 23:49:57 +00:00 committed by GitHub
commit 64d33717a9
35 changed files with 758 additions and 475 deletions

.gitignore (vendored): 2 changes
View File

@ -11,3 +11,5 @@ tests/dataset/*.pkl
tests/*.ipynb
tests/*.csv
*.pyc
**/.coverage
**/.coverage.*

View File

@ -2,4 +2,7 @@ test:
cd tests && python -m pytest . -s --disable-warnings
lint:
python -m pylint deepface/ --fail-under=10
coverage:
pip install pytest-cov && cd tests && python -m pytest --cov=deepface

View File

@ -58,6 +58,7 @@ def verify(
distance_metric: str = "cosine",
enforce_detection: bool = True,
align: bool = True,
expand_percentage: int = 0,
normalization: str = "base",
) -> Dict[str, Any]:
"""
@ -83,6 +84,8 @@ def verify(
align (bool): Flag to enable face alignment (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
normalization (string): Normalize the input image before feeding it to the model.
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
@ -119,6 +122,7 @@ def verify(
distance_metric=distance_metric,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
normalization=normalization,
)
@ -129,6 +133,7 @@ def analyze(
enforce_detection: bool = True,
detector_backend: str = "opencv",
align: bool = True,
expand_percentage: int = 0,
silent: bool = False,
) -> List[Dict[str, Any]]:
"""
@ -152,6 +157,8 @@ def analyze(
align (boolean): Perform alignment based on the eye positions (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False).
@ -209,6 +216,7 @@ def analyze(
enforce_detection=enforce_detection,
detector_backend=detector_backend,
align=align,
expand_percentage=expand_percentage,
silent=silent,
)
@ -221,6 +229,7 @@ def find(
enforce_detection: bool = True,
detector_backend: str = "opencv",
align: bool = True,
expand_percentage: int = 0,
threshold: Optional[float] = None,
normalization: str = "base",
silent: bool = False,
@ -249,6 +258,8 @@ def find(
align (boolean): Perform alignment based on the eye positions (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
threshold (float): Specify a threshold to determine whether a pair represents the same
person or different individuals. This threshold is used for comparing distances.
If left unset, default pre-tuned threshold values will be applied based on the specified
@ -286,6 +297,7 @@ def find(
enforce_detection=enforce_detection,
detector_backend=detector_backend,
align=align,
expand_percentage=expand_percentage,
threshold=threshold,
normalization=normalization,
silent=silent,
@ -298,6 +310,7 @@ def represent(
enforce_detection: bool = True,
detector_backend: str = "opencv",
align: bool = True,
expand_percentage: int = 0,
normalization: str = "base",
) -> List[Dict[str, Any]]:
"""
@ -320,6 +333,8 @@ def represent(
align (boolean): Perform alignment based on the eye positions (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
normalization (string): Normalize the input image before feeding it to the model.
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
(default is base).
@ -346,6 +361,7 @@ def represent(
enforce_detection=enforce_detection,
detector_backend=detector_backend,
align=align,
expand_percentage=expand_percentage,
normalization=normalization,
)
@ -409,6 +425,7 @@ def extract_faces(
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
expand_percentage: int = 0,
grayscale: bool = False,
) -> List[Dict[str, Any]]:
"""
@ -429,6 +446,8 @@ def extract_faces(
align (bool): Flag to enable face alignment (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
grayscale (boolean): Flag to convert the image to grayscale before
processing (default is False).
@ -448,7 +467,9 @@ def extract_faces(
detector_backend=detector_backend,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
grayscale=grayscale,
human_readable=True,
)
@ -459,3 +480,49 @@ def cli() -> None:
import fire
fire.Fire()
# deprecated function(s)
def detectFace(
img_path: Union[str, np.ndarray],
target_size: tuple = (224, 224),
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
) -> Union[np.ndarray, None]:
"""
Deprecated face detection function. Use extract_faces for same functionality.
Args:
img_path (str or np.ndarray): Path to the first image. Accepts exact image path
as a string, numpy array (BGR), or base64 encoded images.
target_size (tuple): final shape of facial image. black pixels will be
added to resize the image (default is (224, 224)).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
enforce_detection (boolean): If no face is detected in an image, raise an exception.
Set to False to avoid the exception for low-resolution images (default is True).
align (bool): Flag to enable face alignment (default is True).
Returns:
img (np.ndarray): detected (and aligned) facial area image as numpy array
"""
logger.warn("Function detectFace is deprecated. Use extract_faces instead.")
face_objs = extract_faces(
img_path=img_path,
target_size=target_size,
detector_backend=detector_backend,
enforce_detection=enforce_detection,
align=align,
grayscale=False,
)
extracted_face = None
if len(face_objs) > 0:
extracted_face = face_objs[0]["face"]
return extracted_face
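For reference, a short usage sketch of the new `expand_percentage` keyword on the public API; the image paths below are placeholders borrowed from the test assets referenced later in this diff.

```python
from deepface import DeepFace

# grow the detected facial area by 15% on each side; the detector wrappers
# clip the expanded box to the image boundaries
face_objs = DeepFace.extract_faces(
    img_path="dataset/img1.jpg",
    detector_backend="opencv",
    expand_percentage=15,
)
for face_obj in face_objs:
    print(face_obj["facial_area"], face_obj["confidence"])

# the same keyword is forwarded by verify, analyze, find and represent, e.g.
demographies = DeepFace.analyze(
    img_path="dataset/img1.jpg",
    actions=("age", "gender", "emotion", "race"),
    expand_percentage=15,
)
print(demographies[0]["dominant_emotion"])
```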

View File

@ -53,6 +53,8 @@ class ArcFaceClient(FacialRecognition):
def __init__(self):
self.model = load_model()
self.model_name = "ArcFace"
self.input_shape = (112, 112)
self.output_shape = 512
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -49,6 +49,8 @@ class DeepIdClient(FacialRecognition):
def __init__(self):
self.model = load_model()
self.model_name = "DeepId"
self.input_shape = (47, 55)
self.output_shape = 160
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -20,6 +20,8 @@ class DlibClient(FacialRecognition):
def __init__(self):
self.model = DlibResNet()
self.model_name = "Dlib"
self.input_shape = (150, 150)
self.output_shape = 128
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -53,6 +53,8 @@ class FaceNet128dClient(FacialRecognition):
def __init__(self):
self.model = load_facenet128d_model()
self.model_name = "FaceNet-128d"
self.input_shape = (160, 160)
self.output_shape = 128
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""
@ -75,6 +77,8 @@ class FaceNet512dClient(FacialRecognition):
def __init__(self):
self.model = load_facenet512d_model()
self.model_name = "FaceNet-512d"
self.input_shape = (160, 160)
self.output_shape = 512
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -46,6 +46,8 @@ class DeepFaceClient(FacialRecognition):
def __init__(self):
self.model = load_model()
self.model_name = "DeepFace"
self.input_shape = (152, 152)
self.output_shape = 4096
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -36,6 +36,8 @@ class OpenFaceClient(FacialRecognition):
def __init__(self):
self.model = load_model()
self.model_name = "OpenFace"
self.input_shape = (96, 96)
self.output_shape = 128
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -22,6 +22,8 @@ class SFaceClient(FacialRecognition):
def __init__(self):
self.model = load_model()
self.model_name = "SFace"
self.input_shape = (112, 112)
self.output_shape = 128
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -43,6 +43,8 @@ class VggFaceClient(FacialRecognition):
def __init__(self):
self.model = load_model()
self.model_name = "VGG-Face"
self.input_shape = (224, 224)
self.output_shape = 4096
def find_embeddings(self, img: np.ndarray) -> List[float]:
"""

View File

@ -2,7 +2,7 @@ from typing import Union
import numpy as np
def findCosineDistance(
def find_cosine_distance(
source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
if isinstance(source_representation, list):
@ -17,7 +17,7 @@ def findCosineDistance(
return 1 - (a / (np.sqrt(b) * np.sqrt(c)))
def findEuclideanDistance(
def find_euclidean_distance(
source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> np.float64:
if isinstance(source_representation, list):
@ -38,7 +38,7 @@ def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
return x / np.sqrt(np.sum(np.multiply(x, x)))
def findThreshold(model_name: str, distance_metric: str) -> float:
def find_threshold(model_name: str, distance_metric: str) -> float:
base_threshold = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75}
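A minimal sketch of the renamed snake_case distance helpers, assuming two random 128-d vectors stand in for real embeddings.

```python
import numpy as np
from deepface.commons import distance as dst

a = np.random.rand(128)
b = np.random.rand(128)

# cosine distance: 1 - (a.b / (|a| * |b|)), as implemented in find_cosine_distance
cos_dist = dst.find_cosine_distance(a, b)

# euclidean distance on l2-normalized vectors, i.e. the "euclidean_l2" metric
l2_dist = dst.find_euclidean_distance(dst.l2_normalize(a), dst.l2_normalize(b))

# pre-tuned decision threshold for a model / metric pair
threshold = dst.find_threshold(model_name="VGG-Face", distance_metric="cosine")
print(cos_dist, l2_dist, threshold)
```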

View File

@ -1,42 +1,19 @@
import os
from typing import Union, Tuple, List
import base64
from pathlib import Path
# 3rd party dependencies
from PIL import Image
import requests
import numpy as np
import cv2
import tensorflow as tf
# package dependencies
from deepface.detectors import DetectorWrapper
from deepface.models.Detector import DetectedFace, FacialAreaRegion
from deepface.commons.logger import Logger
logger = Logger(module="commons.functions")
# pylint: disable=no-else-raise
# --------------------------------------------------
# configurations of dependencies
def get_tf_major_version() -> int:
return int(tf.__version__.split(".", maxsplit=1)[0])
tf_major_version = get_tf_major_version()
if tf_major_version == 1:
from keras.preprocessing import image
elif tf_major_version == 2:
from tensorflow.keras.preprocessing import image
# --------------------------------------------------
def initialize_folder() -> None:
"""Initialize the folder for storing weights and models.
@ -63,293 +40,3 @@ def get_deepface_home() -> str:
str: the home directory.
"""
return str(os.getenv("DEEPFACE_HOME", default=str(Path.home())))
# --------------------------------------------------
def loadBase64Img(uri: str) -> np.ndarray:
"""Load image from base64 string.
Args:
uri: a base64 string.
Returns:
numpy array: the loaded image.
"""
encoded_data = uri.split(",")[1]
nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
# img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
return img_bgr
def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
"""
Load image from path, url, base64 or numpy array.
Args:
img: a path, url, base64 or numpy array.
Returns:
image (numpy array): the loaded image in BGR format
image name (str): image name itself
"""
# The image is already a numpy array
if isinstance(img, np.ndarray):
return img, "numpy array"
if isinstance(img, Path):
img = str(img)
if not isinstance(img, str):
raise ValueError(f"img must be numpy array or str but it is {type(img)}")
# The image is a base64 string
if img.startswith("data:image/"):
return loadBase64Img(img), "base64 encoded string"
# The image is a url
if img.startswith("http"):
return (
np.array(Image.open(requests.get(img, stream=True, timeout=60).raw).convert("BGR")),
# return url as image name
img,
)
# The image is a path
if os.path.isfile(img) is not True:
raise ValueError(f"Confirm that {img} exists")
# image must be a file on the system then
# image name must have english characters
if img.isascii() is False:
raise ValueError(f"Input image must not have non-english characters - {img}")
img_obj_bgr = cv2.imread(img)
# img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
return img_obj_bgr, img
# --------------------------------------------------
def extract_faces(
img: Union[str, np.ndarray],
target_size: tuple = (224, 224),
detector_backend: str = "opencv",
grayscale: bool = False,
enforce_detection: bool = True,
align: bool = True,
) -> List[Tuple[np.ndarray, dict, float]]:
"""
Extract faces from an image.
Args:
img: a path, url, base64 or numpy array.
target_size (tuple, optional): the target size of the extracted faces.
Defaults to (224, 224).
detector_backend (str, optional): the face detector backend. Defaults to "opencv".
grayscale (bool, optional): whether to convert the extracted faces to grayscale.
Defaults to False.
enforce_detection (bool, optional): whether to enforce face detection. Defaults to True.
align (bool, optional): whether to align the extracted faces. Defaults to True.
Raises:
ValueError: if face could not be detected and enforce_detection is True.
Returns:
results (List[Tuple[np.ndarray, dict, float]]): A list of tuples
where each tuple contains:
- detected_face (np.ndarray): The detected face as a NumPy array.
- face_region (dict): The image region represented as
{"x": x, "y": y, "w": w, "h": h}
- confidence (float): The confidence score associated with the detected face.
"""
# this is going to store a list of img itself (numpy), it region and confidence
extracted_faces = []
# img might be path, base64 or numpy array. Convert it to numpy whatever it is.
img, img_name = load_image(img)
base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
if detector_backend == "skip":
face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
else:
face_objs = DetectorWrapper.detect_faces(detector_backend, img, align)
# in case of no face found
if len(face_objs) == 0 and enforce_detection is True:
if img_name is not None:
raise ValueError(
f"Face could not be detected in {img_name}."
"Please confirm that the picture is a face photo "
"or consider to set enforce_detection param to False."
)
else:
raise ValueError(
"Face could not be detected. Please confirm that the picture is a face photo "
"or consider to set enforce_detection param to False."
)
if len(face_objs) == 0 and enforce_detection is False:
face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
for face_obj in face_objs:
current_img = face_obj.img
current_region = face_obj.facial_area
confidence = face_obj.confidence
if current_img.shape[0] > 0 and current_img.shape[1] > 0:
if grayscale is True:
current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
# resize and padding
factor_0 = target_size[0] / current_img.shape[0]
factor_1 = target_size[1] / current_img.shape[1]
factor = min(factor_0, factor_1)
dsize = (
int(current_img.shape[1] * factor),
int(current_img.shape[0] * factor),
)
current_img = cv2.resize(current_img, dsize)
diff_0 = target_size[0] - current_img.shape[0]
diff_1 = target_size[1] - current_img.shape[1]
if grayscale is False:
# Put the base image in the middle of the padded image
current_img = np.pad(
current_img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
(0, 0),
),
"constant",
)
else:
current_img = np.pad(
current_img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
),
"constant",
)
# double check: if target image is not still the same size with target.
if current_img.shape[0:2] != target_size:
current_img = cv2.resize(current_img, target_size)
# normalizing the image pixels
# what this line doing? must?
img_pixels = image.img_to_array(current_img)
img_pixels = np.expand_dims(img_pixels, axis=0)
img_pixels /= 255 # normalize input in [0, 1]
# int cast is for the exception - object of type 'float32' is not JSON serializable
region_obj = {
"x": current_region.x,
"y": current_region.y,
"w": current_region.w,
"h": current_region.h,
}
extracted_face = (img_pixels, region_obj, confidence)
extracted_faces.append(extracted_face)
if len(extracted_faces) == 0 and enforce_detection == True:
raise ValueError(
f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
)
return extracted_faces
def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
"""Normalize input image.
Args:
img (numpy array): the input image.
normalization (str, optional): the normalization technique. Defaults to "base",
for no normalization.
Returns:
numpy array: the normalized image.
"""
# issue 131 declares that some normalization techniques improves the accuracy
if normalization == "base":
return img
# @trevorgribble and @davedgd contributed this feature
# restore input in scale of [0, 255] because it was normalized in scale of
# [0, 1] in preprocess_face
img *= 255
if normalization == "raw":
pass # return just restored pixels
elif normalization == "Facenet":
mean, std = img.mean(), img.std()
img = (img - mean) / std
elif normalization == "Facenet2018":
# simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
img /= 127.5
img -= 1
elif normalization == "VGGFace":
# mean subtraction based on VGGFace1 training data
img[..., 0] -= 93.5940
img[..., 1] -= 104.7624
img[..., 2] -= 129.1863
elif normalization == "VGGFace2":
# mean subtraction based on VGGFace2 training data
img[..., 0] -= 91.4953
img[..., 1] -= 103.8827
img[..., 2] -= 131.0912
elif normalization == "ArcFace":
# Reference study: The faces are cropped and resized to 112×112,
# and each pixel (ranged between [0, 255]) in RGB images is normalised
# by subtracting 127.5 then divided by 128.
img -= 127.5
img /= 128
else:
raise ValueError(f"unimplemented normalization type - {normalization}")
return img
def find_target_size(model_name: str) -> tuple:
"""Find the target size of the model.
Args:
model_name (str): the model name.
Returns:
tuple: the target size.
"""
target_sizes = {
"VGG-Face": (224, 224),
"Facenet": (160, 160),
"Facenet512": (160, 160),
"OpenFace": (96, 96),
"DeepFace": (152, 152),
"DeepID": (47, 55),
"Dlib": (150, 150),
"ArcFace": (112, 112),
"SFace": (112, 112),
}
target_size = target_sizes.get(model_name)
if target_size == None:
raise ValueError(f"unimplemented model name - {model_name}")
return target_size

View File

@ -12,6 +12,9 @@ from deepface.detectors import (
Yolo,
YuNet,
)
from deepface.commons.logger import Logger
logger = Logger(module="deepface/detectors/DetectorWrapper.py")
def build_model(detector_backend: str) -> Any:
@ -52,19 +55,35 @@ def build_model(detector_backend: str) -> Any:
return face_detector_obj[detector_backend]
def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
detector_backend: str, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect face(s) from a given image
Args:
detector_backend (str): detector name
img (np.ndarray): pre-loaded image
alig (bool): enable or disable alignment after detection
align (bool): enable or disable alignment after detection
expand_percentage (int): expand detected facial area with a percentage (default is 0).
Returns:
results (List[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
face_detector: Detector = build_model(detector_backend)
return face_detector.detect_faces(img=img, align=align)
if expand_percentage < 0:
logger.warn(
f"Expand percentage cannot be negative but you set it to {expand_percentage}."
"Overwritten it to 0."
)
expand_percentage = 0
return face_detector.detect_faces(img=img, align=align, expand_percentage=expand_percentage)
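For reference, a sketch of the boundary-aware expansion arithmetic that each detector backend now applies. The helper name `expand_box` is hypothetical; in the PR the same four lines are inlined per detector, and the wrapper above overwrites negative percentages to 0.

```python
# hypothetical helper mirroring the per-detector expansion logic
def expand_box(x, y, w, h, img_h, img_w, expand_percentage):
    if expand_percentage < 0:
        expand_percentage = 0  # DetectorWrapper overwrites negative values to 0
    x2 = max(0, x - int((w * expand_percentage) / 100))      # expand left
    y2 = max(0, y - int((h * expand_percentage) / 100))      # expand top
    w2 = min(img_w, w + int((w * expand_percentage) / 100))  # expand right
    h2 = min(img_h, h + int((h * expand_percentage) / 100))  # expand bottom
    return x2, y2, w2, h2

# e.g. a 100x120 box at (50, 40) in a 480x640 image, expanded by 20%
print(expand_box(50, 40, 100, 120, 480, 640, 20))  # -> (30, 16, 120, 144)
```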

View File

@ -56,18 +56,27 @@ class DlibClient(Detector):
detector["sp"] = sp
return detector
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with dlib
Args:
face_detector (Any): dlib face detector object
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace objects
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
# this is not a must dependency. do not import it in the global level.
@ -79,6 +88,12 @@ class DlibClient(Detector):
"Please install using 'pip install dlib' "
) from e
if expand_percentage != 0:
logger.warn(
f"You set expand_percentage argument to {expand_percentage},"
"but dlib hog handles detection by itself"
)
resp = []
sp = self.model["sp"]

View File

@ -12,17 +12,27 @@ class FastMtCnnClient(Detector):
def __init__(self):
self.model = self.build_model()
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with mtcnn
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace objects
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
resp = []
@ -37,7 +47,16 @@ class FastMtCnnClient(Detector):
for current_detection in zip(*detections):
x, y, w, h = xyxy_to_xywh(current_detection[0])
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
confidence = current_detection[1]

View File

@ -29,17 +29,27 @@ class MediaPipeClient(Detector):
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7)
return face_detection
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with mediapipe
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace): A list of DetectedFace objects
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
resp = []
@ -74,7 +84,16 @@ class MediaPipeClient(Detector):
# left_ear = (int(landmarks[5].x * img_width), int(landmarks[5].y * img_height))
if x > 0 and y > 0:
detected_face = img[y : y + h, x : x + w]
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
if align:

View File

@ -1,5 +1,4 @@
from typing import List
import cv2
import numpy as np
from mtcnn import MTCNN
from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
@ -14,17 +13,27 @@ class MtCnnClient(Detector):
def __init__(self):
self.model = MTCNN()
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with mtcnn
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace objects
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
@ -32,14 +41,25 @@ class MtCnnClient(Detector):
detected_face = None
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB but OpenCV read BGR
# mtcnn expects RGB but OpenCV read BGR
# img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_rgb = img[:, :, ::-1]
detections = self.model.detect_faces(img_rgb)
if detections is not None and len(detections) > 0:
for current_detection in detections:
x, y, w, h = current_detection["box"]
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
confidence = current_detection["confidence"]

View File

@ -25,18 +25,27 @@ class OpenCvClient(Detector):
detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
return detector
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with opencv
Args:
face_detector (Any): opencv face detector object
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
resp = []
@ -56,7 +65,15 @@ class OpenCvClient(Detector):
if len(faces) > 0:
for (x, y, w, h), confidence in zip(faces, scores):
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
if align:
left_eye, right_eye = self.find_eyes(img=detected_face)

View File

@ -9,17 +9,27 @@ class RetinaFaceClient(Detector):
def __init__(self):
self.model = rf.build_model()
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with retinaface
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace object
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
resp = []
@ -38,10 +48,14 @@ class RetinaFaceClient(Detector):
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
confidence = identity["score"]
# detected_face = img[int(y):int(y+h), int(x):int(x+w)] #opencv
detected_face = img[
facial_area[1] : facial_area[3], facial_area[0] : facial_area[2]
]
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
if align:
landmarks = identity["landmarks"]

View File

@ -71,17 +71,27 @@ class SsdClient(Detector):
return detector
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with ssd
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace object
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
resp = []
@ -92,16 +102,14 @@ class SsdClient(Detector):
target_size = (300, 300)
base_img = img.copy() # we will restore base_img to img later
original_size = img.shape
img = cv2.resize(img, target_size)
current_img = cv2.resize(img, target_size)
aspect_ratio_x = original_size[1] / target_size[1]
aspect_ratio_y = original_size[0] / target_size[0]
imageBlob = cv2.dnn.blobFromImage(image=img)
imageBlob = cv2.dnn.blobFromImage(image=current_img)
face_detector = self.model["face_detector"]
face_detector.setInput(imageBlob)
@ -126,17 +134,21 @@ class SsdClient(Detector):
bottom = instance["bottom"]
top = instance["top"]
detected_face = base_img[
int(top * aspect_ratio_y) : int(bottom * aspect_ratio_y),
int(left * aspect_ratio_x) : int(right * aspect_ratio_x),
]
x = int(left * aspect_ratio_x)
y = int(top * aspect_ratio_y)
w = int(right * aspect_ratio_x) - int(left * aspect_ratio_x)
h = int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y)
face_region = FacialAreaRegion(
x=int(left * aspect_ratio_x),
y=int(top * aspect_ratio_y),
w=int(right * aspect_ratio_x) - int(left * aspect_ratio_x),
h=int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y),
)
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
face_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
confidence = instance["confidence"]
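A numeric illustration (values are made up) of how the SSD path maps a detection from the 300x300 blob back to the original resolution with the aspect ratios computed above.

```python
original_h, original_w = 720, 1280
target_h, target_w = 300, 300

aspect_ratio_x = original_w / target_w  # 1280 / 300 ~ 4.27
aspect_ratio_y = original_h / target_h  # 720 / 300 = 2.4

# a detection expressed in blob coordinates
left, top, right, bottom = 90, 60, 150, 150

x = int(left * aspect_ratio_x)                                # 384
y = int(top * aspect_ratio_y)                                 # 144
w = int(right * aspect_ratio_x) - int(left * aspect_ratio_x)  # 640 - 384 = 256
h = int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y)  # 360 - 144 = 216
print(x, y, w, h)
```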

View File

@ -51,18 +51,27 @@ class YoloClient(Detector):
# Return face_detector
return YOLO(weight_path)
def detect_faces(self, img: np.ndarray, align: bool = False) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = False, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with yolo
Args:
face_detector (Any): yolo face detector object
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
resp = []
@ -78,7 +87,15 @@ class YoloClient(Detector):
x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
region = FacialAreaRegion(x=x, y=y, w=w, h=h)
detected_face = img[y : y + h, x : x + w].copy()
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
if align:
# Tuple of x,y and confidence for left eye

View File

@ -49,17 +49,27 @@ class YuNetClient(Detector):
) from err
return face_detector
def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List[DetectedFace]:
"""
Detect and align face with yunet
Args:
img (np.ndarray): pre-loaded image
align (bool): default is true
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace objects
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
# FaceDetector.detect_faces does not support score_threshold parameter.
@ -115,7 +125,16 @@ class YuNetClient(Detector):
)
confidence = face[-1]
confidence = f"{confidence:.2f}"
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
# expand the facial area to be extracted and stay within img.shape limits
x2 = max(0, x - int((w * expand_percentage) / 100)) # expand left
y2 = max(0, y - int((h * expand_percentage) / 100)) # expand top
w2 = min(img.shape[1], w + int((w * expand_percentage) / 100)) # expand right
h2 = min(img.shape[0], h + int((h * expand_percentage) / 100)) # expand bottom
# detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
detected_face = img[int(y2) : int(y2 + h2), int(x2) : int(x2 + w2)]
img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
if align:
detected_face = detection.align_face(detected_face, (x_re, y_re), (x_le, y_le))

View File

@ -8,19 +8,28 @@ import numpy as np
# pylint: disable=unnecessary-pass, too-few-public-methods
class Detector(ABC):
@abstractmethod
def detect_faces(self, img: np.ndarray, align: bool = True) -> List["DetectedFace"]:
def detect_faces(
self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
) -> List["DetectedFace"]:
"""
Detect faces from a given image
Interface for detect and align face
Args:
img (np.ndarray): pre-loaded image as a NumPy array
align (bool): enable or disable alignment after face detection
img (np.ndarray): pre-loaded image as numpy array
align (bool): flag to enable or disable alignment after detection (default is True)
expand_percentage (int): expand detected facial area with a percentage
Returns:
results (List[DetectedFace]): A list of DetectedFace object
results (List[Tuple[DetectedFace]): A list of DetectedFace objects
where each object contains:
- face (np.ndarray): The detected face as a NumPy array.
- face_region (List[float]): The image region represented as
a list of floats e.g. [x, y, w, h]
- confidence (float): The confidence score associated with the detected face.
- img (np.ndarray): The detected face as a NumPy array.
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
- confidence (float): The confidence score associated with the detected face.
"""
pass
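A sketch of what a backend implementing the extended interface could look like; `DummyDetector` is hypothetical and simply returns the whole frame as a single face, ignoring alignment and expansion.

```python
from typing import List
import numpy as np
from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion

class DummyDetector(Detector):
    def detect_faces(
        self, img: np.ndarray, align: bool = True, expand_percentage: int = 0
    ) -> List[DetectedFace]:
        # treat the entire image as one detected face with zero confidence
        region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
        return [DetectedFace(img=img, facial_area=region, confidence=0)]
```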

View File

@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Union, List
from typing import Any, Union, List, Tuple
import numpy as np
from deepface.commons import functions
@ -15,6 +15,9 @@ else:
class FacialRecognition(ABC):
model: Union[Model, Any]
model_name: str
input_shape: Tuple[int, int]
output_shape: int
@abstractmethod
def find_embeddings(self, img: np.ndarray) -> List[float]:
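Since every recognition client now carries its own geometry, callers can read the expected input and embedding sizes directly from the model instead of the old `find_target_size` lookup table. A short sketch, assuming "Facenet" resolves to the FaceNet-128d client shown earlier in this diff.

```python
from deepface import DeepFace
from deepface.models.FacialRecognition import FacialRecognition

model: FacialRecognition = DeepFace.build_model(model_name="Facenet")
print(model.input_shape)   # (160, 160)
print(model.output_shape)  # 128
```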

View File

@ -6,8 +6,7 @@ import numpy as np
from tqdm import tqdm
# project dependencies
from deepface.modules import modeling
from deepface.commons import functions
from deepface.modules import modeling, detection
from deepface.extendedmodels import Gender, Race, Emotion
@ -17,6 +16,7 @@ def analyze(
enforce_detection: bool = True,
detector_backend: str = "opencv",
align: bool = True,
expand_percentage: int = 0,
silent: bool = False,
) -> List[Dict[str, Any]]:
"""
@ -41,6 +41,8 @@ def analyze(
align (boolean): Perform alignment based on the eye positions (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False).
@ -114,16 +116,20 @@ def analyze(
# ---------------------------------
resp_objects = []
img_objs = functions.extract_faces(
img=img_path,
img_objs = detection.extract_faces(
img_path=img_path,
target_size=(224, 224),
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
)
for img_content, img_region, img_confidence in img_objs:
for img_obj in img_objs:
img_content = img_obj["face"]
img_region = img_obj["facial_area"]
img_confidence = img_obj["confidence"]
if img_content.shape[0] > 0 and img_content.shape[1] > 0:
obj = {}
# facial attribute analysis

View File

@ -3,10 +3,26 @@ from typing import Any, Dict, List, Tuple, Union
# 3rd part dependencies
import numpy as np
import cv2
from PIL import Image
# project dependencies
from deepface.modules import preprocessing
from deepface.models.Detector import DetectedFace, FacialAreaRegion
from deepface.detectors import DetectorWrapper
from deepface.commons import functions
from deepface.commons.logger import Logger
logger = Logger(module="deepface/modules/detection.py")
# pylint: disable=no-else-raise
tf_major_version = functions.get_tf_major_version()
if tf_major_version == 1:
from keras.preprocessing import image
elif tf_major_version == 2:
from tensorflow.keras.preprocessing import image
def extract_faces(
@ -15,7 +31,9 @@ def extract_faces(
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
expand_percentage: int = 0,
grayscale: bool = False,
human_readable=False,
) -> List[Dict[str, Any]]:
"""
Extract faces from a given image
@ -35,9 +53,13 @@ def extract_faces(
align (bool): Flag to enable face alignment (default is True).
expand_percentage (int): expand detected facial area with a percentage
grayscale (boolean): Flag to convert the image to grayscale before
processing (default is False).
human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
or 4D BGR for ML models (default is False).
Returns:
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
@ -48,27 +70,113 @@ def extract_faces(
resp_objs = []
img_objs = functions.extract_faces(
img=img_path,
target_size=target_size,
detector_backend=detector_backend,
grayscale=grayscale,
enforce_detection=enforce_detection,
align=align,
)
# img might be path, base64 or numpy array. Convert it to numpy whatever it is.
img, img_name = preprocessing.load_image(img_path)
for img, region, confidence in img_objs:
resp_obj = {}
base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])
if detector_backend == "skip":
face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
else:
face_objs = DetectorWrapper.detect_faces(
detector_backend=detector_backend,
img=img,
align=align,
expand_percentage=expand_percentage,
)
# in case of no face found
if len(face_objs) == 0 and enforce_detection is True:
if img_name is not None:
raise ValueError(
f"Face could not be detected in {img_name}."
"Please confirm that the picture is a face photo "
"or consider to set enforce_detection param to False."
)
else:
raise ValueError(
"Face could not be detected. Please confirm that the picture is a face photo "
"or consider to set enforce_detection param to False."
)
if len(face_objs) == 0 and enforce_detection is False:
face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
for face_obj in face_objs:
current_img = face_obj.img
current_region = face_obj.facial_area
confidence = face_obj.confidence
if current_img.shape[0] == 0 or current_img.shape[1] == 0:
continue
if grayscale is True:
current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
# resize and padding
factor_0 = target_size[0] / current_img.shape[0]
factor_1 = target_size[1] / current_img.shape[1]
factor = min(factor_0, factor_1)
dsize = (
int(current_img.shape[1] * factor),
int(current_img.shape[0] * factor),
)
current_img = cv2.resize(current_img, dsize)
diff_0 = target_size[0] - current_img.shape[0]
diff_1 = target_size[1] - current_img.shape[1]
if grayscale is False:
# Put the base image in the middle of the padded image
current_img = np.pad(
current_img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
(0, 0),
),
"constant",
)
else:
current_img = np.pad(
current_img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
),
"constant",
)
# double check: if target image is not still the same size with target.
if current_img.shape[0:2] != target_size:
current_img = cv2.resize(current_img, target_size)
# normalizing the image pixels
# what this line doing? must?
img_pixels = image.img_to_array(current_img)
img_pixels = np.expand_dims(img_pixels, axis=0)
img_pixels /= 255 # normalize input in [0, 1]
# discard expanded dimension
if len(img.shape) == 4:
img = img[0]
if human_readable is True and len(img_pixels.shape) == 4:
img_pixels = img_pixels[0]
# bgr to rgb
resp_obj["face"] = img[:, :, ::-1]
resp_obj["facial_area"] = region
resp_obj["confidence"] = confidence
resp_objs.append(resp_obj)
resp_objs.append(
{
"face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
"facial_area": {
"x": current_region.x,
"y": current_region.y,
"w": current_region.w,
"h": current_region.h,
},
"confidence": confidence,
}
)
if len(resp_objs) == 0 and enforce_detection == True:
raise ValueError(
f"Detected face shape is {img.shape}. Consider to set enforce_detection arg to False."
)
return resp_objs
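A sketch of consuming the refactored `detection.extract_faces`; the path is a placeholder. With `human_readable=True` the face is returned as a 3D RGB array instead of the 4D BGR tensor fed to the models.

```python
from deepface.modules import detection

face_objs = detection.extract_faces(
    img_path="dataset/img1.jpg",
    target_size=(224, 224),
    detector_backend="opencv",
    human_readable=True,
)
for face_obj in face_objs:
    print(face_obj["face"].shape)   # (224, 224, 3), pixels scaled to [0, 1]
    print(face_obj["facial_area"])  # {"x": ..., "y": ..., "w": ..., "h": ...}
    print(face_obj["confidence"])
```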

View File

@ -0,0 +1,131 @@
import os
from typing import Union, Tuple
import base64
from pathlib import Path
# 3rd party
import numpy as np
import cv2
from PIL import Image
import requests
def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
"""
Load image from path, url, base64 or numpy array.
Args:
img: a path, url, base64 or numpy array.
Returns:
image (numpy array): the loaded image in BGR format
image name (str): image name itself
"""
# The image is already a numpy array
if isinstance(img, np.ndarray):
return img, "numpy array"
if isinstance(img, Path):
img = str(img)
if not isinstance(img, str):
raise ValueError(f"img must be numpy array or str but it is {type(img)}")
# The image is a base64 string
if img.startswith("data:image/"):
return load_base64(img), "base64 encoded string"
# The image is a url
if img.startswith("http"):
return (
np.array(Image.open(requests.get(img, stream=True, timeout=60).raw).convert("BGR")),
# return url as image name
img,
)
# The image is a path
if os.path.isfile(img) is not True:
raise ValueError(f"Confirm that {img} exists")
# image must be a file on the system then
# image name must have english characters
if img.isascii() is False:
raise ValueError(f"Input image must not have non-english characters - {img}")
img_obj_bgr = cv2.imread(img)
# img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
return img_obj_bgr, img
def load_base64(uri: str) -> np.ndarray:
"""Load image from base64 string.
Args:
uri: a base64 string.
Returns:
numpy array: the loaded image.
"""
encoded_data = uri.split(",")[1]
nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
# img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
return img_bgr
def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
"""Normalize input image.
Args:
img (numpy array): the input image.
normalization (str, optional): the normalization technique. Defaults to "base",
for no normalization.
Returns:
numpy array: the normalized image.
"""
# issue 131 declares that some normalization techniques improves the accuracy
if normalization == "base":
return img
# @trevorgribble and @davedgd contributed this feature
# restore input in scale of [0, 255] because it was normalized in scale of
# [0, 1] in preprocess_face
img *= 255
if normalization == "raw":
pass # return just restored pixels
elif normalization == "Facenet":
mean, std = img.mean(), img.std()
img = (img - mean) / std
elif normalization == "Facenet2018":
# simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
img /= 127.5
img -= 1
elif normalization == "VGGFace":
# mean subtraction based on VGGFace1 training data
img[..., 0] -= 93.5940
img[..., 1] -= 104.7624
img[..., 2] -= 129.1863
elif normalization == "VGGFace2":
# mean subtraction based on VGGFace2 training data
img[..., 0] -= 91.4953
img[..., 1] -= 103.8827
img[..., 2] -= 131.0912
elif normalization == "ArcFace":
# Reference study: The faces are cropped and resized to 112×112,
# and each pixel (ranged between [0, 255]) in RGB images is normalised
# by subtracting 127.5 then divided by 128.
img -= 127.5
img /= 128
else:
raise ValueError(f"unimplemented normalization type - {normalization}")
return img
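A minimal sketch of the relocated `normalize_input` helper. Since `extract_faces` returns pixels in [0, 1], the function first restores the [0, 255] range and then applies the model-specific scheme; a random tensor stands in for a real face crop here.

```python
import numpy as np
from deepface.modules import preprocessing

face = np.random.rand(1, 160, 160, 3).astype(np.float32)  # placeholder face in [0, 1]

normalized = preprocessing.normalize_input(img=face.copy(), normalization="Facenet2018")
# Facenet2018: restore to [0, 255], then / 127.5 - 1, i.e. roughly in [-1, 1]
print(normalized.min(), normalized.max())
```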

View File

@ -4,7 +4,7 @@ import numpy as np
import pandas as pd
import cv2
from deepface import DeepFace
from deepface.commons import functions
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger
logger = Logger(module="commons.realtime")
@ -32,12 +32,13 @@ def analysis(
enable_emotion = True
enable_age_gender = True
# ------------------------
# find custom values for this input set
target_size = functions.find_target_size(model_name=model_name)
# ------------------------
# build models once to store them in the memory
# otherwise, they will be built after cam started and this will cause delays
DeepFace.build_model(model_name=model_name)
model: FacialRecognition = DeepFace.build_model(model_name=model_name)
# find custom values for this input set
target_size = model.input_shape
logger.info(f"facial recognition model {model_name} is just built")
if enable_face_analysis:

View File

@ -10,9 +10,10 @@ import pandas as pd
from tqdm import tqdm
# project dependencies
from deepface.commons import functions, distance as dst
from deepface.commons import distance as dst
from deepface.commons.logger import Logger
from deepface.modules import representation
from deepface.modules import representation, detection, modeling
from deepface.models.FacialRecognition import FacialRecognition
logger = Logger(module="deepface/modules/recognition.py")
@ -25,6 +26,7 @@ def find(
enforce_detection: bool = True,
detector_backend: str = "opencv",
align: bool = True,
expand_percentage: int = 0,
threshold: Optional[float] = None,
normalization: str = "base",
silent: bool = False,
@ -54,6 +56,8 @@ def find(
align (boolean): Perform alignment based on the eye positions.
expand_percentage (int): expand detected facial area with a percentage (default is 0).
threshold (float): Specify a threshold to determine whether a pair represents the same
person or different individuals. This threshold is used for comparing distances.
If left unset, default pre-tuned threshold values will be applied based on the specified
@ -89,7 +93,8 @@ def find(
if os.path.isdir(db_path) is not True:
raise ValueError("Passed db_path does not exist!")
target_size = functions.find_target_size(model_name=model_name)
model: FacialRecognition = modeling.build_model(model_name)
target_size = model.input_shape
# ---------------------------------------
@ -202,18 +207,21 @@ def find(
)
# img path might have more than once face
source_objs = functions.extract_faces(
img=img_path,
source_objs = detection.extract_faces(
img_path=img_path,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
)
resp_obj = []
for source_img, source_region, _ in source_objs:
for source_obj in source_objs:
source_img = source_obj["face"]
source_region = source_obj["facial_area"]
target_embedding_obj = representation.represent(
img_path=source_img,
model_name=model_name,
@ -245,11 +253,11 @@ def find(
)
if distance_metric == "cosine":
distance = dst.findCosineDistance(source_representation, target_representation)
distance = dst.find_cosine_distance(source_representation, target_representation)
elif distance_metric == "euclidean":
distance = dst.findEuclideanDistance(source_representation, target_representation)
distance = dst.find_euclidean_distance(source_representation, target_representation)
elif distance_metric == "euclidean_l2":
distance = dst.findEuclideanDistance(
distance = dst.find_euclidean_distance(
dst.l2_normalize(source_representation),
dst.l2_normalize(target_representation),
)
@ -259,7 +267,7 @@ def find(
distances.append(distance)
# ---------------------------
target_threshold = threshold or dst.findThreshold(model_name, distance_metric)
target_threshold = threshold or dst.find_threshold(model_name, distance_metric)
result_df["threshold"] = target_threshold
result_df["distance"] = distances
@ -305,6 +313,7 @@ def __find_bulk_embeddings(
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
expand_percentage: int = 0,
normalization: str = "base",
silent: bool = False,
):
@ -313,15 +322,24 @@ def __find_bulk_embeddings(
Args:
employees (list): list of exact image paths
model_name (str): facial recognition model name
target_size (tuple): expected input shape of facial
recognition model
target_size (tuple): expected input shape of facial recognition model
detector_backend (str): face detector model name
enforce_detection (bool): set this to False if you
want to proceed when you cannot detect any face
align (bool): enable or disable alignment of image
before feeding to facial recognition model
expand_percentage (int): expand detected facial area with a
percentage (default is 0).
normalization (bool): normalization technique
silent (bool): enable or disable informative logging
Returns:
representations (list): pivot list of embeddings with
@ -333,16 +351,19 @@ def __find_bulk_embeddings(
desc="Finding representations",
disable=silent,
):
img_objs = functions.extract_faces(
img=employee,
img_objs = detection.extract_faces(
img_path=employee,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
)
for img_content, img_region, _ in img_objs:
for img_obj in img_objs:
img_content = img_obj["face"]
img_region = img_obj["facial_area"]
embedding_obj = representation.represent(
img_path=img_content,
model_name=model_name,

View File

@ -6,8 +6,7 @@ import numpy as np
import cv2
# project dependencies
from deepface.modules import modeling
from deepface.commons import functions
from deepface.modules import modeling, detection, preprocessing
from deepface.models.FacialRecognition import FacialRecognition
@ -17,6 +16,7 @@ def represent(
enforce_detection: bool = True,
detector_backend: str = "opencv",
align: bool = True,
expand_percentage: int = 0,
normalization: str = "base",
) -> List[Dict[str, Any]]:
"""
@ -38,6 +38,8 @@ def represent(
align (boolean): Perform alignment based on the eye positions.
expand_percentage (int): expand detected facial area with a percentage (default is 0).
normalization (string): Normalize the input image before feeding it to the model.
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
@ -61,19 +63,20 @@ def represent(
# ---------------------------------
# we have run pre-process in verification. so, this can be skipped if it is coming from verify.
target_size = functions.find_target_size(model_name=model_name)
target_size = model.input_shape
if detector_backend != "skip":
img_objs = functions.extract_faces(
img=img_path,
img_objs = detection.extract_faces(
img_path=img_path,
target_size=(target_size[1], target_size[0]),
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
)
else: # skip
# Try load. If load error, will raise exception internal
img, _ = functions.load_image(img_path)
img, _ = preprocessing.load_image(img_path)
# --------------------------------
if len(img.shape) == 4:
img = img[0] # e.g. (1, 224, 224, 3) to (224, 224, 3)
@ -85,13 +88,21 @@ def represent(
img = (img.astype(np.float32) / 255.0).astype(np.float32)
# --------------------------------
# make dummy region and confidence to keep compatibility with `extract_faces`
img_region = {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]}
img_objs = [(img, img_region, 0)]
img_objs = [
{
"face": img,
"facial_area": {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]},
"confidence": 0,
}
]
# ---------------------------------
for img, region, confidence in img_objs:
for img_obj in img_objs:
img = img_obj["face"]
region = img_obj["facial_area"]
confidence = img_obj["confidence"]
# custom normalization
img = functions.normalize_input(img=img, normalization=normalization)
img = preprocessing.normalize_input(img=img, normalization=normalization)
embedding = model.find_embeddings(img)
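A usage sketch of the "skip" path shown above, assuming the input is a pre-cropped face so that detection can be bypassed; the path is a placeholder and the embedding length depends on the chosen model.

```python
from deepface import DeepFace

objs = DeepFace.represent(
    img_path="dataset/img1.jpg",
    model_name="VGG-Face",
    detector_backend="skip",   # no detection; input is wrapped in a dummy region
)
print(objs[0]["facial_area"])     # dummy region spanning the whole input
print(len(objs[0]["embedding"]))  # embedding length depends on the model
```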

View File

@ -6,8 +6,9 @@ from typing import Any, Dict, Union
import numpy as np
# project dependencies
from deepface.commons import functions, distance as dst
from deepface.modules import representation
from deepface.commons import distance as dst
from deepface.modules import representation, detection, modeling
from deepface.models.FacialRecognition import FacialRecognition
def verify(
@ -18,6 +19,7 @@ def verify(
distance_metric: str = "cosine",
enforce_detection: bool = True,
align: bool = True,
expand_percentage: int = 0,
normalization: str = "base",
) -> Dict[str, Any]:
"""
@ -48,6 +50,8 @@ def verify(
align (bool): Flag to enable face alignment (default is True).
expand_percentage (int): expand detected facial area with a percentage (default is 0).
normalization (string): Normalize the input image before feeding it to the model.
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
@ -79,32 +83,39 @@ def verify(
tic = time.time()
# --------------------------------
target_size = functions.find_target_size(model_name=model_name)
model: FacialRecognition = modeling.build_model(model_name)
target_size = model.input_shape
# img pairs might have many faces
img1_objs = functions.extract_faces(
img=img1_path,
img1_objs = detection.extract_faces(
img_path=img1_path,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
)
img2_objs = functions.extract_faces(
img=img2_path,
img2_objs = detection.extract_faces(
img_path=img2_path,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
)
# --------------------------------
distances = []
regions = []
# now we will find the face pair with minimum distance
for img1_content, img1_region, _ in img1_objs:
for img2_content, img2_region, _ in img2_objs:
for img1_obj in img1_objs:
img1_content = img1_obj["face"]
img1_region = img1_obj["facial_area"]
for img2_obj in img2_objs:
img2_content = img2_obj["face"]
img2_region = img2_obj["facial_area"]
img1_embedding_obj = representation.represent(
img_path=img1_content,
model_name=model_name,
@ -127,11 +138,11 @@ def verify(
img2_representation = img2_embedding_obj[0]["embedding"]
if distance_metric == "cosine":
distance = dst.findCosineDistance(img1_representation, img2_representation)
distance = dst.find_cosine_distance(img1_representation, img2_representation)
elif distance_metric == "euclidean":
distance = dst.findEuclideanDistance(img1_representation, img2_representation)
distance = dst.find_euclidean_distance(img1_representation, img2_representation)
elif distance_metric == "euclidean_l2":
distance = dst.findEuclideanDistance(
distance = dst.find_euclidean_distance(
dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation)
)
else:
@ -141,7 +152,7 @@ def verify(
regions.append((img1_region, img2_region))
# -------------------------------
threshold = dst.findThreshold(model_name, distance_metric)
threshold = dst.find_threshold(model_name, distance_metric)
distance = min(distances) # best distance
facial_areas = regions[np.argmin(distances)]
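A short sketch of calling the refactored `verify`: every face pair across the two images is compared and the pair with minimum distance is kept, as in the loop above. Paths are placeholders.

```python
from deepface import DeepFace

result = DeepFace.verify(
    img1_path="dataset/img1.jpg",
    img2_path="dataset/img2.jpg",
    model_name="Facenet",
    distance_metric="euclidean_l2",
    expand_percentage=10,
)
print(result["distance"], result["threshold"], result["verified"])
print(result["facial_areas"])  # regions of the best-matching face pair
```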

View File

@ -1,7 +1,8 @@
import matplotlib.pyplot as plt
import numpy as np
from deepface import DeepFace
from deepface.commons import functions
from deepface.commons import distance
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger
logger = Logger()
@ -11,9 +12,9 @@ logger = Logger()
model_name = "VGG-Face"
model = DeepFace.build_model(model_name=model_name)
model: FacialRecognition = DeepFace.build_model(model_name=model_name)
target_size = functions.find_target_size(model_name)
target_size = model.input_shape
logger.info(f"target_size: {target_size}")
@ -22,21 +23,34 @@ logger.info(f"target_size: {target_size}")
img1 = DeepFace.extract_faces(img_path="dataset/img1.jpg", target_size=target_size)[0]["face"]
img1 = np.expand_dims(img1, axis=0) # to (1, 224, 224, 3)
img1_representation = model.predict(img1)[0, :]
img1_representation = model.find_embeddings(img1)
img2 = DeepFace.extract_faces(img_path="dataset/img3.jpg", target_size=target_size)[0]["face"]
img2 = np.expand_dims(img2, axis=0)
img2_representation = model.predict(img2)[0, :]
img2_representation = model.find_embeddings(img2)
img1_representation = np.array(img1_representation)
img2_representation = np.array(img2_representation)
# ----------------------------------------------
# distance between two images
# distance between two images - euclidean distance formula
distance_vector = np.square(img1_representation - img2_representation)
logger.debug(distance_vector)
current_distance = np.sqrt(distance_vector.sum())
logger.info(f"Euclidean distance: {current_distance}")
distance = np.sqrt(distance_vector.sum())
logger.info(f"Euclidean distance: {distance}")
threshold = distance.find_threshold(model_name=model_name, distance_metric="euclidean")
logger.info(f"Threshold for {model_name}-euclidean pair is {threshold}")
if current_distance < threshold:
logger.info(
f"This pair is same person because its distance {current_distance}"
f" is less than threshold {threshold}"
)
else:
logger.info(
f"This pair is different persons because its distance {current_distance}"
f" is greater than threshold {threshold}"
)
# ----------------------------------------------
# expand vectors to be shown better in graph
@ -75,7 +89,7 @@ im = plt.imshow(img2_graph, interpolation="nearest", cmap=plt.cm.ocean)
plt.colorbar()
ax5 = fig.add_subplot(3, 2, 5)
plt.text(0.35, 0, f"Distance: {distance}")
plt.text(0.35, 0, f"Distance: {current_distance}")
plt.axis("off")
ax6 = fig.add_subplot(3, 2, 6)

View File

@ -1,12 +1,14 @@
import numpy as np
import pytest
from deepface import DeepFace
from deepface.commons.logger import Logger
logger = Logger("tests/test_extract_faces.py")
detectors = ["opencv", "mtcnn"]
def test_different_detectors():
detectors = ["opencv", "mtcnn"]
for detector in detectors:
img_objs = DeepFace.extract_faces(img_path="dataset/img11.jpg", detector_backend=detector)
for img_obj in img_objs:
@ -22,3 +24,21 @@ def test_different_detectors():
img = img_obj["face"]
assert img.shape[0] > 0 and img.shape[1] > 0
logger.info(f"✅ extract_faces for {detector} backend test is done")
def test_backends_for_enforced_detection_with_non_facial_inputs():
black_img = np.zeros([224, 224, 3])
for detector in detectors:
with pytest.raises(ValueError):
_ = DeepFace.extract_faces(img_path=black_img, detector_backend=detector)
logger.info("✅ extract_faces for enforced detection and non-facial image test is done")
def test_backends_for_not_enforced_detection_with_non_facial_inputs():
black_img = np.zeros([224, 224, 3])
for detector in detectors:
objs = DeepFace.extract_faces(
img_path=black_img, detector_backend=detector, enforce_detection=False
)
assert objs[0]["face"].shape == (224, 224, 3)
logger.info("✅ extract_faces for not enforced detection and non-facial image test is done")

View File

@ -6,7 +6,7 @@ from deepface.commons.logger import Logger
logger = Logger("tests/test_find.py")
threshold = distance.findThreshold(model_name="VGG-Face", distance_metric="cosine")
threshold = distance.find_threshold(model_name="VGG-Face", distance_metric="cosine")
def test_find_with_exact_path():