Merge pull request #1175 from serengil/feat-task-0704-resizing-functionality

resize functionality moved to represent module
Sefik Ilkin Serengil 2024-04-07 18:39:28 +01:00 committed by GitHub
commit fb4da39961
12 changed files with 152 additions and 168 deletions

View File

@ -237,7 +237,6 @@ demographies = DeepFace.analyze(img_path = "img4.jpg",
#face detection and alignment
face_objs = DeepFace.extract_faces(img_path = "img.jpg",
target_size = (224, 224),
detector_backend = backends[4]
)
```
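Since `target_size` is dropped from `extract_faces` in this change, callers that still need a fixed shape can resize the returned face themselves, the same way the deprecated `detectFace` wrapper now does internally with `cv2.resize`. A minimal sketch, assuming `img.jpg` contains at least one detectable face:

```python
import cv2
from deepface import DeepFace

# faces now come back at their detected size, as RGB floats in [0, 1]
face_objs = DeepFace.extract_faces(img_path="img.jpg", detector_backend="opencv")
face = face_objs[0]["face"]

# resize manually if a model-specific shape such as (224, 224) is still required
face_224 = cv2.resize(face, (224, 224))
```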

View File

@ -2,7 +2,7 @@
import os
import warnings
import logging
from typing import Any, Dict, List, Tuple, Union, Optional
from typing import Any, Dict, List, Union, Optional
# this has to be set before importing tensorflow
os.environ["TF_USE_LEGACY_KERAS"] = "1"
@ -10,6 +10,7 @@ os.environ["TF_USE_LEGACY_KERAS"] = "1"
# pylint: disable=wrong-import-position
# 3rd party dependencies
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
@ -439,7 +440,6 @@ def stream(
def extract_faces(
img_path: Union[str, np.ndarray],
target_size: Optional[Tuple[int, int]] = (224, 224),
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
@ -453,9 +453,6 @@ def extract_faces(
img_path (str or np.ndarray): Path to the first image. Accepts exact image path
as a string, numpy array (BGR), or base64 encoded images.
target_size (tuple): final shape of facial image. black pixels will be
added to resize the image (default is (224, 224)).
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
@ -485,13 +482,11 @@ def extract_faces(
return detection.extract_faces(
img_path=img_path,
target_size=target_size,
detector_backend=detector_backend,
enforce_detection=enforce_detection,
align=align,
expand_percentage=expand_percentage,
grayscale=grayscale,
human_readable=True,
)
@ -538,7 +533,6 @@ def detectFace(
logger.warn("Function detectFace is deprecated. Use extract_faces instead.")
face_objs = extract_faces(
img_path=img_path,
target_size=target_size,
detector_backend=detector_backend,
enforce_detection=enforce_detection,
align=align,
@ -547,4 +541,5 @@ def detectFace(
extracted_face = None
if len(face_objs) > 0:
extracted_face = face_objs[0]["face"]
extracted_face = cv2.resize(extracted_face, target_size)
return extracted_face

View File

@ -6,7 +6,7 @@ import numpy as np
from tqdm import tqdm
# project dependencies
from deepface.modules import modeling, detection
from deepface.modules import modeling, detection, preprocessing
from deepface.extendedmodels import Gender, Race, Emotion
@ -118,7 +118,6 @@ def analyze(
img_objs = detection.extract_faces(
img_path=img_path,
target_size=(224, 224),
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
@ -130,60 +129,68 @@ def analyze(
img_content = img_obj["face"]
img_region = img_obj["facial_area"]
img_confidence = img_obj["confidence"]
if img_content.shape[0] > 0 and img_content.shape[1] > 0:
obj = {}
# facial attribute analysis
pbar = tqdm(
range(0, len(actions)),
desc="Finding actions",
disable=silent if len(actions) > 1 else True,
)
for index in pbar:
action = actions[index]
pbar.set_description(f"Action: {action}")
if img_content.shape[0] == 0 or img_content.shape[1] == 0:
continue
if action == "emotion":
emotion_predictions = modeling.build_model("Emotion").predict(img_content)
sum_of_predictions = emotion_predictions.sum()
# rgb to bgr
img_content = img_content[:, :, ::-1]
obj["emotion"] = {}
for i, emotion_label in enumerate(Emotion.labels):
emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
obj["emotion"][emotion_label] = emotion_prediction
# resize input image
img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224))
obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
obj = {}
# facial attribute analysis
pbar = tqdm(
range(0, len(actions)),
desc="Finding actions",
disable=silent if len(actions) > 1 else True,
)
for index in pbar:
action = actions[index]
pbar.set_description(f"Action: {action}")
elif action == "age":
apparent_age = modeling.build_model("Age").predict(img_content)
# int cast is for exception - object of type 'float32' is not JSON serializable
obj["age"] = int(apparent_age)
if action == "emotion":
emotion_predictions = modeling.build_model("Emotion").predict(img_content)
sum_of_predictions = emotion_predictions.sum()
elif action == "gender":
gender_predictions = modeling.build_model("Gender").predict(img_content)
obj["gender"] = {}
for i, gender_label in enumerate(Gender.labels):
gender_prediction = 100 * gender_predictions[i]
obj["gender"][gender_label] = gender_prediction
obj["emotion"] = {}
for i, emotion_label in enumerate(Emotion.labels):
emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
obj["emotion"][emotion_label] = emotion_prediction
obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]
elif action == "race":
race_predictions = modeling.build_model("Race").predict(img_content)
sum_of_predictions = race_predictions.sum()
elif action == "age":
apparent_age = modeling.build_model("Age").predict(img_content)
# int cast is for exception - object of type 'float32' is not JSON serializable
obj["age"] = int(apparent_age)
obj["race"] = {}
for i, race_label in enumerate(Race.labels):
race_prediction = 100 * race_predictions[i] / sum_of_predictions
obj["race"][race_label] = race_prediction
elif action == "gender":
gender_predictions = modeling.build_model("Gender").predict(img_content)
obj["gender"] = {}
for i, gender_label in enumerate(Gender.labels):
gender_prediction = 100 * gender_predictions[i]
obj["gender"][gender_label] = gender_prediction
obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]
# -----------------------------
# mention facial areas
obj["region"] = img_region
# include image confidence
obj["face_confidence"] = img_confidence
elif action == "race":
race_predictions = modeling.build_model("Race").predict(img_content)
sum_of_predictions = race_predictions.sum()
resp_objects.append(obj)
obj["race"] = {}
for i, race_label in enumerate(Race.labels):
race_prediction = 100 * race_predictions[i] / sum_of_predictions
obj["race"][race_label] = race_prediction
obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]
# -----------------------------
# mention facial areas
obj["region"] = img_region
# include image confidence
obj["face_confidence"] = img_confidence
resp_objects.append(obj)
return resp_objects
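The public `analyze` API is unchanged by this refactor; each detected face is now converted back to BGR and resized to (224, 224) via `preprocessing.resize_image` inside the action loop rather than inside `extract_faces`. A minimal usage sketch, assuming `img4.jpg` (as in the README example above) contains a face:

```python
from deepface import DeepFace

demographies = DeepFace.analyze(img_path="img4.jpg", actions=("age", "gender", "emotion", "race"))
for demography in demographies:
    # per-face results keep the same keys as before
    print(demography["dominant_emotion"], demography["age"], demography["dominant_gender"])
    print(demography["region"], demography["face_confidence"])
```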

View File

@ -1,5 +1,5 @@
# built-in dependencies
from typing import Any, Dict, List, Tuple, Union, Optional
from typing import Any, Dict, List, Tuple, Union
# 3rd party dependencies
import numpy as np
@ -10,7 +10,6 @@ from PIL import Image
from deepface.modules import preprocessing
from deepface.models.Detector import DetectedFace, FacialAreaRegion
from deepface.detectors import DetectorWrapper
from deepface.commons import package_utils
from deepface.commons.logger import Logger
logger = Logger(module="deepface/modules/detection.py")
@ -18,22 +17,13 @@ logger = Logger(module="deepface/modules/detection.py")
# pylint: disable=no-else-raise
tf_major_version = package_utils.get_tf_major_version()
if tf_major_version == 1:
from keras.preprocessing import image
elif tf_major_version == 2:
from tensorflow.keras.preprocessing import image
def extract_faces(
img_path: Union[str, np.ndarray],
target_size: Optional[Tuple[int, int]] = (224, 224),
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
expand_percentage: int = 0,
grayscale: bool = False,
human_readable=False,
) -> List[Dict[str, Any]]:
"""
Extract faces from a given image
@ -42,9 +32,6 @@ def extract_faces(
img_path (str or np.ndarray): Path to the first image. Accepts exact image path
as a string, numpy array (BGR), or base64 encoded images.
target_size (tuple): final shape of facial image. black pixels will be
added to resize the image.
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv)
@ -58,13 +45,10 @@ def extract_faces(
grayscale (boolean): Flag to convert the image to grayscale before
processing (default is False).
human_readable (bool): Flag to make the image human readable. 3D RGB for human readable
or 4D BGR for ML models (default is False).
Returns:
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
- "face" (np.ndarray): The detected face as a NumPy array.
- "face" (np.ndarray): The detected face as a NumPy array in RGB format.
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
- keys 'x', 'y', 'w', 'h' with int values
@ -122,57 +106,11 @@ def extract_faces(
if grayscale is True:
current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
# resize and padding
if target_size is not None:
factor_0 = target_size[0] / current_img.shape[0]
factor_1 = target_size[1] / current_img.shape[1]
factor = min(factor_0, factor_1)
dsize = (
int(current_img.shape[1] * factor),
int(current_img.shape[0] * factor),
)
current_img = cv2.resize(current_img, dsize)
diff_0 = target_size[0] - current_img.shape[0]
diff_1 = target_size[1] - current_img.shape[1]
if grayscale is False:
# Put the base image in the middle of the padded image
current_img = np.pad(
current_img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
(0, 0),
),
"constant",
)
else:
current_img = np.pad(
current_img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
),
"constant",
)
# double check: if target image is not still the same size with target.
if current_img.shape[0:2] != target_size:
current_img = cv2.resize(current_img, target_size)
# normalizing the image pixels
# what this line doing? must?
img_pixels = image.img_to_array(current_img)
img_pixels = np.expand_dims(img_pixels, axis=0)
img_pixels /= 255 # normalize input in [0, 1]
# discard expanded dimension
if human_readable is True and len(img_pixels.shape) == 4:
img_pixels = img_pixels[0]
current_img = current_img / 255 # normalize input in [0, 1]
resp_objs.append(
{
"face": img_pixels[:, :, ::-1] if human_readable is True else img_pixels,
"face": current_img[:, :, ::-1],
"facial_area": {
"x": int(current_region.x),
"y": int(current_region.y),

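With resizing removed, `extract_faces` now returns each face as an RGB float image in [0, 1] at its detected size, alongside the facial area and detector confidence. A minimal sketch of consuming that structure, assuming `img.jpg` contains at least one detectable face:

```python
from deepface import DeepFace

face_objs = DeepFace.extract_faces(img_path="img.jpg")
for face_obj in face_objs:
    face = face_obj["face"]              # RGB, values in [0, 1], detected size
    area = face_obj["facial_area"]       # dict with 'x', 'y', 'w', 'h' int values
    confidence = face_obj["confidence"]  # detector confidence score
    print(face.shape, area["x"], area["y"], area["w"], area["h"], confidence)
```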
View File

@ -11,6 +11,16 @@ import cv2
import requests
from PIL import Image
# project dependencies
from deepface.commons import package_utils
tf_major_version = package_utils.get_tf_major_version()
if tf_major_version == 1:
from keras.preprocessing import image
elif tf_major_version == 2:
from tensorflow.keras.preprocessing import image
def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
"""
@ -66,8 +76,8 @@ def load_image_from_web(url: str) -> np.ndarray:
response = requests.get(url, stream=True, timeout=60)
response.raise_for_status()
image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
return image
img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
return img
def load_base64(uri: str) -> np.ndarray:
@ -157,3 +167,50 @@ def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
raise ValueError(f"unimplemented normalization type - {normalization}")
return img
def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
"""
Resize an image to the expected input size of a ML model, padding with black pixels as needed.
Args:
img (np.ndarray): pre-loaded image as numpy array
target_size (tuple): input shape of ml model
Returns:
img (np.ndarray): resized input image
"""
factor_0 = target_size[0] / img.shape[0]
factor_1 = target_size[1] / img.shape[1]
factor = min(factor_0, factor_1)
dsize = (
int(img.shape[1] * factor),
int(img.shape[0] * factor),
)
img = cv2.resize(img, dsize)
diff_0 = target_size[0] - img.shape[0]
diff_1 = target_size[1] - img.shape[1]
# Put the base image in the middle of the padded image
img = np.pad(
img,
(
(diff_0 // 2, diff_0 - diff_0 // 2),
(diff_1 // 2, diff_1 - diff_1 // 2),
(0, 0),
),
"constant",
)
# double check: if the padded image still does not match the target size, resize it directly
if img.shape[0:2] != target_size:
img = cv2.resize(img, target_size)
# make it 4-dimensional as ML models expect
img = image.img_to_array(img)
img = np.expand_dims(img, axis=0)
if img.max() > 1:
img = (img.astype(np.float32) / 255.0).astype(np.float32)
return img
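A minimal sketch of the new helper in isolation: it preserves aspect ratio, pads with black pixels, and returns a 4-dimensional batch. The input here is a synthetic stand-in for a cropped face already scaled to [0, 1]:

```python
import numpy as np
from deepface.modules import preprocessing

face = np.random.rand(150, 120, 3).astype(np.float32)  # stand-in for a cropped face in [0, 1]
batch = preprocessing.resize_image(img=face, target_size=(224, 224))
print(batch.shape)  # (1, 224, 224, 3): resized, padded, and expanded for a ML model input
```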

View File

@ -13,8 +13,7 @@ from PIL import Image
# project dependencies
from deepface.commons.logger import Logger
from deepface.commons import package_utils
from deepface.modules import representation, detection, modeling, verification
from deepface.models.FacialRecognition import FacialRecognition
from deepface.modules import representation, detection, verification
logger = Logger(module="deepface/modules/recognition.py")
@ -90,15 +89,9 @@ def find(
tic = time.time()
# -------------------------------
if os.path.isdir(db_path) is not True:
raise ValueError("Passed db_path does not exist!")
model: FacialRecognition = modeling.build_model(model_name)
target_size = model.input_shape
# ---------------------------------------
file_name = f"ds_{model_name}_{detector_backend}_v2.pkl"
file_name = file_name.replace("-", "").lower()
datastore_path = os.path.join(db_path, file_name)
@ -180,7 +173,6 @@ def find(
representations += __find_bulk_embeddings(
employees=new_images,
model_name=model_name,
target_size=target_size,
detector_backend=detector_backend,
enforce_detection=enforce_detection,
align=align,
@ -212,7 +204,6 @@ def find(
# img path might have more than once face
source_objs = detection.extract_faces(
img_path=img_path,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
@ -314,7 +305,6 @@ def __list_images(path: str) -> List[str]:
def __find_bulk_embeddings(
employees: List[str],
model_name: str = "VGG-Face",
target_size: tuple = (224, 224),
detector_backend: str = "opencv",
enforce_detection: bool = True,
align: bool = True,
@ -331,8 +321,6 @@ def __find_bulk_embeddings(
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
target_size (tuple): expected input shape of facial recognition model
detector_backend (str): face detector model name
enforce_detection (bool): set this to False if you
@ -362,7 +350,6 @@ def __find_bulk_embeddings(
try:
img_objs = detection.extract_faces(
img_path=employee,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
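`find` no longer builds the recognition model up front just to read its input shape, and the internal extraction calls drop `target_size`; the public call is unchanged. A minimal sketch, where `my_db` is an assumed folder of facial images and `img1.jpg` an assumed probe image:

```python
from deepface import DeepFace

dfs = DeepFace.find(
    img_path="img1.jpg",
    db_path="my_db",
    model_name="VGG-Face",
    detector_backend="opencv",
)
# one pandas DataFrame per face detected in the probe image
for df in dfs:
    print(df.head())
```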

View File

@ -3,7 +3,6 @@ from typing import Any, Dict, List, Union
# 3rd party dependencies
import numpy as np
import cv2
# project dependencies
from deepface.modules import modeling, detection, preprocessing
@ -67,7 +66,6 @@ def represent(
if detector_backend != "skip":
img_objs = detection.extract_faces(
img_path=img_path,
target_size=(target_size[1], target_size[0]),
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,
@ -77,16 +75,10 @@ def represent(
else: # skip
# Try load. If load error, will raise exception internal
img, _ = preprocessing.load_image(img_path)
# --------------------------------
if len(img.shape) == 4:
img = img[0] # e.g. (1, 224, 224, 3) to (224, 224, 3)
if len(img.shape) == 3:
img = cv2.resize(img, target_size)
img = np.expand_dims(img, axis=0)
# when called from verify, this is already normalized. But needed when user given.
if img.max() > 1:
img = (img.astype(np.float32) / 255.0).astype(np.float32)
# --------------------------------
if len(img.shape) != 3:
raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}")
# make dummy region and confidence to keep compatibility with `extract_faces`
img_objs = [
{
@ -99,8 +91,20 @@ def represent(
for img_obj in img_objs:
img = img_obj["face"]
# rgb to bgr
img = img[:, :, ::-1]
region = img_obj["facial_area"]
confidence = img_obj["confidence"]
# resize to expected shape of ml model
img = preprocessing.resize_image(
img=img,
# thanks to DeepId (!)
target_size=(target_size[1], target_size[0]),
)
# custom normalization
img = preprocessing.normalize_input(img=img, normalization=normalization)
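With `detector_backend="skip"`, `represent` now expects a plain 3-dimensional image (anything else raises a ValueError) and delegates resizing and batching to `preprocessing.resize_image`. A minimal sketch, where `cropped_face.jpg` is an assumed pre-cropped face image:

```python
import cv2
from deepface import DeepFace

face_img = cv2.imread("cropped_face.jpg")  # 3-dimensional BGR array; a 4D batch would now raise ValueError
embedding_objs = DeepFace.represent(img_path=face_img, detector_backend="skip")
print(len(embedding_objs[0]["embedding"]))
```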

View File

@ -10,7 +10,6 @@ import cv2
# project dependencies
from deepface import DeepFace
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger
logger = Logger(module="commons.realtime")
@ -62,7 +61,7 @@ def analysis(
"""
# initialize models
build_demography_models(enable_face_analysis=enable_face_analysis)
target_size = build_facial_recognition_model(model_name=model_name)
build_facial_recognition_model(model_name=model_name)
# call a dummy find function for db_path once to create embeddings before starting webcam
_ = search_identity(
detected_face=np.zeros([224, 224, 3]),
@ -89,9 +88,7 @@ def analysis(
faces_coordinates = []
if freeze is False:
faces_coordinates = grab_facial_areas(
img=img, detector_backend=detector_backend, target_size=target_size
)
faces_coordinates = grab_facial_areas(img=img, detector_backend=detector_backend)
# we will pass img to analyze modules (identity, demography) and add some illustrations
# that is why, we will not be able to extract detected face from img clearly
@ -156,7 +153,7 @@ def analysis(
cv2.destroyAllWindows()
def build_facial_recognition_model(model_name: str) -> tuple:
def build_facial_recognition_model(model_name: str) -> None:
"""
Build facial recognition model
Args:
@ -165,9 +162,8 @@ def build_facial_recognition_model(model_name: str) -> tuple:
Returns
input_shape (tuple): input shape of the given facial recognition model.
"""
model: FacialRecognition = DeepFace.build_model(model_name=model_name)
_ = DeepFace.build_model(model_name=model_name)
logger.info(f"{model_name} is built")
return model.input_shape
def search_identity(
@ -231,7 +227,6 @@ def search_identity(
# load found identity image - extracted if possible
target_objs = DeepFace.extract_faces(
img_path=target_path,
target_size=(IDENTIFIED_IMG_SIZE, IDENTIFIED_IMG_SIZE),
detector_backend=detector_backend,
enforce_detection=False,
align=True,
@ -243,6 +238,7 @@ def search_identity(
# extract 1st item directly
target_obj = target_objs[0]
target_img = target_obj["face"]
target_img = cv2.resize(target_img, (IDENTIFIED_IMG_SIZE, IDENTIFIED_IMG_SIZE))
target_img *= 255
target_img = target_img[:, :, ::-1]
else:
@ -346,7 +342,7 @@ def countdown_to_release(
def grab_facial_areas(
img: np.ndarray, detector_backend: str, target_size: Tuple[int, int], threshold: int = 130
img: np.ndarray, detector_backend: str, threshold: int = 130
) -> List[Tuple[int, int, int, int]]:
"""
Find facial area coordinates in the given image
@ -354,7 +350,6 @@ def grab_facial_areas(
img (np.ndarray): image itself
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8' (default is opencv).
target_size (tuple): input shape of the facial recognition model.
threshold (int): threshold for facial area, discard smaller ones
Returns
result (list): list of tuple with x, y, w and h coordinates
@ -363,7 +358,6 @@ def grab_facial_areas(
face_objs = DeepFace.extract_faces(
img_path=img,
detector_backend=detector_backend,
target_size=target_size,
# you may consider to extract with larger expanding value
expand_percentage=0,
)
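`grab_facial_areas` now extracts faces without a `target_size` and keeps only areas above the threshold. A hypothetical standalone helper mirroring that docstring (the exact filtering condition is not shown in this hunk, so width-based filtering here is an assumption):

```python
from deepface import DeepFace

def facial_areas_above_threshold(img, detector_backend="opencv", threshold=130):
    # detect faces, then discard facial areas smaller than the threshold (assumed: by width)
    # note: extract_faces raises if no face is found while enforce_detection stays True
    face_objs = DeepFace.extract_faces(
        img_path=img,
        detector_backend=detector_backend,
        expand_percentage=0,
    )
    regions = [face_obj["facial_area"] for face_obj in face_objs]
    return [(r["x"], r["y"], r["w"], r["h"]) for r in regions if r["w"] > threshold]
```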

View File

@ -223,12 +223,8 @@ def __extract_faces_and_embeddings(
embeddings = []
facial_areas = []
model: FacialRecognition = modeling.build_model(model_name)
target_size = model.input_shape
img_objs = detection.extract_faces(
img_path=img_path,
target_size=target_size,
detector_backend=detector_backend,
grayscale=False,
enforce_detection=enforce_detection,

View File

@ -1,5 +1,7 @@
import matplotlib.pyplot as plt
import numpy as np
import cv2
from deepface import DeepFace
from deepface.modules import verification
from deepface.models.FacialRecognition import FacialRecognition
@ -21,11 +23,13 @@ logger.info(f"target_size: {target_size}")
# ----------------------------------------------
# load images and find embeddings
img1 = DeepFace.extract_faces(img_path="dataset/img1.jpg", target_size=target_size)[0]["face"]
img1 = DeepFace.extract_faces(img_path="dataset/img1.jpg")[0]["face"]
img1 = cv2.resize(img1, target_size)
img1 = np.expand_dims(img1, axis=0) # to (1, 224, 224, 3)
img1_representation = model.forward(img1)
img2 = DeepFace.extract_faces(img_path="dataset/img3.jpg", target_size=target_size)[0]["face"]
img2 = DeepFace.extract_faces(img_path="dataset/img3.jpg")[0]["face"]
img2 = cv2.resize(img2, target_size)
img2 = np.expand_dims(img2, axis=0)
img2_representation = model.forward(img2)

View File

@ -7,9 +7,11 @@ img_path = "dataset/img1.jpg"
img = cv2.imread(img_path)
overlay_img_path = "dataset/img6.jpg"
face_objs = DeepFace.extract_faces(overlay_img_path, target_size=(112, 112))
face_objs = DeepFace.extract_faces(overlay_img_path)
overlay_img = face_objs[0]["face"][:, :, ::-1] * 255
overlay_img = cv2.resize(overlay_img, (112, 112))
raw_img = img.copy()
demographies = DeepFace.analyze(img_path=img_path, actions=("age", "gender", "emotion"))

View File

@ -18,6 +18,7 @@ model_names = [
"Dlib",
"ArcFace",
"SFace",
"GhostFaceNet",
]
detector_backends = [