handling many faces

This commit is contained in:
Sefik Ilkin Serengil 2023-01-23 21:54:52 +00:00
parent ba3db18671
commit b62e3671f8
6 changed files with 405 additions and 649 deletions

1
.gitignore vendored
View File

@ -17,6 +17,7 @@ deepface/extendedmodels/__pycache__/*
deepface/subsidiarymodels/__pycache__/*
deepface/detectors/__pycache__/*
tests/dataset/*.pkl
tests/sandbox.ipynb
.DS_Store
deepface/.DS_Store
*.pyc

View File

@ -7,12 +7,13 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import time
from os import path
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle
from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace, Boosting
from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace
from deepface.extendedmodels import Age, Gender, Race, Emotion
from deepface.commons import functions, realtime, distance as dst
@ -67,33 +68,24 @@ def build_model(model_name):
return model_obj[model_name]
def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base'):
def verify(img1_path, img2_path, model_name = 'VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enforce_detection = True, align = True, normalization = 'base'):
"""
This function verifies an image pair is same person or different persons.
Parameters:
img1_path, img2_path: exact image path, numpy array (BGR) or based64 encoded images could be passed. If you are going to call verify function for a list of image pairs, then you should pass an array instead of calling the function in for loops.
img1_path, img2_path: exact image path as string. numpy array (BGR) or base64 encoded images are also welcome.
If either image of the pair has more than one face, then we will compare the face pair with max similarity.
e.g. img1_path = [
['img1.jpg', 'img2.jpg'],
['img2.jpg', 'img3.jpg']
]
model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace or Ensemble
model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace
distance_metric (string): cosine, euclidean, euclidean_l2
model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times.
model = DeepFace.build_model('VGG-Face')
enforce_detection (boolean): If no face could not be detected in an image, then this function will return exception by default. Set this to False not to have this exception. This might be convenient for low resolution images.
enforce_detection (boolean): If no face can be detected in an image, then this function will raise an exception by default.
Set this to False not to have this exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
prog_bar (boolean): enable/disable a progress bar
Returns:
Verify function returns a dictionary. If img1_path is a list of image pairs, then the function will return list of dictionary.
@ -103,194 +95,116 @@ def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric =
, "max_threshold_to_verify": 0.40
, "model": "VGG-Face"
, "similarity_metric": "cosine"
, 'facial_areas': {
'img1': {'x': 345, 'y': 211, 'w': 769, 'h': 769},
'img2': {'x': 318, 'y': 534, 'w': 779, 'h': 779}
}
, "time": 2
}
"""
tic = time.time()
img_list, bulkProcess = functions.initialize_input(img1_path, img2_path)
resp_objects = []
#--------------------------------
target_size = functions.find_target_size(model_name=model_name)
if model_name == 'Ensemble':
model_names = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]
else:
model_names = []; metrics = []
model_names.append(model_name)
metrics.append(distance_metric)
# img pairs might have many faces
img1_objs = functions.extract_faces(
img = img1_path,
target_size = (target_size[1], target_size[0]),
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align)
img2_objs = functions.extract_faces(
img = img2_path,
target_size = (target_size[1], target_size[0]),
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align)
#--------------------------------
if model == None:
if model_name == 'Ensemble':
models = Boosting.loadModel()
else:
model = build_model(model_name)
models = {}
models[model_name] = model
else:
if model_name == 'Ensemble':
Boosting.validate_model(model)
models = model.copy()
else:
models = {}
models[model_name] = model
#------------------------------
disable_option = (False if len(img_list) > 1 else True) or not prog_bar
pbar = tqdm(range(0,len(img_list)), desc='Verification', disable = disable_option)
for index in pbar:
instance = img_list[index]
if type(instance) == list and len(instance) >= 2:
img1_path = instance[0]; img2_path = instance[1]
ensemble_features = []
for i in model_names:
custom_model = models[i]
#img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'mtcnn'
img1_representation = represent(img_path = img1_path
, model_name = model_name, model = custom_model
, enforce_detection = enforce_detection, detector_backend = detector_backend
distances = []
regions = []
# now we will find the face pair with minimum distance
for img1_content, img1_region, img1_confidence in img1_objs:
for img2_content, img2_region, img2_confidence in img2_objs:
img1_embedding_obj = represent(img_path = img1_content
, model_name = model_name
, enforce_detection = enforce_detection
, detector_backend = "skip"
, align = align
, normalization = normalization
)
img2_representation = represent(img_path = img2_path
, model_name = model_name, model = custom_model
, enforce_detection = enforce_detection, detector_backend = detector_backend
img2_embedding_obj = represent(img_path = img2_content
, model_name = model_name
, enforce_detection = enforce_detection
, detector_backend = "skip"
, align = align
, normalization = normalization
)
#----------------------
#find distances between embeddings
img1_representation = img1_embedding_obj[0]["embedding"]
img2_representation = img2_embedding_obj[0]["embedding"]
for j in metrics:
if j == 'cosine':
if distance_metric == 'cosine':
distance = dst.findCosineDistance(img1_representation, img2_representation)
elif j == 'euclidean':
elif distance_metric == 'euclidean':
distance = dst.findEuclideanDistance(img1_representation, img2_representation)
elif j == 'euclidean_l2':
elif distance_metric == 'euclidean_l2':
distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation))
else:
raise ValueError("Invalid distance_metric passed - ", distance_metric)
distance = np.float64(distance) #causes trobule for euclideans in api calls if this is not set (issue #175)
#----------------------
#decision
distances.append(distance)
regions.append((img1_region, img2_region))
if model_name != 'Ensemble':
# -------------------------------
threshold = dst.findThreshold(model_name, distance_metric)
distance = min(distances) #best distance
facial_areas = regions[np.argmin(distances)]
threshold = dst.findThreshold(i, j)
if distance <= threshold:
identified = True
else:
identified = False
toc = time.time()
resp_obj = {
"verified": identified
"verified": True if distance <= threshold else False
, "distance": distance
, "threshold": threshold
, "model": model_name
, "detector_backend": detector_backend
, "similarity_metric": distance_metric
, "facial_areas": {
"img1": facial_areas[0],
"img2": facial_areas[1]
}
, "time": round(toc - tic, 2)
}
if bulkProcess == True:
resp_objects.append(resp_obj)
else:
return resp_obj
else: #Ensemble
#this returns same with OpenFace - euclidean_l2
if i == 'OpenFace' and j == 'euclidean':
continue
else:
ensemble_features.append(distance)
#----------------------
if model_name == 'Ensemble':
boosted_tree = Boosting.build_gbm()
prediction = boosted_tree.predict(np.expand_dims(np.array(ensemble_features), axis=0))[0]
verified = np.argmax(prediction) == 1
score = prediction[np.argmax(prediction)]
resp_obj = {
"verified": verified
, "score": score
, "distance": ensemble_features
, "model": ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
, "similarity_metric": ["cosine", "euclidean", "euclidean_l2"]
}
if bulkProcess == True:
resp_objects.append(resp_obj)
else:
return resp_obj
#----------------------
else:
raise ValueError("Invalid arguments passed to verify function: ", instance)
#-------------------------
toc = time.time()
if bulkProcess == True:
resp_obj = {}
for i in range(0, len(resp_objects)):
resp_item = resp_objects[i]
resp_obj["pair_%d" % (i+1)] = resp_item
return resp_obj
def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models = None, enforce_detection = True, detector_backend = 'opencv', prog_bar = True):
def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , enforce_detection = True, detector_backend = 'opencv', align = True, silent = False):
"""
This function analyzes facial attributes including age, gender, emotion and race
Parameters:
img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. If you are going to analyze lots of images, then set this to list. e.g. img_path = ['img1.jpg', 'img2.jpg']
img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed.
actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop some of those attributes.
models: (Optional[dict]) facial attribute analysis models are built in every call of analyze function. You can pass pre-built models to speed the function up.
models = {}
models['age'] = DeepFace.build_model('Age')
models['gender'] = DeepFace.build_model('Gender')
models['emotion'] = DeepFace.build_model('Emotion')
models['race'] = DeepFace.build_model('Race')
enforce_detection (boolean): The function throws exception if no face detected by default. Set this to False if you don't want to get exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib.
prog_bar (boolean): enable/disable a progress bar
Returns:
The function returns a dictionary. If img_path is a list, then it will return list of dictionary.
silent (boolean): disable (some) log messages
Returns:
The function returns a list of dictionaries, one for each face appearing in the image.
[
{
"region": {'x': 230, 'y': 120, 'w': 36, 'h': 45},
"age": 28.66,
@ -319,165 +233,104 @@ def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models =
'white': 93.44925880432129
}
}
]
"""
#---------------------------------
# validate actions
if type(actions) == str:
actions = (actions,)
actions = list(actions)
if not models:
#---------------------------------
# build models
models = {}
img_paths, bulkProcess = functions.initialize_input(img_path)
#---------------------------------
built_models = list(models.keys())
#---------------------------------
#pre-trained models passed but it doesn't exist in actions
if len(built_models) > 0:
if 'emotion' in built_models and 'emotion' not in actions:
actions.append('emotion')
if 'age' in built_models and 'age' not in actions:
actions.append('age')
if 'gender' in built_models and 'gender' not in actions:
actions.append('gender')
if 'race' in built_models and 'race' not in actions:
actions.append('race')
#---------------------------------
if 'emotion' in actions and 'emotion' not in built_models:
if 'emotion' in actions:
models['emotion'] = build_model('Emotion')
if 'age' in actions and 'age' not in built_models:
if 'age' in actions:
models['age'] = build_model('Age')
if 'gender' in actions and 'gender' not in built_models:
if 'gender' in actions:
models['gender'] = build_model('Gender')
if 'race' in actions and 'race' not in built_models:
if 'race' in actions:
models['race'] = build_model('Race')
#---------------------------------
resp_objects = []
disable_option = (False if len(img_paths) > 1 else True) or not prog_bar
global_pbar = tqdm(range(0,len(img_paths)), desc='Analyzing', disable = disable_option)
for j in global_pbar:
img_path = img_paths[j]
resp_obj = {}
disable_option = (False if len(actions) > 1 else True) or not prog_bar
pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = disable_option)
img_224 = None # Set to prevent re-detection
region = [] # x, y, w, h of the detected face region
region_labels = ['x', 'y', 'w', 'h']
is_region_set = False
img_objs = functions.extract_faces(img=img_path, target_size=(224, 224), detector_backend=detector_backend, grayscale = False, enforce_detection=enforce_detection, align=align)
for img_content, img_region, img_confidence in img_objs:
if img_content.shape[0] > 0 and img_content.shape[1] > 0:
obj = {}
#facial attribute analysis
pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = silent)
for index in pbar:
action = actions[index]
pbar.set_description("Action: %s" % (action))
if action == 'emotion':
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
img, region = functions.preprocess_face(img = img_path, target_size = (48, 48), grayscale = True, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY)
img_gray = cv2.resize(img_gray, (48, 48))
img_gray = np.expand_dims(img_gray, axis = 0)
emotion_predictions = models['emotion'].predict(img, verbose=0)[0,:]
emotion_predictions = models['emotion'].predict(img_gray, verbose=0)[0,:]
sum_of_predictions = emotion_predictions.sum()
resp_obj["emotion"] = {}
obj["emotion"] = {}
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
for i in range(0, len(emotion_labels)):
emotion_label = emotion_labels[i]
emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
resp_obj["emotion"][emotion_label] = emotion_prediction
obj["emotion"][emotion_label] = emotion_prediction
resp_obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]
obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]
elif action == 'age':
if img_224 is None:
img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
age_predictions = models['age'].predict(img_224, verbose=0)[0,:]
age_predictions = models['age'].predict(img_content, verbose=0)[0,:]
apparent_age = Age.findApparentAge(age_predictions)
resp_obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
elif action == 'gender':
if img_224 is None:
img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
gender_predictions = models['gender'].predict(img_224, verbose=0)[0,:]
gender_predictions = models['gender'].predict(img_content, verbose=0)[0,:]
gender_labels = ["Woman", "Man"]
resp_obj["gender"] = {}
obj["gender"] = {}
for i, gender_label in enumerate(gender_labels):
gender_prediction = 100 * gender_predictions[i]
resp_obj["gender"][gender_label] = gender_prediction
obj["gender"][gender_label] = gender_prediction
resp_obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]
obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]
elif action == 'race':
if img_224 is None:
img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) #just emotion model expects grayscale images
race_predictions = models['race'].predict(img_224, verbose=0)[0,:]
race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
race_predictions = models['race'].predict(img_content, verbose=0)[0,:]
sum_of_predictions = race_predictions.sum()
resp_obj["race"] = {}
obj["race"] = {}
race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
for i in range(0, len(race_labels)):
race_label = race_labels[i]
race_prediction = 100 * race_predictions[i] / sum_of_predictions
resp_obj["race"][race_label] = race_prediction
obj["race"][race_label] = race_prediction
resp_obj["dominant_race"] = race_labels[np.argmax(race_predictions)]
obj["dominant_race"] = race_labels[np.argmax(race_predictions)]
#-----------------------------
# mention facial areas
obj["region"] = img_region
if is_region_set != True and region:
resp_obj["region"] = {}
is_region_set = True
for i, parameter in enumerate(region_labels):
resp_obj["region"][parameter] = int(region[i]) #int cast is for the exception - object of type 'float32' is not JSON serializable
resp_objects.append(obj)
#---------------------------------
if bulkProcess == True:
resp_objects.append(resp_obj)
else:
return resp_obj
if bulkProcess == True:
return resp_objects
def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base', silent=False):
def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base', silent=False):
"""
This function applies verification several times and find an identity in a database
This function applies verification several times and finds the identities in a database
Parameters:
img_path: exact image path, numpy array (BGR) or based64 encoded image. If you are going to find several identities, then you should pass img_path as array instead of calling find function in a for loop. e.g. img_path = ["img1.jpg", "img2.jpg"]
img_path: exact image path, numpy array (BGR) or base64 encoded image.
db_path (string): You should store some .jpg files in a folder and pass the exact folder path to this.
@ -485,58 +338,23 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
distance_metric (string): cosine, euclidean, euclidean_l2
model: built deepface model. A face recognition models are built in every call of find function. You can pass pre-built models to speed the function up.
model = DeepFace.build_model('VGG-Face')
enforce_detection (boolean): The function throws an exception if a face could not be detected. Set this to False if you don't want to get an exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
prog_bar (boolean): enable/disable a progress bar
silent (boolean): disable some logging and progress bars
Returns:
This function returns pandas data frame. If a list of images is passed to img_path, then it will return list of pandas data frame.
This function returns a list of pandas data frames. Each item of the list corresponds to an identity in the img_path.
"""
tic = time.time()
img_paths, bulkProcess = functions.initialize_input(img_path)
#-------------------------------
if os.path.isdir(db_path) == True:
if model == None:
if model_name == 'Ensemble':
if not silent: print("Ensemble learning enabled")
models = Boosting.loadModel()
else: #model is not ensemble
model = build_model(model_name)
models = {}
models[model_name] = model
else: #model != None
if not silent: print("Already built model is passed")
if model_name == 'Ensemble':
Boosting.validate_model(model)
models = model.copy()
if os.path.isdir(db_path) != True:
raise ValueError("Passed db_path does not exist!")
else:
models = {}
models[model_name] = model
#---------------------------------------
if model_name == 'Ensemble':
model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']
metric_names = ['cosine', 'euclidean', 'euclidean_l2']
elif model_name != 'Ensemble':
model_names = []; metric_names = []
model_names.append(model_name)
metric_names.append(distance_metric)
target_size = functions.find_target_size(model_name=model_name)
#---------------------------------------
@ -545,19 +363,21 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
if path.exists(db_path+"/"+file_name):
if not silent: print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")
if not silent:
print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")
f = open(db_path+'/'+file_name, 'rb')
representations = pickle.load(f)
if not silent: print("There are ", len(representations)," representations found in ",file_name)
if not silent:
print("There are ", len(representations)," representations found in ",file_name)
else: #create representation.pkl from scratch
employees = []
for r, d, f in os.walk(db_path): # r=root, d=directories, f = files
for file in f:
if ('.jpg' in file.lower()) or ('.png' in file.lower()):
if ('.jpg' in file.lower()) or ('.jpeg' in file.lower()) or ('.png' in file.lower()):
exact_path = r + "/" + file
employees.append(exact_path)
@ -569,158 +389,108 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
representations = []
pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = prog_bar)
#for employee in employees:
pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = True if silent == True else False)
for index in pbar:
employee = employees[index]
instance = []
instance.append(employee)
img_objs = functions.extract_faces(img = employee,
target_size = target_size,
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align
)
for j in model_names:
custom_model = models[j]
representation = represent(img_path = employee
, model_name = model_name, model = custom_model
, enforce_detection = enforce_detection, detector_backend = detector_backend
for img_content, img_region, img_confidence in img_objs:
embedding_obj = represent(img_path = img_content
, model_name = model_name
, enforce_detection = enforce_detection
, detector_backend = "skip"
, align = align
, normalization = normalization
)
instance.append(representation)
img_representation = embedding_obj[0]["embedding"]
instance = []
instance.append(employee)
instance.append(img_representation)
representations.append(instance)
#-------------------------------
representations.append(instance)
f = open(db_path+'/'+file_name, "wb")
pickle.dump(representations, f)
f.close()
if not silent: print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")
if not silent:
print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")
#----------------------------
#now, we got representations for facial database
df = pd.DataFrame(representations, columns = ["identity", f"{model_name}_representation"])
if model_name != 'Ensemble':
df = pd.DataFrame(representations, columns = ["identity", "%s_representation" % (model_name)])
else: #ensemble learning
columns = ['identity']
[columns.append('%s_representation' % i) for i in model_names]
df = pd.DataFrame(representations, columns = columns)
df_base = df.copy() #df will be filtered in each img. we will restore it for the next item.
# img path might have more than one face
target_objs = functions.extract_faces(img = img_path,
target_size = target_size,
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align
)
resp_obj = []
global_pbar = tqdm(range(0, len(img_paths)), desc='Analyzing', disable = prog_bar)
for j in global_pbar:
img_path = img_paths[j]
#find representation for passed image
for j in model_names:
custom_model = models[j]
target_representation = represent(img_path = img_path
, model_name = model_name, model = custom_model
, enforce_detection = enforce_detection, detector_backend = detector_backend
for target_img, target_region, target_confidence in target_objs:
target_embedding_obj = represent(img_path = target_img
, model_name = model_name
, enforce_detection = enforce_detection
, detector_backend = "skip"
, align = align
, normalization = normalization
)
for k in metric_names:
target_representation = target_embedding_obj[0]["embedding"]
result_df = df.copy() #df will be filtered in each img
#TODO: add facial area in df
distances = []
for index, instance in df.iterrows():
source_representation = instance["%s_representation" % (j)]
source_representation = instance[f"{model_name}_representation"]
if k == 'cosine':
if distance_metric == 'cosine':
distance = dst.findCosineDistance(source_representation, target_representation)
elif k == 'euclidean':
elif distance_metric == 'euclidean':
distance = dst.findEuclideanDistance(source_representation, target_representation)
elif k == 'euclidean_l2':
elif distance_metric == 'euclidean_l2':
distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation))
else:
raise ValueError(f"invalid distance metric passes - {distance_metric}")
distances.append(distance)
#---------------------------
if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
continue
else:
df["%s_%s" % (j, k)] = distances
result_df[f"{model_name}_{distance_metric}"] = distances
if model_name != 'Ensemble':
threshold = dst.findThreshold(j, k)
df = df.drop(columns = ["%s_representation" % (j)])
df = df[df["%s_%s" % (j, k)] <= threshold]
threshold = dst.findThreshold(model_name, distance_metric)
result_df = result_df.drop(columns = [f"{model_name}_representation"])
result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold]
result_df = result_df.sort_values(by = [f"{model_name}_{distance_metric}"], ascending=True).reset_index(drop=True)
df = df.sort_values(by = ["%s_%s" % (j, k)], ascending=True).reset_index(drop=True)
resp_obj.append(result_df)
resp_obj.append(df)
df = df_base.copy() #restore df for the next iteration
#----------------------------------
if model_name == 'Ensemble':
feature_names = []
for j in model_names:
for k in metric_names:
if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
continue
else:
feature = '%s_%s' % (j, k)
feature_names.append(feature)
#print(df.head())
x = df[feature_names].values
#--------------------------------------
boosted_tree = Boosting.build_gbm()
y = boosted_tree.predict(x)
verified_labels = []; scores = []
for i in y:
verified = np.argmax(i) == 1
score = i[np.argmax(i)]
verified_labels.append(verified)
scores.append(score)
df['verified'] = verified_labels
df['score'] = scores
df = df[df.verified == True]
#df = df[df.score > 0.99] #confidence score
df = df.sort_values(by = ["score"], ascending=False).reset_index(drop=True)
df = df[['identity', 'verified', 'score']]
resp_obj.append(df)
df = df_base.copy() #restore df for the next iteration
#----------------------------------
# -----------------------------------
toc = time.time()
if not silent: print("find function lasts ",toc-tic," seconds")
if len(resp_obj) == 1:
return resp_obj[0]
if not silent:
print("find function lasts ",toc-tic," seconds")
return resp_obj
else:
raise ValueError("Passed db_path does not exist!")
return None
def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base'):
"""
@ -731,10 +501,6 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace.
model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. Consider to pass model if you are going to call represent function in a for loop.
model = DeepFace.build_model('VGG-Face')
enforce_detection (boolean): If any face could not be detected in an image, then verify function will return exception. Set this to False not to have this exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
@ -744,38 +510,51 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
Returns:
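Represent function returns a list of dictionaries, one per face found in img_path. Each dictionary stores a multidimensional vector under the 'embedding' key and the face region under the 'facial_area' key. The number of dimensions of the vector changes based on the reference model. E.g. FaceNet returns a 128 dimensional vector; VGG-Face returns a 2622 dimensional vector.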
"""
resp_objs = []
if model is None:
model = build_model(model_name)
#---------------------------------
# we started to run pre-process in verification. so, this can be skipped if it is coming from verification.
if detector_backend != "skip":
target_size = functions.find_target_size(model_name=model_name)
#decide input shape
input_shape_x, input_shape_y = functions.find_input_shape(model)
#detect and align
img = functions.preprocess_face(img = img_path
, target_size=(input_shape_y, input_shape_x)
, enforce_detection = enforce_detection
, detector_backend = detector_backend
, align = align)
img_objs = functions.extract_faces(img = img_path,
target_size = target_size,
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align)
else: # skip
if type(img_path) == str:
img = functions.load_image(img_path)
elif type(img_path).__module__ == np.__name__:
img = img_path.copy()
else:
raise ValueError(f"unexpected type for img_path - {type(img_path)}")
img_region = [0, 0, img.shape[1], img.shape[0]]
img_objs = [(img, img_region, 0)]
#---------------------------------
for img, region, confidence in img_objs:
#custom normalization
img = functions.normalize_input(img = img, normalization = normalization)
#---------------------------------
#represent
if "keras" in str(type(model)):
#new tf versions show progress bar and it is annoying
embedding = model.predict(img, verbose=0)[0].tolist()
else:
#SFace is not a keras model and it has no verbose argument
#SFace and Dlib are not keras models and have no verbose argument
embedding = model.predict(img)[0].tolist()
return embedding
resp_obj = {}
resp_obj["embedding"] = embedding
resp_obj["facial_area"] = region
resp_objs.append(resp_obj)
return resp_objs
def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enable_face_analysis = True, source = 0, time_threshold = 5, frame_threshold = 5):
@ -810,7 +589,7 @@ def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', di
realtime.analysis(db_path, model_name, detector_backend, distance_metric, enable_face_analysis
, source = source, time_threshold = time_threshold, frame_threshold = frame_threshold)
def extract_faces(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True):
    """
    This function applies pre-processing stages of a face recognition pipeline including detection and alignment

    Parameters:
        img_path: exact image path, numpy array (BGR) or base64 encoded image

        target_size (tuple): final shape of facial image. black pixels will be added to resize the image.

        detector_backend (string): face detection backends are retinaface, mtcnn, opencv, ssd or dlib

        enforce_detection (boolean): function throws exception if face cannot be detected in the fed image.
        Set this to False if you do not want to get exception and run the function anyway.

        align (boolean): alignment according to the eye positions.

    Returns:
        list of dictionaries. Each dictionary will have facial image itself ("face"),
        extracted area from the original image ("facial_area") and confidence score ("confidence").
    """

    # detection, alignment, resizing and scaling are delegated to commons.functions;
    # it yields one (image, region, confidence) triple per face
    img_objs = functions.extract_faces(
        img = img_path,
        target_size = target_size,
        detector_backend = detector_backend,
        grayscale = False,
        enforce_detection = enforce_detection,
        align = align
    )

    resp_objs = []
    for img, region, confidence in img_objs:
        # discard expanded dimension
        if len(img.shape) == 4:
            img = img[0]

        resp_objs.append({
            "face": img[:, :, ::-1], # reverse the channel axis (bgr to rgb)
            "facial_area": region,
            "confidence": confidence,
        })

    return resp_objs
#---------------------------
#main

View File

@ -27,24 +27,6 @@ elif tf_major_version == 2:
#--------------------------------------------------
def initialize_input(img1_path, img2_path = None):
    """
    Normalize the image argument(s) of the public functions into a list.

    Parameters:
        img1_path: a single image (path, base64 string or numpy array) or a list of inputs.
        A list switches the call into bulk mode and img2_path is ignored.

        img2_path: optional second image of a pair. A string or a numpy array is accepted.
        A numpy array without any non-zero element is treated as if it was not passed.

    Returns:
        (img_list, bulkProcess) tuple.
        - bulk mode: a shallow copy of img1_path and True
        - pair: [[img1_path, img2_path]] and False
        - single image: [img1_path] and False
    """
    if isinstance(img1_path, list):
        return img1_path.copy(), True

    has_second_image = (
        isinstance(img2_path, str) #exact image path, base64 image
        or (isinstance(img2_path, np.ndarray) and img2_path.any()) #numpy array
    )

    if has_second_image:
        return [[img1_path, img2_path]], False

    #analyze function passes just img1_path
    return [img1_path], False
def initialize_folder():
home = get_deepface_home()
@ -59,6 +41,8 @@ def initialize_folder():
def get_deepface_home():
    """
    Returns the deepface home directory as string: the value of the DEEPFACE_HOME
    environment variable if it is set, otherwise the home directory of the current user.
    """
    return str(os.getenv('DEEPFACE_HOME', default=Path.home()))
#--------------------------------------------------
def loadBase64Img(uri):
encoded_data = uri.split(',')[1]
nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
@ -93,35 +77,71 @@ def load_image(img):
return img
#--------------------------------------------------

def extract_faces(img, target_size=(224, 224), detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
    """
    Detect every face in an image, then resize, pad and scale each of them.

    Parameters:
        img: image path, base64 encoded image or numpy array. It is converted with load_image.

        target_size (tuple): (height, width) of the returned facial images. The face is resized
        by keeping its aspect ratio and the remaining area is padded with zeros.

        detector_backend (string): name handed to FaceDetector.build_model.
        'skip' bypasses detection and treats the whole image as a single face.

        grayscale (boolean): convert each face with cv2.COLOR_BGR2GRAY before resizing.

        enforce_detection (boolean): raise an exception if no face is found.
        If it is falsy, the whole image is used as the face instead.

        align (boolean): forwarded to FaceDetector.detect_faces.

    Returns:
        list of [img_pixels, region_obj, confidence] items. img_pixels is the facial image
        with an expanded first dimension and divided by 255, region_obj is a dictionary
        with integer x, y, w and h keys.

    Raises:
        ValueError: if enforce_detection is set and no face is detected, or every detected
        face has an empty shape.
    """

    # this is going to store a list of img itself (numpy), its region and confidence
    extracted_faces = []

    #img might be path, base64 or numpy array. Convert it to numpy whatever it is.
    img = load_image(img)
    img_region = [0, 0, img.shape[1], img.shape[0]]

    #people would like to skip detection and alignment if they already have pre-processed images
    if detector_backend == 'skip':
        face_objs = [(img, img_region, 0)]
    else:
        face_detector = FaceDetector.build_model(detector_backend)
        face_objs = FaceDetector.detect_faces(face_detector, detector_backend, img, align)

    # in case of no face found
    if len(face_objs) == 0:
        if enforce_detection:
            raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.")
        face_objs = [(img, img_region, 0)]

    for current_img, current_region, confidence in face_objs:
        # an empty crop cannot be resized, so it is dropped
        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
            continue

        if grayscale:
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

        # resize by keeping the aspect ratio; plain resize would deform the face
        factor_0 = target_size[0] / current_img.shape[0]
        factor_1 = target_size[1] / current_img.shape[1]
        factor = min(factor_0, factor_1)

        dsize = (int(current_img.shape[1] * factor), int(current_img.shape[0] * factor))
        current_img = cv2.resize(current_img, dsize)

        # Put the base image in the middle of the padded image
        diff_0 = target_size[0] - current_img.shape[0]
        diff_1 = target_size[1] - current_img.shape[1]
        pad_width = [(diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)]
        if not grayscale:
            pad_width.append((0, 0)) # channel axis is not padded
        current_img = np.pad(current_img, pad_width, 'constant')

        #double check: if target image is not still the same size with target.
        #tuple cast is for target sizes passed as list; cv2.resize expects (width, height)
        if current_img.shape[0:2] != tuple(target_size):
            current_img = cv2.resize(current_img, (target_size[1], target_size[0]))

        #normalizing the image pixels
        # NOTE(review): original author was unsure whether img_to_array is required here
        img_pixels = image.img_to_array(current_img)
        img_pixels = np.expand_dims(img_pixels, axis = 0)
        img_pixels /= 255 #normalize input in [0, 1]

        #int cast is for the exception - object of type 'float32' is not JSON serializable
        region_obj = {"x": int(current_region[0]), "y": int(current_region[1]), "w": int(current_region[2]), "h": int(current_region[3])}

        extracted_faces.append([img_pixels, region_obj, confidence])

    if len(extracted_faces) == 0 and enforce_detection:
        raise ValueError(f"Detected face shape is {img.shape}. Consider to set enforce_detection argument to False.")

    return extracted_faces
def normalize_input(img, normalization = 'base'):
@ -169,94 +189,21 @@ def normalize_input(img, normalization = 'base'):
return img
def find_target_size(model_name):
    """
    Find the target size of the facial images for a face recognition model.

    Parameters:
        model_name (string): one of VGG-Face, Facenet, Facenet512, OpenFace, DeepFace,
        DeepID, Dlib, ArcFace, SFace

    Returns:
        tuple of two integers

    Raises:
        ValueError: if the model name is not one of the listed ones
    """

    target_sizes = {
        "VGG-Face": (224, 224),
        "Facenet": (160, 160),
        "Facenet512": (160, 160),
        "OpenFace": (96, 96),
        "DeepFace": (152, 152),
        "DeepID": (55, 47), #TODO: might be opposite
        "Dlib": (150, 150),
        "ArcFace": (112, 112),
        "SFace": (112, 112)
    }

    if model_name not in target_sizes:
        raise ValueError(f"unimplemented model name - {model_name}")

    return target_sizes[model_name]

View File

@ -50,7 +50,7 @@ def analysis(db_path, model_name = 'VGG-Face', detector_backend = 'opencv', dist
#------------------------
input_shape = functions.find_input_shape(model)
input_shape = functions.find_target_size(model_name=model_name)
input_shape_x = input_shape[0]; input_shape_y = input_shape[1]
#tuned thresholds for model and metric pair

BIN
tests/dataset/couple.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 923 KiB

View File

@ -1,6 +1,7 @@
import warnings
import os
import tensorflow as tf
import numpy as np
import cv2
from deepface import DeepFace