handling many faces

This commit is contained in:
Sefik Ilkin Serengil 2023-01-23 21:54:52 +00:00
parent ba3db18671
commit b62e3671f8
6 changed files with 405 additions and 649 deletions

1
.gitignore vendored
View File

@ -17,6 +17,7 @@ deepface/extendedmodels/__pycache__/*
deepface/subsidiarymodels/__pycache__/* deepface/subsidiarymodels/__pycache__/*
deepface/detectors/__pycache__/* deepface/detectors/__pycache__/*
tests/dataset/*.pkl tests/dataset/*.pkl
tests/sandbox.ipynb
.DS_Store .DS_Store
deepface/.DS_Store deepface/.DS_Store
*.pyc *.pyc

View File

@ -7,12 +7,13 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import time import time
from os import path from os import path
import cv2
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
import pickle import pickle
from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace, Boosting from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace
from deepface.extendedmodels import Age, Gender, Race, Emotion from deepface.extendedmodels import Age, Gender, Race, Emotion
from deepface.commons import functions, realtime, distance as dst from deepface.commons import functions, realtime, distance as dst
@ -67,33 +68,24 @@ def build_model(model_name):
return model_obj[model_name] return model_obj[model_name]
def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base'): def verify(img1_path, img2_path, model_name = 'VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enforce_detection = True, align = True, normalization = 'base'):
""" """
This function verifies an image pair is same person or different persons. This function verifies an image pair is same person or different persons.
Parameters: Parameters:
img1_path, img2_path: exact image path, numpy array (BGR) or base64 encoded images could be passed. If you are going to call verify function for a list of image pairs, then you should pass an array instead of calling the function in for loops. img1_path, img2_path: exact image path as string. numpy array (BGR) or base64 encoded images are also welcome.
If one of pair has more than one face, then we will compare the face pair with max similarity.
e.g. img1_path = [ model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace
['img1.jpg', 'img2.jpg'],
['img2.jpg', 'img3.jpg']
]
model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace or Ensemble
distance_metric (string): cosine, euclidean, euclidean_l2 distance_metric (string): cosine, euclidean, euclidean_l2
model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. enforce_detection (boolean): If no face could be detected in an image, then this function will raise an exception by default.
Set this to False not to have this exception. This might be convenient for low resolution images.
model = DeepFace.build_model('VGG-Face')
enforce_detection (boolean): If no face could be detected in an image, then this function will raise an exception by default. Set this to False not to have this exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
prog_bar (boolean): enable/disable a progress bar
Returns: Returns:
Verify function returns a dictionary. If img1_path is a list of image pairs, then the function will return list of dictionary. Verify function returns a dictionary. If img1_path is a list of image pairs, then the function will return list of dictionary.
@ -103,194 +95,116 @@ def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric =
, "max_threshold_to_verify": 0.40 , "max_threshold_to_verify": 0.40
, "model": "VGG-Face" , "model": "VGG-Face"
, "similarity_metric": "cosine" , "similarity_metric": "cosine"
, 'facial_areas': {
'img1': {'x': 345, 'y': 211, 'w': 769, 'h': 769},
'img2': {'x': 318, 'y': 534, 'w': 779, 'h': 779}
}
, "time": 2
} }
""" """
tic = time.time() tic = time.time()
img_list, bulkProcess = functions.initialize_input(img1_path, img2_path)
resp_objects = []
#-------------------------------- #--------------------------------
target_size = functions.find_target_size(model_name=model_name)
if model_name == 'Ensemble': # img pairs might have many faces
model_names = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"] img1_objs = functions.extract_faces(
metrics = ["cosine", "euclidean", "euclidean_l2"] img = img1_path,
else: target_size = (target_size[1], target_size[0]),
model_names = []; metrics = [] detector_backend = detector_backend,
model_names.append(model_name) grayscale = False,
metrics.append(distance_metric) enforce_detection = enforce_detection,
align = align)
img2_objs = functions.extract_faces(
img = img2_path,
target_size = (target_size[1], target_size[0]),
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align)
#-------------------------------- #--------------------------------
distances = []
if model == None: regions = []
if model_name == 'Ensemble': # now we will find the face pair with minimum distance
models = Boosting.loadModel() for img1_content, img1_region, img1_confidence in img1_objs:
else: for img2_content, img2_region, img2_confidence in img2_objs:
model = build_model(model_name) img1_embedding_obj = represent(img_path = img1_content
models = {} , model_name = model_name
models[model_name] = model , enforce_detection = enforce_detection
else: , detector_backend = "skip"
if model_name == 'Ensemble':
Boosting.validate_model(model)
models = model.copy()
else:
models = {}
models[model_name] = model
#------------------------------
disable_option = (False if len(img_list) > 1 else True) or not prog_bar
pbar = tqdm(range(0,len(img_list)), desc='Verification', disable = disable_option)
for index in pbar:
instance = img_list[index]
if type(instance) == list and len(instance) >= 2:
img1_path = instance[0]; img2_path = instance[1]
ensemble_features = []
for i in model_names:
custom_model = models[i]
#img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'mtcnn'
img1_representation = represent(img_path = img1_path
, model_name = model_name, model = custom_model
, enforce_detection = enforce_detection, detector_backend = detector_backend
, align = align , align = align
, normalization = normalization , normalization = normalization
) )
img2_representation = represent(img_path = img2_path img2_embedding_obj = represent(img_path = img2_content
, model_name = model_name, model = custom_model , model_name = model_name
, enforce_detection = enforce_detection, detector_backend = detector_backend , enforce_detection = enforce_detection
, detector_backend = "skip"
, align = align , align = align
, normalization = normalization , normalization = normalization
) )
#---------------------- img1_representation = img1_embedding_obj[0]["embedding"]
#find distances between embeddings img2_representation = img2_embedding_obj[0]["embedding"]
for j in metrics: if distance_metric == 'cosine':
if j == 'cosine':
distance = dst.findCosineDistance(img1_representation, img2_representation) distance = dst.findCosineDistance(img1_representation, img2_representation)
elif j == 'euclidean': elif distance_metric == 'euclidean':
distance = dst.findEuclideanDistance(img1_representation, img2_representation) distance = dst.findEuclideanDistance(img1_representation, img2_representation)
elif j == 'euclidean_l2': elif distance_metric == 'euclidean_l2':
distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation)) distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation))
else: else:
raise ValueError("Invalid distance_metric passed - ", distance_metric) raise ValueError("Invalid distance_metric passed - ", distance_metric)
distance = np.float64(distance) #causes trobule for euclideans in api calls if this is not set (issue #175) distances.append(distance)
#---------------------- regions.append((img1_region, img2_region))
#decision
if model_name != 'Ensemble': # -------------------------------
threshold = dst.findThreshold(model_name, distance_metric)
distance = min(distances) #best distance
facial_areas = regions[np.argmin(distances)]
threshold = dst.findThreshold(i, j) toc = time.time()
if distance <= threshold:
identified = True
else:
identified = False
resp_obj = { resp_obj = {
"verified": identified "verified": True if distance <= threshold else False
, "distance": distance , "distance": distance
, "threshold": threshold , "threshold": threshold
, "model": model_name , "model": model_name
, "detector_backend": detector_backend , "detector_backend": detector_backend
, "similarity_metric": distance_metric , "similarity_metric": distance_metric
, "facial_areas": {
"img1": facial_areas[0],
"img2": facial_areas[1]
}
, "time": round(toc - tic, 2)
} }
if bulkProcess == True:
resp_objects.append(resp_obj)
else:
return resp_obj return resp_obj
else: #Ensemble def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , enforce_detection = True, detector_backend = 'opencv', align = True, silent = False):
#this returns same with OpenFace - euclidean_l2
if i == 'OpenFace' and j == 'euclidean':
continue
else:
ensemble_features.append(distance)
#----------------------
if model_name == 'Ensemble':
boosted_tree = Boosting.build_gbm()
prediction = boosted_tree.predict(np.expand_dims(np.array(ensemble_features), axis=0))[0]
verified = np.argmax(prediction) == 1
score = prediction[np.argmax(prediction)]
resp_obj = {
"verified": verified
, "score": score
, "distance": ensemble_features
, "model": ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
, "similarity_metric": ["cosine", "euclidean", "euclidean_l2"]
}
if bulkProcess == True:
resp_objects.append(resp_obj)
else:
return resp_obj
#----------------------
else:
raise ValueError("Invalid arguments passed to verify function: ", instance)
#-------------------------
toc = time.time()
if bulkProcess == True:
resp_obj = {}
for i in range(0, len(resp_objects)):
resp_item = resp_objects[i]
resp_obj["pair_%d" % (i+1)] = resp_item
return resp_obj
def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models = None, enforce_detection = True, detector_backend = 'opencv', prog_bar = True):
""" """
This function analyzes facial attributes including age, gender, emotion and race This function analyzes facial attributes including age, gender, emotion and race
Parameters: Parameters:
img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. If you are going to analyze lots of images, then set this to list. e.g. img_path = ['img1.jpg', 'img2.jpg'] img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed.
actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop some of those attributes. actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop some of those attributes.
models: (Optional[dict]) facial attribute analysis models are built in every call of analyze function. You can pass pre-built models to speed the function up.
models = {}
models['age'] = DeepFace.build_model('Age')
models['gender'] = DeepFace.build_model('Gender')
models['emotion'] = DeepFace.build_model('Emotion')
models['race'] = DeepFace.build_model('Race')
enforce_detection (boolean): The function throws exception if no face detected by default. Set this to False if you don't want to get exception. This might be convenient for low resolution images. enforce_detection (boolean): The function throws exception if no face detected by default. Set this to False if you don't want to get exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib. detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib.
prog_bar (boolean): enable/disable a progress bar silent (boolean): disable (some) log messages
Returns:
The function returns a dictionary. If img_path is a list, then it will return list of dictionary.
Returns:
The function returns a list of dictionaries for each face appearing in the image.
[
{ {
"region": {'x': 230, 'y': 120, 'w': 36, 'h': 45}, "region": {'x': 230, 'y': 120, 'w': 36, 'h': 45},
"age": 28.66, "age": 28.66,
@ -319,165 +233,104 @@ def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models =
'white': 93.44925880432129 'white': 93.44925880432129
} }
} }
]
""" """
#---------------------------------
# validate actions
if type(actions) == str: if type(actions) == str:
actions = (actions,) actions = (actions,)
actions = list(actions) actions = list(actions)
if not models: #---------------------------------
# build models
models = {} models = {}
if 'emotion' in actions:
img_paths, bulkProcess = functions.initialize_input(img_path)
#---------------------------------
built_models = list(models.keys())
#---------------------------------
#pre-trained models passed but it doesn't exist in actions
if len(built_models) > 0:
if 'emotion' in built_models and 'emotion' not in actions:
actions.append('emotion')
if 'age' in built_models and 'age' not in actions:
actions.append('age')
if 'gender' in built_models and 'gender' not in actions:
actions.append('gender')
if 'race' in built_models and 'race' not in actions:
actions.append('race')
#---------------------------------
if 'emotion' in actions and 'emotion' not in built_models:
models['emotion'] = build_model('Emotion') models['emotion'] = build_model('Emotion')
if 'age' in actions and 'age' not in built_models: if 'age' in actions:
models['age'] = build_model('Age') models['age'] = build_model('Age')
if 'gender' in actions and 'gender' not in built_models: if 'gender' in actions:
models['gender'] = build_model('Gender') models['gender'] = build_model('Gender')
if 'race' in actions and 'race' not in built_models: if 'race' in actions:
models['race'] = build_model('Race') models['race'] = build_model('Race')
#--------------------------------- #---------------------------------
resp_objects = [] resp_objects = []
disable_option = (False if len(img_paths) > 1 else True) or not prog_bar img_objs = functions.extract_faces(img=img_path, target_size=(224, 224), detector_backend=detector_backend, grayscale = False, enforce_detection=enforce_detection, align=align)
global_pbar = tqdm(range(0,len(img_paths)), desc='Analyzing', disable = disable_option)
for j in global_pbar:
img_path = img_paths[j]
resp_obj = {}
disable_option = (False if len(actions) > 1 else True) or not prog_bar
pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = disable_option)
img_224 = None # Set to prevent re-detection
region = [] # x, y, w, h of the detected face region
region_labels = ['x', 'y', 'w', 'h']
is_region_set = False
for img_content, img_region, img_confidence in img_objs:
if img_content.shape[0] > 0 and img_content.shape[1] > 0:
obj = {}
#facial attribute analysis #facial attribute analysis
pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = silent)
for index in pbar: for index in pbar:
action = actions[index] action = actions[index]
pbar.set_description("Action: %s" % (action)) pbar.set_description("Action: %s" % (action))
if action == 'emotion': if action == 'emotion':
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'] img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY)
img, region = functions.preprocess_face(img = img_path, target_size = (48, 48), grayscale = True, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) img_gray = cv2.resize(img_gray, (48, 48))
img_gray = np.expand_dims(img_gray, axis = 0)
emotion_predictions = models['emotion'].predict(img, verbose=0)[0,:] emotion_predictions = models['emotion'].predict(img_gray, verbose=0)[0,:]
sum_of_predictions = emotion_predictions.sum() sum_of_predictions = emotion_predictions.sum()
resp_obj["emotion"] = {} obj["emotion"] = {}
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
for i in range(0, len(emotion_labels)): for i in range(0, len(emotion_labels)):
emotion_label = emotion_labels[i] emotion_label = emotion_labels[i]
emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
resp_obj["emotion"][emotion_label] = emotion_prediction obj["emotion"][emotion_label] = emotion_prediction
resp_obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)] obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]
elif action == 'age': elif action == 'age':
if img_224 is None: age_predictions = models['age'].predict(img_content, verbose=0)[0,:]
img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
age_predictions = models['age'].predict(img_224, verbose=0)[0,:]
apparent_age = Age.findApparentAge(age_predictions) apparent_age = Age.findApparentAge(age_predictions)
obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
resp_obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
elif action == 'gender': elif action == 'gender':
gender_predictions = models['gender'].predict(img_content, verbose=0)[0,:]
if img_224 is None:
img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
gender_predictions = models['gender'].predict(img_224, verbose=0)[0,:]
gender_labels = ["Woman", "Man"] gender_labels = ["Woman", "Man"]
resp_obj["gender"] = {} obj["gender"] = {}
for i, gender_label in enumerate(gender_labels): for i, gender_label in enumerate(gender_labels):
gender_prediction = 100 * gender_predictions[i] gender_prediction = 100 * gender_predictions[i]
resp_obj["gender"][gender_label] = gender_prediction obj["gender"][gender_label] = gender_prediction
resp_obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)] obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]
elif action == 'race': elif action == 'race':
if img_224 is None: race_predictions = models['race'].predict(img_content, verbose=0)[0,:]
img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) #just emotion model expects grayscale images
race_predictions = models['race'].predict(img_224, verbose=0)[0,:]
race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
sum_of_predictions = race_predictions.sum() sum_of_predictions = race_predictions.sum()
resp_obj["race"] = {} obj["race"] = {}
race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
for i in range(0, len(race_labels)): for i in range(0, len(race_labels)):
race_label = race_labels[i] race_label = race_labels[i]
race_prediction = 100 * race_predictions[i] / sum_of_predictions race_prediction = 100 * race_predictions[i] / sum_of_predictions
resp_obj["race"][race_label] = race_prediction obj["race"][race_label] = race_prediction
resp_obj["dominant_race"] = race_labels[np.argmax(race_predictions)] obj["dominant_race"] = race_labels[np.argmax(race_predictions)]
#----------------------------- #-----------------------------
# mention facial areas
obj["region"] = img_region
if is_region_set != True and region: resp_objects.append(obj)
resp_obj["region"] = {}
is_region_set = True
for i, parameter in enumerate(region_labels):
resp_obj["region"][parameter] = int(region[i]) #int cast is for the exception - object of type 'float32' is not JSON serializable
#---------------------------------
if bulkProcess == True:
resp_objects.append(resp_obj)
else:
return resp_obj
if bulkProcess == True:
return resp_objects return resp_objects
def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base', silent=False):
def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base', silent=False):
""" """
This function applies verification several times and find an identity in a database This function applies verification several times and find the identities in a database
Parameters: Parameters:
img_path: exact image path, numpy array (BGR) or base64 encoded image. If you are going to find several identities, then you should pass img_path as array instead of calling find function in a for loop. e.g. img_path = ["img1.jpg", "img2.jpg"] img_path: exact image path, numpy array (BGR) or base64 encoded image.
db_path (string): You should store some .jpg files in a folder and pass the exact folder path to this. db_path (string): You should store some .jpg files in a folder and pass the exact folder path to this.
@ -485,58 +338,23 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
distance_metric (string): cosine, euclidean, euclidean_l2 distance_metric (string): cosine, euclidean, euclidean_l2
model: built deepface model. A face recognition models are built in every call of find function. You can pass pre-built models to speed the function up.
model = DeepFace.build_model('VGG-Face')
enforce_detection (boolean): The function throws exception if a face could not be detected. Set this to False if you don't want to get exception. This might be convenient for low resolution images. enforce_detection (boolean): The function throws exception if a face could not be detected. Set this to False if you don't want to get exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
prog_bar (boolean): enable/disable a progress bar silent (boolean): disable some logging and progress bars
Returns: Returns:
This function returns pandas data frame. If a list of images is passed to img_path, then it will return list of pandas data frame. This function returns a list of pandas data frames. Each item of the list corresponds to an identity in the img_path.
""" """
tic = time.time() tic = time.time()
img_paths, bulkProcess = functions.initialize_input(img_path)
#------------------------------- #-------------------------------
if os.path.isdir(db_path) != True:
if os.path.isdir(db_path) == True: raise ValueError("Passed db_path does not exist!")
if model == None:
if model_name == 'Ensemble':
if not silent: print("Ensemble learning enabled")
models = Boosting.loadModel()
else: #model is not ensemble
model = build_model(model_name)
models = {}
models[model_name] = model
else: #model != None
if not silent: print("Already built model is passed")
if model_name == 'Ensemble':
Boosting.validate_model(model)
models = model.copy()
else: else:
models = {} target_size = functions.find_target_size(model_name=model_name)
models[model_name] = model
#---------------------------------------
if model_name == 'Ensemble':
model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']
metric_names = ['cosine', 'euclidean', 'euclidean_l2']
elif model_name != 'Ensemble':
model_names = []; metric_names = []
model_names.append(model_name)
metric_names.append(distance_metric)
#--------------------------------------- #---------------------------------------
@ -545,19 +363,21 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
if path.exists(db_path+"/"+file_name): if path.exists(db_path+"/"+file_name):
if not silent: print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.") if not silent:
print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")
f = open(db_path+'/'+file_name, 'rb') f = open(db_path+'/'+file_name, 'rb')
representations = pickle.load(f) representations = pickle.load(f)
if not silent: print("There are ", len(representations)," representations found in ",file_name) if not silent:
print("There are ", len(representations)," representations found in ",file_name)
else: #create representation.pkl from scratch else: #create representation.pkl from scratch
employees = [] employees = []
for r, d, f in os.walk(db_path): # r=root, d=directories, f = files for r, d, f in os.walk(db_path): # r=root, d=directories, f = files
for file in f: for file in f:
if ('.jpg' in file.lower()) or ('.png' in file.lower()): if ('.jpg' in file.lower()) or ('.jpeg' in file.lower()) or ('.png' in file.lower()):
exact_path = r + "/" + file exact_path = r + "/" + file
employees.append(exact_path) employees.append(exact_path)
@ -569,158 +389,108 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
representations = [] representations = []
pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = prog_bar)
#for employee in employees: #for employee in employees:
pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = True if silent == True else False)
for index in pbar: for index in pbar:
employee = employees[index] employee = employees[index]
instance = [] img_objs = functions.extract_faces(img = employee,
instance.append(employee) target_size = target_size,
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align
)
for j in model_names: for img_content, img_region, img_confidence in img_objs:
custom_model = models[j] embedding_obj = represent(img_path = img_content
, model_name = model_name
representation = represent(img_path = employee , enforce_detection = enforce_detection
, model_name = model_name, model = custom_model , detector_backend = "skip"
, enforce_detection = enforce_detection, detector_backend = detector_backend
, align = align , align = align
, normalization = normalization , normalization = normalization
) )
instance.append(representation) img_representation = embedding_obj[0]["embedding"]
instance = []
instance.append(employee)
instance.append(img_representation)
representations.append(instance)
#------------------------------- #-------------------------------
representations.append(instance)
f = open(db_path+'/'+file_name, "wb") f = open(db_path+'/'+file_name, "wb")
pickle.dump(representations, f) pickle.dump(representations, f)
f.close() f.close()
if not silent: print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.") if not silent:
print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")
#---------------------------- #----------------------------
#now, we got representations for facial database #now, we got representations for facial database
df = pd.DataFrame(representations, columns = ["identity", f"{model_name}_representation"])
if model_name != 'Ensemble': # img path might have more than one face
df = pd.DataFrame(representations, columns = ["identity", "%s_representation" % (model_name)]) target_objs = functions.extract_faces(img = img_path,
else: #ensemble learning target_size = target_size,
detector_backend = detector_backend,
columns = ['identity'] grayscale = False,
[columns.append('%s_representation' % i) for i in model_names] enforce_detection = enforce_detection,
align = align
df = pd.DataFrame(representations, columns = columns) )
df_base = df.copy() #df will be filtered in each img. we will restore it for the next item.
resp_obj = [] resp_obj = []
global_pbar = tqdm(range(0, len(img_paths)), desc='Analyzing', disable = prog_bar) for target_img, target_region, target_confidence in target_objs:
for j in global_pbar: target_embedding_obj = represent(img_path = target_img
img_path = img_paths[j] , model_name = model_name
, enforce_detection = enforce_detection
#find representation for passed image , detector_backend = "skip"
for j in model_names:
custom_model = models[j]
target_representation = represent(img_path = img_path
, model_name = model_name, model = custom_model
, enforce_detection = enforce_detection, detector_backend = detector_backend
, align = align , align = align
, normalization = normalization , normalization = normalization
) )
for k in metric_names: target_representation = target_embedding_obj[0]["embedding"]
result_df = df.copy() #df will be filtered in each img
#TODO: add facial area in df
distances = [] distances = []
for index, instance in df.iterrows(): for index, instance in df.iterrows():
source_representation = instance["%s_representation" % (j)] source_representation = instance[f"{model_name}_representation"]
if k == 'cosine': if distance_metric == 'cosine':
distance = dst.findCosineDistance(source_representation, target_representation) distance = dst.findCosineDistance(source_representation, target_representation)
elif k == 'euclidean': elif distance_metric == 'euclidean':
distance = dst.findEuclideanDistance(source_representation, target_representation) distance = dst.findEuclideanDistance(source_representation, target_representation)
elif k == 'euclidean_l2': elif distance_metric == 'euclidean_l2':
distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation)) distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation))
else:
raise ValueError(f"invalid distance metric passes - {distance_metric}")
distances.append(distance) distances.append(distance)
#--------------------------- #---------------------------
if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean': result_df[f"{model_name}_{distance_metric}"] = distances
continue
else:
df["%s_%s" % (j, k)] = distances
if model_name != 'Ensemble': threshold = dst.findThreshold(model_name, distance_metric)
threshold = dst.findThreshold(j, k) result_df = result_df.drop(columns = [f"{model_name}_representation"])
df = df.drop(columns = ["%s_representation" % (j)]) result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold]
df = df[df["%s_%s" % (j, k)] <= threshold] result_df = result_df.sort_values(by = [f"{model_name}_{distance_metric}"], ascending=True).reset_index(drop=True)
df = df.sort_values(by = ["%s_%s" % (j, k)], ascending=True).reset_index(drop=True) resp_obj.append(result_df)
resp_obj.append(df) # -----------------------------------
df = df_base.copy() #restore df for the next iteration
#----------------------------------
if model_name == 'Ensemble':
feature_names = []
for j in model_names:
for k in metric_names:
if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
continue
else:
feature = '%s_%s' % (j, k)
feature_names.append(feature)
#print(df.head())
x = df[feature_names].values
#--------------------------------------
boosted_tree = Boosting.build_gbm()
y = boosted_tree.predict(x)
verified_labels = []; scores = []
for i in y:
verified = np.argmax(i) == 1
score = i[np.argmax(i)]
verified_labels.append(verified)
scores.append(score)
df['verified'] = verified_labels
df['score'] = scores
df = df[df.verified == True]
#df = df[df.score > 0.99] #confidence score
df = df.sort_values(by = ["score"], ascending=False).reset_index(drop=True)
df = df[['identity', 'verified', 'score']]
resp_obj.append(df)
df = df_base.copy() #restore df for the next iteration
#----------------------------------
toc = time.time() toc = time.time()
if not silent: print("find function lasts ",toc-tic," seconds") if not silent:
print("find function lasts ",toc-tic," seconds")
if len(resp_obj) == 1:
return resp_obj[0]
return resp_obj return resp_obj
else:
raise ValueError("Passed db_path does not exist!")
return None
def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base'): def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base'):
""" """
@ -731,10 +501,6 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace. model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace.
model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. Consider to pass model if you are going to call represent function in a for loop.
model = DeepFace.build_model('VGG-Face')
enforce_detection (boolean): If any face could not be detected in an image, then verify function will return exception. Set this to False not to have this exception. This might be convenient for low resolution images. enforce_detection (boolean): If any face could not be detected in an image, then verify function will return exception. Set this to False not to have this exception. This might be convenient for low resolution images.
detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
@ -744,38 +510,51 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
Returns: Returns:
Represent function returns a multidimensional vector. The number of dimensions is changing based on the reference model. E.g. FaceNet returns 128 dimensional vector; VGG-Face returns 2622 dimensional vector. Represent function returns a multidimensional vector. The number of dimensions is changing based on the reference model. E.g. FaceNet returns 128 dimensional vector; VGG-Face returns 2622 dimensional vector.
""" """
resp_objs = []
if model is None:
model = build_model(model_name) model = build_model(model_name)
#--------------------------------- #---------------------------------
# we started to run pre-process in verification. so, this can be skipped if it is coming from verification.
if detector_backend != "skip":
target_size = functions.find_target_size(model_name=model_name)
#decide input shape img_objs = functions.extract_faces(img = img_path,
input_shape_x, input_shape_y = functions.find_input_shape(model) target_size = target_size,
detector_backend = detector_backend,
#detect and align grayscale = False,
img = functions.preprocess_face(img = img_path enforce_detection = enforce_detection,
, target_size=(input_shape_y, input_shape_x) align = align)
, enforce_detection = enforce_detection else: # skip
, detector_backend = detector_backend if type(img_path) == str:
, align = align) img = functions.load_image(img_path)
elif type(img_path).__module__ == np.__name__:
img = img_path.copy()
else:
raise ValueError(f"unexpected type for img_path - {type(img_path)}")
img_region = [0, 0, img.shape[1], img.shape[0]]
img_objs = [(img, img_region, 0)]
#--------------------------------- #---------------------------------
for img, region, confidence in img_objs:
#custom normalization #custom normalization
img = functions.normalize_input(img = img, normalization = normalization) img = functions.normalize_input(img = img, normalization = normalization)
#---------------------------------
#represent #represent
if "keras" in str(type(model)): if "keras" in str(type(model)):
#new tf versions show progress bar and it is annoying #new tf versions show progress bar and it is annoying
embedding = model.predict(img, verbose=0)[0].tolist() embedding = model.predict(img, verbose=0)[0].tolist()
else: else:
#SFace is not a keras model and it has no verbose argument #SFace and Dlib are not keras models and no verbose arguments
embedding = model.predict(img)[0].tolist() embedding = model.predict(img)[0].tolist()
return embedding resp_obj = {}
resp_obj["embedding"] = embedding
resp_obj["facial_area"] = region
resp_objs.append(resp_obj)
return resp_objs
def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enable_face_analysis = True, source = 0, time_threshold = 5, frame_threshold = 5): def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enable_face_analysis = True, source = 0, time_threshold = 5, frame_threshold = 5):
@ -810,7 +589,7 @@ def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', di
realtime.analysis(db_path, model_name, detector_backend, distance_metric, enable_face_analysis realtime.analysis(db_path, model_name, detector_backend, distance_metric, enable_face_analysis
, source = source, time_threshold = time_threshold, frame_threshold = frame_threshold) , source = source, time_threshold = time_threshold, frame_threshold = frame_threshold)
def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True): def extract_faces(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True):
""" """
This function applies pre-processing stages of a face recognition pipeline including detection and alignment This function applies pre-processing stages of a face recognition pipeline including detection and alignment
@ -818,15 +597,43 @@ def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv',
Parameters: Parameters:
img_path: exact image path, numpy array (BGR) or base64 encoded image img_path: exact image path, numpy array (BGR) or base64 encoded image
target_size (tuple): final shape of facial image. black pixels will be added to resize the image.
detector_backend (string): face detection backends are retinaface, mtcnn, opencv, ssd or dlib detector_backend (string): face detection backends are retinaface, mtcnn, opencv, ssd or dlib
enforce_detection (boolean): function throws exception if face cannot be detected in the fed image.
Set this to False if you do not want to get exception and run the function anyway.
align (boolean): alignment according to the eye positions.
Returns: Returns:
deteced and aligned face in numpy format list of dictionaries. Each dictionary will have facial image itself, extracted area from the original image and confidence score.
""" """
img = functions.preprocess_face(img = img_path, target_size = target_size, detector_backend = detector_backend resp_objs = []
, enforce_detection = enforce_detection, align = align)[0] #preprocess_face returns (1, 224, 224, 3) img_objs = functions.extract_faces(
return img[:, :, ::-1] #bgr to rgb img = img_path,
target_size = target_size,
detector_backend = detector_backend,
grayscale = False,
enforce_detection = enforce_detection,
align = align
)
for img, region, confidence in img_objs:
resp_obj = {}
# discard expanded dimension
if len(img.shape) == 4:
img = img[0]
resp_obj["face"] = img[:, :, ::-1]
resp_obj["facial_area"] = region
resp_obj["confidence"] = confidence
resp_objs.append(resp_obj)
return resp_objs
#--------------------------- #---------------------------
#main #main

View File

@ -27,24 +27,6 @@ elif tf_major_version == 2:
#-------------------------------------------------- #--------------------------------------------------
def initialize_input(img1_path, img2_path = None):
    """
    Normalize the image argument(s) into a list of instances.

    Parameters:
        img1_path: a single image (exact path, base64 string or numpy array)
            or a list of instances for bulk processing.

        img2_path: optional second image (exact path, base64 string or numpy array).
            It is ignored when img1_path is a list.

    Returns:
        (img_list, bulkProcess) tuple.
        - img1_path is a list: a shallow copy of that list and True.
        - a second image is given: [[img1_path, img2_path]] and False.
        - otherwise: [img1_path] and False.
    """
    if isinstance(img1_path, list):
        # bulk mode: hand back a new list object holding the same instances
        return img1_path.copy(), True

    # A second image is "present" when it is a string (exact path or base64)
    # or a non-empty numpy array. The array test relies on size instead of
    # any() so that an all-zero (black) image still counts as an image.
    has_second_image = (
        isinstance(img2_path, str)
        or (isinstance(img2_path, np.ndarray) and img2_path.size > 0)
    )

    if has_second_image:
        img_list = [[img1_path, img2_path]]
    else:  # analyze function passes just img1_path
        img_list = [img1_path]

    return img_list, False
def initialize_folder(): def initialize_folder():
home = get_deepface_home() home = get_deepface_home()
@ -59,6 +41,8 @@ def initialize_folder():
def get_deepface_home(): def get_deepface_home():
return str(os.getenv('DEEPFACE_HOME', default=Path.home())) return str(os.getenv('DEEPFACE_HOME', default=Path.home()))
#--------------------------------------------------
def loadBase64Img(uri): def loadBase64Img(uri):
encoded_data = uri.split(',')[1] encoded_data = uri.split(',')[1]
nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8) nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
@ -93,35 +77,71 @@ def load_image(img):
return img return img
def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True): #--------------------------------------------------
def extract_faces(img, target_size=(224, 224), detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
# this is going to store a list of the img itself (numpy), its region and confidence
extracted_faces = []
#img might be path, base64 or numpy array. Convert it to numpy whatever it is.
img = load_image(img)
img_region = [0, 0, img.shape[1], img.shape[0]] img_region = [0, 0, img.shape[1], img.shape[0]]
#----------------------------------------------
#people would like to skip detection and alignment if they already have pre-processed images
if detector_backend == 'skip': if detector_backend == 'skip':
return img, img_region face_objs = [(img, img_region, 0)]
else:
#----------------------------------------------
#detector stored in a global variable in FaceDetector object.
#this call should complete very fast because it returns the detector already held in memory
#it will not build face detector model in each call (consider for loops)
face_detector = FaceDetector.build_model(detector_backend) face_detector = FaceDetector.build_model(detector_backend)
face_objs = FaceDetector.detect_faces(face_detector, detector_backend, img, align)
try: # in case of no face found
detected_face, img_region, _ = FaceDetector.detect_face(face_detector, detector_backend, img, align) if len(face_objs) == 0 and enforce_detection == True:
except: #if detected face shape is (0, 0) and alignment cannot be performed, this block will be run
detected_face = None
if (isinstance(detected_face, np.ndarray)):
return detected_face, img_region
else:
if detected_face == None:
if enforce_detection != True:
return img, img_region
else:
raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.") raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.")
elif len(face_objs) == 0 and enforce_detection == False:
face_objs = [(img, img_region, 0)]
for current_img, current_region, confidence in face_objs:
if current_img.shape[0] > 0 and current_img.shape[1] > 0:
if grayscale == True:
current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
# resize and padding
if current_img.shape[0] > 0 and current_img.shape[1] > 0:
factor_0 = target_size[0] / current_img.shape[0]
factor_1 = target_size[1] / current_img.shape[1]
factor = min(factor_0, factor_1)
dsize = (int(current_img.shape[1] * factor), int(current_img.shape[0] * factor))
current_img = cv2.resize(current_img, dsize)
diff_0 = target_size[0] - current_img.shape[0]
diff_1 = target_size[1] - current_img.shape[1]
if grayscale == False:
# Put the base image in the middle of the padded image
current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant')
else:
current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant')
#double check: if target image is not still the same size with target.
if current_img.shape[0:2] != target_size:
current_img = cv2.resize(current_img, target_size)
#normalizing the image pixels
img_pixels = image.img_to_array(current_img) #what this line doing? must?
img_pixels = np.expand_dims(img_pixels, axis = 0)
img_pixels /= 255 #normalize input in [0, 1]
#int cast is for the exception - object of type 'float32' is not JSON serializable
region_obj = {"x": int(current_region[0]), "y": int(current_region[1]), "w": int(current_region[2]), "h": int(current_region[3])}
extracted_face = [img_pixels, region_obj, confidence]
extracted_faces.append(extracted_face)
if len(extracted_faces) == 0 and enforce_detection == True:
raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
return extracted_faces
def normalize_input(img, normalization = 'base'): def normalize_input(img, normalization = 'base'):
@ -169,94 +189,21 @@ def normalize_input(img, normalization = 'base'):
return img return img
def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True): def find_target_size(model_name):
#img might be path, base64 or numpy array. Convert it to numpy whatever it is. target_sizes = {
img = load_image(img) "VGG-Face": (224, 224),
base_img = img.copy() "Facenet": (160, 160),
"Facenet512": (160, 160),
"OpenFace": (96, 96),
"DeepFace": (152, 152),
"DeepID": (55, 47), #TODO: might be opposite
"Dlib": (150, 150),
"ArcFace": (112, 112),
"SFace": (112, 112)
}
img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align) if model_name not in target_sizes.keys():
raise ValueError(f"unimplemented model name - {model_name}")
#-------------------------- return target_sizes[model_name]
if img.shape[0] == 0 or img.shape[1] == 0:
if enforce_detection == True:
raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
else: #restore base image
img = base_img.copy()
#--------------------------
#post-processing
if grayscale == True:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#---------------------------------------------------
#resize image to expected shape
# img = cv2.resize(img, target_size) #resize causes transformation on base image, adding black pixels to resize will not deform the base image
if img.shape[0] > 0 and img.shape[1] > 0:
factor_0 = target_size[0] / img.shape[0]
factor_1 = target_size[1] / img.shape[1]
factor = min(factor_0, factor_1)
dsize = (int(img.shape[1] * factor), int(img.shape[0] * factor))
img = cv2.resize(img, dsize)
# Then pad the other side to the target size by adding black pixels
diff_0 = target_size[0] - img.shape[0]
diff_1 = target_size[1] - img.shape[1]
if grayscale == False:
# Put the base image in the middle of the padded image
img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant')
else:
img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant')
#------------------------------------------
#double check: if target image is not still the same size with target.
if img.shape[0:2] != target_size:
img = cv2.resize(img, target_size)
#---------------------------------------------------
#normalizing the image pixels
img_pixels = image.img_to_array(img) #what this line doing? must?
img_pixels = np.expand_dims(img_pixels, axis = 0)
img_pixels /= 255 #normalize input in [0, 1]
#---------------------------------------------------
if return_region == True:
return img_pixels, region
else:
return img_pixels
def find_input_shape(model):
    """
    Find the expected input size of a face recognition model.

    Parameters:
        model: built model exposing layers[0].input_shape

    Returns:
        The two items at positions 1 and 2 of the first layer's input shape.
        Their order is swapped when running on tf 2.5 or a later 2.x release.
    """
    #face recognition models have different size of inputs
    first_layer_shape = model.layers[0].input_shape

    #some environments return (None, 224, 224, 3) whereas others return [(None, 224, 224, 3)]. this is probably a version issue.
    if type(first_layer_shape) is list:
        first_layer_shape = first_layer_shape[0]

    input_shape = first_layer_shape[1:3]

    #issue 289: it seems that tf 2.5 expects you to resize images with (x, y)
    #whereas its older versions expect (y, x)
    if tf_major_version == 2 and tf_minor_version >= 5:
        input_shape = (input_shape[1], input_shape[0])

    #issue 197: some people got array here instead of tuple
    return tuple(input_shape) if type(input_shape) is list else input_shape

View File

@ -50,7 +50,7 @@ def analysis(db_path, model_name = 'VGG-Face', detector_backend = 'opencv', dist
#------------------------ #------------------------
input_shape = functions.find_input_shape(model) input_shape = functions.find_target_size(model_name=model_name)
input_shape_x = input_shape[0]; input_shape_y = input_shape[1] input_shape_x = input_shape[0]; input_shape_y = input_shape[1]
#tuned thresholds for model and metric pair #tuned thresholds for model and metric pair

BIN
tests/dataset/couple.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 923 KiB

View File

@ -1,6 +1,7 @@
import warnings import warnings
import os import os
import tensorflow as tf import tensorflow as tf
import numpy as np
import cv2 import cv2
from deepface import DeepFace from deepface import DeepFace