handling many faces

2025-06-07 12:05:22 +00:00 · 2023-01-23 21:54:52 +00:00 · 2023-01-23 21:54:52 +00:00 · b62e3671f8
commit b62e3671f8
parent ba3db18671
6 changed files with 405 additions and 649 deletions
--- a/.gitignore
+++ b/.gitignore
@ -17,6 +17,7 @@ deepface/extendedmodels/__pycache__/*
 deepface/subsidiarymodels/__pycache__/*
 deepface/detectors/__pycache__/*
 tests/dataset/*.pkl
 tests/sandbox.ipynb
 .DS_Store
 deepface/.DS_Store
 *.pyc
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@ -7,12 +7,13 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 import time
 from os import path
 import cv2
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
 import pickle
-from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace, Boosting
+from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace
 from deepface.extendedmodels import Age, Gender, Race, Emotion
 from deepface.commons import functions, realtime, distance as dst
@ -67,33 +68,24 @@ def build_model(model_name):
 	return model_obj[model_name]
-def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base'):
+def verify(img1_path, img2_path, model_name = 'VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enforce_detection = True, align = True, normalization = 'base'):
 	"""
 	This function verifies an image pair is same person or different persons.
 	Parameters:
-		img1_path, img2_path: exact image path, numpy array (BGR) or based64 encoded images could be passed. If you are going to call verify function for a list of image pairs, then you should pass an array instead of calling the function in for loops.
+		img1_path, img2_path: exact image path as string. A numpy array (BGR) or a base64 encoded image is also accepted.
 		If one of pair has more than one face, then we will compare the face pair with max similarity.
-		e.g. img1_path = [
+		model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace
 			['img1.jpg', 'img2.jpg'],
 			['img2.jpg', 'img3.jpg']
 		]
 		model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace or Ensemble
 		distance_metric (string): cosine, euclidean, euclidean_l2
-		model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times.
+		enforce_detection (boolean): If a face cannot be detected in an image, this function raises an exception by default.
-
+		Set this to False not to have this exception. This might be convenient for low resolution images.
 			model = DeepFace.build_model('VGG-Face')
 		enforce_detection (boolean): If no face could not be detected in an image, then this function will return exception by default. Set this to False not to have this exception. This might be convenient for low resolution images.
 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
 		prog_bar (boolean): enable/disable a progress bar
 	Returns:
 		Verify function returns a dictionary. If img1_path is a list of image pairs, then the function will return list of dictionary.
@ -103,194 +95,116 @@ def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric =
 			, "max_threshold_to_verify": 0.40
 			, "model": "VGG-Face"
 			, "similarity_metric": "cosine"
 			, 'facial_areas': {
 				'img1': {'x': 345, 'y': 211, 'w': 769, 'h': 769}, 
 				'img2': {'x': 318, 'y': 534, 'w': 779, 'h': 779}
 			}
 			, "time": 2
 		}
 	"""
 	tic = time.time()
 	img_list, bulkProcess = functions.initialize_input(img1_path, img2_path)
 	resp_objects = []
 	#--------------------------------
 	target_size = functions.find_target_size(model_name=model_name)
-	if model_name == 'Ensemble':
+	# img pairs might have many faces
-		model_names = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
+	img1_objs = functions.extract_faces(
-		metrics = ["cosine", "euclidean", "euclidean_l2"]
+		img = img1_path, 
-	else:
+		target_size = (target_size[1], target_size[0]), 
-		model_names = []; metrics = []
+		detector_backend = detector_backend, 
-		model_names.append(model_name)
+		grayscale = False, 
-		metrics.append(distance_metric)
+		enforce_detection = enforce_detection, 
 		align = align)
 	img2_objs = functions.extract_faces(
 		img = img2_path, 
 		target_size = (target_size[1], target_size[0]), 
 		detector_backend = detector_backend, 
 		grayscale = False, 
 		enforce_detection = enforce_detection, 
 		align = align)
 	#--------------------------------
-
+	distances = []
-	if model == None:
+	regions = []
-		if model_name == 'Ensemble':
+	# now we will find the face pair with minimum distance
-			models = Boosting.loadModel()
+	for img1_content, img1_region, img1_confidence in img1_objs:
-		else:
+		for img2_content, img2_region, img2_confidence in img2_objs:
-			model = build_model(model_name)
+			img1_embedding_obj = represent(img_path = img1_content
-			models = {}
+						, model_name = model_name
-			models[model_name] = model
+						, enforce_detection = enforce_detection
-	else:
+						, detector_backend = "skip"
 		if model_name == 'Ensemble':
 			Boosting.validate_model(model)
 			models = model.copy()
 		else:
 			models = {}
 			models[model_name] = model
 	#------------------------------
 	disable_option = (False if len(img_list) > 1 else True) or not prog_bar
 	pbar = tqdm(range(0,len(img_list)), desc='Verification', disable = disable_option)
 	for index in pbar:
 		instance = img_list[index]
 		if type(instance) == list and len(instance) >= 2:
 			img1_path = instance[0]; img2_path = instance[1]
 			ensemble_features = []
 			for i in  model_names:
 				custom_model = models[i]
 				#img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'mtcnn'
 				img1_representation = represent(img_path = img1_path
 						, model_name = model_name, model = custom_model
 						, enforce_detection = enforce_detection, detector_backend = detector_backend
 						, align = align
 						, normalization = normalization
 						)
-				img2_representation = represent(img_path = img2_path
+			img2_embedding_obj = represent(img_path = img2_content
-						, model_name = model_name, model = custom_model
+						, model_name = model_name
-						, enforce_detection = enforce_detection, detector_backend = detector_backend
+						, enforce_detection = enforce_detection
 						, detector_backend = "skip"
 						, align = align
 						, normalization = normalization
 						)
-				#----------------------
+			img1_representation = img1_embedding_obj[0]["embedding"]
-				#find distances between embeddings
+			img2_representation = img2_embedding_obj[0]["embedding"]
-				for j in metrics:
+			if distance_metric == 'cosine':
 					if j == 'cosine':
 				distance = dst.findCosineDistance(img1_representation, img2_representation)
-					elif j == 'euclidean':
+			elif distance_metric == 'euclidean':
 				distance = dst.findEuclideanDistance(img1_representation, img2_representation)
-					elif j == 'euclidean_l2':
+			elif distance_metric == 'euclidean_l2':
 				distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation))
 			else:
 				raise ValueError("Invalid distance_metric passed - ", distance_metric)
-					distance = np.float64(distance) #causes trobule for euclideans in api calls if this is not set (issue #175)
+			distances.append(distance)
-					#----------------------
+			regions.append((img1_region, img2_region))
 					#decision
-					if model_name != 'Ensemble':
+	# -------------------------------
 	threshold = dst.findThreshold(model_name, distance_metric)
 	distance = min(distances) #best distance
 	facial_areas = regions[np.argmin(distances)]
-						threshold = dst.findThreshold(i, j)
+	toc = time.time()
 						if distance <= threshold:
 							identified = True
 						else:
 							identified = False
 	resp_obj = {
-							"verified": identified
+		"verified": True if distance <= threshold else False
 		, "distance": distance
 		, "threshold": threshold
 		, "model": model_name
 		, "detector_backend": detector_backend
 		, "similarity_metric": distance_metric
 		, "facial_areas": {
 			"img1": facial_areas[0],
 			"img2": facial_areas[1]
 		}
 		, "time": round(toc - tic, 2)
 	}
 						if bulkProcess == True:
 							resp_objects.append(resp_obj)
 						else:
 	return resp_obj
-					else: #Ensemble
+def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , enforce_detection = True, detector_backend = 'opencv', align = True, silent = False):
 						#this returns same with OpenFace - euclidean_l2
 						if i == 'OpenFace' and j == 'euclidean':
 							continue
 						else:
 							ensemble_features.append(distance)
 			#----------------------
 			if model_name == 'Ensemble':
 				boosted_tree = Boosting.build_gbm()
 				prediction = boosted_tree.predict(np.expand_dims(np.array(ensemble_features), axis=0))[0]
 				verified = np.argmax(prediction) == 1
 				score = prediction[np.argmax(prediction)]
 				resp_obj = {
 					"verified": verified
 					, "score": score
 					, "distance": ensemble_features
 					, "model": ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
 					, "similarity_metric": ["cosine", "euclidean", "euclidean_l2"]
 				}
 				if bulkProcess == True:
 					resp_objects.append(resp_obj)
 				else:
 					return resp_obj
 			#----------------------
 		else:
 			raise ValueError("Invalid arguments passed to verify function: ", instance)
 	#-------------------------
 	toc = time.time()
 	if bulkProcess == True:
 		resp_obj = {}
 		for i in range(0, len(resp_objects)):
 			resp_item = resp_objects[i]
 			resp_obj["pair_%d" % (i+1)] = resp_item
 		return resp_obj
 def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models = None, enforce_detection = True, detector_backend = 'opencv', prog_bar = True):
 	"""
 	This function analyzes facial attributes including age, gender, emotion and race
 	Parameters:
-		img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. If you are going to analyze lots of images, then set this to list. e.g. img_path = ['img1.jpg', 'img2.jpg']
+		img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed.
 		actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop some of those attributes.
 		models: (Optional[dict]) facial attribute analysis models are built in every call of analyze function. You can pass pre-built models to speed the function up.
 			models = {}
 			models['age'] = DeepFace.build_model('Age')
 			models['gender'] = DeepFace.build_model('Gender')
 			models['emotion'] = DeepFace.build_model('Emotion')
 			models['race'] = DeepFace.build_model('Race')
 		enforce_detection (boolean): The function throws exception if no face detected by default. Set this to False if you don't want to get exception. This might be convenient for low resolution images.
 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib.
-		prog_bar (boolean): enable/disable a progress bar
+		silent (boolean): disable (some) log messages
 	Returns:
 		The function returns a dictionary. If img_path is a list, then it will return list of dictionary.
 	Returns:
 		The function returns a list of dictionaries for each face appearing in the image.
 		[
 			{
 				"region": {'x': 230, 'y': 120, 'w': 36, 'h': 45},
 				"age": 28.66,
@ -319,165 +233,104 @@ def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models =
 					'white': 93.44925880432129
 				}
 			}
-
+		]
 	"""
-	
+	#---------------------------------
 	# validate actions 
 	if type(actions) == str:
 		actions = (actions,)
 	actions = list(actions)
-	if not models:
+	#---------------------------------
 	# build models
 	models = {}
-
+	if 'emotion' in actions:
 	img_paths, bulkProcess = functions.initialize_input(img_path)
 	#---------------------------------
 	built_models = list(models.keys())
 	#---------------------------------
 	#pre-trained models passed but it doesn't exist in actions
 	if len(built_models) > 0:
 		if 'emotion' in built_models and 'emotion' not in actions:
 			actions.append('emotion')
 		if 'age' in built_models and 'age' not in actions:
 			actions.append('age')
 		if 'gender' in built_models and 'gender' not in actions:
 			actions.append('gender')
 		if 'race' in built_models and 'race' not in actions:
 			actions.append('race')
 	#---------------------------------
 	if 'emotion' in actions and 'emotion' not in built_models:
 		models['emotion'] = build_model('Emotion')
-	if 'age' in actions and 'age' not in built_models:
+	if 'age' in actions:
 		models['age'] = build_model('Age')
-	if 'gender' in actions and 'gender' not in built_models:
+	if 'gender' in actions:
 		models['gender'] = build_model('Gender')
-	if 'race' in actions and 'race' not in built_models:
+	if 'race' in actions:
 		models['race'] = build_model('Race')
 	#---------------------------------
 	resp_objects = []
-	disable_option = (False if len(img_paths) > 1 else True) or not prog_bar
+	img_objs = functions.extract_faces(img=img_path, target_size=(224, 224), detector_backend=detector_backend, grayscale = False, enforce_detection=enforce_detection, align=align)
 	global_pbar = tqdm(range(0,len(img_paths)), desc='Analyzing', disable = disable_option)
 	for j in global_pbar:
 		img_path = img_paths[j]
 		resp_obj = {}
 		disable_option = (False if len(actions) > 1 else True) or not prog_bar
 		pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = disable_option)
 		img_224 = None # Set to prevent re-detection
 		region = [] # x, y, w, h of the detected face region
 		region_labels = ['x', 'y', 'w', 'h']
 		is_region_set = False
 	for img_content, img_region, img_confidence in img_objs:
 		if img_content.shape[0] > 0 and img_content.shape[1] > 0:
 			obj = {}
 			#facial attribute analysis
 			pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = silent)
 			for index in pbar:
 				action = actions[index]
 				pbar.set_description("Action: %s" % (action))
 				if action == 'emotion':
-				emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
+					img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY)
-				img, region = functions.preprocess_face(img = img_path, target_size = (48, 48), grayscale = True, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
+					img_gray = cv2.resize(img_gray, (48, 48))
 					img_gray = np.expand_dims(img_gray, axis = 0)
-				emotion_predictions = models['emotion'].predict(img, verbose=0)[0,:]
+					emotion_predictions = models['emotion'].predict(img_gray, verbose=0)[0,:]
 					sum_of_predictions = emotion_predictions.sum()
-				resp_obj["emotion"] = {}
+					obj["emotion"] = {}
 					emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
 					for i in range(0, len(emotion_labels)):
 						emotion_label = emotion_labels[i]
 						emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
-					resp_obj["emotion"][emotion_label] = emotion_prediction
+						obj["emotion"][emotion_label] = emotion_prediction
-				resp_obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]
+					obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]
 				elif action == 'age':
-				if img_224 is None:
+					age_predictions = models['age'].predict(img_content, verbose=0)[0,:]
 					img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
 				age_predictions = models['age'].predict(img_224, verbose=0)[0,:]
 					apparent_age = Age.findApparentAge(age_predictions)
-
+					obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
 				resp_obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
 				elif action == 'gender':
-
+					gender_predictions = models['gender'].predict(img_content, verbose=0)[0,:]
 				if img_224 is None:
 					img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
 				gender_predictions = models['gender'].predict(img_224, verbose=0)[0,:]
 					gender_labels = ["Woman", "Man"]
-				resp_obj["gender"] = {}
+					obj["gender"] = {}
 					for i, gender_label in enumerate(gender_labels):
 						gender_prediction = 100 * gender_predictions[i]
-					resp_obj["gender"][gender_label] = gender_prediction
+						obj["gender"][gender_label] = gender_prediction
-				resp_obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]
+					obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]
 				elif action == 'race':
-				if img_224 is None:
+					race_predictions = models['race'].predict(img_content, verbose=0)[0,:]
 					img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) #just emotion model expects grayscale images
 				race_predictions = models['race'].predict(img_224, verbose=0)[0,:]
 				race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
 					sum_of_predictions = race_predictions.sum()
-				resp_obj["race"] = {}
+					obj["race"] = {}
 					race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
 					for i in range(0, len(race_labels)):
 						race_label = race_labels[i]
 						race_prediction = 100 * race_predictions[i] / sum_of_predictions
-					resp_obj["race"][race_label] = race_prediction
+						obj["race"][race_label] = race_prediction
-				resp_obj["dominant_race"] = race_labels[np.argmax(race_predictions)]
+					obj["dominant_race"] = race_labels[np.argmax(race_predictions)]
 				#-----------------------------
 				# mention facial areas
 				obj["region"] = img_region
-			if is_region_set != True and region:
+			resp_objects.append(obj)
 				resp_obj["region"] = {}
 				is_region_set = True
 				for i, parameter in enumerate(region_labels):
 					resp_obj["region"][parameter] = int(region[i]) #int cast is for the exception - object of type 'float32' is not JSON serializable
 		#---------------------------------
 		if bulkProcess == True:
 			resp_objects.append(resp_obj)
 		else:
 			return resp_obj
 	if bulkProcess == True:
 	return resp_objects	
-
+def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base', silent=False):
 def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base', silent=False):
 	"""
-	This function applies verification several times and find an identity in a database
+	This function applies verification several times and finds the identities in a database
 	Parameters:
-		img_path: exact image path, numpy array (BGR) or based64 encoded image. If you are going to find several identities, then you should pass img_path as array instead of calling find function in a for loop. e.g. img_path = ["img1.jpg", "img2.jpg"]
+		img_path: exact image path, numpy array (BGR) or based64 encoded image. 
 		db_path (string): You should store some .jpg files in a folder and pass the exact folder path to this.
@ -485,58 +338,23 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
 		distance_metric (string): cosine, euclidean, euclidean_l2
 		model: built deepface model. A face recognition models are built in every call of find function. You can pass pre-built models to speed the function up.
 			model = DeepFace.build_model('VGG-Face')
 		enforce_detection (boolean): The function throws an exception if a face could not be detected. Set this to False if you don't want to get the exception. This might be convenient for low resolution images.
 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
-		prog_bar (boolean): enable/disable a progress bar
+		silent (boolean): disable some logging and progress bars
 	Returns:
-		This function returns pandas data frame. If a list of images is passed to img_path, then it will return list of pandas data frame.
+		This function returns a list of pandas data frames. Each item of the list corresponds to an identity in the img_path.
 	"""
 	tic = time.time()
 	img_paths, bulkProcess = functions.initialize_input(img_path)
 	#-------------------------------
-
+	if os.path.isdir(db_path) != True:
-	if os.path.isdir(db_path) == True:
+		raise ValueError("Passed db_path does not exist!")
 		if model == None:
 			if model_name == 'Ensemble':
 				if not silent: print("Ensemble learning enabled")
 				models = Boosting.loadModel()
 			else: #model is not ensemble
 				model = build_model(model_name)
 				models = {}
 				models[model_name] = model
 		else: #model != None
 			if not silent: print("Already built model is passed")
 			if model_name == 'Ensemble':
 				Boosting.validate_model(model)
 				models = model.copy()
 	else:
-				models = {}
+		target_size = functions.find_target_size(model_name=model_name)
 				models[model_name] = model
 		#---------------------------------------
 		if model_name == 'Ensemble':
 			model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']
 			metric_names = ['cosine', 'euclidean', 'euclidean_l2']
 		elif model_name != 'Ensemble':
 			model_names = []; metric_names = []
 			model_names.append(model_name)
 			metric_names.append(distance_metric)
 		#---------------------------------------
@ -545,19 +363,21 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
 		if path.exists(db_path+"/"+file_name):
-			if not silent: print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")
+			if not silent:
 				print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")
 			f = open(db_path+'/'+file_name, 'rb')
 			representations = pickle.load(f)
-			if not silent: print("There are ", len(representations)," representations found in ",file_name)
+			if not silent:
 				print("There are ", len(representations)," representations found in ",file_name)
 		else: #create representation.pkl from scratch
 			employees = []
 			for r, d, f in os.walk(db_path): # r=root, d=directories, f = files
 				for file in f:
-					if ('.jpg' in file.lower()) or ('.png' in file.lower()):
+					if ('.jpg' in file.lower()) or ('.jpeg' in file.lower()) or ('.png' in file.lower()):
 						exact_path = r + "/" + file
 						employees.append(exact_path)
@ -569,158 +389,108 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',
 			representations = []
 			pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = prog_bar)
 			#for employee in employees:
 			pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = True if silent == True else False)
 			for index in pbar:
 				employee = employees[index]
-				instance = []
+				img_objs = functions.extract_faces(img = employee, 
-				instance.append(employee)
+					target_size = target_size, 
 					detector_backend = detector_backend, 
 					grayscale = False, 
 					enforce_detection = enforce_detection, 
 					align = align
 				)
-				for j in model_names:
+				for img_content, img_region, img_confidence in img_objs:
-					custom_model = models[j]
+					embedding_obj = represent(img_path = img_content
-
+						, model_name = model_name
-					representation = represent(img_path = employee
+						, enforce_detection = enforce_detection
-						, model_name = model_name, model = custom_model
+						, detector_backend = "skip"
 						, enforce_detection = enforce_detection, detector_backend = detector_backend
 						, align = align
 						, normalization = normalization
 						)
-					instance.append(representation)
+					img_representation = embedding_obj[0]["embedding"]
 					instance = []
 					instance.append(employee) 
 					instance.append(img_representation)
 					representations.append(instance)
 			#-------------------------------
 				representations.append(instance)
 			f = open(db_path+'/'+file_name, "wb")
 			pickle.dump(representations, f)
 			f.close()
-			if not silent: print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")
+			if not silent: 
 				print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")
 		#----------------------------
 		#now, we got representations for facial database
 		df = pd.DataFrame(representations, columns = ["identity", f"{model_name}_representation"])
-		if model_name != 'Ensemble':
+		# img path might have more than one face
-			df = pd.DataFrame(representations, columns = ["identity", "%s_representation" % (model_name)])
+		target_objs = functions.extract_faces(img = img_path, 
-		else: #ensemble learning
+					target_size = target_size, 
-
+					detector_backend = detector_backend, 
-			columns = ['identity']
+					grayscale = False, 
-			[columns.append('%s_representation' % i) for i in model_names]
+					enforce_detection = enforce_detection, 
-
+					align = align
-			df = pd.DataFrame(representations, columns = columns)
+				)
 		df_base = df.copy() #df will be filtered in each img. we will restore it for the next item.
 		resp_obj = []
-		global_pbar = tqdm(range(0, len(img_paths)), desc='Analyzing', disable = prog_bar)
+		for target_img, target_region, target_confidence in target_objs:
-		for j in global_pbar:
+			target_embedding_obj = represent(img_path = target_img
-			img_path = img_paths[j]
+						, model_name = model_name
-
+						, enforce_detection = enforce_detection
-			#find representation for passed image
+						, detector_backend = "skip"
 			for j in model_names:
 				custom_model = models[j]
 				target_representation = represent(img_path = img_path
 					, model_name = model_name, model = custom_model
 					, enforce_detection = enforce_detection, detector_backend = detector_backend
 						, align = align
 						, normalization = normalization
 						)
-				for k in metric_names:
+			target_representation = target_embedding_obj[0]["embedding"]
 			result_df = df.copy() #df will be filtered in each img
 			#TODO: add facial area in df
 			distances = []
 			for index, instance in df.iterrows():
-						source_representation = instance["%s_representation" % (j)]
+				source_representation = instance[f"{model_name}_representation"]
-						if k == 'cosine':
+				if distance_metric == 'cosine':
 					distance = dst.findCosineDistance(source_representation, target_representation)
-						elif k == 'euclidean':
+				elif distance_metric == 'euclidean':
 					distance = dst.findEuclideanDistance(source_representation, target_representation)
-						elif k == 'euclidean_l2':
+				elif distance_metric == 'euclidean_l2':
 					distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation))
 				else:
 					raise ValueError(f"invalid distance metric passes - {distance_metric}")
 				distances.append(distance)
 				#---------------------------
-					if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
+			result_df[f"{model_name}_{distance_metric}"] = distances
 						continue
 					else:
 						df["%s_%s" % (j, k)] = distances
-						if model_name != 'Ensemble':
+			threshold = dst.findThreshold(model_name, distance_metric)
-							threshold = dst.findThreshold(j, k)
+			result_df = result_df.drop(columns = [f"{model_name}_representation"])
-							df = df.drop(columns = ["%s_representation" % (j)])
+			result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold]
-							df = df[df["%s_%s" % (j, k)] <= threshold]
+			result_df = result_df.sort_values(by = [f"{model_name}_{distance_metric}"], ascending=True).reset_index(drop=True)
-							df = df.sort_values(by = ["%s_%s" % (j, k)], ascending=True).reset_index(drop=True)
+			resp_obj.append(result_df)
-							resp_obj.append(df)
+		# -----------------------------------
 							df = df_base.copy() #restore df for the next iteration
 			#----------------------------------
 			if model_name == 'Ensemble':
 				feature_names = []
 				for j in model_names:
 					for k in metric_names:
 						if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
 							continue
 						else:
 							feature = '%s_%s' % (j, k)
 							feature_names.append(feature)
 				#print(df.head())
 				x = df[feature_names].values
 				#--------------------------------------
 				boosted_tree = Boosting.build_gbm()
 				y = boosted_tree.predict(x)
 				verified_labels = []; scores = []
 				for i in y:
 					verified = np.argmax(i) == 1
 					score = i[np.argmax(i)]
 					verified_labels.append(verified)
 					scores.append(score)
 				df['verified'] = verified_labels
 				df['score'] = scores
 				df = df[df.verified == True]
 				#df = df[df.score > 0.99] #confidence score
 				df = df.sort_values(by = ["score"], ascending=False).reset_index(drop=True)
 				df = df[['identity', 'verified', 'score']]
 				resp_obj.append(df)
 				df = df_base.copy() #restore df for the next iteration
 			#----------------------------------
 		toc = time.time()
-		if not silent: print("find function lasts ",toc-tic," seconds")
+		if not silent:
-
+			print("find function lasts ",toc-tic," seconds")
 		if len(resp_obj) == 1:
 			return resp_obj[0]
 		return resp_obj
 	else:
 		raise ValueError("Passed db_path does not exist!")
 	return None
 def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base'):
 	"""
@ -731,10 +501,6 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
 		model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace.
 		model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. Consider to pass model if you are going to call represent function in a for loop.
 			model = DeepFace.build_model('VGG-Face')
 		enforce_detection (boolean): If any face could not be detected in an image, then verify function will return exception. Set this to False not to have this exception. This might be convenient for low resolution images.
 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
@ -744,38 +510,51 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
 	Returns:
 		Represent function returns a multidimensional vector. The number of dimensions is changing based on the reference model. E.g. FaceNet returns 128 dimensional vector; VGG-Face returns 2622 dimensional vector.
 	"""
 	resp_objs = []
 	if model is None:
 	model = build_model(model_name)
 	#---------------------------------
 	# we started to run pre-process in verification. so, this can be skipped if it is coming from verification.
 	if detector_backend != "skip":
 		target_size = functions.find_target_size(model_name=model_name)
-	#decide input shape
+		img_objs = functions.extract_faces(img = img_path, 
-	input_shape_x, input_shape_y = functions.find_input_shape(model)
+								target_size = target_size, 
-
+								detector_backend = detector_backend, 
-	#detect and align
+								grayscale = False, 
-	img = functions.preprocess_face(img = img_path
+								enforce_detection = enforce_detection, 
-		, target_size=(input_shape_y, input_shape_x)
+								align = align)
-		, enforce_detection = enforce_detection
+	else: # skip
-		, detector_backend = detector_backend
+		if type(img_path) == str:
-		, align = align)
+			img = functions.load_image(img_path)
 		elif type(img_path).__module__ == np.__name__:
 			img = img_path.copy()
 		else:
 			raise ValueError(f"unexpected type for img_path - {type(img_path)}")
 		img_region = [0, 0, img.shape[1], img.shape[0]]
 		img_objs = [(img, img_region, 0)]
 	#---------------------------------
 	for img, region, confidence in img_objs:
 		#custom normalization
 		img = functions.normalize_input(img = img, normalization = normalization)
 	#---------------------------------
 		#represent
 		if "keras" in str(type(model)):
 			#new tf versions show progress bar and it is annoying
 			embedding = model.predict(img, verbose=0)[0].tolist()
 		else:
-		#SFace is not a keras model and it has no verbose argument
+			#SFace and Dlib are not keras models and no verbose arguments
 			embedding = model.predict(img)[0].tolist()
-	return embedding
+		resp_obj = {}
 		resp_obj["embedding"] = embedding
 		resp_obj["facial_area"] = region
 		resp_objs.append(resp_obj)
 	return resp_objs
 def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enable_face_analysis = True, source = 0, time_threshold = 5, frame_threshold = 5):
@ -810,7 +589,7 @@ def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', di
 	realtime.analysis(db_path, model_name, detector_backend, distance_metric, enable_face_analysis
 						, source = source, time_threshold = time_threshold, frame_threshold = frame_threshold)
-def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True):
+def extract_faces(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True):
 	"""
 	This function applies pre-processing stages of a face recognition pipeline including detection and alignment
@ -818,15 +597,43 @@ def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv',
 	Parameters:
 		img_path: exact image path, numpy array (BGR) or base64 encoded image
 		target_size (tuple): final shape of facial image. black pixels will be added to resize the image.
 		detector_backend (string): face detection backends are retinaface, mtcnn, opencv, ssd or dlib
 		enforce_detection (boolean): function throws exception if face cannot be detected in the fed image. 
 		Set this to False if you do not want to get exception and run the function anyway.
 		align (boolean): alignment according to the eye positions.
 	Returns:
-		deteced and aligned face in numpy format
+		list of dictionaries. Each dictionary will have facial image itself, extracted area from the original image and confidence score.
 	"""
-	img = functions.preprocess_face(img = img_path, target_size = target_size, detector_backend = detector_backend
+	resp_objs = []
-		, enforce_detection = enforce_detection, align = align)[0] #preprocess_face returns (1, 224, 224, 3)
+	img_objs = functions.extract_faces(
-	return img[:, :, ::-1] #bgr to rgb
+						img = img_path, 
 						target_size = target_size, 
 						detector_backend = detector_backend, 
 						grayscale = False, 
 						enforce_detection = enforce_detection, 
 						align = align
 					)
 	for img, region, confidence in img_objs:
 		resp_obj = {}
 		# discard expanded dimension
 		if len(img.shape) == 4:
 			img = img[0]
 		resp_obj["face"] = img[:, :, ::-1]
 		resp_obj["facial_area"] = region
 		resp_obj["confidence"] = confidence
 		resp_objs.append(resp_obj)
 	return resp_objs
 #---------------------------
 #main
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@ -27,24 +27,6 @@ elif tf_major_version == 2:
 #--------------------------------------------------
 def initialize_input(img1_path, img2_path = None):
 	"""
 	Normalize the image argument(s) into a list of inputs.
 	Parameters:
 		img1_path: either a list (bulk mode) or a single image.
 		img2_path: optional second image, given as a string (exact image path or base64 image) or a numpy array.
 	Returns:
 		(img_list, bulkProcess) tuple.
 		- img1_path is a list: a shallow copy of it and True.
 		- a second image is given: [[img1_path, img2_path]] and False.
 		- otherwise: [img1_path] and False.
 	"""
 	if isinstance(img1_path, list):
 		return img1_path.copy(), True
 	# A numpy image counts as present when it holds any pixels at all.
 	# Testing .any() instead would silently drop an all-black image,
 	# because every one of its values is zero.
 	has_second_image = (
 		isinstance(img2_path, str) #exact image path, base64 image
 		or (isinstance(img2_path, np.ndarray) and img2_path.size > 0) #numpy array
 	)
 	if has_second_image:
 		return [[img1_path, img2_path]], False
 	#analyze function passes just img1_path
 	return [img1_path], False
 def initialize_folder():
 	home = get_deepface_home()
@ -59,6 +41,8 @@ def initialize_folder():
 def get_deepface_home():
 	"""
 	Return the DeepFace home directory as a string.
 	The DEEPFACE_HOME environment variable is used when it is set; otherwise the current user's home directory is returned.
 	"""
 	fallback_dir = Path.home()
 	configured_dir = os.environ.get('DEEPFACE_HOME', fallback_dir)
 	return str(configured_dir)
 #--------------------------------------------------
 def loadBase64Img(uri):
   encoded_data = uri.split(',')[1]
   nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
@ -93,35 +77,71 @@ def load_image(img):
 	return img
-def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
+#--------------------------------------------------
 def extract_faces(img, target_size=(224, 224), detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
 	# this is going to store a list of the img itself (numpy), its region and confidence
 	extracted_faces = []
 	#img might be path, base64 or numpy array. Convert it to numpy whatever it is.
 	img = load_image(img)
 	img_region = [0, 0, img.shape[1], img.shape[0]]
 	#----------------------------------------------
 	#people would like to skip detection and alignment if they already have pre-processed images
 	if detector_backend == 'skip':
-		return img, img_region
+		face_objs = [(img, img_region, 0)]
-
+	else:
 	#----------------------------------------------
 	#detector stored in a global variable in FaceDetector object.
 	#this call should be completed very fast because it will return found in memory
 	#it will not build face detector model in each call (consider for loops)
 		face_detector = FaceDetector.build_model(detector_backend)
 		face_objs = FaceDetector.detect_faces(face_detector, detector_backend, img, align)
-	try:
+	# in case of no face found
-		detected_face, img_region, _ = FaceDetector.detect_face(face_detector, detector_backend, img, align)
+	if len(face_objs) == 0 and enforce_detection == True:
 	except: #if detected face shape is (0, 0) and alignment cannot be performed, this block will be run
 		detected_face = None
 	if (isinstance(detected_face, np.ndarray)):
 		return detected_face, img_region
 	else:
 		if detected_face == None:
 			if enforce_detection != True:
 				return img, img_region
 			else:
 		raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.")
 	elif len(face_objs) == 0 and enforce_detection == False:
 		face_objs = [(img, img_region, 0)]
 	for current_img, current_region, confidence in face_objs:
 		if current_img.shape[0] > 0 and current_img.shape[1] > 0:
 			if grayscale == True:
 				current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
 			# resize and padding
 			if current_img.shape[0] > 0 and current_img.shape[1] > 0:
 				factor_0 = target_size[0] / current_img.shape[0]
 				factor_1 = target_size[1] / current_img.shape[1]
 				factor = min(factor_0, factor_1)
 				dsize = (int(current_img.shape[1] * factor), int(current_img.shape[0] * factor))
 				current_img = cv2.resize(current_img, dsize)
 				diff_0 = target_size[0] - current_img.shape[0]
 				diff_1 = target_size[1] - current_img.shape[1]
 				if grayscale == False:
 					# Put the base image in the middle of the padded image
 					current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant')
 				else:
 					current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant')
 			#double check: if target image is not still the same size with target.
 			if current_img.shape[0:2] != target_size:
 				current_img = cv2.resize(current_img, target_size)
 			#normalizing the image pixels
 			img_pixels = image.img_to_array(current_img) #what this line doing? must?
 			img_pixels = np.expand_dims(img_pixels, axis = 0)
 			img_pixels /= 255 #normalize input in [0, 1]
 			#int cast is for the exception - object of type 'float32' is not JSON serializable
 			region_obj = {"x": int(current_region[0]), "y": int(current_region[1]), "w": int(current_region[2]), "h": int(current_region[3])}
 			extracted_face = [img_pixels, region_obj, confidence]
 			extracted_faces.append(extracted_face)
 	if len(extracted_faces) == 0 and enforce_detection == True:
 		raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
 	return extracted_faces
 def normalize_input(img, normalization = 'base'):
@ -169,94 +189,21 @@ def normalize_input(img, normalization = 'base'):
 	return img
-def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True):
+def find_target_size(model_name):
-	#img might be path, base64 or numpy array. Convert it to numpy whatever it is.
+	target_sizes = {
-	img = load_image(img)
+		"VGG-Face": (224, 224),
-	base_img = img.copy()
+		"Facenet": (160, 160),
 		"Facenet512": (160, 160),
 		"OpenFace": (96, 96),
 		"DeepFace": (152, 152),
 		"DeepID": (55, 47), #TODO: might be opposite
 		"Dlib": (150, 150),
 		"ArcFace": (112, 112),
 		"SFace": (112, 112)
 	}
-	img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align)
+	if model_name not in target_sizes.keys():
 		raise ValueError(f"unimplemented model name - {model_name}")
-	#--------------------------
+	return target_sizes[model_name]
 	if img.shape[0] == 0 or img.shape[1] == 0:
 		if enforce_detection == True:
 			raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
 		else: #restore base image
 			img = base_img.copy()
 	#--------------------------
 	#post-processing
 	if grayscale == True:
 		img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 	#---------------------------------------------------
 	#resize image to expected shape
 	# img = cv2.resize(img, target_size) #resize causes transformation on base image, adding black pixels to resize will not deform the base image
 	if img.shape[0] > 0 and img.shape[1] > 0:
 		factor_0 = target_size[0] / img.shape[0]
 		factor_1 = target_size[1] / img.shape[1]
 		factor = min(factor_0, factor_1)
 		dsize = (int(img.shape[1] * factor), int(img.shape[0] * factor))
 		img = cv2.resize(img, dsize)
 		# Then pad the other side to the target size by adding black pixels
 		diff_0 = target_size[0] - img.shape[0]
 		diff_1 = target_size[1] - img.shape[1]
 		if grayscale == False:
 			# Put the base image in the middle of the padded image
 			img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant')
 		else:
 			img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant')
 	#------------------------------------------
 	#double check: if target image is not still the same size with target.
 	if img.shape[0:2] != target_size:
 		img = cv2.resize(img, target_size)
 	#---------------------------------------------------
 	#normalizing the image pixels
 	img_pixels = image.img_to_array(img) #what this line doing? must?
 	img_pixels = np.expand_dims(img_pixels, axis = 0)
 	img_pixels /= 255 #normalize input in [0, 1]
 	#---------------------------------------------------
 	if return_region == True:
 		return img_pixels, region
 	else:
 		return img_pixels
 def find_input_shape(model):
 	"""
 	Return the two spatial input dimensions of a face recognition model.
 	The shape is read from the first layer of the model (model.layers[0].input_shape), and elements 1 and 2 of that shape are returned.
 	A list result is converted to a tuple before returning.
 	"""
 	raw_shape = model.layers[0].input_shape
 	#some environments report (None, 224, 224, 3), others wrap it in a list as [(None, 224, 224, 3)]; probably a version difference
 	if type(raw_shape) == list:
 		raw_shape = raw_shape[0]
 	dims = raw_shape[1:3]
 	#issue 289: it seems that tf 2.5 expects you to resize images with (x, y)
 	#whereas its older versions expect (y, x), so the pair is swapped for tf >= 2.5
 	if tf_major_version == 2 and tf_minor_version >= 5:
 		dims = (dims[1], dims[0])
 	#issue 197: some people got a list here instead of a tuple
 	if type(dims) == list:
 		dims = tuple(dims)
 	return dims
--- a/deepface/commons/realtime.py
+++ b/deepface/commons/realtime.py
@ -50,7 +50,7 @@ def analysis(db_path, model_name = 'VGG-Face', detector_backend = 'opencv', dist
 		#------------------------
-		input_shape = functions.find_input_shape(model)
+		input_shape = functions.find_target_size(model_name=model_name)
 		input_shape_x = input_shape[0]; input_shape_y = input_shape[1]
 		#tuned thresholds for model and metric pair
--- a/tests/dataset/couple.jpg
+++ b/tests/dataset/couple.jpg
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@ -1,6 +1,7 @@
 import warnings
 import os
 import tensorflow as tf
 import numpy as np
 import cv2
 from deepface import DeepFace