handling many faces

2025-07-24 02:40:00 +00:00 · 2023-01-23 21:54:52 +00:00 · 2023-01-23 21:54:52 +00:00 · b62e3671f8
commit b62e3671f8
parent ba3db18671
6 changed files with 405 additions and 649 deletions
--- a/.gitignore
+++ b/.gitignore
@ -17,6 +17,7 @@ deepface/extendedmodels/__pycache__/*
 deepface/subsidiarymodels/__pycache__/*
 deepface/detectors/__pycache__/*
 tests/dataset/*.pkl
+tests/sandbox.ipynb
 .DS_Store
 deepface/.DS_Store
 *.pyc
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@ -7,12 +7,13 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

 import time
 from os import path
+import cv2
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
 import pickle

-from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace, Boosting
+from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace
 from deepface.extendedmodels import Age, Gender, Race, Emotion
 from deepface.commons import functions, realtime, distance as dst

@ -67,33 +68,24 @@ def build_model(model_name):

 	return model_obj[model_name]

-def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base'):
+def verify(img1_path, img2_path, model_name = 'VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enforce_detection = True, align = True, normalization = 'base'):

 	"""
 	This function verifies an image pair is same person or different persons.

 	Parameters:
-		img1_path, img2_path: exact image path, numpy array (BGR) or based64 encoded images could be passed. If you are going to call verify function for a list of image pairs, then you should pass an array instead of calling the function in for loops.
+		img1_path, img2_path: exact image path as a string. A numpy array (BGR) or a base64 encoded image is also accepted.
+		If either image of the pair contains more than one face, the face pair with the maximum similarity is compared.

-		e.g. img1_path = [
-			['img1.jpg', 'img2.jpg'],
-			['img2.jpg', 'img3.jpg']
-		]
-
-		model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace or Ensemble
+		model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace

 		distance_metric (string): cosine, euclidean, euclidean_l2

-		model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times.
-
-			model = DeepFace.build_model('VGG-Face')
-
-		enforce_detection (boolean): If no face could not be detected in an image, then this function will return exception by default. Set this to False not to have this exception. This might be convenient for low resolution images.
+		enforce_detection (boolean): If no face can be detected in an image, this function raises an exception by default.
+		Set this to False to suppress that exception. This might be convenient for low resolution images.

 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib

-		prog_bar (boolean): enable/disable a progress bar
-
 	Returns:
 		Verify function returns a dictionary. If img1_path is a list of image pairs, then the function will return list of dictionary.

@ -103,194 +95,116 @@ def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric =
 			, "max_threshold_to_verify": 0.40
 			, "model": "VGG-Face"
 			, "similarity_metric": "cosine"
+			, 'facial_areas': {
+				'img1': {'x': 345, 'y': 211, 'w': 769, 'h': 769}, 
+				'img2': {'x': 318, 'y': 534, 'w': 779, 'h': 779}
+			}
+			, "time": 2
 		}

 	"""

 	tic = time.time()

-	img_list, bulkProcess = functions.initialize_input(img1_path, img2_path)
-
-	resp_objects = []
-
 	#--------------------------------
+	target_size = functions.find_target_size(model_name=model_name)

-	if model_name == 'Ensemble':
-		model_names = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
-		metrics = ["cosine", "euclidean", "euclidean_l2"]
-	else:
-		model_names = []; metrics = []
-		model_names.append(model_name)
-		metrics.append(distance_metric)
+	# img pairs might have many faces
+	img1_objs = functions.extract_faces(
+		img = img1_path, 
+		target_size = (target_size[1], target_size[0]), 
+		detector_backend = detector_backend, 
+		grayscale = False, 
+		enforce_detection = enforce_detection, 
+		align = align)
 	
+	img2_objs = functions.extract_faces(
+		img = img2_path, 
+		target_size = (target_size[1], target_size[0]), 
+		detector_backend = detector_backend, 
+		grayscale = False, 
+		enforce_detection = enforce_detection, 
+		align = align)
 	#--------------------------------
-
-	if model == None:
-		if model_name == 'Ensemble':
-			models = Boosting.loadModel()
-		else:
-			model = build_model(model_name)
-			models = {}
-			models[model_name] = model
-	else:
-		if model_name == 'Ensemble':
-			Boosting.validate_model(model)
-			models = model.copy()
-		else:
-			models = {}
-			models[model_name] = model
-
-	#------------------------------
-
-	disable_option = (False if len(img_list) > 1 else True) or not prog_bar
-
-	pbar = tqdm(range(0,len(img_list)), desc='Verification', disable = disable_option)
-
-	for index in pbar:
-
-		instance = img_list[index]
-
-		if type(instance) == list and len(instance) >= 2:
-			img1_path = instance[0]; img2_path = instance[1]
-
-			ensemble_features = []
-
-			for i in  model_names:
-				custom_model = models[i]
-
-				#img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'mtcnn'
-				img1_representation = represent(img_path = img1_path
-						, model_name = model_name, model = custom_model
-						, enforce_detection = enforce_detection, detector_backend = detector_backend
+	distances = []
+	regions = []
+	# now we will find the face pair with minimum distance
+	for img1_content, img1_region, img1_confidence in img1_objs:
+		for img2_content, img2_region, img2_confidence in img2_objs:
+			img1_embedding_obj = represent(img_path = img1_content
+						, model_name = model_name
+						, enforce_detection = enforce_detection
+						, detector_backend = "skip"
 						, align = align
 						, normalization = normalization
 						)
 			
-				img2_representation = represent(img_path = img2_path
-						, model_name = model_name, model = custom_model
-						, enforce_detection = enforce_detection, detector_backend = detector_backend
+			img2_embedding_obj = represent(img_path = img2_content
+						, model_name = model_name
+						, enforce_detection = enforce_detection
+						, detector_backend = "skip"
 						, align = align
 						, normalization = normalization
 						)
 			
-				#----------------------
-				#find distances between embeddings
+			img1_representation = img1_embedding_obj[0]["embedding"]
+			img2_representation = img2_embedding_obj[0]["embedding"]
 			
-				for j in metrics:
-
-					if j == 'cosine':
+			if distance_metric == 'cosine':
 				distance = dst.findCosineDistance(img1_representation, img2_representation)
-					elif j == 'euclidean':
+			elif distance_metric == 'euclidean':
 				distance = dst.findEuclideanDistance(img1_representation, img2_representation)
-					elif j == 'euclidean_l2':
+			elif distance_metric == 'euclidean_l2':
 				distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation))
 			else:
 				raise ValueError("Invalid distance_metric passed - ", distance_metric)
 			
-					distance = np.float64(distance) #causes trobule for euclideans in api calls if this is not set (issue #175)
-					#----------------------
-					#decision
+			distances.append(distance)
+			regions.append((img1_region, img2_region))

-					if model_name != 'Ensemble':
+	# -------------------------------
+	threshold = dst.findThreshold(model_name, distance_metric)
+	distance = min(distances) #best distance
+	facial_areas = regions[np.argmin(distances)]

-						threshold = dst.findThreshold(i, j)
-
-						if distance <= threshold:
-							identified = True
-						else:
-							identified = False
+	toc = time.time()

 	resp_obj = {
-							"verified": identified
+		"verified": True if distance <= threshold else False
 		, "distance": distance
 		, "threshold": threshold
 		, "model": model_name
 		, "detector_backend": detector_backend
 		, "similarity_metric": distance_metric
+		, "facial_areas": {
+			"img1": facial_areas[0],
+			"img2": facial_areas[1]
+		}
+		, "time": round(toc - tic, 2)
 	}

-						if bulkProcess == True:
-							resp_objects.append(resp_obj)
-						else:
 	return resp_obj

-					else: #Ensemble
-
-						#this returns same with OpenFace - euclidean_l2
-						if i == 'OpenFace' and j == 'euclidean':
-							continue
-						else:
-							ensemble_features.append(distance)
-
-			#----------------------
-
-			if model_name == 'Ensemble':
-
-				boosted_tree = Boosting.build_gbm()
-
-				prediction = boosted_tree.predict(np.expand_dims(np.array(ensemble_features), axis=0))[0]
-
-				verified = np.argmax(prediction) == 1
-				score = prediction[np.argmax(prediction)]
-
-				resp_obj = {
-					"verified": verified
-					, "score": score
-					, "distance": ensemble_features
-					, "model": ["VGG-Face", "Facenet", "OpenFace", "DeepFace"]
-					, "similarity_metric": ["cosine", "euclidean", "euclidean_l2"]
-				}
-
-				if bulkProcess == True:
-					resp_objects.append(resp_obj)
-				else:
-					return resp_obj
-
-			#----------------------
-
-		else:
-			raise ValueError("Invalid arguments passed to verify function: ", instance)
-
-	#-------------------------
-
-	toc = time.time()
-
-	if bulkProcess == True:
-
-		resp_obj = {}
-
-		for i in range(0, len(resp_objects)):
-			resp_item = resp_objects[i]
-			resp_obj["pair_%d" % (i+1)] = resp_item
-
-		return resp_obj
-
-def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models = None, enforce_detection = True, detector_backend = 'opencv', prog_bar = True):
+def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , enforce_detection = True, detector_backend = 'opencv', align = True, silent = False):

 	"""
 	This function analyzes facial attributes including age, gender, emotion and race

 	Parameters:
-		img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. If you are going to analyze lots of images, then set this to list. e.g. img_path = ['img1.jpg', 'img2.jpg']
+		img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed.

 		actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop some of those attributes.

-		models: (Optional[dict]) facial attribute analysis models are built in every call of analyze function. You can pass pre-built models to speed the function up.
-
-			models = {}
-			models['age'] = DeepFace.build_model('Age')
-			models['gender'] = DeepFace.build_model('Gender')
-			models['emotion'] = DeepFace.build_model('Emotion')
-			models['race'] = DeepFace.build_model('Race')
-
 		enforce_detection (boolean): The function throws exception if no face detected by default. Set this to False if you don't want to get exception. This might be convenient for low resolution images.

 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib.

-		prog_bar (boolean): enable/disable a progress bar
-	Returns:
-		The function returns a dictionary. If img_path is a list, then it will return list of dictionary.
+		silent (boolean): disable (some) log messages

+	Returns:
+		The function returns a list of dictionaries for each face appearing in the image.
+
+		[
 			{
 				"region": {'x': 230, 'y': 120, 'w': 36, 'h': 45},
 				"age": 28.66,
@ -319,165 +233,104 @@ def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models =
 					'white': 93.44925880432129
 				}
 			}
-
+		]
 	"""
-	
+	#---------------------------------
+	# validate actions 
 	if type(actions) == str:
 		actions = (actions,)

 	actions = list(actions)
-	if not models:
+	#---------------------------------
+	# build models
 	models = {}
-
-	img_paths, bulkProcess = functions.initialize_input(img_path)
-
-	#---------------------------------
-
-	built_models = list(models.keys())
-
-	#---------------------------------
-
-	#pre-trained models passed but it doesn't exist in actions
-	if len(built_models) > 0:
-		if 'emotion' in built_models and 'emotion' not in actions:
-			actions.append('emotion')
-
-		if 'age' in built_models and 'age' not in actions:
-			actions.append('age')
-
-		if 'gender' in built_models and 'gender' not in actions:
-			actions.append('gender')
-
-		if 'race' in built_models and 'race' not in actions:
-			actions.append('race')
-
-	#---------------------------------
-
-	if 'emotion' in actions and 'emotion' not in built_models:
+	if 'emotion' in actions:
 		models['emotion'] = build_model('Emotion')

-	if 'age' in actions and 'age' not in built_models:
+	if 'age' in actions:
 		models['age'] = build_model('Age')

-	if 'gender' in actions and 'gender' not in built_models:
+	if 'gender' in actions:
 		models['gender'] = build_model('Gender')

-	if 'race' in actions and 'race' not in built_models:
+	if 'race' in actions:
 		models['race'] = build_model('Race')
-
 	#---------------------------------
-
 	resp_objects = []

-	disable_option = (False if len(img_paths) > 1 else True) or not prog_bar
-
-	global_pbar = tqdm(range(0,len(img_paths)), desc='Analyzing', disable = disable_option)
-
-	for j in global_pbar:
-		img_path = img_paths[j]
-
-		resp_obj = {}
-
-		disable_option = (False if len(actions) > 1 else True) or not prog_bar
-
-		pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = disable_option)
-
-		img_224 = None # Set to prevent re-detection
-
-		region = [] # x, y, w, h of the detected face region
-		region_labels = ['x', 'y', 'w', 'h']
-
-		is_region_set = False
+	img_objs = functions.extract_faces(img=img_path, target_size=(224, 224), detector_backend=detector_backend, grayscale = False, enforce_detection=enforce_detection, align=align)

+	for img_content, img_region, img_confidence in img_objs:
+		if img_content.shape[0] > 0 and img_content.shape[1] > 0:
+			obj = {}
 			#facial attribute analysis
+			pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = silent)
 			for index in pbar:
 				action = actions[index]
 				pbar.set_description("Action: %s" % (action))

 				if action == 'emotion':
-				emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
-				img, region = functions.preprocess_face(img = img_path, target_size = (48, 48), grayscale = True, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
+					img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY)
+					img_gray = cv2.resize(img_gray, (48, 48))
+					img_gray = np.expand_dims(img_gray, axis = 0)

-				emotion_predictions = models['emotion'].predict(img, verbose=0)[0,:]
+					emotion_predictions = models['emotion'].predict(img_gray, verbose=0)[0,:]

 					sum_of_predictions = emotion_predictions.sum()

-				resp_obj["emotion"] = {}
+					obj["emotion"] = {}
+					emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

 					for i in range(0, len(emotion_labels)):
 						emotion_label = emotion_labels[i]
 						emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
-					resp_obj["emotion"][emotion_label] = emotion_prediction
+						obj["emotion"][emotion_label] = emotion_prediction

-				resp_obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]
+					obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)]

 				elif action == 'age':
-				if img_224 is None:
-					img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
-
-				age_predictions = models['age'].predict(img_224, verbose=0)[0,:]
+					age_predictions = models['age'].predict(img_content, verbose=0)[0,:]
 					apparent_age = Age.findApparentAge(age_predictions)
-
-				resp_obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable
+					obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable

 				elif action == 'gender':
-
-				if img_224 is None:
-					img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True)
-
-				gender_predictions = models['gender'].predict(img_224, verbose=0)[0,:]
-
+					gender_predictions = models['gender'].predict(img_content, verbose=0)[0,:]
 					gender_labels = ["Woman", "Man"]
-				resp_obj["gender"] = {}
-
+					obj["gender"] = {}
 					for i, gender_label in enumerate(gender_labels):
 						gender_prediction = 100 * gender_predictions[i]
-					resp_obj["gender"][gender_label] = gender_prediction
+						obj["gender"][gender_label] = gender_prediction

-				resp_obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]
+					obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)]

 				elif action == 'race':
-				if img_224 is None:
-					img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) #just emotion model expects grayscale images
-				race_predictions = models['race'].predict(img_224, verbose=0)[0,:]
-				race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
-
+					race_predictions = models['race'].predict(img_content, verbose=0)[0,:]
 					sum_of_predictions = race_predictions.sum()

-				resp_obj["race"] = {}
+					obj["race"] = {}
+					race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic']
 					for i in range(0, len(race_labels)):
 						race_label = race_labels[i]
 						race_prediction = 100 * race_predictions[i] / sum_of_predictions
-					resp_obj["race"][race_label] = race_prediction
+						obj["race"][race_label] = race_prediction

-				resp_obj["dominant_race"] = race_labels[np.argmax(race_predictions)]
+					obj["dominant_race"] = race_labels[np.argmax(race_predictions)]

 				#-----------------------------
+				# mention facial areas
+				obj["region"] = img_region
 				
-			if is_region_set != True and region:
-				resp_obj["region"] = {}
-				is_region_set = True
-				for i, parameter in enumerate(region_labels):
-					resp_obj["region"][parameter] = int(region[i]) #int cast is for the exception - object of type 'float32' is not JSON serializable
+			resp_objects.append(obj)
 	
-		#---------------------------------
-
-		if bulkProcess == True:
-			resp_objects.append(resp_obj)
-		else:
-			return resp_obj
-
-	if bulkProcess == True:
 	return resp_objects	

-
-def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base', silent=False):
+def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base', silent=False):

 	"""
-	This function applies verification several times and find an identity in a database
+	This function applies verification several times and finds the identities in a database

 	Parameters:
-		img_path: exact image path, numpy array (BGR) or based64 encoded image. If you are going to find several identities, then you should pass img_path as array instead of calling find function in a for loop. e.g. img_path = ["img1.jpg", "img2.jpg"]
+		img_path: exact image path, numpy array (BGR) or based64 encoded image. 
 		
 		db_path (string): You should store some .jpg files in a folder and pass the exact folder path to this.

@ -485,58 +338,23 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',

 		distance_metric (string): cosine, euclidean, euclidean_l2

-		model: built deepface model. A face recognition models are built in every call of find function. You can pass pre-built models to speed the function up.
-
-			model = DeepFace.build_model('VGG-Face')
-
 		enforce_detection (boolean): The function throws exception if a face could not be detected. Set this to True if you don't want to get exception. This might be convenient for low resolution images.

 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib

-		prog_bar (boolean): enable/disable a progress bar
+		silent (boolean): disable some logging and progress bars

 	Returns:
-		This function returns pandas data frame. If a list of images is passed to img_path, then it will return list of pandas data frame.
+		This function returns a list of pandas data frames. Each item of the list corresponds to an identity in the img_path.
 	"""

 	tic = time.time()

-	img_paths, bulkProcess = functions.initialize_input(img_path)
-
 	#-------------------------------
-
-	if os.path.isdir(db_path) == True:
-
-		if model == None:
-
-			if model_name == 'Ensemble':
-				if not silent: print("Ensemble learning enabled")
-				models = Boosting.loadModel()
-
-			else: #model is not ensemble
-				model = build_model(model_name)
-				models = {}
-				models[model_name] = model
-
-		else: #model != None
-			if not silent: print("Already built model is passed")
-
-			if model_name == 'Ensemble':
-				Boosting.validate_model(model)
-				models = model.copy()
+	if os.path.isdir(db_path) != True:
+		raise ValueError("Passed db_path does not exist!")
 	else:
-				models = {}
-				models[model_name] = model
-
-		#---------------------------------------
-
-		if model_name == 'Ensemble':
-			model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']
-			metric_names = ['cosine', 'euclidean', 'euclidean_l2']
-		elif model_name != 'Ensemble':
-			model_names = []; metric_names = []
-			model_names.append(model_name)
-			metric_names.append(distance_metric)
+		target_size = functions.find_target_size(model_name=model_name)

 		#---------------------------------------

@ -545,19 +363,21 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',

 		if path.exists(db_path+"/"+file_name):

-			if not silent: print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")
+			if not silent:
+				print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.")

 			f = open(db_path+'/'+file_name, 'rb')
 			representations = pickle.load(f)

-			if not silent: print("There are ", len(representations)," representations found in ",file_name)
+			if not silent:
+				print("There are ", len(representations)," representations found in ",file_name)

 		else: #create representation.pkl from scratch
 			employees = []

 			for r, d, f in os.walk(db_path): # r=root, d=directories, f = files
 				for file in f:
-					if ('.jpg' in file.lower()) or ('.png' in file.lower()):
+					if ('.jpg' in file.lower()) or ('.jpeg' in file.lower()) or ('.png' in file.lower()):
 						exact_path = r + "/" + file
 						employees.append(exact_path)

@ -569,158 +389,108 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine',

 			representations = []

-			pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = prog_bar)
-
 			#for employee in employees:
+			pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = True if silent == True else False)
 			for index in pbar:
 				employee = employees[index]

-				instance = []
-				instance.append(employee)
+				img_objs = functions.extract_faces(img = employee, 
+					target_size = target_size, 
+					detector_backend = detector_backend, 
+					grayscale = False, 
+					enforce_detection = enforce_detection, 
+					align = align
+				)

-				for j in model_names:
-					custom_model = models[j]
-
-					representation = represent(img_path = employee
-						, model_name = model_name, model = custom_model
-						, enforce_detection = enforce_detection, detector_backend = detector_backend
+				for img_content, img_region, img_confidence in img_objs:
+					embedding_obj = represent(img_path = img_content
+						, model_name = model_name
+						, enforce_detection = enforce_detection
+						, detector_backend = "skip"
 						, align = align
 						, normalization = normalization
 						)
 					
-					instance.append(representation)
+					img_representation = embedding_obj[0]["embedding"]
+
+					instance = []
+					instance.append(employee) 
+					instance.append(img_representation)
+					representations.append(instance)

 			#-------------------------------

-				representations.append(instance)
-
 			f = open(db_path+'/'+file_name, "wb")
 			pickle.dump(representations, f)
 			f.close()

-			if not silent: print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")
+			if not silent: 
+				print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.")

 		#----------------------------
 		#now, we got representations for facial database
+		df = pd.DataFrame(representations, columns = ["identity", f"{model_name}_representation"])

-		if model_name != 'Ensemble':
-			df = pd.DataFrame(representations, columns = ["identity", "%s_representation" % (model_name)])
-		else: #ensemble learning
-
-			columns = ['identity']
-			[columns.append('%s_representation' % i) for i in model_names]
-
-			df = pd.DataFrame(representations, columns = columns)
-
-		df_base = df.copy() #df will be filtered in each img. we will restore it for the next item.
+		# img path might have more than one face
+		target_objs = functions.extract_faces(img = img_path, 
+					target_size = target_size, 
+					detector_backend = detector_backend, 
+					grayscale = False, 
+					enforce_detection = enforce_detection, 
+					align = align
+				)
 		
 		resp_obj = []

-		global_pbar = tqdm(range(0, len(img_paths)), desc='Analyzing', disable = prog_bar)
-		for j in global_pbar:
-			img_path = img_paths[j]
-
-			#find representation for passed image
-
-			for j in model_names:
-				custom_model = models[j]
-
-				target_representation = represent(img_path = img_path
-					, model_name = model_name, model = custom_model
-					, enforce_detection = enforce_detection, detector_backend = detector_backend
+		for target_img, target_region, target_confidence in target_objs:
+			target_embedding_obj = represent(img_path = target_img
+						, model_name = model_name
+						, enforce_detection = enforce_detection
+						, detector_backend = "skip"
 						, align = align
 						, normalization = normalization
 						)
 			
-				for k in metric_names:
+			target_representation = target_embedding_obj[0]["embedding"]
+
+			result_df = df.copy() #df will be filtered in each img
+			#TODO: add facial area in df
+
 			distances = []
 			for index, instance in df.iterrows():
-						source_representation = instance["%s_representation" % (j)]
+				source_representation = instance[f"{model_name}_representation"]

-						if k == 'cosine':
+				if distance_metric == 'cosine':
 					distance = dst.findCosineDistance(source_representation, target_representation)
-						elif k == 'euclidean':
+				elif distance_metric == 'euclidean':
 					distance = dst.findEuclideanDistance(source_representation, target_representation)
-						elif k == 'euclidean_l2':
+				elif distance_metric == 'euclidean_l2':
 					distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation))
+				else:
+					raise ValueError(f"invalid distance metric passes - {distance_metric}")

 				distances.append(distance)

 				#---------------------------

-					if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
-						continue
-					else:
-						df["%s_%s" % (j, k)] = distances
+			result_df[f"{model_name}_{distance_metric}"] = distances

-						if model_name != 'Ensemble':
-							threshold = dst.findThreshold(j, k)
-							df = df.drop(columns = ["%s_representation" % (j)])
-							df = df[df["%s_%s" % (j, k)] <= threshold]
+			threshold = dst.findThreshold(model_name, distance_metric)
+			result_df = result_df.drop(columns = [f"{model_name}_representation"])
+			result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold]
+			result_df = result_df.sort_values(by = [f"{model_name}_{distance_metric}"], ascending=True).reset_index(drop=True)

-							df = df.sort_values(by = ["%s_%s" % (j, k)], ascending=True).reset_index(drop=True)
+			resp_obj.append(result_df)

-							resp_obj.append(df)
-							df = df_base.copy() #restore df for the next iteration
-
-			#----------------------------------
-
-			if model_name == 'Ensemble':
-
-				feature_names = []
-				for j in model_names:
-					for k in metric_names:
-						if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean':
-							continue
-						else:
-							feature = '%s_%s' % (j, k)
-							feature_names.append(feature)
-
-				#print(df.head())
-
-				x = df[feature_names].values
-
-				#--------------------------------------
-
-				boosted_tree = Boosting.build_gbm()
-
-				y = boosted_tree.predict(x)
-
-				verified_labels = []; scores = []
-				for i in y:
-					verified = np.argmax(i) == 1
-					score = i[np.argmax(i)]
-
-					verified_labels.append(verified)
-					scores.append(score)
-
-				df['verified'] = verified_labels
-				df['score'] = scores
-
-				df = df[df.verified == True]
-				#df = df[df.score > 0.99] #confidence score
-				df = df.sort_values(by = ["score"], ascending=False).reset_index(drop=True)
-				df = df[['identity', 'verified', 'score']]
-
-				resp_obj.append(df)
-				df = df_base.copy() #restore df for the next iteration
-
-			#----------------------------------
+		# -----------------------------------

 		toc = time.time()

-		if not silent: print("find function lasts ",toc-tic," seconds")
-
-		if len(resp_obj) == 1:
-			return resp_obj[0]
+		if not silent:
+			print("find function lasts ",toc-tic," seconds")

 		return resp_obj

-	else:
-		raise ValueError("Passed db_path does not exist!")
-
-	return None
-
 def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base'):

 	"""
@ -731,10 +501,6 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection

 		model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace.

-		model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. Consider to pass model if you are going to call represent function in a for loop.
-
-			model = DeepFace.build_model('VGG-Face')
-
 		enforce_detection (boolean): If any face could not be detected in an image, then verify function will return exception. Set this to False not to have this exception. This might be convenient for low resolution images.

 		detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib
@ -744,38 +510,51 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection
 	Returns:
 		Represent function returns a multidimensional vector. The number of dimensions is changing based on the reference model. E.g. FaceNet returns 128 dimensional vector; VGG-Face returns 2622 dimensional vector.
 	"""
+	resp_objs = []

-	if model is None:
 	model = build_model(model_name)

 	#---------------------------------
+	# verification now runs pre-processing itself, so detection can be skipped here when the call comes from verification.
+	if detector_backend != "skip":
+		target_size = functions.find_target_size(model_name=model_name)

-	#decide input shape
-	input_shape_x, input_shape_y = functions.find_input_shape(model)
-
-	#detect and align
-	img = functions.preprocess_face(img = img_path
-		, target_size=(input_shape_y, input_shape_x)
-		, enforce_detection = enforce_detection
-		, detector_backend = detector_backend
-		, align = align)
+		img_objs = functions.extract_faces(img = img_path, 
+								target_size = target_size, 
+								detector_backend = detector_backend, 
+								grayscale = False, 
+								enforce_detection = enforce_detection, 
+								align = align)
+	else: # skip
+		if type(img_path) == str:
+			img = functions.load_image(img_path)
+		elif type(img_path).__module__ == np.__name__:
+			img = img_path.copy()
+		else:
+			raise ValueError(f"unexpected type for img_path - {type(img_path)}")
 		
+		img_region = [0, 0, img.shape[1], img.shape[0]]
+		img_objs = [(img, img_region, 0)]
 	#---------------------------------
+
+	for img, region, confidence in img_objs:
 		#custom normalization
-
 		img = functions.normalize_input(img = img, normalization = normalization)

-	#---------------------------------
-
 		#represent
 		if "keras" in str(type(model)):
 			#new tf versions show progress bar and it is annoying
 			embedding = model.predict(img, verbose=0)[0].tolist()
 		else:
-		#SFace is not a keras model and it has no verbose argument
+			#SFace and Dlib are not keras models and have no verbose argument
 			embedding = model.predict(img)[0].tolist()
 		
-	return embedding
+		resp_obj = {}
+		resp_obj["embedding"] = embedding
+		resp_obj["facial_area"] = region
+		resp_objs.append(resp_obj)
+
+	return resp_objs

 def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enable_face_analysis = True, source = 0, time_threshold = 5, frame_threshold = 5):

@ -810,7 +589,7 @@ def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', di
 	realtime.analysis(db_path, model_name, detector_backend, distance_metric, enable_face_analysis
 						, source = source, time_threshold = time_threshold, frame_threshold = frame_threshold)

-def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True):
+def extract_faces(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True):

 	"""
 	This function applies pre-processing stages of a face recognition pipeline including detection and alignment
@ -818,15 +597,43 @@ def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv',
 	Parameters:
 		img_path: exact image path, numpy array (BGR) or base64 encoded image

+		target_size (tuple): final shape of facial image. black pixels will be added as padding to reach this size without deforming the image.
+
 		detector_backend (string): face detection backends are retinaface, mtcnn, opencv, ssd or dlib

+		enforce_detection (boolean): function throws exception if face cannot be detected in the fed image. 
+		Set this to False if you do not want to get exception and run the function anyway.
+
+		align (boolean): alignment according to the eye positions.
+
 	Returns:
-		deteced and aligned face in numpy format
+		list of dictionaries. Each dictionary has the facial image itself (face), the area it was extracted from in the original image (facial_area) and the confidence score (confidence).
+
 	"""
 	
-	img = functions.preprocess_face(img = img_path, target_size = target_size, detector_backend = detector_backend
-		, enforce_detection = enforce_detection, align = align)[0] #preprocess_face returns (1, 224, 224, 3)
-	return img[:, :, ::-1] #bgr to rgb
+	resp_objs = []
+	img_objs = functions.extract_faces(
+						img = img_path, 
+						target_size = target_size, 
+						detector_backend = detector_backend, 
+						grayscale = False, 
+						enforce_detection = enforce_detection, 
+						align = align
+					)
+
+	for img, region, confidence in img_objs:
+		resp_obj = {}
+
+		# discard expanded dimension
+		if len(img.shape) == 4:
+			img = img[0]
+
+		resp_obj["face"] = img[:, :, ::-1]
+		resp_obj["facial_area"] = region
+		resp_obj["confidence"] = confidence
+		resp_objs.append(resp_obj)
+	
+	return resp_objs

 #---------------------------
 #main
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@ -27,24 +27,6 @@ elif tf_major_version == 2:

 #--------------------------------------------------

-def initialize_input(img1_path, img2_path = None):
-
-	if type(img1_path) == list:
-		bulkProcess = True
-		img_list = img1_path.copy()
-	else:
-		bulkProcess = False
-
-		if (
-			(type(img2_path) == str and img2_path != None) #exact image path, base64 image
-			or (isinstance(img2_path, np.ndarray) and img2_path.any()) #numpy array
-		):
-			img_list = [[img1_path, img2_path]]
-		else: #analyze function passes just img1_path
-			img_list = [img1_path]
-
-	return img_list, bulkProcess
-
 def initialize_folder():
 	home = get_deepface_home()

@ -59,6 +41,8 @@ def initialize_folder():
 def get_deepface_home():
 	return str(os.getenv('DEEPFACE_HOME', default=Path.home()))

+#--------------------------------------------------
+
 def loadBase64Img(uri):
   encoded_data = uri.split(',')[1]
   nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8)
@ -93,35 +77,71 @@ def load_image(img):

 	return img

-def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
+#--------------------------------------------------

+def extract_faces(img, target_size=(224, 224), detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
+
+	# this is going to store a list of the img itself (numpy), its region and confidence
+	extracted_faces = []
+
+	#img might be path, base64 or numpy array. Convert it to numpy whatever it is.
+	img = load_image(img)
 	img_region = [0, 0, img.shape[1], img.shape[0]]

-	#----------------------------------------------
-	#people would like to skip detection and alignment if they already have pre-processed images
 	if detector_backend == 'skip':
-		return img, img_region
-
-	#----------------------------------------------
-
-	#detector stored in a global variable in FaceDetector object.
-	#this call should be completed very fast because it will return found in memory
-	#it will not build face detector model in each call (consider for loops)
+		face_objs = [(img, img_region, 0)]
+	else:
 		face_detector = FaceDetector.build_model(detector_backend)
+		face_objs = FaceDetector.detect_faces(face_detector, detector_backend, img, align)

-	try:
-		detected_face, img_region, _ = FaceDetector.detect_face(face_detector, detector_backend, img, align)
-	except: #if detected face shape is (0, 0) and alignment cannot be performed, this block will be run
-		detected_face = None
-
-	if (isinstance(detected_face, np.ndarray)):
-		return detected_face, img_region
-	else:
-		if detected_face == None:
-			if enforce_detection != True:
-				return img, img_region
-			else:
+	# in case of no face found
+	if len(face_objs) == 0 and enforce_detection == True:
 		raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.")
+	elif len(face_objs) == 0 and enforce_detection == False:
+		face_objs = [(img, img_region, 0)]
+
+	for current_img, current_region, confidence in face_objs:
+		if current_img.shape[0] > 0 and current_img.shape[1] > 0:
+
+			if grayscale == True:
+				current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
+
+			# resize and padding
+			if current_img.shape[0] > 0 and current_img.shape[1] > 0:
+				factor_0 = target_size[0] / current_img.shape[0]
+				factor_1 = target_size[1] / current_img.shape[1]
+				factor = min(factor_0, factor_1)
+
+				dsize = (int(current_img.shape[1] * factor), int(current_img.shape[0] * factor))
+				current_img = cv2.resize(current_img, dsize)
+
+				diff_0 = target_size[0] - current_img.shape[0]
+				diff_1 = target_size[1] - current_img.shape[1]
+				if grayscale == False:
+					# Put the base image in the middle of the padded image
+					current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant')
+				else:
+					current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant')
+			
+			#double check: if target image is not still the same size with target.
+			if current_img.shape[0:2] != target_size:
+				current_img = cv2.resize(current_img, target_size)
+			
+			#normalizing the image pixels
+			img_pixels = image.img_to_array(current_img) #what this line doing? must?
+			img_pixels = np.expand_dims(img_pixels, axis = 0)
+			img_pixels /= 255 #normalize input in [0, 1]
+
+			#int cast is for the exception - object of type 'float32' is not JSON serializable
+			region_obj = {"x": int(current_region[0]), "y": int(current_region[1]), "w": int(current_region[2]), "h": int(current_region[3])}
+
+			extracted_face = [img_pixels, region_obj, confidence]
+			extracted_faces.append(extracted_face)
+	
+	if len(extracted_faces) == 0 and enforce_detection == True:
+		raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
+	
+	return extracted_faces

 def normalize_input(img, normalization = 'base'):

@ -169,94 +189,21 @@ def normalize_input(img, normalization = 'base'):

 	return img

-def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True):
+def find_target_size(model_name):

-	#img might be path, base64 or numpy array. Convert it to numpy whatever it is.
-	img = load_image(img)
-	base_img = img.copy()
+	target_sizes = {
+		"VGG-Face": (224, 224),
+		"Facenet": (160, 160),
+		"Facenet512": (160, 160),
+		"OpenFace": (96, 96),
+		"DeepFace": (152, 152),
+		"DeepID": (55, 47), #TODO: might be opposite
+		"Dlib": (150, 150),
+		"ArcFace": (112, 112),
+		"SFace": (112, 112)
+	}

-	img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align)
+	if model_name not in target_sizes.keys():
+		raise ValueError(f"unimplemented model name - {model_name}")
 	
-	#--------------------------
-
-	if img.shape[0] == 0 or img.shape[1] == 0:
-		if enforce_detection == True:
-			raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
-		else: #restore base image
-			img = base_img.copy()
-
-	#--------------------------
-
-	#post-processing
-	if grayscale == True:
-		img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-	#---------------------------------------------------
-	#resize image to expected shape
-
-	# img = cv2.resize(img, target_size) #resize causes transformation on base image, adding black pixels to resize will not deform the base image
-
-	if img.shape[0] > 0 and img.shape[1] > 0:
-		factor_0 = target_size[0] / img.shape[0]
-		factor_1 = target_size[1] / img.shape[1]
-		factor = min(factor_0, factor_1)
-
-		dsize = (int(img.shape[1] * factor), int(img.shape[0] * factor))
-		img = cv2.resize(img, dsize)
-
-		# Then pad the other side to the target size by adding black pixels
-		diff_0 = target_size[0] - img.shape[0]
-		diff_1 = target_size[1] - img.shape[1]
-		if grayscale == False:
-			# Put the base image in the middle of the padded image
-			img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant')
-		else:
-			img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant')
-
-	#------------------------------------------
-
-	#double check: if target image is not still the same size with target.
-	if img.shape[0:2] != target_size:
-		img = cv2.resize(img, target_size)
-
-	#---------------------------------------------------
-
-	#normalizing the image pixels
-
-	img_pixels = image.img_to_array(img) #what this line doing? must?
-	img_pixels = np.expand_dims(img_pixels, axis = 0)
-	img_pixels /= 255 #normalize input in [0, 1]
-
-	#---------------------------------------------------
-
-	if return_region == True:
-		return img_pixels, region
-	else:
-		return img_pixels
-
-def find_input_shape(model):
-
-	#face recognition models have different size of inputs
-	#my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue.
-
-	input_shape = model.layers[0].input_shape
-
-	if type(input_shape) == list:
-		input_shape = input_shape[0][1:3]
-	else:
-		input_shape = input_shape[1:3]
-
-	#----------------------
-	#issue 289: it seems that tf 2.5 expects you to resize images with (x, y)
-	#whereas its older versions expect (y, x)
-
-	if tf_major_version == 2 and tf_minor_version >= 5:
-		x = input_shape[0]; y = input_shape[1]
-		input_shape = (y, x)
-
-	#----------------------
-
-	if type(input_shape) == list: #issue 197: some people got array here instead of tuple
-		input_shape = tuple(input_shape)
-
-	return input_shape
+	return target_sizes[model_name]
--- a/deepface/commons/realtime.py
+++ b/deepface/commons/realtime.py
@ -50,7 +50,7 @@ def analysis(db_path, model_name = 'VGG-Face', detector_backend = 'opencv', dist

 		#------------------------

-		input_shape = functions.find_input_shape(model)
+		input_shape = functions.find_target_size(model_name=model_name)
 		input_shape_x = input_shape[0]; input_shape_y = input_shape[1]

 		#tuned thresholds for model and metric pair
--- a/tests/dataset/couple.jpg
+++ b/tests/dataset/couple.jpg
--- a/tests/unit_tests.py
+++ b/tests/unit_tests.py
@ -1,6 +1,7 @@
 import warnings
 import os
 import tensorflow as tf
+import numpy as np
 import cv2
 from deepface import DeepFace