diff --git a/.gitignore b/.gitignore index fe67f86..cde284c 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ deepface/extendedmodels/__pycache__/* deepface/subsidiarymodels/__pycache__/* deepface/detectors/__pycache__/* tests/dataset/*.pkl +tests/sandbox.ipynb .DS_Store deepface/.DS_Store *.pyc diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index d4dd779..9a038a6 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -7,12 +7,13 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import time from os import path +import cv2 import numpy as np import pandas as pd from tqdm import tqdm import pickle -from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace, Boosting +from deepface.basemodels import VGGFace, OpenFace, Facenet, Facenet512, FbDeepFace, DeepID, DlibWrapper, ArcFace, SFace from deepface.extendedmodels import Age, Gender, Race, Emotion from deepface.commons import functions, realtime, distance as dst @@ -67,33 +68,24 @@ def build_model(model_name): return model_obj[model_name] -def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base'): +def verify(img1_path, img2_path, model_name = 'VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enforce_detection = True, align = True, normalization = 'base'): """ This function verifies an image pair is same person or different persons. Parameters: - img1_path, img2_path: exact image path, numpy array (BGR) or based64 encoded images could be passed. If you are going to call verify function for a list of image pairs, then you should pass an array instead of calling the function in for loops. + img1_path, img2_path: exact image path as string. numpy array (BGR) or based64 encoded images are also welcome. + If one of pair has more than one face, then we will compare the face pair with max similarity. - e.g. img1_path = [ - ['img1.jpg', 'img2.jpg'], - ['img2.jpg', 'img3.jpg'] - ] - - model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace or Ensemble + model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace distance_metric (string): cosine, euclidean, euclidean_l2 - model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. - - model = DeepFace.build_model('VGG-Face') - - enforce_detection (boolean): If no face could not be detected in an image, then this function will return exception by default. Set this to False not to have this exception. This might be convenient for low resolution images. + enforce_detection (boolean): If no face could not be detected in an image, then this function will return exception by default. + Set this to False not to have this exception. This might be convenient for low resolution images. detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib - prog_bar (boolean): enable/disable a progress bar - Returns: Verify function returns a dictionary. If img1_path is a list of image pairs, then the function will return list of dictionary. @@ -103,440 +95,266 @@ def verify(img1_path, img2_path = '', model_name = 'VGG-Face', distance_metric = , "max_threshold_to_verify": 0.40 , "model": "VGG-Face" , "similarity_metric": "cosine" + , 'facial_areas': { + 'img1': {'x': 345, 'y': 211, 'w': 769, 'h': 769}, + 'img2': {'x': 318, 'y': 534, 'w': 779, 'h': 779} + } + , "time": 2 } """ tic = time.time() - img_list, bulkProcess = functions.initialize_input(img1_path, img2_path) - - resp_objects = [] - #-------------------------------- + target_size = functions.find_target_size(model_name=model_name) - if model_name == 'Ensemble': - model_names = ["VGG-Face", "Facenet", "OpenFace", "DeepFace"] - metrics = ["cosine", "euclidean", "euclidean_l2"] - else: - model_names = []; metrics = [] - model_names.append(model_name) - metrics.append(distance_metric) - + # img pairs might have many faces + img1_objs = functions.extract_faces( + img = img1_path, + target_size = (target_size[1], target_size[0]), + detector_backend = detector_backend, + grayscale = False, + enforce_detection = enforce_detection, + align = align) + + img2_objs = functions.extract_faces( + img = img2_path, + target_size = (target_size[1], target_size[0]), + detector_backend = detector_backend, + grayscale = False, + enforce_detection = enforce_detection, + align = align) #-------------------------------- - - if model == None: - if model_name == 'Ensemble': - models = Boosting.loadModel() - else: - model = build_model(model_name) - models = {} - models[model_name] = model - else: - if model_name == 'Ensemble': - Boosting.validate_model(model) - models = model.copy() - else: - models = {} - models[model_name] = model - - #------------------------------ - - disable_option = (False if len(img_list) > 1 else True) or not prog_bar - - pbar = tqdm(range(0,len(img_list)), desc='Verification', disable = disable_option) - - for index in pbar: - - instance = img_list[index] - - if type(instance) == list and len(instance) >= 2: - img1_path = instance[0]; img2_path = instance[1] - - ensemble_features = [] - - for i in model_names: - custom_model = models[i] - - #img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'mtcnn' - img1_representation = represent(img_path = img1_path - , model_name = model_name, model = custom_model - , enforce_detection = enforce_detection, detector_backend = detector_backend + distances = [] + regions = [] + # now we will find the face pair with minimum distance + for img1_content, img1_region, img1_confidence in img1_objs: + for img2_content, img2_region, img2_confidence in img2_objs: + img1_embedding_obj = represent(img_path = img1_content + , model_name = model_name + , enforce_detection = enforce_detection + , detector_backend = "skip" , align = align , normalization = normalization ) - - img2_representation = represent(img_path = img2_path - , model_name = model_name, model = custom_model - , enforce_detection = enforce_detection, detector_backend = detector_backend + + img2_embedding_obj = represent(img_path = img2_content + , model_name = model_name + , enforce_detection = enforce_detection + , detector_backend = "skip" , align = align , normalization = normalization ) + + img1_representation = img1_embedding_obj[0]["embedding"] + img2_representation = img2_embedding_obj[0]["embedding"] + + if distance_metric == 'cosine': + distance = dst.findCosineDistance(img1_representation, img2_representation) + elif distance_metric == 'euclidean': + distance = dst.findEuclideanDistance(img1_representation, img2_representation) + elif distance_metric == 'euclidean_l2': + distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation)) + else: + raise ValueError("Invalid distance_metric passed - ", distance_metric) + + distances.append(distance) + regions.append((img1_region, img2_region)) - #---------------------- - #find distances between embeddings - - for j in metrics: - - if j == 'cosine': - distance = dst.findCosineDistance(img1_representation, img2_representation) - elif j == 'euclidean': - distance = dst.findEuclideanDistance(img1_representation, img2_representation) - elif j == 'euclidean_l2': - distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation)) - else: - raise ValueError("Invalid distance_metric passed - ", distance_metric) - - distance = np.float64(distance) #causes trobule for euclideans in api calls if this is not set (issue #175) - #---------------------- - #decision - - if model_name != 'Ensemble': - - threshold = dst.findThreshold(i, j) - - if distance <= threshold: - identified = True - else: - identified = False - - resp_obj = { - "verified": identified - , "distance": distance - , "threshold": threshold - , "model": model_name - , "detector_backend": detector_backend - , "similarity_metric": distance_metric - } - - if bulkProcess == True: - resp_objects.append(resp_obj) - else: - return resp_obj - - else: #Ensemble - - #this returns same with OpenFace - euclidean_l2 - if i == 'OpenFace' and j == 'euclidean': - continue - else: - ensemble_features.append(distance) - - #---------------------- - - if model_name == 'Ensemble': - - boosted_tree = Boosting.build_gbm() - - prediction = boosted_tree.predict(np.expand_dims(np.array(ensemble_features), axis=0))[0] - - verified = np.argmax(prediction) == 1 - score = prediction[np.argmax(prediction)] - - resp_obj = { - "verified": verified - , "score": score - , "distance": ensemble_features - , "model": ["VGG-Face", "Facenet", "OpenFace", "DeepFace"] - , "similarity_metric": ["cosine", "euclidean", "euclidean_l2"] - } - - if bulkProcess == True: - resp_objects.append(resp_obj) - else: - return resp_obj - - #---------------------- - - else: - raise ValueError("Invalid arguments passed to verify function: ", instance) - - #------------------------- + # ------------------------------- + threshold = dst.findThreshold(model_name, distance_metric) + distance = min(distances) #best distance + facial_areas = regions[np.argmin(distances)] toc = time.time() - if bulkProcess == True: + resp_obj = { + "verified": True if distance <= threshold else False + , "distance": distance + , "threshold": threshold + , "model": model_name + , "detector_backend": detector_backend + , "similarity_metric": distance_metric + , "facial_areas": { + "img1": facial_areas[0], + "img2": facial_areas[1] + } + , "time": round(toc - tic, 2) + } - resp_obj = {} + return resp_obj - for i in range(0, len(resp_objects)): - resp_item = resp_objects[i] - resp_obj["pair_%d" % (i+1)] = resp_item - - return resp_obj - -def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , models = None, enforce_detection = True, detector_backend = 'opencv', prog_bar = True): +def analyze(img_path, actions = ('emotion', 'age', 'gender', 'race') , enforce_detection = True, detector_backend = 'opencv', align = True, silent = False): """ This function analyzes facial attributes including age, gender, emotion and race Parameters: - img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. If you are going to analyze lots of images, then set this to list. e.g. img_path = ['img1.jpg', 'img2.jpg'] + img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop some of those attributes. - models: (Optional[dict]) facial attribute analysis models are built in every call of analyze function. You can pass pre-built models to speed the function up. - - models = {} - models['age'] = DeepFace.build_model('Age') - models['gender'] = DeepFace.build_model('Gender') - models['emotion'] = DeepFace.build_model('Emotion') - models['race'] = DeepFace.build_model('Race') - enforce_detection (boolean): The function throws exception if no face detected by default. Set this to False if you don't want to get exception. This might be convenient for low resolution images. detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib. - prog_bar (boolean): enable/disable a progress bar + silent (boolean): disable (some) log messages + Returns: - The function returns a dictionary. If img_path is a list, then it will return list of dictionary. + The function returns a list of dictionaries for each face appearing in the image. - { - "region": {'x': 230, 'y': 120, 'w': 36, 'h': 45}, - "age": 28.66, - "dominant_gender": "Woman", - "gender": { - 'Woman': 99.99407529830933, - 'Man': 0.005928758764639497, + [ + { + "region": {'x': 230, 'y': 120, 'w': 36, 'h': 45}, + "age": 28.66, + "dominant_gender": "Woman", + "gender": { + 'Woman': 99.99407529830933, + 'Man': 0.005928758764639497, + } + "dominant_emotion": "neutral", + "emotion": { + 'sad': 37.65260875225067, + 'angry': 0.15512987738475204, + 'surprise': 0.0022171278033056296, + 'fear': 1.2489334680140018, + 'happy': 4.609785228967667, + 'disgust': 9.698561953541684e-07, + 'neutral': 56.33133053779602 + } + "dominant_race": "white", + "race": { + 'indian': 0.5480832420289516, + 'asian': 0.7830780930817127, + 'latino hispanic': 2.0677512511610985, + 'black': 0.06337375962175429, + 'middle eastern': 3.088453598320484, + 'white': 93.44925880432129 + } } - "dominant_emotion": "neutral", - "emotion": { - 'sad': 37.65260875225067, - 'angry': 0.15512987738475204, - 'surprise': 0.0022171278033056296, - 'fear': 1.2489334680140018, - 'happy': 4.609785228967667, - 'disgust': 9.698561953541684e-07, - 'neutral': 56.33133053779602 - } - "dominant_race": "white", - "race": { - 'indian': 0.5480832420289516, - 'asian': 0.7830780930817127, - 'latino hispanic': 2.0677512511610985, - 'black': 0.06337375962175429, - 'middle eastern': 3.088453598320484, - 'white': 93.44925880432129 - } - } - + ] """ - + #--------------------------------- + # validate actions if type(actions) == str: actions = (actions,) actions = list(actions) - if not models: - models = {} - - img_paths, bulkProcess = functions.initialize_input(img_path) - #--------------------------------- - - built_models = list(models.keys()) - - #--------------------------------- - - #pre-trained models passed but it doesn't exist in actions - if len(built_models) > 0: - if 'emotion' in built_models and 'emotion' not in actions: - actions.append('emotion') - - if 'age' in built_models and 'age' not in actions: - actions.append('age') - - if 'gender' in built_models and 'gender' not in actions: - actions.append('gender') - - if 'race' in built_models and 'race' not in actions: - actions.append('race') - - #--------------------------------- - - if 'emotion' in actions and 'emotion' not in built_models: + # build models + models = {} + if 'emotion' in actions: models['emotion'] = build_model('Emotion') - if 'age' in actions and 'age' not in built_models: + if 'age' in actions: models['age'] = build_model('Age') - if 'gender' in actions and 'gender' not in built_models: + if 'gender' in actions: models['gender'] = build_model('Gender') - if 'race' in actions and 'race' not in built_models: + if 'race' in actions: models['race'] = build_model('Race') - #--------------------------------- - resp_objects = [] - disable_option = (False if len(img_paths) > 1 else True) or not prog_bar + img_objs = functions.extract_faces(img=img_path, target_size=(224, 224), detector_backend=detector_backend, grayscale = False, enforce_detection=enforce_detection, align=align) - global_pbar = tqdm(range(0,len(img_paths)), desc='Analyzing', disable = disable_option) + for img_content, img_region, img_confidence in img_objs: + if img_content.shape[0] > 0 and img_content.shape[1] > 0: + obj = {} + #facial attribute analysis + pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = silent) + for index in pbar: + action = actions[index] + pbar.set_description("Action: %s" % (action)) - for j in global_pbar: - img_path = img_paths[j] + if action == 'emotion': + img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY) + img_gray = cv2.resize(img_gray, (48, 48)) + img_gray = np.expand_dims(img_gray, axis = 0) - resp_obj = {} + emotion_predictions = models['emotion'].predict(img_gray, verbose=0)[0,:] - disable_option = (False if len(actions) > 1 else True) or not prog_bar + sum_of_predictions = emotion_predictions.sum() - pbar = tqdm(range(0, len(actions)), desc='Finding actions', disable = disable_option) + obj["emotion"] = {} + emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'] - img_224 = None # Set to prevent re-detection + for i in range(0, len(emotion_labels)): + emotion_label = emotion_labels[i] + emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions + obj["emotion"][emotion_label] = emotion_prediction - region = [] # x, y, w, h of the detected face region - region_labels = ['x', 'y', 'w', 'h'] + obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)] - is_region_set = False + elif action == 'age': + age_predictions = models['age'].predict(img_content, verbose=0)[0,:] + apparent_age = Age.findApparentAge(age_predictions) + obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable - #facial attribute analysis - for index in pbar: - action = actions[index] - pbar.set_description("Action: %s" % (action)) + elif action == 'gender': + gender_predictions = models['gender'].predict(img_content, verbose=0)[0,:] + gender_labels = ["Woman", "Man"] + obj["gender"] = {} + for i, gender_label in enumerate(gender_labels): + gender_prediction = 100 * gender_predictions[i] + obj["gender"][gender_label] = gender_prediction - if action == 'emotion': - emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'] - img, region = functions.preprocess_face(img = img_path, target_size = (48, 48), grayscale = True, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) + obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)] - emotion_predictions = models['emotion'].predict(img, verbose=0)[0,:] + elif action == 'race': + race_predictions = models['race'].predict(img_content, verbose=0)[0,:] + sum_of_predictions = race_predictions.sum() - sum_of_predictions = emotion_predictions.sum() + obj["race"] = {} + race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic'] + for i in range(0, len(race_labels)): + race_label = race_labels[i] + race_prediction = 100 * race_predictions[i] / sum_of_predictions + obj["race"][race_label] = race_prediction - resp_obj["emotion"] = {} + obj["dominant_race"] = race_labels[np.argmax(race_predictions)] - for i in range(0, len(emotion_labels)): - emotion_label = emotion_labels[i] - emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions - resp_obj["emotion"][emotion_label] = emotion_prediction + #----------------------------- + # mention facial areas + obj["region"] = img_region + + resp_objects.append(obj) + + return resp_objects - resp_obj["dominant_emotion"] = emotion_labels[np.argmax(emotion_predictions)] - - elif action == 'age': - if img_224 is None: - img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) - - age_predictions = models['age'].predict(img_224, verbose=0)[0,:] - apparent_age = Age.findApparentAge(age_predictions) - - resp_obj["age"] = int(apparent_age) #int cast is for the exception - object of type 'float32' is not JSON serializable - - elif action == 'gender': - - if img_224 is None: - img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) - - gender_predictions = models['gender'].predict(img_224, verbose=0)[0,:] - - gender_labels = ["Woman", "Man"] - resp_obj["gender"] = {} - - for i, gender_label in enumerate(gender_labels): - gender_prediction = 100 * gender_predictions[i] - resp_obj["gender"][gender_label] = gender_prediction - - resp_obj["dominant_gender"] = gender_labels[np.argmax(gender_predictions)] - - elif action == 'race': - if img_224 is None: - img_224, region = functions.preprocess_face(img = img_path, target_size = (224, 224), grayscale = False, enforce_detection = enforce_detection, detector_backend = detector_backend, return_region = True) #just emotion model expects grayscale images - race_predictions = models['race'].predict(img_224, verbose=0)[0,:] - race_labels = ['asian', 'indian', 'black', 'white', 'middle eastern', 'latino hispanic'] - - sum_of_predictions = race_predictions.sum() - - resp_obj["race"] = {} - for i in range(0, len(race_labels)): - race_label = race_labels[i] - race_prediction = 100 * race_predictions[i] / sum_of_predictions - resp_obj["race"][race_label] = race_prediction - - resp_obj["dominant_race"] = race_labels[np.argmax(race_predictions)] - - #----------------------------- - - if is_region_set != True and region: - resp_obj["region"] = {} - is_region_set = True - for i, parameter in enumerate(region_labels): - resp_obj["region"][parameter] = int(region[i]) #int cast is for the exception - object of type 'float32' is not JSON serializable - - #--------------------------------- - - if bulkProcess == True: - resp_objects.append(resp_obj) - else: - return resp_obj - - if bulkProcess == True: - return resp_objects - - -def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, prog_bar = True, normalization = 'base', silent=False): +def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base', silent=False): """ - This function applies verification several times and find an identity in a database + This function applies verification several times and find the identities in a database Parameters: - img_path: exact image path, numpy array (BGR) or based64 encoded image. If you are going to find several identities, then you should pass img_path as array instead of calling find function in a for loop. e.g. img_path = ["img1.jpg", "img2.jpg"] - + img_path: exact image path, numpy array (BGR) or based64 encoded image. + db_path (string): You should store some .jpg files in a folder and pass the exact folder path to this. model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib or Ensemble distance_metric (string): cosine, euclidean, euclidean_l2 - model: built deepface model. A face recognition models are built in every call of find function. You can pass pre-built models to speed the function up. - - model = DeepFace.build_model('VGG-Face') - enforce_detection (boolean): The function throws exception if a face could not be detected. Set this to True if you don't want to get exception. This might be convenient for low resolution images. detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib - prog_bar (boolean): enable/disable a progress bar + silent (boolean): disable some logging and progress bars Returns: - This function returns pandas data frame. If a list of images is passed to img_path, then it will return list of pandas data frame. + This function returns list of pandas data frame. Each item of the list corresponding to an identity in the img_path. """ tic = time.time() - img_paths, bulkProcess = functions.initialize_input(img_path) - #------------------------------- - - if os.path.isdir(db_path) == True: - - if model == None: - - if model_name == 'Ensemble': - if not silent: print("Ensemble learning enabled") - models = Boosting.loadModel() - - else: #model is not ensemble - model = build_model(model_name) - models = {} - models[model_name] = model - - else: #model != None - if not silent: print("Already built model is passed") - - if model_name == 'Ensemble': - Boosting.validate_model(model) - models = model.copy() - else: - models = {} - models[model_name] = model - - #--------------------------------------- - - if model_name == 'Ensemble': - model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace'] - metric_names = ['cosine', 'euclidean', 'euclidean_l2'] - elif model_name != 'Ensemble': - model_names = []; metric_names = [] - model_names.append(model_name) - metric_names.append(distance_metric) + if os.path.isdir(db_path) != True: + raise ValueError("Passed db_path does not exist!") + else: + target_size = functions.find_target_size(model_name=model_name) #--------------------------------------- @@ -545,19 +363,21 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', if path.exists(db_path+"/"+file_name): - if not silent: print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.") + if not silent: + print("WARNING: Representations for images in ",db_path," folder were previously stored in ", file_name, ". If you added new instances after this file creation, then please delete this file and call find function again. It will create it again.") f = open(db_path+'/'+file_name, 'rb') representations = pickle.load(f) - if not silent: print("There are ", len(representations)," representations found in ",file_name) + if not silent: + print("There are ", len(representations)," representations found in ",file_name) else: #create representation.pkl from scratch employees = [] for r, d, f in os.walk(db_path): # r=root, d=directories, f = files for file in f: - if ('.jpg' in file.lower()) or ('.png' in file.lower()): + if ('.jpg' in file.lower()) or ('.jpeg' in file.lower()) or ('.png' in file.lower()): exact_path = r + "/" + file employees.append(exact_path) @@ -569,158 +389,108 @@ def find(img_path, db_path, model_name ='VGG-Face', distance_metric = 'cosine', representations = [] - pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = prog_bar) - #for employee in employees: + pbar = tqdm(range(0,len(employees)), desc='Finding representations', disable = True if silent == True else False) for index in pbar: employee = employees[index] - instance = [] - instance.append(employee) + img_objs = functions.extract_faces(img = employee, + target_size = target_size, + detector_backend = detector_backend, + grayscale = False, + enforce_detection = enforce_detection, + align = align + ) - for j in model_names: - custom_model = models[j] - - representation = represent(img_path = employee - , model_name = model_name, model = custom_model - , enforce_detection = enforce_detection, detector_backend = detector_backend + for img_content, img_region, img_confidence in img_objs: + embedding_obj = represent(img_path = img_content + , model_name = model_name + , enforce_detection = enforce_detection + , detector_backend = "skip" , align = align , normalization = normalization ) + + img_representation = embedding_obj[0]["embedding"] - instance.append(representation) + instance = [] + instance.append(employee) + instance.append(img_representation) + representations.append(instance) - #------------------------------- - - representations.append(instance) + #------------------------------- f = open(db_path+'/'+file_name, "wb") pickle.dump(representations, f) f.close() - if not silent: print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.") + if not silent: + print("Representations stored in ",db_path,"/",file_name," file. Please delete this file when you add new identities in your database.") #---------------------------- #now, we got representations for facial database + df = pd.DataFrame(representations, columns = ["identity", f"{model_name}_representation"]) - if model_name != 'Ensemble': - df = pd.DataFrame(representations, columns = ["identity", "%s_representation" % (model_name)]) - else: #ensemble learning - - columns = ['identity'] - [columns.append('%s_representation' % i) for i in model_names] - - df = pd.DataFrame(representations, columns = columns) - - df_base = df.copy() #df will be filtered in each img. we will restore it for the next item. - + # img path might have move than once face + target_objs = functions.extract_faces(img = img_path, + target_size = target_size, + detector_backend = detector_backend, + grayscale = False, + enforce_detection = enforce_detection, + align = align + ) + resp_obj = [] - global_pbar = tqdm(range(0, len(img_paths)), desc='Analyzing', disable = prog_bar) - for j in global_pbar: - img_path = img_paths[j] + for target_img, target_region, target_confidence in target_objs: + target_embedding_obj = represent(img_path = target_img + , model_name = model_name + , enforce_detection = enforce_detection + , detector_backend = "skip" + , align = align + , normalization = normalization + ) + + target_representation = target_embedding_obj[0]["embedding"] - #find representation for passed image + result_df = df.copy() #df will be filtered in each img + #TODO: add facial area in df - for j in model_names: - custom_model = models[j] + distances = [] + for index, instance in df.iterrows(): + source_representation = instance[f"{model_name}_representation"] - target_representation = represent(img_path = img_path - , model_name = model_name, model = custom_model - , enforce_detection = enforce_detection, detector_backend = detector_backend - , align = align - , normalization = normalization - ) + if distance_metric == 'cosine': + distance = dst.findCosineDistance(source_representation, target_representation) + elif distance_metric == 'euclidean': + distance = dst.findEuclideanDistance(source_representation, target_representation) + elif distance_metric == 'euclidean_l2': + distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation)) + else: + raise ValueError(f"invalid distance metric passes - {distance_metric}") - for k in metric_names: - distances = [] - for index, instance in df.iterrows(): - source_representation = instance["%s_representation" % (j)] + distances.append(distance) - if k == 'cosine': - distance = dst.findCosineDistance(source_representation, target_representation) - elif k == 'euclidean': - distance = dst.findEuclideanDistance(source_representation, target_representation) - elif k == 'euclidean_l2': - distance = dst.findEuclideanDistance(dst.l2_normalize(source_representation), dst.l2_normalize(target_representation)) + #--------------------------- - distances.append(distance) + result_df[f"{model_name}_{distance_metric}"] = distances - #--------------------------- + threshold = dst.findThreshold(model_name, distance_metric) + result_df = result_df.drop(columns = [f"{model_name}_representation"]) + result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold] + result_df = result_df.sort_values(by = [f"{model_name}_{distance_metric}"], ascending=True).reset_index(drop=True) - if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean': - continue - else: - df["%s_%s" % (j, k)] = distances + resp_obj.append(result_df) - if model_name != 'Ensemble': - threshold = dst.findThreshold(j, k) - df = df.drop(columns = ["%s_representation" % (j)]) - df = df[df["%s_%s" % (j, k)] <= threshold] - - df = df.sort_values(by = ["%s_%s" % (j, k)], ascending=True).reset_index(drop=True) - - resp_obj.append(df) - df = df_base.copy() #restore df for the next iteration - - #---------------------------------- - - if model_name == 'Ensemble': - - feature_names = [] - for j in model_names: - for k in metric_names: - if model_name == 'Ensemble' and j == 'OpenFace' and k == 'euclidean': - continue - else: - feature = '%s_%s' % (j, k) - feature_names.append(feature) - - #print(df.head()) - - x = df[feature_names].values - - #-------------------------------------- - - boosted_tree = Boosting.build_gbm() - - y = boosted_tree.predict(x) - - verified_labels = []; scores = [] - for i in y: - verified = np.argmax(i) == 1 - score = i[np.argmax(i)] - - verified_labels.append(verified) - scores.append(score) - - df['verified'] = verified_labels - df['score'] = scores - - df = df[df.verified == True] - #df = df[df.score > 0.99] #confidence score - df = df.sort_values(by = ["score"], ascending=False).reset_index(drop=True) - df = df[['identity', 'verified', 'score']] - - resp_obj.append(df) - df = df_base.copy() #restore df for the next iteration - - #---------------------------------- + # ----------------------------------- toc = time.time() - if not silent: print("find function lasts ",toc-tic," seconds") - - if len(resp_obj) == 1: - return resp_obj[0] + if not silent: + print("find function lasts ",toc-tic," seconds") return resp_obj - else: - raise ValueError("Passed db_path does not exist!") - - return None - def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection = True, detector_backend = 'opencv', align = True, normalization = 'base'): """ @@ -731,10 +501,6 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection model_name (string): VGG-Face, Facenet, OpenFace, DeepFace, DeepID, Dlib, ArcFace. - model: Built deepface model. A face recognition model is built every call of verify function. You can pass pre-built face recognition model optionally if you will call verify function several times. Consider to pass model if you are going to call represent function in a for loop. - - model = DeepFace.build_model('VGG-Face') - enforce_detection (boolean): If any face could not be detected in an image, then verify function will return exception. Set this to False not to have this exception. This might be convenient for low resolution images. detector_backend (string): set face detector backend as retinaface, mtcnn, opencv, ssd or dlib @@ -744,38 +510,51 @@ def represent(img_path, model_name = 'VGG-Face', model = None, enforce_detection Returns: Represent function returns a multidimensional vector. The number of dimensions is changing based on the reference model. E.g. FaceNet returns 128 dimensional vector; VGG-Face returns 2622 dimensional vector. """ + resp_objs = [] - if model is None: - model = build_model(model_name) + model = build_model(model_name) #--------------------------------- + # we started to run pre-process in verification. so, this can be skipped if it is coming from verification. + if detector_backend != "skip": + target_size = functions.find_target_size(model_name=model_name) - #decide input shape - input_shape_x, input_shape_y = functions.find_input_shape(model) - - #detect and align - img = functions.preprocess_face(img = img_path - , target_size=(input_shape_y, input_shape_x) - , enforce_detection = enforce_detection - , detector_backend = detector_backend - , align = align) - - #--------------------------------- - #custom normalization - - img = functions.normalize_input(img = img, normalization = normalization) - + img_objs = functions.extract_faces(img = img_path, + target_size = target_size, + detector_backend = detector_backend, + grayscale = False, + enforce_detection = enforce_detection, + align = align) + else: # skip + if type(img_path) == str: + img = functions.load_image(img_path) + elif type(img_path).__module__ == np.__name__: + img = img_path.copy() + else: + raise ValueError(f"unexpected type for img_path - {type(img_path)}") + + img_region = [0, 0, img.shape[1], img.shape[0]] + img_objs = [(img, img_region, 0)] #--------------------------------- - #represent - if "keras" in str(type(model)): - #new tf versions show progress bar and it is annoying - embedding = model.predict(img, verbose=0)[0].tolist() - else: - #SFace is not a keras model and it has no verbose argument - embedding = model.predict(img)[0].tolist() + for img, region, confidence in img_objs: + #custom normalization + img = functions.normalize_input(img = img, normalization = normalization) - return embedding + #represent + if "keras" in str(type(model)): + #new tf versions show progress bar and it is annoying + embedding = model.predict(img, verbose=0)[0].tolist() + else: + #SFace and Dlib are not keras models and no verbose arguments + embedding = model.predict(img)[0].tolist() + + resp_obj = {} + resp_obj["embedding"] = embedding + resp_obj["facial_area"] = region + resp_objs.append(resp_obj) + + return resp_objs def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', distance_metric = 'cosine', enable_face_analysis = True, source = 0, time_threshold = 5, frame_threshold = 5): @@ -810,7 +589,7 @@ def stream(db_path = '', model_name ='VGG-Face', detector_backend = 'opencv', di realtime.analysis(db_path, model_name, detector_backend, distance_metric, enable_face_analysis , source = source, time_threshold = time_threshold, frame_threshold = frame_threshold) -def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True): +def extract_faces(img_path, target_size = (224, 224), detector_backend = 'opencv', enforce_detection = True, align = True): """ This function applies pre-processing stages of a face recognition pipeline including detection and alignment @@ -818,15 +597,43 @@ def detectFace(img_path, target_size = (224, 224), detector_backend = 'opencv', Parameters: img_path: exact image path, numpy array (BGR) or base64 encoded image + target_size (tuple): final shape of facial image. black pixels will be added to resize the image. + detector_backend (string): face detection backends are retinaface, mtcnn, opencv, ssd or dlib - Returns: - deteced and aligned face in numpy format - """ + enforce_detection (boolean): function throws exception if face cannot be detected in the fed image. + Set this to False if you do not want to get exception and run the function anyway. - img = functions.preprocess_face(img = img_path, target_size = target_size, detector_backend = detector_backend - , enforce_detection = enforce_detection, align = align)[0] #preprocess_face returns (1, 224, 224, 3) - return img[:, :, ::-1] #bgr to rgb + align (boolean): alignment according to the eye positions. + + Returns: + list of dictionaries. Each dictionary will have facial image itself, extracted area from the original image and confidence score. + + """ + + resp_objs = [] + img_objs = functions.extract_faces( + img = img_path, + target_size = target_size, + detector_backend = detector_backend, + grayscale = False, + enforce_detection = enforce_detection, + align = align + ) + + for img, region, confidence in img_objs: + resp_obj = {} + + # discard expanded dimension + if len(img.shape) == 4: + img = img[0] + + resp_obj["face"] = img[:, :, ::-1] + resp_obj["facial_area"] = region + resp_obj["confidence"] = confidence + resp_objs.append(resp_obj) + + return resp_objs #--------------------------- #main diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py index 3f7b4d1..f7d2d91 100644 --- a/deepface/commons/functions.py +++ b/deepface/commons/functions.py @@ -27,24 +27,6 @@ elif tf_major_version == 2: #-------------------------------------------------- -def initialize_input(img1_path, img2_path = None): - - if type(img1_path) == list: - bulkProcess = True - img_list = img1_path.copy() - else: - bulkProcess = False - - if ( - (type(img2_path) == str and img2_path != None) #exact image path, base64 image - or (isinstance(img2_path, np.ndarray) and img2_path.any()) #numpy array - ): - img_list = [[img1_path, img2_path]] - else: #analyze function passes just img1_path - img_list = [img1_path] - - return img_list, bulkProcess - def initialize_folder(): home = get_deepface_home() @@ -59,6 +41,8 @@ def initialize_folder(): def get_deepface_home(): return str(os.getenv('DEEPFACE_HOME', default=Path.home())) +#-------------------------------------------------- + def loadBase64Img(uri): encoded_data = uri.split(',')[1] nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8) @@ -93,35 +77,71 @@ def load_image(img): return img -def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True): +#-------------------------------------------------- +def extract_faces(img, target_size=(224, 224), detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True): + + # this is going to store a list of img itself (numpy), it region and confidence + extracted_faces = [] + + #img might be path, base64 or numpy array. Convert it to numpy whatever it is. + img = load_image(img) img_region = [0, 0, img.shape[1], img.shape[0]] - #---------------------------------------------- - #people would like to skip detection and alignment if they already have pre-processed images if detector_backend == 'skip': - return img, img_region - - #---------------------------------------------- - - #detector stored in a global variable in FaceDetector object. - #this call should be completed very fast because it will return found in memory - #it will not build face detector model in each call (consider for loops) - face_detector = FaceDetector.build_model(detector_backend) - - try: - detected_face, img_region, _ = FaceDetector.detect_face(face_detector, detector_backend, img, align) - except: #if detected face shape is (0, 0) and alignment cannot be performed, this block will be run - detected_face = None - - if (isinstance(detected_face, np.ndarray)): - return detected_face, img_region + face_objs = [(img, img_region, 0)] else: - if detected_face == None: - if enforce_detection != True: - return img, img_region - else: - raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.") + face_detector = FaceDetector.build_model(detector_backend) + face_objs = FaceDetector.detect_faces(face_detector, detector_backend, img, align) + + # in case of no face found + if len(face_objs) == 0 and enforce_detection == True: + raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.") + elif len(face_objs) == 0 and enforce_detection == False: + face_objs = [(img, img_region, 0)] + + for current_img, current_region, confidence in face_objs: + if current_img.shape[0] > 0 and current_img.shape[1] > 0: + + if grayscale == True: + current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY) + + # resize and padding + if current_img.shape[0] > 0 and current_img.shape[1] > 0: + factor_0 = target_size[0] / current_img.shape[0] + factor_1 = target_size[1] / current_img.shape[1] + factor = min(factor_0, factor_1) + + dsize = (int(current_img.shape[1] * factor), int(current_img.shape[0] * factor)) + current_img = cv2.resize(current_img, dsize) + + diff_0 = target_size[0] - current_img.shape[0] + diff_1 = target_size[1] - current_img.shape[1] + if grayscale == False: + # Put the base image in the middle of the padded image + current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant') + else: + current_img = np.pad(current_img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant') + + #double check: if target image is not still the same size with target. + if current_img.shape[0:2] != target_size: + current_img = cv2.resize(current_img, target_size) + + #normalizing the image pixels + img_pixels = image.img_to_array(current_img) #what this line doing? must? + img_pixels = np.expand_dims(img_pixels, axis = 0) + img_pixels /= 255 #normalize input in [0, 1] + + #int cast is for the exception - object of type 'float32' is not JSON serializable + region_obj = {"x": int(current_region[0]), "y": int(current_region[1]), "w": int(current_region[2]), "h": int(current_region[3])} + + extracted_face = [img_pixels, region_obj, confidence] + extracted_faces.append(extracted_face) + + if len(extracted_faces) == 0 and enforce_detection == True: + raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.") + + return extracted_faces def normalize_input(img, normalization = 'base'): @@ -169,94 +189,21 @@ def normalize_input(img, normalization = 'base'): return img -def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True): +def find_target_size(model_name): - #img might be path, base64 or numpy array. Convert it to numpy whatever it is. - img = load_image(img) - base_img = img.copy() + target_sizes = { + "VGG-Face": (224, 224), + "Facenet": (160, 160), + "Facenet512": (160, 160), + "OpenFace": (96, 96), + "DeepFace": (152, 152), + "DeepID": (55, 47), #TODO: might be opposite + "Dlib": (150, 150), + "ArcFace": (112, 112), + "SFace": (112, 112) + } - img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align) - - #-------------------------- - - if img.shape[0] == 0 or img.shape[1] == 0: - if enforce_detection == True: - raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.") - else: #restore base image - img = base_img.copy() - - #-------------------------- - - #post-processing - if grayscale == True: - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - - #--------------------------------------------------- - #resize image to expected shape - - # img = cv2.resize(img, target_size) #resize causes transformation on base image, adding black pixels to resize will not deform the base image - - if img.shape[0] > 0 and img.shape[1] > 0: - factor_0 = target_size[0] / img.shape[0] - factor_1 = target_size[1] / img.shape[1] - factor = min(factor_0, factor_1) - - dsize = (int(img.shape[1] * factor), int(img.shape[0] * factor)) - img = cv2.resize(img, dsize) - - # Then pad the other side to the target size by adding black pixels - diff_0 = target_size[0] - img.shape[0] - diff_1 = target_size[1] - img.shape[1] - if grayscale == False: - # Put the base image in the middle of the padded image - img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2), (0, 0)), 'constant') - else: - img = np.pad(img, ((diff_0 // 2, diff_0 - diff_0 // 2), (diff_1 // 2, diff_1 - diff_1 // 2)), 'constant') - - #------------------------------------------ - - #double check: if target image is not still the same size with target. - if img.shape[0:2] != target_size: - img = cv2.resize(img, target_size) - - #--------------------------------------------------- - - #normalizing the image pixels - - img_pixels = image.img_to_array(img) #what this line doing? must? - img_pixels = np.expand_dims(img_pixels, axis = 0) - img_pixels /= 255 #normalize input in [0, 1] - - #--------------------------------------------------- - - if return_region == True: - return img_pixels, region - else: - return img_pixels - -def find_input_shape(model): - - #face recognition models have different size of inputs - #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue. - - input_shape = model.layers[0].input_shape - - if type(input_shape) == list: - input_shape = input_shape[0][1:3] - else: - input_shape = input_shape[1:3] - - #---------------------- - #issue 289: it seems that tf 2.5 expects you to resize images with (x, y) - #whereas its older versions expect (y, x) - - if tf_major_version == 2 and tf_minor_version >= 5: - x = input_shape[0]; y = input_shape[1] - input_shape = (y, x) - - #---------------------- - - if type(input_shape) == list: #issue 197: some people got array here instead of tuple - input_shape = tuple(input_shape) - - return input_shape + if model_name not in target_sizes.keys(): + raise ValueError(f"unimplemented model name - {model_name}") + + return target_sizes[model_name] diff --git a/deepface/commons/realtime.py b/deepface/commons/realtime.py index 1211433..31f82a1 100644 --- a/deepface/commons/realtime.py +++ b/deepface/commons/realtime.py @@ -50,7 +50,7 @@ def analysis(db_path, model_name = 'VGG-Face', detector_backend = 'opencv', dist #------------------------ - input_shape = functions.find_input_shape(model) + input_shape = functions.find_target_size(model_name=model_name) input_shape_x = input_shape[0]; input_shape_y = input_shape[1] #tuned thresholds for model and metric pair diff --git a/tests/dataset/couple.jpg b/tests/dataset/couple.jpg new file mode 100644 index 0000000..1a07d76 Binary files /dev/null and b/tests/dataset/couple.jpg differ diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 1e52971..30d6196 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -1,6 +1,7 @@ import warnings import os import tensorflow as tf +import numpy as np import cv2 from deepface import DeepFace