# Perform Experiments with DeepFace on LFW dataset

In [11]:
# built-in dependencies
import os

# 3rd party dependencies
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_lfw_pairs
from deepface import DeepFace

In [2]:
# Record which deepface release produced the results in this notebook.
deepface_version = DeepFace.__version__
print(f"This experiment is done with pip package of deepface with {deepface_version} version")

This experiment is done with pip package of deepface with 0.0.90 version


### Configuration Sets

In [3]:
# Search space of the experiment: the loops below evaluate every combination
# of alignment x model x detector x distance metric.

# whether faces are aligned before being compared
alignment = [True, False]

# face recognition models passed to DeepFace.verify as model_name
models = [
    "Facenet512",
    "Facenet",
    "VGG-Face",
    "ArcFace",
    "Dlib",
    "GhostFaceNet",
    "SFace",
    "OpenFace",
    "DeepFace",
    "DeepID",
]

# face detectors passed to DeepFace.verify as detector_backend
detectors = [
    "retinaface",
    "mtcnn",
    "fastmtcnn",
    "dlib",
    "yolov8",
    "yunet",
    "centerface",
    "mediapipe",
    "ssd",
    "opencv",
    "skip",
]

# distance metrics passed to DeepFace.verify as distance_metric
metrics = ["euclidean", "euclidean_l2", "cosine"]

# forwarded unchanged to DeepFace.verify as expand_percentage
expand_percentage = 0

### Create Required Folders if necessary

In [4]:
# Folders this notebook writes to. "lfwe/test" receives the extracted image
# pairs, so it has to exist before any image is saved.
target_paths = ["lfwe", "lfwe/test", "dataset", "outputs", "outputs/test", "results"]
for target_path in target_paths:
    if not os.path.exists(target_path):
        # makedirs creates missing parents too, so the list order is irrelevant
        os.makedirs(target_path, exist_ok=True)
        print(f"{target_path} is just created")

### Load LFW Dataset

In [5]:
# Number of image pairs to process (pairs.shape[0] would be the loaded subset's size).
instances = 1000

# Marker file: it is created once the image pairs have been written to disk,
# and its presence lets the loader skip reading the pairs array again.
pairs_touch = "outputs/test_lfwe.txt"

In [6]:
# Cached copies of the LFW test pairs and their same/different labels.
target_path = "dataset/test_lfw.npy"
labels_path = "dataset/test_labels.npy"

if not (os.path.exists(target_path) and os.path.exists(labels_path)):
    # Store the result under its own name: rebinding `fetch_lfw_pairs` would
    # shadow the imported function and break the cell when it is run again.
    lfw_pairs = fetch_lfw_pairs(
        subset="test",
        color=True,
        resize=2,
        funneled=False,
        slice_=None,
    )
    pairs = lfw_pairs.pairs
    labels = lfw_pairs.target
    target_names = lfw_pairs.target_names
    np.save(target_path, pairs)
    np.save(labels_path, labels)
else:
    if not os.path.exists(pairs_touch):
        # loading pairs takes some time. but if we extract these pairs as image, no need to load it anymore
        pairs = np.load(target_path)
    labels = np.load(labels_path)

### Save LFW image pairs into file system

In [7]:
# Write every pair to disk as lfwe/test/{i}_1.jpg and lfwe/test/{i}_2.jpg.
# Images that already exist are left untouched, so the cell can be re-run.
os.makedirs("lfwe/test", exist_ok=True)

for i in tqdm(range(0, instances)):
    for slot in (0, 1):
        img_target = f"lfwe/test/{i}_{slot + 1}.jpg"
        if os.path.exists(img_target):
            continue

        # `pairs` is only read when a file is missing; it may not be loaded
        # at all once the marker file exists.
        img = np.asarray(pairs[i][slot])

        # The loaded pixel range differed between the author's machines
        # (0..255 on one, 0..1 on the other). Float images must be in 0..1
        # for imsave, so rescale only when the data is clearly in 0..255.
        if img.dtype.kind == "f" and img.max() > 1:
            img = img / 255

        plt.imsave(img_target, img)

# Leave the marker so later runs know the images are already extracted.
if not os.path.exists(pairs_touch):
    with open(pairs_touch, "a"):
        pass

100%|██████████| 1000/1000 [00:00<00:00, 190546.25it/s]


### Perform Experiments

This block saves the experiment results in the `outputs` folder.

In [8]:
# Run DeepFace.verify on every image pair for each configuration and store
# the raw distances (plus ground-truth labels) in outputs/test/<task>.csv.
# A configuration whose CSV already exists is skipped, so the cell can be
# interrupted and resumed.
for model_name in models:
    for detector_backend in detectors:
        for distance_metric in metrics:
            for align in alignment:

                if detector_backend == "skip" and align:
                    # Alignment is not possible for a skipped detector configuration
                    continue

                alignment_text = "aligned" if align else "unaligned"
                task = f"{model_name}_{detector_backend}_{distance_metric}_{alignment_text}"
                output_file = f"outputs/test/{task}.csv"
                if os.path.exists(output_file):
                    continue

                distances = []
                for i in tqdm(range(0, instances), desc=task):
                    result = DeepFace.verify(
                        img1_path=f"lfwe/test/{i}_1.jpg",
                        img2_path=f"lfwe/test/{i}_2.jpg",
                        model_name=model_name,
                        detector_backend=detector_backend,
                        distance_metric=distance_metric,
                        align=align,
                        enforce_detection=False,
                        expand_percentage=expand_percentage,
                    )
                    distances.append(result["distance"])

                # Only the first `instances` pairs were verified, so keep the
                # labels aligned with them; otherwise the frame cannot be
                # built when `instances` is smaller than the label array.
                df = pd.DataFrame(
                    {
                        "actuals": list(labels[:instances]),
                        "distances": distances,
                    }
                )
                df.to_csv(output_file, index=False)

### Calculate Results

The experiments above only calculate distances. This block finds the best accuracy score for each configuration.

In [9]:
# Zero-filled grid with one row per detector and one column per model;
# each results pivot starts from a copy of this frame.
zero_row = [0] * len(models)
data = [list(zero_row) for _ in detectors]
base_df = pd.DataFrame(data=data, index=detectors, columns=models)

In [10]:
def _find_best_accuracy(df):
    """Return the (threshold, accuracy) pair with the highest accuracy.

    A pair is predicted as "same person" when its distance is strictly below
    the threshold. Candidate thresholds are the observed distances lying
    between the mean distance of the positive pairs and the mean distance of
    the negative pairs. If no distance falls in that window, every observed
    distance is tried instead, so a result is always produced.

    Args:
        df: frame with an "actuals" column (bool or 0/1) and a "distances" column.

    Returns:
        Tuple of (threshold, accuracy), accuracy being a percentage in 0..100.

    Raises:
        ValueError: if the frame holds no usable (non-NaN) distance.
    """
    is_same = df["actuals"].astype(bool)
    positive_mean = df.loc[is_same, "distances"].mean()
    negative_mean = df.loc[~is_same, "distances"].mean()

    distances = df["distances"]
    in_window = (distances >= positive_mean) & (distances <= negative_mean)
    # duplicates would be scored identically, so each value is tried once
    candidates = sorted(set(distances[in_window].tolist()))
    if not candidates:
        candidates = sorted(set(distances.dropna().tolist()))
    if not candidates:
        raise ValueError("no valid distances available to derive a threshold")

    actuals = is_same.to_numpy()
    observed = distances.to_numpy()
    scored = [
        (100 * accuracy_score(actuals, observed < candidate), candidate)
        for candidate in candidates
    ]
    accuracy, threshold = max(scored, key=lambda item: item[0])
    return threshold, accuracy


# Build one detector x model accuracy table per (alignment, metric) pair and
# save it under results/. Tables that already exist are not recomputed.
for is_aligned in alignment:
    for distance_metric in metrics:

        target_file = f"results/pivot_{distance_metric}_with_alignment_{is_aligned}.csv"
        if os.path.exists(target_file):
            continue

        # Float copy of the template: accuracies are fractional, and writing
        # them into integer columns relies on a deprecated implicit upcast.
        current_df = base_df.astype(float)

        for model_name in models:
            for detector_backend in detectors:

                align = "aligned" if is_aligned else "unaligned"

                if detector_backend == "skip" and is_aligned:
                    # Alignment is not possible for a skipped detector configuration
                    align = "unaligned"

                source_file = f"outputs/test/{model_name}_{detector_backend}_{distance_metric}_{align}.csv"
                df = pd.read_csv(source_file)

                threshold, accuracy = _find_best_accuracy(df)
                current_df.at[detector_backend, model_name] = round(accuracy, 1)

        current_df.to_csv(target_file)
        print(f"{target_file} saved")

results/pivot_euclidean_with_alignment_True.csv saved
results/pivot_euclidean_l2_with_alignment_True.csv saved
results/pivot_cosine_with_alignment_True.csv saved
results/pivot_euclidean_with_alignment_False.csv saved
results/pivot_euclidean_l2_with_alignment_False.csv saved
results/pivot_cosine_with_alignment_False.csv saved
