{ "cells": [ { "cell_type": "markdown", "id": "8133a99d", "metadata": {}, "source": [ "# Perform Experiments with DeepFace on LFW dataset" ] }, { "cell_type": "code", "execution_count": 11, "id": "5aab0cbe", "metadata": {}, "outputs": [], "source": [ "# built-in dependencies\n", "import os\n", "\n", "# 3rd party dependencies\n", "import numpy as np\n", "import pandas as pd\n", "from tqdm import tqdm\n", "import matplotlib.pyplot as plt\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.datasets import fetch_lfw_pairs\n", "from deepface import DeepFace" ] }, { "cell_type": "code", "execution_count": 2, "id": "64c9ed9a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This experiment is done with pip package of deepface with 0.0.90 version\n" ] } ], "source": [ "print(f\"This experiment is done with pip package of deepface with {DeepFace.__version__} version\")" ] }, { "cell_type": "markdown", "id": "feaec973", "metadata": {}, "source": [ "### Configuration Sets" ] }, { "cell_type": "code", "execution_count": 3, "id": "453104b4", "metadata": {}, "outputs": [], "source": [ "# all configuration alternatives for 4 dimensions of arguments\n", "alignment = [True, False]\n", "models = [\"Facenet512\", \"Facenet\", \"VGG-Face\", \"ArcFace\", \"Dlib\", \"GhostFaceNet\", \"SFace\", \"OpenFace\", \"DeepFace\", \"DeepID\"]\n", "detectors = [\"retinaface\", \"mtcnn\", \"fastmtcnn\", \"dlib\", \"yolov8\", \"yunet\", \"centerface\", \"mediapipe\", \"ssd\", \"opencv\", \"skip\"]\n", "metrics = [\"euclidean\", \"euclidean_l2\", \"cosine\"]\n", "expand_percentage = 0" ] }, { "cell_type": "markdown", "id": "c9aeb57a", "metadata": {}, "source": [ "### Create Required Folders if necessary" ] }, { "cell_type": "code", "execution_count": 4, "id": "671d8a00", "metadata": {}, "outputs": [], "source": [ "target_paths = [\"lfwe\", \"dataset\", \"outputs\", \"outputs/test\", \"results\"]\n", "for target_path in target_paths:\n", " if os.path.exists(target_path) != True:\n", " os.mkdir(target_path)\n", " print(f\"{target_path} is just created\")" ] }, { "cell_type": "markdown", "id": "fc31f03a", "metadata": {}, "source": [ "### Load LFW Dataset" ] }, { "cell_type": "code", "execution_count": 5, "id": "721a7d70", "metadata": {}, "outputs": [], "source": [ "pairs_touch = \"outputs/test_lfwe.txt\"\n", "instances = 1000 #pairs.shape[0]" ] }, { "cell_type": "code", "execution_count": 6, "id": "010184d8", "metadata": {}, "outputs": [], "source": [ "target_path = \"dataset/test_lfw.npy\"\n", "labels_path = \"dataset/test_labels.npy\"\n", "\n", "if os.path.exists(target_path) != True:\n", " fetch_lfw_pairs = fetch_lfw_pairs(subset = 'test', color = True\n", " , resize = 2\n", " , funneled = False\n", " , slice_=None\n", " )\n", " pairs = fetch_lfw_pairs.pairs\n", " labels = fetch_lfw_pairs.target\n", " target_names = fetch_lfw_pairs.target_names\n", " np.save(target_path, pairs)\n", " np.save(labels_path, labels)\n", "else:\n", " if os.path.exists(pairs_touch) != True:\n", " # loading pairs takes some time. but if we extract these pairs as image, no need to load it anymore\n", " pairs = np.load(target_path)\n", " labels = np.load(labels_path) " ] }, { "cell_type": "markdown", "id": "005f582e", "metadata": {}, "source": [ "### Save LFW image pairs into file system" ] }, { "cell_type": "code", "execution_count": 7, "id": "5bc23313", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1000/1000 [00:00<00:00, 190546.25it/s]\n" ] } ], "source": [ "for i in tqdm(range(0, instances)):\n", " img1_target = f\"lfwe/test/{i}_1.jpg\"\n", " img2_target = f\"lfwe/test/{i}_2.jpg\"\n", " \n", " if os.path.exists(img1_target) != True:\n", " img1 = pairs[i][0]\n", " # plt.imsave(img1_target, img1/255) #works for my mac\n", " plt.imsave(img1_target, img1) #works for my debian\n", " \n", " if os.path.exists(img2_target) != True:\n", " img2 = pairs[i][1]\n", " # plt.imsave(img2_target, img2/255) #works for my mac\n", " plt.imsave(img2_target, img2) #works for my debian\n", " \n", "if os.path.exists(pairs_touch) != True:\n", " open(pairs_touch,'a').close()" ] }, { "cell_type": "markdown", "id": "6f8fa8fa", "metadata": {}, "source": [ "### Perform Experiments\n", "\n", "This block will save the experiments results in outputs folder" ] }, { "cell_type": "code", "execution_count": 8, "id": "e7fba936", "metadata": {}, "outputs": [], "source": [ "for model_name in models:\n", " for detector_backend in detectors:\n", " for distance_metric in metrics:\n", " for align in alignment:\n", " \n", " if detector_backend == \"skip\" and align is True:\n", " # Alignment is not possible for a skipped detector configuration\n", " continue\n", " \n", " alignment_text = \"aligned\" if align is True else \"unaligned\"\n", " task = f\"{model_name}_{detector_backend}_{distance_metric}_{alignment_text}\"\n", " output_file = f\"outputs/test/{task}.csv\"\n", " if os.path.exists(output_file) is True:\n", " #print(f\"{output_file} is available already\")\n", " continue\n", " \n", " distances = []\n", " for i in tqdm(range(0, instances), desc = task):\n", " img1_target = f\"lfwe/test/{i}_1.jpg\"\n", " img2_target = f\"lfwe/test/{i}_2.jpg\"\n", " result = DeepFace.verify(\n", " img1_path=img1_target,\n", " img2_path=img2_target,\n", " model_name=model_name,\n", " detector_backend=detector_backend,\n", " distance_metric=distance_metric,\n", " align=align,\n", " enforce_detection=False,\n", " expand_percentage=expand_percentage,\n", " )\n", " distance = result[\"distance\"]\n", " distances.append(distance)\n", " # -----------------------------------\n", " df = pd.DataFrame(list(labels), columns = [\"actuals\"])\n", " df[\"distances\"] = distances\n", " df.to_csv(output_file, index=False)" ] }, { "cell_type": "markdown", "id": "a0b8dafa", "metadata": {}, "source": [ "### Calculate Results\n", "\n", "Experiments were responsible for calculating distances. We will calculate the best accuracy scores in this block." ] }, { "cell_type": "code", "execution_count": 9, "id": "67376e76", "metadata": {}, "outputs": [], "source": [ "data = [[0 for _ in range(len(models))] for _ in range(len(detectors))]\n", "base_df = pd.DataFrame(data, columns=models, index=detectors)" ] }, { "cell_type": "code", "execution_count": 10, "id": "f2cc536b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "results/pivot_euclidean_with_alignment_True.csv saved\n", "results/pivot_euclidean_l2_with_alignment_True.csv saved\n", "results/pivot_cosine_with_alignment_True.csv saved\n", "results/pivot_euclidean_with_alignment_False.csv saved\n", "results/pivot_euclidean_l2_with_alignment_False.csv saved\n", "results/pivot_cosine_with_alignment_False.csv saved\n" ] } ], "source": [ "for is_aligned in alignment:\n", " for distance_metric in metrics:\n", "\n", " current_df = base_df.copy()\n", " \n", " target_file = f\"results/pivot_{distance_metric}_with_alignment_{is_aligned}.csv\"\n", " if os.path.exists(target_file):\n", " continue\n", " \n", " for model_name in models:\n", " for detector_backend in detectors:\n", "\n", " align = \"aligned\" if is_aligned is True else \"unaligned\"\n", "\n", " if detector_backend == \"skip\" and is_aligned is True:\n", " # Alignment is not possible for a skipped detector configuration\n", " align = \"unaligned\"\n", "\n", " source_file = f\"outputs/test/{model_name}_{detector_backend}_{distance_metric}_{align}.csv\"\n", " df = pd.read_csv(source_file)\n", " \n", " positive_mean = df[(df[\"actuals\"] == True) | (df[\"actuals\"] == 1)][\"distances\"].mean()\n", " negative_mean = df[(df[\"actuals\"] == False) | (df[\"actuals\"] == 0)][\"distances\"].mean()\n", "\n", " distances = sorted(df[\"distances\"].values.tolist())\n", "\n", " items = []\n", " for i, distance in enumerate(distances):\n", " if distance >= positive_mean and distance <= negative_mean:\n", " sandbox_df = df.copy()\n", " sandbox_df[\"predictions\"] = False\n", " idx = sandbox_df[sandbox_df[\"distances\"] < distance].index\n", " sandbox_df.loc[idx, \"predictions\"] = True\n", "\n", " actuals = sandbox_df.actuals.values.tolist()\n", " predictions = sandbox_df.predictions.values.tolist()\n", " accuracy = 100*accuracy_score(actuals, predictions)\n", " items.append((distance, accuracy))\n", "\n", " pivot_df = pd.DataFrame(items, columns = [\"distance\", \"accuracy\"])\n", " pivot_df = pivot_df.sort_values(by = [\"accuracy\"], ascending = False)\n", " threshold = pivot_df.iloc[0][\"distance\"]\n", " # print(f\"threshold for {model_name}/{detector_backend} is {threshold}\")\n", " accuracy = pivot_df.iloc[0][\"accuracy\"]\n", "\n", " # print(source_file, round(accuracy, 1))\n", " current_df.at[detector_backend, model_name] = round(accuracy, 1)\n", " \n", " current_df.to_csv(target_file)\n", " print(f\"{target_file} saved\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }