deepface/benchmarks/Perform-Experiments.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8133a99d",
   "metadata": {},
   "source": [
    "# Perform Experiments with DeepFace on LFW dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5aab0cbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# built-in dependencies\n",
    "import os\n",
    "\n",
    "# 3rd party dependencies\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.datasets import fetch_lfw_pairs\n",
    "from deepface import DeepFace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "64c9ed9a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This experiment is done with pip package of deepface with 0.0.90 version\n"
     ]
    }
   ],
   "source": [
    "print(f\"This experiment is done with pip package of deepface with {DeepFace.__version__} version\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "feaec973",
   "metadata": {},
   "source": [
    "### Configuration Sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "453104b4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# all configuration alternatives for 4 dimensions of arguments\n",
    "alignment = [True, False]\n",
    "models = [\"Facenet512\", \"Facenet\", \"VGG-Face\", \"ArcFace\", \"Dlib\", \"GhostFaceNet\", \"SFace\", \"OpenFace\", \"DeepFace\", \"DeepID\"]\n",
    "detectors = [\"retinaface\", \"mtcnn\", \"fastmtcnn\", \"dlib\", \"yolov8\", \"yunet\", \"centerface\", \"mediapipe\", \"ssd\", \"opencv\", \"skip\"]\n",
    "metrics = [\"euclidean\", \"euclidean_l2\", \"cosine\"]\n",
    "expand_percentage = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9aeb57a",
   "metadata": {},
   "source": [
    "### Create Required Folders if necessary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "671d8a00",
   "metadata": {},
   "outputs": [],
   "source": [
    "target_paths = [\"lfwe\", \"dataset\", \"outputs\", \"outputs/test\", \"results\"]\n",
    "for target_path in target_paths:\n",
    "    if not os.path.exists(target_path):\n",
    "        os.mkdir(target_path)\n",
    "        print(f\"{target_path} is just created\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fc31f03a",
   "metadata": {},
   "source": [
    "### Load LFW Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "721a7d70",
   "metadata": {},
   "outputs": [],
   "source": [
    "pairs_touch = \"outputs/test_lfwe.txt\"\n",
    "instances = 1000 #pairs.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "010184d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "target_path = \"dataset/test_lfw.npy\"\n",
    "labels_path = \"dataset/test_labels.npy\"\n",
    "\n",
    "if os.path.exists(target_path) != True:\n",
    "    fetch_lfw_pairs = fetch_lfw_pairs(subset = 'test', color = True\n",
    "                                  , resize = 2\n",
    "                                  , funneled = False\n",
    "                                  , slice_=None\n",
    "                                 )\n",
    "    pairs = fetch_lfw_pairs.pairs\n",
    "    labels = fetch_lfw_pairs.target\n",
    "    target_names = fetch_lfw_pairs.target_names\n",
    "    np.save(target_path, pairs)\n",
    "    np.save(labels_path, labels)\n",
    "else:\n",
    "    if not os.path.exists(pairs_touch):\n",
    "        # loading pairs takes some time. but if we extract these pairs as image, no need to load it anymore\n",
    "        pairs = np.load(target_path)\n",
    "    labels = np.load(labels_path)    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "005f582e",
   "metadata": {},
   "source": [
    "### Save LFW image pairs into file system"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5bc23313",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1000/1000 [00:00<00:00, 190546.25it/s]\n"
     ]
    }
   ],
   "source": [
    "for i in tqdm(range(0, instances)):\n",
    "    img1_target = f\"lfwe/test/{i}_1.jpg\"\n",
    "    img2_target = f\"lfwe/test/{i}_2.jpg\"\n",
    "    \n",
    "    if not os.path.exists(img1_target):\n",
    "        img1 = pairs[i][0]\n",
    "        # plt.imsave(img1_target, img1/255) #works for my mac\n",
    "        plt.imsave(img1_target, img1) #works for my debian\n",
    "    \n",
    "    if not os.path.exists(img2_target):\n",
    "        img2 = pairs[i][1]\n",
    "        # plt.imsave(img2_target, img2/255) #works for my mac\n",
    "        plt.imsave(img2_target, img2) #works for my debian\n",
    "    \n",
    "if not os.path.exists(pairs_touch):\n",
    "    open(pairs_touch,'a').close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6f8fa8fa",
   "metadata": {},
   "source": [
    "### Perform Experiments\n",
    "\n",
    "This block will save the experiments results in outputs folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e7fba936",
   "metadata": {},
   "outputs": [],
   "source": [
    "for model_name in models:\n",
    "    for detector_backend in detectors:\n",
    "        for distance_metric in metrics:\n",
    "            for align in alignment:\n",
    "                \n",
    "                if detector_backend == \"skip\" and align is True:\n",
    "                    # Alignment is not possible for a skipped detector configuration\n",
    "                    continue\n",
    "                \n",
    "                alignment_text = \"aligned\" if align is True else \"unaligned\"\n",
    "                task = f\"{model_name}_{detector_backend}_{distance_metric}_{alignment_text}\"\n",
    "                output_file = f\"outputs/test/{task}.csv\"\n",
    "                if os.path.exists(output_file):\n",
    "                     #print(f\"{output_file} is available already\")\n",
    "                     continue\n",
    "                \n",
    "                distances = []\n",
    "                for i in tqdm(range(0, instances), desc = task):\n",
    "                    img1_target = f\"lfwe/test/{i}_1.jpg\"\n",
    "                    img2_target = f\"lfwe/test/{i}_2.jpg\"\n",
    "                    result = DeepFace.verify(\n",
    "                        img1_path=img1_target,\n",
    "                        img2_path=img2_target,\n",
    "                        model_name=model_name,\n",
    "                        detector_backend=detector_backend,\n",
    "                        distance_metric=distance_metric,\n",
    "                        align=align,\n",
    "                        enforce_detection=False,\n",
    "                        expand_percentage=expand_percentage,\n",
    "                    )\n",
    "                    distance = result[\"distance\"]\n",
    "                    distances.append(distance)\n",
    "                # -----------------------------------\n",
    "                df = pd.DataFrame(list(labels), columns = [\"actuals\"])\n",
    "                df[\"distances\"] = distances\n",
    "                df.to_csv(output_file, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0b8dafa",
   "metadata": {},
   "source": [
    "### Calculate Results\n",
    "\n",
    "Experiments were responsible for calculating distances. We will calculate the best accuracy scores in this block."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "67376e76",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = [[0 for _ in range(len(models))] for _ in range(len(detectors))]\n",
    "base_df = pd.DataFrame(data, columns=models, index=detectors)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f2cc536b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "results/pivot_euclidean_with_alignment_True.csv saved\n",
      "results/pivot_euclidean_l2_with_alignment_True.csv saved\n",
      "results/pivot_cosine_with_alignment_True.csv saved\n",
      "results/pivot_euclidean_with_alignment_False.csv saved\n",
      "results/pivot_euclidean_l2_with_alignment_False.csv saved\n",
      "results/pivot_cosine_with_alignment_False.csv saved\n"
     ]
    }
   ],
   "source": [
    "for is_aligned in alignment:\n",
    "    for distance_metric in metrics:\n",
    "\n",
    "        current_df = base_df.copy()\n",
    "        \n",
    "        target_file = f\"results/pivot_{distance_metric}_with_alignment_{is_aligned}.csv\"\n",
    "        if os.path.exists(target_file):\n",
    "            continue\n",
    "        \n",
    "        for model_name in models:\n",
    "            for detector_backend in detectors:\n",
    "\n",
    "                align = \"aligned\" if is_aligned is True else \"unaligned\"\n",
    "\n",
    "                if detector_backend == \"skip\" and is_aligned is True:\n",
    "                    # Alignment is not possible for a skipped detector configuration\n",
    "                    align = \"unaligned\"\n",
    "\n",
    "                source_file = f\"outputs/test/{model_name}_{detector_backend}_{distance_metric}_{align}.csv\"\n",
    "                df = pd.read_csv(source_file)\n",
    "                  \n",
    "                positive_mean = df[(df[\"actuals\"] == True) | (df[\"actuals\"] == 1)][\"distances\"].mean()\n",
    "                negative_mean = df[(df[\"actuals\"] == False) | (df[\"actuals\"] == 0)][\"distances\"].mean()\n",
    "\n",
    "                distances = sorted(df[\"distances\"].values.tolist())\n",
    "\n",
    "                items = []\n",
    "                for i, distance in enumerate(distances):\n",
    "                    if distance >= positive_mean and distance <= negative_mean:\n",
    "                        sandbox_df = df.copy()\n",
    "                        sandbox_df[\"predictions\"] = False\n",
    "                        idx = sandbox_df[sandbox_df[\"distances\"] < distance].index\n",
    "                        sandbox_df.loc[idx, \"predictions\"] = True\n",
    "\n",
    "                        actuals = sandbox_df.actuals.values.tolist()\n",
    "                        predictions = sandbox_df.predictions.values.tolist()\n",
    "                        accuracy = 100*accuracy_score(actuals, predictions)\n",
    "                        items.append((distance, accuracy))\n",
    "\n",
    "                pivot_df = pd.DataFrame(items, columns = [\"distance\", \"accuracy\"])\n",
    "                pivot_df = pivot_df.sort_values(by = [\"accuracy\"], ascending = False)\n",
    "                threshold = pivot_df.iloc[0][\"distance\"]\n",
    "                # print(f\"threshold for {model_name}/{detector_backend} is {threshold}\")\n",
    "                accuracy = pivot_df.iloc[0][\"accuracy\"]\n",
    "\n",
    "                # print(source_file, round(accuracy, 1))\n",
    "                current_df.at[detector_backend, model_name] = round(accuracy, 1)\n",
    "        \n",
    "        current_df.to_csv(target_file)\n",
    "        print(f\"{target_file} saved\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}