deepface/benchmarks/Perform-Experiments.ipynb
2024-08-12 03:10:20 +03:00

353 lines
12 KiB
Plaintext
Vendored

{
"cells": [
{
"cell_type": "markdown",
"id": "8133a99d",
"metadata": {},
"source": [
"# Perform Experiments with DeepFace on LFW dataset"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "5aab0cbe",
"metadata": {},
"outputs": [],
"source": [
"# built-in dependencies\n",
"import os\n",
"\n",
"# 3rd party dependencies\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm import tqdm\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.datasets import fetch_lfw_pairs\n",
"from deepface import DeepFace"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "64c9ed9a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This experiment is done with pip package of deepface with 0.0.90 version\n"
]
}
],
"source": [
"print(f\"This experiment is done with pip package of deepface with {DeepFace.__version__} version\")"
]
},
{
"cell_type": "markdown",
"id": "feaec973",
"metadata": {},
"source": [
"### Configuration Sets"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "453104b4",
"metadata": {},
"outputs": [],
"source": [
"# all configuration alternatives for 4 dimensions of arguments\n",
"alignment = [True, False]\n",
"models = [\"Facenet512\", \"Facenet\", \"VGG-Face\", \"ArcFace\", \"Dlib\", \"GhostFaceNet\", \"SFace\", \"OpenFace\", \"DeepFace\", \"DeepID\"]\n",
"detectors = [\"retinaface\", \"mtcnn\", \"fastmtcnn\", \"dlib\", \"yolov8\", \"yunet\", \"centerface\", \"mediapipe\", \"ssd\", \"opencv\", \"skip\"]\n",
"metrics = [\"euclidean\", \"euclidean_l2\", \"cosine\"]\n",
"expand_percentage = 0"
]
},
{
"cell_type": "markdown",
"id": "c9aeb57a",
"metadata": {},
"source": [
"### Create Required Folders if necessary"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "671d8a00",
"metadata": {},
"outputs": [],
"source": [
"target_paths = [\"lfwe\", \"dataset\", \"outputs\", \"outputs/test\", \"results\"]\n",
"for target_path in target_paths:\n",
" if not os.path.exists(target_path):\n",
" os.mkdir(target_path)\n",
" print(f\"{target_path} is just created\")"
]
},
{
"cell_type": "markdown",
"id": "fc31f03a",
"metadata": {},
"source": [
"### Load LFW Dataset"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "721a7d70",
"metadata": {},
"outputs": [],
"source": [
"pairs_touch = \"outputs/test_lfwe.txt\"\n",
"instances = 1000 #pairs.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "010184d8",
"metadata": {},
"outputs": [],
"source": [
"target_path = \"dataset/test_lfw.npy\"\n",
"labels_path = \"dataset/test_labels.npy\"\n",
"\n",
"if os.path.exists(target_path) != True:\n",
" fetch_lfw_pairs = fetch_lfw_pairs(subset = 'test', color = True\n",
" , resize = 2\n",
" , funneled = False\n",
" , slice_=None\n",
" )\n",
" pairs = fetch_lfw_pairs.pairs\n",
" labels = fetch_lfw_pairs.target\n",
" target_names = fetch_lfw_pairs.target_names\n",
" np.save(target_path, pairs)\n",
" np.save(labels_path, labels)\n",
"else:\n",
" if not os.path.exists(pairs_touch):\n",
" # loading pairs takes some time. but if we extract these pairs as image, no need to load it anymore\n",
" pairs = np.load(target_path)\n",
" labels = np.load(labels_path) "
]
},
{
"cell_type": "markdown",
"id": "005f582e",
"metadata": {},
"source": [
"### Save LFW image pairs into file system"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5bc23313",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1000/1000 [00:00<00:00, 190546.25it/s]\n"
]
}
],
"source": [
"for i in tqdm(range(0, instances)):\n",
" img1_target = f\"lfwe/test/{i}_1.jpg\"\n",
" img2_target = f\"lfwe/test/{i}_2.jpg\"\n",
" \n",
" if not os.path.exists(img1_target):\n",
" img1 = pairs[i][0]\n",
" # plt.imsave(img1_target, img1/255) #works for my mac\n",
" plt.imsave(img1_target, img1) #works for my debian\n",
" \n",
" if not os.path.exists(img2_target):\n",
" img2 = pairs[i][1]\n",
" # plt.imsave(img2_target, img2/255) #works for my mac\n",
" plt.imsave(img2_target, img2) #works for my debian\n",
" \n",
"if not os.path.exists(pairs_touch):\n",
" open(pairs_touch,'a').close()"
]
},
{
"cell_type": "markdown",
"id": "6f8fa8fa",
"metadata": {},
"source": [
"### Perform Experiments\n",
"\n",
"This block will save the experiments results in outputs folder"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e7fba936",
"metadata": {},
"outputs": [],
"source": [
"for model_name in models:\n",
" for detector_backend in detectors:\n",
" for distance_metric in metrics:\n",
" for align in alignment:\n",
" \n",
" if detector_backend == \"skip\" and align is True:\n",
" # Alignment is not possible for a skipped detector configuration\n",
" continue\n",
" \n",
" alignment_text = \"aligned\" if align is True else \"unaligned\"\n",
" task = f\"{model_name}_{detector_backend}_{distance_metric}_{alignment_text}\"\n",
" output_file = f\"outputs/test/{task}.csv\"\n",
" if os.path.exists(output_file):\n",
" #print(f\"{output_file} is available already\")\n",
" continue\n",
" \n",
" distances = []\n",
" for i in tqdm(range(0, instances), desc = task):\n",
" img1_target = f\"lfwe/test/{i}_1.jpg\"\n",
" img2_target = f\"lfwe/test/{i}_2.jpg\"\n",
" result = DeepFace.verify(\n",
" img1_path=img1_target,\n",
" img2_path=img2_target,\n",
" model_name=model_name,\n",
" detector_backend=detector_backend,\n",
" distance_metric=distance_metric,\n",
" align=align,\n",
" enforce_detection=False,\n",
" expand_percentage=expand_percentage,\n",
" )\n",
" distance = result[\"distance\"]\n",
" distances.append(distance)\n",
" # -----------------------------------\n",
" df = pd.DataFrame(list(labels), columns = [\"actuals\"])\n",
" df[\"distances\"] = distances\n",
" df.to_csv(output_file, index=False)"
]
},
{
"cell_type": "markdown",
"id": "a0b8dafa",
"metadata": {},
"source": [
"### Calculate Results\n",
"\n",
"Experiments were responsible for calculating distances. We will calculate the best accuracy scores in this block."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "67376e76",
"metadata": {},
"outputs": [],
"source": [
"data = [[0 for _ in range(len(models))] for _ in range(len(detectors))]\n",
"base_df = pd.DataFrame(data, columns=models, index=detectors)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f2cc536b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"results/pivot_euclidean_with_alignment_True.csv saved\n",
"results/pivot_euclidean_l2_with_alignment_True.csv saved\n",
"results/pivot_cosine_with_alignment_True.csv saved\n",
"results/pivot_euclidean_with_alignment_False.csv saved\n",
"results/pivot_euclidean_l2_with_alignment_False.csv saved\n",
"results/pivot_cosine_with_alignment_False.csv saved\n"
]
}
],
"source": [
"for is_aligned in alignment:\n",
" for distance_metric in metrics:\n",
"\n",
" current_df = base_df.copy()\n",
" \n",
" target_file = f\"results/pivot_{distance_metric}_with_alignment_{is_aligned}.csv\"\n",
" if os.path.exists(target_file):\n",
" continue\n",
" \n",
" for model_name in models:\n",
" for detector_backend in detectors:\n",
"\n",
" align = \"aligned\" if is_aligned is True else \"unaligned\"\n",
"\n",
" if detector_backend == \"skip\" and is_aligned is True:\n",
" # Alignment is not possible for a skipped detector configuration\n",
" align = \"unaligned\"\n",
"\n",
" source_file = f\"outputs/test/{model_name}_{detector_backend}_{distance_metric}_{align}.csv\"\n",
" df = pd.read_csv(source_file)\n",
" \n",
" positive_mean = df[(df[\"actuals\"] == True) | (df[\"actuals\"] == 1)][\"distances\"].mean()\n",
" negative_mean = df[(df[\"actuals\"] == False) | (df[\"actuals\"] == 0)][\"distances\"].mean()\n",
"\n",
" distances = sorted(df[\"distances\"].values.tolist())\n",
"\n",
" items = []\n",
" for i, distance in enumerate(distances):\n",
" if distance >= positive_mean and distance <= negative_mean:\n",
" sandbox_df = df.copy()\n",
" sandbox_df[\"predictions\"] = False\n",
" idx = sandbox_df[sandbox_df[\"distances\"] < distance].index\n",
" sandbox_df.loc[idx, \"predictions\"] = True\n",
"\n",
" actuals = sandbox_df.actuals.values.tolist()\n",
" predictions = sandbox_df.predictions.values.tolist()\n",
" accuracy = 100*accuracy_score(actuals, predictions)\n",
" items.append((distance, accuracy))\n",
"\n",
" pivot_df = pd.DataFrame(items, columns = [\"distance\", \"accuracy\"])\n",
" pivot_df = pivot_df.sort_values(by = [\"accuracy\"], ascending = False)\n",
" threshold = pivot_df.iloc[0][\"distance\"]\n",
" # print(f\"threshold for {model_name}/{detector_backend} is {threshold}\")\n",
" accuracy = pivot_df.iloc[0][\"accuracy\"]\n",
"\n",
" # print(source_file, round(accuracy, 1))\n",
" current_df.at[detector_backend, model_name] = round(accuracy, 1)\n",
" \n",
" current_df.to_csv(target_file)\n",
" print(f\"{target_file} saved\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}