diff --git a/tests/Fine-Tuning-Threshold.ipynb b/tests/Fine-Tuning-Threshold.ipynb deleted file mode 100644 index 6272b36..0000000 --- a/tests/Fine-Tuning-Threshold.ipynb +++ /dev/null @@ -1,774 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import itertools\n", - "from sklearn.metrics import confusion_matrix\n", - "from tqdm import tqdm\n", - "tqdm.pandas()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Summary\n", - "\n", - "Face recognition models are regular convolutional neural networks models. They represent face photos as vectors. We find the distance between these two vectors to compare two faces. Finally, we classify two faces as same person whose distance is less than a threshold value.\n", - "\n", - "The question is that how to determine the threshold. In this notebook, we will find the best split point for a threshold." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data set" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Ref: https://github.com/serengil/deepface/tree/master/tests/dataset\n", - "idendities = {\n", - " \"Angelina\": [\"img1.jpg\", \"img2.jpg\", \"img4.jpg\", \"img5.jpg\", \"img6.jpg\", \"img7.jpg\", \"img10.jpg\", \"img11.jpg\"],\n", - " \"Scarlett\": [\"img8.jpg\", \"img9.jpg\"],\n", - " \"Jennifer\": [\"img3.jpg\", \"img12.jpg\"],\n", - " \"Mark\": [\"img13.jpg\", \"img14.jpg\", \"img15.jpg\"],\n", - " \"Jack\": [\"img16.jpg\", \"img17.jpg\"],\n", - " \"Elon\": [\"img18.jpg\", \"img19.jpg\"],\n", - " \"Jeff\": [\"img20.jpg\", \"img21.jpg\"],\n", - " \"Marissa\": [\"img22.jpg\", \"img23.jpg\"],\n", - " \"Sundar\": [\"img24.jpg\", \"img25.jpg\"]\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Positive samples\n", - "Find different photos of same people" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "positives = []\n", - "\n", - "for key, values in idendities.items():\n", - " \n", - " #print(key)\n", - " for i in range(0, len(values)-1):\n", - " for j in range(i+1, len(values)):\n", - " #print(values[i], \" and \", values[j])\n", - " positive = []\n", - " positive.append(values[i])\n", - " positive.append(values[j])\n", - " positives.append(positive)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "positives = pd.DataFrame(positives, columns = [\"file_x\", \"file_y\"])\n", - "positives[\"decision\"] = \"Yes\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Negative samples\n", - "Compare photos of different people" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "samples_list = list(idendities.values())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "negatives = []\n", - "\n", - "for i in range(0, len(idendities) - 1):\n", - " for j in range(i+1, len(idendities)):\n", - " #print(samples_list[i], \" vs \",samples_list[j]) \n", - " cross_product = itertools.product(samples_list[i], samples_list[j])\n", - " cross_product = list(cross_product)\n", - " #print(cross_product)\n", - " \n", - " for cross_sample in cross_product:\n", - " #print(cross_sample[0], \" vs \", cross_sample[1])\n", - " negative = []\n", - " negative.append(cross_sample[0])\n", - " negative.append(cross_sample[1])\n", - " negatives.append(negative)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "negatives = pd.DataFrame(negatives, columns = [\"file_x\", \"file_y\"])\n", - "negatives[\"decision\"] = \"No\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Merge Positives and Negative Samples" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.concat([positives, negatives]).reset_index(drop = True)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(300, 3)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "No 262\n", - "Yes 38\n", - "Name: decision, dtype: int64" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.decision.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "df.file_x = \"deepface/tests/dataset/\"+df.file_x\n", - "df.file_y = \"deepface/tests/dataset/\"+df.file_y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DeepFace" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "from deepface import DeepFace" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "instances = df[[\"file_x\", \"file_y\"]].values.tolist()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"VGG-Face\"\n", - "distance_metric = \"cosine\"" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using VGG-Face model backend and cosine distance.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Verification: 100%|██████████| 300/300 [11:35<00:00, 2.32s/it]\n" - ] - } - ], - "source": [ - "resp_obj = DeepFace.verify(instances, model_name = model_name, distance_metric = distance_metric)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "distances = []\n", - "for i in range(0, len(instances)):\n", - " distance = round(resp_obj[\"pair_%s\" % (i+1)][\"distance\"], 4)\n", - " distances.append(distance)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"distance\"] = distances" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Analyzing Distances" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "tp_mean = round(df[df.decision == \"Yes\"].mean().values[0], 4)\n", - "tp_std = round(df[df.decision == \"Yes\"].std().values[0], 4)\n", - "fp_mean = round(df[df.decision == \"No\"].mean().values[0], 4)\n", - "fp_std = round(df[df.decision == \"No\"].std().values[0], 4)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Mean of true positives: 0.2263\n", - "Std of true positives: 0.0744\n", - "Mean of false positives: 0.6489\n", - "Std of false positives: 0.12\n" - ] - } - ], - "source": [ - "print(\"Mean of true positives: \", tp_mean)\n", - "print(\"Std of true positives: \", tp_std)\n", - "print(\"Mean of false positives: \", fp_mean)\n", - "print(\"Std of false positives: \", fp_std)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Distribution" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xd4XOWV+PHvmVHv3bZkq7g3cJMNBmww1ZTgJCQECKRAwqbXX3o2m7abbHaX9AIhhUCAhECoMR3TbGPLBSN3Wa6SZUm2etfM+/vjSkbYkjWS5s6duXM+zzPPjEd37j1jSUfvnPve84oxBqWUUu7ncToApZRSoaEJXymlooQmfKWUihKa8JVSKkpowldKqSihCV8ppaKEJnyllIoSmvCVUipKaMJXSqkoEeN0AAPl5OSY4uJip8NQSqmIsWnTpnpjTG4g24ZVwi8uLqasrMzpMJRSKmKIyMFAt9WSjlJKRQlN+EopFSU04SulVJTQhK+UUlFCE75SSkUJTfhKKRUlNOErpVSU0IQfJYwxvLDzGHe9so8jDe1Oh6OUckBYXXil7POTZ3bz2zX7APjlixX89WPncPbEDIejUkqFko7wo8DmQw38ds0+PlA6iee/dCFpCbF85v4ttHf3Oh2aUiqEbE34InJARN4Ska0ioj0THPLT5/aQmxrPd941m6l5Kfzf9fM4dKKdP6894HRoSqkQCsUIf4UxZr4xpjQEx1KnOFDfxqt767n5nCKS460K3rmTs7l4Zh6/W7NPR/lKRREt6bjc429WA3D94onveP6TF02hubOXJ/q+rpRyP7sTvgGeFZFNInK7zcdSg3hhVy3zJmUwIT3xHc+XFmUyfVwK960/5FBkSqlQszvhn2+MWQhcCXxaRJafuoGI3C4iZSJSVldXZ3M40aW2pZM3Dzdy6cy8074mIty4pJC3qprYc6zFgeiUUqFma8I3xlT33dcC/wSWDLLNXcaYUmNMaW5uQD38VYDW7TsOwIpBEj7A1WdNQASe2nY0lGEppRxiW8IXkWQRSe1/DFwOlNt1PHW6sgMNpMTHMGtC2qBfz0tLYElxFk+9dRRjTIijU0qFmp0j/HHAayLyJrABeMoY87SNx1OnKDvYwILCDLweGXKba86eQEVtK3trW0MYmVLKCbYlfGNMpTFmXt9tjjHmP+06ljpdS2cPu2uaWVSUecbtLp09DoCXdtWGIiyllIN0WqZLbT3ciN8wbMKfkJ7IzPGpvLRbE75SbqcJ36V2VDcDcFZB+rDbrpiZR9mBBpo7e+wOSynlIE34LrXzaDMT0hPISIobdtsVM/Lo9Rte31sfgsiUUk7RhO9SO4+2DDk751QLCzNIS4jRso5SLqcJ34W6en3sq2tl5vjUgLaP8XpYOiWbtX3z9pVS7qQJ34Uqalvp9ZuAR/gA503J4UhDB4dP6OIoSrmVJnwX2nnUapUwkoS/dEo2AGv3aR1fKbfShO9Ce2tbiPN6KM5OCvg10/JSyEmJO9mOQSnlPprwXaiyro2i7CRivIF/e0WEpVNyWLvvuLZZUMqlNOG70P76Nkpykkf8uqWTs6lt6aKyvs2GqJRSTtOE7zK9Pj8Hj7cxOTdlxK8972QdX8s6SrmRJnyXqWrsoMdnmDyKEX5RdhIT0hNYX6kJXyk30oTvMpV1Vjlmcu7IE76IUFqcRdmBE1rHV8qFNOG7TH/9fTQ1fIAlxZkca+7iSENHMMNSSoUBTfguU1nXSnpiLFnJw/fQGUxpcRYAGw+cCGZYSqkwoAnfZfpn6IgMvejJmcwYl0pqQowmfKVcSBO+yxxuaKcwK/ALrk7l8QilRZlsPNAQxKiUUuFAE76L+PyGmqZOCjITx7Sf0uIsKmpbOdHWHaTIlFLhQBO+i9S3dtHjM+RnjC3hLymx6vhlWtZRylU04btIVaM1s6YgI2FM+zmrIJ04r4eyg1rWUcpNNOG7SFXfVMqxjvATYr3Mm5TOhv06wlfKTTThu0h1Y3ASPlh1/PKqJjq6fWPel1IqPGjCd5Hqxg5SE2JIS4gd874WF2fS6zdsOaxlHaXcQhO+i1Q1dlIQhNE9wKKiLERg435N+Eq5hSZ8F6lu7AhKOQcgPTGWGeNSKTuodXyl3EITvotUN3WQP8YZOgOVFmey+WADvT5/0PaplHKOJnyXaOvqpbG9J2gjfIDFxVm0dfvYVdMStH0qpZyjCd8ljjb1z8EPXsLvb6SmF2Ap5Q6a8F2iqrETCM6UzH4FGYnkpyewUS/AUsoVNOG7RDDn4A+kC6Io5R62J3wR8YrIFhF50u5jRbPqxg48AuNS44O638W6IIpSrhGKEf7ngZ0hOE5Uq2rsYHxaAjHe4H5LT9bxdXqmUhHP1oQvIhOBq4G77TyOCu4c/IGmn1wQRev4SkU6u0f4PwO+Cgw5kVtEbheRMhEpq6urszkc96pu7LQl4Xs9wqKiTJ2po5QL2JbwReQaoNYYs+lM2xlj7jLGlBpjSnNzc+0Kx9X8fsPRJntG+GDNx99zrJXGdl0QRalIZucI/3zgWhE5ADwIXCwi99l4vKjVv/DJWPvgD2VRUSYAm3R6plIRzbaEb4z5hjFmojGmGLgBeNEYc7Ndx4tmVTZNyew3b2IGsV7ROr5SEU7n4buA3Qk/Mc7L3IJ0reMrFeFCkvCNMWuMMdeE4ljRyK6LrgZaXJzFtiNNdPbogihKRSod4btAdWMnKfExpCXE2HaM0qJMun1+3qpqsu0YSil7acJ3gapGqy2yiNh2jP4Ttxu1rKNUxNKE7wLVjR1B7ZI5mOyUeKbkJlOmJ26Vilia8F3ArqtsT7W4r5Ga36+N1JSKRJrwI1x7dy8NQV74ZCilxVk0d/ayt7bV9mMppYJPE36Eq+7rg293SQeszpmgdXylIpUm/AgXiimZ/QqzkshNjdf5+EpFKPvm8amQeDvh29NWYSARYXFxpl5xG46aj8LaX0D1FsgsgfM/B3mznI5KhRkd4Ue4kwufpNmf8AFKi7Koauw4+YdGhYGj2+DO5bDh99a/dz1p/XvXU87GpcKOJvwIV9XYybi0BGKDvPDJUBafXBBFR/lhobUW7r8evLHwidfg1qfhs5th/Fnw0EegeqvTEaowogk/woVqSma/WRNSSYrzsknr+OFh9Veh/Tjc9HfIm2k9l5ILNz0ESTnwyO3g63E2RhU2NOFHuGob++APJsbrYWGh1vHDQsULsP2fsPyrMH7uO7+WnA3X3AH1u2HjH5yJT4UdTfgRzO83HG3sDMkJ24EWFWWys6aZ5k4dOTrGGFjzI0ifBOd/fvBtpq+EyRfByz+GLr12QmnCj2j1bV10+/whmYM/0JKSLIzRBVEcVfkSHNkIF3wRYuIG30YEVnwbOhpgy72hjU+FJU34EayqoW9KZnpoE/6CwgxiPMLG/VrHd8xrP4PUfFgwzJpCkxZD4Xmw7tfg6w1NbCpsacKPYP1X2Yayhg+QFBfD3IJ0NmjCd0Z9Bex/GRbfCjHxw2+/9FPQdBgqnrc/NhXWNOFHsP658KEu6QCcU6ILojhm05/AEwMLbgls++krITlXyzpKE34kq2rsIDnOS1pi6C+YXlycRbfPz9bDjSE/dlTr7YKt98OMqyB1fGCv8cbC2R+APU9Da5298amwpgk/gvXPwbdz4ZOhLC7OQgQt64RaxQvQcSLw0X2/BbeAvxfeesieuFRE0IQfwUI9B3+g9KRYZoxL1c6ZoVb+D0jMgikrRva6vJkwbi7seMyeuFRE0IQfwaobOynIdCbhgzU9c9PBBnp9fsdiiCrdbbB7NcxeZZVpRmr2Kji8Hpqrgx+bigia8CNUR7ePE23djpyw7bekJIv2bh/bq5sdiyGq7F4NPe1w1vtG9/rZ77budz4ZvJhURNGEH6Gqm0LXFnkoS/oaqWkdP0TKH4HUCda8+tHInQ65s7SsE8U04Ueok33wQ3zR1UB5aQkUZyexQev49uvpgH0vwsyrwTOGX9vZq+Dg6zpbJ0ppwo9QoVzp6kwWF2exURc2t9/+V6C3A6ZfObb9zFgJGNj3QlDCUpFFE36EqmrsRATGpztX0gGrjt/Y3kNFnTbnstXu1RCbDMUXjG0/4+dBch7sfS44camIogk/QlU3djAuNXQLnwzlnJJsAN7QOr59jIE9z1hTMWPH+Afe44Gpl1ptFvx6lXS00YQfoayLrpwd3QNMykpkXFq8nri1U802aKmGGWMs5/Sbdhl0NsKRsuDsT0UMTfgRqirEK10NRURYUpLNxv0nMEbr+LbY/TQgMO2K4OxvygoQL+x9Njj7UxHDtoQvIgkiskFE3hSR7SLyPbuOFW36Fz5x8qKrgZYUZ1LT3MnhE7qwuS32rIaJpdbShcGQmAmTztGEH4XsHOF3ARcbY+YB84GVInKujceLGnWt1sInE8NghA+wpK+Or9MzbdB8FKq3WB0vg2napVapSKdnRhXbEr6x9E/diO276Wf+IDjSt/BJuIzwp+WlkJEUy4b9x50OxX32PmPdB6t+36/kIut+/8vB3a8KawElfBF5WESuFpER/YEQEa+IbAVqgeeMMW+MJkj1TlUn++AnORyJxeMRSouydGFzO+x+GtILIW92cPebPx/i0zXhR5lAE/hvgZuAvSLyYxGZGciLjDE+Y8x8YCKwRETmnrqNiNwuImUiUlZXpx8vA3GkoR0InxE+WAui7K9vo7a50+lQ3KOnAyrXWBdLBbsFtsdrzemv1IQfTQJK+MaY540xHwQWAgeA50RkrYh8VESGbdtnjGkE1gCnFSKNMXcZY0qNMaW5uUE6KeVyVQ0dpCfGkhIf+oVPhrK4pK+vjtbxg6fy5b6ra4M0O+dUky+CxoPQcMCe/auwE3CJRkSygY8AHwO2AD/H+gMw6CV7IpIrIhl9jxOBS4FdY4xXYZV0nOySOZg5+WkkxXl1YfNg2rMa4lKgeJk9+598oXWvo/yoEWgN/xHgVSAJeJcx5lpjzN+MMZ8FUoZ42QTgJRHZBmzEquFrX9YgqGroYGIYlXMAYr0eFhVl6hW3wTLw6tpAFiofjZzpkDJe6/hRJNCawN3GmH8NfEJE4o0xXcaY0sFeYIzZBiwYa4DqnYwxVDV2cMG0HKdDOc3i4ix++vwemtp7SE8axQId6m1Ht0LL0bE3SzsTEWuUv+9F6w+MA0tlqtAKtKTzw0GeWxfMQFRgGtt7aO/2hV1JB6xGasZA2UEd5Y/ZyatrL7f3OCUXQlsd1O6w9zgqLJwx4YvIeBFZBCSKyAIRWdh3uwirvKNCrH9KZriVdADmT8og1it64jYY9qyGiYuDd3XtULSOH1WGK+lcgXWidiJwx4DnW4Bv2hSTOoOTF12FyRz8gRJivcybmKGN1MaqqQqOvgmXfMf+Y6VPhKzJcOBVWPop+4+nHHXGhG+MuQe4R0SuM8Y8HKKY1BmcvOgqDEf4YJV17nqlkvbuXpLiwmfaaETZs9q6n3F1aI5XshzK/2m1S/Z4Q3NM5YjhSjo39z0sFpEvnXoLQXzqFFUNHSTGeskM05Oii0uy6PUbth5qdDqUyLV7NWSWQO6M0ByveBl0NVmfKpSrDXfSNrnvPgVIHeSmQuzQiTYKs5KQMJ1RsagoE4/ogiij1tViLWc48+rQzZrpn+e//5XQHE85ZriSzp1999raOEwcPN5OcU7y8Bs6JC0hllkT0tioJ25Hp+IF8HUHv1namaSOg9yZVh3/gi+E7rgq5AK98OonIpImIrEi8oKI1A8o96gQ8fsNh060U5wdfidsB1pSksXmQw109/qdDiXy7F7d168+xJ3Ei5fBwXXg6wntcVVIBToP/3JjTDNwDXAEmA58xbao1KBqW7ro6vVTmB2+I3ywGql19vh5q6rJ6VAii6/Xaoc87QrwhviEd8ly6GmDqs2hPa4KqUATfv8ZwquAB4wx+nndAQeOtwFQlBXeI/xFRVYjtS2HtF3yiBxeDx0NoS3n9Cu+ABA4oHV8Nws04T8hIruAUuAFEckFtA9uiB06brVFLgrzkk5uajwFGYlsOawzdUZkx2MQkwBTLwn9sZOyYNxcPXHrcoG2R/46sBQoNcb0AG3AKjsDU6c7eKKNGI+EZVuFU80vzOBNTfiB8/ushD/tMoh3aAJcyXI4vAF6u5w5vrLdSFawmgV8QEQ+BLwPsLnJhzrVwePtFGQmEuO1cyni4Jg/MYMjDR3Ut2ryCMjBtdB6DOa817kYSpZBbycc2ehcDMpWgc7SuRf4X+ACYHHfbdAumco+B4+3Uxjm9ft+8yZlAOgoP1DbH4HYJPsWOwlE0XkgHi3ruFigUwFKgdnGGF2E3EEHj7cxb1K+02EEZG5BGl6P8ObhRi6ZNc7pcMKbrxd2PG4l+zgHZ2AlpMOE+bD/VVjhXBjKPoHWBsqB8XYGos6soa2b5s5eisN8Sma/pLgYpo9LZesRnZo5rAOvQHs9zHmP05FYZZ0jG6G73elIlA0CTfg5wA4ReUZEHu+/2RmYeqeKulYApuQOtcBY+Jk/KZ03DzeiHwyHseWv1uh6moPlnH7Fy8HfY00RVa4TaEnnu3YGoYZXUWsl/Kl5kZTwM3hgw2EOHG+nJIzbQTiqoxF2PQkLbobYBKejgcJzwRNjlXWmXOx0NCrIAp2W+TJwAIjte7wR0EvyQqiitpWEWE9ETMnspyduA1D+sDUzZv4HnY7EEp8CBYv0xK1LBTpL5+PAP4A7+54qAB61Kyh1uoraVqbkpuDxhGeXzMFMy0slKc7LVk34Q9tyH+TNgfwwWv65eBlUb7E6dypXCbSG/2ngfKAZwBizF8izKyh1uora1ogq5wB4PcJZBel6xe1QjmyC6s2w8EPhtYB4yXIwPquZmnKVQBN+lzGmu/8fIhID6Jm4EGnv7qWqsYOpEXTCtt/ZE9PZebSZHp92zjzNG7+FuFSYf5PTkbzTpCXgjYP9us6t2wSa8F8WkW9iLWZ+GfAQ8IR9YamB9tVaTdMibYQPMLcgne5e/8mTzqpP81HY/k9YeAskpDkdzTvFJsKkc6z++MpVAk34XwfqgLeAfwP+BXzbrqDUO1XUWbXUSEz4c/LTASjXVsnvtOEuq3/OktudjmRwxcvg6DZo18a4bhLoLB0/1knaTxlj3meM+b1edRs6O6qbiYvxhPVKV0OZnJNMcpyX7dXNTocSPtqOWwl/9irIKnE6msGVLAOM1eNHucZwi5iLiHxXROqBXcBuEakTke+EJjwFsL26mZnjU4mNgKZpp/J4hNn5aTrCH2jtz6G7DS76htORDK2gFGIStazjMsNlkC9gzc5ZbIzJNsZkAecA54vIF22PTmGMYXt1M3Pyw6zOOwJz8tPZcbQZn18/FNJyDDb8Hs56P+TNdDqaocXEWRdh6Xx8Vxku4X8IuNEYs7//CWNMJXBz39eUzaoaO2jq6DlZC49EcwvSae/2sb++zelQnPfcd8DfCxd93elIhleyDGp3QGud05GoIBku4ccaY+pPfdIYU8fbyx4qG5VXWbXvSB7hzy2wYt9eHeVlnYPrYNuDcN5nIXuK09EMr+RC617LOq4xXMLvHuXXVJDsqG7CIzBzfOQm/Km5KcTHeKK7jt/VCo99CtILYdmXnY4mMBPmW9cJaMJ3jeGap80TkcGmVwhwxk5PIjIJ+AtWW2U/cJcx5uejijKKbT7UyIzxaSTGeZ0OZdRivB5mTkg7+WklKj39NTixHz7ylLM970fCGwNFS61GasoVzjjCN8Z4jTFpg9xSjTHDlXR6gS8bY2YB5wKfFpHZwQo8GvT6/Gw51EBpUabToYzZ3Pw0yquborNV8uZ7rZ45y74Exec7Hc3IlCyH43utC8VUxLNtnp8x5qgxZnPf4xZgJ1bTNRWgXTUttHX7KC12QcIvSKels5dDJ6JsYY3KNfDkF2DyivCehjmU4mXWvZZ1XCEkE7tFpBhYALwxyNduF5EyESmrq9PZAAOVHbCuciwtznI4krGbe/KK2ygq6xx4HR78IORMh+vvAW8EznMYfxYkZOj0TJewPeGLSArwMPAFY8xpv+3GmLuMMaXGmNLc3Fy7w4ko6ytPUJCRGFE98IcyfXwKsV6hPFpm6ux5Fu67DtLy4eZHrBWtIpHHC8UXaMJ3CVsTvojEYiX7vxpjHrHzWG7T4/PzekU9y6blOB1KUMTHeJk+LtX9M3WMgdd+CvdfDznT4CP/grQJTkc1NsXLoPEgNBx0OhI1RrYlfBER4A/ATmPMHXYdx622Hm6kpauXC6e751PPnPw0tlc3u/fEbUsNPHADPP9da0HyW5+BFBd8/0qWW/dax494do7wzwduAS4Wka19t6tsPJ6rrNldi9cjnDfVHSN8sE7cnmjrpqa50+lQgssYeOsf8JtzrZO0K38M7/sjxCU5HVlw5M2CpBydnukCgS5iPmLGmNew5uurETLGsPqtGpYUZ5GeGIEn+obQf7VweVUzE9Ij/7wEADVvweqvw8HXrLVg3/07yJ3udFTBJWLV8Q+8av1xC6fVudSIRF77xShQXtVMZX0bq+bnOx1KUM0cn4aIS1ostNTAk1+EO5db/Wau+Snc9pz7kn2/kuXQXAUnKp2ORI2BbSN8NXqPbq0i1itcOTfCT/adIjk+hsk5yZE9NbPlmHVSdtOfwNdjLWBy0dchMfKvlTij/jr+/lciow+QGpQm/DDT1tXLQ2WHuXz2eNKT3FPO6TcnP/3k9QURpbUWXv85bLzbSvTzboTlX4asyU5HFhrZUyFlvJXwSz/qdDRqlDThh5mHNx+hubOXWy8I05WQxmhOfhqPv1nNibZuspLjnA5neC3HrERf9kfwdcHZN8Dy/xd9o1wRa5RfuUbr+BFME34Y6ej28ZuX9rGwMIOFhRlOh2OLuQXWBUjbq5tYNi2Mpyy21sFrd/Ql+h44+3pY/pXoS/QDlSyDt/4OdbvDe/EWNSRN+GHkdy/vo6a5k1/cuABx6Qiqf6bO9urm8Ez4vh6rbPPSj6C7FebdYLUzjuZE36+/jl+5RhN+hNKEHyZe2l3LL1/cy7Xz8llSEvm9c4aSkRRHQUZieC5qfnwfPHwbVG+BKRfDyv9276yb0cgshqwpUPE8nPsJp6NRo6AJPwxU1Lbwufu3MGN8Gj++7iynw7HdnPw0todbi4WdT8A/P2H1jnn/n2H2u7VOPZhpl1szlLrb3XNhWRTRefgOa2zv5rZ7yoiP9XD3h0tJinP/3+A5+ensP95Ga1ev06FYNt4Nf7sFcmfCJ1632iJosh/ctMugtxMOvOZ0JGoUNOE7qMfn59P3b+ZoYyd33rLIFV0xAzG3IA1jYOfRMCjrbPg9PPVlmHElfPgJyJjkdEThreh8iE2Cvc86HYkaBU34DvrBkzt4veI4//meuSwqcm/d/lRz+nrjO17WKX8Y/vUVmHEVXH+vligCEZtgnbyteM6anqkiiiZ8h/x942H+su4gty+fzPtLo2tUOS4tnuzkOGdP3B59Ex79FBQutRqded1fSguaaZdBwwE4XuF0JGqENOE7oLyqiW8/Vs75U7P52srom94mIswpSKfcqYTf0WDV7JOy4QP3Qmx0lNKCZupl1r2WdSKOJvwQ6+r18fkHt5CVFMcvbliA1xOdJwfn5Kex91gLXb2+0B/8qS9bjcDefw8ku6f9dMhkFlknuDXhRxxN+CH2m5f2sa+ujR9fdxbZKfFOh+OYOflp9PoNe2paQ3vgnU9YtfsLvwaTFof22G4y9VI4uBa6Qvz9U2OiCT+EDp9o57dr9nHtvHwumpHndDiO6l/UPKStkttPwJNfgvFnwwVfDN1x3Wja5eDrhv0vOx2JGgFN+CH0yxf3gsA3roq+uv2pCrOSSImPCe2J2zU/hvbj8O7fgNd9nUhDqnApxKfB7tVOR6JGQBN+iFTWtfLw5ipuPqfIPas9jYHHI8yekEZ5qEb4JyqtRmiLPgzj3X81s+1i4qzZOrtXg9+B8zBqVDThh8jvXt5HrFf45EXahKvfnII0dh5txucPwXzuF39ojeov/Jr9x4oWM6+G9no4stHpSFSANOGHwPHWLh7dWs11CyeSmxq9J2pPNSc/nc4eP5V1Np/4q95inahd+mlIHW/vsaLJ1MvAEwu7nnQ6EhUgTfgh8MCGQ3T3+vnIecVOhxJW5ha83SrZVs9/FxKz4LzP2XucaJOQZl11u+spveo2QmjCt1mvz8996w+xbFoO08alOh1OWJmSm0JcjMfemTr7XrT6t1/4VStBqeCaebV1fqRut9ORqABowrfZqxX11DR38sFzCp0OJezEej3MHJ9q36Lmfj889x+QUQSlt9pzjGg34yrrXss6EUETvs0e3nSEjKRYVsyM7nn3Q5mTn8726iaMHSWB8oehZhtc/O8Qo+dObJE2AQoWwe5/OR2JCoAmfBs1dfTw7I5jXDsvn/gYr9PhhKU5+Wk0d/ZypKEjuDvu7YIXv29NwZx7XXD3rd5p5tVQtQmaq52ORA1DE76Nntp2lO5eP9ctnOh0KGHr7TVug1zHL/sTNB6CS78HHv0xt9XMa6z7XU85G4calv4m2OjRLVVMzUvh7InpTocStmZNSMPrkeDO1Olshld+ApMvgqmXBG+/anA50yFnBux4zOlI1DA04duktqWTjQdPcM3ZExBdLm9ICbFepuQmUx7MxVDW/sJqoXDpd4O3TzU0EWtZyAOvQUuN09GoM9CEb5Nntx/DGLhy7gSnQwl71onbII3wW2pg3a+tun3+guDsUw1vznsAAzsedzoSdQa2JXwR+aOI1IpIuV3HCGdPl9dQkpPM9HEpTocS9ubkp1Hb0kVtS+fYd/byf4OvBy7+9tj3pQKXNxPyZsP2R5yORJ2BnSP8PwMrbdx/2Gps72Zd5XFWzh2v5ZwAzC2wznGMuaxTvxc23WPNuc+aHITI1IjMeQ8cWqezdcKYbQnfGPMKcMKu/Yez53Ycw+c3XDlX+7YE4qyCdDwCWw41jm1Hz3/XWq5w+VeCEpcaoTnvse715G3Y0hq+DZ7ZXkNBRiJnFejsnEAkx8cwc3wamw81jH4nB163rvY8/wuQkhu84FTgcqbBuLOgXMs64crxhC8it4tImYiU1dXVOR3OmLV29fLK3nqumKPlnJFYWJTB1kMY3Zo4AAAQUElEQVSNo2uV7PfDs9+C1HyrI6Zyztz3wJEN1jUQKuw4nvCNMXcZY0qNMaW5uZE/MntpVy3dvX5WajlnRBYWZtLW7WPPsZaRv7j8H1YL5Eu+A3FJwQ9OBa7/quZtf3c2DjUoxxO+2zxdXkNOSjyLijKdDiWiLCy0/r9GXNbpbofnvwcT5sHZH7AhMjUimcVQdD68+YC2TA5Ddk7LfABYB8wQkSMicptdxwoXnT0+Xtpdy+VzxuH1aDlnJIqyk8hKjmPzwRGeuH35v6H5CKz8sbZQCBfzboTjFXCkzOlI1CnsnKVzozFmgjEm1hgz0RjzB7uOFS5e2VNHe7dPZ+eMgoiwsDCDLSMZ4R/bDut+BfNvhqLz7AtOjczsVRCTaI3yVVjRIVEQPV1eQ3piLOdOznY6lIi0oDCTyvo2Gtq6h9/Y74cnvgDxaXDZ9+0PTgUuIQ1mXWO1p+7tcjoaNYAm/CDp7vXz/M5jXDIrj1iv/reORn8df8vhAEb5G+60ZoNc/kNI1j+wYWfejdDZCHuedjoSNYBmpiBZV3mc5s5e7Z0zBvMnZRDn9bC+cpjr9Y7tsFaymr4S5t8UmuDUyEy+CFInwFYt64QTTfhB8nR5DUlxXpZNy3E6lIiVGOdlfmEG6/YdH3qjnk545ONW2eDaX1mdGlX48Xhh3g2w91lttRBGNOEHgc9veG5HDStm5pEQqytbjcXSydlsr26iqaPn9C8aA099GY6Vw6pf6xW14W7hh8H4rf5GKixowg+CsgMnqG/t1tk5QbB0SjZ+Axv2D1LW2Xg3bL0Pln8Vpl8R+uDUyGSVWAvQbL7H6mCqHKcJPwie3l5DXIyHi2boQuVjtaAwg/gYz+llnb3PwdNft+r2F33DmeDUyC3+GLQchd2rnY5EoQl/zIwxPFNew/JpOaTExzgdTsSLj/FSWpzJ2n31bz956A342y1Wv/X3/l4vsIok0y6H9EnWpzPlOP3NGaNtR5qobupkpc7OCZqlk7PZVdNCfWsXHH0T7n8/pOXDzY9YJ2tV5PB4YdGHYf/L1noFylGa8MfoiTerifUKl87Sck6wXDjd+r98a91z8Od3WRdX3fJPPUkbqRZ8CLxx8MbvnI4k6mnCHwOf3/D4m9VcOD2PjKQ4p8NxjTn5aVyVvIfz1t5mXVT10dWQWeR0WGq0UsdZje223Adt9cNvr2yjCX8M1lcep7ali3cvyHc6FFfxbL2XX/p+wCF/Lj0fegoyJjkdkhqr8z5ntVnYcJfTkUQ1Tfhj8OiWKlLiY7h01jinQ3EHvw+e+RY8/lkaxi3luq7vsPG4fnJyhdzpMPNqK+F3tzkdTdTShD9KnT0+ni6vYeXc8XqxVTB0NsMDN1jdL5fcTuJHHqbTm8qLO2udjkwFy/mfh44G2PwXpyOJWprwR+nZHcdo6erl3fMLnA4l8p3YD3+4HCpegKvvgKv+h+TEBM6fms3q8hqMLqThDpOWQPEyePUOHeU7RBP+KN3/xkEmZSVy3hTt1DgmB9fC3ZdYF+fc8ggsfnudnHfNy6eqsWNsi5ur8HLxv0NbLbxxp9ORRCVN+KOwr66V9ZUnuGFxIR5d2Wr0Nt8L91wLiZnw8RetDosDXD5nPPExHh7bqs23XKPwHJh2Bbz+M+gY4epmasw04Y/CA28cIsYjXF+qs0dG5eTJ2c9A8fnwseche8ppm/WfEH9q21F6fX4HAlW2uPjb0NkEr93hdCRRRxP+CLV19fLQpiNcMWc8uanxTocTeTqb4IEbrZOziz8OH3zYGuEPYdX8fI63dfPiLj156xoTzob5H4R1v4G6PU5HE1U04Y/QAxsO0dTRw8eWlTgdSuSp3wu/vwQqnoer/heu/l/wnrn/0MUz8xiflsC96w+GKEgVEpd+D2KTYPVXrLbXKiQ04Y9Ad6+fu1/dz7mTs1hQOPSoVA1izzPw+4uh4wR8+HFY8vGAXhbj9XDTOYW8ureeyrpWm4NUIZOSa5V2KtdYa9+qkNCEPwIPbTpMTXMnn7jw9HqzGoIx8Mr/wP0fgMxiuP1lKL5gRLu4YckkYr3CH17bb0+Myhmlt0JBqbWoTVOV09FEBU34AWrr6uVnz++ltCiTC6drE6+AdDbB32+BF38IZ70fbn1mVG0S8lITeH/pJP5edpiqxg4bAlWO8MbAe++yFkd59JPg1xPzdtOEH6C7XqmkrqWLb1w1C9F1VIdXtQl+twx2/Qsu/6H1ix2XNOrdfXrFVAB+9WJFsCJU4SB7Cqz8kdU++ZX/cToa19OEH4C9x1r47Zp9vGtePouKtHZ/Rn4/rPs1/OEKaz3Tj66G8z475sXGCzISuWlJIX/beIjyqqYgBavCwsIPwbwbYc1/wY7HnI7G1TThD6PH5+cr/9hGcryX/3jXbKfDCW8nKuEv18Iz37TWnP23V6wLbYLkS5fNICs5jm/98y18fp3Z4RoicM3PYOISeOR260SusoUm/GH851M72Xq4kR+8ey45KTrvflC+Hlj7S/jNedYKVe/6BXzgPkjKCuph0pNi+fdrZvPmkSZ+9rzO33aV2AS48UHImgz33wD7XnI6IlfShH8Gf3p9P39ee4DbLijhmrO15/1pjLEWp/7NufDst63WCJ9+w1rSzqbzHKvmF3B96UR++WIFq986assxlEOSs+FDj0NWCfz1fVD2R6cjch1N+IMwxnDny/v43hM7uHz2OL5x5UynQwovfr91MvYPl1stjRG46e9w4wPW2rM2+/6quSwozOBzD27hme01th9PhVBKLtz6NExeAU9+ER76KLQddzoq17A14YvIShHZLSIVIvJ1O48VLPWtXXz2gS38aPUurjprPL+6aSExXv27CEDzUXjtZ/DrJfDgjdBaA1f/H3xqnVWzD9HspYRYL/fcuoTZ+el84r5N3PHcHrp7dUqfaySkw01/gxXfhp1PwK8WWT932lJ5zMSuXuMi4gX2AJcBR4CNwI3GmB1Dvaa0tNSUlZXZEs9wjjV38tf1B/nT2gN09vj4wqXT+eSFU6K7G2ZvNxwrh30vWL3qD79hzbyZdA4s/hjMee+wrRHs1NHt49uPlvPw5iOU5CTzmRVTufrsCbogjZsc2w7P/QdUPAcJGdb1HGe9DwoWgTfW6ejCgohsMsaUBrStjQl/KfBdY8wVff/+BoAx5kdDvcbuhG+Mob3bR0N7N8dbu9lX18qeY62sqzzOtiNWq9ZLZubx9StnMTUvxbY4woLfB10t0N1q3bfVQdMR64rHxgNQUw61O8DXbW0/YR5MuxzOvgFypjoa+qle2l3Lfz21k721raTGx3DulGwWF2cyJTeFouxkspLjSEuI0U9qkezQetjwe2vE7+uCuFRrQZW8WdYtrQBSxkFKHsSlQEx8yD5xOm0kCd/O4VkBcHjAv48AwZujN8DVv3iV9m4fPr/B5zf4zTvvrcdWL5zuU9rsxnqFuQXpfOGS6ayan09xTnLgB654Hp7+JmAGNIA65TH0/Xuwx6duM8jrA97XqY/7X8+Ax/1f90FP+9DvKzkPxs2Gcz8J48+GkuXWL1KYWjEjj4um57K+8gSPba1i7b7jPLfj2GnbJcd5iYvx4PV4iPEIMV4hxiN4RKAvNwjwjStncelsXac4rBSea906Gq2LtCrXwOGNcOA16w/AaQRiE62bNw7EYz0nHusPgfQ/7n9eOPlD4ISkbLh1te2HsTPhD/a/d9rHCRG5HbgdoLCwcFQHmj4ulV6/wSvg8QheEbweeedjEWJjhIzEODKTYslKjmNybjJF2cnEjnbkF59mjS6sN8LbWePUx/D2D9Ugj097zVDPB7CvM+5X3t4mPg3iUyA+1RoRJWVD+kRrpBSbMIr/DGeJCEunZLO0bwWyE23d7K9v4/CJdhrbu2ns6KG5o5dev59ev6HX139vDQzg7b+/aYlaKghbiRkwe5V1A+uTasMBaKmB1mPWJ9XuVujpePvm6+bkgMcYqyxp/H3P9T12umNnQlpIDhNVJR2llHKbkZR07CxqbgSmiUiJiMQBNwCP23g8pZRSZ2BbSccY0ysinwGeAbzAH40x2+06nlJKqTOzdU6dMeZfwL/sPIZSSqnA6Dw1pZSKEprwlVIqSmjCV0qpKKEJXymlooQmfKWUihK2XXg1GiJSBxx0Oo4BcoB6p4MYI30P4UHfQ3hw43soMsbkBvLCsEr44UZEygK9gi1c6XsID/oewkO0vwct6SilVJTQhK+UUlFCE/6Z3eV0AEGg7yE86HsID1H9HrSGr5RSUUJH+EopFSU04TP8YusiEi8if+v7+hsiUhz6KIcWQPxfEpEdIrJNRF4QkSIn4hxOoIvei8j7RMSISFjNtggkfhG5vu97sV1E7g91jIEI4OepUEReEpEtfT9TVzkR51BE5I8iUisi5UN8XUTkF33vb5uILAx1jMMJ4D18sC/2bSKyVkTmBbRjY0xU37BaN+8DJgNxwJvA7FO2+RTwu77HNwB/czruEca/Akjqe/zJcIp/JO+jb7tU4BVgPVDqdNwj/D5MA7YAmX3/znM67lG+j7uAT/Y9ng0ccDruU+JbDiwEyof4+lXAaqyl4M4F3nA65lG8h/MG/BxdGeh70BE+LAEqjDGVxphu4EFg1SnbrALu6Xv8D+ASkbBZIXnY+I0xLxlj+hexXQ9MDHGMgQjk+wDwA+AnQGcogwtAIPF/HPi1MaYBwBhTG+IYAxHI+zBA/5p86UB1COMbljHmFeDEGTZZBfzFWNYDGSIyITTRBWa492CMWdv/c8QIfqc14Q++2HrBUNsYY3qBJiA7JNENL5D4B7oNa3QTboZ9HyKyAJhkjHkylIEFKJDvw3Rguoi8LiLrRWRlyKILXCDv47vAzSJyBGu9i8+GJrSgGenvTLgL+Hfa1gVQIkQgi60HtCC7QwKOTURuBkqBC22NaHTO+D5ExAP8FPhIqAIaoUC+DzFYZZ2LsEZkr4rIXGNMo82xjUQg7+NG4M/GmP/rW7v63r734bc/vKAI59/nERGRFVgJ/4JAttcRvvXXfdKAf0/k9I+oJ7cRkRisj7Fn+sgYSoHEj4hcCnwLuNYY0xWi2EZiuPeRCswF1ojIAaza6+NhdOI20J+jx4wxPcaY/cBurD8A4SSQ93Eb8HcAY8w6IAGrv0ukCOh3JtyJyNnA3cAqY8zxQF6jCT+wxdYfBz7c9/h9wIum72xJGBg2/r5SyJ1YyT4c68YwzPswxjQZY3KMMcXGmGKsuuW1xpgyZ8I9TSA/R49inUBHRHKwSjyVIY1yeIG8j0PAJQAiMgsr4deFNMqxeRz4UN9snXOBJmPMUaeDGgkRKQQeAW4xxuwJ+IVOn40OhxvWWfs9WLMTvtX33PexEgpYP9APARXABmCy0zGPMP7ngWPA1r7b407HPJr3ccq2awijWToBfh8EuAPYAbwF3OB0zKN8H7OB17Fm8GwFLnc65lPifwA4CvRgjeZvAz4BfGLA9+HXfe/vrXD7OQrwPdwNNAz4nS4LZL96pa1SSkUJLekopVSU0ISvlFJRQhO+UkpFCU34SikVJTThK6VUlNCEr5RSUUITvlJKRQlN+EopFSX+P+3C6q5WAY1PAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df[df.decision == \"Yes\"].distance.plot.kde()\n", - "df[df.decision == \"No\"].distance.plot.kde()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Best Split Point" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "from chefboost import Chefboost as chef" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "config = {'algorithm': 'C4.5'}" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "C4.5 tree is going to be built...\n", - "Accuracy: 98.66666666666667 % on 300 instances\n", - "finished in 3.5094187259674072 seconds\n" - ] - } - ], - "source": [ - "tmp_df = df[['distance', 'decision']].rename(columns = {\"decision\": \"Decision\"}).copy()\n", - "model = chef.fit(tmp_df, config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Sigma" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "threshold: 0.3147\n" - ] - } - ], - "source": [ - "sigma = 2\n", - "#2 sigma corresponds 95.45% confidence, and 3 sigma corresponds 99.73% confidence\n", - "\n", - "#threshold = round(tp_mean + sigma * tp_std, 4)\n", - "threshold = 0.3147 #comes from c4.5 algorithm\n", - "print(\"threshold: \", threshold)" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.3637" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.decision == 'Yes'].distance.max()" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.3186" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.decision == 'No'].distance.min()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"prediction\"] = \"No\"" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [], - "source": [ - "idx = df[df.distance <= threshold].index\n", - "df.loc[idx, 'prediction'] = 'Yes'" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
file_xfile_ydecisiondistanceprediction
150deepface/tests/dataset/img16.jpgdeepface/tests/dataset/img4.jpgNo0.7178No
25deepface/tests/dataset/img4.jpgdeepface/tests/dataset/img7.jpgYes0.2450Yes
214deepface/tests/dataset/img24.jpgdeepface/tests/dataset/img4.jpgNo0.7362No
135deepface/tests/dataset/img16.jpgdeepface/tests/dataset/img14.jpgNo0.5281No
63deepface/tests/dataset/img19.jpgdeepface/tests/dataset/img23.jpgNo0.6546No
\n", - "
" - ], - "text/plain": [ - " file_x file_y \\\n", - "150 deepface/tests/dataset/img16.jpg deepface/tests/dataset/img4.jpg \n", - "25 deepface/tests/dataset/img4.jpg deepface/tests/dataset/img7.jpg \n", - "214 deepface/tests/dataset/img24.jpg deepface/tests/dataset/img4.jpg \n", - "135 deepface/tests/dataset/img16.jpg deepface/tests/dataset/img14.jpg \n", - "63 deepface/tests/dataset/img19.jpg deepface/tests/dataset/img23.jpg \n", - "\n", - " decision distance prediction \n", - "150 No 0.7178 No \n", - "25 Yes 0.2450 Yes \n", - "214 No 0.7362 No \n", - "135 No 0.5281 No \n", - "63 No 0.6546 No " - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.sample(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [], - "source": [ - "cm = confusion_matrix(df.decision.values, df.prediction.values)" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[262, 0],\n", - " [ 4, 34]], dtype=int64)" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cm" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "metadata": {}, - "outputs": [], - "source": [ - "tn, fp, fn, tp = cm.ravel()" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(262, 0, 4, 34)" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tn, fp, fn, tp" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "metadata": {}, - "outputs": [], - "source": [ - "recall = tp / (tp + fn)\n", - "precision = tp / (tp + fp)\n", - "accuracy = (tp + tn)/(tn + fp + fn + tp)\n", - "f1 = 2 * (precision * recall) / (precision + recall)" - ] - }, - { - "cell_type": "code", - "execution_count": 125, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Precision: 100.0 %\n", - "Recall: 89.47368421052632 %\n", - "F1 score 94.44444444444444 %\n", - "Accuracy: 98.66666666666667 %\n" - ] - } - ], - "source": [ - "print(\"Precision: \", 100*precision,\"%\")\n", - "print(\"Recall: \", 100*recall,\"%\")\n", - "print(\"F1 score \",100*f1, \"%\")\n", - "print(\"Accuracy: \", 100*accuracy,\"%\")" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "metadata": {}, - "outputs": [], - "source": [ - "df.to_csv(\"threshold_pivot.csv\", index = False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Test results\n", - "\n", - "### Threshold = 0.3147 (C4.5 best split point)\n", - "\n", - "Precision: 100.0 %\n", - "\n", - "Recall: 89.47368421052632 %\n", - "\n", - "F1 score 94.44444444444444%\n", - "\n", - "Accuracy: 98.66666666666667 %\n", - "\n", - "### Threshold = 0.3751 (2 sigma)\n", - "\n", - "Precision: 90.47619047619048 %\n", - "\n", - "Recall: 100.0 %\n", - "\n", - "F1 score 95.0 %\n", - "\n", - "Accuracy: 98.66666666666667 %" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}