From 7f719b87bdb98ccae03216919de3a41fe9ae1287 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Sat, 13 Jan 2024 22:21:41 +0000 Subject: [PATCH] interface functions moved to modules --- .github/pull_request_template.md | 13 + deepface/DeepFace.py | 578 ++-------------------- deepface/modules/__init__.py | 0 deepface/modules/demography.py | 184 +++++++ deepface/modules/detection.py | 72 +++ deepface/modules/modeling.py | 71 +++ deepface/{commons => modules}/realtime.py | 0 deepface/modules/recognition.py | 268 ++++++++++ deepface/modules/representation.py | 129 +++++ deepface/modules/verification.py | 151 ++++++ tests/test_represent.py | 22 +- 11 files changed, 957 insertions(+), 531 deletions(-) create mode 100644 .github/pull_request_template.md create mode 100644 deepface/modules/__init__.py create mode 100644 deepface/modules/demography.py create mode 100644 deepface/modules/detection.py create mode 100644 deepface/modules/modeling.py rename deepface/{commons => modules}/realtime.py (100%) create mode 100644 deepface/modules/recognition.py create mode 100644 deepface/modules/representation.py create mode 100644 deepface/modules/verification.py diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..2ff6a96 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,13 @@ +## Tickets + +https://github.com/serengil/deepface/issues/XXX + +### What has been done + +With this PR, ... + +## How to test + +```shell +make lint && make test +``` \ No newline at end of file diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index 4a157d3..84d5f42 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -1,35 +1,27 @@ # common dependencies import os -from os import path import warnings -import time -import pickle import logging from typing import Any, Dict, List, Tuple, Union # 3rd party dependencies import numpy as np import pandas as pd -from tqdm import tqdm -import cv2 import tensorflow as tf from deprecated import deprecated # package dependencies -from deepface.basemodels import ( - VGGFace, - OpenFace, - Facenet, - Facenet512, - FbDeepFace, - DeepID, - DlibWrapper, - ArcFace, - SFace, -) -from deepface.extendedmodels import Age, Gender, Race, Emotion -from deepface.commons import functions, realtime, distance as dst +from deepface.commons import functions from deepface.commons.logger import Logger +from deepface.modules import ( + modeling, + representation, + verification, + recognition, + demography, + detection, + realtime, +) # pylint: disable=no-else-raise, simplifiable-if-expression @@ -60,38 +52,7 @@ def build_model(model_name: str) -> Union[Model, Any]: Returns: built deepface model ( (tf.)keras.models.Model ) """ - - # singleton design pattern - global model_obj - - models = { - "VGG-Face": VGGFace.loadModel, - "OpenFace": OpenFace.loadModel, - "Facenet": Facenet.loadModel, - "Facenet512": Facenet512.loadModel, - "DeepFace": FbDeepFace.loadModel, - "DeepID": DeepID.loadModel, - "Dlib": DlibWrapper.loadModel, - "ArcFace": ArcFace.loadModel, - "SFace": SFace.load_model, - "Emotion": Emotion.loadModel, - "Age": Age.loadModel, - "Gender": Gender.loadModel, - "Race": Race.loadModel, - } - - if not "model_obj" in globals(): - model_obj = {} - - if not model_name in model_obj: - model = models.get(model_name) - if model: - model = model() - model_obj[model_name] = model - else: - raise ValueError(f"Invalid model_name passed - {model_name}") - - return model_obj[model_name] + return 
modeling.build_model(model_name=model_name) def verify( @@ -149,90 +110,17 @@ def verify( """ - tic = time.time() - - # -------------------------------- - target_size = functions.find_target_size(model_name=model_name) - - # img pairs might have many faces - img1_objs = functions.extract_faces( - img=img1_path, - target_size=target_size, + return verification.verify( + img1_path=img1_path, + img2_path=img2_path, + model_name=model_name, detector_backend=detector_backend, - grayscale=False, + distance_metric=distance_metric, enforce_detection=enforce_detection, align=align, + normalization=normalization, ) - img2_objs = functions.extract_faces( - img=img2_path, - target_size=target_size, - detector_backend=detector_backend, - grayscale=False, - enforce_detection=enforce_detection, - align=align, - ) - # -------------------------------- - distances = [] - regions = [] - # now we will find the face pair with minimum distance - for img1_content, img1_region, _ in img1_objs: - for img2_content, img2_region, _ in img2_objs: - img1_embedding_obj = represent( - img_path=img1_content, - model_name=model_name, - enforce_detection=enforce_detection, - detector_backend="skip", - align=align, - normalization=normalization, - ) - - img2_embedding_obj = represent( - img_path=img2_content, - model_name=model_name, - enforce_detection=enforce_detection, - detector_backend="skip", - align=align, - normalization=normalization, - ) - - img1_representation = img1_embedding_obj[0]["embedding"] - img2_representation = img2_embedding_obj[0]["embedding"] - - if distance_metric == "cosine": - distance = dst.findCosineDistance(img1_representation, img2_representation) - elif distance_metric == "euclidean": - distance = dst.findEuclideanDistance(img1_representation, img2_representation) - elif distance_metric == "euclidean_l2": - distance = dst.findEuclideanDistance( - dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation) - ) - else: - raise ValueError("Invalid distance_metric passed - ", distance_metric) - - distances.append(distance) - regions.append((img1_region, img2_region)) - - # ------------------------------- - threshold = dst.findThreshold(model_name, distance_metric) - distance = min(distances) # best distance - facial_areas = regions[np.argmin(distances)] - - toc = time.time() - - resp_obj = { - "verified": True if distance <= threshold else False, - "distance": distance, - "threshold": threshold, - "model": model_name, - "detector_backend": detector_backend, - "similarity_metric": distance_metric, - "facial_areas": {"img1": facial_areas[0], "img2": facial_areas[1]}, - "time": round(toc - tic, 2), - } - - return resp_obj - def analyze( img_path: Union[str, np.ndarray], @@ -301,116 +189,15 @@ def analyze( } ] """ - # --------------------------------- - # validate actions - if isinstance(actions, str): - actions = (actions,) - - # check if actions is not an iterable or empty. - if not hasattr(actions, "__getitem__") or not actions: - raise ValueError("`actions` must be a list of strings.") - - actions = list(actions) - - # For each action, check if it is valid - for action in actions: - if action not in ("emotion", "age", "gender", "race"): - raise ValueError( - f"Invalid action passed ({repr(action)})). " - "Valid actions are `emotion`, `age`, `gender`, `race`." 
- ) - # --------------------------------- - # build models - models = {} - if "emotion" in actions: - models["emotion"] = build_model("Emotion") - - if "age" in actions: - models["age"] = build_model("Age") - - if "gender" in actions: - models["gender"] = build_model("Gender") - - if "race" in actions: - models["race"] = build_model("Race") - # --------------------------------- - resp_objects = [] - - img_objs = functions.extract_faces( - img=img_path, - target_size=(224, 224), - detector_backend=detector_backend, - grayscale=False, + return demography.analyze( + img_path=img_path, + actions=actions, enforce_detection=enforce_detection, + detector_backend=detector_backend, align=align, + silent=silent, ) - for img_content, img_region, img_confidence in img_objs: - if img_content.shape[0] > 0 and img_content.shape[1] > 0: - obj = {} - # facial attribute analysis - pbar = tqdm( - range(0, len(actions)), - desc="Finding actions", - disable=silent if len(actions) > 1 else True, - ) - for index in pbar: - action = actions[index] - pbar.set_description(f"Action: {action}") - - if action == "emotion": - img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY) - img_gray = cv2.resize(img_gray, (48, 48)) - img_gray = np.expand_dims(img_gray, axis=0) - - emotion_predictions = models["emotion"].predict(img_gray, verbose=0)[0, :] - - sum_of_predictions = emotion_predictions.sum() - - obj["emotion"] = {} - - for i, emotion_label in enumerate(Emotion.labels): - emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions - obj["emotion"][emotion_label] = emotion_prediction - - obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)] - - elif action == "age": - age_predictions = models["age"].predict(img_content, verbose=0)[0, :] - apparent_age = Age.findApparentAge(age_predictions) - # int cast is for exception - object of type 'float32' is not JSON serializable - obj["age"] = int(apparent_age) - - elif action == "gender": - gender_predictions = models["gender"].predict(img_content, verbose=0)[0, :] - obj["gender"] = {} - for i, gender_label in enumerate(Gender.labels): - gender_prediction = 100 * gender_predictions[i] - obj["gender"][gender_label] = gender_prediction - - obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)] - - elif action == "race": - race_predictions = models["race"].predict(img_content, verbose=0)[0, :] - sum_of_predictions = race_predictions.sum() - - obj["race"] = {} - for i, race_label in enumerate(Race.labels): - race_prediction = 100 * race_predictions[i] / sum_of_predictions - obj["race"][race_label] = race_prediction - - obj["dominant_race"] = Race.labels[np.argmax(race_predictions)] - - # ----------------------------- - # mention facial areas - obj["region"] = img_region - # include image confidence - obj["face_confidence"] = img_confidence - - resp_objects.append(obj) - - return resp_objects - def find( img_path: Union[str, np.ndarray], @@ -457,211 +244,18 @@ def find( This function returns list of pandas data frame. Each item of the list corresponding to an identity in the img_path. 
""" - - tic = time.time() - - # ------------------------------- - if os.path.isdir(db_path) is not True: - raise ValueError("Passed db_path does not exist!") - - target_size = functions.find_target_size(model_name=model_name) - - # --------------------------------------- - - file_name = f"representations_{model_name}.pkl" - file_name = file_name.replace("-", "_").lower() - - df_cols = [ - "identity", - f"{model_name}_representation", - "target_x", - "target_y", - "target_w", - "target_h", - ] - - if path.exists(db_path + "/" + file_name): - if not silent: - logger.warn( - f"Representations for images in {db_path} folder were previously stored" - f" in {file_name}. If you added new instances after the creation, then please " - "delete this file and call find function again. It will create it again." - ) - - with open(f"{db_path}/{file_name}", "rb") as f: - representations = pickle.load(f) - - if len(representations) > 0 and len(representations[0]) != len(df_cols): - raise ValueError( - f"Seems existing {db_path}/{file_name} is out-of-the-date." - "Delete it and re-run." - ) - - if not silent: - logger.info(f"There are {len(representations)} representations found in {file_name}") - - else: # create representation.pkl from scratch - employees = [] - - for r, _, f in os.walk(db_path): - for file in f: - if ( - (".jpg" in file.lower()) - or (".jpeg" in file.lower()) - or (".png" in file.lower()) - ): - exact_path = r + "/" + file - employees.append(exact_path) - - if len(employees) == 0: - raise ValueError( - "There is no image in ", - db_path, - " folder! Validate .jpg or .png files exist in this path.", - ) - - # ------------------------ - # find representations for db images - - representations = [] - - # for employee in employees: - pbar = tqdm( - range(0, len(employees)), - desc="Finding representations", - disable=silent, - ) - for index in pbar: - employee = employees[index] - - img_objs = functions.extract_faces( - img=employee, - target_size=target_size, - detector_backend=detector_backend, - grayscale=False, - enforce_detection=enforce_detection, - align=align, - ) - - for img_content, img_region, _ in img_objs: - embedding_obj = represent( - img_path=img_content, - model_name=model_name, - enforce_detection=enforce_detection, - detector_backend="skip", - align=align, - normalization=normalization, - ) - - img_representation = embedding_obj[0]["embedding"] - - instance = [] - instance.append(employee) - instance.append(img_representation) - instance.append(img_region["x"]) - instance.append(img_region["y"]) - instance.append(img_region["w"]) - instance.append(img_region["h"]) - representations.append(instance) - - # ------------------------------- - - with open(f"{db_path}/{file_name}", "wb") as f: - pickle.dump(representations, f) - - if not silent: - logger.info( - f"Representations stored in {db_path}/{file_name} file." - + "Please delete this file when you add new identities in your database." 
- ) - - # ---------------------------- - # now, we got representations for facial database - df = pd.DataFrame( - representations, - columns=df_cols, - ) - - # img path might have more than once face - source_objs = functions.extract_faces( - img=img_path, - target_size=target_size, - detector_backend=detector_backend, - grayscale=False, + return recognition.find( + img_path=img_path, + db_path=db_path, + model_name=model_name, + distance_metric=distance_metric, enforce_detection=enforce_detection, + detector_backend=detector_backend, align=align, + normalization=normalization, + silent=silent, ) - resp_obj = [] - - for source_img, source_region, _ in source_objs: - target_embedding_obj = represent( - img_path=source_img, - model_name=model_name, - enforce_detection=enforce_detection, - detector_backend="skip", - align=align, - normalization=normalization, - ) - - target_representation = target_embedding_obj[0]["embedding"] - - result_df = df.copy() # df will be filtered in each img - result_df["source_x"] = source_region["x"] - result_df["source_y"] = source_region["y"] - result_df["source_w"] = source_region["w"] - result_df["source_h"] = source_region["h"] - - distances = [] - for index, instance in df.iterrows(): - source_representation = instance[f"{model_name}_representation"] - - target_dims = len(list(target_representation)) - source_dims = len(list(source_representation)) - if target_dims != source_dims: - raise ValueError( - "Source and target embeddings must have same dimensions but " - + f"{target_dims}:{source_dims}. Model structure may change" - + " after pickle created. Delete the {file_name} and re-run." - ) - - if distance_metric == "cosine": - distance = dst.findCosineDistance(source_representation, target_representation) - elif distance_metric == "euclidean": - distance = dst.findEuclideanDistance(source_representation, target_representation) - elif distance_metric == "euclidean_l2": - distance = dst.findEuclideanDistance( - dst.l2_normalize(source_representation), - dst.l2_normalize(target_representation), - ) - else: - raise ValueError(f"invalid distance metric passes - {distance_metric}") - - distances.append(distance) - - # --------------------------- - - result_df[f"{model_name}_{distance_metric}"] = distances - - threshold = dst.findThreshold(model_name, distance_metric) - result_df = result_df.drop(columns=[f"{model_name}_representation"]) - # pylint: disable=unsubscriptable-object - result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold] - result_df = result_df.sort_values( - by=[f"{model_name}_{distance_metric}"], ascending=True - ).reset_index(drop=True) - - resp_obj.append(result_df) - - # ----------------------------------- - - toc = time.time() - - if not silent: - logger.info(f"find function lasts {toc - tic} seconds") - - return resp_obj - def represent( img_path: Union[str, np.ndarray], @@ -714,64 +308,14 @@ def represent( "face_confidence": float } """ - resp_objs = [] - - model = build_model(model_name) - - # --------------------------------- - # we have run pre-process in verification. so, this can be skipped if it is coming from verify. - target_size = functions.find_target_size(model_name=model_name) - if detector_backend != "skip": - img_objs = functions.extract_faces( - img=img_path, - target_size=target_size, - detector_backend=detector_backend, - grayscale=False, - enforce_detection=enforce_detection, - align=align, - ) - else: # skip - # Try load. 
If load error, will raise exception internal - img, _ = functions.load_image(img_path) - # -------------------------------- - if len(img.shape) == 4: - img = img[0] # e.g. (1, 224, 224, 3) to (224, 224, 3) - if len(img.shape) == 3: - img = cv2.resize(img, target_size) - img = np.expand_dims(img, axis=0) - # when called from verify, this is already normalized. But needed when user given. - if img.max() > 1: - img = img.astype(np.float32) / 255.0 - # -------------------------------- - # make dummy region and confidence to keep compatibility with `extract_faces` - img_region = {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]} - img_objs = [(img, img_region, 0)] - # --------------------------------- - - for img, region, confidence in img_objs: - # custom normalization - img = functions.normalize_input(img=img, normalization=normalization) - - # represent - # if "keras" in str(type(model)): - if isinstance(model, Model): - # model.predict causes memory issue when it is called in a for loop - # embedding = model.predict(img, verbose=0)[0].tolist() - embedding = model(img, training=False).numpy()[0].tolist() - # if you still get verbose logging. try call - # - `tf.keras.utils.disable_interactive_logging()` - # in your main program - else: - # SFace and Dlib are not keras models and no verbose arguments - embedding = model.predict(img)[0].tolist() - - resp_obj = {} - resp_obj["embedding"] = embedding - resp_obj["facial_area"] = region - resp_obj["face_confidence"] = confidence - resp_objs.append(resp_obj) - - return resp_objs + return representation.represent( + img_path=img_path, + model_name=model_name, + enforce_detection=enforce_detection, + detector_backend=detector_backend, + align=align, + normalization=normalization, + ) def stream( @@ -807,23 +351,15 @@ def stream( """ - if time_threshold < 1: - raise ValueError( - "time_threshold must be greater than the value 1 but you passed " + str(time_threshold) - ) - - if frame_threshold < 1: - raise ValueError( - "frame_threshold must be greater than the value 1 but you passed " - + str(frame_threshold) - ) + time_threshold = max(time_threshold, 1) + frame_threshold = max(frame_threshold, 1) realtime.analysis( - db_path, - model_name, - detector_backend, - distance_metric, - enable_face_analysis, + db_path=db_path, + model_name=model_name, + detector_backend=detector_backend, + distance_metric=distance_metric, + enable_face_analysis=enable_face_analysis, source=source, time_threshold=time_threshold, frame_threshold=frame_threshold, @@ -867,31 +403,15 @@ def extract_faces( """ - resp_objs = [] - - img_objs = functions.extract_faces( - img=img_path, + return detection.extract_faces( + img_path=img_path, target_size=target_size, detector_backend=detector_backend, - grayscale=grayscale, enforce_detection=enforce_detection, align=align, + grayscale=grayscale, ) - for img, region, confidence in img_objs: - resp_obj = {} - - # discard expanded dimension - if len(img.shape) == 4: - img = img[0] - - resp_obj["face"] = img[:, :, ::-1] - resp_obj["facial_area"] = region - resp_obj["confidence"] = confidence - resp_objs.append(resp_obj) - - return resp_objs - # --------------------------- # deprecated functions @@ -904,7 +424,7 @@ def detectFace( detector_backend: str = "opencv", enforce_detection: bool = True, align: bool = True, -) -> np.ndarray: +) -> Union[np.ndarray, None]: """ Deprecated function. Use extract_faces for same functionality. 
diff --git a/deepface/modules/__init__.py b/deepface/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/deepface/modules/demography.py b/deepface/modules/demography.py new file mode 100644 index 0000000..664b85a --- /dev/null +++ b/deepface/modules/demography.py @@ -0,0 +1,184 @@ +# built-in dependencies +from typing import Any, Dict, List, Union + +# 3rd party dependencies +import numpy as np +from tqdm import tqdm +import cv2 + +# project dependencies +from deepface.modules import modeling +from deepface.commons import functions +from deepface.extendedmodels import Age, Gender, Race, Emotion + + +def analyze( + img_path: Union[str, np.ndarray], + actions: Union[tuple, list] = ("emotion", "age", "gender", "race"), + enforce_detection: bool = True, + detector_backend: str = "opencv", + align: bool = True, + silent: bool = False, +) -> List[Dict[str, Any]]: + """ + This function analyzes facial attributes including age, gender, emotion and race. + In the background, analysis function builds convolutional neural network models to + classify age, gender, emotion and race of the input image. + + Parameters: + img_path: exact image path, numpy array (BGR) or base64 encoded image could be passed. + If source image has more than one face, then result will be size of number of faces + appearing in the image. + + actions (tuple): The default is ('age', 'gender', 'emotion', 'race'). You can drop + some of those attributes. + + enforce_detection (bool): The function throws exception if no face detected by default. + Set this to False if you don't want to get exception. This might be convenient for low + resolution images. + + detector_backend (string): set face detector backend to opencv, retinaface, mtcnn, ssd, + dlib, mediapipe or yolov8. + + align (boolean): alignment according to the eye positions. + + silent (boolean): disable (some) log messages + + Returns: + The function returns a list of dictionaries for each face appearing in the image. + + [ + { + "region": {'x': 230, 'y': 120, 'w': 36, 'h': 45}, + "age": 28.66, + 'face_confidence': 0.9993908405303955, + "dominant_gender": "Woman", + "gender": { + 'Woman': 99.99407529830933, + 'Man': 0.005928758764639497, + } + "dominant_emotion": "neutral", + "emotion": { + 'sad': 37.65260875225067, + 'angry': 0.15512987738475204, + 'surprise': 0.0022171278033056296, + 'fear': 1.2489334680140018, + 'happy': 4.609785228967667, + 'disgust': 9.698561953541684e-07, + 'neutral': 56.33133053779602 + } + "dominant_race": "white", + "race": { + 'indian': 0.5480832420289516, + 'asian': 0.7830780930817127, + 'latino hispanic': 2.0677512511610985, + 'black': 0.06337375962175429, + 'middle eastern': 3.088453598320484, + 'white': 93.44925880432129 + } + } + ] + """ + # --------------------------------- + # validate actions + if isinstance(actions, str): + actions = (actions,) + + # check if actions is not an iterable or empty. + if not hasattr(actions, "__getitem__") or not actions: + raise ValueError("`actions` must be a list of strings.") + + actions = list(actions) + + # For each action, check if it is valid + for action in actions: + if action not in ("emotion", "age", "gender", "race"): + raise ValueError( + f"Invalid action passed ({repr(action)})). " + "Valid actions are `emotion`, `age`, `gender`, `race`." 
+ ) + # --------------------------------- + resp_objects = [] + + img_objs = functions.extract_faces( + img=img_path, + target_size=(224, 224), + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + ) + + for img_content, img_region, img_confidence in img_objs: + if img_content.shape[0] > 0 and img_content.shape[1] > 0: + obj = {} + # facial attribute analysis + pbar = tqdm( + range(0, len(actions)), + desc="Finding actions", + disable=silent if len(actions) > 1 else True, + ) + for index in pbar: + action = actions[index] + pbar.set_description(f"Action: {action}") + + if action == "emotion": + img_gray = cv2.cvtColor(img_content[0], cv2.COLOR_BGR2GRAY) + img_gray = cv2.resize(img_gray, (48, 48)) + img_gray = np.expand_dims(img_gray, axis=0) + + emotion_predictions = modeling.build_model("Emotion").predict( + img_gray, verbose=0 + )[0, :] + + sum_of_predictions = emotion_predictions.sum() + + obj["emotion"] = {} + + for i, emotion_label in enumerate(Emotion.labels): + emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions + obj["emotion"][emotion_label] = emotion_prediction + + obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)] + + elif action == "age": + age_predictions = modeling.build_model("Age").predict(img_content, verbose=0)[ + 0, : + ] + apparent_age = Age.findApparentAge(age_predictions) + # int cast is for exception - object of type 'float32' is not JSON serializable + obj["age"] = int(apparent_age) + + elif action == "gender": + gender_predictions = modeling.build_model("Gender").predict( + img_content, verbose=0 + )[0, :] + obj["gender"] = {} + for i, gender_label in enumerate(Gender.labels): + gender_prediction = 100 * gender_predictions[i] + obj["gender"][gender_label] = gender_prediction + + obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)] + + elif action == "race": + race_predictions = modeling.build_model("Race").predict(img_content, verbose=0)[ + 0, : + ] + sum_of_predictions = race_predictions.sum() + + obj["race"] = {} + for i, race_label in enumerate(Race.labels): + race_prediction = 100 * race_predictions[i] / sum_of_predictions + obj["race"][race_label] = race_prediction + + obj["dominant_race"] = Race.labels[np.argmax(race_predictions)] + + # ----------------------------- + # mention facial areas + obj["region"] = img_region + # include image confidence + obj["face_confidence"] = img_confidence + + resp_objects.append(obj) + + return resp_objects diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py new file mode 100644 index 0000000..ae92aaa --- /dev/null +++ b/deepface/modules/detection.py @@ -0,0 +1,72 @@ +# built-in dependencies +from typing import Any, Dict, List, Tuple, Union + +# 3rd part dependencies +import numpy as np + +# project dependencies +from deepface.commons import functions + + +def extract_faces( + img_path: Union[str, np.ndarray], + target_size: Tuple[int, int] = (224, 224), + detector_backend: str = "opencv", + enforce_detection: bool = True, + align: bool = True, + grayscale: bool = False, +) -> List[Dict[str, Any]]: + """ + This function applies pre-processing stages of a face recognition pipeline + including detection and alignment + + Parameters: + img_path: exact image path, numpy array (BGR) or base64 encoded image. + Source image can have many face. Then, result will be the size of number + of faces appearing in that source image. + + target_size (tuple): final shape of facial image. 
black pixels will be + added to resize the image. + + detector_backend (string): face detection backends are retinaface, mtcnn, + opencv, ssd or dlib + + enforce_detection (boolean): function throws exception if face cannot be + detected in the fed image. Set this to False if you do not want to get + an exception and run the function anyway. + + align (boolean): alignment according to the eye positions. + + grayscale (boolean): extracting faces in rgb or gray scale + + Returns: + list of dictionaries. Each dictionary will have facial image itself (RGB), + extracted area from the original image and confidence score. + + """ + + resp_objs = [] + + img_objs = functions.extract_faces( + img=img_path, + target_size=target_size, + detector_backend=detector_backend, + grayscale=grayscale, + enforce_detection=enforce_detection, + align=align, + ) + + for img, region, confidence in img_objs: + resp_obj = {} + + # discard expanded dimension + if len(img.shape) == 4: + img = img[0] + + # bgr to rgb + resp_obj["face"] = img[:, :, ::-1] + resp_obj["facial_area"] = region + resp_obj["confidence"] = confidence + resp_objs.append(resp_obj) + + return resp_objs diff --git a/deepface/modules/modeling.py b/deepface/modules/modeling.py new file mode 100644 index 0000000..8936dab --- /dev/null +++ b/deepface/modules/modeling.py @@ -0,0 +1,71 @@ +# built-in dependencies +from typing import Any, Union + +# 3rd party dependencies +import tensorflow as tf + +# project dependencies +from deepface.basemodels import ( + VGGFace, + OpenFace, + Facenet, + Facenet512, + FbDeepFace, + DeepID, + DlibWrapper, + ArcFace, + SFace, +) +from deepface.extendedmodels import Age, Gender, Race, Emotion + +# conditional dependencies +tf_version = int(tf.__version__.split(".", maxsplit=1)[0]) +if tf_version == 2: + from tensorflow.keras.models import Model +else: + from keras.models import Model + + +def build_model(model_name: str) -> Union[Model, Any]: + """ + This function builds a deepface model + Parameters: + model_name (string): face recognition or facial attribute model + VGG-Face, Facenet, OpenFace, DeepFace, DeepID for face recognition + Age, Gender, Emotion, Race for facial attributes + + Returns: + built deepface model ( (tf.)keras.models.Model ) + """ + + # singleton design pattern + global model_obj + + models = { + "VGG-Face": VGGFace.loadModel, + "OpenFace": OpenFace.loadModel, + "Facenet": Facenet.loadModel, + "Facenet512": Facenet512.loadModel, + "DeepFace": FbDeepFace.loadModel, + "DeepID": DeepID.loadModel, + "Dlib": DlibWrapper.loadModel, + "ArcFace": ArcFace.loadModel, + "SFace": SFace.load_model, + "Emotion": Emotion.loadModel, + "Age": Age.loadModel, + "Gender": Gender.loadModel, + "Race": Race.loadModel, + } + + if not "model_obj" in globals(): + model_obj = {} + + if not model_name in model_obj: + model = models.get(model_name) + if model: + model = model() + model_obj[model_name] = model + else: + raise ValueError(f"Invalid model_name passed - {model_name}") + + return model_obj[model_name] diff --git a/deepface/commons/realtime.py b/deepface/modules/realtime.py similarity index 100% rename from deepface/commons/realtime.py rename to deepface/modules/realtime.py diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py new file mode 100644 index 0000000..096385d --- /dev/null +++ b/deepface/modules/recognition.py @@ -0,0 +1,268 @@ +# built-in dependencies +import os +import pickle +from typing import List, Union +import time + +# 3rd party dependencies +import numpy as np +import 
pandas as pd +from tqdm import tqdm + +# project dependencies +from deepface.commons import functions, distance as dst +from deepface.commons.logger import Logger +from deepface.modules import representation + +logger = Logger(module="deepface/modules/recognition.py") + + +def find( + img_path: Union[str, np.ndarray], + db_path: str, + model_name: str = "VGG-Face", + distance_metric: str = "cosine", + enforce_detection: bool = True, + detector_backend: str = "opencv", + align: bool = True, + normalization: str = "base", + silent: bool = False, +) -> List[pd.DataFrame]: + """ + This function applies verification several times and find the identities in a database + + Parameters: + img_path: exact image path, numpy array (BGR) or based64 encoded image. + Source image can have many faces. Then, result will be the size of number of + faces in the source image. + + db_path (string): You should store some image files in a folder and pass the + exact folder path to this. A database image can also have many faces. + Then, all detected faces in db side will be considered in the decision. + + model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, + Dlib, ArcFace, SFace or Ensemble + + distance_metric (string): cosine, euclidean, euclidean_l2 + + enforce_detection (bool): The function throws exception if a face could not be detected. + Set this to False if you don't want to get exception. This might be convenient for low + resolution images. + + detector_backend (string): set face detector backend to opencv, retinaface, mtcnn, ssd, + dlib, mediapipe or yolov8. + + align (boolean): alignment according to the eye positions. + + normalization (string): normalize the input image before feeding to model + + silent (boolean): disable some logging and progress bars + + Returns: + This function returns list of pandas data frame. Each item of the list corresponding to + an identity in the img_path. + """ + + tic = time.time() + + # ------------------------------- + if os.path.isdir(db_path) is not True: + raise ValueError("Passed db_path does not exist!") + + target_size = functions.find_target_size(model_name=model_name) + + # --------------------------------------- + + file_name = f"representations_{model_name}.pkl" + file_name = file_name.replace("-", "_").lower() + + df_cols = [ + "identity", + f"{model_name}_representation", + "target_x", + "target_y", + "target_w", + "target_h", + ] + + if os.path.exists(db_path + "/" + file_name): + if not silent: + logger.warn( + f"Representations for images in {db_path} folder were previously stored" + f" in {file_name}. If you added new instances after the creation, then please " + "delete this file and call find function again. It will create it again." + ) + + with open(f"{db_path}/{file_name}", "rb") as f: + representations = pickle.load(f) + + if len(representations) > 0 and len(representations[0]) != len(df_cols): + raise ValueError( + f"Seems existing {db_path}/{file_name} is out-of-the-date." + "Delete it and re-run." + ) + + if not silent: + logger.info(f"There are {len(representations)} representations found in {file_name}") + + else: # create representation.pkl from scratch + employees = [] + + for r, _, f in os.walk(db_path): + for file in f: + if ( + (".jpg" in file.lower()) + or (".jpeg" in file.lower()) + or (".png" in file.lower()) + ): + exact_path = r + "/" + file + employees.append(exact_path) + + if len(employees) == 0: + raise ValueError( + "There is no image in ", + db_path, + " folder! 
Validate .jpg or .png files exist in this path.", + ) + + # ------------------------ + # find representations for db images + + representations = [] + + # for employee in employees: + pbar = tqdm( + range(0, len(employees)), + desc="Finding representations", + disable=silent, + ) + for index in pbar: + employee = employees[index] + + img_objs = functions.extract_faces( + img=employee, + target_size=target_size, + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + ) + + for img_content, img_region, _ in img_objs: + embedding_obj = representation.represent( + img_path=img_content, + model_name=model_name, + enforce_detection=enforce_detection, + detector_backend="skip", + align=align, + normalization=normalization, + ) + + img_representation = embedding_obj[0]["embedding"] + + instance = [] + instance.append(employee) + instance.append(img_representation) + instance.append(img_region["x"]) + instance.append(img_region["y"]) + instance.append(img_region["w"]) + instance.append(img_region["h"]) + representations.append(instance) + + # ------------------------------- + + with open(f"{db_path}/{file_name}", "wb") as f: + pickle.dump(representations, f) + + if not silent: + logger.info( + f"Representations stored in {db_path}/{file_name} file." + + "Please delete this file when you add new identities in your database." + ) + + # ---------------------------- + # now, we got representations for facial database + df = pd.DataFrame( + representations, + columns=df_cols, + ) + + # img path might have more than once face + source_objs = functions.extract_faces( + img=img_path, + target_size=target_size, + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + ) + + resp_obj = [] + + for source_img, source_region, _ in source_objs: + target_embedding_obj = representation.represent( + img_path=source_img, + model_name=model_name, + enforce_detection=enforce_detection, + detector_backend="skip", + align=align, + normalization=normalization, + ) + + target_representation = target_embedding_obj[0]["embedding"] + + result_df = df.copy() # df will be filtered in each img + result_df["source_x"] = source_region["x"] + result_df["source_y"] = source_region["y"] + result_df["source_w"] = source_region["w"] + result_df["source_h"] = source_region["h"] + + distances = [] + for index, instance in df.iterrows(): + source_representation = instance[f"{model_name}_representation"] + + target_dims = len(list(target_representation)) + source_dims = len(list(source_representation)) + if target_dims != source_dims: + raise ValueError( + "Source and target embeddings must have same dimensions but " + + f"{target_dims}:{source_dims}. Model structure may change" + + " after pickle created. Delete the {file_name} and re-run." 
+ ) + + if distance_metric == "cosine": + distance = dst.findCosineDistance(source_representation, target_representation) + elif distance_metric == "euclidean": + distance = dst.findEuclideanDistance(source_representation, target_representation) + elif distance_metric == "euclidean_l2": + distance = dst.findEuclideanDistance( + dst.l2_normalize(source_representation), + dst.l2_normalize(target_representation), + ) + else: + raise ValueError(f"invalid distance metric passes - {distance_metric}") + + distances.append(distance) + + # --------------------------- + + result_df[f"{model_name}_{distance_metric}"] = distances + + threshold = dst.findThreshold(model_name, distance_metric) + result_df = result_df.drop(columns=[f"{model_name}_representation"]) + # pylint: disable=unsubscriptable-object + result_df = result_df[result_df[f"{model_name}_{distance_metric}"] <= threshold] + result_df = result_df.sort_values( + by=[f"{model_name}_{distance_metric}"], ascending=True + ).reset_index(drop=True) + + resp_obj.append(result_df) + + # ----------------------------------- + + toc = time.time() + + if not silent: + logger.info(f"find function lasts {toc - tic} seconds") + + return resp_obj diff --git a/deepface/modules/representation.py b/deepface/modules/representation.py new file mode 100644 index 0000000..ab2e01d --- /dev/null +++ b/deepface/modules/representation.py @@ -0,0 +1,129 @@ +# built-in dependencies +from typing import Any, Dict, List, Union + +# 3rd party dependencies +import numpy as np +import cv2 +import tensorflow as tf + +# project dependencies +from deepface.modules import modeling +from deepface.commons import functions + +# conditional dependencies +tf_version = int(tf.__version__.split(".", maxsplit=1)[0]) +if tf_version == 2: + from tensorflow.keras.models import Model +else: + from keras.models import Model + + +def represent( + img_path: Union[str, np.ndarray], + model_name: str = "VGG-Face", + enforce_detection: bool = True, + detector_backend: str = "opencv", + align: bool = True, + normalization: str = "base", +) -> List[Dict[str, Any]]: + """ + This function represents facial images as vectors. The function uses convolutional neural + networks models to generate vector embeddings. + + Parameters: + img_path (string): exact image path. Alternatively, numpy array (BGR) or based64 + encoded images could be passed. Source image can have many faces. Then, result will + be the size of number of faces appearing in the source image. + + model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, + ArcFace, SFace + + enforce_detection (boolean): If no face could not be detected in an image, then this + function will return exception by default. Set this to False not to have this exception. + This might be convenient for low resolution images. + + detector_backend (string): set face detector backend to opencv, retinaface, mtcnn, ssd, + dlib, mediapipe or yolov8. A special value `skip` could be used to skip face-detection + and only encode the given image. + + align (boolean): alignment according to the eye positions. + + normalization (string): normalize the input image before feeding to model + + Returns: + Represent function returns a list of object, each object has fields as follows: + { + // Multidimensional vector + // The number of dimensions is changing based on the reference model. + // E.g. FaceNet returns 128 dimensional vector; + // VGG-Face returns 2622 dimensional vector. 
+ "embedding": np.array, + + // Detected Facial-Area by Face detection in dict format. + // (x, y) is left-corner point, and (w, h) is the width and height + // If `detector_backend` == `skip`, it is the full image area and nonsense. + "facial_area": dict{"x": int, "y": int, "w": int, "h": int}, + + // Face detection confidence. + // If `detector_backend` == `skip`, will be 0 and nonsense. + "face_confidence": float + } + """ + resp_objs = [] + + model = modeling.build_model(model_name) + + # --------------------------------- + # we have run pre-process in verification. so, this can be skipped if it is coming from verify. + target_size = functions.find_target_size(model_name=model_name) + if detector_backend != "skip": + img_objs = functions.extract_faces( + img=img_path, + target_size=target_size, + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + ) + else: # skip + # Try load. If load error, will raise exception internal + img, _ = functions.load_image(img_path) + # -------------------------------- + if len(img.shape) == 4: + img = img[0] # e.g. (1, 224, 224, 3) to (224, 224, 3) + if len(img.shape) == 3: + img = cv2.resize(img, target_size) + img = np.expand_dims(img, axis=0) + # when called from verify, this is already normalized. But needed when user given. + if img.max() > 1: + img = (img.astype(np.float32) / 255.0).astype(np.float32) + # -------------------------------- + # make dummy region and confidence to keep compatibility with `extract_faces` + img_region = {"x": 0, "y": 0, "w": img.shape[1], "h": img.shape[2]} + img_objs = [(img, img_region, 0)] + # --------------------------------- + + for img, region, confidence in img_objs: + # custom normalization + img = functions.normalize_input(img=img, normalization=normalization) + + # represent + # if "keras" in str(type(model)): + if isinstance(model, Model): + # model.predict causes memory issue when it is called in a for loop + # embedding = model.predict(img, verbose=0)[0].tolist() + embedding = model(img, training=False).numpy()[0].tolist() + # if you still get verbose logging. try call + # - `tf.keras.utils.disable_interactive_logging()` + # in your main program + else: + # SFace and Dlib are not keras models and no verbose arguments + embedding = model.predict(img)[0].tolist() + + resp_obj = {} + resp_obj["embedding"] = embedding + resp_obj["facial_area"] = region + resp_obj["face_confidence"] = confidence + resp_objs.append(resp_obj) + + return resp_objs diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py new file mode 100644 index 0000000..b5dc8fa --- /dev/null +++ b/deepface/modules/verification.py @@ -0,0 +1,151 @@ +# built-in dependencies +import time +from typing import Any, Dict, Union + +# 3rd party dependencies +import numpy as np + +# project dependencies +from deepface.commons import functions, distance as dst +from deepface.modules import representation + + +def verify( + img1_path: Union[str, np.ndarray], + img2_path: Union[str, np.ndarray], + model_name: str = "VGG-Face", + detector_backend: str = "opencv", + distance_metric: str = "cosine", + enforce_detection: bool = True, + align: bool = True, + normalization: str = "base", +) -> Dict[str, Any]: + """ + This function verifies an image pair is same person or different persons. In the background, + verification function represents facial images as vectors and then calculates the similarity + between those vectors. 
Vectors of same person images should have more similarity (or less + distance) than vectors of different persons. + + Parameters: + img1_path, img2_path: exact image path as string. numpy array (BGR) or based64 encoded + images are also welcome. If one of pair has more than one face, then we will compare the + face pair with max similarity. + + model_name (str): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib + , ArcFace and SFace + + distance_metric (string): cosine, euclidean, euclidean_l2 + + enforce_detection (boolean): If no face could not be detected in an image, then this + function will return exception by default. Set this to False not to have this exception. + This might be convenient for low resolution images. + + detector_backend (string): set face detector backend to opencv, retinaface, mtcnn, ssd, + dlib, mediapipe or yolov8. + + align (boolean): alignment according to the eye positions. + + normalization (string): normalize the input image before feeding to model + + Returns: + Verify function returns a dictionary. + + { + "verified": True + , "distance": 0.2563 + , "max_threshold_to_verify": 0.40 + , "model": "VGG-Face" + , "similarity_metric": "cosine" + , 'facial_areas': { + 'img1': {'x': 345, 'y': 211, 'w': 769, 'h': 769}, + 'img2': {'x': 318, 'y': 534, 'w': 779, 'h': 779} + } + , "time": 2 + } + + """ + + tic = time.time() + + # -------------------------------- + target_size = functions.find_target_size(model_name=model_name) + + # img pairs might have many faces + img1_objs = functions.extract_faces( + img=img1_path, + target_size=target_size, + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + ) + + img2_objs = functions.extract_faces( + img=img2_path, + target_size=target_size, + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + ) + # -------------------------------- + distances = [] + regions = [] + # now we will find the face pair with minimum distance + for img1_content, img1_region, _ in img1_objs: + for img2_content, img2_region, _ in img2_objs: + img1_embedding_obj = representation.represent( + img_path=img1_content, + model_name=model_name, + enforce_detection=enforce_detection, + detector_backend="skip", + align=align, + normalization=normalization, + ) + + img2_embedding_obj = representation.represent( + img_path=img2_content, + model_name=model_name, + enforce_detection=enforce_detection, + detector_backend="skip", + align=align, + normalization=normalization, + ) + + img1_representation = img1_embedding_obj[0]["embedding"] + img2_representation = img2_embedding_obj[0]["embedding"] + + if distance_metric == "cosine": + distance = dst.findCosineDistance(img1_representation, img2_representation) + elif distance_metric == "euclidean": + distance = dst.findEuclideanDistance(img1_representation, img2_representation) + elif distance_metric == "euclidean_l2": + distance = dst.findEuclideanDistance( + dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation) + ) + else: + raise ValueError("Invalid distance_metric passed - ", distance_metric) + + distances.append(distance) + regions.append((img1_region, img2_region)) + + # ------------------------------- + threshold = dst.findThreshold(model_name, distance_metric) + distance = min(distances) # best distance + facial_areas = regions[np.argmin(distances)] + + toc = time.time() + + # pylint: disable=simplifiable-if-expression + resp_obj = { + "verified": True if 
distance <= threshold else False, + "distance": distance, + "threshold": threshold, + "model": model_name, + "detector_backend": detector_backend, + "similarity_metric": distance_metric, + "facial_areas": {"img1": facial_areas[0], "img2": facial_areas[1]}, + "time": round(toc - tic, 2), + } + + return resp_obj diff --git a/tests/test_represent.py b/tests/test_represent.py index 4b45594..44412f3 100644 --- a/tests/test_represent.py +++ b/tests/test_represent.py @@ -1,3 +1,4 @@ +import cv2 from deepface import DeepFace from deepface.commons.logger import Logger @@ -14,7 +15,7 @@ def test_standard_represent(): logger.info("✅ test standard represent function done") -def test_represent_for_skipped_detector_backend(): +def test_represent_for_skipped_detector_backend_with_image_path(): face_img = "dataset/img5.jpg" img_objs = DeepFace.represent(img_path=face_img, detector_backend="skip") assert len(img_objs) >= 1 @@ -27,4 +28,21 @@ def test_represent_for_skipped_detector_backend(): assert "w" in img_obj["facial_area"].keys() assert "h" in img_obj["facial_area"].keys() assert "face_confidence" in img_obj.keys() - logger.info("✅ test represent function for skipped detector backend done") + logger.info("✅ test represent function for skipped detector and image path input backend done") + + +def test_represent_for_skipped_detector_backend_with_preloaded_image(): + face_img = "dataset/img5.jpg" + img = cv2.imread(face_img) + img_objs = DeepFace.represent(img_path=img, detector_backend="skip") + assert len(img_objs) >= 1 + img_obj = img_objs[0] + assert "embedding" in img_obj.keys() + assert "facial_area" in img_obj.keys() + assert isinstance(img_obj["facial_area"], dict) + assert "x" in img_obj["facial_area"].keys() + assert "y" in img_obj["facial_area"].keys() + assert "w" in img_obj["facial_area"].keys() + assert "h" in img_obj["facial_area"].keys() + assert "face_confidence" in img_obj.keys() + logger.info("✅ test represent function for skipped detector and preloaded image done")
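
A minimal usage sketch of the refactored layout (illustrative only; the image paths assume the sample files under the repository's `tests/dataset` folder, as used by the unit tests above): the public `DeepFace` wrappers keep their signatures and return shapes, while the same logic can now also be called directly through `deepface.modules`.

```python
# Sketch only: run from the repository's tests/ directory so the sample
# dataset/ images referenced by the unit tests resolve.
from deepface import DeepFace
from deepface.modules import modeling, representation, verification

# The public wrapper now simply forwards its arguments to verification.verify,
# so both calls return the same response structure.
result = DeepFace.verify(
    img1_path="dataset/img1.jpg",
    img2_path="dataset/img2.jpg",
    model_name="VGG-Face",
    distance_metric="cosine",
)
print(result["verified"], result["distance"], result["threshold"])

direct = verification.verify(img1_path="dataset/img1.jpg", img2_path="dataset/img2.jpg")
assert set(direct.keys()) == set(result.keys())

# represent returns one dict per detected face: embedding, facial_area, face_confidence.
for obj in representation.represent(img_path="dataset/img1.jpg", model_name="Facenet"):
    print(len(obj["embedding"]), obj["facial_area"], obj["face_confidence"])

# build_model caches loaded models in a module-level singleton,
# so repeated calls reuse the same instance instead of rebuilding it.
assert modeling.build_model("VGG-Face") is modeling.build_model("VGG-Face")
```

Keeping `DeepFace.py` as a thin facade over the new modules is what lets the existing tests and downstream callers keep using the old entry points unchanged while the implementation moves into the focused `deepface/modules` files.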