From e5f0da3fd74fecc95f3d721d941f3cb35c3a0474 Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Sat, 27 Jan 2024 19:20:03 +0000
Subject: [PATCH 1/2] pretty return type for detect faces

---
 deepface/commons/functions.py         | 21 +++++++-----
 deepface/detectors/DetectorWrapper.py | 22 ++++--------
 deepface/detectors/Dlib.py            | 36 ++++++++------------
 deepface/detectors/FastMtCnn.py       | 35 ++++++++-----------
 deepface/detectors/MediaPipe.py       | 36 +++++++++----------
 deepface/detectors/MtCnn.py           | 35 ++++++++-----------
 deepface/detectors/OpenCv.py          | 35 ++++++++-----------
 deepface/detectors/RetinaFace.py      | 36 +++++++++----------
 deepface/detectors/Ssd.py             | 49 +++++++++++++--------------
 deepface/detectors/Yolo.py            | 39 ++++++++++-----------
 deepface/detectors/YuNet.py           | 35 ++++++++-----------
 deepface/models/Detector.py           | 43 +++++++++++++++--------
 tests/visual-test.py                  |  4 +--
 13 files changed, 192 insertions(+), 234 deletions(-)

diff --git a/deepface/commons/functions.py b/deepface/commons/functions.py
index dc2a5b2..cdb047d 100644
--- a/deepface/commons/functions.py
+++ b/deepface/commons/functions.py
@@ -12,6 +12,7 @@ import tensorflow as tf

 # package dependencies
 from deepface.detectors import DetectorWrapper
+from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.commons.logger import Logger

 logger = Logger(module="commons.functions")
@@ -170,10 +171,11 @@ def extract_faces(

     # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
     img, img_name = load_image(img)
-    img_region = [0, 0, img.shape[1], img.shape[0]]
+
+    base_region = FacialAreaRegion(x=0, y=0, w=img.shape[1], h=img.shape[0])

     if detector_backend == "skip":
-        face_objs = [(img, img_region, 0)]
+        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
     else:
         face_objs = DetectorWrapper.detect_faces(detector_backend, img, align)

@@ -192,9 +194,12 @@ def extract_faces(
         )

     if len(face_objs) == 0 and enforce_detection is False:
-        face_objs = [(img, img_region, 0)]
+        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]

-    for current_img, current_region, confidence in face_objs:
+    for face_obj in face_objs:
+        current_img = face_obj.img
+        current_region = face_obj.facial_area
+        confidence = face_obj.confidence
         if current_img.shape[0] > 0 and current_img.shape[1] > 0:
             if grayscale is True:
                 current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
@@ -245,10 +250,10 @@ def extract_faces(

             # int cast is for the exception - object of type 'float32' is not JSON serializable
             region_obj = {
-                "x": int(current_region[0]),
-                "y": int(current_region[1]),
-                "w": int(current_region[2]),
-                "h": int(current_region[3]),
+                "x": current_region.x,
+                "y": current_region.y,
+                "w": current_region.w,
+                "h": current_region.h,
             }

             extracted_face = (img_pixels, region_obj, confidence)
diff --git a/deepface/detectors/DetectorWrapper.py b/deepface/detectors/DetectorWrapper.py
index d061d79..54bc7a5 100644
--- a/deepface/detectors/DetectorWrapper.py
+++ b/deepface/detectors/DetectorWrapper.py
@@ -1,6 +1,6 @@
-from typing import Any
+from typing import Any, List
 import numpy as np
-from deepface.models.Detector import Detector
+from deepface.models.Detector import Detector, DetectedFace
 from deepface.detectors import (
     FastMtCnn,
     MediaPipe,
@@ -52,7 +52,7 @@ def build_model(detector_backend: str) -> Any:
     return face_detector_obj[detector_backend]


-def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) -> list:
+def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
     """
     Detect face(s) from a given image
     Args:
@@ -60,19 +60,11 @@ def detect_faces(detector_backend: str, img: np.ndarray, align: bool = True) ->
         img (np.ndarray): pre-loaded image
         align (bool): enable or disable alignment after detection
     Returns:
-        results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
-            where each tuple contains:
-            - detected_face (np.ndarray): The detected face as a NumPy array.
-            - face_region (List[float]): The image region represented as
-                a list of floats e.g. [x, y, w, h]
+        results (List[DetectedFace]): A list of DetectedFace objects
+            where each object contains:
+            - img (np.ndarray): The detected face as a NumPy array.
+            - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
             - confidence (float): The confidence score associated with the detected face.
-
-        Example:
-            results = [
-                (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
-                (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
-                (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
-            ]
     """
     face_detector: Detector = build_model(detector_backend)
     return face_detector.detect_faces(img=img, align=align)
diff --git a/deepface/detectors/Dlib.py b/deepface/detectors/Dlib.py
index fead85e..a64068a 100644
--- a/deepface/detectors/Dlib.py
+++ b/deepface/detectors/Dlib.py
@@ -1,10 +1,10 @@
-from typing import List, Tuple
+from typing import List
 import os
 import bz2
 import gdown
 import numpy as np
 from deepface.commons import functions
-from deepface.models.Detector import Detector
+from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
 from deepface.commons.logger import Logger

 logger = Logger(module="detectors.DlibWrapper")
@@ -56,9 +56,7 @@ class DlibClient(Detector):
             detector["sp"] = sp
         return detector

-    def detect_faces(
-        self, img: np.ndarray, align: bool = True
-    ) -> List[Tuple[np.ndarray, List[float], float]]:
+    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
         """
         Detect and align face with dlib
         Args:
@@ -66,19 +64,11 @@ class DlibClient(Detector):
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
-                where each tuple contains:
-                - detected_face (np.ndarray): The detected face as a NumPy array.
-                - face_region (List[float]): The image region represented as
-                    a list of floats e.g. [x, y, w, h]
-                - confidence (float): The confidence score associated with the detected face.
-
-            Example:
-                results = [
-                    (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
-                    (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
-                    (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
-                ]
+            results (List[DetectedFace]): A list of DetectedFace objects
+                where each object contains:
+                - img (np.ndarray): The detected face as a NumPy array.
+                - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+                - confidence (float): The confidence score associated with the detected face.
         """
         # this is not a must dependency. do not import it in the global level.
         try:
@@ -95,8 +85,6 @@ class DlibClient(Detector):

         detected_face = None

-        img_region = [0, 0, img.shape[1], img.shape[0]]
-
         face_detector = self.model["face_detector"]

         # note that, by design, dlib's fhog face detector scores are >0 but not capped at 1
@@ -115,13 +103,17 @@ class DlibClient(Detector):
                     max(0, top) : min(bottom, img.shape[0]), max(0, left) : min(right, img.shape[1])
                 ]

-                img_region = [left, top, right - left, bottom - top]
+                img_region = FacialAreaRegion(x=left, y=top, w=right - left, h=bottom - top)
                 confidence = scores[idx]

                 if align:
                     img_shape = sp(img, detections[idx])
                     detected_face = dlib.get_face_chip(img, img_shape, size=detected_face.shape[0])

-                resp.append((detected_face, img_region, confidence))
+                detected_face_obj = DetectedFace(
+                    img=detected_face, facial_area=img_region, confidence=confidence
+                )
+
+                resp.append(detected_face_obj)

         return resp
diff --git a/deepface/detectors/FastMtCnn.py b/deepface/detectors/FastMtCnn.py
index c43ed55..8affe0a 100644
--- a/deepface/detectors/FastMtCnn.py
+++ b/deepface/detectors/FastMtCnn.py
@@ -1,7 +1,7 @@
-from typing import Any, Union, List, Tuple
+from typing import Any, Union, List
 import cv2
 import numpy as np
-from deepface.models.Detector import Detector
+from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
 from deepface.modules import detection

 # Link -> https://github.com/timesler/facenet-pytorch
@@ -12,33 +12,22 @@ class FastMtCnnClient(Detector):
     def __init__(self):
         self.model = self.build_model()

-    def detect_faces(
-        self, img: np.ndarray, align: bool = True
-    ) -> List[Tuple[np.ndarray, List[float], float]]:
+    def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]:
         """
         Detect and align face with mtcnn
         Args:
             img (np.ndarray): pre-loaded image
             align (bool): default is true
         Returns:
-            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
-                where each tuple contains:
-                - detected_face (np.ndarray): The detected face as a NumPy array.
-                - face_region (List[float]): The image region represented as
-                    a list of floats e.g. [x, y, w, h]
-                - confidence (float): The confidence score associated with the detected face.
-
-            Example:
-                results = [
-                    (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
-                    (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
-                    (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
-                ]
+            results (List[DetectedFace]): A list of DetectedFace objects
+                where each object contains:
+                - img (np.ndarray): The detected face as a NumPy array.
+                - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h
+                - confidence (float): The confidence score associated with the detected face.
""" resp = [] detected_face = None - img_region = [0, 0, img.shape[1], img.shape[0]] img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB but OpenCV read BGR detections = self.model.detect( @@ -49,7 +38,7 @@ class FastMtCnnClient(Detector): for current_detection in zip(*detections): x, y, w, h = xyxy_to_xywh(current_detection[0]) detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] - img_region = [x, y, w, h] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = current_detection[1] if align: @@ -59,7 +48,11 @@ class FastMtCnnClient(Detector): img=detected_face, left_eye=left_eye, right_eye=right_eye ) - resp.append((detected_face, img_region, confidence)) + detected_face_obj = DetectedFace( + img=detected_face, facial_area=img_region, confidence=confidence + ) + + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/MediaPipe.py b/deepface/detectors/MediaPipe.py index 0e067a2..e9f112e 100644 --- a/deepface/detectors/MediaPipe.py +++ b/deepface/detectors/MediaPipe.py @@ -1,6 +1,6 @@ -from typing import Any, List, Tuple +from typing import Any, List import numpy as np -from deepface.models.Detector import Detector +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion from deepface.modules import detection # Link - https://google.github.io/mediapipe/solutions/face_detection @@ -29,28 +29,18 @@ class MediaPipeClient(Detector): face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7) return face_detection - def detect_faces( - self, img: np.ndarray, align: bool = True - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: """ Detect and align face with mediapipe Args: img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[DetectedFace): A list of DetectedFace objects + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -85,13 +75,19 @@ class MediaPipeClient(Detector): if x > 0 and y > 0: detected_face = img[y : y + h, x : x + w] - img_region = [x, y, w, h] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) if align: detected_face = detection.align_face( img=detected_face, left_eye=left_eye, right_eye=right_eye ) - resp.append((detected_face, img_region, confidence)) + detected_face_obj = DetectedFace( + img=detected_face, + facial_area=img_region, + confidence=confidence, + ) + + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/MtCnn.py b/deepface/detectors/MtCnn.py index aefb5f6..2e30b83 100644 --- a/deepface/detectors/MtCnn.py +++ b/deepface/detectors/MtCnn.py @@ -1,8 +1,8 @@ -from typing import List, Tuple +from typing import List import cv2 import numpy as np from mtcnn import MTCNN -from deepface.models.Detector import Detector +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion from deepface.modules import detection # pylint: disable=too-few-public-methods @@ -14,34 +14,23 @@ class MtCnnClient(Detector): def __init__(self): self.model = MTCNN() - def detect_faces( - self, img: np.ndarray, align: bool = True - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: """ Detect and align face with mtcnn Args: img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[DetectedFace]): A list of DetectedFace objects + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] detected_face = None - img_region = [0, 0, img.shape[1], img.shape[0]] img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB but OpenCV read BGR detections = self.model.detect_faces(img_rgb) @@ -51,7 +40,7 @@ class MtCnnClient(Detector): for current_detection in detections: x, y, w, h = current_detection["box"] detected_face = img[int(y) : int(y + h), int(x) : int(x + w)] - img_region = [x, y, w, h] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = current_detection["confidence"] if align: @@ -62,6 +51,10 @@ class MtCnnClient(Detector): img=detected_face, left_eye=left_eye, right_eye=right_eye ) - resp.append((detected_face, img_region, confidence)) + detected_face_obj = DetectedFace( + img=detected_face, facial_area=img_region, confidence=confidence + ) + + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/OpenCv.py b/deepface/detectors/OpenCv.py index 6f1d21c..353892c 100644 --- a/deepface/detectors/OpenCv.py +++ b/deepface/detectors/OpenCv.py @@ -1,8 +1,8 @@ import os -from typing import Any, List, Tuple +from typing import Any, List import cv2 import numpy as np -from deepface.models.Detector import Detector +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion from deepface.modules import detection @@ -25,9 +25,7 @@ class OpenCvClient(Detector): detector["eye_detector"] = self.__build_cascade("haarcascade_eye") return detector - def detect_faces( - self, img: np.ndarray, align: bool = True - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: """ Detect and align face with opencv Args: @@ -35,24 +33,15 @@ class OpenCvClient(Detector): img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[Tuple[DetectedFace]): A list of DetectedFace objects + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] detected_face = None - img_region = [0, 0, img.shape[1], img.shape[0]] faces = [] try: @@ -73,9 +62,13 @@ class OpenCvClient(Detector): left_eye, right_eye = self.find_eyes(img=detected_face) detected_face = detection.align_face(detected_face, left_eye, right_eye) - img_region = [x, y, w, h] + detected_face_obj = DetectedFace( + img=detected_face, + facial_area=FacialAreaRegion(x, y, w, h), + confidence=confidence, + ) - resp.append((detected_face, img_region, confidence)) + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/RetinaFace.py b/deepface/detectors/RetinaFace.py index 0eb8bf1..16b30c0 100644 --- a/deepface/detectors/RetinaFace.py +++ b/deepface/detectors/RetinaFace.py @@ -1,36 +1,26 @@ -from typing import List, Tuple +from typing import List import numpy as np from retinaface import RetinaFace as rf from retinaface.commons import postprocess -from deepface.models.Detector import Detector +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion # pylint: disable=too-few-public-methods class RetinaFaceClient(Detector): def __init__(self): self.model = rf.build_model() - def detect_faces( - self, img: np.ndarray, align: bool = True - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: """ Detect and align face with retinaface Args: img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[DetectedFace]): A list of DetectedFace object + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -45,7 +35,7 @@ class RetinaFaceClient(Detector): h = facial_area[3] - y x = facial_area[0] w = facial_area[2] - x - img_region = [x, y, w, h] + img_region = FacialAreaRegion(x=x, y=y, w=w, h=h) confidence = identity["score"] # detected_face = img[int(y):int(y+h), int(x):int(x+w)] #opencv @@ -65,6 +55,12 @@ class RetinaFaceClient(Detector): detected_face, right_eye, left_eye, nose ) - resp.append((detected_face, img_region, confidence)) + detected_face_obj = DetectedFace( + img=detected_face, + facial_area=img_region, + confidence=confidence, + ) + + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/Ssd.py b/deepface/detectors/Ssd.py index 7793e5f..8775b51 100644 --- a/deepface/detectors/Ssd.py +++ b/deepface/detectors/Ssd.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List import os import gdown import cv2 @@ -6,7 +6,7 @@ import pandas as pd import numpy as np from deepface.detectors import OpenCv from deepface.commons import functions -from deepface.models.Detector import Detector +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion from deepface.modules import detection from deepface.commons.logger import Logger @@ -71,33 +71,22 @@ class SsdClient(Detector): return detector - def detect_faces( - self, img: np.ndarray, align: bool = True - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: """ Detect and align face with ssd Args: img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[DetectedFace]): A list of DetectedFace object + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] detected_face = None - img_region = [0, 0, img.shape[1], img.shape[0]] ssd_labels = ["img_id", "is_face", "confidence", "left", "top", "right", "bottom"] @@ -141,12 +130,14 @@ class SsdClient(Detector): int(top * aspect_ratio_y) : int(bottom * aspect_ratio_y), int(left * aspect_ratio_x) : int(right * aspect_ratio_x), ] - img_region = [ - int(left * aspect_ratio_x), - int(top * aspect_ratio_y), - int(right * aspect_ratio_x) - int(left * aspect_ratio_x), - int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y), - ] + + face_region = FacialAreaRegion( + x=int(left * aspect_ratio_x), + y=int(top * aspect_ratio_y), + w=int(right * aspect_ratio_x) - int(left * aspect_ratio_x), + h=int(bottom * aspect_ratio_y) - int(top * aspect_ratio_y), + ) + confidence = instance["confidence"] if align: @@ -156,5 +147,11 @@ class SsdClient(Detector): img=detected_face, left_eye=left_eye, right_eye=right_eye ) - resp.append((detected_face, img_region, confidence)) + detected_face_obj = DetectedFace( + img=detected_face, + facial_area=face_region, + confidence=confidence, + ) + + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/Yolo.py b/deepface/detectors/Yolo.py index 44bd4bb..79e1eaa 100644 --- a/deepface/detectors/Yolo.py +++ b/deepface/detectors/Yolo.py @@ -1,6 +1,8 @@ -from typing import Any, List, Tuple +import os +from typing import Any, List import numpy as np -from deepface.models.Detector import Detector +import gdown +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion from deepface.modules import detection from deepface.commons.logger import Logger @@ -9,7 +11,7 @@ logger = Logger() # Model's weights paths PATH = "/.deepface/weights/yolov8n-face.pt" -# Google Drive URL +# Google Drive URL from repo (https://github.com/derronqi/yolov8-face) ~6MB WEIGHT_URL = "https://drive.google.com/uc?id=1qcr9DbgsX3ryrz2uU8w4Xm3cOrRywXqb" # Confidence thresholds for landmarks detection @@ -27,8 +29,6 @@ class YoloClient(Detector): Returns: model (Any) """ - import gdown - import os # Import the Ultralytics YOLO model try: @@ -51,9 +51,7 @@ class YoloClient(Detector): # Return face_detector return YOLO(weight_path) - def detect_faces( - self, img: np.ndarray, align: bool = False - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = False) -> List[DetectedFace]: """ Detect and align face with yolo Args: @@ -61,19 +59,11 @@ class YoloClient(Detector): img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[Tuple[DetectedFace]): A list of DetectedFace objects + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. 
""" resp = [] @@ -87,6 +77,7 @@ class YoloClient(Detector): confidence = result.boxes.conf.tolist()[0] x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h) + region = FacialAreaRegion(x=x, y=y, w=w, h=h) detected_face = img[y : y + h, x : x + w].copy() if align: @@ -103,6 +94,10 @@ class YoloClient(Detector): detected_face = detection.align_face( img=detected_face, left_eye=left_eye[0].cpu(), right_eye=right_eye[0].cpu() ) - resp.append((detected_face, [x, y, w, h], confidence)) + + detected_face_obj = DetectedFace( + img=detected_face, facial_area=region, confidence=confidence + ) + resp.append(detected_face_obj) return resp diff --git a/deepface/detectors/YuNet.py b/deepface/detectors/YuNet.py index d4970a7..d079428 100644 --- a/deepface/detectors/YuNet.py +++ b/deepface/detectors/YuNet.py @@ -1,10 +1,10 @@ import os -from typing import Any, List, Tuple +from typing import Any, List import cv2 import numpy as np import gdown from deepface.commons import functions -from deepface.models.Detector import Detector +from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion from deepface.modules import detection from deepface.commons.logger import Logger @@ -49,35 +49,24 @@ class YuNetClient(Detector): ) from err return face_detector - def detect_faces( - self, img: np.ndarray, align: bool = True - ) -> List[Tuple[np.ndarray, List[float], float]]: + def detect_faces(self, img: np.ndarray, align: bool = True) -> List[DetectedFace]: """ Detect and align face with yunet Args: img (np.ndarray): pre-loaded image align (bool): default is true Returns: - results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples - where each tuple contains: - - detected_face (np.ndarray): The detected face as a NumPy array. - - face_region (List[float]): The image region represented as - a list of floats e.g. [x, y, w, h] - - confidence (float): The confidence score associated with the detected face. - - Example: - results = [ - (array(..., dtype=uint8), [110, 60, 150, 380], 0.99), - (array(..., dtype=uint8), [150, 50, 299, 375], 0.98), - (array(..., dtype=uint8), [120, 55, 300, 371], 0.96), - ] + results (List[DetectedFace]): A list of DetectedFace objects + where each object contains: + - img (np.ndarray): The detected face as a NumPy array. + - facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h + - confidence (float): The confidence score associated with the detected face. """ # FaceDetector.detect_faces does not support score_threshold parameter. # We can set it via environment variable. 
         score_threshold = float(os.environ.get("yunet_score_threshold", "0.9"))
         resp = []
         detected_face = None
-        img_region = [0, 0, img.shape[1], img.shape[0]]
         faces = []
         height, width = img.shape[0], img.shape[1]
         # resize image if it is too large (Yunet fails to detect faces on large input sometimes)
@@ -127,8 +116,12 @@ class YuNetClient(Detector):
             confidence = face[-1]
             confidence = f"{confidence:.2f}"
             detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
-            img_region = [x, y, w, h]
+            img_region = FacialAreaRegion(x=x, y=y, w=w, h=h)
             if align:
                 detected_face = detection.align_face(detected_face, (x_re, y_re), (x_le, y_le))
-            resp.append((detected_face, img_region, confidence))
+
+            detected_face_obj = DetectedFace(
+                img=detected_face, facial_area=img_region, confidence=confidence
+            )
+            resp.append(detected_face_obj)
         return resp
diff --git a/deepface/models/Detector.py b/deepface/models/Detector.py
index b834e9d..52d562a 100644
--- a/deepface/models/Detector.py
+++ b/deepface/models/Detector.py
@@ -1,4 +1,4 @@
-from typing import List, Tuple
+from typing import List
 from abc import ABC, abstractmethod
 import numpy as np

@@ -8,27 +8,42 @@ import numpy as np
 # pylint: disable=unnecessary-pass, too-few-public-methods
 class Detector(ABC):
     @abstractmethod
-    def detect_faces(
-        self, img: np.ndarray, align: bool = True
-    ) -> List[Tuple[np.ndarray, List[float], float]]:
+    def detect_faces(self, img: np.ndarray, align: bool = True) -> List["DetectedFace"]:
         """
         Detect faces from a given image
         Args:
             img (np.ndarray): pre-loaded image as a NumPy array
             align (bool): enable or disable alignment after face detection
         Returns:
-            results (List[Tuple[np.ndarray, List[float], float]]): A list of tuples
-                where each tuple contains:
-                - detected_face (np.ndarray): The detected face as a NumPy array.
+            results (List[DetectedFace]): A list of DetectedFace objects
+                where each object contains:
+                - img (np.ndarray): The detected face as a NumPy array.
                 - face_region (List[float]): The image region represented as
                     a list of floats e.g. [x, y, w, h]
                 - confidence (float): The confidence score associated with the detected face.
-
-            Example:
-                results = [
-                    (array(..., dtype=uint8), [110, 60, 150, 380], 0.99),
-                    (array(..., dtype=uint8), [150, 50, 299, 375], 0.98),
-                    (array(..., dtype=uint8), [120, 55, 300, 371], 0.96),
-                ]
         """
         pass
+
+
+class FacialAreaRegion:
+    x: int
+    y: int
+    w: int
+    h: int
+
+    def __init__(self, x: int, y: int, w: int, h: int):
+        self.x = x
+        self.y = y
+        self.w = w
+        self.h = h
+
+
+class DetectedFace:
+    img: np.ndarray
+    facial_area: FacialAreaRegion
+    confidence: float
+
+    def __init__(self, img: np.ndarray, facial_area: FacialAreaRegion, confidence: float):
+        self.img = img
+        self.facial_area = facial_area
+        self.confidence = confidence
diff --git a/tests/visual-test.py b/tests/visual-test.py
index 9188412..dbf861c 100644
--- a/tests/visual-test.py
+++ b/tests/visual-test.py
@@ -20,8 +20,7 @@ model_names = [
     "SFace",
 ]

-detector_backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface", "yunet"]
-
+detector_backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface", "yunet", "yolov8"]

 # verification
 for model_name in model_names:
@@ -45,7 +44,6 @@ dfs = DeepFace.find(
 for df in dfs:
     logger.info(df)

-
 # extract faces
 for detector_backend in detector_backends:
     face_objs = DeepFace.extract_faces(

From 69c727acb8984ff4c04df32421dda95b7f1fdd2d Mon Sep 17 00:00:00 2001
From: Sefik Ilkin Serengil
Date: Sat, 27 Jan 2024 19:24:04 +0000
Subject: [PATCH 2/2] v0.0.84

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 81c803b..dc730dc 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ with open("requirements.txt", "r", encoding="utf-8") as f:

 setuptools.setup(
     name="deepface",
-    version="0.0.83",
+    version="0.0.84",
     author="Sefik Ilkin Serengil",
     author_email="serengil@gmail.com",
     description="A Lightweight Face Recognition and Facial Attribute Analysis Framework (Age, Gender, Emotion, Race) for Python",
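
A minimal usage sketch of the new detect_faces return type, not part of the patch itself. It assumes both patches above are applied (deepface v0.0.84); the image path "img.jpg" and the "opencv" backend choice are placeholders:

    import cv2

    from deepface.detectors import DetectorWrapper
    from deepface.models.Detector import DetectedFace, FacialAreaRegion

    # the detectors expect a pre-loaded BGR image
    img = cv2.imread("img.jpg")

    # detect_faces now returns List[DetectedFace] instead of
    # List[Tuple[np.ndarray, List[float], float]]
    face_objs = DetectorWrapper.detect_faces(detector_backend="opencv", img=img, align=True)

    for face_obj in face_objs:
        assert isinstance(face_obj, DetectedFace)
        assert isinstance(face_obj.facial_area, FacialAreaRegion)
        # attribute access replaces the old positional tuple unpacking
        region = face_obj.facial_area
        print(region.x, region.y, region.w, region.h, face_obj.confidence)

The public DeepFace.extract_faces contract should be unchanged by this refactor: functions.extract_faces converts each DetectedFace back into the (img_pixels, region_obj, confidence) tuples it built before, so only direct callers of the detector layer need to adapt.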