diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py
index e833377..f035423 100644
--- a/deepface/DeepFace.py
+++ b/deepface/DeepFace.py
@@ -10,7 +10,6 @@ os.environ["TF_USE_LEGACY_KERAS"] = "1"
 # pylint: disable=wrong-import-position
 
 # 3rd party dependencies
-import cv2
 import numpy as np
 import pandas as pd
 import tensorflow as tf
@@ -26,6 +25,7 @@ from deepface.modules import (
     demography,
     detection,
     streaming,
+    preprocessing,
 )
 from deepface import __version__
 
@@ -548,5 +548,5 @@ def detectFace(
     extracted_face = None
     if len(face_objs) > 0:
         extracted_face = face_objs[0]["face"]
-        extracted_face = cv2.resize(extracted_face, target_size)
+        extracted_face = preprocessing.resize_image(img=extracted_face, target_size=target_size)
     return extracted_face
diff --git a/deepface/commons/image_utils.py b/deepface/commons/image_utils.py
new file mode 100644
index 0000000..c25e411
--- /dev/null
+++ b/deepface/commons/image_utils.py
@@ -0,0 +1,149 @@
+# built-in dependencies
+import os
+import io
+from typing import List, Union, Tuple
+import hashlib
+import base64
+from pathlib import Path
+
+# 3rd party dependencies
+import requests
+import numpy as np
+import cv2
+from PIL import Image
+
+
+def list_images(path: str) -> List[str]:
+    """
+    List images in a given path
+    Args:
+        path (str): path's location
+    Returns:
+        images (list): list of exact image paths
+    """
+    images = []
+    for r, _, f in os.walk(path):
+        for file in f:
+            exact_path = os.path.join(r, file)
+
+            _, ext = os.path.splitext(exact_path)
+            ext_lower = ext.lower()
+
+            if ext_lower not in {".jpg", ".jpeg", ".png"}:
+                continue
+
+            with Image.open(exact_path) as img:  # lazy
+                if img.format.lower() in ["jpeg", "png"]:
+                    images.append(exact_path)
+    return images
+
+
+def find_image_hash(file_path: str) -> str:
+    """
+    Find the hash of given image file with its properties
+        finding the hash of image content is costly operation
+    Args:
+        file_path (str): exact image path
+    Returns:
+        hash (str): digest with sha1 algorithm
+    """
+    file_stats = os.stat(file_path)
+
+    # some properties
+    file_size = file_stats.st_size
+    creation_time = file_stats.st_ctime
+    modification_time = file_stats.st_mtime
+
+    properties = f"{file_size}-{creation_time}-{modification_time}"
+
+    hasher = hashlib.sha1()
+    hasher.update(properties.encode("utf-8"))
+    return hasher.hexdigest()
+
+
+def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
+    """
+    Load image from path, url, base64 or numpy array.
+    Args:
+        img: a path, url, base64 or numpy array.
+    Returns:
+        image (numpy array): the loaded image in BGR format
+        image name (str): image name itself
+    """
+
+    # The image is already a numpy array
+    if isinstance(img, np.ndarray):
+        return img, "numpy array"
+
+    if isinstance(img, Path):
+        img = str(img)
+
+    if not isinstance(img, str):
+        raise ValueError(f"img must be numpy array or str but it is {type(img)}")
+
+    # The image is a base64 string
+    if img.startswith("data:image/"):
+        return load_image_from_base64(img), "base64 encoded string"
+
+    # The image is a url
+    if img.lower().startswith("http://") or img.lower().startswith("https://"):
+        return load_image_from_web(url=img), img
+
+    # The image is a path
+    if os.path.isfile(img) is not True:
+        raise ValueError(f"Confirm that {img} exists")
+
+    # image must be a file on the system then
+
+    # image name must have english characters
+    if img.isascii() is False:
+        raise ValueError(f"Input image must not have non-english characters - {img}")
+
+    img_obj_bgr = cv2.imread(img)
+    # img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
+    return img_obj_bgr, img
+
+
+def load_image_from_base64(uri: str) -> np.ndarray:
+    """
+    Load image from base64 string.
+    Args:
+        uri: a base64 string.
+    Returns:
+        numpy array: the loaded image.
+    """
+
+    encoded_data_parts = uri.split(",")
+
+    if len(encoded_data_parts) < 2:
+        raise ValueError("format error in base64 encoded string")
+
+    encoded_data = encoded_data_parts[1]
+    decoded_bytes = base64.b64decode(encoded_data)
+
+    # similar to find functionality, we are just considering these extensions
+    # content type is safer option than file extension
+    with Image.open(io.BytesIO(decoded_bytes)) as img:
+        file_type = img.format.lower()
+        if file_type not in ["jpeg", "png"]:
+            raise ValueError(f"input image can be jpg or png, but it is {file_type}")
+
+    # frombuffer, not fromstring: binary-mode np.fromstring is deprecated in numpy
+    nparr = np.frombuffer(decoded_bytes, np.uint8)
+    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+    return img_bgr
+
+
+def load_image_from_web(url: str) -> np.ndarray:
+    """
+    Loading an image from web
+    Args:
+        url: link for the image
+    Returns:
+        img (np.ndarray): equivalent to pre-loaded image from opencv (BGR format)
+    """
+    response = requests.get(url, stream=True, timeout=60)
+    response.raise_for_status()
+    image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
+    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
+    return img
diff --git a/deepface/commons/package_utils.py b/deepface/commons/package_utils.py
index 3d68235..c35db8b 100644
--- a/deepface/commons/package_utils.py
+++ b/deepface/commons/package_utils.py
@@ -1,7 +1,3 @@
-# built-in dependencies
-import os
-import hashlib
-
 # 3rd party dependencies
 import tensorflow as tf
 
@@ -29,29 +25,6 @@ def get_tf_minor_version() -> int:
     return int(tf.__version__.split(".", maxsplit=-1)[1])
 
 
-def find_hash_of_file(file_path: str) -> str:
-    """
-    Find the hash of given image file with its properties
-        finding the hash of image content is costly operation
-    Args:
-        file_path (str): exact image path
-    Returns:
-        hash (str): digest with sha1 algorithm
-    """
-    file_stats = os.stat(file_path)
-
-    # some properties
-    file_size = file_stats.st_size
-    creation_time = file_stats.st_ctime
-    modification_time = file_stats.st_mtime
-
-    properties = f"{file_size}-{creation_time}-{modification_time}"
-
-    hasher = hashlib.sha1()
-    hasher.update(properties.encode("utf-8"))
-    return hasher.hexdigest()
-
-
 def validate_for_keras3():
     tf_major = get_tf_major_version()
     tf_minor = get_tf_minor_version()
diff --git a/deepface/modules/detection.py b/deepface/modules/detection.py
index d509d60..ad4b288 100644
--- a/deepface/modules/detection.py
+++ b/deepface/modules/detection.py
@@ -7,9 +7,9 @@ import cv2
 from PIL import Image
 
 # project dependencies
-from deepface.modules import preprocessing
 from deepface.models.Detector import DetectedFace, FacialAreaRegion
 from deepface.detectors import DetectorWrapper
+from deepface.commons import image_utils
 from deepface.commons import logger as log
 
 logger = log.get_singletonish_logger()
@@ -63,7 +63,7 @@ def extract_faces(
     resp_objs = []
 
     # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
-    img, img_name = preprocessing.load_image(img_path)
+    img, img_name = image_utils.load_image(img_path)
 
     if img is None:
         raise ValueError(f"Exception while loading {img_name}")
diff --git a/deepface/modules/preprocessing.py b/deepface/modules/preprocessing.py
index 0ecef6a..459adba 100644
--- a/deepface/modules/preprocessing.py
+++ b/deepface/modules/preprocessing.py
@@ -1,15 +1,9 @@
 # built-in dependencies
-import os
-from typing import Union, Tuple
-import base64
-from pathlib import Path
-import io
+from typing import Tuple
 
 # 3rd party
 import numpy as np
 import cv2
-import requests
-from PIL import Image
 
 # project dependencies
 from deepface.commons import package_utils
@@ -22,95 +16,6 @@ elif tf_major_version == 2:
     from tensorflow.keras.preprocessing import image
 
 
-def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
-    """
-    Load image from path, url, base64 or numpy array.
-    Args:
-        img: a path, url, base64 or numpy array.
-    Returns:
-        image (numpy array): the loaded image in BGR format
-        image name (str): image name itself
-    """
-
-    # The image is already a numpy array
-    if isinstance(img, np.ndarray):
-        return img, "numpy array"
-
-    if isinstance(img, Path):
-        img = str(img)
-
-    if not isinstance(img, str):
-        raise ValueError(f"img must be numpy array or str but it is {type(img)}")
-
-    # The image is a base64 string
-    if img.startswith("data:image/"):
-        return load_base64(img), "base64 encoded string"
-
-    # The image is a url
-    if img.lower().startswith("http://") or img.lower().startswith("https://"):
-        return load_image_from_web(url=img), img
-
-    # The image is a path
-    if os.path.isfile(img) is not True:
-        raise ValueError(f"Confirm that {img} exists")
-
-    # image must be a file on the system then
-
-    # image name must have english characters
-    if img.isascii() is False:
-        raise ValueError(f"Input image must not have non-english characters - {img}")
-
-    img_obj_bgr = cv2.imread(img)
-    # img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
-    return img_obj_bgr, img
-
-
-def load_image_from_web(url: str) -> np.ndarray:
-    """
-    Loading an image from web
-    Args:
-        url: link for the image
-    Returns:
-        img (np.ndarray): equivalent to pre-loaded image from opencv (BGR format)
-    """
-    response = requests.get(url, stream=True, timeout=60)
-    response.raise_for_status()
-    image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
-    img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
-    return img
-
-
-def load_base64(uri: str) -> np.ndarray:
-    """Load image from base64 string.
-
-    Args:
-        uri: a base64 string.
-
-    Returns:
-        numpy array: the loaded image.
-    """
-
-    encoded_data_parts = uri.split(",")
-
-    if len(encoded_data_parts) < 2:
-        raise ValueError("format error in base64 encoded string")
-
-    encoded_data = encoded_data_parts[1]
-    decoded_bytes = base64.b64decode(encoded_data)
-
-    # similar to find functionality, we are just considering these extensions
-    # content type is safer option than file extension
-    with Image.open(io.BytesIO(decoded_bytes)) as img:
-        file_type = img.format.lower()
-        if file_type not in ["jpeg", "png"]:
-            raise ValueError(f"input image can be jpg or png, but it is {file_type}")
-
-    nparr = np.fromstring(decoded_bytes, np.uint8)
-    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-    # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
-    return img_bgr
-
-
 def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
     """Normalize input image.
 
diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py
index 03541c2..0118634 100644
--- a/deepface/modules/recognition.py
+++ b/deepface/modules/recognition.py
@@ -8,10 +8,9 @@ import time
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-from PIL import Image
 
 # project dependencies
-from deepface.commons import package_utils
+from deepface.commons import image_utils
 from deepface.modules import representation, detection, verification
 from deepface.commons import logger as log
 
@@ -144,7 +143,7 @@ def find(
     pickled_images = [representation["identity"] for representation in representations]
 
     # Get the list of images on storage
-    storage_images = __list_images(path=db_path)
+    storage_images = image_utils.list_images(path=db_path)
 
     if len(storage_images) == 0:
         raise ValueError(f"No item found in {db_path}")
@@ -161,7 +160,7 @@ def find(
         if identity in old_images:
             continue
         alpha_hash = current_representation["hash"]
-        beta_hash = package_utils.find_hash_of_file(identity)
+        beta_hash = image_utils.find_image_hash(identity)
         if alpha_hash != beta_hash:
             logger.debug(f"Even though {identity} represented before, it's replaced later.")
             replaced_images.append(identity)
@@ -292,31 +291,6 @@ def find(
     return resp_obj
 
 
-def __list_images(path: str) -> List[str]:
-    """
-    List images in a given path
-    Args:
-        path (str): path's location
-    Returns:
-        images (list): list of exact image paths
-    """
-    images = []
-    for r, _, f in os.walk(path):
-        for file in f:
-            exact_path = os.path.join(r, file)
-
-            _, ext = os.path.splitext(exact_path)
-            ext_lower = ext.lower()
-
-            if ext_lower not in {".jpg", ".jpeg", ".png"}:
-                continue
-
-            with Image.open(exact_path) as img:  # lazy
-                if img.format.lower() in ["jpeg", "png"]:
-                    images.append(exact_path)
-    return images
-
-
 def __find_bulk_embeddings(
     employees: List[str],
     model_name: str = "VGG-Face",
@@ -360,7 +334,7 @@ def __find_bulk_embeddings(
         desc="Finding representations",
         disable=silent,
     ):
-        file_hash = package_utils.find_hash_of_file(employee)
+        file_hash = image_utils.find_image_hash(employee)
 
         try:
             img_objs = detection.extract_faces(
diff --git a/deepface/modules/representation.py b/deepface/modules/representation.py
index dbaf1c7..9e8a1a6 100644
--- a/deepface/modules/representation.py
+++ b/deepface/modules/representation.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Union
 import numpy as np
 
 # project dependencies
+from deepface.commons import image_utils
 from deepface.modules import modeling, detection, preprocessing
 from deepface.models.FacialRecognition import FacialRecognition
 
@@ -74,7 +75,7 @@ def represent(
         )
     else:  # skip
         # Try load. If load error, will raise exception internal
-        img, _ = preprocessing.load_image(img_path)
+        img, _ = image_utils.load_image(img_path)
 
         if len(img.shape) != 3:
             raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}")
diff --git a/requirements.txt b/requirements.txt
index cf5ce5f..19611a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+requests>=2.27.1
 numpy>=1.14.0
 pandas>=0.23.4
 gdown>=3.10.1
diff --git a/tests/test_extract_faces.py b/tests/test_extract_faces.py
index dfbc7cf..c841eb3 100644
--- a/tests/test_extract_faces.py
+++ b/tests/test_extract_faces.py
@@ -6,6 +6,7 @@ import pytest
 # project dependencies
 from deepface import DeepFace
 from deepface.modules import preprocessing
+from deepface.commons import image_utils
 from deepface.commons import logger as log
 
 logger = log.get_singletonish_logger()
@@ -60,12 +61,12 @@ def test_file_types_while_loading_base64():
     img1_base64 = image_to_base64(image_path=img1_path)
 
     with pytest.raises(ValueError, match="input image can be jpg or png, but it is"):
-        _ = preprocessing.load_base64(uri=img1_base64)
+        _ = image_utils.load_image_from_base64(uri=img1_base64)
 
     img2_path = "dataset/img1.jpg"
     img2_base64 = image_to_base64(image_path=img2_path)
 
-    img2 = preprocessing.load_base64(uri=img2_base64)
+    img2 = image_utils.load_image_from_base64(uri=img2_base64)
 
     # 3 dimensional image should be loaded
     assert len(img2.shape) == 3
diff --git a/tests/test_find.py b/tests/test_find.py
index 09d361f..83d9964 100644
--- a/tests/test_find.py
+++ b/tests/test_find.py
@@ -8,7 +8,7 @@ import pandas as pd
 # project dependencies
 from deepface import DeepFace
 from deepface.modules import verification
-from deepface.modules import recognition
+from deepface.commons import image_utils
 from deepface.commons import logger as log
 
 logger = log.get_singletonish_logger()
@@ -95,7 +95,7 @@ def test_filetype_for_find():
 
 
 def test_filetype_for_find_bulk_embeddings():
-    imgs = recognition.__list_images("dataset")
+    imgs = image_utils.list_images("dataset")
 
     assert len(imgs) > 0
 