mirror of
https://github.com/serengil/deepface.git
synced 2025-06-07 20:15:21 +00:00
Merge pull request #1420 from PyWoody/image_utils
Optimizations for deepface.recognition.find, Optimization and New Iterator Functionality in image_utils
This commit is contained in:
commit
5bab888411
@ -1,7 +1,7 @@
|
|||||||
# built-in dependencies
|
# built-in dependencies
|
||||||
import os
|
import os
|
||||||
import io
|
import io
|
||||||
from typing import List, Union, Tuple
|
from typing import Generator, List, Union, Tuple
|
||||||
import hashlib
|
import hashlib
|
||||||
import base64
|
import base64
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -14,6 +14,10 @@ from PIL import Image
|
|||||||
from werkzeug.datastructures import FileStorage
|
from werkzeug.datastructures import FileStorage
|
||||||
|
|
||||||
|
|
||||||
|
IMAGE_EXTS = {".jpg", ".jpeg", ".png"}
|
||||||
|
PIL_EXTS = {"jpeg", "png"}
|
||||||
|
|
||||||
|
|
||||||
def list_images(path: str) -> List[str]:
|
def list_images(path: str) -> List[str]:
|
||||||
"""
|
"""
|
||||||
List images in a given path
|
List images in a given path
|
||||||
@ -25,19 +29,31 @@ def list_images(path: str) -> List[str]:
|
|||||||
images = []
|
images = []
|
||||||
for r, _, f in os.walk(path):
|
for r, _, f in os.walk(path):
|
||||||
for file in f:
|
for file in f:
|
||||||
exact_path = os.path.join(r, file)
|
if os.path.splitext(file)[1].lower() in IMAGE_EXTS:
|
||||||
|
exact_path = os.path.join(r, file)
|
||||||
ext_lower = os.path.splitext(exact_path)[-1].lower()
|
with Image.open(exact_path) as img: # lazy
|
||||||
|
if img.format.lower() in PIL_EXTS:
|
||||||
if ext_lower not in {".jpg", ".jpeg", ".png"}:
|
images.append(exact_path)
|
||||||
continue
|
|
||||||
|
|
||||||
with Image.open(exact_path) as img: # lazy
|
|
||||||
if img.format.lower() in {"jpeg", "png"}:
|
|
||||||
images.append(exact_path)
|
|
||||||
return images
|
return images
|
||||||
|
|
||||||
|
|
||||||
|
def yield_images(path: str) -> Generator[str, None, None]:
|
||||||
|
"""
|
||||||
|
Yield images in a given path
|
||||||
|
Args:
|
||||||
|
path (str): path's location
|
||||||
|
Yields:
|
||||||
|
image (str): image path
|
||||||
|
"""
|
||||||
|
for r, _, f in os.walk(path):
|
||||||
|
for file in f:
|
||||||
|
if os.path.splitext(file)[1].lower() in IMAGE_EXTS:
|
||||||
|
exact_path = os.path.join(r, file)
|
||||||
|
with Image.open(exact_path) as img: # lazy
|
||||||
|
if img.format.lower() in PIL_EXTS:
|
||||||
|
yield exact_path
|
||||||
|
|
||||||
|
|
||||||
def find_image_hash(file_path: str) -> str:
|
def find_image_hash(file_path: str) -> str:
|
||||||
"""
|
"""
|
||||||
Find the hash of given image file with its properties
|
Find the hash of given image file with its properties
|
||||||
|
@ -136,7 +136,7 @@ def find(
|
|||||||
representations = []
|
representations = []
|
||||||
|
|
||||||
# required columns for representations
|
# required columns for representations
|
||||||
df_cols = [
|
df_cols = {
|
||||||
"identity",
|
"identity",
|
||||||
"hash",
|
"hash",
|
||||||
"embedding",
|
"embedding",
|
||||||
@ -144,7 +144,7 @@ def find(
|
|||||||
"target_y",
|
"target_y",
|
||||||
"target_w",
|
"target_w",
|
||||||
"target_h",
|
"target_h",
|
||||||
]
|
}
|
||||||
|
|
||||||
# Ensure the proper pickle file exists
|
# Ensure the proper pickle file exists
|
||||||
if not os.path.exists(datastore_path):
|
if not os.path.exists(datastore_path):
|
||||||
@ -157,18 +157,15 @@ def find(
|
|||||||
|
|
||||||
# check each item of representations list has required keys
|
# check each item of representations list has required keys
|
||||||
for i, current_representation in enumerate(representations):
|
for i, current_representation in enumerate(representations):
|
||||||
missing_keys = set(df_cols) - set(current_representation.keys())
|
missing_keys = df_cols - set(current_representation.keys())
|
||||||
if len(missing_keys) > 0:
|
if len(missing_keys) > 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"{i}-th item does not have some required keys - {missing_keys}."
|
f"{i}-th item does not have some required keys - {missing_keys}."
|
||||||
f"Consider to delete {datastore_path}"
|
f"Consider to delete {datastore_path}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# embedded images
|
|
||||||
pickled_images = [representation["identity"] for representation in representations]
|
|
||||||
|
|
||||||
# Get the list of images on storage
|
# Get the list of images on storage
|
||||||
storage_images = image_utils.list_images(path=db_path)
|
storage_images = set(image_utils.yield_images(path=db_path))
|
||||||
|
|
||||||
if len(storage_images) == 0 and refresh_database is True:
|
if len(storage_images) == 0 and refresh_database is True:
|
||||||
raise ValueError(f"No item found in {db_path}")
|
raise ValueError(f"No item found in {db_path}")
|
||||||
@ -186,8 +183,13 @@ def find(
|
|||||||
|
|
||||||
# Enforce data consistency amongst on disk images and pickle file
|
# Enforce data consistency amongst on disk images and pickle file
|
||||||
if refresh_database:
|
if refresh_database:
|
||||||
new_images = set(storage_images) - set(pickled_images) # images added to storage
|
# embedded images
|
||||||
old_images = set(pickled_images) - set(storage_images) # images removed from storage
|
pickled_images = {
|
||||||
|
representation["identity"] for representation in representations
|
||||||
|
}
|
||||||
|
|
||||||
|
new_images = storage_images - pickled_images # images added to storage
|
||||||
|
old_images = pickled_images - storage_images # images removed from storage
|
||||||
|
|
||||||
# detect replaced images
|
# detect replaced images
|
||||||
for current_representation in representations:
|
for current_representation in representations:
|
||||||
|
@ -95,12 +95,23 @@ def test_filetype_for_find():
|
|||||||
|
|
||||||
|
|
||||||
def test_filetype_for_find_bulk_embeddings():
|
def test_filetype_for_find_bulk_embeddings():
|
||||||
imgs = image_utils.list_images("dataset")
|
# List
|
||||||
|
list_imgs = image_utils.list_images("dataset")
|
||||||
|
|
||||||
assert len(imgs) > 0
|
assert len(list_imgs) > 0
|
||||||
|
|
||||||
# img47 is webp even though its extension is jpg
|
# img47 is webp even though its extension is jpg
|
||||||
assert "dataset/img47.jpg" not in imgs
|
assert "dataset/img47.jpg" not in list_imgs
|
||||||
|
|
||||||
|
# Generator
|
||||||
|
gen_imgs = list(image_utils.yield_images("dataset"))
|
||||||
|
|
||||||
|
assert len(gen_imgs) > 0
|
||||||
|
|
||||||
|
# img47 is webp even though its extension is jpg
|
||||||
|
assert "dataset/img47.jpg" not in gen_imgs
|
||||||
|
|
||||||
|
assert gen_imgs == list_imgs
|
||||||
|
|
||||||
|
|
||||||
def test_find_without_refresh_database():
|
def test_find_without_refresh_database():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user