Merge pull request #1173 from serengil/feat-task-0704-find-image-type-with-pil

checking file type with PIL
This commit is contained in:
Sefik Ilkin Serengil 2024-04-07 10:12:53 +01:00 committed by GitHub
commit ba4d193535
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 16 additions and 4 deletions

View File

@@ -3,12 +3,13 @@ import os
from typing import Union, Tuple from typing import Union, Tuple
import base64 import base64
from pathlib import Path from pathlib import Path
import imghdr import io
# 3rd party # 3rd party
import numpy as np import numpy as np
import cv2 import cv2
import requests import requests
from PIL import Image
def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]: def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
@@ -86,7 +87,9 @@ def load_base64(uri: str) -> np.ndarray:
encoded_data = encoded_data_parts[1] encoded_data = encoded_data_parts[1]
decoded_bytes = base64.b64decode(encoded_data) decoded_bytes = base64.b64decode(encoded_data)
file_type = imghdr.what(None, h=decoded_bytes)
img = Image.open(io.BytesIO(decoded_bytes))
file_type = img.format.lower()
# similar to find functionality, we are just considering these extensions # similar to find functionality, we are just considering these extensions
# content type is safer option than file extension # content type is safer option than file extension

View File

@@ -3,12 +3,12 @@ import os
import pickle import pickle
from typing import List, Union, Optional, Dict, Any from typing import List, Union, Optional, Dict, Any
import time import time
import imghdr
# 3rd party dependencies # 3rd party dependencies
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
from PIL import Image
# project dependencies # project dependencies
from deepface.commons.logger import Logger from deepface.commons.logger import Logger
@@ -298,7 +298,16 @@ def __list_images(path: str) -> List[str]:
for r, _, f in os.walk(path): for r, _, f in os.walk(path):
for file in f: for file in f:
exact_path = os.path.join(r, file) exact_path = os.path.join(r, file)
file_type = imghdr.what(exact_path)
_, ext = os.path.splitext(exact_path)
ext_lower = ext.lower()
if ext_lower not in {".jpg", ".jpeg", ".png"}:
continue
img = Image.open(exact_path) # lazy
file_type = img.format.lower()
if file_type in ["jpeg", "png"]: if file_type in ["jpeg", "png"]:
images.append(exact_path) images.append(exact_path)
return images return images