checking file type with PIL

imghdr is deprecated (PEP 594; removed in Python 3.13), so the image format is now detected with PIL instead.
This commit is contained in:
Sefik Ilkin Serengil 2024-04-07 10:03:38 +01:00
parent ec0b833569
commit 6c51489636
2 changed files with 16 additions and 4 deletions

View File

@ -3,12 +3,13 @@ import os
from typing import Union, Tuple from typing import Union, Tuple
import base64 import base64
from pathlib import Path from pathlib import Path
import imghdr import io
# 3rd party # 3rd party
import numpy as np import numpy as np
import cv2 import cv2
import requests import requests
from PIL import Image
def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]: def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
@ -86,7 +87,9 @@ def load_base64(uri: str) -> np.ndarray:
encoded_data = encoded_data_parts[1] encoded_data = encoded_data_parts[1]
decoded_bytes = base64.b64decode(encoded_data) decoded_bytes = base64.b64decode(encoded_data)
file_type = imghdr.what(None, h=decoded_bytes)
img = Image.open(io.BytesIO(decoded_bytes))
file_type = img.format.lower()
# similar to find functionality, we are just considering these extensions # similar to find functionality, we are just considering these extensions
# content type is safer option than file extension # content type is safer option than file extension

View File

@ -3,12 +3,12 @@ import os
import pickle import pickle
from typing import List, Union, Optional, Dict, Any from typing import List, Union, Optional, Dict, Any
import time import time
import imghdr
# 3rd party dependencies # 3rd party dependencies
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
from PIL import Image
# project dependencies # project dependencies
from deepface.commons.logger import Logger from deepface.commons.logger import Logger
@ -298,7 +298,16 @@ def __list_images(path: str) -> List[str]:
for r, _, f in os.walk(path): for r, _, f in os.walk(path):
for file in f: for file in f:
exact_path = os.path.join(r, file) exact_path = os.path.join(r, file)
file_type = imghdr.what(exact_path)
_, ext = os.path.splitext(exact_path)
ext_lower = ext.lower()
if ext_lower not in {".jpg", ".jpeg", ".png"}:
continue
img = Image.open(exact_path) # lazy
file_type = img.format.lower()
if file_type in ["jpeg", "png"]: if file_type in ["jpeg", "png"]:
images.append(exact_path) images.append(exact_path)
return images return images