feat: support extracting raw images from video

This commit is contained in:
arkohut 2024-08-13 15:35:48 +08:00
parent 615a938e5c
commit 016264f13f
6 changed files with 168 additions and 44 deletions

View File

@ -3,34 +3,42 @@ import json
import argparse import argparse
from PIL import Image, PngImagePlugin from PIL import Image, PngImagePlugin
def read_metadata(image_path):
    """Read the JSON metadata embedded in an image file.

    Two locations are inspected: the EXIF ``ImageDescription`` field and,
    for PNG files, the ``Description`` entry of the image info. Whatever is
    found is printed and collected.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        A dict with an ``"exif"`` and/or ``"png"`` key holding the decoded
        JSON payload, or ``None`` when nothing was found or any error
        occurred (errors are printed, never raised).
    """
    try:
        # Use a context manager so the underlying file handle is closed
        # promptly; only the already-parsed ``info`` dict is needed afterwards.
        with Image.open(image_path) as img:
            exif_data = img.info.get("exif")
            png_info = (
                img.info if isinstance(img, PngImagePlugin.PngImageFile) else None
            )

        if not exif_data and not png_info:
            print("No EXIF or PNG metadata found.")
            return None

        metadata = {}

        if exif_data:
            exif_dict = piexif.load(exif_data)
            metadata_json = exif_dict["0th"].get(piexif.ImageIFD.ImageDescription)
            if metadata_json:
                metadata["exif"] = json.loads(metadata_json.decode())
                print("EXIF Metadata:", json.dumps(metadata["exif"], indent=4))
            else:
                print("No metadata found in the ImageDescription field of EXIF.")

        if png_info:
            metadata_json = png_info.get("Description")
            if metadata_json:
                metadata["png"] = json.loads(metadata_json)
                print("PNG Metadata:", json.dumps(metadata["png"], indent=4))
            else:
                print("No metadata found in the Description field of PNG.")

        return metadata if metadata else None

    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "no metadata".
        print(f"An error occurred: {str(e)}")
        return None
def main(): def main():
parser = argparse.ArgumentParser(description="Read metadata from a screenshot") parser = argparse.ArgumentParser(description="Read metadata from a screenshot")
@ -39,5 +47,6 @@ def main():
read_metadata(args.image_path) read_metadata(args.image_path)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -13,6 +13,9 @@ from typing import List, Annotated
from pathlib import Path from pathlib import Path
import asyncio import asyncio
import logging # Import logging module import logging # Import logging module
import cv2
from PIL import Image
from .read_metadata import read_metadata
import typesense import typesense
@ -454,7 +457,9 @@ async def search_entities(
end: int = None, end: int = None,
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
library_ids = [int(id) for id in library_ids.split(",") if id] if library_ids else None library_ids = (
[int(id) for id in library_ids.split(",") if id] if library_ids else None
)
folder_ids = [int(id) for id in folder_ids.split(",") if id] if folder_ids else None folder_ids = [int(id) for id in folder_ids.split(",") if id] if folder_ids else None
try: try:
return indexing.search_entities( return indexing.search_entities(
@ -567,6 +572,72 @@ def add_library_plugin(
crud.add_plugin_to_library(library_id, new_plugin.plugin_id, db) crud.add_plugin_to_library(library_id, new_plugin.plugin_id, db)
def is_image(file_path: Path) -> bool:
    """Tell whether a path has a PNG or JPEG extension (case-insensitive)."""
    extension = file_path.suffix.lower()
    return extension in (".png", ".jpg", ".jpeg")
def get_thumbnail_info(metadata: dict) -> tuple:
    """Extract the screen name and frame sequence from image metadata.

    Args:
        metadata: Mapping with an optional ``"exif"`` and/or ``"png"`` entry,
            each expected to be a dict that may carry ``"screen_name"`` and
            ``"sequence"``. The ``"exif"`` entry wins when it is non-empty.

    Returns:
        ``(screen_name, sequence, is_thumbnail)``.
        ``(None, None, None)`` when ``metadata`` is empty or ``None``;
        ``(None, None, False)`` when no sequence number is recorded.
    """
    if not metadata:
        return None, None, None

    # Either entry may be missing or None; fall back to an empty dict so the
    # lookups below never hit a non-mapping.
    meta = metadata.get("exif") or metadata.get("png") or {}
    if not isinstance(meta, dict):
        return None, None, False

    sequence = meta.get("sequence")
    # Compare against None explicitly: sequence 0 is a valid (first) frame
    # and must not be mistaken for "no sequence".
    if sequence is None:
        return None, None, False

    return meta.get("screen_name"), sequence, True
def extract_video_frame(video_path: Path, frame_number: int) -> Image.Image | None:
    """Decode a single frame of a video into a PIL image.

    Args:
        video_path: Path of the video file to read.
        frame_number: Zero-based index of the frame to seek to.

    Returns:
        The frame as an RGB ``PIL.Image.Image``, or ``None`` when the video
        cannot be opened or the frame cannot be read.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        # A capture that failed to open would only yield ret=False below;
        # bail out early instead of seeking on it.
        if not cap.isOpened():
            return None
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
    finally:
        # Always free the decoder, even if seeking/reading raises.
        cap.release()

    if not ret:
        return None

    # OpenCV delivers BGR channel order; PIL expects RGB.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return Image.fromarray(frame_rgb)
@app.get("/files/video/{file_path:path}", tags=["files"])
async def get_video_frame(file_path: str):
    """Serve the full-resolution video frame behind a thumbnail image.

    The requested file is returned unchanged unless it is an image whose
    embedded metadata names a screen and a frame sequence AND a sibling
    ``<screen>.mp4`` exists from which that frame can be decoded. In that
    case the decoded frame is written to a temporary file and served inline
    under the original file name.

    Args:
        file_path: Path of the requested file, resolved against ``/``.

    Raises:
        HTTPException: 404 when the resolved path is not an existing file.
    """
    import tempfile

    # NOTE(review): the path is resolved against the filesystem root with no
    # allow-list, so any readable file can be requested — confirm this is
    # restricted elsewhere (e.g. to library folders).
    full_path = Path("/") / file_path.strip("/")

    if not full_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    if not is_image(full_path):
        return FileResponse(full_path)

    metadata = read_metadata(str(full_path))
    screen, sequence, is_thumbnail = get_thumbnail_info(metadata)
    logging.debug(
        "thumbnail info for %s: screen=%s sequence=%s is_thumbnail=%s",
        full_path,
        screen,
        sequence,
        is_thumbnail,
    )

    # Check sequence against None rather than truthiness: frame 0 is valid.
    if not is_thumbnail or not screen or sequence is None:
        return FileResponse(full_path)

    video_path = full_path.parent / f"{screen}.mp4"
    logging.debug("looking for source video at %s", video_path)

    if not video_path.is_file():
        return FileResponse(full_path)

    frame_image = extract_video_frame(video_path, sequence)
    if frame_image is None:
        return FileResponse(full_path)

    # Use a unique temp file per request: a fixed name derived only from the
    # file name would let concurrent requests (or same-named files in
    # different folders) overwrite each other's output mid-response.
    # The suffix is kept so the image format is still inferred from it.
    with tempfile.NamedTemporaryFile(
        prefix="temp_", suffix=full_path.suffix, delete=False
    ) as tmp:
        temp_path = Path(tmp.name)
    frame_image.save(temp_path)
    # TODO(review): the temp file is not removed after the response is sent.

    return FileResponse(
        temp_path, headers={"Content-Disposition": f"inline; filename={full_path.name}"}
    )
@app.get("/files/{file_path:path}", tags=["files"]) @app.get("/files/{file_path:path}", tags=["files"])
async def get_file(file_path: str): async def get_file(file_path: str):
full_path = Path("/") / file_path.strip("/") full_path = Path("/") / file_path.strip("/")

View File

@ -10,33 +10,35 @@ from PIL.PngImagePlugin import PngInfo
from multiprocessing import Pool, Manager from multiprocessing import Pool, Manager
from tqdm import tqdm from tqdm import tqdm
parser = argparse.ArgumentParser(description='Compress and save image(s) with metadata') parser = argparse.ArgumentParser(description="Compress and save image(s) with metadata")
parser.add_argument('path', type=str, help='path to the directory or image file') parser.add_argument("path", type=str, help="path to the directory or image file")
args = parser.parse_args() args = parser.parse_args()
input_path = args.path.rstrip('/') input_path = args.path.rstrip("/")
def compress_and_save_image(image_path, order): def compress_and_save_image(image_path, order):
# Open the image # Open the image
img = Image.open(image_path) img = Image.open(image_path)
if image_path.endswith(('.jpg', '.jpeg', '.tiff')): if image_path.endswith((".jpg", ".jpeg", ".tiff")):
# Add order to the image metadata for JPEG/TIFF # Add order to the image metadata for JPEG/TIFF
exif_dict = piexif.load(image_path) exif_dict = piexif.load(image_path)
existing_description = exif_dict["0th"].get(piexif.ImageIFD.ImageDescription, b'{}') existing_description = exif_dict["0th"].get(
piexif.ImageIFD.ImageDescription, b"{}"
)
try: try:
existing_data = json.loads(existing_description.decode('utf-8')) existing_data = json.loads(existing_description.decode("utf-8"))
except json.JSONDecodeError: except json.JSONDecodeError:
existing_data = {} existing_data = {}
existing_data["sequence"] = order existing_data["sequence"] = order
existing_data["is_thumbnail"] = True existing_data["is_thumbnail"] = True
updated_description = json.dumps(existing_data).encode('utf-8') updated_description = json.dumps(existing_data).encode("utf-8")
exif_dict["0th"][piexif.ImageIFD.ImageDescription] = updated_description exif_dict["0th"][piexif.ImageIFD.ImageDescription] = updated_description
exif_bytes = piexif.dump(exif_dict) exif_bytes = piexif.dump(exif_dict)
elif image_path.endswith('.png'): elif image_path.endswith(".png"):
# Add order to the image metadata for PNG # Add order to the image metadata for PNG
metadata = PngInfo() metadata = PngInfo()
existing_description = img.info.get("Description", '{}') existing_description = img.info.get("Description", "{}")
try: try:
existing_data = json.loads(existing_description) existing_data = json.loads(existing_description)
except json.JSONDecodeError: except json.JSONDecodeError:
@ -51,31 +53,35 @@ def compress_and_save_image(image_path, order):
# Compress the image # Compress the image
img = img.convert("RGB") img = img.convert("RGB")
if image_path.endswith('.png'): if image_path.endswith(".png"):
img.save(image_path, "PNG", optimize=True, pnginfo=metadata) img.save(image_path, "PNG", optimize=True, pnginfo=metadata)
else: else:
img.save(image_path, "JPEG", quality=30) # Lower quality for higher compression img.save(image_path, "JPEG", quality=30) # Lower quality for higher compression
# Resize the image proportionally # Resize the image proportionally
max_size = (960, 960) # Define the maximum size for the thumbnail max_size = (960, 960) # Define the maximum size for the thumbnail
img.thumbnail(max_size) img.thumbnail(max_size)
if image_path.endswith('.png'): if image_path.endswith(".png"):
img.save(image_path, "PNG", optimize=True, pnginfo=metadata) img.save(image_path, "PNG", optimize=True, pnginfo=metadata)
else: else:
img.save(image_path, "JPEG", quality=30) # Lower quality for higher compression img.save(image_path, "JPEG", quality=30) # Lower quality for higher compression
if image_path.endswith(('.jpg', '.jpeg', '.tiff')): if image_path.endswith((".jpg", ".jpeg", ".tiff")):
# Insert updated EXIF data for JPEG/TIFF # Insert updated EXIF data for JPEG/TIFF
piexif.insert(exif_bytes, image_path) piexif.insert(exif_bytes, image_path)
return image_path return image_path
def process_image(args): def process_image(args):
filename, screens = args filename, screens = args
if filename.endswith(('.jpg', '.png')): # consider files with .jpg or .png extension if filename.endswith(
parts = filename.split('-of-') # split the file name at the "-of-" string (".jpg", ".png")
display_name = parts[-1].rsplit('.', 1)[0] # get the last part and remove the extension ): # consider files with .jpg or .png extension
parts = filename.split("-of-") # split the file name at the "-of-" string
display_name = parts[-1].rsplit(".", 1)[
0
] # get the last part and remove the extension
screens.append(display_name) # add the display name to the set of screens screens.append(display_name) # add the display name to the set of screens
# call the function with the filename of the image # call the function with the filename of the image
@ -87,15 +93,27 @@ def process_directory(directory):
with Manager() as manager: with Manager() as manager:
screens = manager.list() screens = manager.list()
with Pool(min(8, os.cpu_count())) as p: with Pool(min(8, os.cpu_count())) as p:
list(tqdm(p.imap(process_image, [(filename, screens) for filename in os.listdir(directory)]), total=len(os.listdir(directory)))) list(
tqdm(
p.imap(
process_image,
[(filename, screens) for filename in os.listdir(directory)],
),
total=len(os.listdir(directory)),
)
)
screens = set(screens) screens = set(screens)
print(screens) print(screens)
for screen in screens: for screen in screens:
# Check if there are jpg or png files for the screen # Check if there are jpg or png files for the screen
jpg_files = [f for f in os.listdir(directory) if f.endswith('.jpg') and screen in f] jpg_files = [
png_files = [f for f in os.listdir(directory) if f.endswith('.png') and screen in f] f for f in os.listdir(directory) if f.endswith(".jpg") and screen in f
]
png_files = [
f for f in os.listdir(directory) if f.endswith(".png") and screen in f
]
if jpg_files: if jpg_files:
input_pattern = f"{directory}/*{screen}*.jpg" input_pattern = f"{directory}/*{screen}*.jpg"
@ -107,7 +125,7 @@ def process_directory(directory):
continue # Skip if no matching files are found continue # Skip if no matching files are found
# Create the frames.txt file # Create the frames.txt file
with open(f"{directory}/{screen}.frames.txt", 'w') as f: with open(f"{directory}/{screen}.frames.txt", "w") as f:
for frame, filename in enumerate(sorted(files)): for frame, filename in enumerate(sorted(files)):
f.write(f"{frame},{filename}\n") f.write(f"{frame},{filename}\n")
@ -115,21 +133,39 @@ def process_directory(directory):
command = f"ffmpeg -y -framerate 15 -pattern_type glob -i '{input_pattern}' -c:v libx264 -pix_fmt yuv420p {directory}/{screen}.mp4" command = f"ffmpeg -y -framerate 15 -pattern_type glob -i '{input_pattern}' -c:v libx264 -pix_fmt yuv420p {directory}/{screen}.mp4"
# Start the process # Start the process
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) process = subprocess.Popen(
command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=True,
)
# Print the output in real-time # Print the output in real-time
for line in process.stdout: for line in process.stdout:
print(line, end='') print(line, end="")
# Compress and save all images after video generation # Compress and save all images after video generation
for screen in screens: for screen in screens:
jpg_pattern = f"{directory}/*{screen}*.jpg" # Check if there are jpg or png files for the screen
png_pattern = f"{directory}/*{screen}*.png" jpg_files = [
f for f in os.listdir(directory) if f.endswith(".jpg") and screen in f
for pattern in [jpg_pattern, png_pattern]: ]
files = glob.glob(pattern) png_files = [
for order, input_path in enumerate(tqdm(files, desc=f"Compressing {screen} images", unit="file")): f for f in os.listdir(directory) if f.endswith(".png") and screen in f
compress_and_save_image(input_path, order) ]
if jpg_files:
files = jpg_files
elif png_files:
files = png_files
else:
continue # Skip if no matching files are found
for frame, filename in enumerate(
tqdm(sorted(files), desc=f"Compressing {screen} images", unit="file")
):
compress_and_save_image(os.path.join(directory, filename), frame)
# for filename in os.listdir(directory): # for filename in os.listdir(directory):
# if filename.endswith(('.jpg', '.png')): # if filename.endswith(('.jpg', '.png')):
@ -144,5 +180,6 @@ def main():
else: else:
print("Invalid path. Please provide a valid directory or file path.") print("Invalid path. Please provide a valid directory or file path.")
if __name__ == '__main__':
main() if __name__ == "__main__":
main()

View File

@ -25,6 +25,8 @@ setup(
'magika', 'magika',
'pydantic-settings', 'pydantic-settings',
'typesense', 'typesense',
'opencv-python',
'pillow',
], ],
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
@ -32,4 +34,4 @@ setup(
], ],
}, },
python_requires='>=3.10', python_requires='>=3.10',
) )

View File

@ -25,6 +25,10 @@
* @type {any} * @type {any}
*/ */
export let image; export let image;
/**
* @type {string}
*/
export let video;
/** /**
* @type {string} * @type {string}
*/ */
@ -107,7 +111,7 @@
<div class="flex flex-col md:flex-row h-full"> <div class="flex flex-col md:flex-row h-full">
<!-- Image container --> <!-- Image container -->
<div class="flex-none w-full md:w-1/2 h-full"> <div class="flex-none w-full md:w-1/2 h-full">
<a href={image} target="_blank" rel="noopener noreferrer"> <a href={video} target="_blank" rel="noopener noreferrer">
<img class="w-full h-full object-contain" src={image} alt={title} /> <img class="w-full h-full object-contain" src={image} alt={title} />
</a> </a>
</div> </div>

View File

@ -180,6 +180,7 @@
library_id={searchResults[selectedImage].library_id} library_id={searchResults[selectedImage].library_id}
folder_id={searchResults[selectedImage].folder_id} folder_id={searchResults[selectedImage].folder_id}
image={`${apiEndpoint}/files/${searchResults[selectedImage].filepath}`} image={`${apiEndpoint}/files/${searchResults[selectedImage].filepath}`}
video={`${apiEndpoint}/files/video/${searchResults[selectedImage].filepath}`}
created_at={searchResults[selectedImage].file_created_at} created_at={searchResults[selectedImage].file_created_at}
filepath={searchResults[selectedImage].filepath} filepath={searchResults[selectedImage].filepath}
title={filename(searchResults[selectedImage].filepath)} title={filename(searchResults[selectedImage].filepath)}