From 7c41769516873e443b7f8327b6911a9919a9876b Mon Sep 17 00:00:00 2001 From: arkohut <39525455+arkohut@users.noreply.github.com> Date: Fri, 4 Oct 2024 15:45:29 +0800 Subject: [PATCH] feat(ocr): force max image size 1920 x 1920 --- memos/plugins/ocr/main.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/memos/plugins/ocr/main.py b/memos/plugins/ocr/main.py index 4d1b891..690a139 100644 --- a/memos/plugins/ocr/main.py +++ b/memos/plugins/ocr/main.py @@ -11,6 +11,9 @@ from rapidocr_onnxruntime import RapidOCR from concurrent.futures import ThreadPoolExecutor from functools import partial import yaml +import io + +MAX_THUMBNAIL_SIZE = (1920, 1920) from fastapi import APIRouter, Request, HTTPException from memos.schemas import Entity, MetadataType @@ -35,9 +38,11 @@ logger = logging.getLogger(__name__) def image2base64(img_path): try: with Image.open(img_path) as img: - img.convert("RGB") # Check if image is not broken - with open(img_path, "rb") as image_file: - encoded_string = base64.b64encode(image_file.read()).decode("utf-8") + img = img.convert("RGB") + img.thumbnail(MAX_THUMBNAIL_SIZE) + buffered = io.BytesIO() + img.save(buffered, format="JPEG") + encoded_string = base64.b64encode(buffered.getvalue()).decode("utf-8") return encoded_string except Exception as e: logger.error(f"Error processing image {img_path}: {str(e)}") @@ -71,6 +76,8 @@ def convert_ocr_results(results): def predict_local(img_path): try: with Image.open(img_path) as img: + img = img.convert("RGB") + img.thumbnail(MAX_THUMBNAIL_SIZE) img_array = np.array(img) results, _ = ocr(img_array) return convert_ocr_results(results) @@ -128,7 +135,6 @@ async def ocr(entity: Entity, request: Request): patch_url = f"{location_url}/metadata" ocr_result = await predict(entity.filepath) - logger.info(ocr_result) if not ocr_result: logger.info(f"No OCR result found for file: {entity.filepath}")