feat: add reindex

This commit is contained in:
arkohut 2024-10-13 23:36:02 +08:00
parent 03c1268076
commit 1fbd21183e
6 changed files with 39 additions and 12 deletions

View File

@ -584,7 +584,7 @@ def reindex(
continue continue
update_response = client.post(f"{BASE_URL}/entities/{entity['id']}/last-scan-at") update_response = client.post(f"{BASE_URL}/entities/{entity['id']}/last-scan-at")
if update_response.status_code != 200: if update_response.status_code != 204:
print(f"Failed to update last_scan_at for entity {entity['id']}: {update_response.status_code} - {update_response.text}") print(f"Failed to update last_scan_at for entity {entity['id']}: {update_response.status_code} - {update_response.text}")
else: else:
print(f"Updated last_scan_at for entity {entity['id']}") print(f"Updated last_scan_at for entity {entity['id']}")

View File

@ -18,7 +18,7 @@ import sys
import subprocess import subprocess
import platform import platform
from .cmds.plugin import plugin_app, bind from .cmds.plugin import plugin_app, bind
from .cmds.library import lib_app, scan, index, watch from .cmds.library import lib_app, scan, typesense_index, reindex, watch
import psutil import psutil
import signal import signal
from tabulate import tabulate from tabulate import tabulate
@ -139,8 +139,8 @@ def scan_default_library(force: bool = False):
scan(default_library["id"], plugins=None, folders=None, force=force) scan(default_library["id"], plugins=None, folders=None, force=force)
@app.command("index") @app.command("typesense-index")
def index_default_library( def typsense_index_default_library(
batchsize: int = typer.Option( batchsize: int = typer.Option(
4, "--batchsize", "-bs", help="Number of entities to index in a batch" 4, "--batchsize", "-bs", help="Number of entities to index in a batch"
), ),
@ -164,7 +164,32 @@ def index_default_library(
print("Default library does not exist.") print("Default library does not exist.")
return return
index(default_library["id"], force=force, folders=None, batchsize=batchsize) typesense_index(default_library["id"], force=force, folders=None, batchsize=batchsize)
@app.command("reindex")
def reindex_default_library():
    """
    Reindex the default library for memos.
    """
    # Ask the API server for every library it knows about.
    libraries_resp = httpx.get(f"{BASE_URL}/libraries")
    if libraries_resp.status_code != 200:
        print(f"Failed to retrieve libraries: {libraries_resp.status_code} - {libraries_resp.text}")
        return

    # Take the first library whose name equals the configured default;
    # stop scanning as soon as one is found.
    target = None
    for candidate in libraries_resp.json():
        if candidate["name"] == settings.default_library:
            target = candidate
            break

    if not target:
        print("Default library does not exist.")
        return

    # Hand the matched library's id to the library-level reindex command.
    print(f"Reindexing library: {target['name']}")
    reindex(target["id"])
@app.command("record") @app.command("record")

View File

@ -290,12 +290,15 @@ def update_entity(
return Entity(**db_entity.__dict__) return Entity(**db_entity.__dict__)
def touch_entity(entity_id: int, db: Session): def touch_entity(entity_id: int, db: Session) -> bool:
db_entity = db.query(EntityModel).filter(EntityModel.id == entity_id).first() db_entity = db.query(EntityModel).filter(EntityModel.id == entity_id).first()
if db_entity: if db_entity:
db_entity.last_scan_at = func.now() db_entity.last_scan_at = func.now()
db.commit() db.commit()
db.refresh(db_entity) db.refresh(db_entity)
return True
else:
return False
def update_entity_tags(entity_id: int, tags: List[str], db: Session) -> Entity: def update_entity_tags(entity_id: int, tags: List[str], db: Session) -> Entity:

View File

@ -33,7 +33,7 @@ def init_embedding_model():
model_dir = settings.embedding.model model_dir = settings.embedding.model
logger.info(f"Using model: {model_dir}") logger.info(f"Using model: {model_dir}")
model = SentenceTransformer(model_dir, trust_remote_code=True) model = SentenceTransformer(model_dir, trust_remote_code=True, truncate_dim=768)
model.to(device) model.to(device)
logger.info(f"Embedding model initialized on device: {device}") logger.info(f"Embedding model initialized on device: {device}")

View File

@ -7,7 +7,7 @@ import json
import base64 import base64
from PIL import Image from PIL import Image
import numpy as np import numpy as np
from rapidocr_openvino import RapidOCR from rapidocr_onnxruntime import RapidOCR
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from functools import partial from functools import partial
import yaml import yaml

View File

@ -372,7 +372,7 @@ async def update_entity(
return entity return entity
@app.post("/entities/{entity_id}/last-scan-at", response_model=Entity, tags=["entity"]) @app.post("/entities/{entity_id}/last-scan-at", status_code=status.HTTP_204_NO_CONTENT, tags=["entity"])
def update_entity_last_scan_at( def update_entity_last_scan_at(
entity_id: int, entity_id: int,
db: Session = Depends(get_db) db: Session = Depends(get_db)
@ -380,13 +380,12 @@ def update_entity_last_scan_at(
""" """
Update the last_scan_at timestamp for an entity and trigger update for fts and vec. Update the last_scan_at timestamp for an entity and trigger update for fts and vec.
""" """
entity = crud.touch_entity(entity_id, db) succeeded = crud.touch_entity(entity_id, db)
if entity is None: if not succeeded:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, status_code=status.HTTP_404_NOT_FOUND,
detail="Entity not found", detail="Entity not found",
) )
return entity
def typesense_required(func): def typesense_required(func):