Add Typesense indexing for entities.

Adds Typesense connection settings, an indexing module that builds and
upserts entity documents, a script that creates the `entities` collection,
and a POST /entities/{entity_id}/index endpoint.

Note: the post-image blob hashes on the `index` lines of memos/indexing.py,
memos/initialize_typesense.py and memos/server.py predate the revised hunks.

diff --git a/memos/config.py b/memos/config.py
index ca855a9..ff5c9ea 100644
--- a/memos/config.py
+++ b/memos/config.py
@@ -8,6 +8,11 @@ class Settings(BaseSettings):
 
     base_dir: str = str(Path.home() / ".memos")
     database_path: str = os.path.join(base_dir, "database.db")
+    typesense_host: str = "localhost"
+    typesense_port: str = "8108"
+    typesense_protocol: str = "http"
+    typesense_api_key: str = "xyz"
+    typesense_connection_timeout_seconds: int = 2
 
 
 settings = Settings()
diff --git a/memos/indexing.py b/memos/indexing.py
new file mode 100644
index 0000000..5dbc0a6
--- /dev/null
+++ b/memos/indexing.py
@@ -0,0 +1,83 @@
+import json
+
+from .schemas import MetadataType, EntityMetadata
+
+
+def convert_metadata_value(metadata: EntityMetadata):
+    """Return ``metadata.value`` converted according to ``metadata.data_type``.
+
+    NUMBER_DATA values are parsed as ``int`` first and as ``float`` if that
+    fails; JSON_DATA values are decoded with ``json.loads``; values of any
+    other type are returned unchanged.
+
+    Raises:
+        ValueError: if a NUMBER_DATA value is not a valid number or a
+            JSON_DATA value is not valid JSON.
+    """
+    if metadata.data_type == MetadataType.NUMBER_DATA:
+        try:
+            return int(metadata.value)
+        except ValueError:
+            return float(metadata.value)
+    elif metadata.data_type == MetadataType.JSON_DATA:
+        return json.loads(metadata.value)
+    else:
+        return metadata.value
+
+
+def _metadata_text(metadata: EntityMetadata) -> str:
+    """Render one metadata entry as a ``key: ...`` / ``value:`` text block."""
+    value = metadata.value
+    if metadata.data_type == MetadataType.JSON_DATA:
+        # Re-serialize JSON values with a two-space indent.
+        value = json.dumps(json.loads(value), indent=2)
+    return f"key: {metadata.key}\nvalue:\n{value}"
+
+
+def upsert(client, entity):
+    """Build a search document from ``entity`` and upsert it into Typesense.
+
+    Args:
+        client: Typesense client; the document is written to its
+            ``entities`` collection.
+        entity: Entity whose fields, tags and metadata entries are indexed.
+
+    Raises:
+        Exception: if the Typesense upsert call fails. The underlying error
+            is chained as ``__cause__``.
+    """
+    # Prepare the entity data for Typesense
+    entity_data = {
+        "id": str(entity.id),
+        "filepath": entity.filepath,
+        "filename": entity.filename,
+        "size": entity.size,
+        "file_created_at": int(entity.file_created_at.timestamp()),
+        "file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
+        "file_type": entity.file_type,
+        "file_type_group": entity.file_type_group,
+        "last_scan_at": (
+            int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
+        ),
+        "library_id": entity.library_id,
+        "folder_id": entity.folder_id,
+        "tags": [tag.name for tag in entity.tags],
+        "metadata_entries": [
+            {
+                "key": metadata.key,
+                "value": convert_metadata_value(metadata),
+                "source": metadata.source,
+            }
+            for metadata in entity.metadata_entries
+        ],
+        "metadata_text": "\n\n".join(
+            _metadata_text(metadata) for metadata in entity.metadata_entries
+        ),
+    }
+
+    # Sync the entity data to Typesense
+    try:
+        client.collections["entities"].documents.upsert(entity_data)
+    except Exception as e:
+        # Chain the original error so its traceback is preserved.
+        raise Exception(f"Failed to sync entity to Typesense: {str(e)}") from e
diff --git a/memos/initialize_typesense.py b/memos/initialize_typesense.py
new file mode 100644
index 0000000..2b2d8d4
--- /dev/null
+++ b/memos/initialize_typesense.py
@@ -0,0 +1,72 @@
+import typesense
+from memos.config import settings
+
+# Initialize Typesense client
+client = typesense.Client(
+    {
+        "nodes": [
+            {
+                "host": settings.typesense_host,
+                "port": settings.typesense_port,
+                "protocol": settings.typesense_protocol,
+            }
+        ],
+        "api_key": settings.typesense_api_key,
+        "connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
+    }
+)
+
+# Define the schema for the Typesense collection
+schema = {
+    "name": "entities",
+    "enable_nested_fields": True,
+    "fields": [
+        {"name": "filepath", "type": "string"},
+        {"name": "filename", "type": "string"},
+        {"name": "size", "type": "int64"},  # file sizes can exceed the int32 range
+        {"name": "file_created_at", "type": "int64", "facet": False},
+        {"name": "file_last_modified_at", "type": "int64", "facet": False},
+        {"name": "file_type", "type": "string", "facet": True},
+        {"name": "file_type_group", "type": "string", "facet": True},
+        {"name": "last_scan_at", "type": "int64", "facet": False, "optional": True},
+        {"name": "library_id", "type": "int32", "facet": True},
+        {"name": "folder_id", "type": "int32", "facet": True},
+        {
+            "name": "tags",
+            "type": "string[]",
+            "facet": True,
+            "optional": True,
+            "locale": "zh",
+        },
+        {
+            "name": "metadata_entries",
+            "type": "object[]",
+            "optional": True,
+            "locale": "zh",
+        },
+        {"name": "metadata_text", "type": "string", "optional": True, "locale": "zh"},
+    ],
+    "token_separators": [":", "/", ".", " ", "-", "\\"],
+}
+
+
+if __name__ == "__main__":
+
+    import sys
+
+    # Check if "--force" parameter is provided
+    force_recreate = "--force" in sys.argv
+
+    # Drop the collection if it exists and "--force" parameter is provided
+    if force_recreate:
+        try:
+            client.collections["entities"].delete()
+            print("Existing Typesense collection 'entities' deleted successfully.")
+        except Exception as e:
+            print(
+                f"Failed to delete existing Typesense collection 'entities': {str(e)}"
+            )
+
+    # Recreate the collection in Typesense
+    client.collections.create(schema)
+    print("Typesense collection 'entities' created successfully.")
diff --git a/memos/server.py b/memos/server.py
index b327c61..e07e9aa 100644
--- a/memos/server.py
+++ b/memos/server.py
@@ -6,9 +6,14 @@ from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 from typing import List, Annotated
 import asyncio
+import json
+
+import typesense
+from memos.config import settings
 
 from .config import get_database_path
 import memos.crud as crud
+import memos.indexing as indexing
 from .schemas import (
     Library,
     Folder,
@@ -22,11 +27,27 @@ from .schemas import (
     NewLibraryPluginParam,
     UpdateEntityTagsParam,
     UpdateEntityMetadataParam,
+    MetadataType,
 )
 
 engine = create_engine(f"sqlite:///{get_database_path()}")
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 
+# Initialize Typesense client
+client = typesense.Client(
+    {
+        "nodes": [
+            {
+                "host": settings.typesense_host,
+                "port": settings.typesense_port,
+                "protocol": settings.typesense_protocol,
+            }
+        ],
+        "api_key": settings.typesense_api_key,
+        "connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
+    }
+)
+
 app = FastAPI()
 
 
@@ -103,11 +124,7 @@ async def trigger_webhooks(library: Library, entity: Entity, request: Request):
         tasks = []
         for plugin in library.plugins:
             if plugin.webhook_url:
-                location = str(
-                    request.url_for(
-                        "get_entity_by_id", entity_id=entity.id
-                    )
-                )
+                location = str(request.url_for("get_entity_by_id", entity_id=entity.id))
                 task = client.post(
                     plugin.webhook_url,
                     json=entity.model_dump(mode="json"),
@@ -193,7 +210,9 @@ def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
 
 
 @app.get("/libraries/{library_id}/entities/{entity_id}", response_model=Entity)
-def get_entity_by_id_in_library(library_id: int, entity_id: int, db: Session = Depends(get_db)):
+def get_entity_by_id_in_library(
+    library_id: int, entity_id: int, db: Session = Depends(get_db)
+):
     entity = crud.get_entity_by_id(entity_id, db)
     if entity is None or entity.library_id != library_id:
         raise HTTPException(
@@ -228,12 +247,35 @@ async def update_entity(
 
     return entity
 
+
+@app.post("/entities/{entity_id}/index", status_code=status.HTTP_204_NO_CONTENT)
+def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db)):
+    """Index an entity in Typesense.
+
+    Declared with plain ``def`` because the Typesense client call is
+    blocking; FastAPI runs such handlers in a threadpool.
+    """
+    entity = crud.get_entity_by_id(entity_id, db)
+    if entity is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Entity not found",
+        )
+
+    try:
+        indexing.upsert(client, entity)
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=str(e),
+        ) from e
+    return None
+
+
 @app.patch("/entities/{entity_id}/tags", response_model=Entity)
 @app.put("/entities/{entity_id}/tags", response_model=Entity)
 def patch_entity_tags(
-    entity_id: int,
-    update_tags: UpdateEntityTagsParam,
-    db: Session = Depends(get_db)
+    entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
 ):
     entity = crud.get_entity_by_id(entity_id, db)
     if entity is None:
@@ -251,7 +293,7 @@ def patch_entity_tags(
 def patch_entity_metadata(
     entity_id: int,
     update_metadata: UpdateEntityMetadataParam,
-    db: Session = Depends(get_db)
+    db: Session = Depends(get_db),
 ):
     entity = crud.get_entity_by_id(entity_id, db)
     if entity is None:
@@ -261,11 +303,12 @@ def patch_entity_metadata(
         )
 
     # Use the CRUD function to update the metadata entries
-    entity = crud.update_entity_metadata_entries(entity_id, update_metadata.metadata_entries, db)
+    entity = crud.update_entity_metadata_entries(
+        entity_id, update_metadata.metadata_entries, db
+    )
     return entity
-
 
 
 @app.delete(
     "/libraries/{library_id}/entities/{entity_id}",
     status_code=status.HTTP_204_NO_CONTENT,
diff --git a/requirements.txt b/requirements.txt
index 8565912..276e577 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ typer
 tabulate
 magika
 pydantic-settings
+typesense
diff --git a/setup.py b/setup.py
index dfa97aa..028d97a 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,8 @@ setup(
         'sqlalchemy',
         'typer',
         'magika',
-        'pydantic-settings'
+        'pydantic-settings',
+        'typesense',
     ],
     entry_points={
         'console_scripts': [