From 38272c48d00b1e024cf48ed51331341c7a42f247 Mon Sep 17 00:00:00 2001 From: arkohut <39525455+arkohut@users.noreply.github.com> Date: Thu, 11 Jul 2024 12:12:54 +0800 Subject: [PATCH] feat(indexing): add api fetch document in typesense --- memos/indexing.py | 32 +++++++++++++++++++++++++++++++- memos/initialize_typesense.py | 6 +++--- memos/server.py | 17 +++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/memos/indexing.py b/memos/indexing.py index b45ce67..18e985f 100644 --- a/memos/indexing.py +++ b/memos/indexing.py @@ -137,7 +137,8 @@ def search_entities( search_parameters = { "q": q, - "query_by": "tags,metadata_entries,filepath,filename,embedding", + "query_by": "filename,filepath,tags,metadata_entries,embedding", + "infix": "always,always,off,off,off", "filter_by": f"{filter_by_str} && file_type_group:=image" if filter_by_str else "file_type_group:=image", "per_page": limit, "page": offset // limit + 1, @@ -174,3 +175,32 @@ def search_entities( raise Exception( f"Failed to search entities: {str(e)}", ) + + +def fetch_entity_by_id(client, id: str) -> EntityIndexItem: + try: + document = client.collections["entities"].documents[id].retrieve() + return EntitySearchResult( + id=document["id"], + filepath=document["filepath"], + filename=document["filename"], + size=document["size"], + file_created_at=document["file_created_at"], + file_last_modified_at=document["file_last_modified_at"], + file_type=document["file_type"], + file_type_group=document["file_type_group"], + last_scan_at=document.get("last_scan_at"), + library_id=document["library_id"], + folder_id=document["folder_id"], + tags=document["tags"], + metadata_entries=[ + MetadataIndexItem( + key=entry["key"], value=entry["value"], source=entry["source"] + ) + for entry in document["metadata_entries"] + ], + ) + except Exception as e: + raise Exception( + f"Failed to fetch document by id: {str(e)}", + ) diff --git a/memos/initialize_typesense.py b/memos/initialize_typesense.py index 8726a4c..35fa317 100644 --- a/memos/initialize_typesense.py +++ b/memos/initialize_typesense.py @@ -21,8 +21,8 @@ schema = { "name": "entities", "enable_nested_fields": True, "fields": [ - {"name": "filepath", "type": "string"}, - {"name": "filename", "type": "string"}, + {"name": "filepath", "type": "string", "infix": True}, + {"name": "filename", "type": "string", "infix": True}, {"name": "size", "type": "int32"}, {"name": "file_created_at", "type": "int64", "facet": False}, {"name": "file_last_modified_at", "type": "int64", "facet": False}, @@ -55,7 +55,7 @@ schema = { "optional": True, }, ], - "token_separators": [":", "/", ".", " ", "-", "\\"], + "token_separators": [":", "/", " ", "\\"], } diff --git a/memos/server.py b/memos/server.py index d8c71ec..16e9a30 100644 --- a/memos/server.py +++ b/memos/server.py @@ -317,6 +317,23 @@ async def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db) return None +@app.get( + "/entities/{entity_id}/index", + response_model=EntitySearchResult, + tags=["entity"], +) +async def get_entity_index(entity_id: int) -> EntityIndexItem: + try: + entity_index_item = indexing.fetch_entity_by_id(client, entity_id) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) + + return entity_index_item + + @app.delete( "/entities/{entity_id}/index", status_code=status.HTTP_204_NO_CONTENT,