diff --git a/memos/commands.py b/memos/commands.py
index 652cac8..fd3ace6 100644
--- a/memos/commands.py
+++ b/memos/commands.py
@@ -23,6 +23,7 @@
 BASE_URL = "http://localhost:8080"
 ignore_files = [".DS_Store"]
 
+
 def format_timestamp(timestamp):
     if isinstance(timestamp, str):
         return timestamp
@@ -55,7 +56,9 @@ def display_libraries(libraries):
             ]
         )
 
-    print(tabulate(table, headers=["ID", "Name", "Folders", "Plugins"], tablefmt="plain"))
+    print(
+        tabulate(table, headers=["ID", "Name", "Folders", "Plugins"], tablefmt="plain")
+    )
 
 
 @app.command()
@@ -253,6 +256,91 @@ def scan(library_id: int):
     print(f"Total files deleted: {total_files_deleted}")
 
 
+@lib_app.command("index")
+def index(library_id: int):
+    print(f"Indexing library {library_id}")
+
+    # Get the library
+    response = httpx.get(f"{BASE_URL}/libraries/{library_id}")
+    if response.status_code != 200:
+        print(f"Failed to get library: {response.status_code} - {response.text}")
+        return
+
+    library = response.json()
+    scanned_entities = set()
+
+    # Iterate through folders
+    for folder in library["folders"]:
+        tqdm.write(f"Processing folder: {folder['id']}")
+
+        # List all entities in the folder
+        offset = 0
+        while True:
+            entities_response = httpx.get(
+                f"{BASE_URL}/libraries/{library_id}/folders/{folder['id']}/entities",
+                params={"limit": 200, "offset": offset},
+            )
+            if entities_response.status_code != 200:
+                tqdm.write(
+                    f"Failed to get entities: {entities_response.status_code} - {entities_response.text}"
+                )
+                break
+
+            entities = entities_response.json()
+            if not entities:
+                break
+
+            # Index each entity
+            for entity in tqdm(entities, desc="Indexing entities", leave=False):
+                index_response = httpx.post(f"{BASE_URL}/entities/{entity['id']}/index")
+                if index_response.status_code == 204:
+                    tqdm.write(f"Indexed entity: {entity['id']}")
+                else:
+                    tqdm.write(
+                        f"Failed to index entity {entity['id']}: {index_response.status_code} - {index_response.text}"
+                    )
+
+                scanned_entities.add(str(entity["id"]))
+
+            offset += 200
+
+        # List all indexed entities in the folder
+        offset = 0
+        while True:
+            index_response = httpx.get(
+                f"{BASE_URL}/libraries/{library_id}/folders/{folder['id']}/index",
+                params={"limit": 200, "offset": offset},
+            )
+            if index_response.status_code != 200:
+                tqdm.write(
+                    f"Failed to get indexed entities: {index_response.status_code} - {index_response.text}"
+                )
+                break
+
+            indexed_entities = index_response.json()
+            if not indexed_entities:
+                break
+
+            # Delete indexes for entities not in scanned_entities
+            for indexed_entity in tqdm(
+                indexed_entities, desc="Cleaning up indexes", leave=False
+            ):
+                if indexed_entity["id"] not in scanned_entities:
+                    delete_response = httpx.delete(
+                        f"{BASE_URL}/entities/{indexed_entity['id']}/index"
+                    )
+                    if delete_response.status_code == 204:
+                        tqdm.write(f"Deleted index for entity: {indexed_entity['id']}")
+                    else:
+                        tqdm.write(
+                            f"Failed to delete index for entity {indexed_entity['id']}: {delete_response.status_code} - {delete_response.text}"
+                        )
+
+            offset += 200
+
+    print("Indexing completed")
+
+
 def display_plugins(plugins):
     table = []
     for plugin in plugins:
diff --git a/memos/indexing.py b/memos/indexing.py
index 5dbc0a6..c068587 100644
--- a/memos/indexing.py
+++ b/memos/indexing.py
@@ -1,6 +1,7 @@
 import json
+from typing import List
 
-from .schemas import MetadataType, EntityMetadata
+from .schemas import MetadataType, EntityMetadata, EntityIndexItem, MetadataIndexItem
 
 
 def convert_metadata_value(metadata: EntityMetadata):
@@ -16,31 +17,28 @@
 
 
 def upsert(client, entity):
-    # Prepare the entity data for Typesense
-    entity_data = {
-        "id": str(entity.id),
-        "filepath": entity.filepath,
-        "filename": entity.filename,
-        "size": entity.size,
-        "file_created_at": int(entity.file_created_at.timestamp()),
-        "file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
-        "file_type": entity.file_type,
-        "file_type_group": entity.file_type_group,
-        "last_scan_at": (
-            int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
-        ),
-        "library_id": entity.library_id,
-        "folder_id": entity.folder_id,
-        "tags": [tag.name for tag in entity.tags],
-        "metadata_entries": [
-            {
-                "key": metadata.key,
-                "value": convert_metadata_value(metadata),
-                "source": metadata.source,
-            }
+    entity_data = EntityIndexItem(
+        id=str(entity.id),
+        filepath=entity.filepath,
+        filename=entity.filename,
+        size=entity.size,
+        file_created_at=int(entity.file_created_at.timestamp()),
+        file_last_modified_at=int(entity.file_last_modified_at.timestamp()),
+        file_type=entity.file_type,
+        file_type_group=entity.file_type_group,
+        last_scan_at=int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None,
+        library_id=entity.library_id,
+        folder_id=entity.folder_id,
+        tags=[tag.name for tag in entity.tags],
+        metadata_entries=[
+            MetadataIndexItem(
+                key=metadata.key,
+                value=convert_metadata_value(metadata),
+                source=metadata.source,
+            )
             for metadata in entity.metadata_entries
         ],
-        "metadata_text": "\n\n".join(
+        metadata_text="\n\n".join(
             [
                 (
                     f"key: {metadata.key}\nvalue:\n{json.dumps(json.loads(metadata.value), indent=2)}"
@@ -50,12 +48,61 @@ def upsert(client, entity):
                 for metadata in entity.metadata_entries
             ]
         ),
-    }
+    )
 
     # Sync the entity data to Typesense
     try:
-        client.collections["entities"].documents.upsert(entity_data)
+        client.collections["entities"].documents.upsert(entity_data.model_dump(mode="json"))
     except Exception as e:
         raise Exception(
             f"Failed to sync entity to Typesense: {str(e)}",
         )
+
+
+def remove_entity_by_id(client, entity_id):
+    try:
+        client.collections["entities"].documents[entity_id].delete()
+    except Exception as e:
+        raise Exception(
+            f"Failed to remove entity from Typesense: {str(e)}",
+        )
+
+
+def list_all_entities(client, library_id: int, folder_id: int, limit=100, offset=0) -> List[EntityIndexItem]:
+    try:
+        response = client.collections["entities"].documents.search(
+            {
+                "q": "*",
+                "filter_by": f"library_id:={library_id} && folder_id:={folder_id}",
+                "per_page": limit,
+                "page": offset // limit + 1,
+            }
+        )
+        return [
+            EntityIndexItem(
+                id=hit["document"]["id"],
+                filepath=hit["document"]["filepath"],
+                filename=hit["document"]["filename"],
+                size=hit["document"]["size"],
+                file_created_at=hit["document"]["file_created_at"],
+                file_last_modified_at=hit["document"]["file_last_modified_at"],
+                file_type=hit["document"]["file_type"],
+                file_type_group=hit["document"]["file_type_group"],
+                last_scan_at=hit["document"].get("last_scan_at"),
+                library_id=hit["document"]["library_id"],
+                folder_id=hit["document"]["folder_id"],
+                tags=hit["document"]["tags"],
+                metadata_entries=[
+                    MetadataIndexItem(
+                        key=entry["key"],
+                        value=entry["value"],
+                        source=entry["source"]
+                    ) for entry in hit["document"]["metadata_entries"]
+                ],
+                metadata_text=hit["document"]["metadata_text"]
+            ) for hit in response["hits"]
+        ]
+    except Exception as e:
+        raise Exception(
+            f"Failed to list entities for library {library_id} and folder {folder_id}: {str(e)}",
+        )
diff --git a/memos/schemas.py b/memos/schemas.py
index dd4ade9..ef8fe25 100644
--- a/memos/schemas.py
+++ b/memos/schemas.py
@@ -1,5 +1,5 @@
-from pydantic import BaseModel, ConfigDict, DirectoryPath, HttpUrl
-from typing import List
+from pydantic import BaseModel, ConfigDict, DirectoryPath, HttpUrl, Field
+from typing import List, Optional, Any
 from datetime import datetime
 from enum import Enum
 
@@ -140,3 +140,25 @@
 
     model_config = ConfigDict(from_attributes=True)
 
+
+class MetadataIndexItem(BaseModel):
+    key: str
+    value: Any
+    source: str
+
+
+class EntityIndexItem(BaseModel):
+    id: str
+    filepath: str
+    filename: str
+    size: int
+    file_created_at: int = Field(..., description="Unix timestamp")
+    file_last_modified_at: int = Field(..., description="Unix timestamp")
+    file_type: str
+    file_type_group: str
+    last_scan_at: Optional[int] = Field(None, description="Unix timestamp")
+    library_id: int
+    folder_id: int
+    tags: List[str]
+    metadata_entries: List[MetadataIndexItem]
+    metadata_text: str
diff --git a/memos/server.py b/memos/server.py
index e638a8e..e7231c8 100644
--- a/memos/server.py
+++ b/memos/server.py
@@ -28,6 +28,8 @@ from .schemas import (
     UpdateEntityTagsParam,
     UpdateEntityMetadataParam,
     MetadataType,
+    EntityIndexItem,
+    MetadataIndexItem,
 )
 
 engine = create_engine(f"sqlite:///{get_database_path()}")
@@ -280,6 +282,55 @@
     return None
 
 
+@app.delete(
+    "/entities/{entity_id}/index",
+    status_code=status.HTTP_204_NO_CONTENT,
+    tags=["entity"],
+)
+async def remove_entity_from_typesense(entity_id: int, db: Session = Depends(get_db)):
+    entity = crud.get_entity_by_id(entity_id, db)
+    if entity is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Entity not found",
+        )
+
+    try:
+        indexing.remove_entity_by_id(client, entity_id)
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=str(e),
+        )
+    return None
+
+
+@app.get(
+    "/libraries/{library_id}/folders/{folder_id}/index",
+    response_model=List[EntityIndexItem],
+    tags=["entity"],
+)
+def list_entities_in_folder(
+    library_id: int,
+    folder_id: int,
+    limit: Annotated[int, Query(ge=1, le=200)] = 10,
+    offset: int = 0,
+    db: Session = Depends(get_db),
+):
+    library = crud.get_library_by_id(library_id, db)
+    if library is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
+        )
+
+    if folder_id not in [folder.id for folder in library.folders]:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail="Folder not found in the specified library",
+        )
+
+    return indexing.list_all_entities(client, library_id, folder_id, limit, offset)
+
+
 @app.patch("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
 @app.put("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
 def patch_entity_tags(