def parse_date_fields(entity):
    """Derive the creation-date strings indexed alongside an entity.

    The date comes from the entity's ``timestamp`` metadata entry when its
    value is a 15-character string in ``%Y%m%d-%H%M%S`` form. In every other
    case (no such entry, a ``None`` or non-string value, wrong length, or an
    unparseable value) ``entity.file_created_at`` is used instead.

    Args:
        entity: Object exposing ``metadata_entries`` (items with ``key`` and
            ``value`` attributes) and ``file_created_at`` (must support
            ``strftime``).

    Returns:
        dict with ``created_date`` (``%Y-%m-%d``), ``created_month``
        (``%Y-%m``) and ``created_year`` (``%Y``) string values.
    """
    timestamp_metadata = next(
        (m for m in entity.metadata_entries if m.key == "timestamp"), None
    )
    raw_value = timestamp_metadata.value if timestamp_metadata else None

    dt = None
    # Only real strings of the exact expected width are parsed. Calling
    # len() on a None/non-string value would raise TypeError, which the
    # ValueError handler below does not cover.
    if isinstance(raw_value, str) and len(raw_value) == 15:
        try:
            dt = datetime.strptime(raw_value, "%Y%m%d-%H%M%S")
        except ValueError:
            dt = None

    if dt is None:
        dt = entity.file_created_at

    return {
        "created_date": dt.strftime("%Y-%m-%d"),
        "created_month": dt.strftime("%Y-%m"),
        "created_year": dt.strftime("%Y"),
    }
def update_collection_fields(client, schema):
    """Bring the existing 'entities' collection in line with ``schema``.

    Fields missing from the collection are added. Fields whose ``facet`` or
    ``optional`` setting differs from the collection's current definition
    are re-declared.

    Args:
        client: Typesense client; ``client.collections["entities"]`` must
            provide ``retrieve()`` and ``update(payload)``.
        schema: Collection schema dict with a ``"fields"`` list of field
            definitions, each carrying at least a ``"name"`` key.

    Returns:
        None. A one-line summary is printed to stdout.
    """
    collection = client.collections["entities"]
    existing_fields = {
        field["name"]: field for field in collection.retrieve()["fields"]
    }
    new_fields = {field["name"]: field for field in schema["fields"]}

    updatable_properties = ("facet", "optional")
    field_operations = []
    changed_count = 0
    for name, field in new_fields.items():
        current = existing_fields.get(name)
        if current is None:
            field_operations.append(field)
            changed_count += 1
            continue

        needs_update = any(
            prop in field and field[prop] != current.get(prop)
            for prop in updatable_properties
        )
        if needs_update:
            # Typesense cannot alter a field in place: a modification has
            # to be sent as a drop followed by an add in the same update,
            # otherwise the request is rejected for an existing field.
            field_operations.append({"name": name, "drop": True})
            field_operations.append(field)
            changed_count += 1

    if field_operations:
        collection.update({"fields": field_operations})
        print(
            f"Added/updated {changed_count} fields in the 'entities' collection."
        )
    else:
        print("No new fields to add or update in the 'entities' collection.")
timestamp") + created_date: Optional[str] = None + created_month: Optional[str] = None + created_year: Optional[str] = None file_last_modified_at: int = Field(..., description="Unix timestamp") file_type: str file_type_group: str @@ -191,6 +194,9 @@ class EntitySearchResult(BaseModel): filename: str size: int file_created_at: int = Field(..., description="Unix timestamp") + created_date: Optional[str] = None + created_month: Optional[str] = None + created_year: Optional[str] = None file_last_modified_at: int = Field(..., description="Unix timestamp") file_type: str file_type_group: str @@ -198,4 +204,4 @@ class EntitySearchResult(BaseModel): library_id: int folder_id: int tags: List[str] - metadata_entries: List[MetadataIndexItem] + metadata_entries: List[MetadataIndexItem] \ No newline at end of file