feat(index): add date month year for index

This commit is contained in:
arkohut 2024-08-20 13:18:34 +08:00
parent 1d10b0ef7b
commit f0dfaf387e
3 changed files with 92 additions and 15 deletions

View File

@ -1,5 +1,6 @@
import json
from typing import List
from datetime import datetime
from .schemas import (
MetadataType,
@ -17,6 +18,26 @@ def convert_metadata_value(metadata: EntityMetadata):
return metadata.value
def parse_date_fields(entity):
    """Derive the date strings stored alongside an entity in the index.

    The date is taken from the entity's ``timestamp`` metadata entry when its
    value is a 15-character string in ``%Y%m%d-%H%M%S`` form. In every other
    case (no such entry, a missing or non-string value, a wrong length, or a
    value that fails to parse) ``entity.file_created_at`` is used instead.

    Args:
        entity: Object exposing ``metadata_entries`` (items with ``key`` and
            ``value`` attributes) and ``file_created_at`` (an object that
            supports ``strftime``).

    Returns:
        dict: ``created_date`` (``YYYY-MM-DD``), ``created_month``
        (``YYYY-MM``) and ``created_year`` (``YYYY``).
    """
    timestamp_metadata = next(
        (m for m in entity.metadata_entries if m.key == "timestamp"), None
    )
    raw_value = timestamp_metadata.value if timestamp_metadata else None

    dt = None
    # Guard on the type first: calling len() on None or a non-string value
    # would raise TypeError and skip the fallback below entirely.
    if isinstance(raw_value, str) and len(raw_value) == 15:
        try:
            dt = datetime.strptime(raw_value, "%Y%m%d-%H%M%S")
        except ValueError:
            # Right length but not a valid timestamp; use the fallback.
            dt = None

    if dt is None:
        dt = entity.file_created_at

    return {
        "created_date": dt.strftime("%Y-%m-%d"),
        "created_month": dt.strftime("%Y-%m"),
        "created_year": dt.strftime("%Y"),
    }
def bulk_upsert(client, entities):
documents = [
EntityIndexItem(
@ -52,13 +73,16 @@ def bulk_upsert(client, entities):
for metadata in entity.metadata_entries
]
),
).model_dump(mode='json')
**parse_date_fields(entity),
).model_dump(mode="json")
for entity in entities
]
# Sync the entity data to Typesense
try:
response = client.collections["entities"].documents.import_(documents, {'action': 'upsert'})
response = client.collections["entities"].documents.import_(
documents, {"action": "upsert"}
)
return response
except Exception as e:
raise Exception(
@ -67,6 +91,7 @@ def bulk_upsert(client, entities):
def upsert(client, entity):
date_fields = parse_date_fields(entity)
entity_data = EntityIndexItem(
id=str(entity.id),
filepath=entity.filepath,
@ -100,6 +125,9 @@ def upsert(client, entity):
for metadata in entity.metadata_entries
]
),
created_date=date_fields.get("created_date"),
created_month=date_fields.get("created_month"),
created_year=date_fields.get("created_year"),
)
# Sync the entity data to Typesense
@ -153,6 +181,9 @@ def list_all_entities(
for entry in hit["document"]["metadata_entries"]
],
metadata_text=hit["document"]["metadata_text"],
created_date=hit["document"].get("created_date"),
created_month=hit["document"].get("created_month"),
created_year=hit["document"].get("created_year"),
)
for hit in response["hits"]
]
@ -219,6 +250,9 @@ def search_entities(
)
for entry in hit["document"]["metadata_entries"]
],
created_date=hit["document"]["created_date"],
created_month=hit["document"]["created_month"],
created_year=hit["document"]["created_year"],
)
for hit in search_results["hits"]
]
@ -250,6 +284,9 @@ def fetch_entity_by_id(client, id: str) -> EntityIndexItem:
)
for entry in document["metadata_entries"]
],
created_date=document.get("created_date"),
created_month=document.get("created_month"),
created_year=document.get("created_year"),
)
except Exception as e:
raise Exception(

View File

@ -25,6 +25,9 @@ schema = {
{"name": "filename", "type": "string", "infix": True},
{"name": "size", "type": "int32"},
{"name": "file_created_at", "type": "int64", "facet": False},
{"name": "created_date", "type": "string", "facet": True, "optional": True},
{"name": "created_month", "type": "string", "facet": True, "optional": True},
{"name": "created_year", "type": "string", "facet": True, "optional": True},
{"name": "file_last_modified_at", "type": "int64", "facet": False},
{"name": "file_type", "type": "string", "facet": True},
{"name": "file_type_group", "type": "string", "facet": True},
@ -59,23 +62,54 @@ schema = {
}
if __name__ == "__main__":
def update_collection_fields(client, schema):
    """Bring the fields of the 'entities' collection in line with ``schema``.

    Fields named in ``schema["fields"]`` that the collection lacks are added.
    Fields that already exist but whose ``facet`` or ``optional`` setting
    differs from the schema are dropped and re-added in the same update
    request, because Typesense does not modify a field definition in place.

    Args:
        client: Typesense client; ``client.collections["entities"]`` must
            provide ``retrieve()`` and ``update()``.
        schema: Collection schema dict with a ``"fields"`` list of field
            definitions, each carrying at least a ``"name"`` key.

    Returns:
        None. A summary line is printed to stdout.
    """
    existing_collection = client.collections["entities"].retrieve()
    existing_fields = {field["name"]: field for field in existing_collection["fields"]}
    new_fields = {field["name"]: field for field in schema["fields"]}
    updatable_properties = ("facet", "optional")

    drops = []
    fields_to_add = []
    for name, field in new_fields.items():
        if name not in existing_fields:
            fields_to_add.append(field)
            continue

        current = existing_fields[name]
        changed = any(
            prop in field and field[prop] != current.get(prop)
            for prop in updatable_properties
        )
        if changed:
            # Re-sending an existing field name without dropping it first is
            # rejected by Typesense, so pair the new definition with a drop.
            drops.append({"name": name, "drop": True})
            fields_to_add.append(field)

    if fields_to_add:
        # Drop entries go first so each changed name is free before its new
        # definition is applied.
        client.collections["entities"].update({"fields": drops + fields_to_add})
        print(
            f"Added/updated {len(fields_to_add)} fields in the 'entities' collection."
        )
    else:
        print("No new fields to add or update in the 'entities' collection.")
# Script entry point: sets up the 'entities' Typesense collection from `schema`.
# NOTE(review): this span appears to come from a rendered diff in which removed
# and added lines are interleaved without markers (see the duplicated `try:` and
# `if force_recreate:` lines) and indentation is lost. Confirm the real control
# flow against the actual file before relying on it.
if __name__ == "__main__":
import sys
# Check if "--force" parameter is provided
force_recreate = "--force" in sys.argv
# Drop the collection if it exists and "--force" parameter is provided
if force_recreate:
try:
try:
# Check if the collection exists
existing_collection = client.collections["entities"].retrieve()
if force_recreate:
# With --force, the existing collection is deleted before being created again.
client.collections["entities"].delete()
print("Existing Typesense collection 'entities' deleted successfully.")
except Exception as e:
print(
f"Failed to delete existing Typesense collection 'entities': {str(e)}"
)
client.collections.create(schema)
print("Typesense collection 'entities' recreated successfully.")
else:
# Update the fields of the existing collection
update_collection_fields(client, schema)
# Recreate the collection in Typesense
client.collections.create(schema)
print("Typesense collection 'entities' created successfully.")
except typesense.exceptions.ObjectNotFound:
# Collection doesn't exist, create it
client.collections.create(schema)
print("Typesense collection 'entities' created successfully.")
# Any other failure is reported on stdout and not re-raised.
except Exception as e:
print(f"An error occurred: {str(e)}")

View File

@ -174,6 +174,9 @@ class EntityIndexItem(BaseModel):
filename: str
size: int
file_created_at: int = Field(..., description="Unix timestamp")
created_date: Optional[str] = None
created_month: Optional[str] = None
created_year: Optional[str] = None
file_last_modified_at: int = Field(..., description="Unix timestamp")
file_type: str
file_type_group: str
@ -191,6 +194,9 @@ class EntitySearchResult(BaseModel):
filename: str
size: int
file_created_at: int = Field(..., description="Unix timestamp")
created_date: Optional[str] = None
created_month: Optional[str] = None
created_year: Optional[str] = None
file_last_modified_at: int = Field(..., description="Unix timestamp")
file_type: str
file_type_group: str
@ -198,4 +204,4 @@ class EntitySearchResult(BaseModel):
library_id: int
folder_id: int
tags: List[str]
metadata_entries: List[MetadataIndexItem]
metadata_entries: List[MetadataIndexItem]