mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-07 03:35:24 +00:00
feat(index): add date month year for index
This commit is contained in:
parent
1d10b0ef7b
commit
f0dfaf387e
@ -1,5 +1,6 @@
|
||||
import json
|
||||
from typing import List
|
||||
from datetime import datetime
|
||||
|
||||
from .schemas import (
|
||||
MetadataType,
|
||||
@ -17,6 +18,26 @@ def convert_metadata_value(metadata: EntityMetadata):
|
||||
return metadata.value
|
||||
|
||||
|
||||
def parse_date_fields(entity):
    """Build the date facet values (day, month, year) for an entity.

    The date is taken from the entity's ``timestamp`` metadata entry when
    that entry's value is a 15-character string in ``YYYYMMDD-HHMMSS``
    form. In every other case (no such entry, value missing or not a
    string, wrong length, unparsable) ``entity.file_created_at`` is used.

    Args:
        entity: Object exposing ``metadata_entries`` (items with ``key``
            and ``value`` attributes) and ``file_created_at`` (an object
            supporting ``strftime``).

    Returns:
        dict: ``created_date`` (``YYYY-MM-DD``), ``created_month``
        (``YYYY-MM``) and ``created_year`` (``YYYY``) as strings.
    """
    timestamp_metadata = next(
        (m for m in entity.metadata_entries if m.key == "timestamp"), None
    )

    # Default to the file creation time; only a well-formed timestamp
    # metadata value overrides it.
    dt = entity.file_created_at

    raw_value = getattr(timestamp_metadata, "value", None)
    # The isinstance guard keeps a None / non-string value from raising
    # TypeError in len(). The length check stays because strptime alone
    # also accepts non-zero-padded fields.
    if isinstance(raw_value, str) and len(raw_value) == 15:
        try:
            dt = datetime.strptime(raw_value, "%Y%m%d-%H%M%S")
        except ValueError:
            # Right length but not a valid date/time: keep the fallback.
            dt = entity.file_created_at

    return {
        "created_date": dt.strftime("%Y-%m-%d"),
        "created_month": dt.strftime("%Y-%m"),
        "created_year": dt.strftime("%Y"),
    }
|
||||
|
||||
|
||||
def bulk_upsert(client, entities):
|
||||
documents = [
|
||||
EntityIndexItem(
|
||||
@ -52,13 +73,16 @@ def bulk_upsert(client, entities):
|
||||
for metadata in entity.metadata_entries
|
||||
]
|
||||
),
|
||||
).model_dump(mode='json')
|
||||
**parse_date_fields(entity),
|
||||
).model_dump(mode="json")
|
||||
for entity in entities
|
||||
]
|
||||
|
||||
# Sync the entity data to Typesense
|
||||
try:
|
||||
response = client.collections["entities"].documents.import_(documents, {'action': 'upsert'})
|
||||
response = client.collections["entities"].documents.import_(
|
||||
documents, {"action": "upsert"}
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
@ -67,6 +91,7 @@ def bulk_upsert(client, entities):
|
||||
|
||||
|
||||
def upsert(client, entity):
|
||||
date_fields = parse_date_fields(entity)
|
||||
entity_data = EntityIndexItem(
|
||||
id=str(entity.id),
|
||||
filepath=entity.filepath,
|
||||
@ -100,6 +125,9 @@ def upsert(client, entity):
|
||||
for metadata in entity.metadata_entries
|
||||
]
|
||||
),
|
||||
created_date=date_fields.get("created_date"),
|
||||
created_month=date_fields.get("created_month"),
|
||||
created_year=date_fields.get("created_year"),
|
||||
)
|
||||
|
||||
# Sync the entity data to Typesense
|
||||
@ -153,6 +181,9 @@ def list_all_entities(
|
||||
for entry in hit["document"]["metadata_entries"]
|
||||
],
|
||||
metadata_text=hit["document"]["metadata_text"],
|
||||
created_date=hit["document"].get("created_date"),
|
||||
created_month=hit["document"].get("created_month"),
|
||||
created_year=hit["document"].get("created_year"),
|
||||
)
|
||||
for hit in response["hits"]
|
||||
]
|
||||
@ -219,6 +250,9 @@ def search_entities(
|
||||
)
|
||||
for entry in hit["document"]["metadata_entries"]
|
||||
],
|
||||
created_date=hit["document"]["created_date"],
|
||||
created_month=hit["document"]["created_month"],
|
||||
created_year=hit["document"]["created_year"],
|
||||
)
|
||||
for hit in search_results["hits"]
|
||||
]
|
||||
@ -250,6 +284,9 @@ def fetch_entity_by_id(client, id: str) -> EntityIndexItem:
|
||||
)
|
||||
for entry in document["metadata_entries"]
|
||||
],
|
||||
created_date=document.get("created_date"),
|
||||
created_month=document.get("created_month"),
|
||||
created_year=document.get("created_year"),
|
||||
)
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
|
@ -25,6 +25,9 @@ schema = {
|
||||
{"name": "filename", "type": "string", "infix": True},
|
||||
{"name": "size", "type": "int32"},
|
||||
{"name": "file_created_at", "type": "int64", "facet": False},
|
||||
{"name": "created_date", "type": "string", "facet": True, "optional": True},
|
||||
{"name": "created_month", "type": "string", "facet": True, "optional": True},
|
||||
{"name": "created_year", "type": "string", "facet": True, "optional": True},
|
||||
{"name": "file_last_modified_at", "type": "int64", "facet": False},
|
||||
{"name": "file_type", "type": "string", "facet": True},
|
||||
{"name": "file_type_group", "type": "string", "facet": True},
|
||||
@ -59,23 +62,54 @@ schema = {
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
def update_collection_fields(client, schema):
    """Add missing fields to, and update changed fields of, the 'entities' collection.

    Fields in ``schema["fields"]`` that the existing collection lacks are
    added. Fields that already exist are re-created only when their
    ``facet`` or ``optional`` setting differs from the existing one; other
    property differences are ignored.

    Args:
        client: Typesense client; ``client.collections["entities"]`` must
            provide ``retrieve()`` and ``update(...)``.
        schema: Collection schema dict with a ``"fields"`` list of field
            definitions, each carrying a ``"name"``.

    Returns:
        None. The outcome is reported via ``print``.
    """
    collection = client.collections["entities"]
    existing_fields = {
        field["name"]: field for field in collection.retrieve()["fields"]
    }
    new_fields = {field["name"]: field for field in schema["fields"]}

    # Only these properties are compared when deciding whether an
    # already-present field needs to be updated.
    updatable_properties = ("facet", "optional")

    field_operations = []
    changed_count = 0
    for name, field in new_fields.items():
        current = existing_fields.get(name)
        if current is None:
            field_operations.append(field)
            changed_count += 1
        elif any(
            prop in field and field[prop] != current.get(prop)
            for prop in updatable_properties
        ):
            # Typesense does not alter a field in place: the existing
            # field must be dropped and re-added in the same update call.
            field_operations.append({"name": name, "drop": True})
            field_operations.append(field)
            changed_count += 1

    if field_operations:
        collection.update({"fields": field_operations})
        print(
            f"Added/updated {changed_count} fields in the 'entities' collection."
        )
    else:
        print("No new fields to add or update in the 'entities' collection.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
# Check if "--force" parameter is provided
|
||||
force_recreate = "--force" in sys.argv
|
||||
|
||||
# Drop the collection if it exists and "--force" parameter is provided
|
||||
if force_recreate:
|
||||
try:
|
||||
try:
|
||||
# Check if the collection exists
|
||||
existing_collection = client.collections["entities"].retrieve()
|
||||
|
||||
if force_recreate:
|
||||
client.collections["entities"].delete()
|
||||
print("Existing Typesense collection 'entities' deleted successfully.")
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Failed to delete existing Typesense collection 'entities': {str(e)}"
|
||||
)
|
||||
client.collections.create(schema)
|
||||
print("Typesense collection 'entities' recreated successfully.")
|
||||
else:
|
||||
# Update the fields of the existing collection
|
||||
update_collection_fields(client, schema)
|
||||
|
||||
# Recreate the collection in Typesense
|
||||
client.collections.create(schema)
|
||||
print("Typesense collection 'entities' created successfully.")
|
||||
except typesense.exceptions.ObjectNotFound:
|
||||
# Collection doesn't exist, create it
|
||||
client.collections.create(schema)
|
||||
print("Typesense collection 'entities' created successfully.")
|
||||
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {str(e)}")
|
||||
|
@ -174,6 +174,9 @@ class EntityIndexItem(BaseModel):
|
||||
filename: str
|
||||
size: int
|
||||
file_created_at: int = Field(..., description="Unix timestamp")
|
||||
created_date: Optional[str] = None
|
||||
created_month: Optional[str] = None
|
||||
created_year: Optional[str] = None
|
||||
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
||||
file_type: str
|
||||
file_type_group: str
|
||||
@ -191,6 +194,9 @@ class EntitySearchResult(BaseModel):
|
||||
filename: str
|
||||
size: int
|
||||
file_created_at: int = Field(..., description="Unix timestamp")
|
||||
created_date: Optional[str] = None
|
||||
created_month: Optional[str] = None
|
||||
created_year: Optional[str] = None
|
||||
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
||||
file_type: str
|
||||
file_type_group: str
|
||||
@ -198,4 +204,4 @@ class EntitySearchResult(BaseModel):
|
||||
library_id: int
|
||||
folder_id: int
|
||||
tags: List[str]
|
||||
metadata_entries: List[MetadataIndexItem]
|
||||
metadata_entries: List[MetadataIndexItem]
|
Loading…
x
Reference in New Issue
Block a user