mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-10 13:07:15 +00:00
feat(index): add date month year for index
This commit is contained in:
parent
1d10b0ef7b
commit
f0dfaf387e
@ -1,5 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
from typing import List
|
from typing import List
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from .schemas import (
|
from .schemas import (
|
||||||
MetadataType,
|
MetadataType,
|
||||||
@ -17,6 +18,26 @@ def convert_metadata_value(metadata: EntityMetadata):
|
|||||||
return metadata.value
|
return metadata.value
|
||||||
|
|
||||||
|
|
||||||
|
def parse_date_fields(entity):
    """Build the faceted creation-date strings for an entity.

    The date comes from the first metadata entry whose key is ``"timestamp"``
    when its value is a 15-character string in ``%Y%m%d-%H%M%S`` form.
    In every other case (no such entry, value missing or not a string, wrong
    length, unparsable) ``entity.file_created_at`` is used instead.

    Args:
        entity: Object exposing ``metadata_entries`` (items with ``key`` and
            ``value`` attributes) and ``file_created_at``.
            NOTE(review): ``file_created_at`` is presumably a ``datetime``
            (it must support ``strftime``) -- confirm against the model.

    Returns:
        A dict with the keys ``created_date`` (``YYYY-MM-DD``),
        ``created_month`` (``YYYY-MM``) and ``created_year`` (``YYYY``).
        All three values are ``None`` when no date is available at all.
    """
    timestamp_format = "%Y%m%d-%H%M%S"
    # A value matching the format above is always exactly this long.
    timestamp_length = 15

    timestamp_metadata = next(
        (m for m in (entity.metadata_entries or ()) if m.key == "timestamp"),
        None,
    )
    raw_value = None if timestamp_metadata is None else timestamp_metadata.value

    dt = None
    # The isinstance guard keeps a missing or non-string value from raising
    # TypeError in len(); such values fall through to the file date.
    if isinstance(raw_value, str) and len(raw_value) == timestamp_length:
        try:
            dt = datetime.strptime(raw_value, timestamp_format)
        except ValueError:
            dt = None

    if dt is None:
        dt = entity.file_created_at

    if dt is None:
        # No usable date: leave the date fields unset rather than crash on
        # None.strftime.
        return {
            "created_date": None,
            "created_month": None,
            "created_year": None,
        }

    return {
        "created_date": dt.strftime("%Y-%m-%d"),
        "created_month": dt.strftime("%Y-%m"),
        "created_year": dt.strftime("%Y"),
    }
|
||||||
|
|
||||||
|
|
||||||
def bulk_upsert(client, entities):
|
def bulk_upsert(client, entities):
|
||||||
documents = [
|
documents = [
|
||||||
EntityIndexItem(
|
EntityIndexItem(
|
||||||
@ -52,13 +73,16 @@ def bulk_upsert(client, entities):
|
|||||||
for metadata in entity.metadata_entries
|
for metadata in entity.metadata_entries
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
).model_dump(mode='json')
|
**parse_date_fields(entity),
|
||||||
|
).model_dump(mode="json")
|
||||||
for entity in entities
|
for entity in entities
|
||||||
]
|
]
|
||||||
|
|
||||||
# Sync the entity data to Typesense
|
# Sync the entity data to Typesense
|
||||||
try:
|
try:
|
||||||
response = client.collections["entities"].documents.import_(documents, {'action': 'upsert'})
|
response = client.collections["entities"].documents.import_(
|
||||||
|
documents, {"action": "upsert"}
|
||||||
|
)
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
@ -67,6 +91,7 @@ def bulk_upsert(client, entities):
|
|||||||
|
|
||||||
|
|
||||||
def upsert(client, entity):
|
def upsert(client, entity):
|
||||||
|
date_fields = parse_date_fields(entity)
|
||||||
entity_data = EntityIndexItem(
|
entity_data = EntityIndexItem(
|
||||||
id=str(entity.id),
|
id=str(entity.id),
|
||||||
filepath=entity.filepath,
|
filepath=entity.filepath,
|
||||||
@ -100,6 +125,9 @@ def upsert(client, entity):
|
|||||||
for metadata in entity.metadata_entries
|
for metadata in entity.metadata_entries
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
|
created_date=date_fields.get("created_date"),
|
||||||
|
created_month=date_fields.get("created_month"),
|
||||||
|
created_year=date_fields.get("created_year"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Sync the entity data to Typesense
|
# Sync the entity data to Typesense
|
||||||
@ -153,6 +181,9 @@ def list_all_entities(
|
|||||||
for entry in hit["document"]["metadata_entries"]
|
for entry in hit["document"]["metadata_entries"]
|
||||||
],
|
],
|
||||||
metadata_text=hit["document"]["metadata_text"],
|
metadata_text=hit["document"]["metadata_text"],
|
||||||
|
created_date=hit["document"].get("created_date"),
|
||||||
|
created_month=hit["document"].get("created_month"),
|
||||||
|
created_year=hit["document"].get("created_year"),
|
||||||
)
|
)
|
||||||
for hit in response["hits"]
|
for hit in response["hits"]
|
||||||
]
|
]
|
||||||
@ -219,6 +250,9 @@ def search_entities(
|
|||||||
)
|
)
|
||||||
for entry in hit["document"]["metadata_entries"]
|
for entry in hit["document"]["metadata_entries"]
|
||||||
],
|
],
|
||||||
|
created_date=hit["document"]["created_date"],
|
||||||
|
created_month=hit["document"]["created_month"],
|
||||||
|
created_year=hit["document"]["created_year"],
|
||||||
)
|
)
|
||||||
for hit in search_results["hits"]
|
for hit in search_results["hits"]
|
||||||
]
|
]
|
||||||
@ -250,6 +284,9 @@ def fetch_entity_by_id(client, id: str) -> EntityIndexItem:
|
|||||||
)
|
)
|
||||||
for entry in document["metadata_entries"]
|
for entry in document["metadata_entries"]
|
||||||
],
|
],
|
||||||
|
created_date=document.get("created_date"),
|
||||||
|
created_month=document.get("created_month"),
|
||||||
|
created_year=document.get("created_year"),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
|
@ -25,6 +25,9 @@ schema = {
|
|||||||
{"name": "filename", "type": "string", "infix": True},
|
{"name": "filename", "type": "string", "infix": True},
|
||||||
{"name": "size", "type": "int32"},
|
{"name": "size", "type": "int32"},
|
||||||
{"name": "file_created_at", "type": "int64", "facet": False},
|
{"name": "file_created_at", "type": "int64", "facet": False},
|
||||||
|
{"name": "created_date", "type": "string", "facet": True, "optional": True},
|
||||||
|
{"name": "created_month", "type": "string", "facet": True, "optional": True},
|
||||||
|
{"name": "created_year", "type": "string", "facet": True, "optional": True},
|
||||||
{"name": "file_last_modified_at", "type": "int64", "facet": False},
|
{"name": "file_last_modified_at", "type": "int64", "facet": False},
|
||||||
{"name": "file_type", "type": "string", "facet": True},
|
{"name": "file_type", "type": "string", "facet": True},
|
||||||
{"name": "file_type_group", "type": "string", "facet": True},
|
{"name": "file_type_group", "type": "string", "facet": True},
|
||||||
@ -59,23 +62,54 @@ schema = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def update_collection_fields(client, schema):
    """Bring the 'entities' collection's fields in line with ``schema``.

    Fields named in ``schema["fields"]`` that the collection lacks are added.
    Fields that already exist but differ in ``facet`` or ``optional`` are
    dropped and re-added in the same update request, because Typesense
    refuses to redefine a field that is still part of the schema.
    Fields that exist only in the collection are left untouched.

    Args:
        client: Typesense client; ``client.collections["entities"]`` must
            provide ``retrieve()`` and ``update(payload)``.
        schema: Mapping with a ``"fields"`` list of field definition dicts,
            each carrying at least a ``"name"`` key.

    Returns:
        None. Prints a one-line summary of what was changed.
    """
    # Only these properties are compared when deciding whether an existing
    # field needs to be redefined.
    updatable_properties = ("facet", "optional")

    collection = client.collections["entities"]
    existing_fields = {
        field["name"]: field for field in collection.retrieve()["fields"]
    }
    new_fields = {field["name"]: field for field in schema["fields"]}

    field_changes = []
    changed_count = 0
    for name, field in new_fields.items():
        current = existing_fields.get(name)
        if current is None:
            field_changes.append(field)
            changed_count += 1
        elif any(
            prop in field and field[prop] != current.get(prop)
            for prop in updatable_properties
        ):
            # An existing field must be dropped before it can be re-added
            # with a new definition; both entries go in one request.
            field_changes.append({"name": name, "drop": True})
            field_changes.append(field)
            changed_count += 1

    if field_changes:
        collection.update({"fields": field_changes})
        print(
            f"Added/updated {changed_count} fields in the 'entities' collection."
        )
    else:
        print("No new fields to add or update in the 'entities' collection.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Check if "--force" parameter is provided
|
|
||||||
force_recreate = "--force" in sys.argv
|
force_recreate = "--force" in sys.argv
|
||||||
|
|
||||||
# Drop the collection if it exists and "--force" parameter is provided
|
|
||||||
if force_recreate:
|
|
||||||
try:
|
try:
|
||||||
|
# Check if the collection exists
|
||||||
|
existing_collection = client.collections["entities"].retrieve()
|
||||||
|
|
||||||
|
if force_recreate:
|
||||||
client.collections["entities"].delete()
|
client.collections["entities"].delete()
|
||||||
print("Existing Typesense collection 'entities' deleted successfully.")
|
print("Existing Typesense collection 'entities' deleted successfully.")
|
||||||
except Exception as e:
|
client.collections.create(schema)
|
||||||
print(
|
print("Typesense collection 'entities' recreated successfully.")
|
||||||
f"Failed to delete existing Typesense collection 'entities': {str(e)}"
|
else:
|
||||||
)
|
# Update the fields of the existing collection
|
||||||
|
update_collection_fields(client, schema)
|
||||||
|
|
||||||
# Recreate the collection in Typesense
|
except typesense.exceptions.ObjectNotFound:
|
||||||
|
# Collection doesn't exist, create it
|
||||||
client.collections.create(schema)
|
client.collections.create(schema)
|
||||||
print("Typesense collection 'entities' created successfully.")
|
print("Typesense collection 'entities' created successfully.")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"An error occurred: {str(e)}")
|
||||||
|
@ -174,6 +174,9 @@ class EntityIndexItem(BaseModel):
|
|||||||
filename: str
|
filename: str
|
||||||
size: int
|
size: int
|
||||||
file_created_at: int = Field(..., description="Unix timestamp")
|
file_created_at: int = Field(..., description="Unix timestamp")
|
||||||
|
created_date: Optional[str] = None
|
||||||
|
created_month: Optional[str] = None
|
||||||
|
created_year: Optional[str] = None
|
||||||
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
||||||
file_type: str
|
file_type: str
|
||||||
file_type_group: str
|
file_type_group: str
|
||||||
@ -191,6 +194,9 @@ class EntitySearchResult(BaseModel):
|
|||||||
filename: str
|
filename: str
|
||||||
size: int
|
size: int
|
||||||
file_created_at: int = Field(..., description="Unix timestamp")
|
file_created_at: int = Field(..., description="Unix timestamp")
|
||||||
|
created_date: Optional[str] = None
|
||||||
|
created_month: Optional[str] = None
|
||||||
|
created_year: Optional[str] = None
|
||||||
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
||||||
file_type: str
|
file_type: str
|
||||||
file_type_group: str
|
file_type_group: str
|
||||||
|
Loading…
x
Reference in New Issue
Block a user