mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-06 03:05:25 +00:00
171 lines
5.8 KiB
Python
171 lines
5.8 KiB
Python
import typesense
|
|
from .config import settings, TYPESENSE_COLLECTION_NAME
|
|
import sys
|
|
import logging
|
|
|
|
# Logging setup: root logger at INFO, plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Schema handed to Typesense when the collection is created, and used as the
# reference when syncing the fields of an already existing collection.
schema = {
    "name": TYPESENSE_COLLECTION_NAME,
    "enable_nested_fields": True,
    "fields": [
        # File identity and basic attributes.
        {"name": "filepath", "type": "string", "infix": True},
        {"name": "filename", "type": "string", "infix": True},
        {"name": "size", "type": "int32"},
        {"name": "file_created_at", "type": "int64", "facet": False},
        # created_date / created_month / created_year share one definition:
        # optional, facetable, sortable strings.
        *(
            {
                "name": date_part,
                "type": "string",
                "facet": True,
                "optional": True,
                "sort": True,
            }
            for date_part in ("created_date", "created_month", "created_year")
        ),
        {"name": "file_last_modified_at", "type": "int64", "facet": False},
        {"name": "file_type", "type": "string", "facet": True},
        {"name": "file_type_group", "type": "string", "facet": True},
        {"name": "last_scan_at", "type": "int64", "facet": False, "optional": True},
        {"name": "library_id", "type": "int32", "facet": True},
        {"name": "folder_id", "type": "int32", "facet": True},
        # Text-bearing fields carry the "zh" locale setting.
        {"name": "tags", "type": "string[]", "facet": True, "optional": True, "locale": "zh"},
        {"name": "metadata_entries", "type": "object[]", "optional": True, "locale": "zh"},
        {"name": "metadata_text", "type": "string", "optional": True, "locale": "zh"},
        # Dimensionality of the text embedding comes from the settings;
        # image_embedding is declared without a fixed dimension.
        {
            "name": "embedding",
            "type": "float[]",
            "num_dim": settings.embedding.num_dim,
            "optional": True,
        },
        {"name": "image_embedding", "type": "float[]", "optional": True},
    ],
    "token_separators": [":", "/", " ", "\\"],
}
|
|
|
|
def update_collection_fields(client, schema):
    """Bring an existing Typesense collection's fields in line with *schema*.

    Fields listed in ``schema["fields"]`` that the collection does not have
    yet are added. Fields that already exist but whose ``facet`` or
    ``optional`` setting differs from the schema are re-declared. Typesense's
    alter API only supports adding and dropping fields, so such a change is
    sent as a drop entry immediately followed by the new definition, all in
    one update request.

    Args:
        client: Typesense client; ``client.collections[name]`` must offer
            ``retrieve()`` and ``update(payload)``.
        schema: Collection schema dict with a ``"fields"`` list. Its
            ``"name"`` selects the collection to alter; when the key is
            absent, ``TYPESENSE_COLLECTION_NAME`` is used.

    Returns:
        None. A one-line summary is printed to stdout.
    """
    collection_name = schema["name"] if "name" in schema else TYPESENSE_COLLECTION_NAME
    collection = client.collections[collection_name]

    existing_fields = {field["name"]: field for field in collection.retrieve()["fields"]}
    new_fields = {field["name"]: field for field in schema["fields"]}

    # Only differences in these properties trigger a re-declaration.
    updatable_properties = ("facet", "optional")

    field_changes = []  # payload entries, in the order they are sent
    changed_count = 0  # number of distinct fields added or modified
    for name, field in new_fields.items():
        current = existing_fields.get(name)
        if current is None:
            field_changes.append(field)
            changed_count += 1
        elif any(
            prop in field and field[prop] != current.get(prop)
            for prop in updatable_properties
        ):
            # Re-adding an existing field without dropping it first is
            # rejected by the server, so drop + add in the same request.
            field_changes.append({"name": name, "drop": True})
            field_changes.append(field)
            changed_count += 1

    if field_changes:
        collection.update({"fields": field_changes})
        print(
            f"Added/updated {changed_count} fields in the '{collection_name}' collection."
        )
    else:
        print(
            f"No new fields to add or update in the '{collection_name}' collection."
        )
|
|
|
|
def init_typesense():
    """Create the Typesense collection, or sync its fields if it already exists.

    Returns:
        bool: ``True`` once the collection has been created or its fields
        have been synced; ``False`` when Typesense is disabled in the
        settings or when any step raised (the error is logged).
    """
    if not settings.typesense.enabled:
        logger.warning("Typesense is not enabled. Skipping initialization.")
        return False

    try:
        node = {
            "host": settings.typesense_host,
            "port": settings.typesense_port,
            "protocol": settings.typesense_protocol,
        }
        client = typesense.Client(
            {
                "nodes": [node],
                "api_key": settings.typesense_api_key,
                "connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
            }
        )

        # Names of every collection currently known to the server.
        known_names = {entry["name"] for entry in client.collections.retrieve()}

        if TYPESENSE_COLLECTION_NAME in known_names:
            update_collection_fields(client, schema)
            logger.info(f"Typesense collection '{TYPESENSE_COLLECTION_NAME}' already exists. Updated fields if necessary.")
        else:
            client.collections.create(schema)
            logger.info(f"Typesense collection '{TYPESENSE_COLLECTION_NAME}' created successfully.")
        return True
    except Exception as e:
        # Any failure (connection, auth, schema rejection) is reported as False.
        logger.error(f"Error initializing Typesense collection: {e}")
        return False
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: optionally drop the collection, then (re)initialize it.
    import argparse
    import sys

    cli = argparse.ArgumentParser()
    cli.add_argument("--force", action="store_true", help="Drop the collection before initializing")
    args = cli.parse_args()

    # Nothing to do when Typesense is switched off; exit successfully.
    if not settings.typesense.enabled:
        logger.warning("Typesense is not enabled. Please enable it in the configuration if you want to use Typesense.")
        sys.exit(0)

    connection_config = {
        "nodes": [
            {
                "host": settings.typesense_host,
                "port": settings.typesense_port,
                "protocol": settings.typesense_protocol,
            }
        ],
        "api_key": settings.typesense_api_key,
        "connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
    }
    client = typesense.Client(connection_config)

    if args.force:
        # Best effort: a failed drop is logged and initialization still proceeds.
        try:
            client.collections[TYPESENSE_COLLECTION_NAME].delete()
            logger.info(f"Dropped collection '{TYPESENSE_COLLECTION_NAME}'.")
        except Exception as e:
            logger.error(f"Error dropping collection: {e}")

    initialized = init_typesense()
    if not initialized:
        sys.exit(1)
|