feat(index): extract collection name to settings

This commit is contained in:
arkohut 2024-08-20 18:35:35 +08:00
parent f0dfaf387e
commit 2cf75bee7f
3 changed files with 31 additions and 18 deletions

View File

@@ -13,6 +13,7 @@ class Settings(BaseSettings):
typesense_protocol: str = "http"
typesense_api_key: str = "xyz"
typesense_connection_timeout_seconds: int = 2
typesense_collection_name: str = "entities"
settings = Settings()
@@ -20,7 +21,10 @@ settings = Settings()
# Define the default database path
os.makedirs(settings.base_dir, exist_ok=True)
# Global variable for Typesense collection name
TYPESENSE_COLLECTION_NAME = settings.typesense_collection_name
# Function to get the database path from environment variable or default
def get_database_path():
return settings.database_path
return settings.database_path

View File

@@ -9,6 +9,7 @@ from .schemas import (
MetadataIndexItem,
EntitySearchResult,
)
from .config import TYPESENSE_COLLECTION_NAME
def convert_metadata_value(metadata: EntityMetadata):
@@ -80,7 +81,7 @@ def bulk_upsert(client, entities):
# Sync the entity data to Typesense
try:
response = client.collections["entities"].documents.import_(
response = client.collections[TYPESENSE_COLLECTION_NAME].documents.import_(
documents, {"action": "upsert"}
)
return response
@@ -132,7 +133,7 @@ def upsert(client, entity):
# Sync the entity data to Typesense
try:
client.collections["entities"].documents.upsert(entity_data.model_dump_json())
client.collections[TYPESENSE_COLLECTION_NAME].documents.upsert(entity_data.model_dump_json())
except Exception as e:
raise Exception(
f"Failed to sync entity to Typesense: {str(e)}",
@@ -141,7 +142,7 @@ def upsert(client, entity):
def remove_entity_by_id(client, entity_id):
try:
client.collections["entities"].documents[entity_id].delete()
client.collections[TYPESENSE_COLLECTION_NAME].documents[entity_id].delete()
except Exception as e:
raise Exception(
f"Failed to remove entity from Typesense: {str(e)}",
@@ -152,7 +153,7 @@ def list_all_entities(
client, library_id: int, folder_id: int, limit=100, offset=0
) -> List[EntityIndexItem]:
try:
response = client.collections["entities"].documents.search(
response = client.collections[TYPESENSE_COLLECTION_NAME].documents.search(
{
"q": "*",
"filter_by": f"library_id:={library_id} && folder_id:={folder_id}",
@@ -227,7 +228,7 @@ def search_entities(
"exclude_fields": "metadata_text,embedding",
"sort_by": "_text_match:desc",
}
search_results = client.collections["entities"].documents.search(
search_results = client.collections[TYPESENSE_COLLECTION_NAME].documents.search(
search_parameters
)
return [
@@ -264,7 +265,7 @@ def search_entities(
def fetch_entity_by_id(client, id: str) -> EntityIndexItem:
try:
document = client.collections["entities"].documents[id].retrieve()
document = client.collections[TYPESENSE_COLLECTION_NAME].documents[id].retrieve()
return EntitySearchResult(
id=document["id"],
filepath=document["filepath"],

View File

@@ -1,5 +1,5 @@
import typesense
from .config import settings
from .config import settings, TYPESENSE_COLLECTION_NAME
# Initialize Typesense client
client = typesense.Client(
@@ -18,7 +18,7 @@ client = typesense.Client(
# Define the schema for the Typesense collection
schema = {
"name": "entities",
"name": TYPESENSE_COLLECTION_NAME,
"enable_nested_fields": True,
"fields": [
{"name": "filepath", "type": "string", "infix": True},
@@ -63,7 +63,7 @@ schema = {
def update_collection_fields(client, schema):
existing_collection = client.collections["entities"].retrieve()
existing_collection = client.collections[TYPESENSE_COLLECTION_NAME].retrieve()
existing_fields = {field["name"]: field for field in existing_collection["fields"]}
new_fields = {field["name"]: field for field in schema["fields"]}
@@ -80,12 +80,14 @@ def update_collection_fields(client, schema):
break
if fields_to_add:
client.collections["entities"].update({"fields": fields_to_add})
client.collections[TYPESENSE_COLLECTION_NAME].update({"fields": fields_to_add})
print(
f"Added/updated {len(fields_to_add)} fields in the 'entities' collection."
f"Added/updated {len(fields_to_add)} fields in the '{TYPESENSE_COLLECTION_NAME}' collection."
)
else:
print("No new fields to add or update in the 'entities' collection.")
print(
f"No new fields to add or update in the '{TYPESENSE_COLLECTION_NAME}' collection."
)
if __name__ == "__main__":
@@ -95,13 +97,17 @@ if __name__ == "__main__":
try:
# Check if the collection exists
existing_collection = client.collections["entities"].retrieve()
existing_collection = client.collections[TYPESENSE_COLLECTION_NAME].retrieve()
if force_recreate:
client.collections["entities"].delete()
print("Existing Typesense collection 'entities' deleted successfully.")
client.collections[TYPESENSE_COLLECTION_NAME].delete()
print(
f"Existing Typesense collection '{TYPESENSE_COLLECTION_NAME}' deleted successfully."
)
client.collections.create(schema)
print("Typesense collection 'entities' recreated successfully.")
print(
f"Typesense collection '{TYPESENSE_COLLECTION_NAME}' recreated successfully."
)
else:
# Update the fields of the existing collection
update_collection_fields(client, schema)
@@ -109,7 +115,9 @@ if __name__ == "__main__":
except typesense.exceptions.ObjectNotFound:
# Collection doesn't exist, create it
client.collections.create(schema)
print("Typesense collection 'entities' created successfully.")
print(
f"Typesense collection '{TYPESENSE_COLLECTION_NAME}' created successfully."
)
except Exception as e:
print(f"An error occurred: {str(e)}")