feat(index): extract collection name to settings

This commit is contained in:
arkohut 2024-08-20 18:35:35 +08:00
parent f0dfaf387e
commit 2cf75bee7f
3 changed files with 31 additions and 18 deletions

View File

@@ -13,6 +13,7 @@ class Settings(BaseSettings):
typesense_protocol: str = "http" typesense_protocol: str = "http"
typesense_api_key: str = "xyz" typesense_api_key: str = "xyz"
typesense_connection_timeout_seconds: int = 2 typesense_connection_timeout_seconds: int = 2
typesense_collection_name: str = "entities"
settings = Settings() settings = Settings()
@@ -20,6 +21,9 @@ settings = Settings()
# Define the default database path # Define the default database path
os.makedirs(settings.base_dir, exist_ok=True) os.makedirs(settings.base_dir, exist_ok=True)
# Global variable for Typesense collection name
TYPESENSE_COLLECTION_NAME = settings.typesense_collection_name
# Function to get the database path from environment variable or default # Function to get the database path from environment variable or default
def get_database_path(): def get_database_path():

View File

@@ -9,6 +9,7 @@ from .schemas import (
MetadataIndexItem, MetadataIndexItem,
EntitySearchResult, EntitySearchResult,
) )
from .config import TYPESENSE_COLLECTION_NAME
def convert_metadata_value(metadata: EntityMetadata): def convert_metadata_value(metadata: EntityMetadata):
@@ -80,7 +81,7 @@ def bulk_upsert(client, entities):
# Sync the entity data to Typesense # Sync the entity data to Typesense
try: try:
response = client.collections["entities"].documents.import_( response = client.collections[TYPESENSE_COLLECTION_NAME].documents.import_(
documents, {"action": "upsert"} documents, {"action": "upsert"}
) )
return response return response
@@ -132,7 +133,7 @@ def upsert(client, entity):
# Sync the entity data to Typesense # Sync the entity data to Typesense
try: try:
client.collections["entities"].documents.upsert(entity_data.model_dump_json()) client.collections[TYPESENSE_COLLECTION_NAME].documents.upsert(entity_data.model_dump_json())
except Exception as e: except Exception as e:
raise Exception( raise Exception(
f"Failed to sync entity to Typesense: {str(e)}", f"Failed to sync entity to Typesense: {str(e)}",
@@ -141,7 +142,7 @@ def upsert(client, entity):
def remove_entity_by_id(client, entity_id): def remove_entity_by_id(client, entity_id):
try: try:
client.collections["entities"].documents[entity_id].delete() client.collections[TYPESENSE_COLLECTION_NAME].documents[entity_id].delete()
except Exception as e: except Exception as e:
raise Exception( raise Exception(
f"Failed to remove entity from Typesense: {str(e)}", f"Failed to remove entity from Typesense: {str(e)}",
@@ -152,7 +153,7 @@ def list_all_entities(
client, library_id: int, folder_id: int, limit=100, offset=0 client, library_id: int, folder_id: int, limit=100, offset=0
) -> List[EntityIndexItem]: ) -> List[EntityIndexItem]:
try: try:
response = client.collections["entities"].documents.search( response = client.collections[TYPESENSE_COLLECTION_NAME].documents.search(
{ {
"q": "*", "q": "*",
"filter_by": f"library_id:={library_id} && folder_id:={folder_id}", "filter_by": f"library_id:={library_id} && folder_id:={folder_id}",
@@ -227,7 +228,7 @@ def search_entities(
"exclude_fields": "metadata_text,embedding", "exclude_fields": "metadata_text,embedding",
"sort_by": "_text_match:desc", "sort_by": "_text_match:desc",
} }
search_results = client.collections["entities"].documents.search( search_results = client.collections[TYPESENSE_COLLECTION_NAME].documents.search(
search_parameters search_parameters
) )
return [ return [
@@ -264,7 +265,7 @@ def search_entities(
def fetch_entity_by_id(client, id: str) -> EntityIndexItem: def fetch_entity_by_id(client, id: str) -> EntityIndexItem:
try: try:
document = client.collections["entities"].documents[id].retrieve() document = client.collections[TYPESENSE_COLLECTION_NAME].documents[id].retrieve()
return EntitySearchResult( return EntitySearchResult(
id=document["id"], id=document["id"],
filepath=document["filepath"], filepath=document["filepath"],

View File

@@ -1,5 +1,5 @@
import typesense import typesense
from .config import settings from .config import settings, TYPESENSE_COLLECTION_NAME
# Initialize Typesense client # Initialize Typesense client
client = typesense.Client( client = typesense.Client(
@@ -18,7 +18,7 @@ client = typesense.Client(
# Define the schema for the Typesense collection # Define the schema for the Typesense collection
schema = { schema = {
"name": "entities", "name": TYPESENSE_COLLECTION_NAME,
"enable_nested_fields": True, "enable_nested_fields": True,
"fields": [ "fields": [
{"name": "filepath", "type": "string", "infix": True}, {"name": "filepath", "type": "string", "infix": True},
@@ -63,7 +63,7 @@ schema = {
def update_collection_fields(client, schema): def update_collection_fields(client, schema):
existing_collection = client.collections["entities"].retrieve() existing_collection = client.collections[TYPESENSE_COLLECTION_NAME].retrieve()
existing_fields = {field["name"]: field for field in existing_collection["fields"]} existing_fields = {field["name"]: field for field in existing_collection["fields"]}
new_fields = {field["name"]: field for field in schema["fields"]} new_fields = {field["name"]: field for field in schema["fields"]}
@@ -80,12 +80,14 @@ def update_collection_fields(client, schema):
break break
if fields_to_add: if fields_to_add:
client.collections["entities"].update({"fields": fields_to_add}) client.collections[TYPESENSE_COLLECTION_NAME].update({"fields": fields_to_add})
print( print(
f"Added/updated {len(fields_to_add)} fields in the 'entities' collection." f"Added/updated {len(fields_to_add)} fields in the '{TYPESENSE_COLLECTION_NAME}' collection."
) )
else: else:
print("No new fields to add or update in the 'entities' collection.") print(
f"No new fields to add or update in the '{TYPESENSE_COLLECTION_NAME}' collection."
)
if __name__ == "__main__": if __name__ == "__main__":
@@ -95,13 +97,17 @@ if __name__ == "__main__":
try: try:
# Check if the collection exists # Check if the collection exists
existing_collection = client.collections["entities"].retrieve() existing_collection = client.collections[TYPESENSE_COLLECTION_NAME].retrieve()
if force_recreate: if force_recreate:
client.collections["entities"].delete() client.collections[TYPESENSE_COLLECTION_NAME].delete()
print("Existing Typesense collection 'entities' deleted successfully.") print(
f"Existing Typesense collection '{TYPESENSE_COLLECTION_NAME}' deleted successfully."
)
client.collections.create(schema) client.collections.create(schema)
print("Typesense collection 'entities' recreated successfully.") print(
f"Typesense collection '{TYPESENSE_COLLECTION_NAME}' recreated successfully."
)
else: else:
# Update the fields of the existing collection # Update the fields of the existing collection
update_collection_fields(client, schema) update_collection_fields(client, schema)
@@ -109,7 +115,9 @@ if __name__ == "__main__":
except typesense.exceptions.ObjectNotFound: except typesense.exceptions.ObjectNotFound:
# Collection doesn't exist, create it # Collection doesn't exist, create it
client.collections.create(schema) client.collections.create(schema)
print("Typesense collection 'entities' created successfully.") print(
f"Typesense collection '{TYPESENSE_COLLECTION_NAME}' created successfully."
)
except Exception as e: except Exception as e:
print(f"An error occurred: {str(e)}") print(f"An error occurred: {str(e)}")