feat: indexing entity to typesense

This commit is contained in:
arkohut 2024-06-13 16:54:16 +08:00
parent 7734b40848
commit d1575d55f8
6 changed files with 191 additions and 13 deletions

View File

@ -8,6 +8,11 @@ class Settings(BaseSettings):
base_dir: str = str(Path.home() / ".memos")
database_path: str = os.path.join(base_dir, "database.db")
typesense_host: str = "localhost"
typesense_port: str = "8108"
typesense_protocol: str = "http"
typesense_api_key: str = "xyz"
typesense_connection_timeout_seconds: int = 2
settings = Settings()

61
memos/indexing.py Normal file
View File

@ -0,0 +1,61 @@
import json
from .schemas import MetadataType, EntityMetadata
def convert_metadata_value(metadata: EntityMetadata):
if metadata.data_type == MetadataType.NUMBER_DATA:
try:
return int(metadata.value)
except ValueError:
return float(metadata.value)
elif metadata.data_type == MetadataType.JSON_DATA:
return json.loads(metadata.value)
else:
return metadata.value
def upsert(client, entity):
# Prepare the entity data for Typesense
entity_data = {
"id": str(entity.id),
"filepath": entity.filepath,
"filename": entity.filename,
"size": entity.size,
"file_created_at": int(entity.file_created_at.timestamp()),
"file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
"file_type": entity.file_type,
"file_type_group": entity.file_type_group,
"last_scan_at": (
int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
),
"library_id": entity.library_id,
"folder_id": entity.folder_id,
"tags": [tag.name for tag in entity.tags],
"metadata_entries": [
{
"key": metadata.key,
"value": convert_metadata_value(metadata),
"source": metadata.source,
}
for metadata in entity.metadata_entries
],
"metadata_text": "\n\n".join(
[
(
f"key: {metadata.key}\nvalue:\n{json.dumps(json.loads(metadata.value), indent=2)}"
if metadata.data_type == MetadataType.JSON_DATA
else f"key: {metadata.key}\nvalue:\n{metadata.value}"
)
for metadata in entity.metadata_entries
]
),
}
# Sync the entity data to Typesense
try:
client.collections["entities"].documents.upsert(entity_data)
except Exception as e:
raise Exception(
f"Failed to sync entity to Typesense: {str(e)}",
)

View File

@ -0,0 +1,72 @@
import typesense
from memos.config import settings
# Initialize Typesense client
client = typesense.Client(
{
"nodes": [
{
"host": settings.typesense_host,
"port": settings.typesense_port,
"protocol": settings.typesense_protocol,
}
],
"api_key": settings.typesense_api_key,
"connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
}
)
# Define the schema for the Typesense collection
schema = {
"name": "entities",
"enable_nested_fields": True,
"fields": [
{"name": "filepath", "type": "string"},
{"name": "filename", "type": "string"},
{"name": "size", "type": "int32"},
{"name": "file_created_at", "type": "int64", "facet": False},
{"name": "file_last_modified_at", "type": "int64", "facet": False},
{"name": "file_type", "type": "string", "facet": True},
{"name": "file_type_group", "type": "string", "facet": True},
{"name": "last_scan_at", "type": "int64", "facet": False, "optional": True},
{"name": "library_id", "type": "int32", "facet": True},
{"name": "folder_id", "type": "int32", "facet": True},
{
"name": "tags",
"type": "string[]",
"facet": True,
"optional": True,
"locale": "zh",
},
{
"name": "metadata_entries",
"type": "object[]",
"optional": True,
"locale": "zh",
},
{"name": "metadata_text", "type": "string", "optional": True, "locale": "zh"},
],
"token_separators": [":", "/", ".", " ", "-", "\\"],
}
if __name__ == "__main__":
import sys
# Check if "--force" parameter is provided
force_recreate = "--force" in sys.argv
# Drop the collection if it exists and "--force" parameter is provided
if force_recreate:
try:
client.collections["entities"].delete()
print("Existing Typesense collection 'entities' deleted successfully.")
except Exception as e:
print(
f"Failed to delete existing Typesense collection 'entities': {str(e)}"
)
# Recreate the collection in Typesense
client.collections.create(schema)
print("Typesense collection 'entities' created successfully.")

View File

@ -6,9 +6,14 @@ from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from typing import List, Annotated
import asyncio
import json
import typesense
from memos.config import settings
from .config import get_database_path
import memos.crud as crud
import memos.indexing as indexing
from .schemas import (
Library,
Folder,
@ -22,11 +27,27 @@ from .schemas import (
NewLibraryPluginParam,
UpdateEntityTagsParam,
UpdateEntityMetadataParam,
MetadataType,
)
engine = create_engine(f"sqlite:///{get_database_path()}")
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Initialize Typesense client
client = typesense.Client(
{
"nodes": [
{
"host": settings.typesense_host,
"port": settings.typesense_port,
"protocol": settings.typesense_protocol,
}
],
"api_key": settings.typesense_api_key,
"connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
}
)
app = FastAPI()
@ -103,11 +124,7 @@ async def trigger_webhooks(library: Library, entity: Entity, request: Request):
tasks = []
for plugin in library.plugins:
if plugin.webhook_url:
location = str(
request.url_for(
"get_entity_by_id", entity_id=entity.id
)
)
location = str(request.url_for("get_entity_by_id", entity_id=entity.id))
task = client.post(
plugin.webhook_url,
json=entity.model_dump(mode="json"),
@ -193,7 +210,9 @@ def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
@app.get("/libraries/{library_id}/entities/{entity_id}", response_model=Entity)
def get_entity_by_id_in_library(library_id: int, entity_id: int, db: Session = Depends(get_db)):
def get_entity_by_id_in_library(
library_id: int, entity_id: int, db: Session = Depends(get_db)
):
entity = crud.get_entity_by_id(entity_id, db)
if entity is None or entity.library_id != library_id:
raise HTTPException(
@ -228,12 +247,30 @@ async def update_entity(
return entity
@app.post("/entities/{entity_id}/index", status_code=status.HTTP_204_NO_CONTENT)
async def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db)):
entity = crud.get_entity_by_id(entity_id, db)
if entity is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Entity not found",
)
try:
indexing.upsert(client, entity)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=str(e),
)
return None
@app.patch("/entities/{entity_id}/tags", response_model=Entity)
@app.put("/entities/{entity_id}/tags", response_model=Entity)
def patch_entity_tags(
entity_id: int,
update_tags: UpdateEntityTagsParam,
db: Session = Depends(get_db)
entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
):
entity = crud.get_entity_by_id(entity_id, db)
if entity is None:
@ -251,7 +288,7 @@ def patch_entity_tags(
def patch_entity_metadata(
entity_id: int,
update_metadata: UpdateEntityMetadataParam,
db: Session = Depends(get_db)
db: Session = Depends(get_db),
):
entity = crud.get_entity_by_id(entity_id, db)
if entity is None:
@ -261,11 +298,12 @@ def patch_entity_metadata(
)
# Use the CRUD function to update the metadata entries
entity = crud.update_entity_metadata_entries(entity_id, update_metadata.metadata_entries, db)
entity = crud.update_entity_metadata_entries(
entity_id, update_metadata.metadata_entries, db
)
return entity
@app.delete(
"/libraries/{library_id}/entities/{entity_id}",
status_code=status.HTTP_204_NO_CONTENT,

View File

@ -7,3 +7,4 @@ typer
tabulate
magika
pydantic-settings
typesense

View File

@ -20,7 +20,8 @@ setup(
'sqlalchemy',
'typer',
'magika',
'pydantic-settings'
'pydantic-settings',
'typesense',
],
entry_points={
'console_scripts': [