feat: indexing entity to typesense

arkohut 2024-06-13 16:54:16 +08:00
parent 7734b40848
commit d1575d55f8
6 changed files with 191 additions and 13 deletions


@@ -8,6 +8,11 @@ class Settings(BaseSettings):
    base_dir: str = str(Path.home() / ".memos")
    database_path: str = os.path.join(base_dir, "database.db")
    typesense_host: str = "localhost"
    typesense_port: str = "8108"
    typesense_protocol: str = "http"
    typesense_api_key: str = "xyz"
    typesense_connection_timeout_seconds: int = 2


settings = Settings()
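
Since Settings is a pydantic-settings BaseSettings class, each new field can also be supplied through an environment variable of the same name. A minimal sketch, assuming no env_prefix is configured and using made-up values:

import os

# Hypothetical override: pydantic-settings matches environment variables to
# field names case-insensitively, so TYPESENSE_HOST feeds typesense_host.
os.environ["TYPESENSE_HOST"] = "search.internal.example"
os.environ["TYPESENSE_API_KEY"] = "replace-with-a-real-key"

from memos.config import Settings

settings = Settings()
assert settings.typesense_host == "search.internal.example"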

memos/indexing.py (new file, 61 lines)

@@ -0,0 +1,61 @@
import json

from .schemas import MetadataType, EntityMetadata


def convert_metadata_value(metadata: EntityMetadata):
    if metadata.data_type == MetadataType.NUMBER_DATA:
        try:
            return int(metadata.value)
        except ValueError:
            return float(metadata.value)
    elif metadata.data_type == MetadataType.JSON_DATA:
        return json.loads(metadata.value)
    else:
        return metadata.value


def upsert(client, entity):
    # Prepare the entity data for Typesense
    entity_data = {
        "id": str(entity.id),
        "filepath": entity.filepath,
        "filename": entity.filename,
        "size": entity.size,
        "file_created_at": int(entity.file_created_at.timestamp()),
        "file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
        "file_type": entity.file_type,
        "file_type_group": entity.file_type_group,
        "last_scan_at": (
            int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
        ),
        "library_id": entity.library_id,
        "folder_id": entity.folder_id,
        "tags": [tag.name for tag in entity.tags],
        "metadata_entries": [
            {
                "key": metadata.key,
                "value": convert_metadata_value(metadata),
                "source": metadata.source,
            }
            for metadata in entity.metadata_entries
        ],
        "metadata_text": "\n\n".join(
            [
                (
                    f"key: {metadata.key}\nvalue:\n{json.dumps(json.loads(metadata.value), indent=2)}"
                    if metadata.data_type == MetadataType.JSON_DATA
                    else f"key: {metadata.key}\nvalue:\n{metadata.value}"
                )
                for metadata in entity.metadata_entries
            ]
        ),
    }

    # Sync the entity data to Typesense
    try:
        client.collections["entities"].documents.upsert(entity_data)
    except Exception as e:
        raise Exception(
            f"Failed to sync entity to Typesense: {str(e)}",
        )
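
Taken together, a minimal usage sketch for this module (the CRUD lookup and the Typesense client mirror what the server wires up later in this commit; the helper and entity id are hypothetical):

import memos.crud as crud
import memos.indexing as indexing


def index_one(client, db, entity_id: int):
    # Fetch the entity through the existing CRUD layer and push it into the
    # "entities" collection; indexing.upsert raises if Typesense rejects it.
    entity = crud.get_entity_by_id(entity_id, db)
    if entity is None:
        raise ValueError(f"entity {entity_id} not found")
    indexing.upsert(client, entity)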


@@ -0,0 +1,72 @@
import typesense

from memos.config import settings

# Initialize Typesense client
client = typesense.Client(
    {
        "nodes": [
            {
                "host": settings.typesense_host,
                "port": settings.typesense_port,
                "protocol": settings.typesense_protocol,
            }
        ],
        "api_key": settings.typesense_api_key,
        "connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
    }
)

# Define the schema for the Typesense collection
schema = {
    "name": "entities",
    "enable_nested_fields": True,
    "fields": [
        {"name": "filepath", "type": "string"},
        {"name": "filename", "type": "string"},
        {"name": "size", "type": "int32"},
        {"name": "file_created_at", "type": "int64", "facet": False},
        {"name": "file_last_modified_at", "type": "int64", "facet": False},
        {"name": "file_type", "type": "string", "facet": True},
        {"name": "file_type_group", "type": "string", "facet": True},
        {"name": "last_scan_at", "type": "int64", "facet": False, "optional": True},
        {"name": "library_id", "type": "int32", "facet": True},
        {"name": "folder_id", "type": "int32", "facet": True},
        {
            "name": "tags",
            "type": "string[]",
            "facet": True,
            "optional": True,
            "locale": "zh",
        },
        {
            "name": "metadata_entries",
            "type": "object[]",
            "optional": True,
            "locale": "zh",
        },
        {"name": "metadata_text", "type": "string", "optional": True, "locale": "zh"},
    ],
    "token_separators": [":", "/", ".", " ", "-", "\\"],
}

if __name__ == "__main__":
    import sys

    # Check if "--force" parameter is provided
    force_recreate = "--force" in sys.argv

    # Drop the collection if it exists and "--force" parameter is provided
    if force_recreate:
        try:
            client.collections["entities"].delete()
            print("Existing Typesense collection 'entities' deleted successfully.")
        except Exception as e:
            print(
                f"Failed to delete existing Typesense collection 'entities': {str(e)}"
            )

    # Recreate the collection in Typesense
    client.collections.create(schema)
    print("Typesense collection 'entities' created successfully.")


@@ -6,9 +6,14 @@ from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from typing import List, Annotated
import asyncio
import json
import typesense
from memos.config import settings
from .config import get_database_path
import memos.crud as crud
import memos.indexing as indexing
from .schemas import (
    Library,
    Folder,
@@ -22,11 +27,27 @@ from .schemas import (
    NewLibraryPluginParam,
    UpdateEntityTagsParam,
    UpdateEntityMetadataParam,
    MetadataType,
)

engine = create_engine(f"sqlite:///{get_database_path()}")
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Initialize Typesense client
client = typesense.Client(
    {
        "nodes": [
            {
                "host": settings.typesense_host,
                "port": settings.typesense_port,
                "protocol": settings.typesense_protocol,
            }
        ],
        "api_key": settings.typesense_api_key,
        "connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
    }
)

app = FastAPI()
@@ -103,11 +124,7 @@ async def trigger_webhooks(library: Library, entity: Entity, request: Request):
        tasks = []
        for plugin in library.plugins:
            if plugin.webhook_url:
                location = str(request.url_for("get_entity_by_id", entity_id=entity.id))
                task = client.post(
                    plugin.webhook_url,
                    json=entity.model_dump(mode="json"),
@@ -193,7 +210,9 @@ def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
@app.get("/libraries/{library_id}/entities/{entity_id}", response_model=Entity)
def get_entity_by_id_in_library(
    library_id: int, entity_id: int, db: Session = Depends(get_db)
):
    entity = crud.get_entity_by_id(entity_id, db)
    if entity is None or entity.library_id != library_id:
        raise HTTPException(
@@ -228,12 +247,30 @@ async def update_entity(
    return entity


@app.post("/entities/{entity_id}/index", status_code=status.HTTP_204_NO_CONTENT)
async def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db)):
    entity = crud.get_entity_by_id(entity_id, db)
    if entity is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Entity not found",
        )
    try:
        indexing.upsert(client, entity)
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e),
        )
    return None


@app.patch("/entities/{entity_id}/tags", response_model=Entity)
@app.put("/entities/{entity_id}/tags", response_model=Entity)
def patch_entity_tags(
    entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
):
    entity = crud.get_entity_by_id(entity_id, db)
    if entity is None:
@@ -251,7 +288,7 @@ def patch_entity_tags(
def patch_entity_metadata(
    entity_id: int,
    update_metadata: UpdateEntityMetadataParam,
    db: Session = Depends(get_db),
):
    entity = crud.get_entity_by_id(entity_id, db)
    if entity is None:
@@ -261,11 +298,12 @@ def patch_entity_metadata(
        )

    # Use the CRUD function to update the metadata entries
    entity = crud.update_entity_metadata_entries(
        entity_id, update_metadata.metadata_entries, db
    )
    return entity


@app.delete(
    "/libraries/{library_id}/entities/{entity_id}",
    status_code=status.HTTP_204_NO_CONTENT,
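
With the server running, the new index endpoint can be exercised with a plain HTTP call. A sketch, where the host, port, and entity id are made up and only the route comes from this commit:

import httpx

# A 204 response means the entity was upserted into the "entities" collection;
# 404 means the entity id does not exist, 500 surfaces the Typesense error.
response = httpx.post("http://localhost:8000/entities/42/index")
assert response.status_code == 204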


@@ -7,3 +7,4 @@ typer
tabulate
magika
pydantic-settings
typesense


@@ -20,7 +20,8 @@ setup(
        'sqlalchemy',
        'typer',
        'magika',
        'pydantic-settings',
        'typesense',
    ],
    entry_points={
        'console_scripts': [