mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-06 03:05:25 +00:00
feat: indexing entity to typesense
This commit is contained in:
parent
7734b40848
commit
d1575d55f8
@ -8,6 +8,11 @@ class Settings(BaseSettings):
|
||||
|
||||
base_dir: str = str(Path.home() / ".memos")
|
||||
database_path: str = os.path.join(base_dir, "database.db")
|
||||
typesense_host: str = "localhost"
|
||||
typesense_port: str = "8108"
|
||||
typesense_protocol: str = "http"
|
||||
typesense_api_key: str = "xyz"
|
||||
typesense_connection_timeout_seconds: int = 2
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
61
memos/indexing.py
Normal file
61
memos/indexing.py
Normal file
@ -0,0 +1,61 @@
|
||||
import json
|
||||
|
||||
from .schemas import MetadataType, EntityMetadata
|
||||
|
||||
|
||||
def convert_metadata_value(metadata: EntityMetadata):
|
||||
if metadata.data_type == MetadataType.NUMBER_DATA:
|
||||
try:
|
||||
return int(metadata.value)
|
||||
except ValueError:
|
||||
return float(metadata.value)
|
||||
elif metadata.data_type == MetadataType.JSON_DATA:
|
||||
return json.loads(metadata.value)
|
||||
else:
|
||||
return metadata.value
|
||||
|
||||
|
||||
def upsert(client, entity):
|
||||
# Prepare the entity data for Typesense
|
||||
entity_data = {
|
||||
"id": str(entity.id),
|
||||
"filepath": entity.filepath,
|
||||
"filename": entity.filename,
|
||||
"size": entity.size,
|
||||
"file_created_at": int(entity.file_created_at.timestamp()),
|
||||
"file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
|
||||
"file_type": entity.file_type,
|
||||
"file_type_group": entity.file_type_group,
|
||||
"last_scan_at": (
|
||||
int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
|
||||
),
|
||||
"library_id": entity.library_id,
|
||||
"folder_id": entity.folder_id,
|
||||
"tags": [tag.name for tag in entity.tags],
|
||||
"metadata_entries": [
|
||||
{
|
||||
"key": metadata.key,
|
||||
"value": convert_metadata_value(metadata),
|
||||
"source": metadata.source,
|
||||
}
|
||||
for metadata in entity.metadata_entries
|
||||
],
|
||||
"metadata_text": "\n\n".join(
|
||||
[
|
||||
(
|
||||
f"key: {metadata.key}\nvalue:\n{json.dumps(json.loads(metadata.value), indent=2)}"
|
||||
if metadata.data_type == MetadataType.JSON_DATA
|
||||
else f"key: {metadata.key}\nvalue:\n{metadata.value}"
|
||||
)
|
||||
for metadata in entity.metadata_entries
|
||||
]
|
||||
),
|
||||
}
|
||||
|
||||
# Sync the entity data to Typesense
|
||||
try:
|
||||
client.collections["entities"].documents.upsert(entity_data)
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"Failed to sync entity to Typesense: {str(e)}",
|
||||
)
|
72
memos/initialize_typesense.py
Normal file
72
memos/initialize_typesense.py
Normal file
@ -0,0 +1,72 @@
|
||||
import typesense
|
||||
from memos.config import settings
|
||||
|
||||
# Initialize Typesense client
|
||||
client = typesense.Client(
|
||||
{
|
||||
"nodes": [
|
||||
{
|
||||
"host": settings.typesense_host,
|
||||
"port": settings.typesense_port,
|
||||
"protocol": settings.typesense_protocol,
|
||||
}
|
||||
],
|
||||
"api_key": settings.typesense_api_key,
|
||||
"connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
|
||||
}
|
||||
)
|
||||
|
||||
# Define the schema for the Typesense collection
|
||||
schema = {
|
||||
"name": "entities",
|
||||
"enable_nested_fields": True,
|
||||
"fields": [
|
||||
{"name": "filepath", "type": "string"},
|
||||
{"name": "filename", "type": "string"},
|
||||
{"name": "size", "type": "int32"},
|
||||
{"name": "file_created_at", "type": "int64", "facet": False},
|
||||
{"name": "file_last_modified_at", "type": "int64", "facet": False},
|
||||
{"name": "file_type", "type": "string", "facet": True},
|
||||
{"name": "file_type_group", "type": "string", "facet": True},
|
||||
{"name": "last_scan_at", "type": "int64", "facet": False, "optional": True},
|
||||
{"name": "library_id", "type": "int32", "facet": True},
|
||||
{"name": "folder_id", "type": "int32", "facet": True},
|
||||
{
|
||||
"name": "tags",
|
||||
"type": "string[]",
|
||||
"facet": True,
|
||||
"optional": True,
|
||||
"locale": "zh",
|
||||
},
|
||||
{
|
||||
"name": "metadata_entries",
|
||||
"type": "object[]",
|
||||
"optional": True,
|
||||
"locale": "zh",
|
||||
},
|
||||
{"name": "metadata_text", "type": "string", "optional": True, "locale": "zh"},
|
||||
],
|
||||
"token_separators": [":", "/", ".", " ", "-", "\\"],
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
import sys
|
||||
|
||||
# Check if "--force" parameter is provided
|
||||
force_recreate = "--force" in sys.argv
|
||||
|
||||
# Drop the collection if it exists and "--force" parameter is provided
|
||||
if force_recreate:
|
||||
try:
|
||||
client.collections["entities"].delete()
|
||||
print("Existing Typesense collection 'entities' deleted successfully.")
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Failed to delete existing Typesense collection 'entities': {str(e)}"
|
||||
)
|
||||
|
||||
# Recreate the collection in Typesense
|
||||
client.collections.create(schema)
|
||||
print("Typesense collection 'entities' created successfully.")
|
@ -6,9 +6,14 @@ from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from typing import List, Annotated
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
import typesense
|
||||
from memos.config import settings
|
||||
|
||||
from .config import get_database_path
|
||||
import memos.crud as crud
|
||||
import memos.indexing as indexing
|
||||
from .schemas import (
|
||||
Library,
|
||||
Folder,
|
||||
@ -22,11 +27,27 @@ from .schemas import (
|
||||
NewLibraryPluginParam,
|
||||
UpdateEntityTagsParam,
|
||||
UpdateEntityMetadataParam,
|
||||
MetadataType,
|
||||
)
|
||||
|
||||
engine = create_engine(f"sqlite:///{get_database_path()}")
|
||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||
|
||||
# Initialize Typesense client
|
||||
client = typesense.Client(
|
||||
{
|
||||
"nodes": [
|
||||
{
|
||||
"host": settings.typesense_host,
|
||||
"port": settings.typesense_port,
|
||||
"protocol": settings.typesense_protocol,
|
||||
}
|
||||
],
|
||||
"api_key": settings.typesense_api_key,
|
||||
"connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
|
||||
}
|
||||
)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@ -103,11 +124,7 @@ async def trigger_webhooks(library: Library, entity: Entity, request: Request):
|
||||
tasks = []
|
||||
for plugin in library.plugins:
|
||||
if plugin.webhook_url:
|
||||
location = str(
|
||||
request.url_for(
|
||||
"get_entity_by_id", entity_id=entity.id
|
||||
)
|
||||
)
|
||||
location = str(request.url_for("get_entity_by_id", entity_id=entity.id))
|
||||
task = client.post(
|
||||
plugin.webhook_url,
|
||||
json=entity.model_dump(mode="json"),
|
||||
@ -193,7 +210,9 @@ def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
|
||||
|
||||
|
||||
@app.get("/libraries/{library_id}/entities/{entity_id}", response_model=Entity)
|
||||
def get_entity_by_id_in_library(library_id: int, entity_id: int, db: Session = Depends(get_db)):
|
||||
def get_entity_by_id_in_library(
|
||||
library_id: int, entity_id: int, db: Session = Depends(get_db)
|
||||
):
|
||||
entity = crud.get_entity_by_id(entity_id, db)
|
||||
if entity is None or entity.library_id != library_id:
|
||||
raise HTTPException(
|
||||
@ -228,12 +247,30 @@ async def update_entity(
|
||||
return entity
|
||||
|
||||
|
||||
|
||||
@app.post("/entities/{entity_id}/index", status_code=status.HTTP_204_NO_CONTENT)
|
||||
async def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db)):
|
||||
entity = crud.get_entity_by_id(entity_id, db)
|
||||
if entity is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Entity not found",
|
||||
)
|
||||
|
||||
try:
|
||||
indexing.upsert(client, entity)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=str(e),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
@app.patch("/entities/{entity_id}/tags", response_model=Entity)
|
||||
@app.put("/entities/{entity_id}/tags", response_model=Entity)
|
||||
def patch_entity_tags(
|
||||
entity_id: int,
|
||||
update_tags: UpdateEntityTagsParam,
|
||||
db: Session = Depends(get_db)
|
||||
entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
|
||||
):
|
||||
entity = crud.get_entity_by_id(entity_id, db)
|
||||
if entity is None:
|
||||
@ -251,7 +288,7 @@ def patch_entity_tags(
|
||||
def patch_entity_metadata(
|
||||
entity_id: int,
|
||||
update_metadata: UpdateEntityMetadataParam,
|
||||
db: Session = Depends(get_db)
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
entity = crud.get_entity_by_id(entity_id, db)
|
||||
if entity is None:
|
||||
@ -261,11 +298,12 @@ def patch_entity_metadata(
|
||||
)
|
||||
|
||||
# Use the CRUD function to update the metadata entries
|
||||
entity = crud.update_entity_metadata_entries(entity_id, update_metadata.metadata_entries, db)
|
||||
entity = crud.update_entity_metadata_entries(
|
||||
entity_id, update_metadata.metadata_entries, db
|
||||
)
|
||||
return entity
|
||||
|
||||
|
||||
|
||||
@app.delete(
|
||||
"/libraries/{library_id}/entities/{entity_id}",
|
||||
status_code=status.HTTP_204_NO_CONTENT,
|
||||
|
@ -7,3 +7,4 @@ typer
|
||||
tabulate
|
||||
magika
|
||||
pydantic-settings
|
||||
typesense
|
||||
|
Loading…
x
Reference in New Issue
Block a user