mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-06 19:25:24 +00:00
feat: indexing entity to typesense
This commit is contained in:
parent
7734b40848
commit
d1575d55f8
@ -8,6 +8,11 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
base_dir: str = str(Path.home() / ".memos")
|
base_dir: str = str(Path.home() / ".memos")
|
||||||
database_path: str = os.path.join(base_dir, "database.db")
|
database_path: str = os.path.join(base_dir, "database.db")
|
||||||
|
typesense_host: str = "localhost"
|
||||||
|
typesense_port: str = "8108"
|
||||||
|
typesense_protocol: str = "http"
|
||||||
|
typesense_api_key: str = "xyz"
|
||||||
|
typesense_connection_timeout_seconds: int = 2
|
||||||
|
|
||||||
|
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
|
61
memos/indexing.py
Normal file
61
memos/indexing.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from .schemas import MetadataType, EntityMetadata
|
||||||
|
|
||||||
|
|
||||||
|
def convert_metadata_value(metadata: EntityMetadata):
|
||||||
|
if metadata.data_type == MetadataType.NUMBER_DATA:
|
||||||
|
try:
|
||||||
|
return int(metadata.value)
|
||||||
|
except ValueError:
|
||||||
|
return float(metadata.value)
|
||||||
|
elif metadata.data_type == MetadataType.JSON_DATA:
|
||||||
|
return json.loads(metadata.value)
|
||||||
|
else:
|
||||||
|
return metadata.value
|
||||||
|
|
||||||
|
|
||||||
|
def upsert(client, entity):
|
||||||
|
# Prepare the entity data for Typesense
|
||||||
|
entity_data = {
|
||||||
|
"id": str(entity.id),
|
||||||
|
"filepath": entity.filepath,
|
||||||
|
"filename": entity.filename,
|
||||||
|
"size": entity.size,
|
||||||
|
"file_created_at": int(entity.file_created_at.timestamp()),
|
||||||
|
"file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
|
||||||
|
"file_type": entity.file_type,
|
||||||
|
"file_type_group": entity.file_type_group,
|
||||||
|
"last_scan_at": (
|
||||||
|
int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
|
||||||
|
),
|
||||||
|
"library_id": entity.library_id,
|
||||||
|
"folder_id": entity.folder_id,
|
||||||
|
"tags": [tag.name for tag in entity.tags],
|
||||||
|
"metadata_entries": [
|
||||||
|
{
|
||||||
|
"key": metadata.key,
|
||||||
|
"value": convert_metadata_value(metadata),
|
||||||
|
"source": metadata.source,
|
||||||
|
}
|
||||||
|
for metadata in entity.metadata_entries
|
||||||
|
],
|
||||||
|
"metadata_text": "\n\n".join(
|
||||||
|
[
|
||||||
|
(
|
||||||
|
f"key: {metadata.key}\nvalue:\n{json.dumps(json.loads(metadata.value), indent=2)}"
|
||||||
|
if metadata.data_type == MetadataType.JSON_DATA
|
||||||
|
else f"key: {metadata.key}\nvalue:\n{metadata.value}"
|
||||||
|
)
|
||||||
|
for metadata in entity.metadata_entries
|
||||||
|
]
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Sync the entity data to Typesense
|
||||||
|
try:
|
||||||
|
client.collections["entities"].documents.upsert(entity_data)
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(
|
||||||
|
f"Failed to sync entity to Typesense: {str(e)}",
|
||||||
|
)
|
72
memos/initialize_typesense.py
Normal file
72
memos/initialize_typesense.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
import typesense
|
||||||
|
from memos.config import settings
|
||||||
|
|
||||||
|
# Initialize Typesense client
|
||||||
|
client = typesense.Client(
|
||||||
|
{
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"host": settings.typesense_host,
|
||||||
|
"port": settings.typesense_port,
|
||||||
|
"protocol": settings.typesense_protocol,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"api_key": settings.typesense_api_key,
|
||||||
|
"connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define the schema for the Typesense collection
|
||||||
|
schema = {
|
||||||
|
"name": "entities",
|
||||||
|
"enable_nested_fields": True,
|
||||||
|
"fields": [
|
||||||
|
{"name": "filepath", "type": "string"},
|
||||||
|
{"name": "filename", "type": "string"},
|
||||||
|
{"name": "size", "type": "int32"},
|
||||||
|
{"name": "file_created_at", "type": "int64", "facet": False},
|
||||||
|
{"name": "file_last_modified_at", "type": "int64", "facet": False},
|
||||||
|
{"name": "file_type", "type": "string", "facet": True},
|
||||||
|
{"name": "file_type_group", "type": "string", "facet": True},
|
||||||
|
{"name": "last_scan_at", "type": "int64", "facet": False, "optional": True},
|
||||||
|
{"name": "library_id", "type": "int32", "facet": True},
|
||||||
|
{"name": "folder_id", "type": "int32", "facet": True},
|
||||||
|
{
|
||||||
|
"name": "tags",
|
||||||
|
"type": "string[]",
|
||||||
|
"facet": True,
|
||||||
|
"optional": True,
|
||||||
|
"locale": "zh",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "metadata_entries",
|
||||||
|
"type": "object[]",
|
||||||
|
"optional": True,
|
||||||
|
"locale": "zh",
|
||||||
|
},
|
||||||
|
{"name": "metadata_text", "type": "string", "optional": True, "locale": "zh"},
|
||||||
|
],
|
||||||
|
"token_separators": [":", "/", ".", " ", "-", "\\"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Check if "--force" parameter is provided
|
||||||
|
force_recreate = "--force" in sys.argv
|
||||||
|
|
||||||
|
# Drop the collection if it exists and "--force" parameter is provided
|
||||||
|
if force_recreate:
|
||||||
|
try:
|
||||||
|
client.collections["entities"].delete()
|
||||||
|
print("Existing Typesense collection 'entities' deleted successfully.")
|
||||||
|
except Exception as e:
|
||||||
|
print(
|
||||||
|
f"Failed to delete existing Typesense collection 'entities': {str(e)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Recreate the collection in Typesense
|
||||||
|
client.collections.create(schema)
|
||||||
|
print("Typesense collection 'entities' created successfully.")
|
@ -6,9 +6,14 @@ from sqlalchemy import create_engine
|
|||||||
from sqlalchemy.orm import sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
from typing import List, Annotated
|
from typing import List, Annotated
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
|
|
||||||
|
import typesense
|
||||||
|
from memos.config import settings
|
||||||
|
|
||||||
from .config import get_database_path
|
from .config import get_database_path
|
||||||
import memos.crud as crud
|
import memos.crud as crud
|
||||||
|
import memos.indexing as indexing
|
||||||
from .schemas import (
|
from .schemas import (
|
||||||
Library,
|
Library,
|
||||||
Folder,
|
Folder,
|
||||||
@ -22,11 +27,27 @@ from .schemas import (
|
|||||||
NewLibraryPluginParam,
|
NewLibraryPluginParam,
|
||||||
UpdateEntityTagsParam,
|
UpdateEntityTagsParam,
|
||||||
UpdateEntityMetadataParam,
|
UpdateEntityMetadataParam,
|
||||||
|
MetadataType,
|
||||||
)
|
)
|
||||||
|
|
||||||
engine = create_engine(f"sqlite:///{get_database_path()}")
|
engine = create_engine(f"sqlite:///{get_database_path()}")
|
||||||
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
||||||
|
|
||||||
|
# Initialize Typesense client
|
||||||
|
client = typesense.Client(
|
||||||
|
{
|
||||||
|
"nodes": [
|
||||||
|
{
|
||||||
|
"host": settings.typesense_host,
|
||||||
|
"port": settings.typesense_port,
|
||||||
|
"protocol": settings.typesense_protocol,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"api_key": settings.typesense_api_key,
|
||||||
|
"connection_timeout_seconds": settings.typesense_connection_timeout_seconds,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
@ -103,11 +124,7 @@ async def trigger_webhooks(library: Library, entity: Entity, request: Request):
|
|||||||
tasks = []
|
tasks = []
|
||||||
for plugin in library.plugins:
|
for plugin in library.plugins:
|
||||||
if plugin.webhook_url:
|
if plugin.webhook_url:
|
||||||
location = str(
|
location = str(request.url_for("get_entity_by_id", entity_id=entity.id))
|
||||||
request.url_for(
|
|
||||||
"get_entity_by_id", entity_id=entity.id
|
|
||||||
)
|
|
||||||
)
|
|
||||||
task = client.post(
|
task = client.post(
|
||||||
plugin.webhook_url,
|
plugin.webhook_url,
|
||||||
json=entity.model_dump(mode="json"),
|
json=entity.model_dump(mode="json"),
|
||||||
@ -193,7 +210,9 @@ def get_entity_by_id(entity_id: int, db: Session = Depends(get_db)):
|
|||||||
|
|
||||||
|
|
||||||
@app.get("/libraries/{library_id}/entities/{entity_id}", response_model=Entity)
|
@app.get("/libraries/{library_id}/entities/{entity_id}", response_model=Entity)
|
||||||
def get_entity_by_id_in_library(library_id: int, entity_id: int, db: Session = Depends(get_db)):
|
def get_entity_by_id_in_library(
|
||||||
|
library_id: int, entity_id: int, db: Session = Depends(get_db)
|
||||||
|
):
|
||||||
entity = crud.get_entity_by_id(entity_id, db)
|
entity = crud.get_entity_by_id(entity_id, db)
|
||||||
if entity is None or entity.library_id != library_id:
|
if entity is None or entity.library_id != library_id:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
@ -228,12 +247,30 @@ async def update_entity(
|
|||||||
return entity
|
return entity
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/entities/{entity_id}/index", status_code=status.HTTP_204_NO_CONTENT)
|
||||||
|
async def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db)):
|
||||||
|
entity = crud.get_entity_by_id(entity_id, db)
|
||||||
|
if entity is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_404_NOT_FOUND,
|
||||||
|
detail="Entity not found",
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
indexing.upsert(client, entity)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
|
detail=str(e),
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
@app.patch("/entities/{entity_id}/tags", response_model=Entity)
|
@app.patch("/entities/{entity_id}/tags", response_model=Entity)
|
||||||
@app.put("/entities/{entity_id}/tags", response_model=Entity)
|
@app.put("/entities/{entity_id}/tags", response_model=Entity)
|
||||||
def patch_entity_tags(
|
def patch_entity_tags(
|
||||||
entity_id: int,
|
entity_id: int, update_tags: UpdateEntityTagsParam, db: Session = Depends(get_db)
|
||||||
update_tags: UpdateEntityTagsParam,
|
|
||||||
db: Session = Depends(get_db)
|
|
||||||
):
|
):
|
||||||
entity = crud.get_entity_by_id(entity_id, db)
|
entity = crud.get_entity_by_id(entity_id, db)
|
||||||
if entity is None:
|
if entity is None:
|
||||||
@ -251,7 +288,7 @@ def patch_entity_tags(
|
|||||||
def patch_entity_metadata(
|
def patch_entity_metadata(
|
||||||
entity_id: int,
|
entity_id: int,
|
||||||
update_metadata: UpdateEntityMetadataParam,
|
update_metadata: UpdateEntityMetadataParam,
|
||||||
db: Session = Depends(get_db)
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
entity = crud.get_entity_by_id(entity_id, db)
|
entity = crud.get_entity_by_id(entity_id, db)
|
||||||
if entity is None:
|
if entity is None:
|
||||||
@ -261,11 +298,12 @@ def patch_entity_metadata(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Use the CRUD function to update the metadata entries
|
# Use the CRUD function to update the metadata entries
|
||||||
entity = crud.update_entity_metadata_entries(entity_id, update_metadata.metadata_entries, db)
|
entity = crud.update_entity_metadata_entries(
|
||||||
|
entity_id, update_metadata.metadata_entries, db
|
||||||
|
)
|
||||||
return entity
|
return entity
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@app.delete(
|
@app.delete(
|
||||||
"/libraries/{library_id}/entities/{entity_id}",
|
"/libraries/{library_id}/entities/{entity_id}",
|
||||||
status_code=status.HTTP_204_NO_CONTENT,
|
status_code=status.HTTP_204_NO_CONTENT,
|
||||||
|
@ -7,3 +7,4 @@ typer
|
|||||||
tabulate
|
tabulate
|
||||||
magika
|
magika
|
||||||
pydantic-settings
|
pydantic-settings
|
||||||
|
typesense
|
||||||
|
Loading…
x
Reference in New Issue
Block a user