mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-06 19:25:24 +00:00
feat: add index
This commit is contained in:
parent
7c3017f85b
commit
8a1e04f997
@ -23,6 +23,7 @@ BASE_URL = "http://localhost:8080"
|
||||
|
||||
ignore_files = [".DS_Store"]
|
||||
|
||||
|
||||
def format_timestamp(timestamp):
|
||||
if isinstance(timestamp, str):
|
||||
return timestamp
|
||||
@ -55,7 +56,9 @@ def display_libraries(libraries):
|
||||
]
|
||||
)
|
||||
|
||||
print(tabulate(table, headers=["ID", "Name", "Folders", "Plugins"], tablefmt="plain"))
|
||||
print(
|
||||
tabulate(table, headers=["ID", "Name", "Folders", "Plugins"], tablefmt="plain")
|
||||
)
|
||||
|
||||
|
||||
@app.command()
|
||||
@ -253,6 +256,91 @@ def scan(library_id: int):
|
||||
print(f"Total files deleted: {total_files_deleted}")
|
||||
|
||||
|
||||
@lib_app.command("index")
|
||||
def index(library_id: int):
|
||||
print(f"Indexing library {library_id}")
|
||||
|
||||
# Get the library
|
||||
response = httpx.get(f"{BASE_URL}/libraries/{library_id}")
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to get library: {response.status_code} - {response.text}")
|
||||
return
|
||||
|
||||
library = response.json()
|
||||
scanned_entities = set()
|
||||
|
||||
# Iterate through folders
|
||||
for folder in library["folders"]:
|
||||
tqdm.write(f"Processing folder: {folder['id']}")
|
||||
|
||||
# List all entities in the folder
|
||||
offset = 0
|
||||
while True:
|
||||
entities_response = httpx.get(
|
||||
f"{BASE_URL}/libraries/{library_id}/folders/{folder['id']}/entities",
|
||||
params={"limit": 200, "offset": offset},
|
||||
)
|
||||
if entities_response.status_code != 200:
|
||||
tqdm.write(
|
||||
f"Failed to get entities: {entities_response.status_code} - {entities_response.text}"
|
||||
)
|
||||
break
|
||||
|
||||
entities = entities_response.json()
|
||||
if not entities:
|
||||
break
|
||||
|
||||
# Index each entity
|
||||
for entity in tqdm(entities, desc="Indexing entities", leave=False):
|
||||
index_response = httpx.post(f"{BASE_URL}/entities/{entity['id']}/index")
|
||||
if index_response.status_code == 204:
|
||||
tqdm.write(f"Indexed entity: {entity['id']}")
|
||||
else:
|
||||
tqdm.write(
|
||||
f"Failed to index entity {entity['id']}: {index_response.status_code} - {index_response.text}"
|
||||
)
|
||||
|
||||
scanned_entities.add(str(entity["id"]))
|
||||
|
||||
offset += 200
|
||||
|
||||
# List all indexed entities in the folder
|
||||
offset = 0
|
||||
while True:
|
||||
index_response = httpx.get(
|
||||
f"{BASE_URL}/libraries/{library_id}/folders/{folder['id']}/index",
|
||||
params={"limit": 200, "offset": offset},
|
||||
)
|
||||
if index_response.status_code != 200:
|
||||
tqdm.write(
|
||||
f"Failed to get indexed entities: {index_response.status_code} - {index_response.text}"
|
||||
)
|
||||
break
|
||||
|
||||
indexed_entities = index_response.json()
|
||||
if not indexed_entities:
|
||||
break
|
||||
|
||||
# Delete indexes for entities not in scanned_entities
|
||||
for indexed_entity in tqdm(
|
||||
indexed_entities, desc="Cleaning up indexes", leave=False
|
||||
):
|
||||
if indexed_entity["id"] not in scanned_entities:
|
||||
delete_response = httpx.delete(
|
||||
f"{BASE_URL}/entities/{indexed_entity['id']}/index"
|
||||
)
|
||||
if delete_response.status_code == 204:
|
||||
tqdm.write(f"Deleted index for entity: {indexed_entity['id']}")
|
||||
else:
|
||||
tqdm.write(
|
||||
f"Failed to delete index for entity {indexed_entity['id']}: {delete_response.status_code} - {delete_response.text}"
|
||||
)
|
||||
|
||||
offset += 200
|
||||
|
||||
print("Indexing completed")
|
||||
|
||||
|
||||
def display_plugins(plugins):
|
||||
table = []
|
||||
for plugin in plugins:
|
||||
|
@ -1,6 +1,7 @@
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
from .schemas import MetadataType, EntityMetadata
|
||||
from .schemas import MetadataType, EntityMetadata, EntityIndexItem, MetadataIndexItem
|
||||
|
||||
|
||||
def convert_metadata_value(metadata: EntityMetadata):
|
||||
@ -16,31 +17,28 @@ def convert_metadata_value(metadata: EntityMetadata):
|
||||
|
||||
|
||||
def upsert(client, entity):
|
||||
# Prepare the entity data for Typesense
|
||||
entity_data = {
|
||||
"id": str(entity.id),
|
||||
"filepath": entity.filepath,
|
||||
"filename": entity.filename,
|
||||
"size": entity.size,
|
||||
"file_created_at": int(entity.file_created_at.timestamp()),
|
||||
"file_last_modified_at": int(entity.file_last_modified_at.timestamp()),
|
||||
"file_type": entity.file_type,
|
||||
"file_type_group": entity.file_type_group,
|
||||
"last_scan_at": (
|
||||
int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None
|
||||
),
|
||||
"library_id": entity.library_id,
|
||||
"folder_id": entity.folder_id,
|
||||
"tags": [tag.name for tag in entity.tags],
|
||||
"metadata_entries": [
|
||||
{
|
||||
"key": metadata.key,
|
||||
"value": convert_metadata_value(metadata),
|
||||
"source": metadata.source,
|
||||
}
|
||||
entity_data = EntityIndexItem(
|
||||
id=str(entity.id),
|
||||
filepath=entity.filepath,
|
||||
filename=entity.filename,
|
||||
size=entity.size,
|
||||
file_created_at=int(entity.file_created_at.timestamp()),
|
||||
file_last_modified_at=int(entity.file_last_modified_at.timestamp()),
|
||||
file_type=entity.file_type,
|
||||
file_type_group=entity.file_type_group,
|
||||
last_scan_at=int(entity.last_scan_at.timestamp()) if entity.last_scan_at else None,
|
||||
library_id=entity.library_id,
|
||||
folder_id=entity.folder_id,
|
||||
tags=[tag.name for tag in entity.tags],
|
||||
metadata_entries=[
|
||||
MetadataIndexItem(
|
||||
key=metadata.key,
|
||||
value=convert_metadata_value(metadata),
|
||||
source=metadata.source,
|
||||
)
|
||||
for metadata in entity.metadata_entries
|
||||
],
|
||||
"metadata_text": "\n\n".join(
|
||||
metadata_text="\n\n".join(
|
||||
[
|
||||
(
|
||||
f"key: {metadata.key}\nvalue:\n{json.dumps(json.loads(metadata.value), indent=2)}"
|
||||
@ -50,12 +48,61 @@ def upsert(client, entity):
|
||||
for metadata in entity.metadata_entries
|
||||
]
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
# Sync the entity data to Typesense
|
||||
try:
|
||||
client.collections["entities"].documents.upsert(entity_data)
|
||||
client.collections["entities"].documents.upsert(entity_data.model_dump_json())
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"Failed to sync entity to Typesense: {str(e)}",
|
||||
)
|
||||
|
||||
|
||||
def remove_entity_by_id(client, entity_id):
|
||||
try:
|
||||
client.collections["entities"].documents[entity_id].delete()
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"Failed to remove entity from Typesense: {str(e)}",
|
||||
)
|
||||
|
||||
|
||||
def list_all_entities(client, library_id: int, folder_id: int, limit=100, offset=0) -> List[EntityIndexItem]:
|
||||
try:
|
||||
response = client.collections["entities"].documents.search(
|
||||
{
|
||||
"q": "*",
|
||||
"filter_by": f"library_id:={library_id} && folder_id:={folder_id}",
|
||||
"per_page": limit,
|
||||
"page": offset // limit + 1,
|
||||
}
|
||||
)
|
||||
return [
|
||||
EntityIndexItem(
|
||||
id=hit["document"]["id"],
|
||||
filepath=hit["document"]["filepath"],
|
||||
filename=hit["document"]["filename"],
|
||||
size=hit["document"]["size"],
|
||||
file_created_at=hit["document"]["file_created_at"],
|
||||
file_last_modified_at=hit["document"]["file_last_modified_at"],
|
||||
file_type=hit["document"]["file_type"],
|
||||
file_type_group=hit["document"]["file_type_group"],
|
||||
last_scan_at=hit["document"].get("last_scan_at"),
|
||||
library_id=hit["document"]["library_id"],
|
||||
folder_id=hit["document"]["folder_id"],
|
||||
tags=hit["document"]["tags"],
|
||||
metadata_entries=[
|
||||
MetadataIndexItem(
|
||||
key=entry["key"],
|
||||
value=entry["value"],
|
||||
source=entry["source"]
|
||||
) for entry in hit["document"]["metadata_entries"]
|
||||
],
|
||||
metadata_text=hit["document"]["metadata_text"]
|
||||
) for hit in response["hits"]
|
||||
]
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"Failed to list entities for library {library_id} and folder {folder_id}: {str(e)}",
|
||||
)
|
||||
|
@ -1,5 +1,5 @@
|
||||
from pydantic import BaseModel, ConfigDict, DirectoryPath, HttpUrl
|
||||
from typing import List
|
||||
from pydantic import BaseModel, ConfigDict, DirectoryPath, HttpUrl, Field
|
||||
from typing import List, Optional, Any
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
|
||||
@ -140,3 +140,25 @@ class Entity(BaseModel):
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
|
||||
class MetadataIndexItem(BaseModel):
|
||||
key: str
|
||||
value: Any
|
||||
source: str
|
||||
|
||||
|
||||
class EntityIndexItem(BaseModel):
|
||||
id: str
|
||||
filepath: str
|
||||
filename: str
|
||||
size: int
|
||||
file_created_at: int = Field(..., description="Unix timestamp")
|
||||
file_last_modified_at: int = Field(..., description="Unix timestamp")
|
||||
file_type: str
|
||||
file_type_group: str
|
||||
last_scan_at: Optional[int] = Field(None, description="Unix timestamp")
|
||||
library_id: int
|
||||
folder_id: int
|
||||
tags: List[str]
|
||||
metadata_entries: List[MetadataIndexItem]
|
||||
metadata_text: str
|
||||
|
@ -28,6 +28,8 @@ from .schemas import (
|
||||
UpdateEntityTagsParam,
|
||||
UpdateEntityMetadataParam,
|
||||
MetadataType,
|
||||
EntityIndexItem,
|
||||
MetadataIndexItem,
|
||||
)
|
||||
|
||||
engine = create_engine(f"sqlite:///{get_database_path()}")
|
||||
@ -280,6 +282,55 @@ async def sync_entity_to_typesense(entity_id: int, db: Session = Depends(get_db)
|
||||
return None
|
||||
|
||||
|
||||
@app.delete(
|
||||
"/entities/{entity_id}/index",
|
||||
status_code=status.HTTP_204_NO_CONTENT,
|
||||
tags=["entity"],
|
||||
)
|
||||
async def remove_entity_from_typesense(entity_id: int, db: Session = Depends(get_db)):
|
||||
entity = crud.get_entity_by_id(entity_id, db)
|
||||
if entity is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Entity not found",
|
||||
)
|
||||
|
||||
try:
|
||||
indexing.remove_entity_by_id(client, entity_id)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=str(e),
|
||||
)
|
||||
return None
|
||||
|
||||
@app.get(
|
||||
"/libraries/{library_id}/folders/{folder_id}/index",
|
||||
response_model=List[EntityIndexItem],
|
||||
tags=["entity"],
|
||||
)
|
||||
def list_entities_in_folder(
|
||||
library_id: int,
|
||||
folder_id: int,
|
||||
limit: Annotated[int, Query(ge=1, le=200)] = 10,
|
||||
offset: int = 0,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
library = crud.get_library_by_id(library_id, db)
|
||||
if library is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND, detail="Library not found"
|
||||
)
|
||||
|
||||
if folder_id not in [folder.id for folder in library.folders]:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Folder not found in the specified library",
|
||||
)
|
||||
|
||||
return indexing.list_all_entities(client, library_id, folder_id, limit, offset)
|
||||
|
||||
|
||||
@app.patch("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
|
||||
@app.put("/entities/{entity_id}/tags", response_model=Entity, tags=["entity"])
|
||||
def patch_entity_tags(
|
||||
|
Loading…
x
Reference in New Issue
Block a user