mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-09 04:35:26 +00:00
fix(index): ignore empty text for embedding
This commit is contained in:
parent
0d0f14c526
commit
1a08a44a4d
@ -104,11 +104,14 @@ def generate_metadata_text(metadata_entries):
|
|||||||
def bulk_upsert(client, entities):
|
def bulk_upsert(client, entities):
|
||||||
documents = []
|
documents = []
|
||||||
metadata_texts = []
|
metadata_texts = []
|
||||||
|
entities_with_metadata = []
|
||||||
|
|
||||||
for entity in entities:
|
for entity in entities:
|
||||||
metadata_text = generate_metadata_text(entity.metadata_entries)
|
metadata_text = generate_metadata_text(entity.metadata_entries)
|
||||||
print(f"metadata_text: {len(metadata_text)}")
|
print(f"metadata_text: {len(metadata_text)}")
|
||||||
|
if metadata_text:
|
||||||
metadata_texts.append(metadata_text)
|
metadata_texts.append(metadata_text)
|
||||||
|
entities_with_metadata.append(entity)
|
||||||
|
|
||||||
documents.append(
|
documents.append(
|
||||||
EntityIndexItem(
|
EntityIndexItem(
|
||||||
@ -141,11 +144,9 @@ def bulk_upsert(client, entities):
|
|||||||
).model_dump(mode="json")
|
).model_dump(mode="json")
|
||||||
)
|
)
|
||||||
|
|
||||||
# 批量获取嵌入向量
|
|
||||||
print(f"Getting embeddings for {len(metadata_texts)} texts")
|
|
||||||
embeddings = get_embeddings(metadata_texts)
|
embeddings = get_embeddings(metadata_texts)
|
||||||
# 将嵌入向量添加到文档中
|
for doc, embedding, entity in zip(documents, embeddings, entities):
|
||||||
for doc, embedding in zip(documents, embeddings):
|
if entity in entities_with_metadata:
|
||||||
doc["embedding"] = embedding
|
doc["embedding"] = embedding
|
||||||
|
|
||||||
# Sync the entity data to Typesense
|
# Sync the entity data to Typesense
|
||||||
|
Loading…
x
Reference in New Issue
Block a user