fix(index): ignore empty text for embedding

This commit is contained in:
arkohut 2024-08-29 21:38:01 +08:00
parent 0d0f14c526
commit 1a08a44a4d

View File

@ -104,11 +104,14 @@ def generate_metadata_text(metadata_entries):
def bulk_upsert(client, entities):
documents = []
metadata_texts = []
entities_with_metadata = []
for entity in entities:
metadata_text = generate_metadata_text(entity.metadata_entries)
print(f"metadata_text: {len(metadata_text)}")
if metadata_text:
metadata_texts.append(metadata_text)
entities_with_metadata.append(entity)
documents.append(
EntityIndexItem(
@ -141,11 +144,9 @@ def bulk_upsert(client, entities):
).model_dump(mode="json")
)
# Fetch the embedding vectors for all collected texts in one batch
print(f"Getting embeddings for {len(metadata_texts)} texts")
embeddings = get_embeddings(metadata_texts)
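# Attach the embedding vectors to the documents.
# NOTE(review): `embeddings` is computed from `metadata_texts`, which only
# holds non-empty texts, so it appears to line up with
# `entities_with_metadata` rather than with `entities`. Zipping it against the
# full `entities` list looks like it will pair embeddings with the wrong
# entities and stop early once any entity has empty text -- confirm.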
for doc, embedding in zip(documents, embeddings):
for doc, embedding, entity in zip(documents, embeddings, entities):
if entity in entities_with_metadata:
doc["embedding"] = embedding
# Sync the entity data to Typesense