1. Add missing token-size argument for summarizeChunk.

2. Make sure `num_ctx` comes from `options` instead of a calculated value, which is not stable.
3. Simplify the `recursiveSummarize` function.
This commit is contained in:
Jks Liu 2024-10-17 21:18:52 +08:00
parent 7ea7dc0cdf
commit c4deab61d9

View File

@ -90,15 +90,13 @@ async function summarizeContent(content, systemPrompt) {
const model = settings.ollamaModel || "llama3.1:8b"; const model = settings.ollamaModel || "llama3.1:8b";
const tokenLimit = settings.tokenLimit || 4096; const tokenLimit = settings.tokenLimit || 4096;
const maxContentTokens = tokenLimit - estimateTokenCount(systemPrompt) - 100; // Reserve 100 tokens for safety
console.log(`Starting summarization process. Token limit: ${tokenLimit}`); console.log(`Starting summarization process. Token limit: ${tokenLimit}`);
try { try {
let { summary, chunkCount, recursionDepth } = await recursiveSummarize( let { summary, chunkCount, recursionDepth } = await recursiveSummarize(
content, content,
systemPrompt, systemPrompt,
maxContentTokens, tokenLimit,
endpoint, endpoint,
model model
); );
@ -123,30 +121,15 @@ async function summarizeContent(content, systemPrompt) {
async function recursiveSummarize( async function recursiveSummarize(
content, content,
systemPrompt, systemPrompt,
maxContentTokens, tokenLimit,
endpoint, endpoint,
model, model,
depth = 0 depth = 0
) { ) {
console.log(`Recursive summarization depth: ${depth}`); console.log(`Recursive summarization depth: ${depth}`);
const chunks = splitContentIntoChunks(content, maxContentTokens); const chunks = splitContentIntoChunks(content, tokenLimit, systemPrompt);
console.log(`Split content into ${chunks.length} chunks`); console.log(`Split content into ${chunks.length} chunks`);
if (chunks.length === 1) {
console.log("Single chunk, summarizing directly");
return {
summary: await summarizeChunk(
chunks[0],
systemPrompt,
endpoint,
model,
maxContentTokens
),
chunkCount: 1,
recursionDepth: depth,
};
}
let summaries = []; let summaries = [];
for (let i = 0; i < chunks.length; i++) { for (let i = 0; i < chunks.length; i++) {
console.log(`Summarizing chunk ${i + 1} of ${chunks.length}`); console.log(`Summarizing chunk ${i + 1} of ${chunks.length}`);
@ -154,32 +137,26 @@ async function recursiveSummarize(
chunks[i], chunks[i],
systemPrompt, systemPrompt,
endpoint, endpoint,
model model,
tokenLimit
); );
summaries.push(chunkSummary); summaries.push(chunkSummary);
} }
const combinedSummaries = summaries.join("\n\n"); const combinedSummaries = summaries.join("\n\n");
if (estimateTokenCount(combinedSummaries) <= maxContentTokens) {
console.log( if (chunks.length <= 1) {
"Combined summaries fit within token limit, finalizing summary" console.log("Single chunk, summarizing directly");
);
return { return {
summary: await summarizeChunk( summary: combinedSummaries,
combinedSummaries,
systemPrompt,
endpoint,
model
),
chunkCount: chunks.length, chunkCount: chunks.length,
recursionDepth: depth, recursionDepth: depth,
}; };
} else { } else {
console.log("Combined summaries exceed token limit, recursing"); console.log("Multiple chunks, summarizing recursively");
const result = await recursiveSummarize( const result = await recursiveSummarize(
combinedSummaries, combinedSummaries,
systemPrompt, systemPrompt,
maxContentTokens, tokenLimit,
endpoint, endpoint,
model, model,
depth + 1 depth + 1
@ -196,7 +173,7 @@ async function summarizeChunk(
systemPrompt, systemPrompt,
endpoint, endpoint,
model, model,
maxContentTokens tokenLimit
) { ) {
const response = await fetch(endpoint, { const response = await fetch(endpoint, {
method: "POST", method: "POST",
@ -207,7 +184,7 @@ async function summarizeChunk(
prompt: `${systemPrompt}\n\nFollow the above instructions and summarize the following text:\n\n${chunk}`, prompt: `${systemPrompt}\n\nFollow the above instructions and summarize the following text:\n\n${chunk}`,
model: model, model: model,
stream: false, stream: false,
num_ctx: maxContentTokens, num_ctx: tokenLimit,
}), }),
}); });
@ -234,7 +211,8 @@ function estimateTokenCount(text) {
return Math.ceil(text.length / 4); return Math.ceil(text.length / 4);
} }
function splitContentIntoChunks(content, maxTokens) { function splitContentIntoChunks(content, tokenLimit, systemPrompt) {
const maxTokens = tokenLimit - estimateTokenCount(systemPrompt) - 100; // Reserve 100 tokens for safety
const chunks = []; const chunks = [];
const words = content.split(/\s+/); const words = content.split(/\s+/);
let currentChunk = ""; let currentChunk = "";