mirror of
https://github.com/tcsenpai/spacellama.git
synced 2025-06-07 03:35:31 +00:00
chunking and recursive summarizing support
This commit is contained in:
parent
cd20d4f5c0
commit
083da7f71a
151
background.js
151
background.js
@ -33,63 +33,18 @@ async function summarizeContent(content, systemPrompt) {
|
||||
|
||||
const maxContentTokens = tokenLimit - estimateTokenCount(systemPrompt) - 100; // Reserve 100 tokens for safety
|
||||
|
||||
console.log(`Starting summarization process. Token limit: ${tokenLimit}`);
|
||||
|
||||
try {
|
||||
console.log(`Using system prompt: ${systemPrompt}`);
|
||||
let summary = "";
|
||||
let chunks = splitContentIntoChunks(content, maxContentTokens);
|
||||
|
||||
for (let chunk of chunks) {
|
||||
const response = await fetch(endpoint, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
prompt: `${systemPrompt}\n\nFollow the above instructions and summarize the following text:\n\n${chunk}`,
|
||||
model: model,
|
||||
stream: false,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(
|
||||
`HTTP error! status: ${response.status}, message: ${errorText}`
|
||||
);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
summary += data.response + "\n\n";
|
||||
}
|
||||
|
||||
if (chunks.length > 1) {
|
||||
// If we had multiple chunks, summarize the summary
|
||||
const finalResponse = await fetch(endpoint, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
prompt: `${systemPrompt}\n\nFollow the above instructions and provide a final summary of the following summaries:\n\n${summary}`,
|
||||
model: model,
|
||||
stream: false,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!finalResponse.ok) {
|
||||
const errorText = await finalResponse.text();
|
||||
throw new Error(
|
||||
`HTTP error! status: ${finalResponse.status}, message: ${errorText}`
|
||||
);
|
||||
}
|
||||
|
||||
const finalData = await finalResponse.json();
|
||||
summary = finalData.response;
|
||||
}
|
||||
|
||||
return summary.trim();
|
||||
let { summary, chunkCount, recursionDepth } = await recursiveSummarize(content, systemPrompt, maxContentTokens, endpoint, model);
|
||||
console.log("Final summary completed.");
|
||||
return {
|
||||
summary: typeof summary === 'string' ? summary.trim() : JSON.stringify(summary),
|
||||
chunkCount,
|
||||
recursionDepth,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error("Error details:", error);
|
||||
console.error("Error in summarizeContent:", error);
|
||||
error.details = {
|
||||
endpoint: endpoint,
|
||||
model: model,
|
||||
@ -99,34 +54,82 @@ async function summarizeContent(content, systemPrompt) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Summarizes `content`, splitting it into chunks when necessary and
 * re-summarizing the joined chunk summaries until they fit the budget.
 *
 * Flow:
 *  1. `splitContentIntoChunks` splits the content against `maxContentTokens`.
 *  2. A single chunk is summarized directly.
 *  3. Otherwise every chunk is summarized in order, the partial summaries are
 *     joined with blank lines, and the result is either summarized once more
 *     (when it fits the budget per `estimateTokenCount`) or fed back into
 *     this function at `depth + 1`.
 *
 * @param {string} content - Text to summarize.
 * @param {string} systemPrompt - Instructions forwarded to `summarizeChunk`.
 * @param {number} maxContentTokens - Token budget for one request's content.
 * @param {string} endpoint - URL forwarded to `summarizeChunk`.
 * @param {string} model - Model name forwarded to `summarizeChunk`.
 * @param {number} [depth=0] - Current recursion level; callers normally omit it.
 * @returns {Promise<{summary: *, chunkCount: number, recursionDepth: number}>}
 *   The final summary as returned by `summarizeChunk`, the total number of
 *   chunks summarized across all levels, and the deepest level reached.
 * @throws {Error} When the joined summaries exceed the budget but are not
 *   shorter than the input, i.e. another pass could not be guaranteed to
 *   terminate. Errors from `summarizeChunk` propagate unchanged.
 */
async function recursiveSummarize(content, systemPrompt, maxContentTokens, endpoint, model, depth = 0) {
  console.log(`Recursive summarization depth: ${depth}`);
  const chunks = splitContentIntoChunks(content, maxContentTokens);
  console.log(`Split content into ${chunks.length} chunks`);

  if (chunks.length === 1) {
    console.log("Single chunk, summarizing directly");
    return {
      summary: await summarizeChunk(chunks[0], systemPrompt, endpoint, model),
      chunkCount: 1,
      recursionDepth: depth,
    };
  }

  // Chunks are summarized one after another (not in parallel) so that only a
  // single request is in flight at any time.
  const summaries = [];
  for (let i = 0; i < chunks.length; i++) {
    console.log(`Summarizing chunk ${i + 1} of ${chunks.length}`);
    const chunkSummary = await summarizeChunk(chunks[i], systemPrompt, endpoint, model);
    // A non-string summary would be joined as "[object Object]"; serialize it
    // so its content survives into the next summarization pass.
    summaries.push(typeof chunkSummary === "string" ? chunkSummary : JSON.stringify(chunkSummary));
  }

  const combinedSummaries = summaries.join("\n\n");
  if (estimateTokenCount(combinedSummaries) <= maxContentTokens) {
    console.log("Combined summaries fit within token limit, finalizing summary");
    return {
      summary: await summarizeChunk(combinedSummaries, systemPrompt, endpoint, model),
      chunkCount: chunks.length,
      recursionDepth: depth,
    };
  }

  // Recursing only makes sense while each pass shrinks the text. Requiring a
  // strictly shorter input per level guarantees the recursion terminates.
  if (combinedSummaries.length >= content.length) {
    throw new Error(
      `Recursive summarization is not converging at depth ${depth}: ` +
        `combined summaries (${combinedSummaries.length} chars) are not shorter ` +
        `than the input (${content.length} chars)`
    );
  }

  console.log("Combined summaries exceed token limit, recursing");
  const result = await recursiveSummarize(combinedSummaries, systemPrompt, maxContentTokens, endpoint, model, depth + 1);
  return {
    ...result,
    chunkCount: chunks.length + result.chunkCount,
  };
}
|
||||
|
||||
/**
 * Sends one chunk of text to the summarization endpoint and returns the
 * `response` field of the JSON reply.
 *
 * The request is a JSON POST carrying `prompt`, `model` and `stream: false`;
 * the prompt is the system prompt followed by a fixed instruction line and
 * the chunk. NOTE(review): the payload shape looks like an Ollama-style
 * "generate" API — confirm against the configured endpoint.
 *
 * @param {string} chunk - Text to summarize.
 * @param {string} systemPrompt - Instructions placed at the top of the prompt.
 * @param {string} endpoint - URL the request is POSTed to.
 * @param {string} model - Model name sent in the request body.
 * @returns {Promise<*>} The `response` value of the reply.
 * @throws {Error} On a non-2xx status (message carries status and body text),
 *   or when the reply has no `response` field.
 */
async function summarizeChunk(chunk, systemPrompt, endpoint, model) {
  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      prompt: `${systemPrompt}\n\nFollow the above instructions and summarize the following text:\n\n${chunk}`,
      model: model,
      stream: false,
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`HTTP error! status: ${response.status}, message: ${errorText}`);
  }

  const data = await response.json();
  // Without this check a reply lacking `response` would come back as
  // undefined and be silently dropped when the caller joins chunk summaries.
  if (data == null || data.response == null) {
    throw new Error(`Invalid reply from ${endpoint}: missing "response" field`);
  }
  return data.response;
}
|
||||
|
||||
/**
 * Roughly estimates how many tokens a piece of text occupies, using a fixed
 * ratio of four characters per token (rounded up).
 *
 * @param {string} text - Text to measure. `null`/`undefined` count as empty;
 *   any other non-string value is measured by its string form.
 * @returns {number} Estimated token count (0 for empty or missing text).
 */
function estimateTokenCount(text) {
  // A missing value (e.g. an unset prompt) costs no tokens; previously this
  // raised a TypeError when reading `.length`.
  if (text == null) {
    return 0;
  }
  const charsPerToken = 4;
  return Math.ceil(String(text).length / charsPerToken);
}
|
||||
|
||||
function splitContentIntoChunks(content, maxTokens) {
|
||||
const chunks = [];
|
||||
const words = content.split(/\s+/);
|
||||
let currentChunk = "";
|
||||
|
||||
const sentences = content.split(/(?<=[.!?])\s+/);
|
||||
|
||||
for (let sentence of sentences) {
|
||||
if (estimateTokenCount(currentChunk + sentence) > maxTokens) {
|
||||
if (currentChunk) {
|
||||
chunks.push(currentChunk.trim());
|
||||
currentChunk = "";
|
||||
}
|
||||
if (estimateTokenCount(sentence) > maxTokens) {
|
||||
// If a single sentence is too long, split it
|
||||
while (sentence) {
|
||||
const chunk = sentence.slice(0, maxTokens * 4); // Approximate characters
|
||||
chunks.push(chunk.trim());
|
||||
sentence = sentence.slice(maxTokens * 4);
|
||||
}
|
||||
} else {
|
||||
currentChunk = sentence;
|
||||
}
|
||||
for (const word of words) {
|
||||
if (estimateTokenCount(currentChunk + " " + word) > maxTokens) {
|
||||
chunks.push(currentChunk.trim());
|
||||
currentChunk = word;
|
||||
} else {
|
||||
currentChunk += " " + sentence;
|
||||
currentChunk += (currentChunk ? " " : "") + word;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,8 +40,31 @@ document.addEventListener("DOMContentLoaded", () => {
|
||||
}
|
||||
|
||||
if (response && response.summary) {
|
||||
// Render the Markdown content
|
||||
summaryDiv.innerHTML = marked.parse(response.summary);
|
||||
let warningHtml = "";
|
||||
if (response.chunkCount > 1) {
|
||||
warningHtml = `
|
||||
<div class="warning" style="background-color: #fff3cd; color: #856404; padding: 10px; margin-bottom: 10px; border-radius: 4px;">
|
||||
<strong>Warning:</strong> The content was split into ${response.chunkCount} chunks for summarization.
|
||||
Recursive summarization depth: ${response.recursionDepth}.
|
||||
This may affect the quality and coherence of the summary, and might result in slower performance.
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
|
||||
let summaryText;
|
||||
if (typeof response.summary === 'string') {
|
||||
summaryText = response.summary;
|
||||
} else if (typeof response.summary === 'object') {
|
||||
// Convert JSON to Markdown
|
||||
summaryText = Object.entries(response.summary)
|
||||
.map(([key, value]) => `## ${key}\n\n${value}`)
|
||||
.join('\n\n');
|
||||
} else {
|
||||
summaryText = JSON.stringify(response.summary);
|
||||
}
|
||||
|
||||
// Render the Markdown content with warning if applicable
|
||||
summaryDiv.innerHTML = warningHtml + marked.parse(summaryText);
|
||||
tokenCountDiv.textContent = `Token count: ${response.tokenCount}`;
|
||||
} else if (response && response.error) {
|
||||
handleError(response.error, response.details);
|
||||
|
Loading…
x
Reference in New Issue
Block a user