token count and limits support

This commit is contained in:
tcsenpai 2024-10-13 17:58:06 +02:00
parent f90b62070d
commit cd20d4f5c0
4 changed files with 116 additions and 21 deletions

View File

@@ -6,13 +6,14 @@ browser.browserAction.onClicked.addListener(() => {
browser.runtime.onMessage.addListener((request, sender, sendResponse) => {
if (request.action === "summarize") {
const tokenCount = estimateTokenCount(request.content);
summarizeContent(request.content, request.systemPrompt)
.then((summary) => {
sendResponse({ summary });
sendResponse({ summary, tokenCount });
})
.catch((error) => {
console.error("Error in summarizeContent:", error);
sendResponse({ error: error.toString(), details: error.details });
sendResponse({ error: error.toString(), details: error.details, tokenCount });
});
return true; // Indicates that we will send a response asynchronously
}
@@ -22,35 +23,71 @@ async function summarizeContent(content, systemPrompt) {
const settings = await browser.storage.local.get([
"ollamaEndpoint",
"ollamaModel",
"tokenLimit",
]);
const endpoint = `${
settings.ollamaEndpoint || "http://localhost:11434"
}/api/generate`;
const model = settings.ollamaModel || "llama3.1:8b";
const model = settings.ollamaModel || "llama2";
const tokenLimit = settings.tokenLimit || 4096;
const maxContentTokens = tokenLimit - estimateTokenCount(systemPrompt) - 100; // Reserve 100 tokens for safety
try {
console.log(`Using system prompt: ${systemPrompt}`);
const response = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
prompt: `${systemPrompt}\n\nFollow the above instructions and summarize the following text:\n\n${content}`,
model: model,
stream: false,
}),
});
let summary = "";
let chunks = splitContentIntoChunks(content, maxContentTokens);
if (!response.ok) {
const errorText = await response.text();
throw new Error(
`HTTP error! status: ${response.status}, message: ${errorText}`
);
for (let chunk of chunks) {
const response = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
prompt: `${systemPrompt}\n\nFollow the above instructions and summarize the following text:\n\n${chunk}`,
model: model,
stream: false,
}),
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(
`HTTP error! status: ${response.status}, message: ${errorText}`
);
}
const data = await response.json();
summary += data.response + "\n\n";
}
const data = await response.json();
return data.response;
if (chunks.length > 1) {
// If we had multiple chunks, summarize the summary
const finalResponse = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
prompt: `${systemPrompt}\n\nFollow the above instructions and provide a final summary of the following summaries:\n\n${summary}`,
model: model,
stream: false,
}),
});
if (!finalResponse.ok) {
const errorText = await finalResponse.text();
throw new Error(
`HTTP error! status: ${finalResponse.status}, message: ${errorText}`
);
}
const finalData = await finalResponse.json();
summary = finalData.response;
}
return summary.trim();
} catch (error) {
console.error("Error details:", error);
error.details = {
@@ -61,3 +98,41 @@ async function summarizeContent(content, systemPrompt) {
throw error;
}
}
/**
 * Roughly estimate how many LLM tokens a string will consume.
 * Uses the common heuristic of ~4 characters per token; the result is
 * rounded up so a non-empty string never estimates to zero tokens.
 *
 * @param {string} text - The text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokenCount(text) {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
/**
 * Split text into chunks that each fit within a token budget, preferring
 * to break at sentence boundaries.
 *
 * Fixes over the previous version:
 * - Empty input returns [] instead of [""] (the old " " + sentence
 *   concatenation made currentChunk truthy whitespace).
 * - No leading space is prepended to the first sentence of a chunk, and
 *   the token estimate accounts for the joining space.
 * - Hard-split pieces of an oversized sentence are only pushed when
 *   non-empty after trimming.
 *
 * @param {string} content - The text to split.
 * @param {number} maxTokens - Maximum estimated tokens allowed per chunk.
 * @returns {string[]} Ordered chunks, each within the token budget.
 */
function splitContentIntoChunks(content, maxTokens) {
  const chunks = [];
  // Character budget mirroring estimateTokenCount's ~4 chars/token heuristic.
  const maxChars = maxTokens * 4;
  let currentChunk = "";

  // Split on sentence boundaries (after ., !, ?) so chunks end naturally;
  // drop empty pieces so empty input yields no chunks.
  const sentences = content.split(/(?<=[.!?])\s+/).filter((s) => s.length > 0);

  for (let sentence of sentences) {
    // Join with a single space only when there is existing chunk content.
    const candidate = currentChunk ? `${currentChunk} ${sentence}` : sentence;
    if (estimateTokenCount(candidate) <= maxTokens) {
      currentChunk = candidate;
      continue;
    }
    // Budget exceeded: flush what we have before placing this sentence.
    if (currentChunk) {
      chunks.push(currentChunk);
      currentChunk = "";
    }
    if (estimateTokenCount(sentence) > maxTokens) {
      // A single sentence is too long on its own: hard-split by characters.
      while (sentence) {
        const piece = sentence.slice(0, maxChars).trim();
        if (piece) {
          chunks.push(piece);
        }
        sentence = sentence.slice(maxChars);
      }
    } else {
      currentChunk = sentence;
    }
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}

View File

@@ -25,6 +25,10 @@
<label for="model">OLLAMA Model:</label>
<input type="text" id="model" placeholder="llama2" />
</div>
<div class="form-group">
<label for="token-limit">Token Limit:</label>
<input type="number" id="token-limit" min="1000" step="1000" placeholder="4096" />
</div>
<button type="submit" class="btn btn-primary">Save Settings</button>
<div class="form-group">
<label for="system-prompt">System Prompt:</label>

View File

@@ -20,6 +20,7 @@ async function saveOptions(e) {
const model = document.getElementById("model").value;
const systemPrompt = document.getElementById("system-prompt").value;
const status = document.getElementById("status");
const tokenLimit = document.getElementById("token-limit").value || 4096;
// Ensure the endpoint doesn't end with /api/generate
const cleanEndpoint = endpoint.replace(/\/api\/generate\/?$/, "");
status.textContent = "Validating endpoint...";
@@ -31,6 +32,7 @@ async function saveOptions(e) {
ollamaEndpoint: cleanEndpoint,
ollamaModel: model,
systemPrompt: systemPrompt,
tokenLimit: parseInt(tokenLimit),
})
.then(() => {
status.textContent = "Options saved and endpoint validated.";
@@ -49,12 +51,14 @@ async function restoreOptions() {
"ollamaEndpoint",
"ollamaModel",
"systemPrompt",
"tokenLimit",
]);
const endpoint = result.ollamaEndpoint || "http://localhost:11434";
const defaultSystemPrompt = "You are a helpful AI assistant. Summarize the given text concisely.";
document.getElementById("endpoint").value = endpoint;
document.getElementById("model").value = result.ollamaModel || "llama2";
document.getElementById("system-prompt").value = result.systemPrompt || defaultSystemPrompt;
document.getElementById("token-limit").value = result.tokenLimit || 4096;
const isValid = await validateEndpoint(endpoint);
updateEndpointStatus(isValid);
}
@@ -67,3 +71,4 @@ document.getElementById("endpoint").addEventListener("blur", async (e) => {
const isValid = await validateEndpoint(e.target.value);
updateEndpointStatus(isValid);
});

View File

@@ -2,9 +2,16 @@ document.addEventListener("DOMContentLoaded", () => {
const summarizeButton = document.getElementById("summarize");
const summaryDiv = document.getElementById("summary");
const openOptionsButton = document.getElementById("open-options");
const tokenCountDiv = document.createElement("div");
tokenCountDiv.id = "token-count";
tokenCountDiv.style.marginTop = "10px";
tokenCountDiv.style.fontStyle = "italic";
summarizeButton.parentNode.insertBefore(tokenCountDiv, summarizeButton.nextSibling);
summarizeButton.addEventListener("click", () => {
summaryDiv.innerHTML = "<p>Summarizing...</p>";
tokenCountDiv.textContent = "";
summarizeButton.disabled = true;
browser.tabs.query({ active: true, currentWindow: true }, (tabs) => {
@@ -35,8 +42,12 @@ document.addEventListener("DOMContentLoaded", () => {
if (response && response.summary) {
// Render the Markdown content
summaryDiv.innerHTML = marked.parse(response.summary);
tokenCountDiv.textContent = `Token count: ${response.tokenCount}`;
} else if (response && response.error) {
handleError(response.error, response.details);
if (response.tokenCount) {
tokenCountDiv.textContent = `Token count: ${response.tokenCount}`;
}
} else {
handleError("Unexpected response from summarization");
}