feat: vllm in server

martin legrand 2025-04-08 19:38:06 +02:00
parent 864fb36af5
commit 8cfb2d1246
5 changed files with 86 additions and 6 deletions

View File

@@ -6,6 +6,7 @@ from flask import Flask, jsonify, request
 from sources.llamacpp_handler import LlamacppLLM
 from sources.ollama_handler import OllamaLLM
+from sources.vllm_handler import Vllm

 parser = argparse.ArgumentParser(description='AgenticSeek server script')
 parser.add_argument('--provider', type=str, help='LLM backend library to use. Set to [ollama], [llamacpp] or [vllm]', required=True)
@@ -16,7 +17,13 @@ app = Flask(__name__)

-assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exists. see --help for more information"
+assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exist. See --help for more information"
-generator = OllamaLLM() if args.provider == "ollama" else LlamacppLLM()
+# Map provider names to handler classes and instantiate only the requested one,
+# so an unused backend (vLLM loads a model in __init__) is never constructed.
+handler_map = {
+    "ollama": OllamaLLM,
+    "llamacpp": LlamacppLLM,
+    "vllm": Vllm,
+}
+generator = handler_map[args.provider]()

 @app.route('/generate', methods=['POST'])
 def start_generation():
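With the new mapping, launching the server with --provider vllm selects the vLLM backend, e.g. python3 server.py --provider vllm (the script name is an assumption; the diff does not show the filename). A minimal smoke test of the /generate route might look like the sketch below, where the port and the messages payload key are also assumptions, since the route body is not part of this diff:

import requests

# Hypothetical client-side check; adjust host, port and payload schema to the
# actual route implementation, which this diff does not show.
history = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello."},
]
resp = requests.post("http://127.0.0.1:5000/generate", json={"messages": history})
print(resp.status_code, resp.text)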

View File

@@ -0,0 +1,70 @@
+from vllm import LLM, SamplingParams
+import logging
+from typing import List, Dict
+
+# Assumed import path: the shared GeneratorLLM base class that the other
+# handlers extend is not shown in this diff.
+from sources.generator import GeneratorLLM
+
+
+class Vllm(GeneratorLLM):
+    def __init__(self):
+        """
+        Handle generation using vLLM.
+        """
+        super().__init__()
+        self.logger = logging.getLogger(__name__)
+        self.llm = LLM(model=self.model)
+
+    def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str:
+        """
+        Convert OpenAI-format history to a single prompt string for vLLM.
+        """
+        prompt = ""
+        for message in history:
+            role = message["role"]
+            content = message["content"]
+            if role == "system":
+                prompt += f"System: {content}\n"
+            elif role == "user":
+                prompt += f"User: {content}\n"
+            elif role == "assistant":
+                prompt += f"Assistant: {content}\n"
+        prompt += "Assistant: "
+        return prompt
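+
+    # Example (illustrative): convert_history_to_prompt([{"role": "user",
+    # "content": "Hi"}]) returns "User: Hi\nAssistant: ".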
+
+    def generate(self, history: List[Dict[str, str]]):
+        """
+        Generate a response using vLLM from OpenAI-format message history.
+
+        Args:
+            history: List of dictionaries in OpenAI format
+                [{"role": "user", "content": "..."}, ...]
+        """
+        self.logger.info(f"Using {self.model} for generation with vLLM")
+        try:
+            with self.state.lock:
+                self.state.is_generating = True
+                self.state.last_complete_sentence = ""
+                self.state.current_buffer = ""
+
+            prompt = self.convert_history_to_prompt(history)
+            # Note: SamplingParams has no streaming flag; the offline
+            # LLM.generate() API returns finished RequestOutput objects,
+            # so results are consumed in a single pass below.
+            sampling_params = SamplingParams(
+                temperature=0.7,
+                max_tokens=512
+            )
+            outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
+            for output in outputs:
+                content = output.outputs[0].text
+                with self.state.lock:
+                    if '.' in content:
+                        self.logger.info(self.state.current_buffer)
+                    self.state.current_buffer += content
+
+            with self.state.lock:
+                self.logger.info(f"Final output: {self.state.current_buffer}")
+        except Exception as e:
+            self.logger.error(f"Error during generation: {str(e)}")
+            raise e
+        finally:
+            self.logger.info("Generation complete")
+            with self.state.lock:
+                self.state.is_generating = False
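
Stripped of the handler state plumbing, the core vLLM calls this class depends on look like the following sketch (the model id is a placeholder; temperature and max_tokens mirror the values above):

from vllm import LLM, SamplingParams

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")  # placeholder model id
params = SamplingParams(temperature=0.7, max_tokens=512)
outputs = llm.generate("User: Say hello.\nAssistant: ", params, use_tqdm=False)
# Each RequestOutput holds one completion per sampled sequence.
print(outputs[0].outputs[0].text)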

View File

@@ -112,7 +112,7 @@ class BrowserAgent(Agent):
 1. **Decide if the page answers the user's query:**
 - If it does, take notes of useful information (Note: ...), include the relevant link in the note, then move to a new page.
-- If it does and you completed the user request, say {Action.REQUEST_EXIT}.
+- If it does and no further step would help with the user request, say {Action.REQUEST_EXIT}.
 - If it doesn't, say: Error: <why the page doesn't help>, then go back or navigate to another link.
 2. **Navigate to a link by either:**
 - Saying I will navigate to (write down the full URL) www.example.com/cats
@@ -145,7 +145,7 @@ class BrowserAgent(Agent):
 Action: {Action.GO_BACK.value}

 Example 3 (query answer found, enough notes taken):
-Note: I found on <link> that ...<expand on information found>...
+Note: I found on website www.example.com that ...<expand on information found>...

 Given this answer to the user's query, I should exit the web browser.
 Action: {Action.REQUEST_EXIT.value}
@@ -161,8 +161,9 @@ class BrowserAgent(Agent):
 You previously took these notes:
 {notes}

 Do not give a step-by-step explanation. Write Notes or Error as a long paragraph followed by your action.
+You might {Action.REQUEST_EXIT.value} if no more links are useful.
 If you conduct research, do not exit until you have several notes.
+Do not ever ask the user to conduct a task, and do not ever exit expecting user intervention.
+You should be investigative, curious and skeptical.
 """

     def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:

View File

@@ -87,8 +87,8 @@ class Provider:
         except AttributeError as e:
             raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented?")
         except Exception as e:
-            if "RemoteDisconnected" in str(e):
-                return f"{self.server_ip} seem offline. RemoteDisconnected error."
+            if "refused" in str(e):
+                return f"Server {self.server_ip} seems offline. Unable to answer."
             raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
         return thought
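
The new check keys on the substring "refused" because a refused TCP connection surfaces that text in the wrapped exception message. A minimal sketch of the idea (the address and payload are illustrative):

import requests

server_ip = "127.0.0.1:5000"  # illustrative address
try:
    requests.post(f"http://{server_ip}/generate", json={"messages": []}, timeout=5)
except Exception as e:
    # requests wraps ConnectionRefusedError, so "refused" appears in str(e)
    if "refused" in str(e):
        print(f"Server {server_ip} seems offline. Unable to answer.")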

View File

@@ -121,7 +121,9 @@ class AgentRouter:
             ("Write a Python function to merge two sorted lists", "LOW"),
             ("Organize my desktop files by extension and then write a script to list them", "HIGH"),
             ("Create a bash script to monitor disk space and alert via text file", "LOW"),
+            ("can you find vitess repo, clone it and install by following the readme", "HIGH"),
             ("Search X for posts about AI ethics and summarize them", "LOW"),
+            ("Can you follow the readme and install the project", "HIGH"),
             ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
             ("Write a C program to sort an array of integers", "LOW"),
             ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"),