From 8cfb2d12461c18396d4c70ec8815f4d6ebbcc937 Mon Sep 17 00:00:00 2001
From: martin legrand
Date: Tue, 8 Apr 2025 19:38:06 +0200
Subject: [PATCH] feat : vllm in server

---
 server/app.py                   | 13 ++++++++++---
 server/sources/vllm_handler.py  | 77 ++++++++++++++++++++++++++++++++++++++++
 sources/agents/browser_agent.py |  7 ++++---
 sources/llm_provider.py         |  4 ++--
 sources/router.py               |  2 ++
 5 files changed, 95 insertions(+), 8 deletions(-)
 create mode 100644 server/sources/vllm_handler.py

diff --git a/server/app.py b/server/app.py
index 85e0f04..b46a314 100644
--- a/server/app.py
+++ b/server/app.py
@@ -6,6 +6,7 @@ from flask import Flask, jsonify, request
 
 from sources.llamacpp_handler import LlamacppLLM
 from sources.ollama_handler import OllamaLLM
+from sources.vllm_handler import Vllm
 
 parser = argparse.ArgumentParser(description='AgenticSeek server script')
-parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama] or [llamacpp]', required=True)
+parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama], [llamacpp] or [vllm]', required=True)
@@ -16,7 +17,13 @@ app = Flask(__name__)
 
-assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exists. see --help for more information"
+assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exist. See --help for more information"
 
-generator = OllamaLLM() if args.provider == "ollama" else LlamacppLLM()
+handler_map = {
+    "ollama": OllamaLLM(),
+    "llamacpp": LlamacppLLM(),
+    "vllm": Vllm()
+}
+
+generator = handler_map[args.provider]
 
 @app.route('/generate', methods=['POST'])
 def start_generation():
diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py
new file mode 100644
index 0000000..2f9b329
--- /dev/null
+++ b/server/sources/vllm_handler.py
@@ -0,0 +1,77 @@
+from vllm import LLM, SamplingParams
+import logging
+from typing import List, Dict
+
+from .generator import GeneratorLLM
+
+class Vllm(GeneratorLLM):
+    def __init__(self):
+        """
+        Handle generation using vLLM.
+        """
+        super().__init__()
+        self.logger = logging.getLogger(__name__)
+        # Build the vLLM engine lazily: every handler is instantiated at server
+        # startup, even when another provider is selected.
+        self.llm = None
+
+    def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str:
+        """
+        Convert OpenAI-format history to a single prompt string for vLLM.
+        """
+        prompt = ""
+        for message in history:
+            role = message["role"]
+            content = message["content"]
+            if role == "system":
+                prompt += f"System: {content}\n"
+            elif role == "user":
+                prompt += f"User: {content}\n"
+            elif role == "assistant":
+                prompt += f"Assistant: {content}\n"
+        prompt += "Assistant: "
+        return prompt
+
+    def generate(self, history: List[Dict[str, str]]):
+        """
+        Generate a response using vLLM from an OpenAI-format message history.
+
+        Args:
+            history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...]
+        """
+        self.logger.info(f"Using {self.model} for generation with vLLM")
+
+        try:
+            with self.state.lock:
+                self.state.is_generating = True
+                self.state.last_complete_sentence = ""
+                self.state.current_buffer = ""
+
+            if self.llm is None:
+                self.llm = LLM(model=self.model)
+
+            prompt = self.convert_history_to_prompt(history)
+
+            sampling_params = SamplingParams(
+                temperature=0.7,
+                max_tokens=512
+            )
+            # The offline LLM.generate API returns completed outputs, not a token stream.
+            outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
+            for output in outputs:
+                content = output.outputs[0].text
+                with self.state.lock:
+                    if '.' in content:
+                        self.logger.info(self.state.current_buffer)
+                    self.state.current_buffer += content
+            with self.state.lock:
+                self.logger.info(f"Final output: {self.state.current_buffer}")
+
+        except Exception as e:
+            self.logger.error(f"Error during generation: {str(e)}")
+            raise e
+
+        finally:
+            self.logger.info("Generation complete")
+            with self.state.lock:
+                self.state.is_generating = False
\ No newline at end of file
diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py
index 0e5a687..77064eb 100644
--- a/sources/agents/browser_agent.py
+++ b/sources/agents/browser_agent.py
@@ -112,7 +112,7 @@ class BrowserAgent(Agent):
 
        1. **Decide if the page answers the user’s query:**
          - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page.
-         - If it does and you completed user request, say {Action.REQUEST_EXIT}.
+         - If it does and no further step would help with the user request, say {Action.REQUEST_EXIT}.
          - If it doesn’t, say: Error: then go back or navigate to another link.
        2. **Navigate to a link by either: **
          - Saying I will navigate to (write down the full URL) www.example.com/cats
@@ -145,7 +145,7 @@ class BrowserAgent(Agent):
        Action: {Action.GO_BACK.value}
 
        Example 3 (query answer found, enought notes taken):
-       Note: I found on that ......
+       Note: I found on website www.example.com that ......
        Given this answer the user query I should exit the web browser.
        Action: {Action.REQUEST_EXIT.value}
 
@@ -161,8 +161,9 @@ class BrowserAgent(Agent):
 
        You previously took these notes: {notes}
        Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action.
-       You might {Action.REQUEST_EXIT.value} if no more link are useful.
        If you conduct research do not exit until you have several notes.
+       Do not ever ask the user to conduct a task, do not ever exit expecting user intervention.
+       You should be Investigative, Curious and Skeptical.
        """
 
    def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
diff --git a/sources/llm_provider.py b/sources/llm_provider.py
index bffd8e6..e0b0314 100644
--- a/sources/llm_provider.py
+++ b/sources/llm_provider.py
@@ -87,8 +87,8 @@ class Provider:
        except AttributeError as e:
            raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented ?")
        except Exception as e:
-           if "RemoteDisconnected" in str(e):
-               return f"{self.server_ip} seem offline. RemoteDisconnected error."
+           if "refused" in str(e):
+               return f"Server {self.server_ip} seems offline. Unable to answer."
            raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
        return thought
 
diff --git a/sources/router.py b/sources/router.py
index cab9e8b..008bec7 100644
--- a/sources/router.py
+++ b/sources/router.py
@@ -121,7 +121,9 @@ class AgentRouter:
            ("Write a Python function to merge two sorted lists", "LOW"),
            ("Organize my desktop files by extension and then write a script to list them", "HIGH"),
            ("Create a bash script to monitor disk space and alert via text file", "LOW"),
+           ("can you find vitess repo, clone it and install by following the readme", "HIGH"),
            ("Search X for posts about AI ethics and summarize them", "LOW"),
+           ("Can you follow the readme and install the project", "HIGH"),
            ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
            ("Write a C program to sort an array of integers", "LOW"),
            ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"),
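
Illustration (not part of the patch): a minimal sketch of what the new handler's convert_history_to_prompt() flattening produces, and the equivalent offline vLLM call. The conversation contents and the model name are placeholders chosen for the example; the sampling settings mirror the values hard-coded in the handler (temperature 0.7, max_tokens 512).

    # Illustration only -- placeholder history and model name.
    from vllm import LLM, SamplingParams

    history = [
        {"role": "system", "content": "You are a helpful assistant."},   # placeholder content
        {"role": "user", "content": "Summarize the project README."},    # placeholder content
    ]

    # What Vllm.convert_history_to_prompt(history) produces for this history:
    prompt = ""
    for message in history:
        prompt += f"{message['role'].capitalize()}: {message['content']}\n"
    prompt += "Assistant: "
    # prompt == "System: You are a helpful assistant.\nUser: Summarize the project README.\nAssistant: "

    # Same generation call the handler makes; outputs hold completed text, not a stream.
    llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")  # placeholder model name
    outputs = llm.generate(prompt, SamplingParams(temperature=0.7, max_tokens=512), use_tqdm=False)
    print(outputs[0].outputs[0].text)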