mirror of https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-05 02:25:27 +00:00

feat : vllm in server

parent 864fb36af5
commit 8cfb2d1246
@@ -6,6 +6,7 @@ from flask import Flask, jsonify, request
from sources.llamacpp_handler import LlamacppLLM
from sources.ollama_handler import OllamaLLM
from sources.vllm_handler import Vllm

parser = argparse.ArgumentParser(description='AgenticSeek server script')
parser.add_argument('--provider', type=str, help='LLM backend library to use. Set to [ollama], [llamacpp] or [vllm]', required=True)
@@ -16,7 +17,13 @@ app = Flask(__name__)

assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exist. See --help for more information"

handler_map = {
    "ollama": OllamaLLM(),
    "llamacpp": LlamacppLLM(),
    "vllm": Vllm()
}

generator = handler_map[args.provider]

@app.route('/generate', methods=['POST'])
def start_generation():
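This hunk swaps the old two-way ternary, generator = OllamaLLM() if args.provider == "ollama" else LlamacppLLM(), for a dispatch table. Note that the table instantiates every handler eagerly, so Vllm() (whose constructor loads a model, per the new file below) runs even when another provider is selected. A lazy variant, sketched here as a suggestion rather than what the commit does, would map names to classes and construct only the chosen backend:

    handler_map = {
        "ollama": OllamaLLM,
        "llamacpp": LlamacppLLM,
        "vllm": Vllm,
    }
    generator = handler_map[args.provider]()  # construct only the selected backend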
server/sources/vllm_handler.py (new file, 70 lines)
@@ -0,0 +1,70 @@
from vllm import LLM, SamplingParams
import logging
from typing import List, Dict

# Added: the base-class import the file otherwise lacks; the module
# path is assumed from the sibling handlers in server/sources.
from .generator import GeneratorLLM


class Vllm(GeneratorLLM):

    def __init__(self):
        """
        Handle generation using vLLM.
        """
        super().__init__()
        self.logger = logging.getLogger(__name__)
        # self.model is expected to be populated by the GeneratorLLM base.
        self.llm = LLM(model=self.model)
    def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str:
        """
        Convert OpenAI-format history to a single prompt string for vLLM.
        """
        prompt = ""
        for message in history:
            role = message["role"]
            content = message["content"]
            if role == "system":
                prompt += f"System: {content}\n"
            elif role == "user":
                prompt += f"User: {content}\n"
            elif role == "assistant":
                prompt += f"Assistant: {content}\n"
        prompt += "Assistant: "
        return prompt
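    # Example (illustration, not in the original file): the history
    #   [{"role": "system", "content": "Be brief."},
    #    {"role": "user", "content": "hi"}]
    # flattens to "System: Be brief.\nUser: hi\nAssistant: "
    # so the model continues from the trailing "Assistant: " marker.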

    def generate(self, history: List[Dict[str, str]]):
        """
        Generate a response using vLLM from OpenAI-format message history.

        Args:
            history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...]
        """
        self.logger.info(f"Using {self.model} for generation with vLLM")

        try:
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""

            prompt = self.convert_history_to_prompt(history)

            # Note: SamplingParams has no `stream` option; the original
            # `stream=True` kwarg would fail. vLLM's offline LLM.generate()
            # is blocking and returns completed outputs.
            sampling_params = SamplingParams(
                temperature=0.7,
                max_tokens=512,
            )
            outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
            # One RequestOutput per prompt; with a single prompt this loop
            # runs once and appends the full completion to the buffer.
            for output in outputs:
                content = output.outputs[0].text
                with self.state.lock:
                    if '.' in content:
                        self.logger.info(self.state.current_buffer)
                    self.state.current_buffer += content
            with self.state.lock:
                self.logger.info(f"Final output: {self.state.current_buffer}")

        except Exception as e:
            self.logger.error(f"Error during generation: {str(e)}")
            raise

        finally:
            self.logger.info("Generation complete")
            with self.state.lock:
                self.state.is_generating = False
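How the server is expected to drive this handler, as a hedged sketch (it assumes the GeneratorLLM base class has already populated self.model and the lock-guarded self.state, which the code above implies but this diff does not show):

    history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize vLLM in one sentence."},
    ]
    handler = Vllm()           # loads the model named by self.model
    handler.generate(history)  # fills handler.state.current_buffer under its lock
    print(handler.state.current_buffer)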
@@ -112,7 +112,7 @@ class BrowserAgent(Agent):

1. **Decide if the page answers the user’s query:**
- If it does, take notes of useful information (Note: ...), include the relevant link in the note, then move to a new page.
- If it does and you completed the user request, say {Action.REQUEST_EXIT}.
- If it does and no further step would help with the user request, say {Action.REQUEST_EXIT}.
- If it doesn’t, say: Error: <why the page doesn't help>, then go back or navigate to another link.
2. **Navigate to a link by either:**
- Saying I will navigate to (write down the full URL) www.example.com/cats
@@ -145,7 +145,7 @@ class BrowserAgent(Agent):
Action: {Action.GO_BACK.value}

Example 3 (query answer found, enough notes taken):
Note: I found on <link> that ...<expand on information found>...
Note: I found on website www.example.com that ...<expand on information found>...
Given that this answers the user query, I should exit the web browser.
Action: {Action.REQUEST_EXIT.value}
@@ -161,8 +161,9 @@ class BrowserAgent(Agent):
You previously took these notes:
{notes}
Do not give a step-by-step explanation. Write Notes or Error as a long paragraph followed by your action.
You might {Action.REQUEST_EXIT.value} if no more links are useful.
If you conduct research, do not exit until you have several notes.
Do not ever ask the user to conduct a task; do not ever exit expecting user intervention.
You should be Investigative, Curious and Skeptical.
"""

    def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
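The prompts above ask the model to end its reply with a line of the form "Action: <value>". A minimal sketch of how such a reply could be parsed (an illustration of the convention, not the repo's actual parser):

    import re

    def parse_action(reply: str) -> str | None:
        # Grab the token after the last "Action:" marker in the reply.
        matches = re.findall(r"Action:\s*(\S+)", reply)
        return matches[-1] if matches else None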
@@ -87,8 +87,8 @@ class Provider:
        except AttributeError as e:
            raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented?")
        except Exception as e:
            if "RemoteDisconnected" in str(e):
                return f"{self.server_ip} seems offline. RemoteDisconnected error."
            if "refused" in str(e):
                return f"Server {self.server_ip} seems offline. Unable to answer."
            raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
        return thought
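The hunk encodes a small convention: connection-level failures come back as readable strings for the agent to surface, while anything unexpected is re-raised. A standalone sketch of the same pattern (not the repo's actual Provider code; the probe function and endpoint are assumptions):

    import requests

    def probe(server_ip: str) -> str | None:
        # Return a human-readable message if the server is unreachable,
        # None if it responds; unexpected errors propagate to the caller.
        try:
            requests.get(f"http://{server_ip}/", timeout=2)
        except requests.exceptions.ConnectionError:
            return f"Server {server_ip} seems offline. Unable to answer."
        return None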
@@ -121,7 +121,9 @@ class AgentRouter:
            ("Write a Python function to merge two sorted lists", "LOW"),
            ("Organize my desktop files by extension and then write a script to list them", "HIGH"),
            ("Create a bash script to monitor disk space and alert via text file", "LOW"),
            ("can you find vitess repo, clone it and install by following the readme", "HIGH"),
            ("Search X for posts about AI ethics and summarize them", "LOW"),
            ("Can you follow the readme and install the project", "HIGH"),
            ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
            ("Write a C program to sort an array of integers", "LOW"),
            ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"),
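The pairs above read as few-shot (query, complexity) examples. A minimal sketch of how such labelled pairs can be rendered into a classification prompt (an illustration of the pattern; the router's real prompting code is not shown in this diff):

    few_shot = [
        ("Write a Python function to merge two sorted lists", "LOW"),
        ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
    ]

    def complexity_prompt(query: str) -> str:
        # Render the labelled pairs as examples, then leave the label
        # for the new query to be completed by the model.
        blocks = [f"Query: {q}\nComplexity: {label}" for q, label in few_shot]
        blocks.append(f"Query: {query}\nComplexity:")
        return "\n\n".join(blocks)

    print(complexity_prompt("Clone the repo and run the tests"))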