From 8cfb2d12461c18396d4c70ec8815f4d6ebbcc937 Mon Sep 17 00:00:00 2001
From: martin legrand
Date: Tue, 8 Apr 2025 19:38:06 +0200
Subject: [PATCH] feat : vllm in server

---
 server/app.py                   | 13 ++++++++++---
 server/sources/vllm_handler.py  | 77 ++++++++++++++++++++++++++++++++++++++++
 sources/agents/browser_agent.py |  7 ++++---
 sources/llm_provider.py         |  4 ++--
 sources/router.py               |  2 ++
 5 files changed, 95 insertions(+), 8 deletions(-)
 create mode 100644 server/sources/vllm_handler.py

diff --git a/server/app.py b/server/app.py
index 85e0f04..b46a314 100644
--- a/server/app.py
+++ b/server/app.py
@@ -6,6 +6,7 @@ from flask import Flask, jsonify, request
 
 from sources.llamacpp_handler import LlamacppLLM
 from sources.ollama_handler import OllamaLLM
+from sources.vllm_handler import Vllm
 
 parser = argparse.ArgumentParser(description='AgenticSeek server script')
-parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama] or [llamacpp]', required=True)
+parser.add_argument('--provider', type=str, help='LLM backend library to use. set to [ollama], [llamacpp] or [vllm]', required=True)
@@ -16,7 +17,13 @@ app = Flask(__name__)
 
-assert args.provider in ["ollama", "llamacpp"], f"Provider {args.provider} does not exists. see --help for more information"
+assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exist. See --help for more information"
 
-generator = OllamaLLM() if args.provider == "ollama" else LlamacppLLM()
+handler_map = {
+    "ollama": OllamaLLM(),
+    "llamacpp": LlamacppLLM(),
+    "vllm": Vllm()
+}
+
+generator = handler_map[args.provider]
 
 @app.route('/generate', methods=['POST'])
 def start_generation():
diff --git a/server/sources/vllm_handler.py b/server/sources/vllm_handler.py
new file mode 100644
index 0000000..2f9b329
--- /dev/null
+++ b/server/sources/vllm_handler.py
@@ -0,0 +1,77 @@
+from vllm import LLM, SamplingParams
+import logging
+from typing import List, Dict
+
+from .generator import GeneratorLLM
+
+class Vllm(GeneratorLLM):
+    def __init__(self):
+        """
+        Handle generation using vLLM.
+        """
+        super().__init__()
+        self.logger = logging.getLogger(__name__)
+        # Build the vLLM engine lazily: every handler is instantiated at server
+        # startup, even when another provider is selected.
+        self.llm = None
+
+    def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str:
+        """
+        Convert OpenAI-format history to a single prompt string for vLLM.
+        """
+        prompt = ""
+        for message in history:
+            role = message["role"]
+            content = message["content"]
+            if role == "system":
+                prompt += f"System: {content}\n"
+            elif role == "user":
+                prompt += f"User: {content}\n"
+            elif role == "assistant":
+                prompt += f"Assistant: {content}\n"
+        prompt += "Assistant: "
+        return prompt
+
+    def generate(self, history: List[Dict[str, str]]):
+        """
+        Generate a response using vLLM from an OpenAI-format message history.
+
+        Args:
+            history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...]
+        """
+        self.logger.info(f"Using {self.model} for generation with vLLM")
+
+        try:
+            with self.state.lock:
+                self.state.is_generating = True
+                self.state.last_complete_sentence = ""
+                self.state.current_buffer = ""
+
+            if self.llm is None:
+                self.llm = LLM(model=self.model)
+
+            prompt = self.convert_history_to_prompt(history)
+
+            sampling_params = SamplingParams(
+                temperature=0.7,
+                max_tokens=512
+            )
+            # The offline LLM.generate API returns completed outputs, not a token stream.
+            outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
+            for output in outputs:
+                content = output.outputs[0].text
+                with self.state.lock:
+                    if '.' in content:
+                        self.logger.info(self.state.current_buffer)
+                    self.state.current_buffer += content
+            with self.state.lock:
+                self.logger.info(f"Final output: {self.state.current_buffer}")
+
+        except Exception as e:
+            self.logger.error(f"Error during generation: {str(e)}")
+            raise e
+
+        finally:
+            self.logger.info("Generation complete")
+            with self.state.lock:
+                self.state.is_generating = False
\ No newline at end of file
diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py
index 0e5a687..77064eb 100644
--- a/sources/agents/browser_agent.py
+++ b/sources/agents/browser_agent.py
@@ -112,7 +112,7 @@ class BrowserAgent(Agent):
 
        1. **Decide if the page answers the user’s query:**
          - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page.
-         - If it does and you completed user request, say {Action.REQUEST_EXIT}.
+         - If it does and no further step would help with the user request, say {Action.REQUEST_EXIT}.
          - If it doesn’t, say: Error: then go back or navigate to another link.
        2. **Navigate to a link by either: **
          - Saying I will navigate to (write down the full URL) www.example.com/cats
@@ -145,7 +145,7 @@ class BrowserAgent(Agent):
        Action: {Action.GO_BACK.value}
 
        Example 3 (query answer found, enought notes taken):
-       Note: I found on that ......
+       Note: I found on website www.example.com that ......
        Given this answer the user query I should exit the web browser.
        Action: {Action.REQUEST_EXIT.value}
 
@@ -161,8 +161,9 @@ class BrowserAgent(Agent):
 
        You previously took these notes: {notes}
        Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action.
-       You might {Action.REQUEST_EXIT.value} if no more link are useful.
        If you conduct research do not exit until you have several notes.
+       Do not ever ask the user to conduct a task, do not ever exit expecting user intervention.
+       You should be Investigative, Curious and Skeptical.
        """
 
    def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
diff --git a/sources/llm_provider.py b/sources/llm_provider.py
index bffd8e6..e0b0314 100644
--- a/sources/llm_provider.py
+++ b/sources/llm_provider.py
@@ -87,8 +87,8 @@ class Provider:
        except AttributeError as e:
            raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented ?")
        except Exception as e:
-           if "RemoteDisconnected" in str(e):
-               return f"{self.server_ip} seem offline. RemoteDisconnected error."
+           if "refused" in str(e):
+               return f"Server {self.server_ip} seems offline. Unable to answer."
            raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
        return thought
 
diff --git a/sources/router.py b/sources/router.py
index cab9e8b..008bec7 100644
--- a/sources/router.py
+++ b/sources/router.py
@@ -121,7 +121,9 @@ class AgentRouter:
            ("Write a Python function to merge two sorted lists", "LOW"),
            ("Organize my desktop files by extension and then write a script to list them", "HIGH"),
            ("Create a bash script to monitor disk space and alert via text file", "LOW"),
+           ("can you find vitess repo, clone it and install by following the readme", "HIGH"),
            ("Search X for posts about AI ethics and summarize them", "LOW"),
+           ("Can you follow the readme and install the project", "HIGH"),
            ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
            ("Write a C program to sort an array of integers", "LOW"),
            ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"),
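
Illustration (not part of the patch): a minimal sketch of what the new handler's convert_history_to_prompt() flattening produces, and the equivalent offline vLLM call. The conversation contents and the model name are placeholders chosen for the example; the sampling settings mirror the values hard-coded in the handler (temperature 0.7, max_tokens 512).

    # Illustration only -- placeholder history and model name.
    from vllm import LLM, SamplingParams

    history = [
        {"role": "system", "content": "You are a helpful assistant."},   # placeholder content
        {"role": "user", "content": "Summarize the project README."},    # placeholder content
    ]

    # What Vllm.convert_history_to_prompt(history) produces for this history:
    prompt = ""
    for message in history:
        prompt += f"{message['role'].capitalize()}: {message['content']}\n"
    prompt += "Assistant: "
    # prompt == "System: You are a helpful assistant.\nUser: Summarize the project README.\nAssistant: "

    # Same generation call the handler makes; outputs hold completed text, not a stream.
    llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")  # placeholder model name
    outputs = llm.generate(prompt, SamplingParams(temperature=0.7, max_tokens=512), use_tqdm=False)
    print(outputs[0].outputs[0].text)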