mirror of https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-05 02:25:27 +00:00

feat : vllm in server

parent 864fb36af5
commit 8cfb2d1246
@@ -6,6 +6,7 @@ from flask import Flask, jsonify, request
from sources.llamacpp_handler import LlamacppLLM
from sources.ollama_handler import OllamaLLM
from sources.vllm_handler import Vllm

parser = argparse.ArgumentParser(description='AgenticSeek server script')
parser.add_argument('--provider', type=str, help='LLM backend library to use. Set to [ollama], [llamacpp] or [vllm]', required=True)
@@ -16,7 +17,13 @@ app = Flask(__name__)

assert args.provider in ["ollama", "llamacpp", "vllm"], f"Provider {args.provider} does not exist. See --help for more information"

handler_map = {
    "ollama": OllamaLLM(),
    "llamacpp": LlamacppLLM(),
    "vllm": Vllm()
}

generator = handler_map[args.provider]

@app.route('/generate', methods=['POST'])
def start_generation():
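This hunk swaps the old two-way ternary, generator = OllamaLLM() if args.provider == "ollama" else LlamacppLLM(), for a dispatch table. Note that the table instantiates every handler eagerly, so Vllm() (whose constructor loads a model, per the new file below) runs even when another provider is selected. A lazy variant, sketched here as a suggestion rather than what the commit does, would map names to classes and construct only the chosen backend:

    handler_map = {
        "ollama": OllamaLLM,
        "llamacpp": LlamacppLLM,
        "vllm": Vllm,
    }
    generator = handler_map[args.provider]()  # construct only the selected backend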
server/sources/vllm_handler.py (new file, 70 lines)
@@ -0,0 +1,70 @@
from vllm import LLM, SamplingParams
import logging
from typing import List, Dict

# Added: the base-class import the file otherwise lacks; the module
# path is assumed from the sibling handlers in server/sources.
from .generator import GeneratorLLM


class Vllm(GeneratorLLM):

    def __init__(self):
        """
        Handle generation using vLLM.
        """
        super().__init__()
        self.logger = logging.getLogger(__name__)
        # self.model is expected to be populated by the GeneratorLLM base.
        self.llm = LLM(model=self.model)
    def convert_history_to_prompt(self, history: List[Dict[str, str]]) -> str:
        """
        Convert OpenAI-format history to a single prompt string for vLLM.
        """
        prompt = ""
        for message in history:
            role = message["role"]
            content = message["content"]
            if role == "system":
                prompt += f"System: {content}\n"
            elif role == "user":
                prompt += f"User: {content}\n"
            elif role == "assistant":
                prompt += f"Assistant: {content}\n"
        prompt += "Assistant: "
        return prompt
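    # Example (illustration, not in the original file): the history
    #   [{"role": "system", "content": "Be brief."},
    #    {"role": "user", "content": "hi"}]
    # flattens to "System: Be brief.\nUser: hi\nAssistant: "
    # so the model continues from the trailing "Assistant: " marker.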

    def generate(self, history: List[Dict[str, str]]):
        """
        Generate a response using vLLM from OpenAI-format message history.

        Args:
            history: List of dictionaries in OpenAI format [{"role": "user", "content": "..."}, ...]
        """
        self.logger.info(f"Using {self.model} for generation with vLLM")

        try:
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""

            prompt = self.convert_history_to_prompt(history)

            # Note: SamplingParams has no `stream` option; the original
            # `stream=True` kwarg would fail. vLLM's offline LLM.generate()
            # is blocking and returns completed outputs.
            sampling_params = SamplingParams(
                temperature=0.7,
                max_tokens=512,
            )
            outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
            # One RequestOutput per prompt; with a single prompt this loop
            # runs once and appends the full completion to the buffer.
            for output in outputs:
                content = output.outputs[0].text
                with self.state.lock:
                    if '.' in content:
                        self.logger.info(self.state.current_buffer)
                    self.state.current_buffer += content
            with self.state.lock:
                self.logger.info(f"Final output: {self.state.current_buffer}")

        except Exception as e:
            self.logger.error(f"Error during generation: {str(e)}")
            raise

        finally:
            self.logger.info("Generation complete")
            with self.state.lock:
                self.state.is_generating = False
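How the server is expected to drive this handler, as a hedged sketch (it assumes the GeneratorLLM base class has already populated self.model and the lock-guarded self.state, which the code above implies but this diff does not show):

    history = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize vLLM in one sentence."},
    ]
    handler = Vllm()           # loads the model named by self.model
    handler.generate(history)  # fills handler.state.current_buffer under its lock
    print(handler.state.current_buffer)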
@@ -112,7 +112,7 @@ class BrowserAgent(Agent):

1. **Decide if the page answers the user’s query:**
- If it does, take notes of useful information (Note: ...), include the relevant link in the note, then move to a new page.
- If it does and you completed the user request, say {Action.REQUEST_EXIT}.
- If it does and no further step would help with the user request, say {Action.REQUEST_EXIT}.
- If it doesn’t, say: Error: <why the page doesn't help>, then go back or navigate to another link.
2. **Navigate to a link by either:**
- Saying I will navigate to (write down the full URL) www.example.com/cats
@@ -145,7 +145,7 @@ class BrowserAgent(Agent):
Action: {Action.GO_BACK.value}

Example 3 (query answer found, enough notes taken):
Note: I found on <link> that ...<expand on information found>...
Note: I found on website www.example.com that ...<expand on information found>...
Given that this answers the user query, I should exit the web browser.
Action: {Action.REQUEST_EXIT.value}
@@ -161,8 +161,9 @@ class BrowserAgent(Agent):
You previously took these notes:
{notes}
Do not give a step-by-step explanation. Write Notes or Error as a long paragraph followed by your action.
You might {Action.REQUEST_EXIT.value} if no more links are useful.
If you conduct research, do not exit until you have several notes.
Do not ever ask the user to conduct a task; do not ever exit expecting user intervention.
You should be Investigative, Curious and Skeptical.
"""

    def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
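The prompts above ask the model to end its reply with a line of the form "Action: <value>". A minimal sketch of how such a reply could be parsed (an illustration of the convention, not the repo's actual parser):

    import re

    def parse_action(reply: str) -> str | None:
        # Grab the token after the last "Action:" marker in the reply.
        matches = re.findall(r"Action:\s*(\S+)", reply)
        return matches[-1] if matches else None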
@@ -87,8 +87,8 @@ class Provider:
        except AttributeError as e:
            raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented?")
        except Exception as e:
            if "RemoteDisconnected" in str(e):
                return f"{self.server_ip} seems offline. RemoteDisconnected error."
            if "refused" in str(e):
                return f"Server {self.server_ip} seems offline. Unable to answer."
            raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
        return thought
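The hunk encodes a small convention: connection-level failures come back as readable strings for the agent to surface, while anything unexpected is re-raised. A standalone sketch of the same pattern (not the repo's actual Provider code; the probe function and endpoint are assumptions):

    import requests

    def probe(server_ip: str) -> str | None:
        # Return a human-readable message if the server is unreachable,
        # None if it responds; unexpected errors propagate to the caller.
        try:
            requests.get(f"http://{server_ip}/", timeout=2)
        except requests.exceptions.ConnectionError:
            return f"Server {server_ip} seems offline. Unable to answer."
        return None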
@@ -121,7 +121,9 @@ class AgentRouter:
            ("Write a Python function to merge two sorted lists", "LOW"),
            ("Organize my desktop files by extension and then write a script to list them", "HIGH"),
            ("Create a bash script to monitor disk space and alert via text file", "LOW"),
            ("can you find vitess repo, clone it and install by following the readme", "HIGH"),
            ("Search X for posts about AI ethics and summarize them", "LOW"),
            ("Can you follow the readme and install the project", "HIGH"),
            ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
            ("Write a C program to sort an array of integers", "LOW"),
            ("Create a Node.js server that queries a public API for traffic data and displays it", "HIGH"),
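The pairs above read as few-shot (query, complexity) examples. A minimal sketch of how such labelled pairs can be rendered into a classification prompt (an illustration of the pattern; the router's real prompting code is not shown in this diff):

    few_shot = [
        ("Write a Python function to merge two sorted lists", "LOW"),
        ("Find the latest research on renewable energy and build a web app to display it", "HIGH"),
    ]

    def complexity_prompt(query: str) -> str:
        # Render the labelled pairs as examples, then leave the label
        # for the new query to be completed by the model.
        blocks = [f"Query: {q}\nComplexity: {label}" for q, label in few_shot]
        blocks.append(f"Query: {query}\nComplexity:")
        return "\n\n".join(blocks)

    print(complexity_prompt("Clone the repo and run the tests"))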