feat: better server provider

martin legrand 2025-03-29 11:46:22 +01:00
parent 0bf813e865
commit a4cfa9c651
10 changed files with 186 additions and 91 deletions

1
.gitignore vendored

@@ -1,4 +1,5 @@
*.wav
*.DS_Store
*.safetensors
config.ini
*.egg-info

View File

@@ -149,6 +149,8 @@ You will be prompted with `>>> `
This indicates that agenticSeek is waiting for you to type instructions.
You can also use speech to text by setting `listen = True` in the config.
To exit, simply say `goodbye`.
Here are some usage examples:
### Coding/Bash

52
server/app.py Normal file

@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import logging
import argparse
from flask import Flask, jsonify, request

from sources.llamacpp import LlamacppLLM
from sources.ollama import OllamaLLM

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

parser = argparse.ArgumentParser(description='AgenticSeek server script')
args = parser.parse_args()

app = Flask(__name__)

generator = None

@app.route('/generate', methods=['POST'])
def start_generation():
    if generator is None:
        return jsonify({"error": "Generator not initialized"}), 400
    data = request.get_json()
    history = data.get('messages', [])
    if generator.start(history):
        return jsonify({"message": "Generation started"}), 202
    return jsonify({"error": "Generation already in progress"}), 400

@app.route('/setup', methods=['POST'])
def setup():
    global generator  # assign to the module-level generator, not a local variable
    data = request.get_json()
    model = data.get('model', None)
    provider = data.get('provider', None)
    if provider is not None and generator is None:
        if provider == "ollama":
            generator = OllamaLLM()
        elif provider == "llamacpp":
            generator = LlamacppLLM()
        else:
            return jsonify({"error": "Provider not supported"}), 400
    if model is None:
        return jsonify({"error": "Model not provided"}), 400
    generator.set_model(model)
    return jsonify({"message": "Model set"}), 200

@app.route('/get_updated_sentence')
def get_updated_sentence():
    return generator.get_status()

if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True, debug=True, port=3333)
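
For orientation, here is a minimal client-side sketch of the new three-step API (endpoint paths and JSON fields are taken from app.py above; the host, port, polling interval, and model name are illustrative assumptions):

```python
import time
import requests

BASE = "http://127.0.0.1:3333"  # assumes the server was started locally with server/app.py

# 1. Choose a provider and model (the model name here is only an example).
requests.post(f"{BASE}/setup", json={"provider": "ollama", "model": "deepseek-r1:14b"})

# 2. Start a generation with an OpenAI-style message history.
requests.post(f"{BASE}/generate", json={"messages": [{"role": "user", "content": "Hello"}]})

# 3. Poll until the server reports that the generation is complete.
while True:
    status = requests.get(f"{BASE}/get_updated_sentence").json()
    if status["is_complete"]:
        print(status["sentence"])
        break
    time.sleep(0.5)
```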

View File

@@ -1,2 +1,3 @@
flask>=2.3.0
ollama>=0.4.7
llama-cpp-python

View File

@@ -1,86 +0,0 @@
#!/usr/bin python3

from flask import Flask, jsonify, request
import threading
import ollama
import logging
import argparse

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

parser = argparse.ArgumentParser(description='AgenticSeek server script')
parser.add_argument('--model', type=str, help='Model to use. eg: deepseek-r1:14b', required=True)
args = parser.parse_args()

app = Flask(__name__)
model = args.model

# Shared state with thread-safe locks
class GenerationState:
    def __init__(self):
        self.lock = threading.Lock()
        self.last_complete_sentence = ""
        self.current_buffer = ""
        self.is_generating = False

state = GenerationState()

def generate_response(history, model):
    global state
    print("using model:::::::", model)
    try:
        with state.lock:
            state.is_generating = True
            state.last_complete_sentence = ""
            state.current_buffer = ""
        stream = ollama.chat(
            model=model,
            messages=history,
            stream=True,
        )
        for chunk in stream:
            content = chunk['message']['content']
            print(content, end='', flush=True)
            with state.lock:
                state.current_buffer += content
    except ollama.ResponseError as e:
        if e.status_code == 404:
            ollama.pull(model)
        with state.lock:
            state.is_generating = False
        print(f"Error: {e}")
    finally:
        with state.lock:
            state.is_generating = False

@app.route('/generate', methods=['POST'])
def start_generation():
    global state
    data = request.get_json()
    with state.lock:
        if state.is_generating:
            return jsonify({"error": "Generation already in progress"}), 400
        history = data.get('messages', [])
        # Start generation in background thread
        threading.Thread(target=generate_response, args=(history, model)).start()
        return jsonify({"message": "Generation started"}), 202

@app.route('/get_updated_sentence')
def get_updated_sentence():
    global state
    with state.lock:
        return jsonify({
            "sentence": state.current_buffer,
            "is_complete": not state.is_generating
        })

if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True, debug=True, port=5000)

View File

@@ -0,0 +1,55 @@
from flask import jsonify
import threading
import logging
from abc import abstractmethod

class GenerationState:
    def __init__(self):
        self.lock = threading.Lock()
        self.last_complete_sentence = ""
        self.current_buffer = ""
        self.is_generating = False

    def status(self) -> dict:
        return {
            "sentence": self.current_buffer,
            "is_complete": not self.is_generating,
            "last_complete_sentence": self.last_complete_sentence,
            "is_generating": self.is_generating,
        }

class GeneratorLLM:
    def __init__(self):
        self.model = None
        self.state = GenerationState()
        self.logger = logging.getLogger(__name__)

    def set_model(self, model: str) -> None:
        self.logger.info(f"Model set to {model}")
        self.model = model

    def start(self, history: list) -> bool:
        if self.model is None:
            raise Exception("Model not set")
        with self.state.lock:
            if self.state.is_generating:
                return False
        self.logger.info("Starting generation")
        threading.Thread(target=self.generate, args=(history,)).start()
        return True

    def get_status(self):
        with self.state.lock:
            return jsonify(self.state.status())

    @abstractmethod
    def generate(self, history: list) -> None:
        """
        Generate text using the model.
        args:
            history: list of chat messages
        returns:
            None
        """
        pass
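
For context, a new backend only has to subclass GeneratorLLM, implement generate, and keep self.state up to date; a minimal sketch of the contract (the EchoLLM class is hypothetical, not part of this commit):

```python
from .generator import GeneratorLLM

class EchoLLM(GeneratorLLM):
    """Toy backend that echoes the last user message, to illustrate the contract."""

    def generate(self, history: list) -> None:
        with self.state.lock:
            self.state.is_generating = True
            self.state.current_buffer = ""
        try:
            last = history[-1]["content"] if history else ""
            with self.state.lock:
                self.state.current_buffer += f"echo: {last}"
        finally:
            with self.state.lock:
                self.state.is_generating = False
```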

View File

@@ -0,0 +1,22 @@
from llama_cpp import Llama

from .generator import GeneratorLLM

class LlamacppLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using llama.cpp
        """
        super().__init__()
        # The model name is only known after /setup, so the Llama instance is created lazily.
        self.llm = None

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with llama.cpp")
        if self.llm is None:
            self.llm = Llama.from_pretrained(
                repo_id=self.model,
                filename="*q8_0.gguf",
                verbose=True
            )
        with self.state.lock:
            self.state.is_generating = True
            self.state.current_buffer = ""
        try:
            stream = self.llm.create_chat_completion(messages=history, stream=True)
            for chunk in stream:
                content = chunk['choices'][0]['delta'].get('content', '')
                with self.state.lock:
                    self.state.current_buffer += content
        finally:
            with self.state.lock:
                self.state.is_generating = False
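
Note that with this provider the model passed to /setup is interpreted as a Hugging Face repository id: Llama.from_pretrained downloads the first file in that repository matching *q8_0.gguf. A hedged example request (the server address and repo id are illustrative only):

```python
import requests

# Point the server at a GGUF repository; the repo id below is just an example.
requests.post("http://127.0.0.1:3333/setup",
              json={"provider": "llamacpp", "model": "Qwen/Qwen2.5-7B-Instruct-GGUF"})
```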

46
server/sources/ollama.py Normal file

@@ -0,0 +1,46 @@
import ollama

from .generator import GeneratorLLM

class OllamaLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using Ollama.
        """
        super().__init__()

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Ollama")
        try:
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""
            stream = ollama.chat(
                model=self.model,
                messages=history,
                stream=True,
            )
            for chunk in stream:
                content = chunk['message']['content']
                print(content, end='', flush=True)
                with self.state.lock:
                    self.state.current_buffer += content
        except ollama.ResponseError as e:
            if e.status_code == 404:
                self.logger.info(f"Downloading {self.model}...")
                ollama.pull(self.model)
            with self.state.lock:
                self.state.is_generating = False
            print(f"Error: {e}")
        except Exception as e:
            if "refused" in str(e).lower():
                raise Exception("Ollama connection failed. Is the server running?") from e
            raise
        finally:
            with self.state.lock:
                self.state.is_generating = False

View File

@@ -25,10 +25,10 @@ class Interaction:
        if stt_enabled:
            self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
            self.recorder = AudioRecorder()
        if tts_enabled:
            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
        if recover_last_session:
            self.load_last_session()
        if tts_enabled:
            self.speech.speak("Hello, we are online and ready. What can I do for you ?")

    def find_ai_name(self) -> str:
        """Find the name of the default AI. It is required for STT as a trigger word."""

View File

@@ -107,13 +107,15 @@ class Provider:
        Use a remote server with LLM to generate text.
        """
        thought = ""
        route_start = f"http://{self.server_ip}/generate"
        route_setup = f"http://{self.server_ip}/setup"
        route_gen = f"http://{self.server_ip}/generate"

        if not self.is_ip_online(self.server_ip.split(":")[0]):
            raise Exception(f"Server is offline at {self.server_ip}")

        try:
            requests.post(route_start, json={"messages": history})
            requests.post(route_setup, json={"model": self.model, "provider": self.provider_name})
            requests.post(route_gen, json={"messages": history})
            is_complete = False
            while not is_complete:
                response = requests.get(f"http://{self.server_ip}/get_updated_sentence")
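
The hunk is cut off inside the polling loop; for reference, here is a self-contained sketch of the same client-side polling pattern, using the field names returned by GenerationState.status() in generator.py (the helper name and sleep interval are assumptions):

```python
import time
import requests

def poll_generation(server_ip: str, interval: float = 0.5) -> str:
    """Poll /get_updated_sentence until the server marks the generation as complete."""
    while True:
        result = requests.get(f"http://{server_ip}/get_updated_sentence").json()
        if result["is_complete"]:
            return result["sentence"]
        time.sleep(interval)
```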