Mirror of https://github.com/tcsenpai/agenticSeek.git
Synced 2025-06-06 11:05:26 +00:00
feat : better server provider
This commit is contained in:
parent 0bf813e865
commit a4cfa9c651
.gitignore (vendored): 1 addition
@@ -1,4 +1,5 @@
*.wav
*.DS_Store
*.safetensors
config.ini
*.egg-info
@@ -149,6 +149,8 @@ You will be prompted with `>>> `
This indicates that agenticSeek is waiting for you to type an instruction.
You can also use speech-to-text by setting `listen = True` in the config.

To exit, simply say `goodbye`.

Here are some usage examples:

### Coding/Bash

server/app.py (new file, 52 lines)
@@ -0,0 +1,52 @@
#!/usr/bin/env python3

import logging
import argparse

from flask import Flask, jsonify, request

from sources.llamacpp import LlamacppLLM
from sources.ollama import OllamaLLM

# Silence per-request logging from the Flask development server.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

parser = argparse.ArgumentParser(description='AgenticSeek server script')
args = parser.parse_args()

app = Flask(__name__)

# Active LLM backend; created by /setup and used by /generate.
generator = None

@app.route('/generate', methods=['POST'])
def start_generation():
    if generator is None:
        return jsonify({"error": "Generator not initialized"}), 400
    data = request.get_json()
    history = data.get('messages', [])
    if generator.start(history):
        return jsonify({"message": "Generation started"}), 202
    return jsonify({"error": "Generation already in progress"}), 400

@app.route('/setup', methods=['POST'])
def setup():
    global generator  # assigned below, so the module-level name must be declared
    data = request.get_json()
    model = data.get('model', None)
    provider = data.get('provider', None)
    if provider is not None and generator is None:
        if provider == "ollama":
            generator = OllamaLLM()
        elif provider == "llamacpp":
            generator = LlamacppLLM()
        else:
            return jsonify({"error": "Provider not supported"}), 400
    if generator is None:
        return jsonify({"error": "Generator not initialized, provide a provider"}), 400
    if model is None:
        return jsonify({"error": "Model not provided"}), 400
    generator.set_model(model)
    return jsonify({"message": "Model set"}), 200

@app.route('/get_updated_sentence')
def get_updated_sentence():
    return generator.get_status()

if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True, debug=True, port=3333)
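
For reference, a minimal client for the endpoints above might look like the following sketch. The host, model name, and message content are illustrative assumptions; only the port 3333 and the endpoint paths come from app.py itself.

# Hypothetical client sketch for /setup, /generate and /get_updated_sentence.
import requests

BASE = "http://127.0.0.1:3333"  # assumes the default port used by app.run above

# 1. Pick a backend and model (example values).
requests.post(f"{BASE}/setup", json={"provider": "ollama", "model": "deepseek-r1:14b"})

# 2. Kick off generation; the server runs it in a background thread.
requests.post(f"{BASE}/generate", json={"messages": [{"role": "user", "content": "Hello"}]})

# 3. Read the text produced so far.
status = requests.get(f"{BASE}/get_updated_sentence").json()
print(status["sentence"], status["is_complete"])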

@@ -1,2 +1,3 @@
flask>=2.3.0
ollama>=0.4.7
llama-cpp-python

@@ -1,86 +0,0 @@
#!/usr/bin python3

from flask import Flask, jsonify, request
import threading
import ollama
import logging
import argparse

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

parser = argparse.ArgumentParser(description='AgenticSeek server script')
parser.add_argument('--model', type=str, help='Model to use. eg: deepseek-r1:14b', required=True)
args = parser.parse_args()

app = Flask(__name__)

model = args.model

# Shared state with thread-safe locks
class GenerationState:
    def __init__(self):
        self.lock = threading.Lock()
        self.last_complete_sentence = ""
        self.current_buffer = ""
        self.is_generating = False

state = GenerationState()

def generate_response(history, model):
    global state
    print("using model:::::::", model)
    try:
        with state.lock:
            state.is_generating = True
            state.last_complete_sentence = ""
            state.current_buffer = ""

        stream = ollama.chat(
            model=model,
            messages=history,
            stream=True,
        )

        for chunk in stream:
            content = chunk['message']['content']
            print(content, end='', flush=True)

            with state.lock:
                state.current_buffer += content

    except ollama.ResponseError as e:
        if e.status_code == 404:
            ollama.pull(model)
        with state.lock:
            state.is_generating = False
        print(f"Error: {e}")
    finally:
        with state.lock:
            state.is_generating = False

@app.route('/generate', methods=['POST'])
def start_generation():
    global state
    data = request.get_json()

    with state.lock:
        if state.is_generating:
            return jsonify({"error": "Generation already in progress"}), 400

    history = data.get('messages', [])
    # Start generation in background thread
    threading.Thread(target=generate_response, args=(history, model)).start()
    return jsonify({"message": "Generation started"}), 202

@app.route('/get_updated_sentence')
def get_updated_sentence():
    global state
    with state.lock:
        return jsonify({
            "sentence": state.current_buffer,
            "is_complete": not state.is_generating
        })

if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True, debug=True, port=5000)

server/sources/generator.py (new file, 55 lines)
@@ -0,0 +1,55 @@
from flask import jsonify
import threading
import logging
from abc import abstractmethod

class GenerationState:
    """Thread-safe container for the text produced by a background generation."""
    def __init__(self):
        self.lock = threading.Lock()
        self.last_complete_sentence = ""
        self.current_buffer = ""
        self.is_generating = False

    def status(self) -> dict:
        return {
            "sentence": self.current_buffer,
            "is_complete": not self.is_generating,
            "last_complete_sentence": self.last_complete_sentence,
            "is_generating": self.is_generating,
        }

class GeneratorLLM:
    def __init__(self):
        self.model = None
        self.state = GenerationState()
        self.logger = logging.getLogger(__name__)

    def set_model(self, model: str) -> None:
        self.logger.info(f"Model set to {model}")
        self.model = model

    def start(self, history: list) -> bool:
        """Spawn generate() in a background thread; return False if a generation is already running."""
        if self.model is None:
            raise Exception("Model not set")
        with self.state.lock:
            if self.state.is_generating:
                return False
        self.logger.info("Starting generation")
        threading.Thread(target=self.generate, args=(history,)).start()
        return True

    def get_status(self):
        """Return the current generation state as a Flask JSON response."""
        with self.state.lock:
            return jsonify(self.state.status())

    @abstractmethod
    def generate(self, history: list) -> None:
        """
        Generate text using the model, streaming output into self.state.

        args:
            history: list of chat messages
        returns:
            None
        """
        pass
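
To illustrate the contract generate() is expected to follow, here is a minimal hypothetical subclass (not part of the commit): a backend toggles is_generating under the lock and writes its output into current_buffer, as OllamaLLM does below.

# Hypothetical provider used only to illustrate the GeneratorLLM contract.
from .generator import GeneratorLLM

class EchoLLM(GeneratorLLM):
    def generate(self, history: list) -> None:
        with self.state.lock:
            self.state.is_generating = True
            self.state.current_buffer = ""
        try:
            # "Generate" by echoing the last user message.
            reply = history[-1]["content"] if history else ""
            with self.state.lock:
                self.state.current_buffer += reply
        finally:
            with self.state.lock:
                self.state.is_generating = False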

server/sources/llamacpp.py (new file, 22 lines)
@@ -0,0 +1,22 @@
from llama_cpp import Llama

from .generator import GeneratorLLM

class LlamacppLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using llama.cpp.
        """
        super().__init__()
        # The target repo is only known after set_model(), so the model is loaded lazily.
        self.llm = None

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Llama.cpp")
        if self.llm is None:
            self.llm = Llama.from_pretrained(
                repo_id=self.model,
                filename="*q8_0.gguf",
                verbose=True
            )
        with self.state.lock:
            self.state.is_generating = True
            self.state.current_buffer = ""
        try:
            output = self.llm.create_chat_completion(
                messages=history
            )
            with self.state.lock:
                self.state.current_buffer = output['choices'][0]['message']['content']
        finally:
            with self.state.lock:
                self.state.is_generating = False

server/sources/ollama.py (new file, 46 lines)
@@ -0,0 +1,46 @@
import ollama

from .generator import GeneratorLLM

class OllamaLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using Ollama.
        """
        super().__init__()

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Ollama")
        try:
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""

            stream = ollama.chat(
                model=self.model,
                messages=history,
                stream=True,
            )

            for chunk in stream:
                content = chunk['message']['content']
                print(content, end='', flush=True)

                with self.state.lock:
                    self.state.current_buffer += content

        except ollama.ResponseError as e:
            # A 404 means the model is missing locally; pull it so a later call can succeed.
            if e.status_code == 404:
                self.logger.info(f"Downloading {self.model}...")
                ollama.pull(self.model)
            with self.state.lock:
                self.state.is_generating = False
            print(f"Error: {e}")
        except Exception as e:
            if "refused" in str(e).lower():
                raise Exception("Ollama connection failed. Is the server running?") from e
            raise
        finally:
            with self.state.lock:
                self.state.is_generating = False

@@ -25,10 +25,10 @@ class Interaction:
        if stt_enabled:
            self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
            self.recorder = AudioRecorder()
        if tts_enabled:
            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
        if recover_last_session:
            self.load_last_session()
        if tts_enabled:
            self.speech.speak("Hello, we are online and ready. What can I do for you ?")

    def find_ai_name(self) -> str:
        """Find the name of the default AI. It is required for STT as a trigger word."""

@@ -107,13 +107,15 @@ class Provider:
        Use a remote server with LLM to generate text.
        """
        thought = ""
        route_start = f"http://{self.server_ip}/generate"
        route_setup = f"http://{self.server_ip}/setup"
        route_gen = f"http://{self.server_ip}/generate"

        if not self.is_ip_online(self.server_ip.split(":")[0]):
            raise Exception(f"Server is offline at {self.server_ip}")

        try:
            requests.post(route_start, json={"messages": history})
            requests.post(route_setup, json={"model": self.model, "provider": self.provider_name})
            requests.post(route_gen, json={"messages": history})
            is_complete = False
            while not is_complete:
                response = requests.get(f"http://{self.server_ip}/get_updated_sentence")