Mirror of https://github.com/tcsenpai/agenticSeek.git, synced 2025-06-06 11:05:26 +00:00

Commit a4cfa9c651 (parent 0bf813e865)
feat : better server provider
.gitignore (vendored)

@@ -1,4 +1,5 @@
 *.wav
+*.DS_Store
 *.safetensors
 config.ini
 *.egg-info
@@ -149,6 +149,8 @@ You will be prompted with `>>> `
 This indicate agenticSeek await you type for instructions.
 You can also use speech to text by setting `listen = True` in the config.
+
+To exit, simply say `goodbye`.
 
 Here are some example usage:
 
 ### Coding/Bash
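The `listen = True` option mentioned above can be illustrated with a minimal config.ini fragment (the section name and the neighbouring `speak` key are assumptions; config.ini itself is git-ignored and not part of this commit):

    [MAIN]
    speak = True
    listen = True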
server/app.py (new file)
#!/usr/bin/env python3

import logging
import argparse
from flask import Flask, jsonify, request

from sources.llamacpp import LlamacppLLM
from sources.ollama import OllamaLLM

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

parser = argparse.ArgumentParser(description='AgenticSeek server script')
args = parser.parse_args()

app = Flask(__name__)

generator = None

@app.route('/generate', methods=['POST'])
def start_generation():
    if generator is None:
        return jsonify({"error": "Generator not initialized"}), 400
    data = request.get_json()
    history = data.get('messages', [])
    if generator.start(history):
        return jsonify({"message": "Generation started"}), 202
    return jsonify({"error": "Generation already in progress"}), 400

@app.route('/setup', methods=['POST'])
def setup():
    global generator  # rebind the module-level generator chosen below
    data = request.get_json()
    model = data.get('model', None)
    provider = data.get('provider', None)
    if provider is not None and generator is None:
        if provider == "ollama":
            generator = OllamaLLM()
        elif provider == "llamacpp":
            generator = LlamacppLLM()
        else:
            return jsonify({"error": "Provider not supported"}), 400
    if model is None:
        return jsonify({"error": "Model not provided"}), 400
    generator.set_model(model)
    return jsonify({"message": "Model set"}), 200

@app.route('/get_updated_sentence')
def get_updated_sentence():
    return generator.get_status()

if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True, debug=True, port=3333)
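For orientation, a minimal client of these routes could look like the sketch below (host, model name, and message content are placeholders; only the route names and JSON fields come from app.py above):

    import requests

    server = "http://127.0.0.1:3333"  # app.run() above listens on port 3333

    # Choose a backend and model, then start a generation.
    requests.post(f"{server}/setup", json={"provider": "ollama", "model": "deepseek-r1:14b"})
    requests.post(f"{server}/generate", json={"messages": [{"role": "user", "content": "Hello"}]})

    # Partial output is exposed by /get_updated_sentence (see the polling helper sketched at the end).
    print(requests.get(f"{server}/get_updated_sentence").json())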
@@ -1,2 +1,3 @@
 flask>=2.3.0
 ollama>=0.4.7
+llama-cpp-python
Deleted file:

@@ -1,86 +0,0 @@
#!/usr/bin python3

from flask import Flask, jsonify, request
import threading
import ollama
import logging
import argparse

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

parser = argparse.ArgumentParser(description='AgenticSeek server script')
parser.add_argument('--model', type=str, help='Model to use. eg: deepseek-r1:14b', required=True)
args = parser.parse_args()

app = Flask(__name__)

model = args.model

# Shared state with thread-safe locks
class GenerationState:
    def __init__(self):
        self.lock = threading.Lock()
        self.last_complete_sentence = ""
        self.current_buffer = ""
        self.is_generating = False

state = GenerationState()

def generate_response(history, model):
    global state
    print("using model:::::::", model)
    try:
        with state.lock:
            state.is_generating = True
            state.last_complete_sentence = ""
            state.current_buffer = ""

        stream = ollama.chat(
            model=model,
            messages=history,
            stream=True,
        )

        for chunk in stream:
            content = chunk['message']['content']
            print(content, end='', flush=True)

            with state.lock:
                state.current_buffer += content

    except ollama.ResponseError as e:
        if e.status_code == 404:
            ollama.pull(model)
        with state.lock:
            state.is_generating = False
        print(f"Error: {e}")
    finally:
        with state.lock:
            state.is_generating = False

@app.route('/generate', methods=['POST'])
def start_generation():
    global state
    data = request.get_json()

    with state.lock:
        if state.is_generating:
            return jsonify({"error": "Generation already in progress"}), 400

    history = data.get('messages', [])
    # Start generation in background thread
    threading.Thread(target=generate_response, args=(history, model)).start()
    return jsonify({"message": "Generation started"}), 202

@app.route('/get_updated_sentence')
def get_updated_sentence():
    global state
    with state.lock:
        return jsonify({
            "sentence": state.current_buffer,
            "is_complete": not state.is_generating
        })

if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True, debug=True, port=5000)
server/sources/generator.py (new file)
from flask import jsonify
import threading
import logging
from abc import abstractmethod

class GenerationState:
    def __init__(self):
        self.lock = threading.Lock()
        self.last_complete_sentence = ""
        self.current_buffer = ""
        self.is_generating = False

    def status(self) -> dict:
        return {
            "sentence": self.current_buffer,
            "is_complete": not self.is_generating,
            "last_complete_sentence": self.last_complete_sentence,
            "is_generating": self.is_generating,
        }

class GeneratorLLM():
    def __init__(self):
        self.model = None
        self.state = GenerationState()
        self.logger = logging.getLogger(__name__)

    def set_model(self, model: str) -> None:
        self.logger.info(f"Model set to {model}")
        self.model = model

    def start(self, history: list) -> bool:
        if self.model is None:
            raise Exception("Model not set")
        with self.state.lock:
            if self.state.is_generating:
                return False
        self.logger.info("Starting generation")
        threading.Thread(target=self.generate, args=(history,)).start()
        return True

    def get_status(self) -> dict:
        with self.state.lock:
            return jsonify(self.state.status())

    @abstractmethod
    def generate(self, history: list) -> None:
        """
        Generate text using the model.
        args:
            history: list of strings
        returns:
            None
        """
        pass
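Both providers below follow the same contract: subclass GeneratorLLM, implement generate(), and stream text into self.state under its lock. As a rough sketch of that pattern (the EchoLLM class is hypothetical, not part of this commit):

    from sources.generator import GeneratorLLM

    class EchoLLM(GeneratorLLM):
        """Toy provider that just echoes the last user message."""
        def generate(self, history: list) -> None:
            with self.state.lock:
                self.state.is_generating = True
                self.state.current_buffer = ""
            try:
                reply = history[-1]["content"] if history else ""
                with self.state.lock:
                    self.state.current_buffer += reply
            finally:
                with self.state.lock:
                    self.state.is_generating = False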
server/sources/llamacpp.py (new file)
from .generator import GeneratorLLM
from llama_cpp import Llama

class LlamacppLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using llama.cpp
        """
        super().__init__()
        self.llm = None

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Llama.cpp")
        if self.llm is None:
            # Load lazily: self.model is only known after /setup has called set_model()
            self.llm = Llama.from_pretrained(
                repo_id=self.model,
                filename="*q8_0.gguf",
                verbose=True
            )
        self.llm.create_chat_completion(
            messages=history
        )
server/sources/ollama.py (new file)
from .generator import GeneratorLLM
import ollama

class OllamaLLM(GeneratorLLM):
    def __init__(self):
        """
        Handle generation using Ollama.
        """
        super().__init__()

    def generate(self, history):
        self.logger.info(f"Using {self.model} for generation with Ollama")
        try:
            with self.state.lock:
                self.state.is_generating = True
                self.state.last_complete_sentence = ""
                self.state.current_buffer = ""

            stream = ollama.chat(
                model=self.model,
                messages=history,
                stream=True,
            )

            for chunk in stream:
                content = chunk['message']['content']
                print(content, end='', flush=True)

                with self.state.lock:
                    self.state.current_buffer += content

        except ollama.ResponseError as e:
            if e.status_code == 404:
                self.logger.info(f"Downloading {self.model}...")
                ollama.pull(self.model)
            with self.state.lock:
                self.state.is_generating = False
            print(f"Error: {e}")
        except Exception as e:
            if "refused" in str(e).lower():
                raise Exception("Ollama connection failed. is the server running ?") from e
        finally:
            with self.state.lock:
                self.state.is_generating = False
@@ -25,10 +25,10 @@ class Interaction:
         if stt_enabled:
             self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
             self.recorder = AudioRecorder()
-        if tts_enabled:
-            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
         if recover_last_session:
             self.load_last_session()
+        if tts_enabled:
+            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
 
     def find_ai_name(self) -> str:
         """Find the name of the default AI. It is required for STT as a trigger word."""
@@ -107,13 +107,15 @@ class Provider:
         Use a remote server with LLM to generate text.
         """
         thought = ""
-        route_start = f"http://{self.server_ip}/generate"
+        route_setup = f"http://{self.server_ip}/setup"
+        route_gen = f"http://{self.server_ip}/generate"
 
         if not self.is_ip_online(self.server_ip.split(":")[0]):
             raise Exception(f"Server is offline at {self.server_ip}")
 
         try:
-            requests.post(route_start, json={"messages": history})
+            requests.post(route_setup, json={"model": self.model, "provider": self.provider_name})
+            requests.post(route_gen, json={"messages": history})
             is_complete = False
             while not is_complete:
                 response = requests.get(f"http://{self.server_ip}/get_updated_sentence")
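The diff ends inside the polling loop; for context, the client-side polling against /get_updated_sentence can be sketched as a standalone helper (a hypothetical function, not the repository's code; the field names come from GenerationState.status() above):

    import time
    import requests

    def poll_until_complete(server_ip: str) -> str:
        """Sketch of client-side polling of /get_updated_sentence."""
        time.sleep(1)  # give the background thread time to flip is_generating (see GeneratorLLM.start)
        while True:
            data = requests.get(f"http://{server_ip}/get_updated_sentence").json()
            if data["is_complete"]:
                return data["sentence"]
            time.sleep(2)  # polling interval is an assumption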