diff --git a/.gitignore b/.gitignore
index afef1e3..9916e76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.wav
+*.DS_Store
 *.safetensors
 config.ini
 *.egg-info
diff --git a/README.md b/README.md
index 88edad9..61876dc 100644
--- a/README.md
+++ b/README.md
@@ -149,6 +149,8 @@ You will be prompted with `>>> `
 This indicate agenticSeek await you type for instructions.
 You can also use speech to text by setting `listen = True` in the config.
 
+To exit, simply say `goodbye`.
+
 Here are some example usage:
 
 ### Coding/Bash
diff --git a/server/app.py b/server/app.py
new file mode 100644
index 0000000..a95fbfb
--- /dev/null
+++ b/server/app.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+import logging
+import argparse
+from flask import Flask, jsonify, request
+
+from sources.llamacpp import LlamacppLLM
+from sources.ollama import OllamaLLM
+
+log = logging.getLogger('werkzeug')
+log.setLevel(logging.ERROR)
+
+parser = argparse.ArgumentParser(description='AgenticSeek server script')
+args = parser.parse_args()
+
+app = Flask(__name__)
+
+generator = None
+
+@app.route('/generate', methods=['POST'])
+def start_generation():
+    if generator is None:
+        return jsonify({"error": "Generator not initialized"}), 400
+    data = request.get_json()
+    history = data.get('messages', [])
+    if generator.start(history):
+        return jsonify({"message": "Generation started"}), 202
+    return jsonify({"error": "Generation already in progress"}), 400
+
+@app.route('/setup', methods=['POST'])
+def setup():
+    global generator
+    data = request.get_json()
+    model = data.get('model', None)
+    provider = data.get('provider', None)
+    if provider is not None and generator is None:
+        if provider == "ollama":
+            generator = OllamaLLM()
+        elif provider == "llamacpp":
+            generator = LlamacppLLM()
+        else:
+            return jsonify({"error": "Provider not supported"}), 400
+    if generator is None:
+        return jsonify({"error": "Provider not provided"}), 400
+    if model is None:
+        return jsonify({"error": "Model not provided"}), 400
+    generator.set_model(model)
+    return jsonify({"message": "Model set"}), 200
+
+@app.route('/get_updated_sentence')
+def get_updated_sentence():
+    if generator is None:
+        return jsonify({"error": "Generator not initialized"}), 400
+    return generator.get_status()
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', threaded=True, debug=True, port=3333)
\ No newline at end of file
diff --git a/server/requirements.txt b/server/requirements.txt
index 7709179..8272fe5 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -1,2 +1,3 @@
 flask>=2.3.0
-ollama>=0.4.7
\ No newline at end of file
+ollama>=0.4.7
+llama-cpp-python
\ No newline at end of file
diff --git a/server/server_ollama.py b/server/server_ollama.py
deleted file mode 100644
index a38a936..0000000
--- a/server/server_ollama.py
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin python3
-
-from flask import Flask, jsonify, request
-import threading
-import ollama
-import logging
-import argparse
-
-log = logging.getLogger('werkzeug')
-log.setLevel(logging.ERROR)
-
-parser = argparse.ArgumentParser(description='AgenticSeek server script')
-parser.add_argument('--model', type=str, help='Model to use. eg: deepseek-r1:14b', required=True)
-args = parser.parse_args()
-
-app = Flask(__name__)
-
-model = args.model
-
-# Shared state with thread-safe locks
-class GenerationState:
-    def __init__(self):
-        self.lock = threading.Lock()
-        self.last_complete_sentence = ""
-        self.current_buffer = ""
-        self.is_generating = False
-
-state = GenerationState()
-
-def generate_response(history, model):
-    global state
-    print("using model:::::::", model)
-    try:
-        with state.lock:
-            state.is_generating = True
-            state.last_complete_sentence = ""
-            state.current_buffer = ""
-
-        stream = ollama.chat(
-            model=model,
-            messages=history,
-            stream=True,
-        )
-
-        for chunk in stream:
-            content = chunk['message']['content']
-            print(content, end='', flush=True)
-
-            with state.lock:
-                state.current_buffer += content
-
-    except ollama.ResponseError as e:
-        if e.status_code == 404:
-            ollama.pull(model)
-        with state.lock:
-            state.is_generating = False
-        print(f"Error: {e}")
-    finally:
-        with state.lock:
-            state.is_generating = False
-
-@app.route('/generate', methods=['POST'])
-def start_generation():
-    global state
-    data = request.get_json()
-
-    with state.lock:
-        if state.is_generating:
-            return jsonify({"error": "Generation already in progress"}), 400
-
-    history = data.get('messages', [])
-    # Start generation in background thread
-    threading.Thread(target=generate_response, args=(history, model)).start()
-    return jsonify({"message": "Generation started"}), 202
-
-@app.route('/get_updated_sentence')
-def get_updated_sentence():
-    global state
-    with state.lock:
-        return jsonify({
-            "sentence": state.current_buffer,
-            "is_complete": not state.is_generating
-        })
-
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', threaded=True, debug=True, port=5000)
\ No newline at end of file
diff --git a/server/sources/generator.py b/server/sources/generator.py
new file mode 100644
index 0000000..0c70f06
--- /dev/null
+++ b/server/sources/generator.py
@@ -0,0 +1,55 @@
+
+from flask import jsonify
+import threading
+import logging
+from abc import ABC, abstractmethod
+
+class GenerationState:
+    def __init__(self):
+        self.lock = threading.Lock()
+        self.last_complete_sentence = ""
+        self.current_buffer = ""
+        self.is_generating = False
+
+    def status(self) -> dict:
+        return {
+            "sentence": self.current_buffer,
+            "is_complete": not self.is_generating,
+            "last_complete_sentence": self.last_complete_sentence,
+            "is_generating": self.is_generating,
+        }
+
+class GeneratorLLM(ABC):
+    def __init__(self):
+        self.model = None
+        self.state = GenerationState()
+        self.logger = logging.getLogger(__name__)
+
+    def set_model(self, model: str) -> None:
+        self.logger.info(f"Model set to {model}")
+        self.model = model
+
+    def start(self, history: list) -> bool:
+        if self.model is None:
+            raise Exception("Model not set")
+        with self.state.lock:
+            if self.state.is_generating:
+                return False
+        self.logger.info("Starting generation")
+        threading.Thread(target=self.generate, args=(history,)).start()
+        return True
+
+    def get_status(self) -> dict:
+        with self.state.lock:
+            return jsonify(self.state.status())
+
+    @abstractmethod
+    def generate(self, history: list) -> None:
+        """
+        Generate text using the model.
+        args:
+            history: list of strings
+        returns:
+            None
+        """
+        pass
\ No newline at end of file
diff --git a/server/sources/llamacpp.py b/server/sources/llamacpp.py
new file mode 100644
index 0000000..524cb1d
--- /dev/null
+++ b/server/sources/llamacpp.py
@@ -0,0 +1,35 @@
+
+from llama_cpp import Llama
+
+from .generator import GeneratorLLM
+
+class LlamacppLLM(GeneratorLLM):
+    def __init__(self):
+        """
+        Handle generation using llama.cpp
+        """
+        super().__init__()
+        self.llm = None
+
+    def generate(self, history):
+        self.logger.info(f"Using {self.model} for generation with Llama.cpp")
+        if self.llm is None:
+            # Load lazily: the repo id is only known once set_model() has been called.
+            self.llm = Llama.from_pretrained(
+                repo_id=self.model,
+                filename="*q8_0.gguf",
+                verbose=True
+            )
+        with self.state.lock:
+            self.state.is_generating = True
+            self.state.last_complete_sentence = ""
+            self.state.current_buffer = ""
+        try:
+            output = self.llm.create_chat_completion(
+                messages=history
+            )
+            with self.state.lock:
+                self.state.current_buffer = output['choices'][0]['message']['content']
+        finally:
+            with self.state.lock:
+                self.state.is_generating = False
\ No newline at end of file
diff --git a/server/sources/ollama.py b/server/sources/ollama.py
new file mode 100644
index 0000000..548a1d8
--- /dev/null
+++ b/server/sources/ollama.py
@@ -0,0 +1,47 @@
+
+import ollama
+
+from .generator import GeneratorLLM
+
+class OllamaLLM(GeneratorLLM):
+    def __init__(self):
+        """
+        Handle generation using Ollama.
+        """
+        super().__init__()
+
+    def generate(self, history):
+        self.logger.info(f"Using {self.model} for generation with Ollama")
+        try:
+            with self.state.lock:
+                self.state.is_generating = True
+                self.state.last_complete_sentence = ""
+                self.state.current_buffer = ""
+
+            stream = ollama.chat(
+                model=self.model,
+                messages=history,
+                stream=True,
+            )
+
+            for chunk in stream:
+                content = chunk['message']['content']
+                print(content, end='', flush=True)
+
+                with self.state.lock:
+                    self.state.current_buffer += content
+
+        except ollama.ResponseError as e:
+            if e.status_code == 404:
+                self.logger.info(f"Downloading {self.model}...")
+                ollama.pull(self.model)
+            with self.state.lock:
+                self.state.is_generating = False
+            print(f"Error: {e}")
+        except Exception as e:
+            if "refused" in str(e).lower():
+                raise Exception("Ollama connection failed. Is the server running?") from e
+            self.logger.error(f"Error: {e}")
+        finally:
+            with self.state.lock:
+                self.state.is_generating = False
\ No newline at end of file
diff --git a/sources/interaction.py b/sources/interaction.py
index 1dc03f2..4b6888e 100644
--- a/sources/interaction.py
+++ b/sources/interaction.py
@@ -25,10 +25,10 @@ class Interaction:
         if stt_enabled:
             self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
             self.recorder = AudioRecorder()
-        if tts_enabled:
-            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
         if recover_last_session:
             self.load_last_session()
+        if tts_enabled:
+            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
 
     def find_ai_name(self) -> str:
         """Find the name of the default AI. It is required for STT as a trigger word."""
diff --git a/sources/llm_provider.py b/sources/llm_provider.py
index e7a2f2d..e34109c 100644
--- a/sources/llm_provider.py
+++ b/sources/llm_provider.py
@@ -107,13 +107,15 @@
         Use a remote server with LLM to generate text.
""" thought = "" - route_start = f"http://{self.server_ip}/generate" + route_setup = f"http://{self.server_ip}/setup" + route_gen = f"http://{self.server_ip}/generate" if not self.is_ip_online(self.server_ip.split(":")[0]): raise Exception(f"Server is offline at {self.server_ip}") try: - requests.post(route_start, json={"messages": history}) + requests.post(route_setup, json={"model": self.model, "provider": self.provider_name}) + requests.post(route_gen, json={"messages": history}) is_complete = False while not is_complete: response = requests.get(f"http://{self.server_ip}/get_updated_sentence")