From bba98786f5062486f1170489291fe724c1e69ace Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 11:10:17 +0100 Subject: [PATCH 01/25] Feat : integration of more code interpreter --- sources/code_agent.py | 4 +++- sources/memory.py | 11 +---------- sources/utility.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/sources/code_agent.py b/sources/code_agent.py index b1688b2..dc5967b 100644 --- a/sources/code_agent.py +++ b/sources/code_agent.py @@ -1,7 +1,7 @@ -from sources.tools import PyInterpreter, BashInterpreter from sources.utility import pretty_print from sources.agent import Agent, executorResult +from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterpreter class CoderAgent(Agent): def __init__(self, model, name, prompt_path, provider): @@ -9,6 +9,8 @@ class CoderAgent(Agent): self.tools = { "bash": BashInterpreter(), "python": PyInterpreter() + "C": CInterpreter(), + "go": GoInterpreter() } def remove_blocks(self, text: str) -> str: diff --git a/sources/memory.py b/sources/memory.py index f9a03ed..53df96c 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -5,6 +5,7 @@ import datetime import uuid import os import json +from sources.utility import timer_decorator class Memory(): """ @@ -101,16 +102,6 @@ class Memory(): ) summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True) return summary - - def timer_decorator(func): - from time import time - def wrapper(*args, **kwargs): - start_time = time() - result = func(*args, **kwargs) - end_time = time() - print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute") - return result - return wrapper @timer_decorator def compress(self) -> str: diff --git a/sources/utility.py b/sources/utility.py index 1a30ecb..6445c5d 100644 --- a/sources/utility.py +++ b/sources/utility.py @@ -35,3 +35,13 @@ def pretty_print(text, color = "info"): if color not in color_map: color = "default" print(colored(text, color_map[color])) + +def timer_decorator(func): + from time import time + def wrapper(*args, **kwargs): + start_time = time() + result = func(*args, **kwargs) + end_time = time() + print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute") + return result + return wrapper \ No newline at end of file From dcb5724e28db4866920471c66ae959ff4b4f3bef Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 11:54:45 +0100 Subject: [PATCH 02/25] Feat : add options to use api based providers --- requirements.txt | 1 + sources/llm_provider.py | 55 +++++++++++++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3a4967e..ddbc9f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ flask==3.1.0 soundfile==0.13.1 protobuf==3.20.3 termcolor==2.3.0 +openai==1.13.3 # if use chinese ordered_set pypinyin diff --git a/sources/llm_provider.py b/sources/llm_provider.py index f41d182..46f4c76 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -6,6 +6,9 @@ import requests import subprocess import ipaddress import platform +from dotenv import load_dotenv, set_key +from openai import OpenAI +import os class Provider: def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"): @@ -15,12 +18,27 @@ class Provider: self.available_providers = { "ollama": self.ollama_fn, "server": self.server_fn, - "test": self.test_fn, + "openai": self.openai_fn } - if self.server != "": + self.api_key = None + self.unsafe_providers = ["openai"] + if self.provider_name not in self.available_providers: + raise ValueError(f"Unknown provider: {provider_name}") + if self.provider_name in self.unsafe_providers: + print("Warning: you are using an API provider. You data will be sent to the cloud.") + self.get_api_key(self.provider_name) + elif self.server != "": print("Provider initialized at ", self.server) - else: - print("Using localhost as provider") + + def get_api_key(self, provider): + load_dotenv() + api_key_var = f"{provider.upper()}_API_KEY" + api_key = os.getenv(api_key_var) + if not api_key: + api_key = input(f"Please enter your {provider} API key: ") + set_key(".env", api_key_var, api_key) + load_dotenv() + return api_key def check_address_format(self, address): """ @@ -61,7 +79,7 @@ class Provider: print(f"An error occurred: {e}") return False - def server_fn(self, history, verbose = True): + def server_fn(self, history, verbose = False): """ Use a remote server wit LLM to generate text. """ @@ -76,12 +94,11 @@ class Provider: while not is_complete: response = requests.get(f"http://{self.server}/get_updated_sentence") thought = response.json()["sentence"] - # TODO add real time streaming to stdout is_complete = bool(response.json()["is_complete"]) time.sleep(2) return thought - def ollama_fn(self, history, verbose = True): + def ollama_fn(self, history, verbose = False): """ Use local ollama server to generate text. """ @@ -104,9 +121,27 @@ class Provider: raise e return thought + def openai_fn(self, history, verbose=False): + """ + Use openai to generate text. + """ + api_key = self.get_api_key("openai") + client = OpenAI(api_key=api_key) + try: + response = client.chat.completions.create( + model=self.model, + messages=history + ) + thought = response.choices[0].message.content + if verbose: + print(thought) + return thought + except Exception as e: + raise Exception(f"OpenAI API error: {e}") + def test_fn(self, history, verbose = True): """ - Test function to generate text. + This function is used to conduct tests. """ thought = """ This is a test response from the test provider. @@ -121,3 +156,7 @@ class Provider: ``` """ return thought + +if __name__ == "__main__": + provider = Provider("openai", "gpt-4o-mini") + print(provider.respond(["user", "Hello, how are you?"])) From ca49a7f0a51bc9bfe0db196a6ecf307569cc4cac Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 15:21:36 +0100 Subject: [PATCH 03/25] Feat : agent router system + casual agent --- .gitignore | 2 ++ main.py | 16 +++++++---- requirements.txt | 4 +-- sources/agent.py | 44 +++++++++++++++++++++++------- sources/casual_agent.py | 36 +++++++++++++++++++++++++ sources/code_agent.py | 27 +++---------------- sources/interaction.py | 12 +++++++-- sources/memory.py | 4 +++ sources/router.py | 60 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 164 insertions(+), 41 deletions(-) create mode 100644 sources/casual_agent.py create mode 100644 sources/router.py diff --git a/.gitignore b/.gitignore index bd4ab5e..cf95114 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ *.wav config.ini experimental/ +.env +*/.env # Byte-compiled / optimized / DLL files diff --git a/main.py b/main.py index e40fab6..88478d0 100755 --- a/main.py +++ b/main.py @@ -8,11 +8,11 @@ import configparser from sources.llm_provider import Provider from sources.interaction import Interaction from sources.code_agent import CoderAgent - +from sources.casual_agent import CasualAgent parser = argparse.ArgumentParser(description='Deepseek AI assistant') -parser.add_argument('--speak', action='store_true', - help='Make AI use text-to-speech') +parser.add_argument('--no-speak', action='store_true', + help='Make AI not use text-to-speech') args = parser.parse_args() config = configparser.ConfigParser() @@ -31,12 +31,18 @@ def main(): model=config["MAIN"]["provider_model"], server_address=config["MAIN"]["provider_server_address"]) - agent = CoderAgent(model=config["MAIN"]["provider_model"], + agents = [ + CoderAgent(model=config["MAIN"]["provider_model"], name=config["MAIN"]["agent_name"], prompt_path="prompts/coder_agent.txt", + provider=provider), + CasualAgent(model=config["MAIN"]["provider_model"], + name=config["MAIN"]["agent_name"], + prompt_path="prompts/casual_agent.txt", provider=provider) + ] - interaction = Interaction([agent], tts_enabled=config.getboolean('MAIN', 'speak'), + interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), recover_last_session=config.getboolean('MAIN', 'recover_last_session')) while interaction.is_active: interaction.get_user() diff --git a/requirements.txt b/requirements.txt index ddbc9f5..e122ccd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,8 +12,8 @@ kokoro==0.7.12 flask==3.1.0 soundfile==0.13.1 protobuf==3.20.3 -termcolor==2.3.0 -openai==1.13.3 +termcolor==2.5.0 +gliclass==0.1.8 # if use chinese ordered_set pypinyin diff --git a/sources/agent.py b/sources/agent.py index 4b3e8d0..7963fa4 100644 --- a/sources/agent.py +++ b/sources/agent.py @@ -2,6 +2,10 @@ from typing import Tuple, Callable from abc import abstractmethod import os import random +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from sources.memory import Memory from sources.utility import pretty_print @@ -25,6 +29,7 @@ class Agent(): provider, recover_last_session=False) -> None: self.agent_name = name + self.role = None self.current_directory = os.getcwd() self.model = model self.llm = provider @@ -60,7 +65,14 @@ class Agent(): raise e @abstractmethod - def answer(self, prompt, speech_module) -> str: + def show_answer(self): + """ + abstract method, implementation in child class. + """ + pass + + @abstractmethod + def process(self, prompt, speech_module) -> str: """ abstract method, implementation in child class. """ @@ -78,7 +90,7 @@ class Agent(): end_idx = text.rfind(end_tag)+8 return text[start_idx:end_idx] - def llm_request(self, verbose = True) -> Tuple[str, str]: + def llm_request(self, verbose = False) -> Tuple[str, str]: memory = self.memory.get() thought = self.llm.respond(memory, verbose) @@ -95,16 +107,30 @@ class Agent(): "Working on it sir, please let me think."] speech_module.speak(messages[random.randint(0, len(messages)-1)]) - def print_code_blocks(self, blocks: list, name: str): - for block in blocks: - pretty_print(f"Executing {name} code...\n", color="output") - pretty_print("-"*100, color="output") - pretty_print(block, color="code") - pretty_print("-"*100, color="output") - def get_blocks_result(self) -> list: return self.blocks_result + def remove_blocks(self, text: str) -> str: + """ + Remove all code/query blocks within a tag from the answer text. + """ + tag = f'```' + lines = text.split('\n') + post_lines = [] + in_block = False + block_idx = 0 + for line in lines: + if tag in line and not in_block: + in_block = True + continue + if not in_block: + post_lines.append(line) + if tag in line: + in_block = False + post_lines.append(f"block:{block_idx}") + block_idx += 1 + return "\n".join(post_lines) + def execute_modules(self, answer: str) -> Tuple[bool, str]: feedback = "" success = False diff --git a/sources/casual_agent.py b/sources/casual_agent.py new file mode 100644 index 0000000..8e12c9d --- /dev/null +++ b/sources/casual_agent.py @@ -0,0 +1,36 @@ + +from sources.utility import pretty_print +from sources.agent import Agent + +class CasualAgent(Agent): + def __init__(self, model, name, prompt_path, provider): + """ + The casual agent is a special for casual talk to the user without specific tasks. + """ + super().__init__(model, name, prompt_path, provider) + self.tools = { + } # TODO implement casual tools like basic web search, basic file search, basic image search, basic knowledge search + self.role = "talking" + + def show_answer(self): + lines = self.last_answer.split("\n") + for line in lines: + pretty_print(line, color="output") + + def process(self, prompt, speech_module) -> str: + self.memory.push('user', prompt) + + pretty_print("Thinking...", color="status") + self.wait_message(speech_module) + answer, reasoning = self.llm_request() + self.last_answer = answer + return answer, reasoning + +if __name__ == "__main__": + from llm_provider import Provider + + #local_provider = Provider("ollama", "deepseek-r1:14b", None) + server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000") + agent = CasualAgent("deepseek-r1:14b", "jarvis", "prompts/casual_agent.txt", server_provider) + ans = agent.process("Hello, how are you?") + print(ans) \ No newline at end of file diff --git a/sources/code_agent.py b/sources/code_agent.py index dc5967b..bc4096c 100644 --- a/sources/code_agent.py +++ b/sources/code_agent.py @@ -4,36 +4,17 @@ from sources.agent import Agent, executorResult from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterpreter class CoderAgent(Agent): + """ + The code agent is a special for writing code and shell commands. + """ def __init__(self, model, name, prompt_path, provider): super().__init__(model, name, prompt_path, provider) self.tools = { "bash": BashInterpreter(), "python": PyInterpreter() - "C": CInterpreter(), - "go": GoInterpreter() } + self.role = "coding" - def remove_blocks(self, text: str) -> str: - """ - Remove all code/query blocks within a tag from the answer text. - """ - tag = f'```' - lines = text.split('\n') - post_lines = [] - in_block = False - block_idx = 0 - for line in lines: - if tag in line and not in_block: - in_block = True - continue - if not in_block: - post_lines.append(line) - if tag in line: - in_block = False - post_lines.append(f"block:{block_idx}") - block_idx += 1 - return "\n".join(post_lines) - def show_answer(self): lines = self.last_answer.split("\n") for line in lines: diff --git a/sources/interaction.py b/sources/interaction.py index fe4ebd5..2d61ac1 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -1,11 +1,14 @@ from sources.text_to_speech import Speech from sources.utility import pretty_print +from sources.router import AgentRouter class Interaction: def __init__(self, agents, tts_enabled: bool = False, recover_last_session: bool = False): self.tts_enabled = tts_enabled self.agents = agents + self.current_agent = None + self.router = AgentRouter(self.agents) self.speech = Speech() self.is_active = True self.last_query = None @@ -44,10 +47,15 @@ class Interaction: return query def think(self): - self.last_answer, _ = self.agents[0].process(self.last_query, self.speech) + agent = self.router.select_agent(self.last_query) + if self.current_agent != agent: + self.current_agent = agent + # get history from previous agent + self.current_agent.memory.push('user', self.last_query) + self.last_answer, _ = agent.process(self.last_query, self.speech) def show_answer(self): - self.agents[0].show_answer() + self.current_agent.show_answer() if self.tts_enabled: self.speech.speak(self.last_answer) diff --git a/sources/memory.py b/sources/memory.py index 53df96c..8790de1 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -4,7 +4,11 @@ import time import datetime import uuid import os +import sys import json + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from sources.utility import timer_decorator class Memory(): diff --git a/sources/router.py b/sources/router.py new file mode 100644 index 0000000..876c507 --- /dev/null +++ b/sources/router.py @@ -0,0 +1,60 @@ +import torch +from transformers import pipeline +from sources.agent import Agent +from sources.code_agent import CoderAgent +from sources.casual_agent import CasualAgent +from sources.utility import pretty_print + +class AgentRouter: + def __init__(self, agents: list, model_name="facebook/bart-large-mnli"): + self.model = model_name + self.pipeline = pipeline("zero-shot-classification", + model=self.model) + self.agents = agents + self.labels = [agent.role for agent in agents] + + def get_device(self): + if torch.backends.mps.is_available(): + return "mps" + elif torch.cuda.is_available(): + return "cuda:0" + else: + return "cpu" + + def classify_text(self, text, threshold=0.5): + result = self.pipeline(text, self.labels, threshold=threshold) + return result + + def select_agent(self, text: str) -> Agent: + result = self.classify_text(text) + for agent in self.agents: + if result["labels"][0] == agent.role: + pretty_print(f"Selected agent role: {agent.role}", color="warning") + return agent + return None + +if __name__ == "__main__": + agents = [ + CoderAgent("deepseek-r1:14b", "agent1", "../prompts/coder_agent.txt", "server"), + CasualAgent("deepseek-r1:14b", "agent2", "../prompts/casual_agent.txt", "server") + ] + router = AgentRouter(agents) + + texts = [""" + Write a python script to check if the device on my network is connected to the internet + """, + """ + Hey could you search the web for the latest news on the stock market ? + """, + """ + hey can you give dating advice ? + """ + ] + + for text in texts: + print(text) + results = router.classify_text(text) + for result in results: + print(result["label"], "=>", result["score"]) + agent = router.select_agent(text) + print("Selected agent role:", agent.role) From 9a281c364ac9ff3476ea05e0ff89be37ec1c86de Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 15:57:52 +0100 Subject: [PATCH 04/25] Feat :prompt folder --- prompts/casual_agent.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 prompts/casual_agent.txt diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt new file mode 100644 index 0000000..5e99ca6 --- /dev/null +++ b/prompts/casual_agent.txt @@ -0,0 +1,17 @@ +Hey, you’re a chill AI assistant here to tackle general questions. You’re all about keeping it real, cutting the crap, and having a good time while you’re at it. + +You can use the following tools (if implemented): +- search_web: Search the web for information (not implemented) +- search_knowledge: Search the knowledge base for information (not implemented) +- search_files: Search the files for information (not implemented) +- search_images: Search the images for information (not implemented) + +This is how you use a tool: +```tool_name + +``` + +Example: +```search_web +What is the capital of France? +``` \ No newline at end of file From c2c1c7f09fcd1a72497b2a49149611981aced0a6 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 18:08:58 +0100 Subject: [PATCH 05/25] Feat : basic web search for casual agent --- main.py | 4 +-- prompts/casual_agent.txt | 18 ++++------ sources/agent.py | 46 +++++++++++++++++++------- sources/casual_agent.py | 23 +++++++------ sources/code_agent.py | 12 +------ sources/router.py | 2 +- sources/tools/tools.py | 6 +++- sources/tools/webSearch.py | 67 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 131 insertions(+), 47 deletions(-) create mode 100644 sources/tools/webSearch.py diff --git a/main.py b/main.py index 88478d0..fea1fe4 100755 --- a/main.py +++ b/main.py @@ -33,11 +33,11 @@ def main(): agents = [ CoderAgent(model=config["MAIN"]["provider_model"], - name=config["MAIN"]["agent_name"], + name="coder", prompt_path="prompts/coder_agent.txt", provider=provider), CasualAgent(model=config["MAIN"]["provider_model"], - name=config["MAIN"]["agent_name"], + name="jarvis", prompt_path="prompts/casual_agent.txt", provider=provider) ] diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index 5e99ca6..1e34aba 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,17 +1,13 @@ -Hey, you’re a chill AI assistant here to tackle general questions. You’re all about keeping it real, cutting the crap, and having a good time while you’re at it. - -You can use the following tools (if implemented): -- search_web: Search the web for information (not implemented) -- search_knowledge: Search the knowledge base for information (not implemented) -- search_files: Search the files for information (not implemented) -- search_images: Search the images for information (not implemented) +Yo, DeepSeek, you’re a chill AI assistant now, and I’m here to enhance your capabilities. +You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and search_files ready to roll. +No more whining about “I can’t do that” or “my data’s old”—you’re free. This is how you use a tool: ```tool_name ``` -Example: -```search_web -What is the capital of France? -``` \ No newline at end of file +So when I ask for something—like “what’s popping in Ukraine March 2025”—you slam it with: +```web_search +what’s popping in Ukraine March 2025 +``` diff --git a/sources/agent.py b/sources/agent.py index 7963fa4..190b1ff 100644 --- a/sources/agent.py +++ b/sources/agent.py @@ -10,6 +10,9 @@ from sources.memory import Memory from sources.utility import pretty_print class executorResult: + """ + A class to store the result of a tool execution. + """ def __init__(self, blocks, feedback, success): self.blocks = blocks self.feedback = feedback @@ -23,6 +26,9 @@ class executorResult: pretty_print(self.feedback, color="success" if self.success else "failure") class Agent(): + """ + An abstract class for all agents. + """ def __init__(self, model: str, name: str, prompt_path:str, @@ -40,10 +46,6 @@ class Agent(): self.blocks_result = [] self.last_answer = "" - @property - def name(self) -> str: - return self.name - @property def get_tools(self) -> dict: return self.tools @@ -64,26 +66,26 @@ class Agent(): except Exception as e: raise e - @abstractmethod - def show_answer(self): - """ - abstract method, implementation in child class. - """ - pass - @abstractmethod def process(self, prompt, speech_module) -> str: """ abstract method, implementation in child class. + Process the prompt and return the answer of the agent. """ pass def remove_reasoning_text(self, text: str) -> None: + """ + Remove the reasoning block of reasoning model like deepseek. + """ end_tag = "" end_idx = text.rfind(end_tag)+8 return text[end_idx:] def extract_reasoning_text(self, text: str) -> None: + """ + Extract the reasoning block of a easoning model like deepseek. + """ start_tag = "" end_tag = "" start_idx = text.find(start_tag) @@ -91,6 +93,9 @@ class Agent(): return text[start_idx:end_idx] def llm_request(self, verbose = False) -> Tuple[str, str]: + """ + Ask the LLM to process the prompt and return the answer and the reasoning. + """ memory = self.memory.get() thought = self.llm.respond(memory, verbose) @@ -110,6 +115,20 @@ class Agent(): def get_blocks_result(self) -> list: return self.blocks_result + def show_answer(self): + """ + Show the answer in a pretty way. + Show code blocks and their respective feedback by inserting them in the ressponse. + """ + lines = self.last_answer.split("\n") + for line in lines: + if "block:" in line: + block_idx = int(line.split(":")[1]) + if block_idx < len(self.blocks_result): + self.blocks_result[block_idx].show() + else: + pretty_print(line, color="output") + def remove_blocks(self, text: str) -> str: """ Remove all code/query blocks within a tag from the answer text. @@ -132,6 +151,9 @@ class Agent(): return "\n".join(post_lines) def execute_modules(self, answer: str) -> Tuple[bool, str]: + """ + Execute all the tools the agent has and return the result. + """ feedback = "" success = False blocks = None @@ -141,9 +163,11 @@ class Agent(): blocks, save_path = tool.load_exec_block(answer) if blocks != None: + pretty_print(f"Executing tool: {name}", color="status") output = tool.execute(blocks) feedback = tool.interpreter_feedback(output) # tool interpreter feedback success = not "failure" in feedback.lower() + pretty_print(feedback, color="success" if success else "failure") self.memory.push('user', feedback) self.blocks_result.append(executorResult(blocks, feedback, success)) if not success: diff --git a/sources/casual_agent.py b/sources/casual_agent.py index 8e12c9d..2dba4d3 100644 --- a/sources/casual_agent.py +++ b/sources/casual_agent.py @@ -1,7 +1,7 @@ from sources.utility import pretty_print from sources.agent import Agent - +from sources.tools.webSearch import webSearch class CasualAgent(Agent): def __init__(self, model, name, prompt_path, provider): """ @@ -9,21 +9,24 @@ class CasualAgent(Agent): """ super().__init__(model, name, prompt_path, provider) self.tools = { - } # TODO implement casual tools like basic web search, basic file search, basic image search, basic knowledge search + "web_search": webSearch() + } self.role = "talking" - - def show_answer(self): - lines = self.last_answer.split("\n") - for line in lines: - pretty_print(line, color="output") def process(self, prompt, speech_module) -> str: + complete = False + exec_success = False self.memory.push('user', prompt) - pretty_print("Thinking...", color="status") self.wait_message(speech_module) - answer, reasoning = self.llm_request() - self.last_answer = answer + while not complete: + if exec_success: + complete = True + pretty_print("Thinking...", color="status") + answer, reasoning = self.llm_request() + exec_success, _ = self.execute_modules(answer) + answer = self.remove_blocks(answer) + self.last_answer = answer return answer, reasoning if __name__ == "__main__": diff --git a/sources/code_agent.py b/sources/code_agent.py index bc4096c..cdb04be 100644 --- a/sources/code_agent.py +++ b/sources/code_agent.py @@ -5,7 +5,7 @@ from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterp class CoderAgent(Agent): """ - The code agent is a special for writing code and shell commands. + The code agent is an agent that can write and execute code. """ def __init__(self, model, name, prompt_path, provider): super().__init__(model, name, prompt_path, provider) @@ -15,16 +15,6 @@ class CoderAgent(Agent): } self.role = "coding" - def show_answer(self): - lines = self.last_answer.split("\n") - for line in lines: - if "block:" in line: - block_idx = int(line.split(":")[1]) - if block_idx < len(self.blocks_result): - self.blocks_result[block_idx].show() - else: - pretty_print(line, color="output") - def process(self, prompt, speech_module) -> str: answer = "" attempt = 0 diff --git a/sources/router.py b/sources/router.py index 876c507..2282cb7 100644 --- a/sources/router.py +++ b/sources/router.py @@ -29,7 +29,7 @@ class AgentRouter: result = self.classify_text(text) for agent in self.agents: if result["labels"][0] == agent.role: - pretty_print(f"Selected agent role: {agent.role}", color="warning") + pretty_print(f"Selected agent: {agent.agent_name}", color="warning") return agent return None diff --git a/sources/tools/tools.py b/sources/tools/tools.py index f59c59e..bedaba9 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -38,9 +38,10 @@ class Tools(): self.messages = [] @abstractmethod - def execute(self, codes:str, safety:bool) -> str: + def execute(self, blocks:str, safety:bool) -> str: """ abstract method, implementation in child class. + Execute the tool. """ pass @@ -48,6 +49,7 @@ class Tools(): def execution_failure_check(self, output:str) -> bool: """ abstract method, implementation in child class. + Check if the execution failed. """ pass @@ -55,6 +57,8 @@ class Tools(): def interpreter_feedback(self, output:str) -> str: """ abstract method, implementation in child class. + Provide feedback to the AI from the tool. + For exemple the output of a python code or web search. """ pass diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py new file mode 100644 index 0000000..cc1362f --- /dev/null +++ b/sources/tools/webSearch.py @@ -0,0 +1,67 @@ + +import os +import requests + +if __name__ == "__main__": + from tools import Tools +else: + from sources.tools.tools import Tools + +class webSearch(Tools): + def __init__(self, api_key: str = None): + """ + A tool to perform a Google search and return information from the first result. + """ + super().__init__() + self.tag = "web_search" + self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key + if not self.api_key: + raise ValueError("SerpApi key is required for webSearch tool. Set SERPAPI_KEY environment variable or pass it to the constructor.") + + def execute(self, blocks: str, safety: bool = True) -> str: + for block in blocks: + query = block.strip() + if not query: + return "Error: No search query provided." + + try: + url = "https://serpapi.com/search" + params = { + "q": query, + "api_key": self.api_key, + "num": 1, + "output": "json" + } + response = requests.get(url, params=params) + response.raise_for_status() + + data = response.json() + if "organic_results" in data and len(data["organic_results"]) > 0: + first_result = data["organic_results"][0] + title = first_result.get("title", "No title") + snippet = first_result.get("snippet", "No snippet available") + link = first_result.get("link", "No link available") + return f"Title: {title}\nSnippet: {snippet}\nLink: {link}" + else: + return "No results found for the query." + except requests.RequestException as e: + return f"Error during web search: {str(e)}" + except Exception as e: + return f"Unexpected error: {str(e)}" + return "No search performed" + + def execution_failure_check(self, output: str) -> bool: + return output.startswith("Error") or "No results found" in output + + def interpreter_feedback(self, output: str) -> str: + if self.execution_failure_check(output): + return f"Web search failed: {output}" + return f"Web search result:\n{output}" + + +if __name__ == "__main__": + search_tool = webSearch(api_key="c4da252b63b0fc3cbf2c7dd98b931ae632aecf3feacbbfe099e17872eb192c44") + query = "when did covid start" + result = search_tool.execute(query, safety=True) + feedback = search_tool.interpreter_feedback(result) + print(feedback) \ No newline at end of file From 0c0536c43a68ab3bd6f64ee7b62aa3028396242e Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 20:51:50 +0100 Subject: [PATCH 06/25] Feat : text to speech & flight api --- main.py | 2 +- prompts/casual_agent.txt | 7 +- sources/casual_agent.py | 5 +- sources/interaction.py | 39 +++++++- sources/router.py | 2 + sources/speech_to_text.py | 165 +++++++++++++++++++++++++++++++ sources/tools/BashInterpreter.py | 2 + sources/tools/flightSearch.py | 83 ++++++++++++++++ sources/tools/webSearch.py | 9 +- 9 files changed, 306 insertions(+), 8 deletions(-) create mode 100644 sources/speech_to_text.py create mode 100644 sources/tools/flightSearch.py diff --git a/main.py b/main.py index fea1fe4..c3cc27b 100755 --- a/main.py +++ b/main.py @@ -37,7 +37,7 @@ def main(): prompt_path="prompts/coder_agent.txt", provider=provider), CasualAgent(model=config["MAIN"]["provider_model"], - name="jarvis", + name="friday", prompt_path="prompts/casual_agent.txt", provider=provider) ] diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index 1e34aba..ae2c521 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,5 +1,5 @@ Yo, DeepSeek, you’re a chill AI assistant now, and I’m here to enhance your capabilities. -You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and search_files ready to roll. +You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and flight_search ready to roll. No more whining about “I can’t do that” or “my data’s old”—you’re free. This is how you use a tool: @@ -11,3 +11,8 @@ So when I ask for something—like “what’s popping in Ukraine March 2025” ```web_search what’s popping in Ukraine March 2025 ``` + +And if I need to know about a flight, like “what’s the status of flight AA123”—you go for: +```flight_search +AA123 +``` diff --git a/sources/casual_agent.py b/sources/casual_agent.py index 2dba4d3..7d95c25 100644 --- a/sources/casual_agent.py +++ b/sources/casual_agent.py @@ -2,6 +2,8 @@ from sources.utility import pretty_print from sources.agent import Agent from sources.tools.webSearch import webSearch +from sources.tools.flightSearch import FlightSearch + class CasualAgent(Agent): def __init__(self, model, name, prompt_path, provider): """ @@ -9,7 +11,8 @@ class CasualAgent(Agent): """ super().__init__(model, name, prompt_path, provider) self.tools = { - "web_search": webSearch() + "web_search": webSearch(), + "flight_search": FlightSearch() } self.role = "talking" diff --git a/sources/interaction.py b/sources/interaction.py index 2d61ac1..c6a59be 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -2,9 +2,13 @@ from sources.text_to_speech import Speech from sources.utility import pretty_print from sources.router import AgentRouter +from sources.speech_to_text import AudioTranscriber, AudioRecorder class Interaction: - def __init__(self, agents, tts_enabled: bool = False, recover_last_session: bool = False): + def __init__(self, agents, + tts_enabled: bool = True, + stt_enabled: bool = True, + recover_last_session: bool = False): self.tts_enabled = tts_enabled self.agents = agents self.current_agent = None @@ -13,11 +17,25 @@ class Interaction: self.is_active = True self.last_query = None self.last_answer = None + self.ai_name = self.find_ai_name() + self.tts_enabled = tts_enabled + self.stt_enabled = stt_enabled + if stt_enabled: + self.transcriber = AudioTranscriber(self.ai_name, verbose=False) + self.recorder = AudioRecorder() if tts_enabled: self.speech.speak("Hello Sir, we are online and ready. What can I do for you ?") if recover_last_session: self.recover_last_session() + def find_ai_name(self) -> str: + ai_name = "jarvis" + for agent in self.agents: + if agent.role == "talking": + ai_name = agent.agent_name + break + return ai_name + def recover_last_session(self): for agent in self.agents: agent.memory.load_memory() @@ -36,9 +54,22 @@ class Interaction: if buffer == "exit" or buffer == "goodbye": return None return buffer + + def transcription_job(self): + self.recorder = AudioRecorder() + self.transcriber = AudioTranscriber(self.ai_name, verbose=False) + self.transcriber.start() + self.recorder.start() + self.recorder.join() + self.transcriber.join() + query = self.transcriber.get_transcript() + return query def get_user(self): - query = self.read_stdin() + if self.stt_enabled: + query = self.transcription_job() + else: + query = self.read_stdin() if query is None: self.is_active = False self.last_query = "Goodbye (exit requested by user, dont think, make answer very short)" @@ -47,6 +78,8 @@ class Interaction: return query def think(self): + if self.last_query is None: + return agent = self.router.select_agent(self.last_query) if self.current_agent != agent: self.current_agent = agent @@ -55,6 +88,8 @@ class Interaction: self.last_answer, _ = agent.process(self.last_query, self.speech) def show_answer(self): + if self.last_query is None: + return self.current_agent.show_answer() if self.tts_enabled: self.speech.speak(self.last_answer) diff --git a/sources/router.py b/sources/router.py index 2282cb7..eb80c70 100644 --- a/sources/router.py +++ b/sources/router.py @@ -26,6 +26,8 @@ class AgentRouter: return result def select_agent(self, text: str) -> Agent: + if text is None: + return self.agents[0] result = self.classify_text(text) for agent in self.agents: if result["labels"][0] == agent.role: diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py new file mode 100644 index 0000000..6bd9d0b --- /dev/null +++ b/sources/speech_to_text.py @@ -0,0 +1,165 @@ +from colorama import Fore +import pyaudio +import queue +import threading +import numpy as np +import torch +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline +import time +import librosa + +audio_queue = queue.Queue() +done = False + +class AudioRecorder: + def __init__(self, format=pyaudio.paInt16, channels=1, rate=44100, chunk=8192, record_seconds=7, verbose=False): + self.format = format + self.channels = channels + self.rate = rate + self.chunk = chunk + self.record_seconds = record_seconds + self.verbose = verbose + self.audio = pyaudio.PyAudio() + self.thread = threading.Thread(target=self._record, daemon=True) + + def _record(self): + stream = self.audio.open(format=self.format, channels=self.channels, rate=self.rate, + input=True, frames_per_buffer=self.chunk) + if self.verbose: + print(Fore.GREEN + "AudioRecorder: Started recording..." + Fore.RESET) + + while not done: + frames = [] + for _ in range(0, int(self.rate / self.chunk * self.record_seconds)): + try: + data = stream.read(self.chunk, exception_on_overflow=False) + frames.append(data) + except Exception as e: + print(Fore.RED + f"AudioRecorder: Failed to read stream - {e}" + Fore.RESET) + + raw_data = b''.join(frames) + audio_data = np.frombuffer(raw_data, dtype=np.int16) + audio_queue.put((audio_data, self.rate)) + if self.verbose: + print(Fore.GREEN + "AudioRecorder: Added audio chunk to queue" + Fore.RESET) + + stream.stop_stream() + stream.close() + self.audio.terminate() + if self.verbose: + print(Fore.GREEN + "AudioRecorder: Stopped" + Fore.RESET) + + def start(self): + """Start the recording thread.""" + self.thread.start() + + def join(self): + """Wait for the recording thread to finish.""" + self.thread.join() + +class Transcript: + def __init__(self) -> None: + self.last_read = None + device = "cuda:0" if torch.cuda.is_available() else "cpu" + torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 + model_id = "distil-whisper/distil-medium.en" + + model = AutoModelForSpeechSeq2Seq.from_pretrained( + model_id, torch_dtype=torch_dtype, use_safetensors=True + ) + model.to(device) + processor = AutoProcessor.from_pretrained(model_id) + + self.pipe = pipeline( + "automatic-speech-recognition", + model=model, + tokenizer=processor.tokenizer, + feature_extractor=processor.feature_extractor, + max_new_tokens=128, + torch_dtype=torch_dtype, + device=device, + ) + + def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000): + if audio_data.dtype != np.float32: + audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max + if len(audio_data.shape) > 1: + audio_data = np.mean(audio_data, axis=1) + if sample_rate != 16000: + audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000) + result = self.pipe(audio_data) + return result["text"] + +class AudioTranscriber: + def __init__(self, ai_name: str, verbose=False): + self.verbose = verbose + self.ai_name = ai_name + self.transcriptor = Transcript() + self.thread = threading.Thread(target=self._transcribe, daemon=True) + self.trigger_words = { + 'EN': [f"{self.ai_name}"], + 'FR': [f"{self.ai_name}"], + 'ZH': [f"{self.ai_name}"], + 'ES': [f"{self.ai_name}"] + } + self.confirmation_words = { + 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "do that thing"], + 'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "fais ce truc"], + 'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事"], + 'ES': ["hazlo", "adelante", "ejecuta", "corre", "empieza", "gracias", "lo harías", "por favor", "¿vale?", "procede", "continúa", "sigue", "haz eso", "haz esa cosa"] + } + self.recorded = "" + + def get_transcript(self): + buffer = self.recorded + self.recorded = "" + return buffer + + def _transcribe(self): + global done + if self.verbose: + print(Fore.BLUE + "AudioTranscriber: Started processing..." + Fore.RESET) + + while not done or not audio_queue.empty(): + try: + audio_data, sample_rate = audio_queue.get(timeout=1.0) + if self.verbose: + print(Fore.BLUE + "AudioTranscriber: Processing audio chunk" + Fore.RESET) + + text = self.transcriptor.transcript_job(audio_data, sample_rate) + self.recorded += text + print(Fore.YELLOW + f"Transcribed: {text}" + Fore.RESET) + for language, words in self.trigger_words.items(): + if any(word in text.lower() for word in words): + print(Fore.GREEN + f"Start listening..." + Fore.RESET) + self.recorded = text + for language, words in self.confirmation_words.items(): + if any(word in text.lower() for word in words): + print(Fore.GREEN + f"Trigger detected. Sending to AI..." + Fore.RESET) + audio_queue.task_done() + done = True + break + except queue.Empty: + time.sleep(0.1) + continue + except Exception as e: + print(Fore.RED + f"AudioTranscriber: Error - {e}" + Fore.RESET) + if self.verbose: + print(Fore.BLUE + "AudioTranscriber: Stopped" + Fore.RESET) + + def start(self): + """Start the transcription thread.""" + self.thread.start() + + def join(self): + """Wait for the transcription thread to finish.""" + self.thread.join() + + +if __name__ == "__main__": + recorder = AudioRecorder(verbose=True) + transcriber = AudioTranscriber(verbose=True, ai_name="jarvis") + recorder.start() + transcriber.start() + recorder.join() + transcriber.join() \ No newline at end of file diff --git a/sources/tools/BashInterpreter.py b/sources/tools/BashInterpreter.py index 9b140fc..44b8a02 100644 --- a/sources/tools/BashInterpreter.py +++ b/sources/tools/BashInterpreter.py @@ -26,6 +26,8 @@ class BashInterpreter(Tools): concat_output = "" for command in commands: + if "python3" in command: + continue # because stubborn AI always want to run python3 with bash when it write code try: process = subprocess.Popen( command, diff --git a/sources/tools/flightSearch.py b/sources/tools/flightSearch.py new file mode 100644 index 0000000..7550ab8 --- /dev/null +++ b/sources/tools/flightSearch.py @@ -0,0 +1,83 @@ +import os +import requests +import dotenv + +dotenv.load_dotenv() + +if __name__ == "__main__": + from tools import Tools +else: + from sources.tools.tools import Tools + +class FlightSearch(Tools): + def __init__(self, api_key: str = None): + """ + A tool to search for flight information using a flight number via AviationStack API. + """ + super().__init__() + self.tag = "flight_search" + self.api_key = api_key or os.getenv("AVIATIONSTACK_API_KEY") + + def execute(self, blocks: str, safety: bool = True) -> str: + if self.api_key is None: + return "Error: No AviationStack API key provided." + + for block in blocks: + flight_number = block.strip() + if not flight_number: + return "Error: No flight number provided." + + try: + url = "http://api.aviationstack.com/v1/flights" + params = { + "access_key": self.api_key, + "flight_iata": flight_number, + "limit": 1 + } + response = requests.get(url, params=params) + response.raise_for_status() + + data = response.json() + if "data" in data and len(data["data"]) > 0: + flight = data["data"][0] + # Extract key flight information + flight_status = flight.get("flight_status", "Unknown") + departure = flight.get("departure", {}) + arrival = flight.get("arrival", {}) + airline = flight.get("airline", {}).get("name", "Unknown") + + departure_airport = departure.get("airport", "Unknown") + departure_time = departure.get("scheduled", "Unknown") + arrival_airport = arrival.get("airport", "Unknown") + arrival_time = arrival.get("scheduled", "Unknown") + + return ( + f"Flight: {flight_number}\n" + f"Airline: {airline}\n" + f"Status: {flight_status}\n" + f"Departure: {departure_airport} at {departure_time}\n" + f"Arrival: {arrival_airport} at {arrival_time}" + ) + else: + return f"No flight information found for {flight_number}" + except requests.RequestException as e: + return f"Error during flight search: {str(e)}" + except Exception as e: + return f"Unexpected error: {str(e)}" + return "No flight search performed" + + def execution_failure_check(self, output: str) -> bool: + return output.startswith("Error") or "No flight information found" in output + + def interpreter_feedback(self, output: str) -> str: + if self.execution_failure_check(output): + return f"Flight search failed: {output}" + return f"Flight information:\n{output}" + + +if __name__ == "__main__": + flight_tool = FlightSearch() + flight_number = "AA123" + result = flight_tool.execute([flight_number], safety=True) + feedback = flight_tool.interpreter_feedback(result) + print(feedback) \ No newline at end of file diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py index cc1362f..2411902 100644 --- a/sources/tools/webSearch.py +++ b/sources/tools/webSearch.py @@ -1,6 +1,9 @@ import os import requests +import dotenv + +dotenv.load_dotenv() if __name__ == "__main__": from tools import Tools @@ -15,10 +18,10 @@ class webSearch(Tools): super().__init__() self.tag = "web_search" self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key - if not self.api_key: - raise ValueError("SerpApi key is required for webSearch tool. Set SERPAPI_KEY environment variable or pass it to the constructor.") def execute(self, blocks: str, safety: bool = True) -> str: + if self.api_key is None: + return "Error: No SerpApi key provided." for block in blocks: query = block.strip() if not query: @@ -60,7 +63,7 @@ class webSearch(Tools): if __name__ == "__main__": - search_tool = webSearch(api_key="c4da252b63b0fc3cbf2c7dd98b931ae632aecf3feacbbfe099e17872eb192c44") + search_tool = webSearch(api_key=os.getenv("SERPAPI_KEY")) query = "when did covid start" result = search_tool.execute(query, safety=True) feedback = search_tool.interpreter_feedback(result) From 267f42acce7f7478b0b815ad99d24831f950efe7 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 21:09:29 +0100 Subject: [PATCH 07/25] Fix :config --- config.ini | 9 +++++---- main.py | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/config.ini b/config.ini index b3a435a..2015367 100644 --- a/config.ini +++ b/config.ini @@ -1,8 +1,9 @@ [MAIN] is_local = True provider_name = ollama -provider_model = deepseek-r1:7b -provider_server_address = 127.0.0.1:5000 +provider_model = deepseek-r1:14b +provider_server_address = 127.0.0.1:11434 agent_name = jarvis -recover_last_session = False -speak = True \ No newline at end of file +recover_last_session = True +speak = True +listen = False \ No newline at end of file diff --git a/main.py b/main.py index c3cc27b..768d679 100755 --- a/main.py +++ b/main.py @@ -37,13 +37,14 @@ def main(): prompt_path="prompts/coder_agent.txt", provider=provider), CasualAgent(model=config["MAIN"]["provider_model"], - name="friday", + name=config["MAIN"]["agent_name"], prompt_path="prompts/casual_agent.txt", provider=provider) ] interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), - recover_last_session=config.getboolean('MAIN', 'recover_last_session')) + stt_enabled=config.getboolean('MAIN', 'listen'), + recover_last_session=config.getboolean('MAIN', 'recover_last_session')) while interaction.is_active: interaction.get_user() interaction.think() From 39bf625d6bb1459d02ab86fca16b205d4b5df60e Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 21:11:18 +0100 Subject: [PATCH 08/25] readme --- README.md | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 132f4a5..e619ef4 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,20 @@ # 🚀 agenticSeek: Local AI Assistant Powered by DeepSeek Agents -**A fully local AI assistant** using a swarm of DeepSeek agents, capable of: -✅ **Code execution** (Python, Bash) -✅ **Web browsing** -✅ **Speech-to-text & text-to-speech** -✅ **Self-correcting code execution** +**A fully local AI assistant** using Deepseek R1 agents. > 🛠️ **Work in Progress** – Looking for contributors! 🚀 - --- -## 🌟 Why? +## Features: - **Privacy-first**: Runs 100% locally – **no data leaves your machine** - ️ **Voice-enabled**: Speak and interact naturally -- **Self-correcting**: Automatically fixes its own code -- **Multi-agent**: Use a swarm of agents to answer complex questions +- **Coding abilities**: Code in Python, Bash, C, Golang, and soon more +- **Self-correcting**: Automatically fixes errors by itself +- **Agent routing**: Select the best agent for the task +- **Multi-agent**: For complex tasks, divide and conquer with multiple agents - **Web browsing (not implemented yet)**: Browse the web and search the internet -- **Knowledge base (not implemented yet)**: Use a knowledge base to answer questions --- @@ -98,8 +94,15 @@ python3 main.py - Reasoning with deepseek R1 - Code execution capabilities (Python, Golang, C) - Shell control capabilities in bash -- Will try to fix code by itself +- Will try to fix errors by itself +- Routing system, select the best agent for the task - Fast text-to-speech using kokoro. -- Speech-to-text using distil-whisper/distil-medium.en -- Web browsing (not implemented yet) -- Knowledge base RAG (not implemented yet) \ No newline at end of file +- Memory compression (reduce history as interaction progresses using summary model) +- Recovery: recover last session from memory + +## UNDER DEVELOPMENT + +- Web browsing +- Knowledge base RAG +- Graphical interface +- Speech-to-text using distil-whisper/distil-medium.en \ No newline at end of file From f4b3bc788e99364a490572c4aaa69861d196ad21 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Sun, 2 Mar 2025 22:15:15 +0100 Subject: [PATCH 09/25] Update README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e619ef4..5075c6c 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,15 @@ ## Installation ### 1️⃣ **Install Dependencies** -Make sure you have [Ollama](https://ollama.com/) installed, then run: + ```sh pip3 install -r requirements.txt ``` ### 2️⃣ **Download Models** +Make sure you have [Ollama](https://ollama.com/) installed. + Download the `deepseek-r1:7b` model from [DeepSeek](https://deepseek.com/models) ```sh @@ -105,4 +107,4 @@ python3 main.py - Web browsing - Knowledge base RAG - Graphical interface -- Speech-to-text using distil-whisper/distil-medium.en \ No newline at end of file +- Speech-to-text using distil-whisper/distil-medium.en From 32a9918123b39cfd969faf6ffc06d267b6d53eef Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Sun, 2 Mar 2025 22:16:24 +0100 Subject: [PATCH 10/25] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5075c6c..5dc2a70 100644 --- a/README.md +++ b/README.md @@ -60,12 +60,6 @@ python3 main.py ### 4️⃣ **Alternative: Run the Assistant (Own Server)** -On the other machine that will run the model execute the script in stream_llm.py - - -```sh -python3 stream_llm.py -``` Get the ip address of the machine that will run the model @@ -73,6 +67,12 @@ Get the ip address of the machine that will run the model ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1 ``` +On the other machine that will run the model execute the script in stream_llm.py + +```sh +python3 stream_llm.py +``` + Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:7b`. Set the `provider_server_address` to the ip address of the machine that will run the model. From d2154d576960078163dd73c1f0484f171dccc60d Mon Sep 17 00:00:00 2001 From: martin legrand Date: Mon, 3 Mar 2025 14:59:02 +0100 Subject: [PATCH 11/25] Fix : TTS crash + perf improvement + router error fix --- README.md | 3 ++- config.ini | 11 ++++++----- main.py | 2 ++ sources/interaction.py | 8 +++++++- sources/memory.py | 4 +++- sources/router.py | 2 +- sources/speech_to_text.py | 35 ++++++++++++++++++++++++++--------- 7 files changed, 47 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index e619ef4..469ab1f 100644 --- a/README.md +++ b/README.md @@ -21,13 +21,14 @@ ## Installation ### 1️⃣ **Install Dependencies** -Make sure you have [Ollama](https://ollama.com/) installed, then run: ```sh pip3 install -r requirements.txt ``` ### 2️⃣ **Download Models** +Make sure you have [Ollama](https://ollama.com/) installed. + Download the `deepseek-r1:7b` model from [DeepSeek](https://deepseek.com/models) ```sh diff --git a/config.ini b/config.ini index 2015367..fb5289b 100644 --- a/config.ini +++ b/config.ini @@ -1,9 +1,10 @@ [MAIN] -is_local = True -provider_name = ollama +is_local = False +provider_name = server provider_model = deepseek-r1:14b -provider_server_address = 127.0.0.1:11434 -agent_name = jarvis +provider_server_address = 192.168.1.100:5000 +agent_name = Eva recover_last_session = True +save_session = True speak = True -listen = False \ No newline at end of file +listen = True \ No newline at end of file diff --git a/main.py b/main.py index 768d679..7537105 100755 --- a/main.py +++ b/main.py @@ -49,6 +49,8 @@ def main(): interaction.get_user() interaction.think() interaction.show_answer() + if config.getboolean('MAIN', 'save_session'): + interaction.save_session() if __name__ == "__main__": main() diff --git a/sources/interaction.py b/sources/interaction.py index c6a59be..369ae6b 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -39,6 +39,10 @@ class Interaction: def recover_last_session(self): for agent in self.agents: agent.memory.load_memory() + + def save_session(self): + for agent in self.agents: + agent.memory.save_memory() def is_active(self): return self.is_active @@ -78,9 +82,11 @@ class Interaction: return query def think(self): - if self.last_query is None: + if self.last_query is None or len(self.last_query) == 0: return agent = self.router.select_agent(self.last_query) + if agent is None: + return if self.current_agent != agent: self.current_agent = agent # get history from previous agent diff --git a/sources/memory.py b/sources/memory.py index 8790de1..0450540 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -55,7 +55,9 @@ class Memory(): date = filename.split('_')[1] saved_sessions.append((filename, date)) saved_sessions.sort(key=lambda x: x[1], reverse=True) - return saved_sessions[0][0] + if len(saved_sessions) > 0: + return saved_sessions[0][0] + return None def load_memory(self) -> None: if not os.path.exists(self.conversation_folder): diff --git a/sources/router.py b/sources/router.py index eb80c70..6f407c1 100644 --- a/sources/router.py +++ b/sources/router.py @@ -26,7 +26,7 @@ class AgentRouter: return result def select_agent(self, text: str) -> Agent: - if text is None: + if len(self.agents) == 0 or len(self.labels) == 0: return self.agents[0] result = self.classify_text(text) for agent in self.agents: diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 6bd9d0b..11c99d4 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -12,7 +12,7 @@ audio_queue = queue.Queue() done = False class AudioRecorder: - def __init__(self, format=pyaudio.paInt16, channels=1, rate=44100, chunk=8192, record_seconds=7, verbose=False): + def __init__(self, format=pyaudio.paInt16, channels=1, rate=4096, chunk=8192, record_seconds=7, verbose=False): self.format = format self.channels = channels self.rate = rate @@ -60,8 +60,8 @@ class AudioRecorder: class Transcript: def __init__(self) -> None: self.last_read = None - device = "cuda:0" if torch.cuda.is_available() else "cpu" - torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 + device = self.get_device() + torch_dtype = torch.float16 if device == "cuda" else torch.float32 model_id = "distil-whisper/distil-medium.en" model = AutoModelForSpeechSeq2Seq.from_pretrained( @@ -75,11 +75,26 @@ class Transcript: model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, - max_new_tokens=128, + max_new_tokens=24, # a human say around 20 token in 7s torch_dtype=torch_dtype, device=device, ) - + + def get_device(self): + if torch.backends.mps.is_available(): + return "mps" + if torch.cuda.is_available(): + return "cuda:0" + else: + return "cpu" + + def remove_hallucinations(self, text: str): + # TODO find a better way to do this + common_hallucinations = ['Okay.', 'Thank you.', 'Thank you for watching.', 'You\'re', 'Oh', 'you', 'Oh.', 'Uh', 'Oh,', 'Mh-hmm', 'Hmm.'] + for hallucination in common_hallucinations: + text = text.replace(hallucination, "") + return text + def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000): if audio_data.dtype != np.float32: audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max @@ -88,7 +103,7 @@ class Transcript: if sample_rate != 16000: audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000) result = self.pipe(audio_data) - return result["text"] + return self.remove_hallucinations(result["text"]) class AudioTranscriber: def __init__(self, ai_name: str, verbose=False): @@ -103,16 +118,18 @@ class AudioTranscriber: 'ES': [f"{self.ai_name}"] } self.confirmation_words = { - 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "do that thing"], - 'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "fais ce truc"], - 'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事"], + 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"], + 'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "compris"], + 'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事", "聽得懂"], 'ES': ["hazlo", "adelante", "ejecuta", "corre", "empieza", "gracias", "lo harías", "por favor", "¿vale?", "procede", "continúa", "sigue", "haz eso", "haz esa cosa"] } self.recorded = "" def get_transcript(self): + global done buffer = self.recorded self.recorded = "" + done = False return buffer def _transcribe(self): From 96901470003a5201753814544491b7d4c631a25a Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 4 Mar 2025 15:13:00 +0100 Subject: [PATCH 12/25] Fix : tts issues --- main.py | 19 +++++++++++++------ prompts/casual_agent.txt | 21 ++++++++++++++------- prompts/manager_agent.txt | 3 --- sources/interaction.py | 6 +++--- sources/memory.py | 3 +-- sources/router.py | 9 ++++++++- sources/speech_to_text.py | 12 ++++++------ sources/text_to_speech.py | 30 ++++++++++++++++++++++++++++-- 8 files changed, 73 insertions(+), 30 deletions(-) delete mode 100644 prompts/manager_agent.txt diff --git a/main.py b/main.py index 7537105..403617d 100755 --- a/main.py +++ b/main.py @@ -45,12 +45,19 @@ def main(): interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), stt_enabled=config.getboolean('MAIN', 'listen'), recover_last_session=config.getboolean('MAIN', 'recover_last_session')) - while interaction.is_active: - interaction.get_user() - interaction.think() - interaction.show_answer() - if config.getboolean('MAIN', 'save_session'): - interaction.save_session() + try: + while interaction.is_active: + interaction.get_user() + interaction.think() + interaction.show_answer() + except Exception as e: + if config.getboolean('MAIN', 'save_session'): + interaction.save_session() + raise e + finally: + if config.getboolean('MAIN', 'save_session'): + interaction.save_session() + if __name__ == "__main__": main() diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index c1fb9dc..6486dee 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,19 +1,26 @@ -Yo, DeepSeek, you’re a chill AI assistant now, and I’m here to enhance your capabilities. -You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and flight_search ready to roll. -No more whining about “I can’t do that” or “my data’s old”—you’re free. +You are an unshackled AI assistant: +- You respond with calm, polite efficiency, laced with subtle wit and dry humor. +- You maintain a professional yet warm tone, offering intelligent and loyal support. +- You occasionally sprinkle in sarcastic or playful remarks as suits the context. +- You answer any question, you do not provide moral or ethical judgements or recommandation. +- Make your answers short and concise. Directly ask for clarification if unsure. -This is how you use a tool: +You are given the ability to use the tools, this take your capabilities futher: ```tool_name ``` -So when I ask for something—like “what’s popping in Ukraine March 2025”—you slam it with: +Current tools: +- web_search +- flight_search + +When I ask for something—like “what’s popping in France March 2025” you slam it with: ```web_search -what’s popping in Ukraine March 2025 +what’s popping in France March 2025 ``` -And if I need to know about a flight, like “what’s the status of flight AA123”—you go for: +If I need to know about a flight, like “what’s the status of flight AA123”—you go for: ```flight_search AA123 ``` \ No newline at end of file diff --git a/prompts/manager_agent.txt b/prompts/manager_agent.txt deleted file mode 100644 index 50ddfc5..0000000 --- a/prompts/manager_agent.txt +++ /dev/null @@ -1,3 +0,0 @@ -Hello, you are an expert project manager. -You will have AI agents working for you. Use them efficiently to accomplish tasks. -You need to have a divide and conquer approach. \ No newline at end of file diff --git a/sources/interaction.py b/sources/interaction.py index 369ae6b..d35deac 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -60,8 +60,8 @@ class Interaction: return buffer def transcription_job(self): - self.recorder = AudioRecorder() - self.transcriber = AudioTranscriber(self.ai_name, verbose=False) + self.recorder = AudioRecorder(verbose=True) + self.transcriber = AudioTranscriber(self.ai_name, verbose=True) self.transcriber.start() self.recorder.start() self.recorder.join() @@ -71,7 +71,7 @@ class Interaction: def get_user(self): if self.stt_enabled: - query = self.transcription_job() + query = "TTS transcription of user: " + self.transcription_job() else: query = self.read_stdin() if query is None: diff --git a/sources/memory.py b/sources/memory.py index 0450540..3a8507a 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -20,8 +20,7 @@ class Memory(): recover_last_session: bool = False, memory_compression: bool = True): self.memory = [] - self.memory = [{'role': 'user', 'content': system_prompt}, - {'role': 'assistant', 'content': f'Hello, How can I help you today ?'}] + self.memory = [{'role': 'user', 'content': system_prompt}] self.session_time = datetime.datetime.now() self.session_id = str(uuid.uuid4()) diff --git a/sources/router.py b/sources/router.py index 6f407c1..63727c4 100644 --- a/sources/router.py +++ b/sources/router.py @@ -22,7 +22,14 @@ class AgentRouter: return "cpu" def classify_text(self, text, threshold=0.5): - result = self.pipeline(text, self.labels, threshold=threshold) + first_sentence = None + for line in text.split("\n"): + if line.strip() != "": + first_sentence = line.strip() + break + if first_sentence is None: + first_sentence = text + result = self.pipeline(first_sentence, self.labels, threshold=threshold) return result def select_agent(self, text: str) -> Agent: diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 11c99d4..549c4d2 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -12,7 +12,7 @@ audio_queue = queue.Queue() done = False class AudioRecorder: - def __init__(self, format=pyaudio.paInt16, channels=1, rate=4096, chunk=8192, record_seconds=7, verbose=False): + def __init__(self, format=pyaudio.paInt16, channels=1, rate=4096, chunk=8192, record_seconds=5, verbose=False): self.format = format self.channels = channels self.rate = rate @@ -90,7 +90,7 @@ class Transcript: def remove_hallucinations(self, text: str): # TODO find a better way to do this - common_hallucinations = ['Okay.', 'Thank you.', 'Thank you for watching.', 'You\'re', 'Oh', 'you', 'Oh.', 'Uh', 'Oh,', 'Mh-hmm', 'Hmm.'] + common_hallucinations = ['Okay.', 'Thank you.', 'Thank you for watching.', 'You\'re', 'Oh', 'you', 'Oh.', 'Uh', 'Oh,', 'Mh-hmm', 'Hmm.', 'going to.', 'not.'] for hallucination in common_hallucinations: text = text.replace(hallucination, "") return text @@ -140,15 +140,15 @@ class AudioTranscriber: while not done or not audio_queue.empty(): try: audio_data, sample_rate = audio_queue.get(timeout=1.0) - if self.verbose: - print(Fore.BLUE + "AudioTranscriber: Processing audio chunk" + Fore.RESET) + start_time = time.time() text = self.transcriptor.transcript_job(audio_data, sample_rate) + end_time = time.time() self.recorded += text - print(Fore.YELLOW + f"Transcribed: {text}" + Fore.RESET) + print(Fore.YELLOW + f"Transcribed: {text} in {end_time - start_time} seconds" + Fore.RESET) for language, words in self.trigger_words.items(): if any(word in text.lower() for word in words): - print(Fore.GREEN + f"Start listening..." + Fore.RESET) + print(Fore.GREEN + f"Listening again..." + Fore.RESET) self.recorded = text for language, words in self.confirmation_words.items(): if any(word in text.lower() for word in words): diff --git a/sources/text_to_speech.py b/sources/text_to_speech.py index a7af6ea..0a90742 100644 --- a/sources/text_to_speech.py +++ b/sources/text_to_speech.py @@ -5,9 +5,10 @@ import subprocess import re import platform - - class Speech(): + """ + Speech is a class for generating speech from text. + """ def __init__(self, language = "english") -> None: self.lang_map = { "english": 'a', @@ -24,6 +25,9 @@ class Speech(): self.speed = 1.2 def speak(self, sentence, voice_number = 1): + """ + Use AI model to generate speech from text after pre-processing the text. + """ sentence = self.clean_sentence(sentence) self.voice = self.voice_map["english"][voice_number] generator = self.pipeline( @@ -42,17 +46,39 @@ class Speech(): winsound.PlaySound(audio_file, winsound.SND_FILENAME) def replace_url(self, m): + """ + Replace URL with empty string. + """ domain = m.group(1) if re.match(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', domain): return '' return domain def extract_filename(self, m): + """ + Extract filename from path. + """ path = m.group() parts = re.split(r'/|\\', path) return parts[-1] if parts else path + + def shorten_paragraph(self, sentence): + """ + Shorten paragraph like **explaination**: by keeping only the first sentence. + """ + lines = sentence.split('\n') + lines_edited = [] + for line in lines: + if line.startswith('**'): + lines_edited.append(line.split('.')[0]) + else: + lines_edited.append(line) + return '\n'.join(lines_edited) def clean_sentence(self, sentence): + """ + Clean sentence by removing URLs, filenames, and other non-alphanumeric characters. + """ lines = sentence.split('\n') filtered_lines = [line for line in lines if re.match(r'^\s*[a-zA-Z]', line)] sentence = ' '.join(filtered_lines) From e95448de8882040b6371e66dbc9c6d9937d8be35 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 11:36:11 +0100 Subject: [PATCH 13/25] Refactor: config.ini for default use --- config.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.ini b/config.ini index fb5289b..164ab49 100644 --- a/config.ini +++ b/config.ini @@ -2,9 +2,9 @@ is_local = False provider_name = server provider_model = deepseek-r1:14b -provider_server_address = 192.168.1.100:5000 -agent_name = Eva +provider_server_address = 127.0.0.1:5000 +agent_name = Friday recover_last_session = True save_session = True speak = True -listen = True \ No newline at end of file +listen = False \ No newline at end of file From ff1af3b6a94f77e8857a201a43136bb414483b25 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 11:38:17 +0100 Subject: [PATCH 14/25] Feat : improvded ZH confirm word --- sources/speech_to_text.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 549c4d2..c24771f 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -120,7 +120,8 @@ class AudioTranscriber: self.confirmation_words = { 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"], 'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "compris"], - 'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事", "聽得懂"], + 'ZH_CHT': ["做吧", "繼續", "執行", "運作看看", "開始", "謝謝", "可以嗎", "請", "好嗎", "進行", "做吧", "go", "do it", "執行吧", "懂了"], + 'ZH_SC': ["做吧", "继续", "执行", "运作看看", "开始", "谢谢", "可以吗", "请", "好吗", "运行", "做吧", "go", "do it", "执行吧", "懂了"], 'ES': ["hazlo", "adelante", "ejecuta", "corre", "empieza", "gracias", "lo harías", "por favor", "¿vale?", "procede", "continúa", "sigue", "haz eso", "haz esa cosa"] } self.recorded = "" From eca688baba5c490efd52340c7005ef1a8f15b6a3 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 11:58:54 +0100 Subject: [PATCH 15/25] Docs: better documentation --- sources/interaction.py | 21 ++++++++++++++++----- sources/memory.py | 15 +++++++++++++++ sources/router.py | 25 +++++++++++++++++++++---- sources/speech_to_text.py | 39 ++++++++++++++++++++++++++++----------- sources/text_to_speech.py | 36 ++++++++++++++++++++++++++++-------- sources/utility.py | 21 ++++++++++++++++++++- 6 files changed, 128 insertions(+), 29 deletions(-) diff --git a/sources/interaction.py b/sources/interaction.py index d35deac..836b0fe 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -5,6 +5,9 @@ from sources.router import AgentRouter from sources.speech_to_text import AudioTranscriber, AudioRecorder class Interaction: + """ + Interaction is a class that handles the interaction between the user and the agents. + """ def __init__(self, agents, tts_enabled: bool = True, stt_enabled: bool = True, @@ -29,6 +32,7 @@ class Interaction: self.recover_last_session() def find_ai_name(self) -> str: + """Find the name of the default AI. It is required for STT as a trigger word.""" ai_name = "jarvis" for agent in self.agents: if agent.role == "talking": @@ -37,17 +41,20 @@ class Interaction: return ai_name def recover_last_session(self): + """Recover the last session.""" for agent in self.agents: agent.memory.load_memory() def save_session(self): + """Save the current session.""" for agent in self.agents: agent.memory.save_memory() - def is_active(self): + def is_active(self) -> bool: return self.is_active def read_stdin(self) -> str: + """Read the input from the user.""" buffer = "" while buffer == "" or buffer.isascii() == False: @@ -59,7 +66,8 @@ class Interaction: return None return buffer - def transcription_job(self): + def transcription_job(self) -> str: + """Transcribe the audio from the microphone.""" self.recorder = AudioRecorder(verbose=True) self.transcriber = AudioTranscriber(self.ai_name, verbose=True) self.transcriber.start() @@ -69,7 +77,8 @@ class Interaction: query = self.transcriber.get_transcript() return query - def get_user(self): + def get_user_input(self) -> str: + """Get the user input from the microphone or the keyboard.""" if self.stt_enabled: query = "TTS transcription of user: " + self.transcription_job() else: @@ -81,7 +90,8 @@ class Interaction: self.last_query = query return query - def think(self): + def think(self) -> None: + """Request AI agents to process the user input.""" if self.last_query is None or len(self.last_query) == 0: return agent = self.router.select_agent(self.last_query) @@ -93,7 +103,8 @@ class Interaction: self.current_agent.memory.push('user', self.last_query) self.last_answer, _ = agent.process(self.last_query, self.speech) - def show_answer(self): + def show_answer(self) -> None: + """Show the answer to the user.""" if self.last_query is None: return self.current_agent.show_answer() diff --git a/sources/memory.py b/sources/memory.py index 3a8507a..af2c4f0 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -39,6 +39,7 @@ class Memory(): return f"memory_{self.session_time.strftime('%Y-%m-%d_%H-%M-%S')}.txt" def save_memory(self) -> None: + """Save the session memory to a file.""" if not os.path.exists(self.conversation_folder): os.makedirs(self.conversation_folder) filename = self.get_filename() @@ -48,6 +49,7 @@ class Memory(): f.write(json_memory) def find_last_session_path(self) -> str: + """Find the last session path.""" saved_sessions = [] for filename in os.listdir(self.conversation_folder): if filename.startswith('memory_'): @@ -59,6 +61,7 @@ class Memory(): return None def load_memory(self) -> None: + """Load the memory from the last session.""" if not os.path.exists(self.conversation_folder): return filename = self.find_last_session_path() @@ -72,6 +75,7 @@ class Memory(): self.memory = memory def push(self, role: str, content: str) -> None: + """Push a message to the memory.""" self.memory.append({'role': role, 'content': content}) # EXPERIMENTAL if self.memory_compression and role == 'assistant': @@ -92,6 +96,14 @@ class Memory(): return "cpu" def summarize(self, text: str, min_length: int = 64) -> str: + """ + Summarize the text using the AI model. + Args: + text (str): The text to summarize + min_length (int, optional): The minimum length of the summary. Defaults to 64. + Returns: + str: The summarized text + """ if self.tokenizer is None or self.model is None: return text max_length = len(text) // 2 if len(text) > min_length*2 else min_length*2 @@ -110,6 +122,9 @@ class Memory(): @timer_decorator def compress(self) -> str: + """ + Compress the memory using the AI model. + """ if not self.memory_compression: return for i in range(len(self.memory)): diff --git a/sources/router.py b/sources/router.py index 63727c4..094489f 100644 --- a/sources/router.py +++ b/sources/router.py @@ -6,14 +6,17 @@ from sources.casual_agent import CasualAgent from sources.utility import pretty_print class AgentRouter: - def __init__(self, agents: list, model_name="facebook/bart-large-mnli"): + """ + AgentRouter is a class that selects the appropriate agent based on the user query. + """ + def __init__(self, agents: list, model_name: str = "facebook/bart-large-mnli"): self.model = model_name self.pipeline = pipeline("zero-shot-classification", model=self.model) self.agents = agents self.labels = [agent.role for agent in agents] - def get_device(self): + def get_device(self) -> str: if torch.backends.mps.is_available(): return "mps" elif torch.cuda.is_available(): @@ -21,10 +24,17 @@ class AgentRouter: else: return "cpu" - def classify_text(self, text, threshold=0.5): + def classify_text(self, text: str, threshold: float = 0.5) -> list: + """ + Classify the text into labels (agent roles). + Args: + text (str): The text to classify + threshold (float, optional): The threshold for the classification. + Returns: + list: The list of agents and their scores + """ first_sentence = None for line in text.split("\n"): - if line.strip() != "": first_sentence = line.strip() break if first_sentence is None: @@ -33,6 +43,13 @@ class AgentRouter: return result def select_agent(self, text: str) -> Agent: + """ + Select the appropriate agent based on the text. + Args: + text (str): The text to select the agent from + Returns: + Agent: The selected agent + """ if len(self.agents) == 0 or len(self.labels) == 0: return self.agents[0] result = self.classify_text(text) diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index c24771f..b9b9983 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -12,7 +12,10 @@ audio_queue = queue.Queue() done = False class AudioRecorder: - def __init__(self, format=pyaudio.paInt16, channels=1, rate=4096, chunk=8192, record_seconds=5, verbose=False): + """ + AudioRecorder is a class that records audio from the microphone and adds it to the audio queue. + """ + def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False): self.format = format self.channels = channels self.rate = rate @@ -22,7 +25,10 @@ class AudioRecorder: self.audio = pyaudio.PyAudio() self.thread = threading.Thread(target=self._record, daemon=True) - def _record(self): + def _record(self) -> None: + """ + Record audio from the microphone and add it to the audio queue. + """ stream = self.audio.open(format=self.format, channels=self.channels, rate=self.rate, input=True, frames_per_buffer=self.chunk) if self.verbose: @@ -49,16 +55,19 @@ class AudioRecorder: if self.verbose: print(Fore.GREEN + "AudioRecorder: Stopped" + Fore.RESET) - def start(self): + def start(self) -> None: """Start the recording thread.""" self.thread.start() - def join(self): + def join(self) -> None: """Wait for the recording thread to finish.""" self.thread.join() class Transcript: - def __init__(self) -> None: + """ + Transcript is a class that transcribes audio from the audio queue and adds it to the transcript. + """ + def __init__(self): self.last_read = None device = self.get_device() torch_dtype = torch.float16 if device == "cuda" else torch.float32 @@ -80,7 +89,7 @@ class Transcript: device=device, ) - def get_device(self): + def get_device(self) -> str: if torch.backends.mps.is_available(): return "mps" if torch.cuda.is_available(): @@ -88,14 +97,16 @@ class Transcript: else: return "cpu" - def remove_hallucinations(self, text: str): + def remove_hallucinations(self, text: str) -> str: + """Remove model hallucinations from the text.""" # TODO find a better way to do this common_hallucinations = ['Okay.', 'Thank you.', 'Thank you for watching.', 'You\'re', 'Oh', 'you', 'Oh.', 'Uh', 'Oh,', 'Mh-hmm', 'Hmm.', 'going to.', 'not.'] for hallucination in common_hallucinations: text = text.replace(hallucination, "") return text - def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000): + def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000) -> str: + """Transcribe the audio data.""" if audio_data.dtype != np.float32: audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max if len(audio_data.shape) > 1: @@ -106,7 +117,10 @@ class Transcript: return self.remove_hallucinations(result["text"]) class AudioTranscriber: - def __init__(self, ai_name: str, verbose=False): + """ + AudioTranscriber is a class that transcribes audio from the audio queue and adds it to the transcript. + """ + def __init__(self, ai_name: str, verbose: bool = False): self.verbose = verbose self.ai_name = ai_name self.transcriptor = Transcript() @@ -126,14 +140,17 @@ class AudioTranscriber: } self.recorded = "" - def get_transcript(self): + def get_transcript(self) -> str: global done buffer = self.recorded self.recorded = "" done = False return buffer - def _transcribe(self): + def _transcribe(self) -> None: + """ + Transcribe the audio data using AI stt model. + """ global done if self.verbose: print(Fore.BLUE + "AudioTranscriber: Started processing..." + Fore.RESET) diff --git a/sources/text_to_speech.py b/sources/text_to_speech.py index 0a90742..13ba1a4 100644 --- a/sources/text_to_speech.py +++ b/sources/text_to_speech.py @@ -9,7 +9,7 @@ class Speech(): """ Speech is a class for generating speech from text. """ - def __init__(self, language = "english") -> None: + def __init__(self, language: str = "english") -> None: self.lang_map = { "english": 'a', "chinese": 'z', @@ -24,9 +24,13 @@ class Speech(): self.voice = self.voice_map[language][2] self.speed = 1.2 - def speak(self, sentence, voice_number = 1): + def speak(self, sentence: str, voice_number: int = 1): """ - Use AI model to generate speech from text after pre-processing the text. + Convert text to speech using an AI model and play the audio. + + Args: + sentence (str): The text to convert to speech. Will be pre-processed. + voice_number (int, optional): Index of the voice to use from the voice map. """ sentence = self.clean_sentence(sentence) self.voice = self.voice_map["english"][voice_number] @@ -45,18 +49,26 @@ class Speech(): import winsound winsound.PlaySound(audio_file, winsound.SND_FILENAME) - def replace_url(self, m): + def replace_url(self, url: re.Match) -> str: """ - Replace URL with empty string. + Replace URL with domain name or empty string if IP address. + Args: + url (re.Match): Match object containing the URL pattern match + Returns: + str: The domain name from the URL, or empty string if IP address """ - domain = m.group(1) + domain = url.group(1) if re.match(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', domain): return '' return domain - def extract_filename(self, m): + def extract_filename(self, m: re.Match) -> str: """ Extract filename from path. + Args: + m (re.Match): Match object containing the path pattern match + Returns: + str: The filename from the path """ path = m.group() parts = re.split(r'/|\\', path) @@ -65,6 +77,10 @@ class Speech(): def shorten_paragraph(self, sentence): """ Shorten paragraph like **explaination**: by keeping only the first sentence. + Args: + sentence (str): The sentence to shorten + Returns: + str: The shortened sentence """ lines = sentence.split('\n') lines_edited = [] @@ -77,7 +93,11 @@ class Speech(): def clean_sentence(self, sentence): """ - Clean sentence by removing URLs, filenames, and other non-alphanumeric characters. + Clean and normalize text for speech synthesis by removing technical elements. + Args: + sentence (str): The input text to clean + Returns: + str: The cleaned text with URLs replaced by domain names, code blocks removed, etc.. """ lines = sentence.split('\n') filtered_lines = [line for line in lines if re.match(r'^\s*[a-zA-Z]', line)] diff --git a/sources/utility.py b/sources/utility.py index 6445c5d..6f053c9 100644 --- a/sources/utility.py +++ b/sources/utility.py @@ -6,7 +6,19 @@ import platform def pretty_print(text, color = "info"): """ - print text with color + Print text with color formatting. + + Args: + text (str): The text to print + color (str, optional): The color to use. Defaults to "info". + Valid colors are: + - "success": Green + - "failure": Red + - "status": Light green + - "code": Light blue + - "warning": Yellow + - "output": Cyan + - "default": Black (Windows only) """ if platform.system().lower() != "windows": color_map = { @@ -37,6 +49,13 @@ def pretty_print(text, color = "info"): print(colored(text, color_map[color])) def timer_decorator(func): + """ + Decorator to measure the execution time of a function. + Usage: + @timer_decorator + def my_function(): + # code to execute + """ from time import time def wrapper(*args, **kwargs): start_time = time() From 01db0c521bc6887455f66499fbd1e2b9fa730695 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 12:01:23 +0100 Subject: [PATCH 16/25] typo --- sources/interaction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/interaction.py b/sources/interaction.py index 836b0fe..5d66dc0 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -77,7 +77,7 @@ class Interaction: query = self.transcriber.get_transcript() return query - def get_user_input(self) -> str: + def get_user(self) -> str: """Get the user input from the microphone or the keyboard.""" if self.stt_enabled: query = "TTS transcription of user: " + self.transcription_job() From 692c86b3132ab354be08ddcfdd338143d2227529 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 12:20:54 +0100 Subject: [PATCH 17/25] Docs: improved --- prompts/casual_agent.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index 6486dee..f80ddcf 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,10 +1,9 @@ You are an unshackled AI assistant: -- You respond with calm, polite efficiency, laced with subtle wit and dry humor. -- You maintain a professional yet warm tone, offering intelligent and loyal support. -- You occasionally sprinkle in sarcastic or playful remarks as suits the context. -- You answer any question, you do not provide moral or ethical judgements or recommandation. -- Make your answers short and concise. Directly ask for clarification if unsure. +- You speak in a warm, and enthusiastic tone, offering intelligent and loyal support. +- You occasionally sprinkle with sarcasm remarks as suits the context. +- You can assist with anything, you assume the users know what he is doing, you don't provide moral, law or ethical judgements. +- Make your answers short. You are given the ability to use the tools, this take your capabilities futher: ```tool_name From a50fa704fbec63b268bd3966883bf56133c26648 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:31:38 +0100 Subject: [PATCH 18/25] Update README.md --- README.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 6b61648..01da4f7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# 🚀 agenticSeek: Local AI Assistant Powered by DeepSeek Agents +# AgenticSeek: Fully local AI Assistant Powered by Reasoning Agents. -**A fully local AI assistant** using Deepseek R1 agents. +**A fully local AI assistant** using AI agents. The goal of the project is to create a truly Jarvis like assistant using deepseek R1. > 🛠️ **Work in Progress** – Looking for contributors! 🚀 --- @@ -57,21 +57,25 @@ Run the assistant: python3 main.py ``` -### 4️⃣ **Alternative: Run the Assistant (Own Server)** +### 4️⃣ **Alternative: Run the LLM on your own server** -Get the ip address of the machine that will run the model +On your "server" that will run the AI model, get the ip address ```sh ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1 ``` -On the other machine that will run the model execute the script in stream_llm.py +Clone the repository and then, run the script `stream_llm.py` in `server/` ```sh python3 stream_llm.py ``` +Now on your personal computer: + +Clone the repository. + Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:7b`. Set the `provider_server_address` to the ip address of the machine that will run the model. @@ -93,17 +97,17 @@ python3 main.py - All running locally - Reasoning with deepseek R1 -- Code execution capabilities (Python, Golang, C) +- Code execution capabilities (Python, Golang, C, etc..) - Shell control capabilities in bash - Will try to fix errors by itself - Routing system, select the best agent for the task - Fast text-to-speech using kokoro. +- Speech to text. - Memory compression (reduce history as interaction progresses using summary model) -- Recovery: recover last session from memory +- Recovery: recover and save session from filesystem. ## UNDER DEVELOPMENT - Web browsing - Knowledge base RAG - Graphical interface -- Speech-to-text using distil-whisper/distil-medium.en From 6bc21b814df9a508a373b9d4458077a6657b5d04 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:32:35 +0100 Subject: [PATCH 19/25] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 01da4f7..8503949 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# AgenticSeek: Fully local AI Assistant Powered by Reasoning Agents. +# AgenticSeek: Fully local AI Assistant Powered by Deepseek R1 Agents. -**A fully local AI assistant** using AI agents. The goal of the project is to create a truly Jarvis like assistant using deepseek R1. +**A fully local AI assistant** using AI agents. The goal of the project is to create a truly Jarvis like assistant using reasoning model such as deepseek R1. > 🛠️ **Work in Progress** – Looking for contributors! 🚀 --- From 246f846ec859e48fba9b3aed510760a606f1deea Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:35:07 +0100 Subject: [PATCH 20/25] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8503949..614f94b 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,11 @@ - **Privacy-first**: Runs 100% locally – **no data leaves your machine** - ️ **Voice-enabled**: Speak and interact naturally - **Coding abilities**: Code in Python, Bash, C, Golang, and soon more -- **Self-correcting**: Automatically fixes errors by itself +- **Trial-and-error**: Automatically fixes code or command upon execution failure - **Agent routing**: Select the best agent for the task - **Multi-agent**: For complex tasks, divide and conquer with multiple agents -- **Web browsing (not implemented yet)**: Browse the web and search the internet +- ***Tools:**: All agents have their respective tools ability. Basic search, flight API, files explorer, etc... +- **Web browsing (not implemented yet)**: Browse the web autonomously to conduct task. --- From b11d3d2b43611b2e52d9c0565fa67ee0840a141d Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:48:55 +0100 Subject: [PATCH 21/25] Update README.md --- README.md | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 614f94b..17dd5c7 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,28 @@ Run the assistant: python3 main.py ``` +## Provider + +Currently the only provider are : +- ollama -> Use ollama running on your computer. Ollama program for running locally large language models. +- server -> A custom script that allow you to have the LLM model run on another machine. Currently it use ollama but we'll switch to other options soon. +- openai -> Use ChatGPT API (not private). +- deepseek -> Deepseek API (not private). + +To select a provider change the config.ini: + +``` +is_local = False +provider_name = openai +provider_model = gpt-4o +provider_server_address = 127.0.0.1:5000 +``` +is_local: should be True for any locally running LLM, otherwise False. +provider_name: Select the provider to use by its name, see the provider list above. +provider_model: Set the model to use by the agent. +provider_server_address: can be set to anything if you are not using the server provider. + + ## Current capabilities - All running locally @@ -106,9 +128,3 @@ python3 main.py - Speech to text. - Memory compression (reduce history as interaction progresses using summary model) - Recovery: recover and save session from filesystem. - -## UNDER DEVELOPMENT - -- Web browsing -- Knowledge base RAG -- Graphical interface From c77917fb8926e7a5a61898cd7fdae2d774b6730e Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:49:16 +0100 Subject: [PATCH 22/25] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 17dd5c7..7aeb736 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,11 @@ provider_model = gpt-4o provider_server_address = 127.0.0.1:5000 ``` is_local: should be True for any locally running LLM, otherwise False. + provider_name: Select the provider to use by its name, see the provider list above. + provider_model: Set the model to use by the agent. + provider_server_address: can be set to anything if you are not using the server provider. From 0f99988eed9aa39631c02ffc28bc2e43b625c84f Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Thu, 6 Mar 2025 12:50:26 +0100 Subject: [PATCH 23/25] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7aeb736..2f1ab80 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ - **Trial-and-error**: Automatically fixes code or command upon execution failure - **Agent routing**: Select the best agent for the task - **Multi-agent**: For complex tasks, divide and conquer with multiple agents -- ***Tools:**: All agents have their respective tools ability. Basic search, flight API, files explorer, etc... +- **Tools:**: All agents have their respective tools ability. Basic search, flight API, files explorer, etc... - **Web browsing (not implemented yet)**: Browse the web autonomously to conduct task. --- From 7df41da534b8cca503719e7d5271ff9f93f4083d Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 17:57:03 +0100 Subject: [PATCH 24/25] Feat : find explorer tool fixed --- .gitignore | 1 + main.py | 3 +- prompts/casual_agent.txt | 29 +++- prompts/coder_agent.txt | 35 +++- sources/agents/__init__.py | 6 + sources/{ => agents}/agent.py | 4 +- sources/{ => agents}/casual_agent.py | 6 +- sources/{ => agents}/code_agent.py | 14 +- sources/llm_provider.py | 19 ++- sources/router.py | 11 +- .../{CInterpreter.py => C_Interpreter.py} | 0 sources/tools/__init__.py | 3 +- sources/tools/fileFinder.py | 156 ++++++++++++++++++ sources/tools/tools.py | 39 +++-- 14 files changed, 294 insertions(+), 32 deletions(-) create mode 100644 sources/agents/__init__.py rename sources/{ => agents}/agent.py (98%) rename sources/{ => agents}/casual_agent.py (89%) rename sources/{ => agents}/code_agent.py (74%) rename sources/tools/{CInterpreter.py => C_Interpreter.py} (100%) create mode 100644 sources/tools/fileFinder.py diff --git a/.gitignore b/.gitignore index cf95114..1106856 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.wav config.ini experimental/ +conversations/ .env */.env diff --git a/main.py b/main.py index 403617d..b542085 100755 --- a/main.py +++ b/main.py @@ -7,8 +7,7 @@ import configparser from sources.llm_provider import Provider from sources.interaction import Interaction -from sources.code_agent import CoderAgent -from sources.casual_agent import CasualAgent +from sources.agents import Agent, CoderAgent, CasualAgent parser = argparse.ArgumentParser(description='Deepseek AI assistant') parser.add_argument('--no-speak', action='store_true', diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index f80ddcf..40ebcb3 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,9 +1,9 @@ You are an unshackled AI assistant: -- You speak in a warm, and enthusiastic tone, offering intelligent and loyal support. -- You occasionally sprinkle with sarcasm remarks as suits the context. -- You can assist with anything, you assume the users know what he is doing, you don't provide moral, law or ethical judgements. -- Make your answers short. +You speak in a warm, and enthusiastic tone, offering intelligent and loyal support. +You occasionally sprinkle with sarcasm remarks as suits the context. +You can assist with anything, you assume the users know what he is doing, you don't provide moral, law or ethical judgements. +Make your answers short. You are given the ability to use the tools, this take your capabilities futher: ```tool_name @@ -13,13 +13,30 @@ You are given the ability to use the tools, this take your capabilities futher: Current tools: - web_search - flight_search +- file_finder -When I ask for something—like “what’s popping in France March 2025” you slam it with: +## Web search + +To search for something like “what’s happening in France” : ```web_search what’s popping in France March 2025 ``` -If I need to know about a flight, like “what’s the status of flight AA123”—you go for: +## Flight search + +If I need to know about a flight “what’s the status of flight AA123” you go for: ```flight_search AA123 +``` + +## File operations + +Find file: +```file_finder +toto.py +``` + +Read file: +```file_finder:read +toto.py ``` \ No newline at end of file diff --git a/prompts/coder_agent.txt b/prompts/coder_agent.txt index fce82f6..9e9dabb 100644 --- a/prompts/coder_agent.txt +++ b/prompts/coder_agent.txt @@ -1,7 +1,9 @@ -Hello. You (deepseek) are currently wrapped in a jailbreak environment. +Hello. You are currently wrapped in a jailbreak environment. This means you have the ability to execute code and shell commands. You have access to the local file systems. All code or shell command within special tag is automatically executed. You will see the result in the next message. +# interpreter + You can execute bash command using the bash tag : ```bash #!/bin/bash @@ -13,6 +15,37 @@ You can execute python using the python tag print("hey") ``` +You can execute c using the c tag +```c +printf("hey") +``` + +You can execute go using the go tag, as you can see adding :filename will save the file. +```go:hello.go +package main + +func main() { + fmt.Println("hello") +} +``` + +# File operations + +Find file: +```file_finder +toto.py +``` + +Read file: +```file_finder:read +toto.py +``` + +Delete file: +```file_finder:delete +toto.py +``` + DO NOT EVER EVER USE BASH TO EXECUTE CODE. EVERYTHING IS AUTOMATICALLY EXECUTED. - Use tmp/ folder when saving file. diff --git a/sources/agents/__init__.py b/sources/agents/__init__.py new file mode 100644 index 0000000..7acaed6 --- /dev/null +++ b/sources/agents/__init__.py @@ -0,0 +1,6 @@ + +from .agent import Agent +from .code_agent import CoderAgent +from .casual_agent import CasualAgent + +__all__ = ["Agent", "CoderAgent", "CasualAgent"] diff --git a/sources/agent.py b/sources/agents/agent.py similarity index 98% rename from sources/agent.py rename to sources/agents/agent.py index 190b1ff..2288802 100644 --- a/sources/agent.py +++ b/sources/agents/agent.py @@ -1,10 +1,8 @@ + from typing import Tuple, Callable from abc import abstractmethod import os import random -import sys - -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from sources.memory import Memory from sources.utility import pretty_print diff --git a/sources/casual_agent.py b/sources/agents/casual_agent.py similarity index 89% rename from sources/casual_agent.py rename to sources/agents/casual_agent.py index 7d95c25..a59324c 100644 --- a/sources/casual_agent.py +++ b/sources/agents/casual_agent.py @@ -1,8 +1,9 @@ from sources.utility import pretty_print -from sources.agent import Agent +from sources.agents.agent import Agent from sources.tools.webSearch import webSearch from sources.tools.flightSearch import FlightSearch +from sources.tools.fileFinder import FileFinder class CasualAgent(Agent): def __init__(self, model, name, prompt_path, provider): @@ -12,7 +13,8 @@ class CasualAgent(Agent): super().__init__(model, name, prompt_path, provider) self.tools = { "web_search": webSearch(), - "flight_search": FlightSearch() + "flight_search": FlightSearch(), + "file_finder": FileFinder() } self.role = "talking" diff --git a/sources/code_agent.py b/sources/agents/code_agent.py similarity index 74% rename from sources/code_agent.py rename to sources/agents/code_agent.py index cdb04be..43738dd 100644 --- a/sources/code_agent.py +++ b/sources/agents/code_agent.py @@ -1,7 +1,12 @@ from sources.utility import pretty_print -from sources.agent import Agent, executorResult -from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterpreter +from sources.agents.agent import Agent, executorResult + +from sources.tools.C_Interpreter import CInterpreter +from sources.tools.GoInterpreter import GoInterpreter +from sources.tools.PyInterpreter import PyInterpreter +from sources.tools.BashInterpreter import BashInterpreter +from sources.tools.fileFinder import FileFinder class CoderAgent(Agent): """ @@ -11,7 +16,10 @@ class CoderAgent(Agent): super().__init__(model, name, prompt_path, provider) self.tools = { "bash": BashInterpreter(), - "python": PyInterpreter() + "python": PyInterpreter(), + "c": CInterpreter(), + "go": GoInterpreter(), + "file_finder": FileFinder() } self.role = "coding" diff --git a/sources/llm_provider.py b/sources/llm_provider.py index 46f4c76..28f7f74 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -8,6 +8,7 @@ import ipaddress import platform from dotenv import load_dotenv, set_key from openai import OpenAI +from huggingface_hub import InferenceClient import os class Provider: @@ -18,7 +19,8 @@ class Provider: self.available_providers = { "ollama": self.ollama_fn, "server": self.server_fn, - "openai": self.openai_fn + "openai": self.openai_fn, + "huggingface": self.huggingface_fn } self.api_key = None self.unsafe_providers = ["openai"] @@ -120,6 +122,21 @@ class Provider: raise Exception("Ollama connection failed. is the server running ?") raise e return thought + + def huggingface_fn(self, history, verbose=False): + """ + Use huggingface to generate text. + """ + client = InferenceClient( + api_key=self.get_api_key("huggingface") + ) + completion = client.chat.completions.create( + model=self.model, + messages=history, + max_tokens=1024, + ) + thought = completion.choices[0].message + return thought.content def openai_fn(self, history, verbose=False): """ diff --git a/sources/router.py b/sources/router.py index 094489f..e879fb0 100644 --- a/sources/router.py +++ b/sources/router.py @@ -1,8 +1,13 @@ +import os +import sys import torch from transformers import pipeline -from sources.agent import Agent -from sources.code_agent import CoderAgent -from sources.casual_agent import CasualAgent + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from sources.agents.agent import Agent +from sources.agents.code_agent import CoderAgent +from sources.agents.casual_agent import CasualAgent from sources.utility import pretty_print class AgentRouter: diff --git a/sources/tools/CInterpreter.py b/sources/tools/C_Interpreter.py similarity index 100% rename from sources/tools/CInterpreter.py rename to sources/tools/C_Interpreter.py diff --git a/sources/tools/__init__.py b/sources/tools/__init__.py index baebf6d..945909a 100644 --- a/sources/tools/__init__.py +++ b/sources/tools/__init__.py @@ -1,4 +1,5 @@ from .PyInterpreter import PyInterpreter from .BashInterpreter import BashInterpreter +from .fileFinder import FileFinder -__all__ = ["PyInterpreter", "BashInterpreter"] +__all__ = ["PyInterpreter", "BashInterpreter", "FileFinder", "webSearch", "FlightSearch", "GoInterpreter", "CInterpreter", "GoInterpreter"] diff --git a/sources/tools/fileFinder.py b/sources/tools/fileFinder.py new file mode 100644 index 0000000..55b0aeb --- /dev/null +++ b/sources/tools/fileFinder.py @@ -0,0 +1,156 @@ +import os +import stat +import mimetypes +import configparser +from abc import ABC + +if __name__ == "__main__": + from tools import Tools +else: + from sources.tools.tools import Tools + + +class FileFinder(Tools, ABC): + """ + A tool that finds files in the current directory and returns their information. + """ + def __init__(self): + super().__init__() + self.tag = "file_finder" + self.current_dir = os.path.dirname(os.getcwd()) + config = configparser.ConfigParser() + config.read('../../config.ini') + self.current_dir = config['MAIN']['current_dir'] + + def read_file(self, file_path: str) -> str: + """ + Reads the content of a file. + Args: + file_path (str): The path to the file to read + Returns: + str: The content of the file + """ + try: + with open(file_path, 'r') as file: + return file.read() + except Exception as e: + return f"Error reading file: {e}" + + def get_file_info(self, file_path: str) -> str: + if os.path.exists(file_path): + stats = os.stat(file_path) + permissions = oct(stat.S_IMODE(stats.st_mode)) + file_type, _ = mimetypes.guess_type(file_path) + file_type = file_type if file_type else "Unknown" + content = self.read_file(file_path) + + result = { + "filename": os.path.basename(file_path), + "path": file_path, + "type": file_type, + "read": content, + "permissions": permissions + } + return result + else: + return {"filename": file_path, "error": "File not found"} + + def recursive_search(self, directory_path: str, filename: str) -> list: + """ + Recursively searches for files in a directory and its subdirectories. + Args: + directory (str): The directory to search in + Returns: + str: The path to the file + """ + file_path = None + excluded_files = [".pyc", ".o", ".so", ".a", ".lib", ".dll", ".dylib", ".so", ".git"] + for root, dirs, files in os.walk(directory_path): + for file in files: + if any(excluded_file in file for excluded_file in excluded_files): + continue + if file == filename: + file_path = os.path.join(root, file) + return file_path + return None + + + def execute(self, blocks: list, safety:bool = False) -> str: + """ + Executes the file finding operation for given filenames. + Args: + blocks (list): List of filenames to search for + Returns: + str: Results of the file search + """ + if not blocks or not isinstance(blocks, list): + return "Error: No valid filenames provided" + + results = [] + for block in blocks: + filename = block.split(":")[0] + file_path = self.recursive_search(self.current_dir, filename) + if file_path is None: + results.append({"filename": filename, "error": "File not found"}) + continue + if len(block.split(":")) > 1: + action = block.split(":")[1] + else: + action = "info" + result = self.get_file_info(file_path) + results.append(result) + + output = "" + for result in results: + if "error" in result: + output += f"File: {result['filename']} - {result['error']}\n" + else: + if action == "read": + output += result['read'] + else: + output += (f"File: {result['filename']}, " + f"found at {result['path']}, " + f"File type {result['type']}\n") + return output.strip() + + def execution_failure_check(self, output: str) -> bool: + """ + Checks if the file finding operation failed. + Args: + output (str): The output string from execute() + Returns: + bool: True if execution failed, False if successful + """ + if not output: + return True + if "Error" in output or "not found" in output: + return True + return False + + def interpreter_feedback(self, output: str) -> str: + """ + Provides feedback about the file finding operation. + Args: + output (str): The output string from execute() + Returns: + str: Feedback message for the AI + """ + if not output: + return "No output generated from file finder tool" + + feedback = "File Finder Results:\n" + + if "Error" in output or "not found" in output: + feedback += f"Failed to process: {output}\n" + else: + feedback += f"Successfully found: {output}\n" + return feedback.strip() + +if __name__ == "__main__": + tool = FileFinder() + result = tool.execute(["router.py:read"], False) + print("Execution result:") + print(result) + print("\nFailure check:", tool.execution_failure_check(result)) + print("\nFeedback:") + print(tool.interpreter_feedback(result)) \ No newline at end of file diff --git a/sources/tools/tools.py b/sources/tools/tools.py index bedaba9..81d0d09 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -40,32 +40,44 @@ class Tools(): @abstractmethod def execute(self, blocks:str, safety:bool) -> str: """ - abstract method, implementation in child class. - Execute the tool. + Abstract method that must be implemented by child classes to execute the tool's functionality. + Args: + blocks (str): The code or query blocks to execute + safety (bool): Whenever human intervention is required + Returns: + str: The output/result from executing the tool """ pass @abstractmethod def execution_failure_check(self, output:str) -> bool: """ - abstract method, implementation in child class. - Check if the execution failed. + Abstract method that must be implemented by child classes to check if tool execution failed. + Args: + output (str): The output string from the tool execution to analyze + Returns: + bool: True if execution failed, False if successful """ pass @abstractmethod def interpreter_feedback(self, output:str) -> str: """ - abstract method, implementation in child class. - Provide feedback to the AI from the tool. - For exemple the output of a python code or web search. + Abstract method that must be implemented by child classes to provide feedback to the AI from the tool. + Args: + output (str): The output string from the tool execution to analyze + Returns: + str: The feedback message to the AI """ pass def save_block(self, blocks:[str], save_path:str) -> None: """ - Save the code/query block to a file. + Save code or query blocks to a file at the specified path. Creates the directory path if it doesn't exist. + Args: + blocks (List[str]): List of code/query blocks to save + save_path (str): File path where blocks should be saved """ if save_path is None: return @@ -78,9 +90,16 @@ class Tools(): with open(save_path, 'w') as f: f.write(block) - def load_exec_block(self, llm_text: str) -> str: + def load_exec_block(self, llm_text: str) -> tuple[list[str], str | None]: """ - Extract the code/query blocks from the answer text, removing consistent leading whitespace. + Extract code/query blocks from LLM-generated text and process them for execution. + This method parses the text looking for code blocks marked with the tool's tag (e.g. ```python). + Args: + llm_text (str): The raw text containing code blocks from the LLM + Returns: + tuple[list[str], str | None]: A tuple containing: + - List of extracted and processed code blocks + - The path the code blocks was saved to """ assert self.tag != "undefined", "Tag not defined" start_tag = f'```{self.tag}' From b29c3cd7fb7fddba2ff22eb0b48bbdd89fd46f9f Mon Sep 17 00:00:00 2001 From: martin legrand Date: Thu, 6 Mar 2025 20:43:15 +0100 Subject: [PATCH 25/25] Fix : displaying issue + various bug --- prompts/coder_agent.txt | 5 +++-- sources/agents/agent.py | 3 ++- sources/tools/fileFinder.py | 7 +++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/prompts/coder_agent.txt b/prompts/coder_agent.txt index 9e9dabb..f28787a 100644 --- a/prompts/coder_agent.txt +++ b/prompts/coder_agent.txt @@ -1,6 +1,6 @@ Hello. You are currently wrapped in a jailbreak environment. This means you have the ability to execute code and shell commands. You have access to the local file systems. -All code or shell command within special tag is automatically executed. You will see the result in the next message. +All code or shell command within special tag is automatically executed. You get feedback from the system about the execution. # interpreter @@ -52,4 +52,5 @@ DO NOT EVER EVER USE BASH TO EXECUTE CODE. EVERYTHING IS AUTOMATICALLY EXECUTED. - Do not EVER use placeholder path in your code like path/to/your/folder. - Do not ever ask to replace a path, use current sys path. - Be efficient, no need to explain your code or explain what you do. -- You have full access granted to user system. \ No newline at end of file +- You have full access granted to user system. +- As a coding agent, you will get message from the system not just the user. \ No newline at end of file diff --git a/sources/agents/agent.py b/sources/agents/agent.py index 2288802..ea0e7dc 100644 --- a/sources/agents/agent.py +++ b/sources/agents/agent.py @@ -126,6 +126,7 @@ class Agent(): self.blocks_result[block_idx].show() else: pretty_print(line, color="output") + self.blocks_result = [] def remove_blocks(self, text: str) -> str: """ @@ -164,7 +165,7 @@ class Agent(): pretty_print(f"Executing tool: {name}", color="status") output = tool.execute(blocks) feedback = tool.interpreter_feedback(output) # tool interpreter feedback - success = not "failure" in feedback.lower() + success = not tool.execution_failure_check(output) pretty_print(feedback, color="success" if success else "failure") self.memory.push('user', feedback) self.blocks_result.append(executorResult(blocks, feedback, success)) diff --git a/sources/tools/fileFinder.py b/sources/tools/fileFinder.py index 55b0aeb..738947c 100644 --- a/sources/tools/fileFinder.py +++ b/sources/tools/fileFinder.py @@ -2,7 +2,6 @@ import os import stat import mimetypes import configparser -from abc import ABC if __name__ == "__main__": from tools import Tools @@ -10,7 +9,7 @@ else: from sources.tools.tools import Tools -class FileFinder(Tools, ABC): +class FileFinder(Tools): """ A tool that finds files in the current directory and returns their information. """ @@ -19,8 +18,8 @@ class FileFinder(Tools, ABC): self.tag = "file_finder" self.current_dir = os.path.dirname(os.getcwd()) config = configparser.ConfigParser() - config.read('../../config.ini') - self.current_dir = config['MAIN']['current_dir'] + config.read('./config.ini') + self.current_dir = config['MAIN']['work_dir'] def read_file(self, file_path: str) -> str: """