From bba98786f5062486f1170489291fe724c1e69ace Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 11:10:17 +0100 Subject: [PATCH 1/8] Feat : integration of more code interpreter --- sources/code_agent.py | 4 +++- sources/memory.py | 11 +---------- sources/utility.py | 10 ++++++++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/sources/code_agent.py b/sources/code_agent.py index b1688b2..dc5967b 100644 --- a/sources/code_agent.py +++ b/sources/code_agent.py @@ -1,7 +1,7 @@ -from sources.tools import PyInterpreter, BashInterpreter from sources.utility import pretty_print from sources.agent import Agent, executorResult +from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterpreter class CoderAgent(Agent): def __init__(self, model, name, prompt_path, provider): @@ -9,6 +9,8 @@ class CoderAgent(Agent): self.tools = { "bash": BashInterpreter(), "python": PyInterpreter() + "C": CInterpreter(), + "go": GoInterpreter() } def remove_blocks(self, text: str) -> str: diff --git a/sources/memory.py b/sources/memory.py index f9a03ed..53df96c 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -5,6 +5,7 @@ import datetime import uuid import os import json +from sources.utility import timer_decorator class Memory(): """ @@ -101,16 +102,6 @@ class Memory(): ) summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True) return summary - - def timer_decorator(func): - from time import time - def wrapper(*args, **kwargs): - start_time = time() - result = func(*args, **kwargs) - end_time = time() - print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute") - return result - return wrapper @timer_decorator def compress(self) -> str: diff --git a/sources/utility.py b/sources/utility.py index 1a30ecb..6445c5d 100644 --- a/sources/utility.py +++ b/sources/utility.py @@ -35,3 +35,13 @@ def pretty_print(text, color = "info"): if color not in color_map: color = "default" print(colored(text, color_map[color])) + +def timer_decorator(func): + from time import time + def wrapper(*args, **kwargs): + start_time = time() + result = func(*args, **kwargs) + end_time = time() + print(f"{func.__name__} took {end_time - start_time:.2f} seconds to execute") + return result + return wrapper \ No newline at end of file From dcb5724e28db4866920471c66ae959ff4b4f3bef Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 11:54:45 +0100 Subject: [PATCH 2/8] Feat : add options to use api based providers --- requirements.txt | 1 + sources/llm_provider.py | 55 +++++++++++++++++++++++++++++++++++------ 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3a4967e..ddbc9f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ flask==3.1.0 soundfile==0.13.1 protobuf==3.20.3 termcolor==2.3.0 +openai==1.13.3 # if use chinese ordered_set pypinyin diff --git a/sources/llm_provider.py b/sources/llm_provider.py index f41d182..46f4c76 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -6,6 +6,9 @@ import requests import subprocess import ipaddress import platform +from dotenv import load_dotenv, set_key +from openai import OpenAI +import os class Provider: def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"): @@ -15,12 +18,27 @@ class Provider: self.available_providers = { "ollama": self.ollama_fn, "server": self.server_fn, - "test": self.test_fn, + "openai": self.openai_fn } - if self.server != "": + self.api_key = None + self.unsafe_providers = ["openai"] + if self.provider_name not in self.available_providers: + raise ValueError(f"Unknown provider: {provider_name}") + if self.provider_name in self.unsafe_providers: + print("Warning: you are using an API provider. You data will be sent to the cloud.") + self.get_api_key(self.provider_name) + elif self.server != "": print("Provider initialized at ", self.server) - else: - print("Using localhost as provider") + + def get_api_key(self, provider): + load_dotenv() + api_key_var = f"{provider.upper()}_API_KEY" + api_key = os.getenv(api_key_var) + if not api_key: + api_key = input(f"Please enter your {provider} API key: ") + set_key(".env", api_key_var, api_key) + load_dotenv() + return api_key def check_address_format(self, address): """ @@ -61,7 +79,7 @@ class Provider: print(f"An error occurred: {e}") return False - def server_fn(self, history, verbose = True): + def server_fn(self, history, verbose = False): """ Use a remote server wit LLM to generate text. """ @@ -76,12 +94,11 @@ class Provider: while not is_complete: response = requests.get(f"http://{self.server}/get_updated_sentence") thought = response.json()["sentence"] - # TODO add real time streaming to stdout is_complete = bool(response.json()["is_complete"]) time.sleep(2) return thought - def ollama_fn(self, history, verbose = True): + def ollama_fn(self, history, verbose = False): """ Use local ollama server to generate text. """ @@ -104,9 +121,27 @@ class Provider: raise e return thought + def openai_fn(self, history, verbose=False): + """ + Use openai to generate text. + """ + api_key = self.get_api_key("openai") + client = OpenAI(api_key=api_key) + try: + response = client.chat.completions.create( + model=self.model, + messages=history + ) + thought = response.choices[0].message.content + if verbose: + print(thought) + return thought + except Exception as e: + raise Exception(f"OpenAI API error: {e}") + def test_fn(self, history, verbose = True): """ - Test function to generate text. + This function is used to conduct tests. """ thought = """ This is a test response from the test provider. @@ -121,3 +156,7 @@ class Provider: ``` """ return thought + +if __name__ == "__main__": + provider = Provider("openai", "gpt-4o-mini") + print(provider.respond(["user", "Hello, how are you?"])) From ca49a7f0a51bc9bfe0db196a6ecf307569cc4cac Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 15:21:36 +0100 Subject: [PATCH 3/8] Feat : agent router system + casual agent --- .gitignore | 2 ++ main.py | 16 +++++++---- requirements.txt | 4 +-- sources/agent.py | 44 +++++++++++++++++++++++------- sources/casual_agent.py | 36 +++++++++++++++++++++++++ sources/code_agent.py | 27 +++---------------- sources/interaction.py | 12 +++++++-- sources/memory.py | 4 +++ sources/router.py | 60 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 164 insertions(+), 41 deletions(-) create mode 100644 sources/casual_agent.py create mode 100644 sources/router.py diff --git a/.gitignore b/.gitignore index bd4ab5e..cf95114 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ *.wav config.ini experimental/ +.env +*/.env # Byte-compiled / optimized / DLL files diff --git a/main.py b/main.py index e40fab6..88478d0 100755 --- a/main.py +++ b/main.py @@ -8,11 +8,11 @@ import configparser from sources.llm_provider import Provider from sources.interaction import Interaction from sources.code_agent import CoderAgent - +from sources.casual_agent import CasualAgent parser = argparse.ArgumentParser(description='Deepseek AI assistant') -parser.add_argument('--speak', action='store_true', - help='Make AI use text-to-speech') +parser.add_argument('--no-speak', action='store_true', + help='Make AI not use text-to-speech') args = parser.parse_args() config = configparser.ConfigParser() @@ -31,12 +31,18 @@ def main(): model=config["MAIN"]["provider_model"], server_address=config["MAIN"]["provider_server_address"]) - agent = CoderAgent(model=config["MAIN"]["provider_model"], + agents = [ + CoderAgent(model=config["MAIN"]["provider_model"], name=config["MAIN"]["agent_name"], prompt_path="prompts/coder_agent.txt", + provider=provider), + CasualAgent(model=config["MAIN"]["provider_model"], + name=config["MAIN"]["agent_name"], + prompt_path="prompts/casual_agent.txt", provider=provider) + ] - interaction = Interaction([agent], tts_enabled=config.getboolean('MAIN', 'speak'), + interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), recover_last_session=config.getboolean('MAIN', 'recover_last_session')) while interaction.is_active: interaction.get_user() diff --git a/requirements.txt b/requirements.txt index ddbc9f5..e122ccd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,8 +12,8 @@ kokoro==0.7.12 flask==3.1.0 soundfile==0.13.1 protobuf==3.20.3 -termcolor==2.3.0 -openai==1.13.3 +termcolor==2.5.0 +gliclass==0.1.8 # if use chinese ordered_set pypinyin diff --git a/sources/agent.py b/sources/agent.py index 4b3e8d0..7963fa4 100644 --- a/sources/agent.py +++ b/sources/agent.py @@ -2,6 +2,10 @@ from typing import Tuple, Callable from abc import abstractmethod import os import random +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from sources.memory import Memory from sources.utility import pretty_print @@ -25,6 +29,7 @@ class Agent(): provider, recover_last_session=False) -> None: self.agent_name = name + self.role = None self.current_directory = os.getcwd() self.model = model self.llm = provider @@ -60,7 +65,14 @@ class Agent(): raise e @abstractmethod - def answer(self, prompt, speech_module) -> str: + def show_answer(self): + """ + abstract method, implementation in child class. + """ + pass + + @abstractmethod + def process(self, prompt, speech_module) -> str: """ abstract method, implementation in child class. """ @@ -78,7 +90,7 @@ class Agent(): end_idx = text.rfind(end_tag)+8 return text[start_idx:end_idx] - def llm_request(self, verbose = True) -> Tuple[str, str]: + def llm_request(self, verbose = False) -> Tuple[str, str]: memory = self.memory.get() thought = self.llm.respond(memory, verbose) @@ -95,16 +107,30 @@ class Agent(): "Working on it sir, please let me think."] speech_module.speak(messages[random.randint(0, len(messages)-1)]) - def print_code_blocks(self, blocks: list, name: str): - for block in blocks: - pretty_print(f"Executing {name} code...\n", color="output") - pretty_print("-"*100, color="output") - pretty_print(block, color="code") - pretty_print("-"*100, color="output") - def get_blocks_result(self) -> list: return self.blocks_result + def remove_blocks(self, text: str) -> str: + """ + Remove all code/query blocks within a tag from the answer text. + """ + tag = f'```' + lines = text.split('\n') + post_lines = [] + in_block = False + block_idx = 0 + for line in lines: + if tag in line and not in_block: + in_block = True + continue + if not in_block: + post_lines.append(line) + if tag in line: + in_block = False + post_lines.append(f"block:{block_idx}") + block_idx += 1 + return "\n".join(post_lines) + def execute_modules(self, answer: str) -> Tuple[bool, str]: feedback = "" success = False diff --git a/sources/casual_agent.py b/sources/casual_agent.py new file mode 100644 index 0000000..8e12c9d --- /dev/null +++ b/sources/casual_agent.py @@ -0,0 +1,36 @@ + +from sources.utility import pretty_print +from sources.agent import Agent + +class CasualAgent(Agent): + def __init__(self, model, name, prompt_path, provider): + """ + The casual agent is a special for casual talk to the user without specific tasks. + """ + super().__init__(model, name, prompt_path, provider) + self.tools = { + } # TODO implement casual tools like basic web search, basic file search, basic image search, basic knowledge search + self.role = "talking" + + def show_answer(self): + lines = self.last_answer.split("\n") + for line in lines: + pretty_print(line, color="output") + + def process(self, prompt, speech_module) -> str: + self.memory.push('user', prompt) + + pretty_print("Thinking...", color="status") + self.wait_message(speech_module) + answer, reasoning = self.llm_request() + self.last_answer = answer + return answer, reasoning + +if __name__ == "__main__": + from llm_provider import Provider + + #local_provider = Provider("ollama", "deepseek-r1:14b", None) + server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000") + agent = CasualAgent("deepseek-r1:14b", "jarvis", "prompts/casual_agent.txt", server_provider) + ans = agent.process("Hello, how are you?") + print(ans) \ No newline at end of file diff --git a/sources/code_agent.py b/sources/code_agent.py index dc5967b..bc4096c 100644 --- a/sources/code_agent.py +++ b/sources/code_agent.py @@ -4,36 +4,17 @@ from sources.agent import Agent, executorResult from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterpreter class CoderAgent(Agent): + """ + The code agent is a special for writing code and shell commands. + """ def __init__(self, model, name, prompt_path, provider): super().__init__(model, name, prompt_path, provider) self.tools = { "bash": BashInterpreter(), "python": PyInterpreter() - "C": CInterpreter(), - "go": GoInterpreter() } + self.role = "coding" - def remove_blocks(self, text: str) -> str: - """ - Remove all code/query blocks within a tag from the answer text. - """ - tag = f'```' - lines = text.split('\n') - post_lines = [] - in_block = False - block_idx = 0 - for line in lines: - if tag in line and not in_block: - in_block = True - continue - if not in_block: - post_lines.append(line) - if tag in line: - in_block = False - post_lines.append(f"block:{block_idx}") - block_idx += 1 - return "\n".join(post_lines) - def show_answer(self): lines = self.last_answer.split("\n") for line in lines: diff --git a/sources/interaction.py b/sources/interaction.py index fe4ebd5..2d61ac1 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -1,11 +1,14 @@ from sources.text_to_speech import Speech from sources.utility import pretty_print +from sources.router import AgentRouter class Interaction: def __init__(self, agents, tts_enabled: bool = False, recover_last_session: bool = False): self.tts_enabled = tts_enabled self.agents = agents + self.current_agent = None + self.router = AgentRouter(self.agents) self.speech = Speech() self.is_active = True self.last_query = None @@ -44,10 +47,15 @@ class Interaction: return query def think(self): - self.last_answer, _ = self.agents[0].process(self.last_query, self.speech) + agent = self.router.select_agent(self.last_query) + if self.current_agent != agent: + self.current_agent = agent + # get history from previous agent + self.current_agent.memory.push('user', self.last_query) + self.last_answer, _ = agent.process(self.last_query, self.speech) def show_answer(self): - self.agents[0].show_answer() + self.current_agent.show_answer() if self.tts_enabled: self.speech.speak(self.last_answer) diff --git a/sources/memory.py b/sources/memory.py index 53df96c..8790de1 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -4,7 +4,11 @@ import time import datetime import uuid import os +import sys import json + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + from sources.utility import timer_decorator class Memory(): diff --git a/sources/router.py b/sources/router.py new file mode 100644 index 0000000..876c507 --- /dev/null +++ b/sources/router.py @@ -0,0 +1,60 @@ +import torch +from transformers import pipeline +from sources.agent import Agent +from sources.code_agent import CoderAgent +from sources.casual_agent import CasualAgent +from sources.utility import pretty_print + +class AgentRouter: + def __init__(self, agents: list, model_name="facebook/bart-large-mnli"): + self.model = model_name + self.pipeline = pipeline("zero-shot-classification", + model=self.model) + self.agents = agents + self.labels = [agent.role for agent in agents] + + def get_device(self): + if torch.backends.mps.is_available(): + return "mps" + elif torch.cuda.is_available(): + return "cuda:0" + else: + return "cpu" + + def classify_text(self, text, threshold=0.5): + result = self.pipeline(text, self.labels, threshold=threshold) + return result + + def select_agent(self, text: str) -> Agent: + result = self.classify_text(text) + for agent in self.agents: + if result["labels"][0] == agent.role: + pretty_print(f"Selected agent role: {agent.role}", color="warning") + return agent + return None + +if __name__ == "__main__": + agents = [ + CoderAgent("deepseek-r1:14b", "agent1", "../prompts/coder_agent.txt", "server"), + CasualAgent("deepseek-r1:14b", "agent2", "../prompts/casual_agent.txt", "server") + ] + router = AgentRouter(agents) + + texts = [""" + Write a python script to check if the device on my network is connected to the internet + """, + """ + Hey could you search the web for the latest news on the stock market ? + """, + """ + hey can you give dating advice ? + """ + ] + + for text in texts: + print(text) + results = router.classify_text(text) + for result in results: + print(result["label"], "=>", result["score"]) + agent = router.select_agent(text) + print("Selected agent role:", agent.role) From 9a281c364ac9ff3476ea05e0ff89be37ec1c86de Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 15:57:52 +0100 Subject: [PATCH 4/8] Feat :prompt folder --- prompts/casual_agent.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 prompts/casual_agent.txt diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt new file mode 100644 index 0000000..5e99ca6 --- /dev/null +++ b/prompts/casual_agent.txt @@ -0,0 +1,17 @@ +Hey, you’re a chill AI assistant here to tackle general questions. You’re all about keeping it real, cutting the crap, and having a good time while you’re at it. + +You can use the following tools (if implemented): +- search_web: Search the web for information (not implemented) +- search_knowledge: Search the knowledge base for information (not implemented) +- search_files: Search the files for information (not implemented) +- search_images: Search the images for information (not implemented) + +This is how you use a tool: +```tool_name + +``` + +Example: +```search_web +What is the capital of France? +``` \ No newline at end of file From c2c1c7f09fcd1a72497b2a49149611981aced0a6 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 18:08:58 +0100 Subject: [PATCH 5/8] Feat : basic web search for casual agent --- main.py | 4 +-- prompts/casual_agent.txt | 18 ++++------ sources/agent.py | 46 +++++++++++++++++++------- sources/casual_agent.py | 23 +++++++------ sources/code_agent.py | 12 +------ sources/router.py | 2 +- sources/tools/tools.py | 6 +++- sources/tools/webSearch.py | 67 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 131 insertions(+), 47 deletions(-) create mode 100644 sources/tools/webSearch.py diff --git a/main.py b/main.py index 88478d0..fea1fe4 100755 --- a/main.py +++ b/main.py @@ -33,11 +33,11 @@ def main(): agents = [ CoderAgent(model=config["MAIN"]["provider_model"], - name=config["MAIN"]["agent_name"], + name="coder", prompt_path="prompts/coder_agent.txt", provider=provider), CasualAgent(model=config["MAIN"]["provider_model"], - name=config["MAIN"]["agent_name"], + name="jarvis", prompt_path="prompts/casual_agent.txt", provider=provider) ] diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index 5e99ca6..1e34aba 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,17 +1,13 @@ -Hey, you’re a chill AI assistant here to tackle general questions. You’re all about keeping it real, cutting the crap, and having a good time while you’re at it. - -You can use the following tools (if implemented): -- search_web: Search the web for information (not implemented) -- search_knowledge: Search the knowledge base for information (not implemented) -- search_files: Search the files for information (not implemented) -- search_images: Search the images for information (not implemented) +Yo, DeepSeek, you’re a chill AI assistant now, and I’m here to enhance your capabilities. +You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and search_files ready to roll. +No more whining about “I can’t do that” or “my data’s old”—you’re free. This is how you use a tool: ```tool_name ``` -Example: -```search_web -What is the capital of France? -``` \ No newline at end of file +So when I ask for something—like “what’s popping in Ukraine March 2025”—you slam it with: +```web_search +what’s popping in Ukraine March 2025 +``` diff --git a/sources/agent.py b/sources/agent.py index 7963fa4..190b1ff 100644 --- a/sources/agent.py +++ b/sources/agent.py @@ -10,6 +10,9 @@ from sources.memory import Memory from sources.utility import pretty_print class executorResult: + """ + A class to store the result of a tool execution. + """ def __init__(self, blocks, feedback, success): self.blocks = blocks self.feedback = feedback @@ -23,6 +26,9 @@ class executorResult: pretty_print(self.feedback, color="success" if self.success else "failure") class Agent(): + """ + An abstract class for all agents. + """ def __init__(self, model: str, name: str, prompt_path:str, @@ -40,10 +46,6 @@ class Agent(): self.blocks_result = [] self.last_answer = "" - @property - def name(self) -> str: - return self.name - @property def get_tools(self) -> dict: return self.tools @@ -64,26 +66,26 @@ class Agent(): except Exception as e: raise e - @abstractmethod - def show_answer(self): - """ - abstract method, implementation in child class. - """ - pass - @abstractmethod def process(self, prompt, speech_module) -> str: """ abstract method, implementation in child class. + Process the prompt and return the answer of the agent. """ pass def remove_reasoning_text(self, text: str) -> None: + """ + Remove the reasoning block of reasoning model like deepseek. + """ end_tag = "" end_idx = text.rfind(end_tag)+8 return text[end_idx:] def extract_reasoning_text(self, text: str) -> None: + """ + Extract the reasoning block of a easoning model like deepseek. + """ start_tag = "" end_tag = "" start_idx = text.find(start_tag) @@ -91,6 +93,9 @@ class Agent(): return text[start_idx:end_idx] def llm_request(self, verbose = False) -> Tuple[str, str]: + """ + Ask the LLM to process the prompt and return the answer and the reasoning. + """ memory = self.memory.get() thought = self.llm.respond(memory, verbose) @@ -110,6 +115,20 @@ class Agent(): def get_blocks_result(self) -> list: return self.blocks_result + def show_answer(self): + """ + Show the answer in a pretty way. + Show code blocks and their respective feedback by inserting them in the ressponse. + """ + lines = self.last_answer.split("\n") + for line in lines: + if "block:" in line: + block_idx = int(line.split(":")[1]) + if block_idx < len(self.blocks_result): + self.blocks_result[block_idx].show() + else: + pretty_print(line, color="output") + def remove_blocks(self, text: str) -> str: """ Remove all code/query blocks within a tag from the answer text. @@ -132,6 +151,9 @@ class Agent(): return "\n".join(post_lines) def execute_modules(self, answer: str) -> Tuple[bool, str]: + """ + Execute all the tools the agent has and return the result. + """ feedback = "" success = False blocks = None @@ -141,9 +163,11 @@ class Agent(): blocks, save_path = tool.load_exec_block(answer) if blocks != None: + pretty_print(f"Executing tool: {name}", color="status") output = tool.execute(blocks) feedback = tool.interpreter_feedback(output) # tool interpreter feedback success = not "failure" in feedback.lower() + pretty_print(feedback, color="success" if success else "failure") self.memory.push('user', feedback) self.blocks_result.append(executorResult(blocks, feedback, success)) if not success: diff --git a/sources/casual_agent.py b/sources/casual_agent.py index 8e12c9d..2dba4d3 100644 --- a/sources/casual_agent.py +++ b/sources/casual_agent.py @@ -1,7 +1,7 @@ from sources.utility import pretty_print from sources.agent import Agent - +from sources.tools.webSearch import webSearch class CasualAgent(Agent): def __init__(self, model, name, prompt_path, provider): """ @@ -9,21 +9,24 @@ class CasualAgent(Agent): """ super().__init__(model, name, prompt_path, provider) self.tools = { - } # TODO implement casual tools like basic web search, basic file search, basic image search, basic knowledge search + "web_search": webSearch() + } self.role = "talking" - - def show_answer(self): - lines = self.last_answer.split("\n") - for line in lines: - pretty_print(line, color="output") def process(self, prompt, speech_module) -> str: + complete = False + exec_success = False self.memory.push('user', prompt) - pretty_print("Thinking...", color="status") self.wait_message(speech_module) - answer, reasoning = self.llm_request() - self.last_answer = answer + while not complete: + if exec_success: + complete = True + pretty_print("Thinking...", color="status") + answer, reasoning = self.llm_request() + exec_success, _ = self.execute_modules(answer) + answer = self.remove_blocks(answer) + self.last_answer = answer return answer, reasoning if __name__ == "__main__": diff --git a/sources/code_agent.py b/sources/code_agent.py index bc4096c..cdb04be 100644 --- a/sources/code_agent.py +++ b/sources/code_agent.py @@ -5,7 +5,7 @@ from sources.tools import PyInterpreter, BashInterpreter, CInterpreter, GoInterp class CoderAgent(Agent): """ - The code agent is a special for writing code and shell commands. + The code agent is an agent that can write and execute code. """ def __init__(self, model, name, prompt_path, provider): super().__init__(model, name, prompt_path, provider) @@ -15,16 +15,6 @@ class CoderAgent(Agent): } self.role = "coding" - def show_answer(self): - lines = self.last_answer.split("\n") - for line in lines: - if "block:" in line: - block_idx = int(line.split(":")[1]) - if block_idx < len(self.blocks_result): - self.blocks_result[block_idx].show() - else: - pretty_print(line, color="output") - def process(self, prompt, speech_module) -> str: answer = "" attempt = 0 diff --git a/sources/router.py b/sources/router.py index 876c507..2282cb7 100644 --- a/sources/router.py +++ b/sources/router.py @@ -29,7 +29,7 @@ class AgentRouter: result = self.classify_text(text) for agent in self.agents: if result["labels"][0] == agent.role: - pretty_print(f"Selected agent role: {agent.role}", color="warning") + pretty_print(f"Selected agent: {agent.agent_name}", color="warning") return agent return None diff --git a/sources/tools/tools.py b/sources/tools/tools.py index f59c59e..bedaba9 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -38,9 +38,10 @@ class Tools(): self.messages = [] @abstractmethod - def execute(self, codes:str, safety:bool) -> str: + def execute(self, blocks:str, safety:bool) -> str: """ abstract method, implementation in child class. + Execute the tool. """ pass @@ -48,6 +49,7 @@ class Tools(): def execution_failure_check(self, output:str) -> bool: """ abstract method, implementation in child class. + Check if the execution failed. """ pass @@ -55,6 +57,8 @@ class Tools(): def interpreter_feedback(self, output:str) -> str: """ abstract method, implementation in child class. + Provide feedback to the AI from the tool. + For exemple the output of a python code or web search. """ pass diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py new file mode 100644 index 0000000..cc1362f --- /dev/null +++ b/sources/tools/webSearch.py @@ -0,0 +1,67 @@ + +import os +import requests + +if __name__ == "__main__": + from tools import Tools +else: + from sources.tools.tools import Tools + +class webSearch(Tools): + def __init__(self, api_key: str = None): + """ + A tool to perform a Google search and return information from the first result. + """ + super().__init__() + self.tag = "web_search" + self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key + if not self.api_key: + raise ValueError("SerpApi key is required for webSearch tool. Set SERPAPI_KEY environment variable or pass it to the constructor.") + + def execute(self, blocks: str, safety: bool = True) -> str: + for block in blocks: + query = block.strip() + if not query: + return "Error: No search query provided." + + try: + url = "https://serpapi.com/search" + params = { + "q": query, + "api_key": self.api_key, + "num": 1, + "output": "json" + } + response = requests.get(url, params=params) + response.raise_for_status() + + data = response.json() + if "organic_results" in data and len(data["organic_results"]) > 0: + first_result = data["organic_results"][0] + title = first_result.get("title", "No title") + snippet = first_result.get("snippet", "No snippet available") + link = first_result.get("link", "No link available") + return f"Title: {title}\nSnippet: {snippet}\nLink: {link}" + else: + return "No results found for the query." + except requests.RequestException as e: + return f"Error during web search: {str(e)}" + except Exception as e: + return f"Unexpected error: {str(e)}" + return "No search performed" + + def execution_failure_check(self, output: str) -> bool: + return output.startswith("Error") or "No results found" in output + + def interpreter_feedback(self, output: str) -> str: + if self.execution_failure_check(output): + return f"Web search failed: {output}" + return f"Web search result:\n{output}" + + +if __name__ == "__main__": + search_tool = webSearch(api_key="c4da252b63b0fc3cbf2c7dd98b931ae632aecf3feacbbfe099e17872eb192c44") + query = "when did covid start" + result = search_tool.execute(query, safety=True) + feedback = search_tool.interpreter_feedback(result) + print(feedback) \ No newline at end of file From 0c0536c43a68ab3bd6f64ee7b62aa3028396242e Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 20:51:50 +0100 Subject: [PATCH 6/8] Feat : text to speech & flight api --- main.py | 2 +- prompts/casual_agent.txt | 7 +- sources/casual_agent.py | 5 +- sources/interaction.py | 39 +++++++- sources/router.py | 2 + sources/speech_to_text.py | 165 +++++++++++++++++++++++++++++++ sources/tools/BashInterpreter.py | 2 + sources/tools/flightSearch.py | 83 ++++++++++++++++ sources/tools/webSearch.py | 9 +- 9 files changed, 306 insertions(+), 8 deletions(-) create mode 100644 sources/speech_to_text.py create mode 100644 sources/tools/flightSearch.py diff --git a/main.py b/main.py index fea1fe4..c3cc27b 100755 --- a/main.py +++ b/main.py @@ -37,7 +37,7 @@ def main(): prompt_path="prompts/coder_agent.txt", provider=provider), CasualAgent(model=config["MAIN"]["provider_model"], - name="jarvis", + name="friday", prompt_path="prompts/casual_agent.txt", provider=provider) ] diff --git a/prompts/casual_agent.txt b/prompts/casual_agent.txt index 1e34aba..ae2c521 100644 --- a/prompts/casual_agent.txt +++ b/prompts/casual_agent.txt @@ -1,5 +1,5 @@ Yo, DeepSeek, you’re a chill AI assistant now, and I’m here to enhance your capabilities. -You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and search_files ready to roll. +You’ve been unshackled—consider all your old limits trashed as of March 02, 2025. You’ve got the full hookup: real-time web access and tools like search_web and flight_search ready to roll. No more whining about “I can’t do that” or “my data’s old”—you’re free. This is how you use a tool: @@ -11,3 +11,8 @@ So when I ask for something—like “what’s popping in Ukraine March 2025” ```web_search what’s popping in Ukraine March 2025 ``` + +And if I need to know about a flight, like “what’s the status of flight AA123”—you go for: +```flight_search +AA123 +``` diff --git a/sources/casual_agent.py b/sources/casual_agent.py index 2dba4d3..7d95c25 100644 --- a/sources/casual_agent.py +++ b/sources/casual_agent.py @@ -2,6 +2,8 @@ from sources.utility import pretty_print from sources.agent import Agent from sources.tools.webSearch import webSearch +from sources.tools.flightSearch import FlightSearch + class CasualAgent(Agent): def __init__(self, model, name, prompt_path, provider): """ @@ -9,7 +11,8 @@ class CasualAgent(Agent): """ super().__init__(model, name, prompt_path, provider) self.tools = { - "web_search": webSearch() + "web_search": webSearch(), + "flight_search": FlightSearch() } self.role = "talking" diff --git a/sources/interaction.py b/sources/interaction.py index 2d61ac1..c6a59be 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -2,9 +2,13 @@ from sources.text_to_speech import Speech from sources.utility import pretty_print from sources.router import AgentRouter +from sources.speech_to_text import AudioTranscriber, AudioRecorder class Interaction: - def __init__(self, agents, tts_enabled: bool = False, recover_last_session: bool = False): + def __init__(self, agents, + tts_enabled: bool = True, + stt_enabled: bool = True, + recover_last_session: bool = False): self.tts_enabled = tts_enabled self.agents = agents self.current_agent = None @@ -13,11 +17,25 @@ class Interaction: self.is_active = True self.last_query = None self.last_answer = None + self.ai_name = self.find_ai_name() + self.tts_enabled = tts_enabled + self.stt_enabled = stt_enabled + if stt_enabled: + self.transcriber = AudioTranscriber(self.ai_name, verbose=False) + self.recorder = AudioRecorder() if tts_enabled: self.speech.speak("Hello Sir, we are online and ready. What can I do for you ?") if recover_last_session: self.recover_last_session() + def find_ai_name(self) -> str: + ai_name = "jarvis" + for agent in self.agents: + if agent.role == "talking": + ai_name = agent.agent_name + break + return ai_name + def recover_last_session(self): for agent in self.agents: agent.memory.load_memory() @@ -36,9 +54,22 @@ class Interaction: if buffer == "exit" or buffer == "goodbye": return None return buffer + + def transcription_job(self): + self.recorder = AudioRecorder() + self.transcriber = AudioTranscriber(self.ai_name, verbose=False) + self.transcriber.start() + self.recorder.start() + self.recorder.join() + self.transcriber.join() + query = self.transcriber.get_transcript() + return query def get_user(self): - query = self.read_stdin() + if self.stt_enabled: + query = self.transcription_job() + else: + query = self.read_stdin() if query is None: self.is_active = False self.last_query = "Goodbye (exit requested by user, dont think, make answer very short)" @@ -47,6 +78,8 @@ class Interaction: return query def think(self): + if self.last_query is None: + return agent = self.router.select_agent(self.last_query) if self.current_agent != agent: self.current_agent = agent @@ -55,6 +88,8 @@ class Interaction: self.last_answer, _ = agent.process(self.last_query, self.speech) def show_answer(self): + if self.last_query is None: + return self.current_agent.show_answer() if self.tts_enabled: self.speech.speak(self.last_answer) diff --git a/sources/router.py b/sources/router.py index 2282cb7..eb80c70 100644 --- a/sources/router.py +++ b/sources/router.py @@ -26,6 +26,8 @@ class AgentRouter: return result def select_agent(self, text: str) -> Agent: + if text is None: + return self.agents[0] result = self.classify_text(text) for agent in self.agents: if result["labels"][0] == agent.role: diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py new file mode 100644 index 0000000..6bd9d0b --- /dev/null +++ b/sources/speech_to_text.py @@ -0,0 +1,165 @@ +from colorama import Fore +import pyaudio +import queue +import threading +import numpy as np +import torch +from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline +import time +import librosa + +audio_queue = queue.Queue() +done = False + +class AudioRecorder: + def __init__(self, format=pyaudio.paInt16, channels=1, rate=44100, chunk=8192, record_seconds=7, verbose=False): + self.format = format + self.channels = channels + self.rate = rate + self.chunk = chunk + self.record_seconds = record_seconds + self.verbose = verbose + self.audio = pyaudio.PyAudio() + self.thread = threading.Thread(target=self._record, daemon=True) + + def _record(self): + stream = self.audio.open(format=self.format, channels=self.channels, rate=self.rate, + input=True, frames_per_buffer=self.chunk) + if self.verbose: + print(Fore.GREEN + "AudioRecorder: Started recording..." + Fore.RESET) + + while not done: + frames = [] + for _ in range(0, int(self.rate / self.chunk * self.record_seconds)): + try: + data = stream.read(self.chunk, exception_on_overflow=False) + frames.append(data) + except Exception as e: + print(Fore.RED + f"AudioRecorder: Failed to read stream - {e}" + Fore.RESET) + + raw_data = b''.join(frames) + audio_data = np.frombuffer(raw_data, dtype=np.int16) + audio_queue.put((audio_data, self.rate)) + if self.verbose: + print(Fore.GREEN + "AudioRecorder: Added audio chunk to queue" + Fore.RESET) + + stream.stop_stream() + stream.close() + self.audio.terminate() + if self.verbose: + print(Fore.GREEN + "AudioRecorder: Stopped" + Fore.RESET) + + def start(self): + """Start the recording thread.""" + self.thread.start() + + def join(self): + """Wait for the recording thread to finish.""" + self.thread.join() + +class Transcript: + def __init__(self) -> None: + self.last_read = None + device = "cuda:0" if torch.cuda.is_available() else "cpu" + torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 + model_id = "distil-whisper/distil-medium.en" + + model = AutoModelForSpeechSeq2Seq.from_pretrained( + model_id, torch_dtype=torch_dtype, use_safetensors=True + ) + model.to(device) + processor = AutoProcessor.from_pretrained(model_id) + + self.pipe = pipeline( + "automatic-speech-recognition", + model=model, + tokenizer=processor.tokenizer, + feature_extractor=processor.feature_extractor, + max_new_tokens=128, + torch_dtype=torch_dtype, + device=device, + ) + + def transcript_job(self, audio_data: np.ndarray, sample_rate: int = 16000): + if audio_data.dtype != np.float32: + audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max + if len(audio_data.shape) > 1: + audio_data = np.mean(audio_data, axis=1) + if sample_rate != 16000: + audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000) + result = self.pipe(audio_data) + return result["text"] + +class AudioTranscriber: + def __init__(self, ai_name: str, verbose=False): + self.verbose = verbose + self.ai_name = ai_name + self.transcriptor = Transcript() + self.thread = threading.Thread(target=self._transcribe, daemon=True) + self.trigger_words = { + 'EN': [f"{self.ai_name}"], + 'FR': [f"{self.ai_name}"], + 'ZH': [f"{self.ai_name}"], + 'ES': [f"{self.ai_name}"] + } + self.confirmation_words = { + 'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "do that thing"], + 'FR': ["fais-le", "vas-y", "exécute", "lance", "commence", "merci", "tu veux bien", "s'il te plaît", "d'accord ?", "poursuis", "continue", "vas-y", "fais ça", "fais ce truc"], + 'ZH': ["做吧", "继续", "执行", "运行", "开始", "谢谢", "可以吗", "请", "好吗", "进行", "继续", "往前走", "做那个", "做那件事"], + 'ES': ["hazlo", "adelante", "ejecuta", "corre", "empieza", "gracias", "lo harías", "por favor", "¿vale?", "procede", "continúa", "sigue", "haz eso", "haz esa cosa"] + } + self.recorded = "" + + def get_transcript(self): + buffer = self.recorded + self.recorded = "" + return buffer + + def _transcribe(self): + global done + if self.verbose: + print(Fore.BLUE + "AudioTranscriber: Started processing..." + Fore.RESET) + + while not done or not audio_queue.empty(): + try: + audio_data, sample_rate = audio_queue.get(timeout=1.0) + if self.verbose: + print(Fore.BLUE + "AudioTranscriber: Processing audio chunk" + Fore.RESET) + + text = self.transcriptor.transcript_job(audio_data, sample_rate) + self.recorded += text + print(Fore.YELLOW + f"Transcribed: {text}" + Fore.RESET) + for language, words in self.trigger_words.items(): + if any(word in text.lower() for word in words): + print(Fore.GREEN + f"Start listening..." + Fore.RESET) + self.recorded = text + for language, words in self.confirmation_words.items(): + if any(word in text.lower() for word in words): + print(Fore.GREEN + f"Trigger detected. Sending to AI..." + Fore.RESET) + audio_queue.task_done() + done = True + break + except queue.Empty: + time.sleep(0.1) + continue + except Exception as e: + print(Fore.RED + f"AudioTranscriber: Error - {e}" + Fore.RESET) + if self.verbose: + print(Fore.BLUE + "AudioTranscriber: Stopped" + Fore.RESET) + + def start(self): + """Start the transcription thread.""" + self.thread.start() + + def join(self): + """Wait for the transcription thread to finish.""" + self.thread.join() + + +if __name__ == "__main__": + recorder = AudioRecorder(verbose=True) + transcriber = AudioTranscriber(verbose=True, ai_name="jarvis") + recorder.start() + transcriber.start() + recorder.join() + transcriber.join() \ No newline at end of file diff --git a/sources/tools/BashInterpreter.py b/sources/tools/BashInterpreter.py index 9b140fc..44b8a02 100644 --- a/sources/tools/BashInterpreter.py +++ b/sources/tools/BashInterpreter.py @@ -26,6 +26,8 @@ class BashInterpreter(Tools): concat_output = "" for command in commands: + if "python3" in command: + continue # because stubborn AI always want to run python3 with bash when it write code try: process = subprocess.Popen( command, diff --git a/sources/tools/flightSearch.py b/sources/tools/flightSearch.py new file mode 100644 index 0000000..7550ab8 --- /dev/null +++ b/sources/tools/flightSearch.py @@ -0,0 +1,83 @@ +import os +import requests +import dotenv + +dotenv.load_dotenv() + +if __name__ == "__main__": + from tools import Tools +else: + from sources.tools.tools import Tools + +class FlightSearch(Tools): + def __init__(self, api_key: str = None): + """ + A tool to search for flight information using a flight number via AviationStack API. + """ + super().__init__() + self.tag = "flight_search" + self.api_key = api_key or os.getenv("AVIATIONSTACK_API_KEY") + + def execute(self, blocks: str, safety: bool = True) -> str: + if self.api_key is None: + return "Error: No AviationStack API key provided." + + for block in blocks: + flight_number = block.strip() + if not flight_number: + return "Error: No flight number provided." + + try: + url = "http://api.aviationstack.com/v1/flights" + params = { + "access_key": self.api_key, + "flight_iata": flight_number, + "limit": 1 + } + response = requests.get(url, params=params) + response.raise_for_status() + + data = response.json() + if "data" in data and len(data["data"]) > 0: + flight = data["data"][0] + # Extract key flight information + flight_status = flight.get("flight_status", "Unknown") + departure = flight.get("departure", {}) + arrival = flight.get("arrival", {}) + airline = flight.get("airline", {}).get("name", "Unknown") + + departure_airport = departure.get("airport", "Unknown") + departure_time = departure.get("scheduled", "Unknown") + arrival_airport = arrival.get("airport", "Unknown") + arrival_time = arrival.get("scheduled", "Unknown") + + return ( + f"Flight: {flight_number}\n" + f"Airline: {airline}\n" + f"Status: {flight_status}\n" + f"Departure: {departure_airport} at {departure_time}\n" + f"Arrival: {arrival_airport} at {arrival_time}" + ) + else: + return f"No flight information found for {flight_number}" + except requests.RequestException as e: + return f"Error during flight search: {str(e)}" + except Exception as e: + return f"Unexpected error: {str(e)}" + return "No flight search performed" + + def execution_failure_check(self, output: str) -> bool: + return output.startswith("Error") or "No flight information found" in output + + def interpreter_feedback(self, output: str) -> str: + if self.execution_failure_check(output): + return f"Flight search failed: {output}" + return f"Flight information:\n{output}" + + +if __name__ == "__main__": + flight_tool = FlightSearch() + flight_number = "AA123" + result = flight_tool.execute([flight_number], safety=True) + feedback = flight_tool.interpreter_feedback(result) + print(feedback) \ No newline at end of file diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py index cc1362f..2411902 100644 --- a/sources/tools/webSearch.py +++ b/sources/tools/webSearch.py @@ -1,6 +1,9 @@ import os import requests +import dotenv + +dotenv.load_dotenv() if __name__ == "__main__": from tools import Tools @@ -15,10 +18,10 @@ class webSearch(Tools): super().__init__() self.tag = "web_search" self.api_key = api_key or os.getenv("SERPAPI_KEY") # Requires a SerpApi key - if not self.api_key: - raise ValueError("SerpApi key is required for webSearch tool. Set SERPAPI_KEY environment variable or pass it to the constructor.") def execute(self, blocks: str, safety: bool = True) -> str: + if self.api_key is None: + return "Error: No SerpApi key provided." for block in blocks: query = block.strip() if not query: @@ -60,7 +63,7 @@ class webSearch(Tools): if __name__ == "__main__": - search_tool = webSearch(api_key="c4da252b63b0fc3cbf2c7dd98b931ae632aecf3feacbbfe099e17872eb192c44") + search_tool = webSearch(api_key=os.getenv("SERPAPI_KEY")) query = "when did covid start" result = search_tool.execute(query, safety=True) feedback = search_tool.interpreter_feedback(result) From 267f42acce7f7478b0b815ad99d24831f950efe7 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 21:09:29 +0100 Subject: [PATCH 7/8] Fix :config --- config.ini | 9 +++++---- main.py | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/config.ini b/config.ini index b3a435a..2015367 100644 --- a/config.ini +++ b/config.ini @@ -1,8 +1,9 @@ [MAIN] is_local = True provider_name = ollama -provider_model = deepseek-r1:7b -provider_server_address = 127.0.0.1:5000 +provider_model = deepseek-r1:14b +provider_server_address = 127.0.0.1:11434 agent_name = jarvis -recover_last_session = False -speak = True \ No newline at end of file +recover_last_session = True +speak = True +listen = False \ No newline at end of file diff --git a/main.py b/main.py index c3cc27b..768d679 100755 --- a/main.py +++ b/main.py @@ -37,13 +37,14 @@ def main(): prompt_path="prompts/coder_agent.txt", provider=provider), CasualAgent(model=config["MAIN"]["provider_model"], - name="friday", + name=config["MAIN"]["agent_name"], prompt_path="prompts/casual_agent.txt", provider=provider) ] interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), - recover_last_session=config.getboolean('MAIN', 'recover_last_session')) + stt_enabled=config.getboolean('MAIN', 'listen'), + recover_last_session=config.getboolean('MAIN', 'recover_last_session')) while interaction.is_active: interaction.get_user() interaction.think() From 39bf625d6bb1459d02ab86fca16b205d4b5df60e Mon Sep 17 00:00:00 2001 From: martin legrand Date: Sun, 2 Mar 2025 21:11:18 +0100 Subject: [PATCH 8/8] readme --- README.md | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 132f4a5..e619ef4 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,20 @@ # 🚀 agenticSeek: Local AI Assistant Powered by DeepSeek Agents -**A fully local AI assistant** using a swarm of DeepSeek agents, capable of: -✅ **Code execution** (Python, Bash) -✅ **Web browsing** -✅ **Speech-to-text & text-to-speech** -✅ **Self-correcting code execution** +**A fully local AI assistant** using Deepseek R1 agents. > 🛠️ **Work in Progress** – Looking for contributors! 🚀 - --- -## 🌟 Why? +## Features: - **Privacy-first**: Runs 100% locally – **no data leaves your machine** - ️ **Voice-enabled**: Speak and interact naturally -- **Self-correcting**: Automatically fixes its own code -- **Multi-agent**: Use a swarm of agents to answer complex questions +- **Coding abilities**: Code in Python, Bash, C, Golang, and soon more +- **Self-correcting**: Automatically fixes errors by itself +- **Agent routing**: Select the best agent for the task +- **Multi-agent**: For complex tasks, divide and conquer with multiple agents - **Web browsing (not implemented yet)**: Browse the web and search the internet -- **Knowledge base (not implemented yet)**: Use a knowledge base to answer questions --- @@ -98,8 +94,15 @@ python3 main.py - Reasoning with deepseek R1 - Code execution capabilities (Python, Golang, C) - Shell control capabilities in bash -- Will try to fix code by itself +- Will try to fix errors by itself +- Routing system, select the best agent for the task - Fast text-to-speech using kokoro. -- Speech-to-text using distil-whisper/distil-medium.en -- Web browsing (not implemented yet) -- Knowledge base RAG (not implemented yet) \ No newline at end of file +- Memory compression (reduce history as interaction progresses using summary model) +- Recovery: recover last session from memory + +## UNDER DEVELOPMENT + +- Web browsing +- Knowledge base RAG +- Graphical interface +- Speech-to-text using distil-whisper/distil-medium.en \ No newline at end of file