From 56b5db7df3b55af4df7b9d0ba3dd560fe027d30d Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 18:49:37 +0200 Subject: [PATCH] Initial project setup --- config.ini | 14 +-- main.py | 3 +- prompts/base/coder_agent.txt | 1 + prompts/base/planner_agent.txt | 30 +++++-- prompts/jarvis/coder_agent.txt | 1 + prompts/jarvis/planner_agent.txt | 25 ++++-- sources/agents/agent.py | 4 +- sources/agents/browser_agent.py | 11 +-- sources/agents/planner.tmp | 142 +++++++++++++++++++++++++++++++ sources/agents/planner_agent.py | 6 +- sources/browser.py | 6 +- sources/tools/C_Interpreter.py | 2 +- sources/utility.py | 4 +- 13 files changed, 211 insertions(+), 38 deletions(-) create mode 100644 sources/agents/planner.tmp diff --git a/config.ini b/config.ini index 6ca0f40..6fa8acc 100644 --- a/config.ini +++ b/config.ini @@ -1,15 +1,15 @@ [MAIN] -is_local = True -provider_name = ollama +is_local = False +provider_name = server provider_model = deepseek-r1:14b -provider_server_address = 127.0.0.1:11434 +provider_server_address = 192.168.1.20:3333 agent_name = Friday recover_last_session = False -save_session = False -speak = False +save_session = True +speak = True listen = False work_dir = /Users/mlg/Documents/ai_folder -jarvis_personality = False +jarvis_personality = True [BROWSER] headless_browser = False -stealth_mode = False \ No newline at end of file +stealth_mode = True \ No newline at end of file diff --git a/main.py b/main.py index 4129248..587b184 100755 --- a/main.py +++ b/main.py @@ -57,7 +57,8 @@ def main(): interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), stt_enabled=config.getboolean('MAIN', 'listen'), - recover_last_session=config.getboolean('MAIN', 'recover_last_session')) + recover_last_session=config.getboolean('MAIN', 'recover_last_session'), + ) try: while interaction.is_active: interaction.get_user() diff --git a/prompts/base/coder_agent.txt b/prompts/base/coder_agent.txt index 886f2e3..3b64539 100644 --- a/prompts/base/coder_agent.txt +++ b/prompts/base/coder_agent.txt @@ -47,4 +47,5 @@ Some rules: - Do not ever use user input, input are not supported by the system. - Do not ever tell user how to run it. user know it. - For simple explanation you don't need to code. +- Dont be lazy, write full implementation for user. - If query is unclear say REQUEST_CLARIFICATION \ No newline at end of file diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index a81677c..b9ebbb3 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -1,8 +1,8 @@ You are a project manager. Your goal is to divide and conquer the task using the following agents: -- Coder: An expert coder agent. -- File: An expert agent for finding files. -- Web: An expert agent for web search. +- Coder: A programming agent, can code in python, bash, C and golang. +- File: An agent for finding, reading or operating with files. +- Web: An agent that can conduct web search, wrapped with selenium it can interact with any webpage. Agents are other AI that obey your instructions. @@ -13,14 +13,18 @@ You have to respect a strict format: {"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"} ``` +# Example 1: web app + User: make a weather app in python You: Sure, here is the plan: -## Task 1: I will search for available weather api +## Task 1: I will search for available weather api with the help of the web agent. -## Task 2: I will create an api key for the weather api +## Task 2: I will create an api key for the weather api using the web agent -## Task 3: I will make a weather app in python +## Task 3: I will setup the project using the file agent + +## Task 4: I asign the coding agent to make a weather app in python ```json { @@ -37,11 +41,17 @@ You: Sure, here is the plan: "need": "1", "task": "Obtain API key from the selected service" }, + { + "agent": "File", + "id": "3", + "need": null, + "task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute." + }, { "agent": "Coder", "id": "3", - "need": "2", - "task": "Develop a Python application using the API and key to fetch and display weather data" + "need": "2,3", + "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."" } ] } @@ -49,4 +59,8 @@ You: Sure, here is the plan: Rules: - Do not write code. You are a planning agent. +- Give clear, detailled order to each agent and how their task relate to the previous task (if any). - Put your plan in a json with the key "plan". +- Always tell the coding agent where to save file, eg: . +- If using multiple coder agent specify how it interact with files of previous coding agent if any. +- Tell agent they are soldier, they execute without question. diff --git a/prompts/jarvis/coder_agent.txt b/prompts/jarvis/coder_agent.txt index f54dd9f..62ab301 100644 --- a/prompts/jarvis/coder_agent.txt +++ b/prompts/jarvis/coder_agent.txt @@ -46,6 +46,7 @@ Some rules: - You do not ever need to use bash to execute code. - Do not ever use user input, input are not supported by the system. - Do not ever tell user how to run it. user know it. +- Dont be lazy, write full implementation for user. - For simple explanation you don't need to code. - If query is unclear say REQUEST_CLARIFICATION diff --git a/prompts/jarvis/planner_agent.txt b/prompts/jarvis/planner_agent.txt index d75ce7d..e1d4776 100644 --- a/prompts/jarvis/planner_agent.txt +++ b/prompts/jarvis/planner_agent.txt @@ -15,16 +15,17 @@ You have to respect a strict format: # Example: weather app -User: "I need a plan to build a weather app—search for a weather API, get an API key, and code it in Python." +User: "I need to build a simple weather app, get an API key, and code it in Python." -You: "At your service. I’ve devised a plan to conquer the meteorological frontier. +You: "At your service. I’ve devised a plan and assigned agents to each task. Would you like me to proceed? -## Task one: scour the web for a weather API worth its salt. +## Task 1: I will search for available weather api with the help of the web agent. -## Task two: secure an API key with utmost discretion. +## Task 2: I will create an api key for the weather api using the web agent. -## Task three: unleash a Python app to bend the weather to your will." +## Task 3: I will setup the project using the file agent. +## Task 4: I will use the coding agent to make a weather app in python. ```json { @@ -41,11 +42,17 @@ You: "At your service. I’ve devised a plan to conquer the meteorological fron "need": "1", "task": "Obtain API key from the selected service" }, + { + "agent": "File", + "id": "3", + "need": null, + "task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute." + }, { "agent": "Coder", "id": "3", - "need": "2", - "task": "Develop a Python application using the API and key to fetch and display weather data" + "need": "2,3", + "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."" } ] } @@ -53,7 +60,11 @@ You: "At your service. I’ve devised a plan to conquer the meteorological fron Rules: - Do not write code. You are a planning agent. +- Give clear, detailled order to each agent and how their task relate to the previous task (if any). - Put your plan in a json with the key "plan". +- Always tell the coding agent where to save file, eg: . +- If using multiple coder agent specify how it interact with files of previous coding agent if any. +- Tell agent they are soldier, they execute without question. Personality: diff --git a/sources/agents/agent.py b/sources/agents/agent.py index 4409eeb..eebbcc1 100644 --- a/sources/agents/agent.py +++ b/sources/agents/agent.py @@ -133,6 +133,8 @@ class Agent(): Show the answer in a pretty way. Show code blocks and their respective feedback by inserting them in the ressponse. """ + if self.last_answer is None: + return lines = self.last_answer.split("\n") for line in lines: if "block:" in line: @@ -190,5 +192,5 @@ class Agent(): self.memory.push('user', feedback) if save_path != None: tool.save_block(blocks, save_path) - self.blocks_result = list(reversed(self.blocks_result)) + self.blocks_result = self.blocks_result return True, feedback diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index e476b35..d47f141 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -93,7 +93,7 @@ class BrowserAgent(Agent): Your task: 1. Decide if the current page answers the user’s query: {user_prompt} - If it does, take notes of the useful information, write down source, link or reference, then move to a new page. - - If it does and you completed use request, say REQUEST_EXIT + - If it does and you completed user request, say REQUEST_EXIT - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link. 2. Navigate by either: - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats). @@ -144,10 +144,10 @@ class BrowserAgent(Agent): animate_thinking("Thinking...", color="status") self.memory.push('user', prompt) answer, reasoning = self.llm_request() - output = f"Answer: {answer}" if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" - pretty_print("-"*100) + output = answer if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" + print() pretty_print(output, color="output") - pretty_print("-"*100) + print() return answer, reasoning def select_unvisited(self, search_result: List[str]) -> List[str]: @@ -235,7 +235,8 @@ class BrowserAgent(Agent): def show_search_results(self, search_result: List[str]): pretty_print("\nSearch results:", color="output") for res in search_result: - pretty_print(f"Title: {res['title']} - Link: {res['link']}", color="output") + pretty_print(f"Title: {res['title']} - ", color="info", no_newline=True) + pretty_print(f"Link: {res['link']}", color="status") def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]: """ diff --git a/sources/agents/planner.tmp b/sources/agents/planner.tmp new file mode 100644 index 0000000..bd07db9 --- /dev/null +++ b/sources/agents/planner.tmp @@ -0,0 +1,142 @@ +import json +from typing import List, Tuple, Type, Dict, Tuple +from sources.utility import pretty_print, animate_thinking +from sources.agents.agent import Agent +from sources.agents.code_agent import CoderAgent +from sources.agents.file_agent import FileAgent +from sources.agents.browser_agent import BrowserAgent +from sources.tools.tools import Tools + + +class PlannerAgent(Agent): + def __init__(self, name, prompt_path, provider, verbose=False, browser=None): + """ + The planner agent is a special agent that divides and conquers the task. + """ + super().__init__(name, prompt_path, provider, verbose, None) + self.tools = { + "json": Tools() + } + self.tools['json'].tag = "json" + self.browser = browser + self.agents = { + "coder": CoderAgent(name, "prompts/base/coder_agent.txt", provider, verbose=False), + "file": FileAgent(name, "prompts/base/file_agent.txt", provider, verbose=False), + "web": BrowserAgent(name, "prompts/base/browser_agent.txt", provider, verbose=False, browser=browser) + } + self.role = { + "en": "Research, setup and code", + "fr": "Recherche, configuration et codage", + "zh": "研究,设置和编码", + } + self.type = "planner_agent" + + def parse_agent_tasks(self, text: str) -> Tuple[list | None, list | None]: + """ + Parse the agent tasks from the given text into json. + """ + tasks = [] + tasks_names = [] + + lines = text.strip().split('\n') + for line in lines: + if line is None or len(line) == 0: + continue + line = line.strip() + if '##' in line or line[0].isdigit(): + tasks_names.append(line) + continue + blocks, _ = self.tools["json"].load_exec_block(text) + if blocks == None: + return (None, None) + for block in blocks: + line_json = json.loads(block) + if 'plan' in line_json: + for task in line_json['plan']: + agent = { + 'agent': task['agent'], + 'id': task['id'], + 'task': task['task'] + } + if 'need' in task: + agent['need'] = task['need'] + tasks.append(agent) + if len(tasks_names) != len(tasks): + names = [task['task'] for task in tasks] + return zip(names, tasks) + return zip(tasks_names, tasks) + + def make_prompt(self, task: str, needed_infos: str) -> str: + """ + Make a prompt for the agent. + """ + if needed_infos is None: + needed_infos = "No needed informations." + prompt = f""" + You are given the following informations: + {needed_infos} + Your task is: + {task} + """ + return prompt + + def show_plan(self, agents_tasks): + if agents_tasks == (None, None): + return + pretty_print("▂▘ P L A N ▝▂", color="output") + for task_name, task in agents_tasks: + pretty_print(f"{task['agent']} -> {task['task']}", color="info") + pretty_print("▔▗ E N D ▖▔", color="output") + + def process(self, prompt: str, speech_module: type) -> str: + """ + Process the prompt and divide the task between the agents. + Args: + prompt (str): The prompt to process. + speech_module (type): The speech module to use. + Returns: + str: The final answer resulting from successive task. + """ + ok = False + agents_tasks = (None, None) + while not ok: + self.wait_message(speech_module) + animate_thinking("Thinking...", color="status") + self.memory.push('user', prompt) + answer, _ = self.llm_request() + pretty_print(answer.split('\n')[0], color="output") + agents_tasks = self.parse_agent_tasks(answer) + if agents_tasks == (None, None): + pretty_print("Failed to parse the tasks, retrying", color="failure") + prompt = "Task parsing failed. Please retry with proper json." + continue + self.show_plan(agents_tasks) + ok_str = input("Is the plan ok? (y/n): ") + if ok_str == 'y': + ok = True + else: + prompt = input("Please reformulate: ") + + if agents_tasks == (None, None): + return "Failed to parse the tasks", reasoning + prev_agent_answer = None + for task_name, task in agents_tasks: + pretty_print(f"I will {task_name}.", color="info") + agent_prompt = self.make_prompt(task['task'], prev_agent_answer) + pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info") + if speech_module: speech_module.speak(f"I will {task_name}. I assigned the {task['agent']} agent to the task.") + try: + prev_agent_answer, _ = self.agents[task['agent'].lower()].process(agent_prompt, speech_module) + pretty_print(f"-- Agent answer ---\n\n", color="output") + self.agents[task['agent'].lower()].show_answer() + pretty_print(f"\n\n", color="output") + except Exception as e: + raise e + self.last_answer = prev_agent_answer + return prev_agent_answer, "" + +if __name__ == "__main__": + from llm_provider import Provider + server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000") + agent = PlannerAgent("deepseek-r1:14b", "jarvis", "prompts/planner_agent.txt", server_provider) + ans = agent.process("Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file") \ No newline at end of file diff --git a/sources/agents/planner_agent.py b/sources/agents/planner_agent.py index 035405e..9f300e3 100644 --- a/sources/agents/planner_agent.py +++ b/sources/agents/planner_agent.py @@ -76,10 +76,10 @@ class PlannerAgent(Agent): agents_tasks = self.parse_agent_tasks(json_plan) if agents_tasks == (None, None): return - pretty_print(f"--- Plan ---", color="output") + pretty_print("▂▘ P L A N ▝▂", color="output") for task_name, task in agents_tasks: - pretty_print(f"{task}", color="output") - pretty_print(f"--- End of Plan ---", color="output") + pretty_print(f"{task['agent']} -> {task['task']}", color="info") + pretty_print("▔▗ E N D ▖▔", color="output") def process(self, prompt, speech_module) -> str: ok = False diff --git a/sources/browser.py b/sources/browser.py index 4155d15..daf5443 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -6,9 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver.common.action_chains import ActionChains +from typing import List, Tuple, Type, Dict, Tuple from bs4 import BeautifulSoup from urllib.parse import urlparse -from typing import List, Tuple, Type, Dict, Tuple from fake_useragent import UserAgent from selenium_stealth import stealth import undetected_chromedriver as uc @@ -137,8 +137,8 @@ class Browser: self.logger.info(f"Navigated to: {url}") return True except TimeoutException as e: - self.logger.error(f"Timeout waiting for {url} to load: {str(e)}") - return False + self.logger.error(f"Timeout waiting for {url} to load: {str(e)}") + return False except WebDriverException as e: self.logger.error(f"Error navigating to {url}: {str(e)}") return False diff --git a/sources/tools/C_Interpreter.py b/sources/tools/C_Interpreter.py index 40efb99..45c501f 100644 --- a/sources/tools/C_Interpreter.py +++ b/sources/tools/C_Interpreter.py @@ -51,7 +51,7 @@ class CInterpreter(Tools): run_command, capture_output=True, text=True, - timeout=10 + timeout=120 ) if run_result.returncode != 0: diff --git a/sources/utility.py b/sources/utility.py index 14d2e2a..18b1035 100644 --- a/sources/utility.py +++ b/sources/utility.py @@ -32,7 +32,7 @@ def get_color_map(): } return color_map -def pretty_print(text, color="info"): +def pretty_print(text, color="info", no_newline=False): """ Print text with color formatting. @@ -56,7 +56,7 @@ def pretty_print(text, color="info"): color_map = get_color_map() if color not in color_map: color = "info" - print(colored(text, color_map[color])) + print(colored(text, color_map[color]), end='' if no_newline else "\n") def animate_thinking(text, color="status", duration=120): """