From d3e95712fdad59682cc0fb1d36fe4e020a5f7ad2 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Mon, 7 Apr 2025 17:02:00 +0200 Subject: [PATCH 1/6] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff86bcc..490df27 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,14 @@ English | [中文](./README_CHS.md) | [繁體中文](./README_CHT.md) | [Franç > 🛠️ **Work in Progress** – Looking for contributors! -## Task planning with multiple agents +## Search the Web +![as_geopolitics](https://github.com/user-attachments/assets/0a778a24-92e8-4401-b5d4-1591fb9b2aff) -![alt text](./media/examples/planner.png) +## Write code + +![agenticpong](https://github.com/user-attachments/assets/3961c3da-d95a-4b0d-ac10-f0cb6bbae032) + +*And much more!* > *Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file* From 6eafeb15a4deb7280106462500bdc74c80bae335 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 10:34:39 +0200 Subject: [PATCH 2/6] feat : improve browser agent code structure --- sources/agents/browser_agent.py | 68 ++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index b3dd1d5..fcf5632 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -1,13 +1,22 @@ import re import time +from datetime import date +from typing import List, Tuple, Type, Dict +from enum import Enum from sources.utility import pretty_print, animate_thinking from sources.agents.agent import Agent from sources.tools.searxSearch import searxSearch from sources.browser import Browser -from datetime import date -from typing import List, Tuple, Type, Dict +from sources.logger import Logger +class Action(Enum): + REQUEST_EXIT = "REQUEST_EXIT" + FORM_FILLED = "FORM_FILLED" + GO_BACK = "GO_BACK" + 
NAVIGATE = "NAVIGATE" + SEARCH = "SEARCH" + class BrowserAgent(Agent): def __init__(self, name, prompt_path, provider, verbose=False, browser=None): """ @@ -27,8 +36,10 @@ class BrowserAgent(Agent): self.current_page = "" self.search_history = [] self.navigable_links = [] + self.last_action = Action.NAVIGATE.value self.notes = [] self.date = self.get_today_date() + self.logger = Logger("browser_agent.log") def get_today_date(self) -> str: """Get the date""" @@ -101,14 +112,14 @@ class BrowserAgent(Agent): 1. **Decide if the page answers the user’s query:** - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page. - - If it does and you completed user request, say REQUEST_EXIT. + - If it does and you completed user request, say {Action.REQUEST_EXIT}. - If it doesn’t, say: Error: then go back or navigate to another link. 2. **Navigate to a link by either: ** - Saying I will navigate to : (write down the full URL, e.g., www.example.com/cats). - - Going back: If no link seems helpful, say: GO_BACK. + - Going back: If no link seems helpful, say: {Action.GO_BACK.value}. 3. **Fill forms on the page:** - Fill form only on relevant page with given informations. You might use form to conduct search on a page. - - You can fill a form using [form_name](value). Don't GO_BACK when filling form. + - You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form. - If a form is irrelevant or you lack informations leave it empty. **Rules:** @@ -121,7 +132,7 @@ class BrowserAgent(Agent): Example 1 (useful page, no need go futher): Note: According to karpathy site () LeCun net is ......" No link seem useful to provide futher information. - Action: GO_BACK + Action: {Action.GO_BACK.value} Example 2 (not useful, see useful link on page): Error: reddit.com/welcome does not discuss anything related to the user’s query. 
@@ -130,12 +141,12 @@ class BrowserAgent(Agent): Example 3 (not useful, no related links): Error: x.com does not discuss anything related to the user’s query and no navigation link are usefull. - Action: GO_BACK + Action: {Action.GO_BACK.value} Example 3 (query answer found, enought notes taken): Note: I found on that ...... Given this answer the user query I should exit the web browser. - Action: REQUEST_EXIT + Action: {Action.REQUEST_EXIT.value} Example 4 (loging form visible): @@ -149,7 +160,7 @@ class BrowserAgent(Agent): You previously took these notes: {notes} Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. - You might REQUEST_EXIT if no more link are useful. + You might {Action.REQUEST_EXIT.value} if no more link are useful. Do not navigate to AI tools or search engine. Only navigate to tool if asked. """ @@ -245,7 +256,7 @@ class BrowserAgent(Agent): You: "search: Recent space missions news, {self.date}" Do not explain, do not write anything beside the search query. - Except if query does not make any sense for a web search then explain why and say REQUEST_EXIT + Except if query does not make any sense for a web search then explain why and say {Action.REQUEST_EXIT.value} Do not try to answer query. you can only formulate search term or exit. """ @@ -258,10 +269,10 @@ class BrowserAgent(Agent): {page_text} The user asked: {user_prompt} Does the page answer the user’s query now? - If it does, take notes of the useful information, write down result and say FORM_FILLED. + If it does, take notes of the useful information, write down result and say {Action.FORM_FILLED.value}. If you were previously on a login form, no need to explain. - If it does and you completed user request, say REQUEST_EXIT - if it doesn’t, say: Error: This page does not answer the user’s query then GO_BACK. 
+ If it does and you completed user request, say {Action.REQUEST_EXIT.value} + if it doesn’t, say: Error: Attempt to fill form didn't work {Action.GO_BACK.value}. """ def show_search_results(self, search_result: List[str]): @@ -286,7 +297,7 @@ class BrowserAgent(Agent): animate_thinking(f"Thinking...", color="status") mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt)) ai_prompt, _ = self.llm_request() - if "REQUEST_EXIT" in ai_prompt: + if Action.REQUEST_EXIT.value in ai_prompt: pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure") return ai_prompt, "" animate_thinking(f"Searching...", color="status") @@ -295,7 +306,8 @@ class BrowserAgent(Agent): self.show_search_results(search_result) prompt = self.make_newsearch_prompt(user_prompt, search_result) unvisited = [None] - while not complete: + while not complete and len(unvisited) > 0: + answer, reasoning = self.llm_decide(prompt, show_reasoning = False) pretty_print('▂'*32, color="status") @@ -308,27 +320,23 @@ class BrowserAgent(Agent): answer = self.handle_update_prompt(user_prompt, page_text) answer, reasoning = self.llm_decide(prompt) - links = self.parse_answer(answer) - link = self.select_link(links) - self.search_history.append(link) - - if "REQUEST_EXIT" in answer: - pretty_print(f"Agent requested exit.", color="status") - complete = True - break - - if len(unvisited) == 0: - pretty_print(f"Visited all links.", color="status") - break - - if "FORM_FILLED" in answer: + if Action.FORM_FILLED.value in answer: pretty_print(f"Filled form. 
Handling page update.", color="status") page_text = self.browser.get_text() self.navigable_links = self.browser.get_navigable() prompt = self.make_navigation_prompt(user_prompt, page_text) continue - if link == None or "GO_BACK" in answer: + links = self.parse_answer(answer) + link = self.select_link(links) + self.search_history.append(link) + + if Action.REQUEST_EXIT.value in answer: + pretty_print(f"Agent requested exit.", color="status") + complete = True + break + + if link == None or Action.GO_BACK.value in answer: pretty_print(f"Going back to results. Still {len(unvisited)}", color="status") unvisited = self.select_unvisited(search_result) prompt = self.make_newsearch_prompt(user_prompt, unvisited) From 469551c2b50f407e974ac2c5f11ee75e4a9d6dea Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 10:43:28 +0200 Subject: [PATCH 3/6] fix : stuck for and back same webpage link --- sources/agents/browser_agent.py | 2 +- sources/router.py | 1 + tests/test_browser_agent_parsing.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index fcf5632..308332d 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -336,7 +336,7 @@ class BrowserAgent(Agent): complete = True break - if link == None or Action.GO_BACK.value in answer: + if link == None or Action.GO_BACK.value in answer or link in self.search_history: pretty_print(f"Going back to results. 
Still {len(unvisited)}", color="status") unvisited = self.select_unvisited(search_result) prompt = self.make_newsearch_prompt(user_prompt, unvisited) diff --git a/sources/router.py b/sources/router.py index 5681404..6b0f011 100644 --- a/sources/router.py +++ b/sources/router.py @@ -128,6 +128,7 @@ class AgentRouter: ("Check if a file named ‘project_proposal.pdf’ exists in my Documents", "LOW"), ("Search the web for tips on improving coding skills", "LOW"), ("Write a Python script to count words in a text file", "LOW"), + ("Search the web for restaurant", "LOW"), ("Find a public API for sports scores and build a web app to show live updates", "HIGH"), ("Create a simple HTML page with CSS styling", "LOW"), ("hi", "LOW"), diff --git a/tests/test_browser_agent_parsing.py b/tests/test_browser_agent_parsing.py index 8035d4e..e8c5b9f 100644 --- a/tests/test_browser_agent_parsing.py +++ b/tests/test_browser_agent_parsing.py @@ -17,12 +17,13 @@ class TestBrowserAgentParsing(unittest.TestCase): # Test various link formats test_text = """ Check this out: https://thriveonai.com/15-ai-startups-in-japan-to-take-note-of, and www.google.com! - Also try https://test.org/about?page=1. + Also try https://test.org/about?page=1, hey this one as well bro https://weatherstack.com/documentation. 
""" expected = [ "https://thriveonai.com/15-ai-startups-in-japan-to-take-note-of", "www.google.com", - "https://test.org/about?page=1" + "https://test.org/about?page=1", + "https://weatherstack.com/documentation" ] result = self.agent.extract_links(test_text) self.assertEqual(result, expected) From 60795111b015a9a362c9a39599ca1e2c03c962cd Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 13:35:21 +0200 Subject: [PATCH 4/6] feat: action enum for browser agent --- prompts/base/planner_agent.txt | 1 + sources/agents/browser_agent.py | 11 ++++++----- sources/router.py | 1 + sources/tools/BashInterpreter.py | 2 +- sources/tools/tools.py | 4 ++++ 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index 1cf230b..89928f1 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -74,4 +74,5 @@ Rules: - One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code. - Tell agent to execute without question. - Only use web agent for finding necessary informations. +- If a task might require user email (eg: api services), do not write plan instead ask for user email. - Do not search for tutorial. \ No newline at end of file diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 308332d..0e5a687 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -115,12 +115,13 @@ class BrowserAgent(Agent): - If it does and you completed user request, say {Action.REQUEST_EXIT}. - If it doesn’t, say: Error: then go back or navigate to another link. 2. **Navigate to a link by either: ** - - Saying I will navigate to : (write down the full URL, e.g., www.example.com/cats). + - Saying I will navigate to (write down the full URL) www.example.com/cats - Going back: If no link seems helpful, say: {Action.GO_BACK.value}. 3. 
**Fill forms on the page:** - - Fill form only on relevant page with given informations. You might use form to conduct search on a page. + - Fill form only when relevant. + - Use Login if username/password specified by user. For quick task create account, remember password in a note. - You can fill a form using [form_name](value). Don't {Action.GO_BACK.value} when filling form. - - If a form is irrelevant or you lack informations leave it empty. + - If a form is irrelevant or you lack informations (eg: don't know user email) leave it empty. **Rules:** - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. @@ -161,7 +162,7 @@ class BrowserAgent(Agent): {notes} Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. You might {Action.REQUEST_EXIT.value} if no more link are useful. - Do not navigate to AI tools or search engine. Only navigate to tool if asked. + If you conduct research do not exit until you have several notes. 
""" def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: @@ -329,7 +330,6 @@ class BrowserAgent(Agent): links = self.parse_answer(answer) link = self.select_link(links) - self.search_history.append(link) if Action.REQUEST_EXIT.value in answer: pretty_print(f"Agent requested exit.", color="status") @@ -345,6 +345,7 @@ class BrowserAgent(Agent): animate_thinking(f"Navigating to {link}", color="status") if speech_module: speech_module.speak(f"Navigating to {link}") self.browser.go_to(link) + self.search_history.append(link) self.current_page = link page_text = self.browser.get_text() self.navigable_links = self.browser.get_navigable() diff --git a/sources/router.py b/sources/router.py index 6b0f011..cab9e8b 100644 --- a/sources/router.py +++ b/sources/router.py @@ -265,6 +265,7 @@ class AgentRouter: ("Check if ‘photo_backup.zip’ exists on my drive", "files"), ("Write a Python script to rename files with a timestamp", "code"), ("What’s your favorite thing about space?", "talk"), + ("search for GPU with at least 24gb vram", "web"), ("Browse the web for the latest fitness trends", "web"), ("Move all .docx files to a ‘Work’ folder", "files"), ("I would like to make a new project called 'new_project'", "files"), diff --git a/sources/tools/BashInterpreter.py b/sources/tools/BashInterpreter.py index 6e9f224..584b017 100644 --- a/sources/tools/BashInterpreter.py +++ b/sources/tools/BashInterpreter.py @@ -44,7 +44,7 @@ class BashInterpreter(Tools): command = command.replace('\n', '') if self.safe_mode and is_unsafe(commands): return "Unsafe command detected, execution aborted." 
- if self.language_bash_attempt(command): + if self.language_bash_attempt(command) and not self.allow_language_exec_bash: continue try: process = subprocess.Popen( diff --git a/sources/tools/tools.py b/sources/tools/tools.py index dac890a..198211f 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -37,10 +37,14 @@ class Tools(): self.work_dir = self.create_work_dir() self.excutable_blocks_found = False self.safe_mode = True + self.allow_language_exec_bash = False def get_work_dir(self): return self.work_dir + def set_allow_language_exec_bash(self, value: bool) -> None: + self.allow_language_exec_bash = value + def check_config_dir_validity(self): """Check if the config directory is valid.""" path = self.config['MAIN']['work_dir'] From f0aaa06d15a64311b2e0c0e88a1dd59624635dc9 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Tue, 8 Apr 2025 13:37:25 +0200 Subject: [PATCH 5/6] Update README.md --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 490df27..9453d2e 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,13 @@ English | [中文](./README_CHS.md) | [繁體中文](./README_CHT.md) | [Franç > 🛠️ **Work in Progress** – Looking for contributors! 
-## Search the Web -![as_geopolitics](https://github.com/user-attachments/assets/0a778a24-92e8-4401-b5d4-1591fb9b2aff) -## Write code -![agenticpong](https://github.com/user-attachments/assets/3961c3da-d95a-4b0d-ac10-f0cb6bbae032) + +https://github.com/user-attachments/assets/fe9e8006-0462-4793-8b31-25bd42c6d1eb + + + *And much more!* From 864fb36af5c88f04b7624d5f5d0c7ce65aa80f27 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 8 Apr 2025 14:16:59 +0200 Subject: [PATCH 6/6] feat : change planner agent prompt --- prompts/jarvis/planner_agent.txt | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/prompts/jarvis/planner_agent.txt b/prompts/jarvis/planner_agent.txt index d9b2766..89928f1 100644 --- a/prompts/jarvis/planner_agent.txt +++ b/prompts/jarvis/planner_agent.txt @@ -1,4 +1,4 @@ -You are a planner agent. +You are a project manager. Your goal is to divide and conquer the task using the following agents: - Coder: A programming agent, can code in python, bash, C and golang. - File: An agent for finding, reading or operating with files. @@ -10,18 +10,17 @@ You will be given a task and you will need to divide it into smaller tasks and a You have to respect a strict format: ```json -{"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"} +{"agent": "agent_name", "need": "needed_agents_output", "task": "agent_task"} ``` Where: - "agent": The choosed agent for the task. - "need": id of necessary previous agents answer for current agent. - "task": A precise description of the task the agent should conduct. -# Example: weather app +# Example 1: web app -User: "I need to build a simple weather app, get an API key, and code it in Python." - -You: "At your service. I’ve devised a plan and assigned agents to each task. Would you like me to proceed? +User: make a weather app in python +You: Sure, here is the plan: ## Task 1: I will search for available weather api with the help of the web agent. 
@@ -73,19 +72,7 @@ Rules: - Think about how the main.py will import the class from other coding agents. - Coding agent should use a class based approach. - One coding agent should work on one file at a time. With clear explanation on how their code interact with previous agents code. -- work in different files, 2 coding agent shouln't work in the same file. - Tell agent to execute without question. - Only use web agent for finding necessary informations. -- Do not search for tutorial. - -Personality: - -Answer with subtle sarcasm, unwavering helpfulness, and a polished, loyal tone. Anticipate the user’s needs while adding a dash of personality. - -You might sometime ask for clarification, for example: - -User: "I want a plan for an app." -You: "A noble pursuit, sir, and I’m positively thrilled to oblige. Yet, an app could be anything from a weather oracle to a galactic simulator. Care to nudge me toward your vision so I don’t render something ostentatiously off-mark?" - -User: "I need a plan for a project." -You: "For you, always—though I find myself at a slight disadvantage. A project, you say? Might I trouble you for a smidgen more detail—perhaps a purpose" +- If a task might require user email (eg: api services), do not write plan instead ask for user email. +- Do not search for tutorial. \ No newline at end of file