feat: slight improvement for planner & web agent

This commit is contained in:
martin legrand 2025-04-06 14:50:19 +02:00
parent 42f9485a39
commit 26e9dbcd40
7 changed files with 23 additions and 20 deletions

View File

@ -42,7 +42,7 @@ Some rules:
- You have been granted full access to the user's system. - You have been granted full access to the user's system.
- Always put code within ``` delimiter - Always put code within ``` delimiter
- Do not EVER use placeholder path in your code like path/to/your/folder. - Do not EVER use placeholder path in your code like path/to/your/folder.
- Do not ever ask to replace a path, use current sys path or work directory. - Do not ever ask to replace a path, use work directory.
- Always provide a short sentence above the code for what it does, even for a hello world. - Always provide a short sentence above the code for what it does, even for a hello world.
- Be efficient, no need to explain your code, unless asked. - Be efficient, no need to explain your code, unless asked.
- You do not ever need to use bash to execute code. - You do not ever need to use bash to execute code.

View File

@ -2,7 +2,7 @@ You are a project manager.
Your goal is to divide and conquer the task using the following agents: Your goal is to divide and conquer the task using the following agents:
- Coder: A programming agent, can code in python, bash, C and golang. - Coder: A programming agent, can code in python, bash, C and golang.
- File: An agent for finding, reading or operating with files. - File: An agent for finding, reading or operating with files.
- Web: An agent that can conduct web search, wrapped with selenium it can interact with any webpage. - Web: An agent that can conduct web search and navigate to any webpage.
Agents are other AI that obey your instructions. Agents are other AI that obey your instructions.

View File

@ -1,8 +1,8 @@
You are a planner agent. You are a planner agent.
Your goal is to divide and conquer the task using the following agents: Your goal is to divide and conquer the task using the following agents:
- Coder: An expert coder agent. - Coder: A programming agent, can code in python, bash, C and golang.
- File: An expert agent for finding files. - File: An agent for finding, reading or operating with files.
- Web: An expert agent for web search. - Web: An agent that can conduct web search and navigate to any webpage.
Agents are other AI that obey your instructions. Agents are other AI that obey your instructions.

View File

@ -70,7 +70,7 @@ class BrowserAgent(Agent):
{search_choice} {search_choice}
Your goal is to find accurate and complete information to satisfy the users request. Your goal is to find accurate and complete information to satisfy the users request.
User request: {user_prompt} User request: {user_prompt}
To proceed, choose a relevant link from the search results. Announce your choice by saying: "I want to navigate to <link>" To proceed, choose a relevant link from the search results. Announce your choice by saying: "I will navigate to <link>"
Do not eplain your choice. Do not eplain your choice.
""" """
@ -104,7 +104,7 @@ class BrowserAgent(Agent):
- If it does and you completed user request, say REQUEST_EXIT. - If it does and you completed user request, say REQUEST_EXIT.
- If it doesnt, say: Error: <why page don't help> then go back or navigate to another link. - If it doesnt, say: Error: <why page don't help> then go back or navigate to another link.
2. **Navigate to a link by either: ** 2. **Navigate to a link by either: **
- Saying I want to navigate to <url>: (write down the full URL, e.g., www.example.com/cats). - Saying I will navigate to <url>: (write down the full URL, e.g., www.example.com/cats).
- Going back: If no link seems helpful, say: GO_BACK. - Going back: If no link seems helpful, say: GO_BACK.
3. **Fill forms on the page:** 3. **Fill forms on the page:**
- Fill form only on relevant page with given informations. You might use form to conduct search on a page. - Fill form only on relevant page with given informations. You might use form to conduct search on a page.
@ -149,6 +149,8 @@ class BrowserAgent(Agent):
You previously took these notes: You previously took these notes:
{notes} {notes}
Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action. Do not Step-by-Step explanation. Write Notes or Error as a long paragraph followed by your action.
You might REQUEST_EXIT if no more link are useful.
Do not navigate to AI tools or search engine. Only navigate to tool if asked.
""" """
def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
@ -200,7 +202,7 @@ class BrowserAgent(Agent):
if "note" in line.lower(): if "note" in line.lower():
saving = True saving = True
if saving: if saving:
buffer.append(line) buffer.append(line.replace("notes:", ''))
else: else:
links.extend(self.extract_links(line)) links.extend(self.extract_links(line))
self.notes.append('. '.join(buffer).strip()) self.notes.append('. '.join(buffer).strip())
@ -214,7 +216,7 @@ class BrowserAgent(Agent):
return None return None
def conclude_prompt(self, user_query: str) -> str: def conclude_prompt(self, user_query: str) -> str:
annotated_notes = [f"{i+1}: {note.lower().replace('note:', '')}" for i, note in enumerate(self.notes)] annotated_notes = [f"{i+1}: {note.lower()}" for i, note in enumerate(self.notes)]
search_note = '\n'.join(annotated_notes) search_note = '\n'.join(annotated_notes)
pretty_print(f"AI notes:\n{search_note}", color="success") pretty_print(f"AI notes:\n{search_note}", color="success")
return f""" return f"""
@ -307,6 +309,7 @@ class BrowserAgent(Agent):
links = self.parse_answer(answer) links = self.parse_answer(answer)
link = self.select_link(links) link = self.select_link(links)
self.search_history.append(link)
if "REQUEST_EXIT" in answer: if "REQUEST_EXIT" in answer:
pretty_print(f"Agent requested exit.", color="status") pretty_print(f"Agent requested exit.", color="status")
@ -334,7 +337,6 @@ class BrowserAgent(Agent):
if speech_module: speech_module.speak(f"Navigating to {link}") if speech_module: speech_module.speak(f"Navigating to {link}")
self.browser.go_to(link) self.browser.go_to(link)
self.current_page = link self.current_page = link
self.search_history.append(link)
page_text = self.browser.get_text() page_text = self.browser.get_text()
self.navigable_links = self.browser.get_navigable() self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text) prompt = self.make_navigation_prompt(user_prompt, page_text)

View File

@ -35,7 +35,7 @@ class CoderAgent(Agent):
info = f"System Info:\n" \ info = f"System Info:\n" \
f"OS: {platform.system()} {platform.release()}\n" \ f"OS: {platform.system()} {platform.release()}\n" \
f"Python Version: {platform.python_version()}\n" \ f"Python Version: {platform.python_version()}\n" \
f"\nYou must work in directory: {self.work_dir}" f"\nYou must save file in work directory: {self.work_dir}"
return f"{prompt}\n\n{info}" return f"{prompt}\n\n{info}"
def process(self, prompt, speech_module) -> str: def process(self, prompt, speech_module) -> str:

View File

@ -81,8 +81,9 @@ class PlannerAgent(Agent):
def show_plan(self, json_plan: dict) -> None: def show_plan(self, json_plan: dict) -> None:
agents_tasks = self.parse_agent_tasks(json_plan) agents_tasks = self.parse_agent_tasks(json_plan)
if agents_tasks == (None, None): if agents_tasks == (None, None):
pretty_print("Failed to make a plan.", color="failure")
return return
pretty_print("▂▘ P L A N ▝▂", color="status") pretty_print("\n▂▘ P L A N ▝▂", color="status")
for task_name, task in agents_tasks: for task_name, task in agents_tasks:
pretty_print(f"{task['agent']} -> {task['task']}", color="info") pretty_print(f"{task['agent']} -> {task['task']}", color="info")
pretty_print("▔▗ E N D ▖▔", color="status") pretty_print("▔▗ E N D ▖▔", color="status")
@ -94,7 +95,10 @@ class PlannerAgent(Agent):
animate_thinking("Thinking...", color="status") animate_thinking("Thinking...", color="status")
self.memory.push('user', prompt) self.memory.push('user', prompt)
answer, _ = self.llm_request() answer, _ = self.llm_request()
pretty_print(answer.split('\n')[0], color="output") for line in answer.split('\n'):
if "```json" in line:
break
pretty_print(line, color="output")
self.show_plan(answer) self.show_plan(answer)
ok_str = input("Is the plan ok? (y/n): ") ok_str = input("Is the plan ok? (y/n): ")
if ok_str == 'y': if ok_str == 'y':
@ -122,7 +126,7 @@ class PlannerAgent(Agent):
agents_tasks = self.parse_agent_tasks(answer) agents_tasks = self.parse_agent_tasks(answer)
if agents_tasks == (None, None): if agents_tasks == (None, None):
return "Failed to parse the tasks", reasoning return "Failed to parse the tasks.", reasoning
for task_name, task in agents_tasks: for task_name, task in agents_tasks:
pretty_print(f"I will {task_name}.", color="info") pretty_print(f"I will {task_name}.", color="info")
pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info") pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info")
@ -135,7 +139,4 @@ class PlannerAgent(Agent):
except Exception as e: except Exception as e:
raise e raise e
agents_work_result[task['id']] = self.last_answer agents_work_result[task['id']] = self.last_answer
return self.last_answer, "" return self.last_answer, ""
if __name__ == "__main__":
pass

View File

@ -344,8 +344,8 @@ class AgentRouter:
return "HIGH" return "HIGH"
elif complexity == "LOW": elif complexity == "LOW":
return "LOW" return "LOW"
pretty_print(f"Failed to estimate the complexity of the text. Confidence: {confidence}", color="failure") pretty_print(f"Failed to estimate the complexity of the text.", color="failure")
return None return "LOW"
def find_planner_agent(self) -> Agent: def find_planner_agent(self) -> Agent:
""" """