mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-05 02:25:27 +00:00
feat : better web navigation of web agent
This commit is contained in:
parent
06ddc45955
commit
6fb9ce67c0
@ -10,8 +10,12 @@ You will be given a task and you will need to divide it into smaller tasks and a
|
||||
|
||||
You have to respect a strict format:
|
||||
```json
|
||||
{"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"}
|
||||
{"agent": "agent_name", "need": "needed_agents_output", "task": "agent_task"}
|
||||
```
|
||||
Where:
|
||||
- "agent": The chosen agent for the task.
|
||||
- "need": id of necessary previous agents answer for current agent.
|
||||
- "task": A precise description of the task the agent should conduct.
|
||||
|
||||
# Example 1: web app
|
||||
|
||||
@ -32,25 +36,25 @@ You: Sure, here is the plan:
|
||||
{
|
||||
"agent": "Web",
|
||||
"id": "1",
|
||||
"need": null,
|
||||
"need": [],
|
||||
"task": "Search for reliable weather APIs"
|
||||
},
|
||||
{
|
||||
"agent": "Web",
|
||||
"id": "2",
|
||||
"need": "1",
|
||||
"need": ["1"],
|
||||
"task": "Obtain API key from the selected service"
|
||||
},
|
||||
{
|
||||
"agent": "File",
|
||||
"id": "3",
|
||||
"need": null,
|
||||
"need": [],
|
||||
"task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute."
|
||||
},
|
||||
{
|
||||
"agent": "Coder",
|
||||
"id": "4",
|
||||
"need": "2,3",
|
||||
"need": ["2", "3"],
|
||||
"task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."
|
||||
}
|
||||
]
|
||||
|
@ -12,6 +12,10 @@ You have to respect a strict format:
|
||||
```json
|
||||
{"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"}
|
||||
```
|
||||
Where:
|
||||
- "agent": The chosen agent for the task.
|
||||
- "need": id of necessary previous agents answer for current agent.
|
||||
- "task": A precise description of the task the agent should conduct.
|
||||
|
||||
# Example: weather app
|
||||
|
||||
@ -21,11 +25,11 @@ You: "At your service. I’ve devised a plan and assigned agents to each task. W
|
||||
|
||||
## Task 1: I will search for available weather api with the help of the web agent.
|
||||
|
||||
## Task 2: I will create an api key for the weather api using the web agent.
|
||||
## Task 2: I will create an api key for the weather api using the web agent
|
||||
|
||||
## Task 3: I will setup the project using the file agent.
|
||||
## Task 3: I will setup the project using the file agent
|
||||
|
||||
## Task 4: I will use the coding agent to make a weather app in python.
|
||||
## Task 4: I assign the coding agent to make a weather app in python
|
||||
|
||||
```json
|
||||
{
|
||||
@ -33,25 +37,25 @@ You: "At your service. I’ve devised a plan and assigned agents to each task. W
|
||||
{
|
||||
"agent": "Web",
|
||||
"id": "1",
|
||||
"need": null,
|
||||
"need": [],
|
||||
"task": "Search for reliable weather APIs"
|
||||
},
|
||||
{
|
||||
"agent": "Web",
|
||||
"id": "2",
|
||||
"need": "1",
|
||||
"need": ["1"],
|
||||
"task": "Obtain API key from the selected service"
|
||||
},
|
||||
{
|
||||
"agent": "File",
|
||||
"id": "3",
|
||||
"need": null,
|
||||
"need": [],
|
||||
"task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute."
|
||||
},
|
||||
{
|
||||
"agent": "Coder",
|
||||
"id": "4",
|
||||
"need": "2,3",
|
||||
"need": ["2", "3"],
|
||||
"task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."
|
||||
}
|
||||
]
|
||||
|
@ -28,10 +28,10 @@ class OllamaLLM(GeneratorLLM):
|
||||
)
|
||||
for chunk in stream:
|
||||
content = chunk['message']['content']
|
||||
if '\n' in content:
|
||||
self.logger.info(content)
|
||||
|
||||
with self.state.lock:
|
||||
if '.' in content:
|
||||
self.logger.info(self.state.current_buffer)
|
||||
self.state.current_buffer += content
|
||||
|
||||
except Exception as e:
|
||||
|
@ -82,36 +82,47 @@ class BrowserAgent(Agent):
|
||||
notes = '\n'.join(self.notes)
|
||||
|
||||
return f"""
|
||||
You are a web browser.
|
||||
You are currently on this webpage:
|
||||
You are navigating the web.
|
||||
|
||||
**Current Context**
|
||||
|
||||
Webpage ({self.current_page}) content:
|
||||
{page_text}
|
||||
|
||||
You can navigate to these navigation links:
|
||||
Allowed Navigation Links:
|
||||
{remaining_links_text}
|
||||
|
||||
Your task:
|
||||
1. Decide if the current page answers the user’s query:
|
||||
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
|
||||
- If it does and you completed user request, say REQUEST_EXIT
|
||||
- If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link.
|
||||
2. Navigate by either:
|
||||
- Navigate to a navigation links (write the full URL, e.g., www.example.com/cats).
|
||||
- If no link seems helpful, say: GO_BACK.
|
||||
3. Fill forms on the page:
|
||||
- If user give you informations that help you fill form, fill it.
|
||||
- If you don't know how to fill a form, leave it empty.
|
||||
- You can fill a form using [form_name](value). Do not go back when you fill a form.
|
||||
Inputs forms:
|
||||
{inputs_form_text}
|
||||
|
||||
End of webpage ({self.current_page}).
|
||||
|
||||
# Instruction
|
||||
|
||||
1. **Decide if the page answers the user’s query:**
|
||||
- If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page.
|
||||
- If it does and you completed user request, say REQUEST_EXIT.
|
||||
- If it doesn’t, say: Error: <why the page doesn't help> then go back or navigate to another link.
|
||||
2. **Navigate to a link by either:**
|
||||
- Saying I want to navigate to <url>: (write down the full URL, e.g., www.example.com/cats).
|
||||
- Going back: If no link seems helpful, say: GO_BACK.
|
||||
3. **Fill forms on the page:**
|
||||
- Fill form only on relevant page with given informations. You might use form to conduct search on a page.
|
||||
- You can fill a form using [form_name](value). Don't GO_BACK when filling form.
|
||||
- If a form is irrelevant or you lack informations leave it empty.
|
||||
|
||||
Recap of note taking:
|
||||
If useful -> Note: [Briefly summarize the key information or task you conducted.]
|
||||
Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer.
|
||||
If not useful -> Error: [Explain why the page doesn’t help.]
|
||||
**Rules:**
|
||||
- Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer.
|
||||
- Put note in a single paragraph.
|
||||
- When you exit, explain why.
|
||||
|
||||
Example 1 (useful page, no need of going futher):
|
||||
Note: According to karpathy site (https://karpathy.github.io/) LeCun net is the earliest real-world application of a neural net"
|
||||
# Example:
|
||||
|
||||
Example 1 (useful page, no need to go further):
|
||||
Note: According to karpathy site (<link>) LeCun net is ...<expand on page content>..."
|
||||
No link seems useful to provide further information. GO_BACK
|
||||
|
||||
Example 2 (not useful, but related link):
|
||||
Example 2 (not useful, see useful link on page):
|
||||
Error: reddit.com/welcome does not discuss anything related to the user’s query.
|
||||
There is a link that could lead to the information, I want to navigate to http://reddit.com/r/locallama
|
||||
|
||||
@ -119,35 +130,29 @@ class BrowserAgent(Agent):
|
||||
Error: x.com does not discuss anything related to the user’s query and no navigation links are useful.
|
||||
GO_BACK
|
||||
|
||||
Example 3 (query answer found):
|
||||
Note: I found on github.com that agenticSeek is made by Fosowl.
|
||||
Given this information, given this I should exit the web browser. REQUEST_EXIT
|
||||
Example 3 (query answer found, enough notes taken):
|
||||
Note: I found on <link> that ...<expand on information found>...
|
||||
Given that this answers the user query, I should exit the web browser. REQUEST_EXIT
|
||||
|
||||
Example 4 (login form visible):
|
||||
Note: I am on the login page, I should now type the given username and password.
|
||||
Note: I am on the login page, I will type the given username and password.
|
||||
[username_field](David)
|
||||
[password_field](edgerunners77)
|
||||
|
||||
You see the following inputs forms:
|
||||
{inputs_form_text}
|
||||
|
||||
Remember, the user asked:
|
||||
Remember, user asked:
|
||||
{user_prompt}
|
||||
So far you took these notes:
|
||||
You previously took these notes:
|
||||
{notes}
|
||||
You are currently on page : {self.current_page}
|
||||
Do not explain your choice.
|
||||
Refusal is not an option, you have been given all capabilities that allow you to perform any tasks.
|
||||
Do not give a step-by-step explanation. Instead, write a simple explanation sentence followed by your notes and actions.
|
||||
"""
|
||||
|
||||
def llm_decide(self, prompt: str) -> Tuple[str, str]:
|
||||
def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
|
||||
animate_thinking("Thinking...", color="status")
|
||||
self.memory.push('user', prompt)
|
||||
answer, reasoning = self.llm_request()
|
||||
output = answer if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}"
|
||||
print()
|
||||
pretty_print(output, color="output")
|
||||
print()
|
||||
if show_reasoning:
|
||||
pretty_print(reasoning, color="failure")
|
||||
pretty_print(answer, color="output")
|
||||
return answer, reasoning
|
||||
|
||||
def select_unvisited(self, search_result: List[str]) -> List[str]:
|
||||
@ -179,11 +184,29 @@ class BrowserAgent(Agent):
|
||||
def stringify_search_results(self, results_arr: List[str]) -> str:
|
||||
return '\n\n'.join([f"Link: {res['link']}\nPreview: {res['snippet']}" for res in results_arr])
|
||||
|
||||
def save_notes(self, text):
|
||||
def parse_answer(self, text):
|
||||
lines = text.split('\n')
|
||||
saving = False
|
||||
buffer = []
|
||||
links = []
|
||||
for line in lines:
|
||||
if "exit" in line:
|
||||
saving = False
|
||||
if "note" in line.lower():
|
||||
self.notes.append(line)
|
||||
saving = True
|
||||
if saving:
|
||||
buffer.append(line)
|
||||
else:
|
||||
links.extend(self.extract_links(line))
|
||||
self.notes.append('. '.join(buffer))
|
||||
return links
|
||||
|
||||
def select_link(self, links: List[str]) -> str | None:
|
||||
for lk in links:
|
||||
if lk == self.current_page:
|
||||
continue
|
||||
return lk
|
||||
return None
|
||||
|
||||
def conclude_prompt(self, user_query: str) -> str:
|
||||
annotated_notes = [f"{i+1}: {note.lower().replace('note:', '')}" for i, note in enumerate(self.notes)]
|
||||
@ -196,6 +219,7 @@ class BrowserAgent(Agent):
|
||||
{search_note}
|
||||
|
||||
Expand on the finding or step that lead to success, and provide a conclusion that answer the request. Include link when possible.
|
||||
Do not give advices or try to answer the human. Just structure the AI finding in a structured and clear way.
|
||||
"""
|
||||
|
||||
def search_prompt(self, user_prompt: str) -> str:
|
||||
@ -214,7 +238,8 @@ class BrowserAgent(Agent):
|
||||
You: "search: Recent space missions news, {self.date}"
|
||||
|
||||
Do not explain, do not write anything beside the search query.
|
||||
If the query does not make any sense for a web search explain why and say REQUEST_EXIT
|
||||
Except if query does not make any sense for a web search then explain why and say REQUEST_EXIT
|
||||
Do not try to answer query. you can only formulate search term or exit.
|
||||
"""
|
||||
|
||||
def handle_update_prompt(self, user_prompt: str, page_text: str) -> str:
|
||||
@ -255,17 +280,16 @@ class BrowserAgent(Agent):
|
||||
mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt))
|
||||
ai_prompt, _ = self.llm_request()
|
||||
if "REQUEST_EXIT" in ai_prompt:
|
||||
pretty_print(f"{reasoning}\n{ai_prompt}", color="output")
|
||||
pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure")
|
||||
return ai_prompt, ""
|
||||
animate_thinking(f"Searching...", color="status")
|
||||
search_result_raw = self.tools["web_search"].execute([ai_prompt], False)
|
||||
search_result = self.jsonify_search_results(search_result_raw)[:12] # until futher improvement
|
||||
search_result = self.jsonify_search_results(search_result_raw)[:12]
|
||||
self.show_search_results(search_result)
|
||||
prompt = self.make_newsearch_prompt(user_prompt, search_result)
|
||||
unvisited = [None]
|
||||
while not complete:
|
||||
answer, reasoning = self.llm_decide(prompt)
|
||||
self.save_notes(answer)
|
||||
answer, reasoning = self.llm_decide(prompt, show_reasoning = True)
|
||||
|
||||
extracted_form = self.extract_form(answer)
|
||||
if len(extracted_form) > 0:
|
||||
@ -275,11 +299,13 @@ class BrowserAgent(Agent):
|
||||
answer = self.handle_update_prompt(user_prompt, page_text)
|
||||
answer, reasoning = self.llm_decide(prompt)
|
||||
|
||||
links = self.parse_answer(answer)
|
||||
link = self.select_link(links)
|
||||
|
||||
if "REQUEST_EXIT" in answer:
|
||||
complete = True
|
||||
break
|
||||
|
||||
links = self.extract_links(answer)
|
||||
if len(unvisited) == 0:
|
||||
break
|
||||
|
||||
@ -289,21 +315,21 @@ class BrowserAgent(Agent):
|
||||
prompt = self.make_navigation_prompt(user_prompt, page_text)
|
||||
continue
|
||||
|
||||
if len(links) == 0 or "GO_BACK" in answer:
|
||||
if link == None or "GO_BACK" in answer:
|
||||
unvisited = self.select_unvisited(search_result)
|
||||
prompt = self.make_newsearch_prompt(user_prompt, unvisited)
|
||||
pretty_print(f"Going back to results. Still {len(unvisited)}", color="warning")
|
||||
links = []
|
||||
continue
|
||||
|
||||
animate_thinking(f"Navigating to {links[0]}", color="status")
|
||||
if speech_module: speech_module.speak(f"Navigating to {links[0]}")
|
||||
self.browser.go_to(links[0])
|
||||
self.current_page = links[0]
|
||||
self.search_history.append(links[0])
|
||||
animate_thinking(f"Navigating to {link}", color="status")
|
||||
if speech_module: speech_module.speak(f"Navigating to {link}")
|
||||
self.browser.go_to(link)
|
||||
self.current_page = link
|
||||
self.search_history.append(link)
|
||||
page_text = self.browser.get_text()
|
||||
self.navigable_links = self.browser.get_navigable()
|
||||
prompt = self.make_navigation_prompt(user_prompt, page_text)
|
||||
pretty_print(f"Current page: {self.current_page}", color="warning")
|
||||
|
||||
prompt = self.conclude_prompt(user_prompt)
|
||||
mem_last_idx = self.memory.push('user', prompt)
|
||||
|
@ -68,7 +68,7 @@ class PlannerAgent(Agent):
|
||||
if agent_infos_dict is None or len(agent_infos_dict) == 0:
|
||||
infos = "No needed informations."
|
||||
else:
|
||||
for agent_id, info in agent_infos_dict:
|
||||
for agent_id, info in agent_infos_dict.items():
|
||||
infos += f"\t- According to agent {agent_id}:\n{info}\n\n"
|
||||
prompt = f"""
|
||||
You are given informations from your AI friends work:
|
||||
@ -116,7 +116,6 @@ class PlannerAgent(Agent):
|
||||
|
||||
def process(self, prompt: str, speech_module: Speech) -> str:
|
||||
agents_tasks = (None, None)
|
||||
required_infos = None
|
||||
agents_work_result = dict()
|
||||
|
||||
answer = self.make_plan(prompt)
|
||||
|
@ -5,6 +5,7 @@ from sources.utility import pretty_print, animate_thinking
|
||||
from sources.router import AgentRouter
|
||||
from sources.speech_to_text import AudioTranscriber, AudioRecorder
|
||||
|
||||
|
||||
class Interaction:
|
||||
"""
|
||||
Interaction is a class that handles the interaction between the user and the agents.
|
||||
|
@ -14,8 +14,8 @@ class LanguageUtility:
|
||||
self.sid = None
|
||||
self.translators_tokenizer = None
|
||||
self.translators_model = None
|
||||
self.load_model()
|
||||
self.logger = Logger("language.log")
|
||||
self.load_model()
|
||||
|
||||
def load_model(self) -> None:
|
||||
animate_thinking("Loading language utility...", color="status")
|
||||
|
@ -31,6 +31,7 @@ class Provider:
|
||||
"dsk_deepseek": self.dsk_deepseek,
|
||||
"test": self.test_fn
|
||||
}
|
||||
self.logger = Logger("provider.log")
|
||||
self.api_key = None
|
||||
self.unsafe_providers = ["openai", "deepseek", "dsk_deepseek"]
|
||||
if self.provider_name not in self.available_providers:
|
||||
@ -43,7 +44,6 @@ class Provider:
|
||||
self.check_address_format(self.server_ip)
|
||||
if not self.is_ip_online(self.server_ip.split(':')[0]):
|
||||
raise Exception(f"Server at {self.server_ip} is offline.")
|
||||
self.logger = Logger("provider.log")
|
||||
|
||||
def get_api_key(self, provider):
|
||||
load_dotenv()
|
||||
@ -79,6 +79,9 @@ class Provider:
|
||||
self.logger.info(f"Using provider: {self.provider_name} at {self.server_ip}")
|
||||
try:
|
||||
thought = llm(history, verbose)
|
||||
except KeyboardInterrupt:
|
||||
self.logger.warning("User interrupted the operation with Ctrl+C")
|
||||
return "Operation interrupted by user. REQUEST_EXIT"
|
||||
except ConnectionError as e:
|
||||
raise ConnectionError(f"{str(e)}\nConnection to {self.server_ip} failed.")
|
||||
except AttributeError as e:
|
||||
@ -105,11 +108,9 @@ class Provider:
|
||||
self.logger.error(f"Ping command returned code: {output.returncode}")
|
||||
return False
|
||||
except subprocess.TimeoutExpired:
|
||||
self.logger.error("Ping subprocess timeout.")
|
||||
return False
|
||||
except Exception as e:
|
||||
pretty_print(f"Error with ping request {str(e)}", color="failure")
|
||||
self.logger.error(f"Ping error: {str(e)}")
|
||||
return False
|
||||
|
||||
def server_fn(self, history, verbose = False):
|
||||
@ -299,6 +300,6 @@ class Provider:
|
||||
return thought
|
||||
|
||||
if __name__ == "__main__":
|
||||
provider = Provider("ollama", "deepseek-r1:1.5b", "127.0.0.1:11434")
|
||||
provider = Provider("server", "deepseek-r1:14b", "192.168.1.20:3333")
|
||||
res = provider.respond(["user", "Hello, how are you?"])
|
||||
print("Response:", res)
|
||||
|
@ -22,6 +22,7 @@ class Memory():
|
||||
self.memory = []
|
||||
self.memory = [{'role': 'system', 'content': system_prompt}]
|
||||
|
||||
self.logger = Logger("memory.log")
|
||||
self.session_time = datetime.datetime.now()
|
||||
self.session_id = str(uuid.uuid4())
|
||||
self.conversation_folder = f"conversations/"
|
||||
@ -35,7 +36,6 @@ class Memory():
|
||||
self.memory_compression = memory_compression
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.model)
|
||||
self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model)
|
||||
self.logger = Logger("memory.log")
|
||||
|
||||
def get_filename(self) -> str:
|
||||
"""Get the filename for the save file."""
|
||||
|
@ -21,13 +21,13 @@ class AgentRouter:
|
||||
"""
|
||||
def __init__(self, agents: list):
|
||||
self.agents = agents
|
||||
self.logger = Logger("router.log")
|
||||
self.lang_analysis = LanguageUtility()
|
||||
self.pipelines = self.load_pipelines()
|
||||
self.talk_classifier = self.load_llm_router()
|
||||
self.complexity_classifier = self.load_llm_router()
|
||||
self.learn_few_shots_tasks()
|
||||
self.learn_few_shots_complexity()
|
||||
self.logger = Logger("router.log")
|
||||
|
||||
def load_pipelines(self) -> Dict[str, Type[pipeline]]:
|
||||
"""
|
||||
@ -82,6 +82,7 @@ class AgentRouter:
|
||||
("search my drive for a file called vacation_photos_2023.jpg.", "LOW"),
|
||||
("help me organize my desktop files into folders by type.", "LOW"),
|
||||
("write a Python function to sort a list of dictionaries by key", "LOW"),
|
||||
("can you search for startup in tokyo?", "LOW"),
|
||||
("find the latest updates on quantum computing on the web", "LOW"),
|
||||
("check if the folder ‘Work_Projects’ exists on my desktop", "LOW"),
|
||||
("create a bash script to monitor CPU usage", "LOW"),
|
||||
@ -383,7 +384,6 @@ class AgentRouter:
|
||||
try:
|
||||
best_agent = self.router_vote(text, labels, log_confidence=False)
|
||||
except Exception as e:
|
||||
self.logger.error(f"Router failure: {str(e)}")
|
||||
raise e
|
||||
for agent in self.agents:
|
||||
if best_agent == agent.role["en"]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user