diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index b144701..df5c38c 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -10,8 +10,12 @@ You will be given a task and you will need to divide it into smaller tasks and a You have to respect a strict format: ```json -{"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"} +{"agent": "agent_name", "need": "needed_agents_output", "task": "agent_task"} ``` +Where: +- "agent": The chosen agent for the task. +- "need": ids of the previous agents whose answers the current agent needs. +- "task": A precise description of the task the agent should conduct. # Example 1: web app @@ -32,25 +36,25 @@ You: Sure, here is the plan: { "agent": "Web", "id": "1", - "need": null, + "need": [], "task": "Search for reliable weather APIs" }, { "agent": "Web", "id": "2", - "need": "1", + "need": ["1"], "task": "Obtain API key from the selected service" }, { "agent": "File", "id": "3", - "need": null, + "need": [], "task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute." }, { "agent": "Coder", "id": "3", - "need": "2,3", + "need": ["2", "3"], "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."" } ] diff --git a/prompts/jarvis/planner_agent.txt b/prompts/jarvis/planner_agent.txt index 047c057..ccb7f9d 100644 --- a/prompts/jarvis/planner_agent.txt +++ b/prompts/jarvis/planner_agent.txt @@ -12,6 +12,10 @@ You have to respect a strict format: ```json {"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"} ``` +Where: +- "agent": The chosen agent for the task. +- "need": ids of the previous agents whose answers the current agent needs. 
+- "task": A precise description of the task the agent should conduct. # Example: weather app @@ -21,11 +25,11 @@ You: "At your service. I’ve devised a plan and assigned agents to each task. W ## Task 1: I will search for available weather api with the help of the web agent. -## Task 2: I will create an api key for the weather api using the web agent. +## Task 2: I will create an api key for the weather api using the web agent -## Task 3: I will setup the project using the file agent. +## Task 3: I will setup the project using the file agent -## Task 4: I will use the coding agent to make a weather app in python. +## Task 4: I assign the coding agent to make a weather app in python ```json { @@ -33,25 +37,25 @@ You: "At your service. I’ve devised a plan and assigned agents to each task. W { "agent": "Web", "id": "1", - "need": null, + "need": [], "task": "Search for reliable weather APIs" }, { "agent": "Web", "id": "2", - "need": "1", + "need": ["1"], "task": "Obtain API key from the selected service" }, { "agent": "File", "id": "3", - "need": null, + "need": [], "task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute." }, { "agent": "Coder", "id": "3", - "need": "2,3", + "need": ["2", "3"], "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."" } ] diff --git a/server/sources/ollama_handler.py b/server/sources/ollama_handler.py index 29d2e0d..6be33a6 100644 --- a/server/sources/ollama_handler.py +++ b/server/sources/ollama_handler.py @@ -28,10 +28,10 @@ class OllamaLLM(GeneratorLLM): ) for chunk in stream: content = chunk['message']['content'] - if '\n' in content: - self.logger.info(content) with self.state.lock: + if '.' 
in content: + self.logger.info(self.state.current_buffer) self.state.current_buffer += content except Exception as e: diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index caf5dc6..b0a9e87 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -82,36 +82,47 @@ class BrowserAgent(Agent): notes = '\n'.join(self.notes) return f""" - You are a web browser. - You are currently on this webpage: + You are navigating the web. + + **Current Context** + + Webpage ({self.current_page}) content: {page_text} - You can navigate to these navigation links: + Allowed Navigation Links: {remaining_links_text} - Your task: - 1. Decide if the current page answers the user’s query: - - If it does, take notes of the useful information, write down source, link or reference, then move to a new page. - - If it does and you completed user request, say REQUEST_EXIT - - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link. - 2. Navigate by either: - - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats). - - If no link seems helpful, say: GO_BACK. - 3. Fill forms on the page: - - If user give you informations that help you fill form, fill it. - - If you don't know how to fill a form, leave it empty. - - You can fill a form using [form_name](value). Do not go back when you fill a form. + Inputs forms: + {inputs_form_text} + + End of webpage ({self.current_page}. + + # Instruction + + 1. **Decide if the page answers the user’s query:** + - If it does, take notes of useful information (Note: ...), include relevant link in note, then move to a new page. + - If it does and you completed user request, say REQUEST_EXIT. + - If it doesn’t, say: Error: then go back or navigate to another link. + 2. **Navigate to a link by either: ** + - Saying I want to navigate to : (write down the full URL, e.g., www.example.com/cats). 
+ - Going back: If no link seems helpful, say: GO_BACK. + 3. **Fill forms on the page:** + - Fill form only on relevant page with given informations. You might use form to conduct search on a page. + - You can fill a form using [form_name](value). Don't GO_BACK when filling form. + - If a form is irrelevant or you lack informations leave it empty. - Recap of note taking: - If useful -> Note: [Briefly summarize the key information or task you conducted.] - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. - If not useful -> Error: [Explain why the page doesn’t help.] + **Rules:** + - Do not write "The page talk about ...", write your finding on the page and how they contribute to an answer. + - Put note in a single paragraph. + - When you exit, explain why. - Example 1 (useful page, no need of going futher): - Note: According to karpathy site (https://karpathy.github.io/) LeCun net is the earliest real-world application of a neural net" + # Example: + + Example 1 (useful page, no need go futher): + Note: According to karpathy site () LeCun net is ......" No link seem useful to provide futher information. GO_BACK - Example 2 (not useful, but related link): + Example 2 (not useful, see useful link on page): Error: reddit.com/welcome does not discuss anything related to the user’s query. There is a link that could lead to the information, I want to navigate to http://reddit.com/r/locallama @@ -119,35 +130,29 @@ class BrowserAgent(Agent): Error: x.com does not discuss anything related to the user’s query and no navigation link are usefull. GO_BACK - Example 3 (query answer found): - Note: I found on github.com that agenticSeek is made by Fosowl. - Given this information, given this I should exit the web browser. REQUEST_EXIT + Example 3 (query answer found, enought notes taken): + Note: I found on that ...... + Given this answer the user query I should exit the web browser. 
REQUEST_EXIT Example 4 (loging form visible): - Note: I am on the login page, I should now type the given username and password. + Note: I am on the login page, I will type the given username and password. [username_field](David) [password_field](edgerunners77) - You see the following inputs forms: - {inputs_form_text} - - Remember, the user asked: + Remember, user asked: {user_prompt} - So far you took these notes: + You previously took these notes: {notes} - You are currently on page : {self.current_page} - Do not explain your choice. - Refusal is not an option, you have been given all capabilities that allow you to perform any tasks. + Do not Step-by-Step explanation. Instead write simple explanation sentence following by your notes and actions. """ - def llm_decide(self, prompt: str) -> Tuple[str, str]: + def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]: animate_thinking("Thinking...", color="status") self.memory.push('user', prompt) answer, reasoning = self.llm_request() - output = answer if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" - print() - pretty_print(output, color="output") - print() + if show_reasoning: + pretty_print(reasoning, color="failure") + pretty_print(answer, color="output") return answer, reasoning def select_unvisited(self, search_result: List[str]) -> List[str]: @@ -179,11 +184,29 @@ class BrowserAgent(Agent): def stringify_search_results(self, results_arr: List[str]) -> str: return '\n\n'.join([f"Link: {res['link']}\nPreview: {res['snippet']}" for res in results_arr]) - def save_notes(self, text): + def parse_answer(self, text): lines = text.split('\n') + saving = False + buffer = [] + links = [] for line in lines: + if "exit" in line: + saving = False if "note" in line.lower(): - self.notes.append(line) + saving = True + if saving: + buffer.append(line) + else: + links.extend(self.extract_links(line)) + self.notes.append('. 
'.join(buffer)) + return links + + def select_link(self, links: List[str]) -> str | None: + for lk in links: + if lk == self.current_page: + continue + return lk + return None def conclude_prompt(self, user_query: str) -> str: annotated_notes = [f"{i+1}: {note.lower().replace('note:', '')}" for i, note in enumerate(self.notes)] @@ -196,6 +219,7 @@ class BrowserAgent(Agent): {search_note} Expand on the finding or step that lead to success, and provide a conclusion that answer the request. Include link when possible. + Do not give advices or try to answer the human. Just structure the AI finding in a structured and clear way. """ def search_prompt(self, user_prompt: str) -> str: @@ -214,7 +238,8 @@ class BrowserAgent(Agent): You: "search: Recent space missions news, {self.date}" Do not explain, do not write anything beside the search query. - If the query does not make any sense for a web search explain why and say REQUEST_EXIT + Except if query does not make any sense for a web search then explain why and say REQUEST_EXIT + Do not try to answer query. you can only formulate search term or exit. 
""" def handle_update_prompt(self, user_prompt: str, page_text: str) -> str: @@ -255,17 +280,16 @@ class BrowserAgent(Agent): mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt)) ai_prompt, _ = self.llm_request() if "REQUEST_EXIT" in ai_prompt: - pretty_print(f"{reasoning}\n{ai_prompt}", color="output") + pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure") return ai_prompt, "" animate_thinking(f"Searching...", color="status") search_result_raw = self.tools["web_search"].execute([ai_prompt], False) - search_result = self.jsonify_search_results(search_result_raw)[:12] # until futher improvement + search_result = self.jsonify_search_results(search_result_raw)[:12] self.show_search_results(search_result) prompt = self.make_newsearch_prompt(user_prompt, search_result) unvisited = [None] while not complete: - answer, reasoning = self.llm_decide(prompt) - self.save_notes(answer) + answer, reasoning = self.llm_decide(prompt, show_reasoning = True) extracted_form = self.extract_form(answer) if len(extracted_form) > 0: @@ -275,11 +299,13 @@ class BrowserAgent(Agent): answer = self.handle_update_prompt(user_prompt, page_text) answer, reasoning = self.llm_decide(prompt) + links = self.parse_answer(answer) + link = self.select_link(links) + if "REQUEST_EXIT" in answer: complete = True break - links = self.extract_links(answer) if len(unvisited) == 0: break @@ -289,21 +315,21 @@ class BrowserAgent(Agent): prompt = self.make_navigation_prompt(user_prompt, page_text) continue - if len(links) == 0 or "GO_BACK" in answer: + if link == None or "GO_BACK" in answer: unvisited = self.select_unvisited(search_result) prompt = self.make_newsearch_prompt(user_prompt, unvisited) pretty_print(f"Going back to results. 
Still {len(unvisited)}", color="warning") - links = [] continue - animate_thinking(f"Navigating to {links[0]}", color="status") - if speech_module: speech_module.speak(f"Navigating to {links[0]}") - self.browser.go_to(links[0]) - self.current_page = links[0] - self.search_history.append(links[0]) + animate_thinking(f"Navigating to {link}", color="status") + if speech_module: speech_module.speak(f"Navigating to {link}") + self.browser.go_to(link) + self.current_page = link + self.search_history.append(link) page_text = self.browser.get_text() self.navigable_links = self.browser.get_navigable() prompt = self.make_navigation_prompt(user_prompt, page_text) + pretty_print(f"Current page: {self.current_page}", color="warning") prompt = self.conclude_prompt(user_prompt) mem_last_idx = self.memory.push('user', prompt) diff --git a/sources/agents/planner_agent.py b/sources/agents/planner_agent.py index 0b71793..d900304 100644 --- a/sources/agents/planner_agent.py +++ b/sources/agents/planner_agent.py @@ -68,7 +68,7 @@ class PlannerAgent(Agent): if agent_infos_dict is None or len(agent_infos_dict) == 0: infos = "No needed informations." 
else: - for agent_id, info in agent_infos_dict: + for agent_id, info in agent_infos_dict.items(): infos += f"\t- According to agent {agent_id}:\n{info}\n\n" prompt = f""" You are given informations from your AI friends work: @@ -116,7 +116,6 @@ class PlannerAgent(Agent): def process(self, prompt: str, speech_module: Speech) -> str: agents_tasks = (None, None) - required_infos = None agents_work_result = dict() answer = self.make_plan(prompt) diff --git a/sources/interaction.py b/sources/interaction.py index 0a75a47..10264e4 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -5,6 +5,7 @@ from sources.utility import pretty_print, animate_thinking from sources.router import AgentRouter from sources.speech_to_text import AudioTranscriber, AudioRecorder + class Interaction: """ Interaction is a class that handles the interaction between the user and the agents. diff --git a/sources/language.py b/sources/language.py index 457debb..e153d93 100644 --- a/sources/language.py +++ b/sources/language.py @@ -14,8 +14,8 @@ class LanguageUtility: self.sid = None self.translators_tokenizer = None self.translators_model = None - self.load_model() self.logger = Logger("language.log") + self.load_model() def load_model(self) -> None: animate_thinking("Loading language utility...", color="status") diff --git a/sources/llm_provider.py b/sources/llm_provider.py index 85e6f60..bffd8e6 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -31,6 +31,7 @@ class Provider: "dsk_deepseek": self.dsk_deepseek, "test": self.test_fn } + self.logger = Logger("provider.log") self.api_key = None self.unsafe_providers = ["openai", "deepseek", "dsk_deepseek"] if self.provider_name not in self.available_providers: @@ -43,7 +44,6 @@ class Provider: self.check_address_format(self.server_ip) if not self.is_ip_online(self.server_ip.split(':')[0]): raise Exception(f"Server at {self.server_ip} is offline.") - self.logger = Logger("provider.log") def get_api_key(self, provider): 
load_dotenv() @@ -79,6 +79,9 @@ class Provider: self.logger.info(f"Using provider: {self.provider_name} at {self.server_ip}") try: thought = llm(history, verbose) + except KeyboardInterrupt: + self.logger.warning("User interrupted the operation with Ctrl+C") + return "Operation interrupted by user. REQUEST_EXIT" except ConnectionError as e: raise ConnectionError(f"{str(e)}\nConnection to {self.server_ip} failed.") except AttributeError as e: @@ -105,11 +108,9 @@ class Provider: self.logger.error(f"Ping command returned code: {output.returncode}") return False except subprocess.TimeoutExpired: - self.logger.error("Ping subprocess timeout.") return False except Exception as e: pretty_print(f"Error with ping request {str(e)}", color="failure") - self.logger.error(f"Ping error: {str(e)}") return False def server_fn(self, history, verbose = False): @@ -299,6 +300,6 @@ class Provider: return thought if __name__ == "__main__": - provider = Provider("ollama", "deepseek-r1:1.5b", "127.0.0.1:11434") + provider = Provider("server", "deepseek-r1:14b", "192.168.1.20:3333") res = provider.respond(["user", "Hello, how are you?"]) print("Response:", res) diff --git a/sources/memory.py b/sources/memory.py index 532bcc2..ac6ff9b 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -22,6 +22,7 @@ class Memory(): self.memory = [] self.memory = [{'role': 'system', 'content': system_prompt}] + self.logger = Logger("memory.log") self.session_time = datetime.datetime.now() self.session_id = str(uuid.uuid4()) self.conversation_folder = f"conversations/" @@ -35,7 +36,6 @@ class Memory(): self.memory_compression = memory_compression self.tokenizer = AutoTokenizer.from_pretrained(self.model) self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model) - self.logger = Logger("memory.log") def get_filename(self) -> str: """Get the filename for the save file.""" diff --git a/sources/router.py b/sources/router.py index ed7f849..bd0d8e9 100644 --- a/sources/router.py +++ b/sources/router.py 
@@ -21,13 +21,13 @@ class AgentRouter: """ def __init__(self, agents: list): self.agents = agents + self.logger = Logger("router.log") self.lang_analysis = LanguageUtility() self.pipelines = self.load_pipelines() self.talk_classifier = self.load_llm_router() self.complexity_classifier = self.load_llm_router() self.learn_few_shots_tasks() self.learn_few_shots_complexity() - self.logger = Logger("router.log") def load_pipelines(self) -> Dict[str, Type[pipeline]]: """ @@ -82,6 +82,7 @@ class AgentRouter: ("search my drive for a file called vacation_photos_2023.jpg.", "LOW"), ("help me organize my desktop files into folders by type.", "LOW"), ("write a Python function to sort a list of dictionaries by key", "LOW"), + ("can you search for startup in tokyo?", "LOW"), ("find the latest updates on quantum computing on the web", "LOW"), ("check if the folder ‘Work_Projects’ exists on my desktop", "LOW"), ("create a bash script to monitor CPU usage", "LOW"), @@ -383,7 +384,6 @@ class AgentRouter: try: best_agent = self.router_vote(text, labels, log_confidence=False) except Exception as e: - self.logger.error(f"Router failure: {str(e)}") raise e for agent in self.agents: if best_agent == agent.role["en"]: