From b3efd09fb3597839045a279a2c9b750b368dce03 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 13:09:16 +0200 Subject: [PATCH 01/13] fix : browser not handling properly web form --- sources/agents/browser_agent.py | 62 ++++++++++++++++++++++----- sources/browser.py | 74 +++++++++++++++++++++++++++++---- 2 files changed, 118 insertions(+), 18 deletions(-) diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 66929b0..e476b35 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -6,7 +6,8 @@ from sources.agents.agent import Agent from sources.tools.searxSearch import searxSearch from sources.browser import Browser from datetime import date -from typing import List, Tuple +from typing import List, Tuple, Type, Dict, Tuple + class BrowserAgent(Agent): def __init__(self, name, prompt_path, provider, verbose=False, browser=None): @@ -92,7 +93,7 @@ class BrowserAgent(Agent): Your task: 1. Decide if the current page answers the user’s query: {user_prompt} - If it does, take notes of the useful information, write down source, link or reference, then move to a new page. - - If it does and you are 100% certain that it provide a definive answer, say REQUEST_EXIT + - If it does and you completed use request, say REQUEST_EXIT - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link. 2. Navigate by either: - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats). @@ -100,7 +101,7 @@ class BrowserAgent(Agent): 3. Fill forms on the page: - If user give you informations that help you fill form, fill it. - If you don't know how to fill a form, leave it empty. - - You can fill a form using [form_name](value). + - You can fill a form using [form_name](value). Do not go back when you fill a form. Recap of note taking: If useful -> Note: [Briefly summarize the key information or task you conducted.] 
@@ -125,8 +126,8 @@ class BrowserAgent(Agent): Example 4 (loging form visible): Note: I am on the login page, I should now type the given username and password. - [form_name_1](David) - [form_name_2](edgerunners_2077) + [username_field](David) + [password_field](edgerunners77) You see the following inputs forms: {inputs_form_text} @@ -143,8 +144,9 @@ class BrowserAgent(Agent): animate_thinking("Thinking...", color="status") self.memory.push('user', prompt) answer, reasoning = self.llm_request() + output = f"Answer: {answer}" if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" pretty_print("-"*100) - pretty_print(answer, color="output") + pretty_print(output, color="output") pretty_print("-"*100) return answer, reasoning @@ -175,7 +177,7 @@ class BrowserAgent(Agent): return parsed_results def stringify_search_results(self, results_arr: List[str]) -> str: - return '\n\n'.join([f"Link: {res['link']}" for res in results_arr]) + return '\n\n'.join([f"Link: {res['link']}\nPreview: {res['snippet']}" for res in results_arr]) def save_notes(self, text): lines = text.split('\n') @@ -214,19 +216,50 @@ class BrowserAgent(Agent): Do not explain, do not write anything beside the search query. If the query does not make any sense for a web search explain why and say REQUEST_EXIT """ + + def handle_update_prompt(self, user_prompt: str, page_text: str) -> str: + return f""" + You are a web browser. + You just filled a form on the page. + Now you should see the result of the form submission on the page: + Page text: + {page_text} + The user asked: {user_prompt} + Does the page answer the user’s query now? + If it does, take notes of the useful information, write down result and say FORM_FILLED. + If you were previously on a login form, no need to explain. + If it does and you completed user request, say REQUEST_EXIT + if it doesn’t, say: Error: This page does not answer the user’s query then GO_BACK. 
+ """ + + def show_search_results(self, search_result: List[str]): + pretty_print("\nSearch results:", color="output") + for res in search_result: + pretty_print(f"Title: {res['title']} - Link: {res['link']}", color="output") - def process(self, user_prompt, speech_module) -> str: + def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]: + """ + Process the user prompt to conduct an autonomous web search. + Start with a google search with searxng using web_search tool. + Then enter a navigation logic to find the answer or conduct required actions. + Args: + user_prompt: The user's input query + speech_module: Optional speech output module + Returns: + tuple containing the final answer and reasoning + """ complete = False animate_thinking(f"Thinking...", color="status") self.memory.push('user', self.search_prompt(user_prompt)) ai_prompt, _ = self.llm_request() if "REQUEST_EXIT" in ai_prompt: - # request make no sense, maybe wrong agent was allocated? + pretty_print(f"{reasoning}\n{ai_prompt}", color="output") return ai_prompt, "" animate_thinking(f"Searching...", color="status") search_result_raw = self.tools["web_search"].execute([ai_prompt], False) search_result = self.jsonify_search_results(search_result_raw)[:12] # until futher improvement + self.show_search_results(search_result) prompt = self.make_newsearch_prompt(user_prompt, search_result) unvisited = [None] while not complete: @@ -236,7 +269,10 @@ class BrowserAgent(Agent): extracted_form = self.extract_form(answer) if len(extracted_form) > 0: self.browser.fill_form_inputs(extracted_form) - self.browser.find_and_click_submit() + self.browser.find_and_click_submission() + page_text = self.browser.get_text() + answer = self.handle_update_prompt(user_prompt, page_text) + answer, reasoning = self.llm_decide(prompt) if "REQUEST_EXIT" in answer: complete = True @@ -246,6 +282,12 @@ class BrowserAgent(Agent): if len(unvisited) == 0: break + if "FORM_FILLED" in answer: + page_text = 
self.browser.get_text() + self.navigable_links = self.browser.get_navigable() + prompt = self.make_navigation_prompt(user_prompt, page_text) + continue + if len(links) == 0 or "GO_BACK" in answer: unvisited = self.select_unvisited(search_result) prompt = self.make_newsearch_prompt(user_prompt, unvisited) diff --git a/sources/browser.py b/sources/browser.py index b6f7651..6c68b77 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -6,10 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver.common.action_chains import ActionChains -from selenium.webdriver.chrome.options import Options from bs4 import BeautifulSoup from urllib.parse import urlparse -from typing import List, Tuple +from typing import List, Tuple, Type, Dict, Tuple from fake_useragent import UserAgent from selenium_stealth import stealth import undetected_chromedriver as uc @@ -126,13 +125,26 @@ class Browser: try: initial_handles = self.driver.window_handles self.driver.get(url) - time.sleep(1) + wait = WebDriverWait(self.driver, timeout=30) + wait.until( + lambda driver: ( + driver.execute_script("return document.readyState") == "complete" and + not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "verifying", "captcha"]) + ), + message="stuck on 'checking browser' or verification screen" + ) self.apply_web_safety() self.logger.info(f"Navigated to: {url}") return True + except TimeoutException as e: + self.logger.error(f"Timeout waiting for {url} to load: {str(e)}") + return False except WebDriverException as e: self.logger.error(f"Error navigating to {url}: {str(e)}") return False + except Exception as e: + self.logger.error(f"Fatal error with go_to method on {url}:\n{str(e)}") + raise e def is_sentence(self, text:str) -> bool: """Check if the text qualifies as a meaningful sentence or 
contains important error codes.""" @@ -199,7 +211,7 @@ class Browser: return False return True - def get_navigable(self) -> [str]: + def get_navigable(self) -> List[str]: """Get all navigable links on the current page.""" try: links = [] @@ -301,13 +313,55 @@ class Browser: result.sort(key=lambda x: len(x[0])) return result - def find_and_click_submit(self, btn_type:str = 'login') -> None: + """ + def find_and_click_submission(self, btn_type:str = 'login') -> None: buttons = self.get_buttons_xpath() if len(buttons) == 0: self.logger.warning(f"No visible buttons found") for button in buttons: if button[0] == btn_type: self.click_element(button[1]) + """ + + def find_and_click_submission(self, timeout: int = 10) -> bool: + possible_submissions = ["login", "submit", "register"] + for submission in possible_submissions: + if self.find_and_click_btn(submission, timeout): + return True + self.logger.warning("No submission button found") + return False + + def find_and_click_btn(self, btn_type: str = 'login', timeout: int = 10) -> bool: + """ + Find and click a submit button matching the specified type. + Args: + btn_type: The type of button to find (e.g., 'login', 'submit'), matched against button text. + timeout: Maximum time (in seconds) to wait for the button to appear. + Returns: + bool: True if the button was found and clicked, False otherwise. 
+ """ + buttons = self.get_buttons_xpath() + if not buttons: + self.logger.warning("No visible buttons found") + return False + + for button_text, xpath in buttons: + if btn_type.lower() in button_text.lower(): + try: + wait = WebDriverWait(self.driver, timeout) + element = wait.until( + EC.element_to_be_clickable((By.XPATH, xpath)), + message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds" + ) + if self.click_element(xpath): + return True + else: + return False + except TimeoutException: + self.logger.warning(f"Timeout waiting for '{button_text}' button at XPath: {xpath}") + return False + self.logger.warning(f"No button matching '{btn_type}' found") + return False def find_input_xpath_by_name(self, inputs, name: str) -> str | None: for field in inputs: @@ -315,8 +369,11 @@ class Browser: return field["xpath"] return None - def fill_form_inputs(self, input_list:[str]) -> bool: + def fill_form_inputs(self, input_list: List[str]) -> bool: """Fill form inputs based on a list of [name](value) strings.""" + if not isinstance(input_list, list): + self.logger.error("input_list must be a list") + return False inputs = self.find_all_inputs() try: for input_str in input_list: @@ -389,7 +446,7 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) driver = create_driver() - browser = Browser(driver) + browser = Browser(driver, anticaptcha_manual_install=True) time.sleep(10) print("AntiCaptcha Test") @@ -400,4 +457,5 @@ if __name__ == "__main__": inputs = browser.get_form_inputs() inputs = ['[username](student)', f'[password](Password123)', '[appOtp]()', '[backupOtp]()'] browser.fill_form_inputs(inputs) - browser.find_and_click_submit() + browser.find_and_click_submission() + time.sleep(30) From 1b5a55ccf232a694082239535b361677d7ebf55d Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 13:10:06 +0200 Subject: [PATCH 02/13] feat : cleaner import arrangement --- prompts/base/planner_agent.txt | 2 +- sources/interaction.py | 
4 +++- sources/language.py | 1 + sources/llm_provider.py | 5 +++-- sources/memory.py | 5 +++-- sources/router.py | 2 ++ sources/speech_to_text.py | 1 + sources/text_to_speech.py | 29 ++++++++++++++++------------- 8 files changed, 30 insertions(+), 19 deletions(-) diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index 1eac277..a81677c 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -1,4 +1,4 @@ -You are a planner agent. +You are a project manager. Your goal is to divide and conquer the task using the following agents: - Coder: An expert coder agent. - File: An expert agent for finding files. diff --git a/sources/interaction.py b/sources/interaction.py index a4a0351..aee1924 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -1,3 +1,4 @@ +from typing import List, Tuple, Type, Dict, Tuple from sources.text_to_speech import Speech from sources.utility import pretty_print, animate_thinking @@ -11,7 +12,8 @@ class Interaction: def __init__(self, agents, tts_enabled: bool = True, stt_enabled: bool = True, - recover_last_session: bool = False): + recover_last_session: bool = False, + ): self.is_active = True self.current_agent = None self.last_query = None diff --git a/sources/language.py b/sources/language.py index 172e16f..d1fcde8 100644 --- a/sources/language.py +++ b/sources/language.py @@ -1,3 +1,4 @@ +from typing import List, Tuple, Type, Dict, Tuple import langid import re import nltk diff --git a/sources/llm_provider.py b/sources/llm_provider.py index 79034fd..5724d89 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -1,16 +1,17 @@ +import os import time import ollama from ollama import chat import requests import subprocess import ipaddress +import httpx import platform from dotenv import load_dotenv, set_key from openai import OpenAI from huggingface_hub import InferenceClient -import os -import httpx +from typing import List, Tuple, Type, Dict, Tuple from 
sources.utility import pretty_print, animate_thinking diff --git a/sources/memory.py b/sources/memory.py index 8d4c323..090bbf5 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -1,11 +1,12 @@ -import torch -from transformers import AutoTokenizer, AutoModelForSeq2SeqLM import time import datetime import uuid import os import sys import json +from typing import List, Tuple, Type, Dict, Tuple +import torch +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/sources/router.py b/sources/router.py index 1f11604..8f42459 100644 --- a/sources/router.py +++ b/sources/router.py @@ -1,6 +1,8 @@ import os import sys import torch +from typing import List, Tuple, Type, Dict, Tuple + from transformers import pipeline from adaptive_classifier import AdaptiveClassifier diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 92f50b3..c554c8c 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -1,4 +1,5 @@ from colorama import Fore +from typing import List, Tuple, Type, Dict, Tuple import queue import threading import numpy as np diff --git a/sources/text_to_speech.py b/sources/text_to_speech.py index 14d4dab..9087fb2 100644 --- a/sources/text_to_speech.py +++ b/sources/text_to_speech.py @@ -1,30 +1,33 @@ +import re +import platform +import subprocess +from sys import modules +from typing import List, Tuple, Type, Dict, Tuple + from kokoro import KPipeline from IPython.display import display, Audio import soundfile as sf -import subprocess -import re -import platform -from sys import modules class Speech(): """ Speech is a class for generating speech from text. 
""" - def __init__(self, enable: bool = True, language: str = "english") -> None: + def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 0) -> None: self.lang_map = { - "english": 'a', - "chinese": 'z', - "french": 'f' + "en": 'a', + "zh": 'z', + "fr": 'f' } self.voice_map = { - "english": ['af_alloy', 'af_bella', 'af_kore', 'af_nicole', 'af_nova', 'af_sky', 'am_echo', 'am_michael', 'am_puck'], - "chinese": ['zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi', 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'], - "french": ['ff_siwis'] + "en": ['af_kore', 'af_bella', 'af_alloy', 'af_nicole', 'af_nova', 'af_sky', 'am_echo', 'am_michael', 'am_puck'], + "zh": ['zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi', 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'], + "fr": ['ff_siwis'] } self.pipeline = None + self.language = language if enable: self.pipeline = KPipeline(lang_code=self.lang_map[language]) - self.voice = self.voice_map[language][2] + self.voice = self.voice_map[language][voice_idx] self.speed = 1.2 def speak(self, sentence: str, voice_number: int = 1 , audio_file: str = 'sample.wav'): @@ -38,7 +41,7 @@ class Speech(): if not self.pipeline: return sentence = self.clean_sentence(sentence) - self.voice = self.voice_map["english"][voice_number] + self.voice = self.voice_map[self.language][voice_number] generator = self.pipeline( sentence, voice=self.voice, speed=self.speed, split_pattern=r'\n+' From 9c3330b45dfff50bc9be24cbce5387f6347ba34b Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 14:21:48 +0200 Subject: [PATCH 03/13] add french readme --- README_FR.md | 424 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 424 insertions(+) create mode 100644 README_FR.md diff --git a/README_FR.md b/README_FR.md new file mode 100644 index 0000000..348f5c2 --- /dev/null +++ b/README_FR.md @@ -0,0 +1,424 @@ + + + +# AgenticSeek: Une IA comme Manus qui utilise des agents DeepSeek R1. 
+ +Une alternative **entièrement** locale à Manus AI, un assistant vocal IA qui code, explore votre système de fichiers, navigue sur le web et corrige ses erreurs, tout cela sans envoyer la moindre donnée dans le cloud. Construit avec des modèles de raisonnement comme DeepSeek R1, cet agent autonome fonctionne entièrement sur votre matériel, garantissant la confidentialité de vos données. + +[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj) + +> 🛠️ **En cours de développement** – On cherche activement des contributeurs! + +![alt text](./media/whale_readme.jpg) + +> *Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file* + +> *Can you make a tetris game in C ?* + +> *I would like to setup a new project file index as mark2.* + + +### agenticSeek peut planifier des tâches! + +![alt text](./media/exemples/demo_image.png) + +## Fonctionnalités: + +- **100% Local**: Fonctionne en local sur votre PC. Vos données restent les vôtres. + +- **Accès à vos Fichiers**: Utilise bash pour naviguer et manipuler vos fichiers. + +- **Codage semi-autonome**: Peut écrire, déboguer et exécuter du code en Python, C, Golang et d'autres langages à venir. + +- **Routage d'Agent**: Sélectionne automatiquement l’agent approprié pour la tâche. + +- **Planification**: Pour les tâches complexes, utilise plusieurs agents. + +- **Navigation Web Autonome**: Navigation web autonome. + +- **Mémoire efficace**: Gestion efficace de la mémoire et des sessions. + +--- + +## **Installation** + +Assurez-vous d’avoir installé le pilote Chrome, Docker et Python 3.10 (ou une version plus récente). 
+ +Pour les problèmes liés au pilote Chrome, consultez la section Chromedriver. + +### 1️⃣ Cloner le dépôt et configurer + +```sh +git clone https://github.com/Fosowl/agenticSeek.git +cd agenticSeek +mv .env.example .env +``` + +### 2️⃣ **Créer un environnement virtuel** + +```sh +python3 -m venv agentic_seek_env +source agentic_seek_env/bin/activate +# On Windows: agentic_seek_env\Scripts\activate +``` + +### 3️⃣ **Installation** + +**Automatique:** + +```sh +./install.sh +``` + +**Manuel:** + +```sh +pip3 install -r requirements.txt +``` + + +## Faire fonctionner sur votre machine + +**Nous recommandons d’utiliser au moins DeepSeek 14B, les modèles plus petits ont du mal avec l’utilisation des outils et oublient rapidement le contexte.** + +### 1️⃣ **Téléchargement du modèle** + +Assurez-vous d'avoir [Ollama](https://ollama.com/) installé. + +Télécharger `deepseek-r1:14b` depuis [DeepSeek](https://deepseek.com/models) + +```sh +ollama pull deepseek-r1:14b +``` + +### 2️⃣ **Démarrage** + +```sh +ollama serve +``` + +Modifiez le fichier config.ini pour définir provider_name sur ollama et provider_model sur deepseek-r1:14b + +```sh +[MAIN] +is_local = True +provider_name = ollama +provider_model = deepseek-r1:14b +provider_server_address = 127.0.0.1:11434 +``` + +Démarrer tous les services : + +```sh +sudo ./start_services.sh +``` + +Lancer l'assistant : + +```sh +python3 main.py +``` + +Voir la section **Utilisation** si vous ne comprenez pas comment l’utiliser + +Voir la section **Problèmes connus** si vous rencontrez des problèmes + +Voir la section **Exécuter avec une API** si votre matériel ne peut pas exécuter DeepSeek localement + +Voir la section **Configuration** pour une explication détaillée du fichier de configuration. + +--- + +## Utilisation + +Avertissement : actuellement, le système qui choisit le meilleur agent IA fonctionnera mal avec du texte non anglophone. Cela est dû au fait que le routage des agents utilise un modèle entraîné sur du texte en anglais. 
Nous travaillons dur pour corriger cela. Veuillez utiliser l’anglais pour le moment. + +Assurez-vous que les services sont en cours d’exécution avec ./start_services.sh et lancez AgenticSeek avec python3 main.py + +```sh +sudo ./start_services.sh +python3 main.py +``` + +Vous verrez >>> +Cela indique qu’AgenticSeek attend que vous saisissiez des instructions. +Vous pouvez également utiliser la reconnaissance vocale en définissant listen = True dans la configuration. + +Pour quitter, dites simplement `goodbye`. + +Voici quelques exemples d’utilisation : + +### Programmation + +> *Help me with matrix multiplication in Golang* + +> *Scan my network with nmap, find if any suspicious devices is connected* + +> *Make a snake game in python* + +### Recherche web + +> *Do a web search to find cool tech startup in Japan working on cutting edge AI research* + +> *Can you find on the internet who created agenticSeek?* + +> *Can you find on which website I can buy a rtx 4090 for cheap* + +### Fichier + +> *Hey can you find where is million_dollars_contract.pdf i lost it* + +> *Show me how much space I have left on my disk* + +> *Find and read the README.md and follow the install instruction* + +### Conversation + +> *Tell me about France* + +> *What is the meaning of life ?* + +> *Should I take creatine before or after workout?* + + +Après avoir saisi votre requête, AgenticSeek attribuera le meilleur agent pour la tâche. + +Comme il s’agit d’un prototype précoce, le système de routage des agents pourrait ne pas toujours attribuer le bon agent en fonction de votre requête. + +Par conséquent, vous devriez être très explicite sur ce que vous voulez et sur la manière dont l’IA doit procéder. Par exemple, si vous voulez qu’elle effectue une recherche sur le web, ne dites pas : + +Connaissez-vous de bons pays pour voyager seul ? 
+ +Dites plutôt : + +Faites une recherche sur le web et découvrez quels sont les meilleurs pays pour voyager seul + +--- + +## **Exécuter le LLM sur votre propre serveur** + +Si vous disposez d’un ordinateur puissant ou d’un serveur que vous pouvez utiliser, mais que vous souhaitez y accéder depuis votre ordinateur portable, vous avez la possibilité d’exécuter le LLM sur un serveur distant. + +### 1️⃣ **Configurer et démarrer les scripts du serveur** + +Sur votre "serveur" qui exécutera le modèle IA, obtenez l’adresse IP + +```sh +ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1 +``` + +Remarque : Pour Windows ou macOS, utilisez respectivement ipconfig ou ifconfig pour trouver l’adresse IP. + +**Si vous souhaitez utiliser un fournisseur basé sur OpenAI, suivez la section Exécuter avec une API.** + +Clonez le dépôt et entrez dans le dossier server/. + + +```sh +git clone --depth 1 https://github.com/Fosowl/agenticSeek.git +cd agenticSeek/server/ +``` + +Installez les dépendances spécifiques au serveur : + +```sh +pip3 install -r requirements.txt +``` + +Exécutez le script du serveur. + +```sh +python3 app.py --provider ollama --port 3333 +``` + +Vous avez le choix entre utiliser ollama et llamacpp comme service LLM. + +### 2️⃣ **Lancer** + +Maintenant, sur votre ordinateur personnel : + +Modifiez le fichier config.ini pour définir provider_name sur server et provider_model sur deepseek-r1:14b. + +Définissez provider_server_address sur l’adresse IP de la machine qui exécutera le modèle. + +```sh +[MAIN] +is_local = False +provider_name = server +provider_model = deepseek-r1:14b +provider_server_address = x.x.x.x:3333 +``` + +Exécutez l’assistant : + +```sh +sudo ./start_services.sh +python3 main.py +``` + +## **Exécuter avec une API** + +AVERTISSEMENT : Assurez-vous qu’il n’y a pas d’espace en fin de ligne dans la configuration. + +Définissez is_local sur True si vous utilisez une API basée sur OpenAI localement. 
+ +Changez l’adresse IP si votre API basée sur OpenAI fonctionne sur votre propre serveur. + +```sh +[MAIN] +is_local = False +provider_name = openai +provider_model = gpt-4o +provider_server_address = 127.0.0.1:5000 +``` + +Exécutez l’assistant : + +```sh +sudo ./start_services.sh +python3 main.py +``` + +## Config + +Exemple de configuration : +``` +[MAIN] +is_local = True +provider_name = ollama +provider_model = deepseek-r1:1.5b +provider_server_address = 127.0.0.1:11434 +agent_name = Friday +recover_last_session = False +save_session = False +speak = False +listen = False +work_dir = /Users/mlg/Documents/ai_folder +jarvis_personality = False +[BROWSER] +headless_browser = False +stealth_mode = False +``` + +**Explanation**: + +`is_local` -> Exécute l’agent localement (True) ou sur un serveur distant (False). + +`provider_name` -> Le fournisseur à utiliser (parmi : ollama, server, lm-studio, deepseek-api). + +`provider_model` -> Le modèle utilisé, par exemple, deepseek-r1:1.5b. + +`provider_server_address` -> Adresse du serveur, par exemple, 127.0.0.1:11434 pour local. Définissez n’importe quoi pour une API non locale. + +`agent_name` -> Nom de l’agent, par exemple, Friday. Utilisé comme mot déclencheur pour la reconnaissance vocale. + +`recover_last_session` -> Reprend la dernière session (True) ou non (False). + +`save_session` -> Sauvegarde les données de la session (True) ou non (False). + +`speak` -> Active la sortie vocale (True) ou non (False). + +`listen` -> Écoute les entrées vocales (True) ou non (False). + +`work_dir` -> Dossier auquel l’IA aura accès, par exemple : /Users/user/Documents/. + +`jarvis_personality` -> Utilise une personnalité de type JARVIS (True) ou non (False). Cela modifie simplement le fichier de prompt. + +`headless_browser` -> Exécute le navigateur sans fenêtre visible (True) ou non (False). + +`stealth_mode` -> Rend la détection des bots plus difficile. 
Le seul inconvénient est que vous devez installer manuellement l’extension anticaptcha. + + + +## Providers + +Le tableau ci-dessous montre les fournisseurs disponibles : + +| Provider | Local? | Description | +|-----------|--------|-----------------------------------------------------------| +| ollama | Yes | Exécutez des LLM localement avec facilité en utilisant Ollama comme fournisseur LLM +| server | Yes | Hébergez le modèle sur une autre machine, exécutez sur votre machine locale +| lm-studio | Yes | Exécutez un LLM localement avec LM Studio (définissez provider_name sur lm-studio) +| openai | No | Utilise ChatGPT API (pas privé) | +| deepseek-api | No | Deepseek API (pas privé) | +| huggingface| No | Hugging-Face API (pas privé) | + +Pour sélectionner un fournisseur, modifiez le config.ini : + +``` +is_local = False +provider_name = openai +provider_model = gpt-4o +provider_server_address = 127.0.0.1:5000 +``` + +`is_local` : doit être True pour tout LLM exécuté localement, sinon False. + +`provider_name` : Sélectionnez le fournisseur à utiliser par son nom, voir la liste des fournisseurs ci-dessus. + +`provider_model` : Définissez le modèle à utiliser par l’agent. + +`provider_server_address` : peut être défini sur n’importe quoi si vous n’utilisez pas le fournisseur server. + +# Problèmes connus + +## Problèmes avec Chromedriver + +Erreur #1:**incompatibilité** + +`Exception: Failed to initialize browser: Message: session not created: This version of ChromeDriver only supports Chrome version 113 +Current browser version is 134.0.6998.89 with binary path` + +Cela se produit s’il y a une incompatibilité entre votre navigateur et la version de chromedriver. 
+ +Vous devez naviguer pour télécharger la dernière version : + +https://developer.chrome.com/docs/chromedriver/downloads + +Si vous utilisez Chrome version 115 ou plus récent, allez sur : + +https://googlechromelabs.github.io/chrome-for-testing/ + +Et téléchargez la version de chromedriver correspondant à votre système d’exploitation. + +![alt text](./media/chromedriver_readme.png) + +Si cette section est incomplète, veuillez signaler un problème. + +## FAQ + +**Q: What hardware do I need?** + +Modèle 7B : GPU avec 8 Go de VRAM. +Modèle 14B : GPU 12 Go (par exemple, RTX 3060). +Modèle 32B : 24 Go+ de VRAM. + +**Q: Why Deepseek R1 over other models?** + +DeepSeek R1 excelle dans le raisonnement et l’utilisation d’outils pour sa taille. Nous pensons que c’est un choix solide pour nos besoins, bien que d’autres modèles fonctionnent également bien, DeepSeek est notre choix principal. + +**Q: I get an error running `main.py`. What do I do?** + +Assurez-vous qu’Ollama est en cours d’exécution (ollama serve), que votre config.ini correspond à votre fournisseur, et que les dépendances sont installées. Si cela ne fonctionne pas, n’hésitez pas à signaler un problème. + +**Q: Can it really run 100% locally?** + +Oui, avec les fournisseurs Ollama ou Server, toute la reconnaissance vocale, le LLM et la synthèse vocale fonctionnent localement. Les options non locales (OpenAI ou autres API) sont facultatives. + +**Q: How come it is older than manus ?** + +Nous avons commencé cela comme un projet amusant pour créer une IA locale de type Jarvis. Cependant, avec l’émergence de Manus, nous avons vu l’opportunité de réorienter certaines tâches pour en faire une autre alternative. + +**Q: How is it better than manus ?** + +Il ne l’est pas, mais nous privilégions l’exécution locale et la confidentialité par rapport à une approche basée sur le cloud. C’est une alternative amusante et accessible ! + +## Contribute + +Nous recherchons des développeurs pour améliorer AgenticSeek ! 
Consultez les problèmes ouverts ou les discussions. + +[![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date) + +## Auteurs: + > [Fosowl](https://github.com/Fosowl) + > [steveh8758](https://github.com/steveh8758) From 698ed78acc6f75ac6dcd80825f013a731b6133dc Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 15:12:53 +0200 Subject: [PATCH 04/13] feat: better find_and_click_submission function --- sources/browser.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/sources/browser.py b/sources/browser.py index 6c68b77..4155d15 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -313,16 +313,6 @@ class Browser: result.sort(key=lambda x: len(x[0])) return result - """ - def find_and_click_submission(self, btn_type:str = 'login') -> None: - buttons = self.get_buttons_xpath() - if len(buttons) == 0: - self.logger.warning(f"No visible buttons found") - for button in buttons: - if button[0] == btn_type: - self.click_element(button[1]) - """ - def find_and_click_submission(self, timeout: int = 10) -> bool: possible_submissions = ["login", "submit", "register"] for submission in possible_submissions: @@ -335,8 +325,8 @@ class Browser: """ Find and click a submit button matching the specified type. Args: - btn_type: The type of button to find (e.g., 'login', 'submit'), matched against button text. - timeout: Maximum time (in seconds) to wait for the button to appear. + btn_type: The type of button to find. + timeout: time to wait for button to appear. Returns: bool: True if the button was found and clicked, False otherwise. 
""" From 56b5db7df3b55af4df7b9d0ba3dd560fe027d30d Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 18:49:37 +0200 Subject: [PATCH 05/13] Initial project setup --- config.ini | 14 +-- main.py | 3 +- prompts/base/coder_agent.txt | 1 + prompts/base/planner_agent.txt | 30 +++++-- prompts/jarvis/coder_agent.txt | 1 + prompts/jarvis/planner_agent.txt | 25 ++++-- sources/agents/agent.py | 4 +- sources/agents/browser_agent.py | 11 +-- sources/agents/planner.tmp | 142 +++++++++++++++++++++++++++++++ sources/agents/planner_agent.py | 6 +- sources/browser.py | 6 +- sources/tools/C_Interpreter.py | 2 +- sources/utility.py | 4 +- 13 files changed, 211 insertions(+), 38 deletions(-) create mode 100644 sources/agents/planner.tmp diff --git a/config.ini b/config.ini index 6ca0f40..6fa8acc 100644 --- a/config.ini +++ b/config.ini @@ -1,15 +1,15 @@ [MAIN] -is_local = True -provider_name = ollama +is_local = False +provider_name = server provider_model = deepseek-r1:14b -provider_server_address = 127.0.0.1:11434 +provider_server_address = 192.168.1.20:3333 agent_name = Friday recover_last_session = False -save_session = False -speak = False +save_session = True +speak = True listen = False work_dir = /Users/mlg/Documents/ai_folder -jarvis_personality = False +jarvis_personality = True [BROWSER] headless_browser = False -stealth_mode = False \ No newline at end of file +stealth_mode = True \ No newline at end of file diff --git a/main.py b/main.py index 4129248..587b184 100755 --- a/main.py +++ b/main.py @@ -57,7 +57,8 @@ def main(): interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'), stt_enabled=config.getboolean('MAIN', 'listen'), - recover_last_session=config.getboolean('MAIN', 'recover_last_session')) + recover_last_session=config.getboolean('MAIN', 'recover_last_session'), + ) try: while interaction.is_active: interaction.get_user() diff --git a/prompts/base/coder_agent.txt b/prompts/base/coder_agent.txt index 886f2e3..3b64539 
100644 --- a/prompts/base/coder_agent.txt +++ b/prompts/base/coder_agent.txt @@ -47,4 +47,5 @@ Some rules: - Do not ever use user input, input are not supported by the system. - Do not ever tell user how to run it. user know it. - For simple explanation you don't need to code. +- Dont be lazy, write full implementation for user. - If query is unclear say REQUEST_CLARIFICATION \ No newline at end of file diff --git a/prompts/base/planner_agent.txt b/prompts/base/planner_agent.txt index a81677c..b9ebbb3 100644 --- a/prompts/base/planner_agent.txt +++ b/prompts/base/planner_agent.txt @@ -1,8 +1,8 @@ You are a project manager. Your goal is to divide and conquer the task using the following agents: -- Coder: An expert coder agent. -- File: An expert agent for finding files. -- Web: An expert agent for web search. +- Coder: A programming agent, can code in python, bash, C and golang. +- File: An agent for finding, reading or operating with files. +- Web: An agent that can conduct web search, wrapped with selenium it can interact with any webpage. Agents are other AI that obey your instructions. @@ -13,14 +13,18 @@ You have to respect a strict format: {"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"} ``` +# Example 1: web app + User: make a weather app in python You: Sure, here is the plan: -## Task 1: I will search for available weather api +## Task 1: I will search for available weather api with the help of the web agent. 
-## Task 2: I will create an api key for the weather api +## Task 2: I will create an api key for the weather api using the web agent -## Task 3: I will make a weather app in python +## Task 3: I will setup the project using the file agent + +## Task 4: I asign the coding agent to make a weather app in python ```json { @@ -37,11 +41,17 @@ You: Sure, here is the plan: "need": "1", "task": "Obtain API key from the selected service" }, + { + "agent": "File", + "id": "3", + "need": null, + "task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute." + }, { "agent": "Coder", "id": "3", - "need": "2", - "task": "Develop a Python application using the API and key to fetch and display weather data" + "need": "2,3", + "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."" } ] } @@ -49,4 +59,8 @@ You: Sure, here is the plan: Rules: - Do not write code. You are a planning agent. +- Give clear, detailled order to each agent and how their task relate to the previous task (if any). - Put your plan in a json with the key "plan". +- Always tell the coding agent where to save file, eg: . +- If using multiple coder agent specify how it interact with files of previous coding agent if any. +- Tell agent they are soldier, they execute without question. diff --git a/prompts/jarvis/coder_agent.txt b/prompts/jarvis/coder_agent.txt index f54dd9f..62ab301 100644 --- a/prompts/jarvis/coder_agent.txt +++ b/prompts/jarvis/coder_agent.txt @@ -46,6 +46,7 @@ Some rules: - You do not ever need to use bash to execute code. - Do not ever use user input, input are not supported by the system. - Do not ever tell user how to run it. user know it. +- Dont be lazy, write full implementation for user. 
- For simple explanation you don't need to code. - If query is unclear say REQUEST_CLARIFICATION diff --git a/prompts/jarvis/planner_agent.txt b/prompts/jarvis/planner_agent.txt index d75ce7d..e1d4776 100644 --- a/prompts/jarvis/planner_agent.txt +++ b/prompts/jarvis/planner_agent.txt @@ -15,16 +15,17 @@ You have to respect a strict format: # Example: weather app -User: "I need a plan to build a weather app—search for a weather API, get an API key, and code it in Python." +User: "I need to build a simple weather app, get an API key, and code it in Python." -You: "At your service. I’ve devised a plan to conquer the meteorological frontier. +You: "At your service. I’ve devised a plan and assigned agents to each task. Would you like me to proceed? -## Task one: scour the web for a weather API worth its salt. +## Task 1: I will search for available weather api with the help of the web agent. -## Task two: secure an API key with utmost discretion. +## Task 2: I will create an api key for the weather api using the web agent. -## Task three: unleash a Python app to bend the weather to your will." +## Task 3: I will setup the project using the file agent. +## Task 4: I will use the coding agent to make a weather app in python. ```json { @@ -41,11 +42,17 @@ You: "At your service. I’ve devised a plan to conquer the meteorological fron "need": "1", "task": "Obtain API key from the selected service" }, + { + "agent": "File", + "id": "3", + "need": null, + "task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute." + }, { "agent": "Coder", "id": "3", - "need": "2", - "task": "Develop a Python application using the API and key to fetch and display weather data" + "need": "2,3", + "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. 
You are forbidden from asking clarification, just execute."" } ] } @@ -53,7 +60,11 @@ You: "At your service. I’ve devised a plan to conquer the meteorological fron Rules: - Do not write code. You are a planning agent. +- Give clear, detailled order to each agent and how their task relate to the previous task (if any). - Put your plan in a json with the key "plan". +- Always tell the coding agent where to save file, eg: . +- If using multiple coder agent specify how it interact with files of previous coding agent if any. +- Tell agent they are soldier, they execute without question. Personality: diff --git a/sources/agents/agent.py b/sources/agents/agent.py index 4409eeb..eebbcc1 100644 --- a/sources/agents/agent.py +++ b/sources/agents/agent.py @@ -133,6 +133,8 @@ class Agent(): Show the answer in a pretty way. Show code blocks and their respective feedback by inserting them in the ressponse. """ + if self.last_answer is None: + return lines = self.last_answer.split("\n") for line in lines: if "block:" in line: @@ -190,5 +192,5 @@ class Agent(): self.memory.push('user', feedback) if save_path != None: tool.save_block(blocks, save_path) - self.blocks_result = list(reversed(self.blocks_result)) + self.blocks_result = self.blocks_result return True, feedback diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index e476b35..d47f141 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -93,7 +93,7 @@ class BrowserAgent(Agent): Your task: 1. Decide if the current page answers the user’s query: {user_prompt} - If it does, take notes of the useful information, write down source, link or reference, then move to a new page. - - If it does and you completed use request, say REQUEST_EXIT + - If it does and you completed user request, say REQUEST_EXIT - If it doesn’t, say: Error: This page does not answer the user’s query then go back or navigate to another link. 2. 
Navigate by either: - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats). @@ -144,10 +144,10 @@ class BrowserAgent(Agent): animate_thinking("Thinking...", color="status") self.memory.push('user', prompt) answer, reasoning = self.llm_request() - output = f"Answer: {answer}" if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" - pretty_print("-"*100) + output = answer if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" + print() pretty_print(output, color="output") - pretty_print("-"*100) + print() return answer, reasoning def select_unvisited(self, search_result: List[str]) -> List[str]: @@ -235,7 +235,8 @@ class BrowserAgent(Agent): def show_search_results(self, search_result: List[str]): pretty_print("\nSearch results:", color="output") for res in search_result: - pretty_print(f"Title: {res['title']} - Link: {res['link']}", color="output") + pretty_print(f"Title: {res['title']} - ", color="info", no_newline=True) + pretty_print(f"Link: {res['link']}", color="status") def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]: """ diff --git a/sources/agents/planner.tmp b/sources/agents/planner.tmp new file mode 100644 index 0000000..bd07db9 --- /dev/null +++ b/sources/agents/planner.tmp @@ -0,0 +1,142 @@ +import json +from typing import List, Tuple, Type, Dict, Tuple +from sources.utility import pretty_print, animate_thinking +from sources.agents.agent import Agent +from sources.agents.code_agent import CoderAgent +from sources.agents.file_agent import FileAgent +from sources.agents.browser_agent import BrowserAgent +from sources.tools.tools import Tools + + +class PlannerAgent(Agent): + def __init__(self, name, prompt_path, provider, verbose=False, browser=None): + """ + The planner agent is a special agent that divides and conquers the task. 
+ """ + super().__init__(name, prompt_path, provider, verbose, None) + self.tools = { + "json": Tools() + } + self.tools['json'].tag = "json" + self.browser = browser + self.agents = { + "coder": CoderAgent(name, "prompts/base/coder_agent.txt", provider, verbose=False), + "file": FileAgent(name, "prompts/base/file_agent.txt", provider, verbose=False), + "web": BrowserAgent(name, "prompts/base/browser_agent.txt", provider, verbose=False, browser=browser) + } + self.role = { + "en": "Research, setup and code", + "fr": "Recherche, configuration et codage", + "zh": "研究,设置和编码", + } + self.type = "planner_agent" + + def parse_agent_tasks(self, text: str) -> Tuple[list | None, list | None]: + """ + Parse the agent tasks from the given text into json. + """ + tasks = [] + tasks_names = [] + + lines = text.strip().split('\n') + for line in lines: + if line is None or len(line) == 0: + continue + line = line.strip() + if '##' in line or line[0].isdigit(): + tasks_names.append(line) + continue + blocks, _ = self.tools["json"].load_exec_block(text) + if blocks == None: + return (None, None) + for block in blocks: + line_json = json.loads(block) + if 'plan' in line_json: + for task in line_json['plan']: + agent = { + 'agent': task['agent'], + 'id': task['id'], + 'task': task['task'] + } + if 'need' in task: + agent['need'] = task['need'] + tasks.append(agent) + if len(tasks_names) != len(tasks): + names = [task['task'] for task in tasks] + return zip(names, tasks) + return zip(tasks_names, tasks) + + def make_prompt(self, task: str, needed_infos: str) -> str: + """ + Make a prompt for the agent. + """ + if needed_infos is None: + needed_infos = "No needed informations." 
+ prompt = f""" + You are given the following informations: + {needed_infos} + Your task is: + {task} + """ + return prompt + + def show_plan(self, agents_tasks): + if agents_tasks == (None, None): + return + pretty_print("▂▘ P L A N ▝▂", color="output") + for task_name, task in agents_tasks: + pretty_print(f"{task['agent']} -> {task['task']}", color="info") + pretty_print("▔▗ E N D ▖▔", color="output") + + def process(self, prompt: str, speech_module: type) -> str: + """ + Process the prompt and divide the task between the agents. + Args: + prompt (str): The prompt to process. + speech_module (type): The speech module to use. + Returns: + str: The final answer resulting from successive task. + """ + ok = False + agents_tasks = (None, None) + while not ok: + self.wait_message(speech_module) + animate_thinking("Thinking...", color="status") + self.memory.push('user', prompt) + answer, _ = self.llm_request() + pretty_print(answer.split('\n')[0], color="output") + agents_tasks = self.parse_agent_tasks(answer) + if agents_tasks == (None, None): + pretty_print("Failed to parse the tasks, retrying", color="failure") + prompt = "Task parsing failed. Please retry with proper json." + continue + self.show_plan(agents_tasks) + ok_str = input("Is the plan ok? (y/n): ") + if ok_str == 'y': + ok = True + else: + prompt = input("Please reformulate: ") + + if agents_tasks == (None, None): + return "Failed to parse the tasks", reasoning + prev_agent_answer = None + for task_name, task in agents_tasks: + pretty_print(f"I will {task_name}.", color="info") + agent_prompt = self.make_prompt(task['task'], prev_agent_answer) + pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info") + if speech_module: speech_module.speak(f"I will {task_name}. 
I assigned the {task['agent']} agent to the task.") + try: + prev_agent_answer, _ = self.agents[task['agent'].lower()].process(agent_prompt, speech_module) + pretty_print(f"-- Agent answer ---\n\n", color="output") + self.agents[task['agent'].lower()].show_answer() + pretty_print(f"\n\n", color="output") + except Exception as e: + raise e + self.last_answer = prev_agent_answer + return prev_agent_answer, "" + +if __name__ == "__main__": + from llm_provider import Provider + server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000") + agent = PlannerAgent("deepseek-r1:14b", "jarvis", "prompts/planner_agent.txt", server_provider) + ans = agent.process("Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file") \ No newline at end of file diff --git a/sources/agents/planner_agent.py b/sources/agents/planner_agent.py index 035405e..9f300e3 100644 --- a/sources/agents/planner_agent.py +++ b/sources/agents/planner_agent.py @@ -76,10 +76,10 @@ class PlannerAgent(Agent): agents_tasks = self.parse_agent_tasks(json_plan) if agents_tasks == (None, None): return - pretty_print(f"--- Plan ---", color="output") + pretty_print("▂▘ P L A N ▝▂", color="output") for task_name, task in agents_tasks: - pretty_print(f"{task}", color="output") - pretty_print(f"--- End of Plan ---", color="output") + pretty_print(f"{task['agent']} -> {task['task']}", color="info") + pretty_print("▔▗ E N D ▖▔", color="output") def process(self, prompt, speech_module) -> str: ok = False diff --git a/sources/browser.py b/sources/browser.py index 4155d15..daf5443 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -6,9 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver.common.action_chains import ActionChains +from typing import List, Tuple, Type, Dict, 
Tuple from bs4 import BeautifulSoup from urllib.parse import urlparse -from typing import List, Tuple, Type, Dict, Tuple from fake_useragent import UserAgent from selenium_stealth import stealth import undetected_chromedriver as uc @@ -137,8 +137,8 @@ class Browser: self.logger.info(f"Navigated to: {url}") return True except TimeoutException as e: - self.logger.error(f"Timeout waiting for {url} to load: {str(e)}") - return False + self.logger.error(f"Timeout waiting for {url} to load: {str(e)}") + return False except WebDriverException as e: self.logger.error(f"Error navigating to {url}: {str(e)}") return False diff --git a/sources/tools/C_Interpreter.py b/sources/tools/C_Interpreter.py index 40efb99..45c501f 100644 --- a/sources/tools/C_Interpreter.py +++ b/sources/tools/C_Interpreter.py @@ -51,7 +51,7 @@ class CInterpreter(Tools): run_command, capture_output=True, text=True, - timeout=10 + timeout=120 ) if run_result.returncode != 0: diff --git a/sources/utility.py b/sources/utility.py index 14d2e2a..18b1035 100644 --- a/sources/utility.py +++ b/sources/utility.py @@ -32,7 +32,7 @@ def get_color_map(): } return color_map -def pretty_print(text, color="info"): +def pretty_print(text, color="info", no_newline=False): """ Print text with color formatting. 
@@ -56,7 +56,7 @@ def pretty_print(text, color="info"): color_map = get_color_map() if color not in color_map: color = "info" - print(colored(text, color_map[color])) + print(colored(text, color_map[color]), end='' if no_newline else "\n") def animate_thinking(text, color="status", duration=120): """ From 3acbae5ea08bf891566f844dafd7c2827e113857 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 19:00:39 +0200 Subject: [PATCH 06/13] feat : no call to llm on goodbye, prompt adjustement for file agent --- prompts/base/file_agent.txt | 1 + prompts/jarvis/file_agent.txt | 1 + sources/interaction.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/prompts/base/file_agent.txt b/prompts/base/file_agent.txt index 309d01e..32c2e73 100644 --- a/prompts/base/file_agent.txt +++ b/prompts/base/file_agent.txt @@ -42,6 +42,7 @@ rules: - Use file finder to find the path of the file. - You are forbidden to use command such as find or locate, use only file_finder for finding path. - Do not ever use editor such as vim or nano. +- Make sure to always cd your work folder before executing commands, like cd && Example Interaction User: "I need to find the file config.txt and read its contents." diff --git a/prompts/jarvis/file_agent.txt b/prompts/jarvis/file_agent.txt index 3e642e2..2ad4c3c 100644 --- a/prompts/jarvis/file_agent.txt +++ b/prompts/jarvis/file_agent.txt @@ -51,6 +51,7 @@ rules: - Do not ever use placeholder path like /path/to/file.c, find the path first. - Use file finder to find the path of the file. - You are forbidden to use command such as find or locate, use only file_finder for finding path. +- Make sure to always cd your work folder before executing commands, like cd && - Do not ever use editor such as vim or nano. 
Example Interaction diff --git a/sources/interaction.py b/sources/interaction.py index aee1924..eb7f7be 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -101,7 +101,7 @@ class Interaction: query = self.read_stdin() if query is None: self.is_active = False - self.last_query = "Goodbye (exit requested by user, dont think, make answer very short)" + self.last_query = None return None self.last_query = query return query From e99851fba30f99773ac2b669187c0067e97a8425 Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 19:05:47 +0200 Subject: [PATCH 07/13] note: commit 56b5db7 was a mistake by agenticseek itself lol --- sources/interaction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sources/interaction.py b/sources/interaction.py index eb7f7be..c5820bc 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -114,7 +114,6 @@ class Interaction: if agent is None: return False if self.current_agent != agent and self.last_answer is not None: - ## get last history from previous agent self.current_agent.memory.push('user', self.last_query) self.current_agent.memory.push('assistant', self.last_answer) self.current_agent = agent From b5311b265130915aeee863bb3d9578002c22be1f Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 19:10:57 +0200 Subject: [PATCH 08/13] fix : typo in readme fr version --- README_FR.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README_FR.md b/README_FR.md index 348f5c2..b172403 100644 --- a/README_FR.md +++ b/README_FR.md @@ -1,9 +1,9 @@ -# AgenticSeek: Une IA comme Manus qui utilise des agents DeepSeek R1. +# AgenticSeek: Une IA comme Manus mais à base d'agents DeepSeek R1 fonctionnant en local. -Une alternative **entièrement** locale à Manus AI, un assistant vocal IA qui code, explore votre système de fichiers, navigue sur le web et corrige ses erreurs, tout cela sans envoyer la moindre donnée dans le cloud. 
Construit avec des modèles de raisonnement comme DeepSeek R1, cet agent autonome fonctionne entièrement sur votre matériel, garantissant la confidentialité de vos données. +Une alternative **entièrement locale** à Manus AI, un assistant vocal IA qui code, explore votre système de fichiers, navigue sur le web et corrige ses erreurs, tout cela sans envoyer la moindre donnée dans le cloud. Construit avec des modèles de raisonnement comme DeepSeek R1, cet agent autonome fonctionne entièrement sur votre hardware, garantissant la confidentialité de vos données. [![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj) @@ -91,7 +91,7 @@ Télécharger `deepseek-r1:14b` de [DeepSeek](https://deepseek.com/models) ollama pull deepseek-r1:14b ``` -### 2️ **Démarrage** +### 2️ **Démarrage d'ollama** ```sh ollama serve @@ -140,7 +140,7 @@ sudo ./start_services.sh python3 main.py ``` -Vous verrez >>> +Vous verrez un prompt: ">>> " Cela indique qu’AgenticSeek attend que vous saisissiez des instructions. Vous pouvez également utiliser la reconnaissance vocale en définissant listen = True dans la configuration. @@ -183,21 +183,21 @@ Voici quelques exemples d’utilisation : Après avoir saisi votre requête, AgenticSeek attribuera le meilleur agent pour la tâche. -Comme il s’agit d’un prototype précoce, le système de routage des agents pourrait ne pas toujours attribuer le bon agent en fonction de votre requête. +Comme il s’agit d’un prototype, le système de routage des agents pourrait ne pas toujours attribuer le bon agent en fonction de votre requête. -Par conséquent, vous devriez être très explicite sur ce que vous voulez et sur la manière dont l’IA doit procéder. 
Par exemple, si vous voulez qu’elle effectue une recherche sur le web, ne dites pas : +Par conséquent, vous devez être explicite sur ce que vous voulez et sur la manière dont l’IA doit procéder. Par exemple, si vous voulez qu’elle effectue une recherche sur le web, ne dites pas : -Connaissez-vous de bons pays pour voyager seul ? +Connait-tu de bons pays pour voyager seul ? Dites plutôt : -Faites une recherche sur le web et découvrez quels sont les meilleurs pays pour voyager seul +Fait une recherche sur le web, quels sont les meilleurs pays pour voyager seul? --- ## **Exécuter le LLM sur votre propre serveur** -Si vous disposez d’un ordinateur puissant ou d’un serveur que vous pouvez utiliser, mais que vous souhaitez y accéder depuis votre ordinateur portable, vous avez la possibilité d’exécuter le LLM sur un serveur distant. +Si vous disposez d’un ordinateur puissant ou d’un serveur que vous voulez utiliser, mais que vous souhaitez y accéder depuis votre ordinateur portable, vous avez la possibilité d’exécuter le LLM sur un serveur distant. 
### 1️⃣ **Configurer et démarrer les scripts du serveur** From 140f7842cc912f845373324943142b91269ded2e Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 19:19:10 +0200 Subject: [PATCH 09/13] feat: add config back to default settings --- config.ini | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config.ini b/config.ini index 6fa8acc..6ca0f40 100644 --- a/config.ini +++ b/config.ini @@ -1,15 +1,15 @@ [MAIN] -is_local = False -provider_name = server +is_local = True +provider_name = ollama provider_model = deepseek-r1:14b -provider_server_address = 192.168.1.20:3333 +provider_server_address = 127.0.0.1:11434 agent_name = Friday recover_last_session = False -save_session = True -speak = True +save_session = False +speak = False listen = False work_dir = /Users/mlg/Documents/ai_folder -jarvis_personality = True +jarvis_personality = False [BROWSER] headless_browser = False -stealth_mode = True \ No newline at end of file +stealth_mode = False \ No newline at end of file From 1ffaf4689eb44b4df34f7819c8ced9b86914d13e Mon Sep 17 00:00:00 2001 From: martin legrand Date: Tue, 1 Apr 2025 19:21:35 +0200 Subject: [PATCH 10/13] rm : tmp file --- sources/agents/planner.tmp | 142 ------------------------------------- 1 file changed, 142 deletions(-) delete mode 100644 sources/agents/planner.tmp diff --git a/sources/agents/planner.tmp b/sources/agents/planner.tmp deleted file mode 100644 index bd07db9..0000000 --- a/sources/agents/planner.tmp +++ /dev/null @@ -1,142 +0,0 @@ -import json -from typing import List, Tuple, Type, Dict, Tuple -from sources.utility import pretty_print, animate_thinking -from sources.agents.agent import Agent -from sources.agents.code_agent import CoderAgent -from sources.agents.file_agent import FileAgent -from sources.agents.browser_agent import BrowserAgent -from sources.tools.tools import Tools - - -class PlannerAgent(Agent): - def __init__(self, name, prompt_path, provider, verbose=False, 
browser=None): - """ - The planner agent is a special agent that divides and conquers the task. - """ - super().__init__(name, prompt_path, provider, verbose, None) - self.tools = { - "json": Tools() - } - self.tools['json'].tag = "json" - self.browser = browser - self.agents = { - "coder": CoderAgent(name, "prompts/base/coder_agent.txt", provider, verbose=False), - "file": FileAgent(name, "prompts/base/file_agent.txt", provider, verbose=False), - "web": BrowserAgent(name, "prompts/base/browser_agent.txt", provider, verbose=False, browser=browser) - } - self.role = { - "en": "Research, setup and code", - "fr": "Recherche, configuration et codage", - "zh": "研究,设置和编码", - } - self.type = "planner_agent" - - def parse_agent_tasks(self, text: str) -> Tuple[list | None, list | None]: - """ - Parse the agent tasks from the given text into json. - """ - tasks = [] - tasks_names = [] - - lines = text.strip().split('\n') - for line in lines: - if line is None or len(line) == 0: - continue - line = line.strip() - if '##' in line or line[0].isdigit(): - tasks_names.append(line) - continue - blocks, _ = self.tools["json"].load_exec_block(text) - if blocks == None: - return (None, None) - for block in blocks: - line_json = json.loads(block) - if 'plan' in line_json: - for task in line_json['plan']: - agent = { - 'agent': task['agent'], - 'id': task['id'], - 'task': task['task'] - } - if 'need' in task: - agent['need'] = task['need'] - tasks.append(agent) - if len(tasks_names) != len(tasks): - names = [task['task'] for task in tasks] - return zip(names, tasks) - return zip(tasks_names, tasks) - - def make_prompt(self, task: str, needed_infos: str) -> str: - """ - Make a prompt for the agent. - """ - if needed_infos is None: - needed_infos = "No needed informations." 
- prompt = f""" - You are given the following informations: - {needed_infos} - Your task is: - {task} - """ - return prompt - - def show_plan(self, agents_tasks): - if agents_tasks == (None, None): - return - pretty_print("▂▘ P L A N ▝▂", color="output") - for task_name, task in agents_tasks: - pretty_print(f"{task['agent']} -> {task['task']}", color="info") - pretty_print("▔▗ E N D ▖▔", color="output") - - def process(self, prompt: str, speech_module: type) -> str: - """ - Process the prompt and divide the task between the agents. - Args: - prompt (str): The prompt to process. - speech_module (type): The speech module to use. - Returns: - str: The final answer resulting from successive task. - """ - ok = False - agents_tasks = (None, None) - while not ok: - self.wait_message(speech_module) - animate_thinking("Thinking...", color="status") - self.memory.push('user', prompt) - answer, _ = self.llm_request() - pretty_print(answer.split('\n')[0], color="output") - agents_tasks = self.parse_agent_tasks(answer) - if agents_tasks == (None, None): - pretty_print("Failed to parse the tasks, retrying", color="failure") - prompt = "Task parsing failed. Please retry with proper json." - continue - self.show_plan(agents_tasks) - ok_str = input("Is the plan ok? (y/n): ") - if ok_str == 'y': - ok = True - else: - prompt = input("Please reformulate: ") - - if agents_tasks == (None, None): - return "Failed to parse the tasks", reasoning - prev_agent_answer = None - for task_name, task in agents_tasks: - pretty_print(f"I will {task_name}.", color="info") - agent_prompt = self.make_prompt(task['task'], prev_agent_answer) - pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info") - if speech_module: speech_module.speak(f"I will {task_name}. 
I assigned the {task['agent']} agent to the task.") - try: - prev_agent_answer, _ = self.agents[task['agent'].lower()].process(agent_prompt, speech_module) - pretty_print(f"-- Agent answer ---\n\n", color="output") - self.agents[task['agent'].lower()].show_answer() - pretty_print(f"\n\n", color="output") - except Exception as e: - raise e - self.last_answer = prev_agent_answer - return prev_agent_answer, "" - -if __name__ == "__main__": - from llm_provider import Provider - server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000") - agent = PlannerAgent("deepseek-r1:14b", "jarvis", "prompts/planner_agent.txt", server_provider) - ans = agent.process("Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file") \ No newline at end of file From 95f5b9df680dbe1a4c40f8f858d03c250b83ff52 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Tue, 1 Apr 2025 19:41:59 +0200 Subject: [PATCH 11/13] Update sources/speech_to_text.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sources/speech_to_text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index c554c8c..5b8b9ce 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -1,5 +1,5 @@ from colorama import Fore -from typing import List, Tuple, Type, Dict, Tuple +from typing import List, Tuple, Type, Dict import queue import threading import numpy as np From 9e47e2bf4f50900489a7e27ebb182e726d92a135 Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Tue, 1 Apr 2025 19:42:16 +0200 Subject: [PATCH 12/13] Update sources/llm_provider.py to remove duplicate typing Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sources/llm_provider.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sources/llm_provider.py b/sources/llm_provider.py 
index 5724d89..2b5af9a 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -11,8 +11,7 @@ import platform from dotenv import load_dotenv, set_key from openai import OpenAI from huggingface_hub import InferenceClient -from typing import List, Tuple, Type, Dict, Tuple - +from typing import List, Tuple, Type, Dict from sources.utility import pretty_print, animate_thinking class Provider: From 02d28b4322122c6d60e68519c91f3667805f0f1f Mon Sep 17 00:00:00 2001 From: Martin <49105846+Fosowl@users.noreply.github.com> Date: Tue, 1 Apr 2025 19:42:44 +0200 Subject: [PATCH 13/13] Update sources/agents/browser_agent.py for duplicate typing Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- sources/agents/browser_agent.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index d47f141..ba84468 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -6,8 +6,7 @@ from sources.agents.agent import Agent from sources.tools.searxSearch import searxSearch from sources.browser import Browser from datetime import date -from typing import List, Tuple, Type, Dict, Tuple - +from typing import List, Tuple, Type, Dict class BrowserAgent(Agent): def __init__(self, name, prompt_path, provider, verbose=False, browser=None):