diff --git a/README.md b/README.md index d6d9d64..728e98c 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,13 @@ ![alt text](./media/whale_readme.jpg) +> *Do a web search to find tech startup in Japan working on cutting edge AI research* +> *Make a snake game in Python* + +> *Scan my network with nmap, find out who is connected?* + +> *Hey can you find where is contract.pdf?* ## Features: @@ -34,6 +40,7 @@ - **Web Browsing**: Autonomous web navigation is underway. + ### Searching the web with agenticSeek : ![alt text](./media/exemples/search_startup.png) @@ -126,6 +133,77 @@ Run the assistant: python3 main.py ``` +*See the **Usage** section if you don't understand how to use it* + +*See the **Known issues** section if you are having issues* + +*See the **Run with an API** section if your hardware can't run deepseek locally* + +--- + +## Usage + +Warning: the agent routing system, which chooses the best AI agent for your query, currently works poorly with non-English text. This is because the routing currently uses a model that was trained on English text. We are working hard to fix this. Please use English for now. + + +Make sure the services are up and running with `./start_services.sh` and run agenticSeek with `python3 main.py` + +```sh +./start_services.sh +python3 main.py +``` + +You will be prompted with `>>> ` +This indicates that agenticSeek is waiting for you to type your instructions. +You can also use speech to text by setting `listen = True` in the config. 
+ +Here are some usage examples: + +### Coding/Bash + +> *Help me with matrix multiplication in Golang* + +> *Scan my network with nmap, find if any suspicious devices is connected* + +> *Make a snake game in python* + +### Web search + +> *Do a web search to find cool tech startup in Japan working on cutting edge AI research* + +> *Can you find on the internet who created agenticSeek?* + +> *Can you find on which website I can buy a rtx 4090 for cheap* + +### File system + +> *Hey can you find where is million_dollars_contract.pdf i lost it* + +> *Show me how much space I have left on my disk* + +> *Find and read the README.md and follow the install instruction* + +### Casual + +> *Tell me a joke* + +> *Where is flight ABC777 ? my mom is on that plane* + +> *what is the meaning of life ?* + + +After you type your query, agenticSeek will allocate the best agent for the task. + +Because this is an early prototype, the agent routing system might not always allocate the right agent based on your query. + +Therefore, you should be very explicit about what you want and how the AI should proceed. For example, if you want it to conduct a web search, do not say: + +`Do you know some good countries for solo-travel?` + +Instead, ask: + +`Do a web search and find out which are the best countries for solo-travel` + --- ## **Run the LLM on your own server** @@ -195,6 +273,30 @@ python3 main.py --- + + +## Speech to Text + +Speech to text is disabled by default; you can enable it by setting `listen` to `True` in config.ini: + +``` +listen = True +``` + +Speech to text waits for the AI's name as a trigger keyword before it starts listening; you can change the name by changing `agent_name` in config.ini: + +``` +agent_name = Friday +``` + +It will work better if you use a common English name like John or Emma. 
+ +After hearing its name, agenticSeek will listen until it hears one of the following confirmation keywords: + +``` +"do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?" +``` + ## Providers The table below show the available providers: diff --git a/media/exemples/search_jobs.png b/media/exemples/search_jobs.png deleted file mode 100644 index 3a64988..0000000 Binary files a/media/exemples/search_jobs.png and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 81751aa..d2eae9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,8 @@ pyaudio==0.2.14 librosa==0.10.2.post1 selenium==4.29.0 markdownify==1.1.0 +text2emotion==0.0.5 +langid==1.1.6 chromedriver-autoinstaller==0.6.4 httpx>=0.27,<0.29 anyio>=3.5.0,<5 diff --git a/setup.py b/setup.py index 56ee537..a309b45 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,8 @@ setup( "librosa==0.10.2.post1", "selenium==4.29.0", "markdownify==1.1.0", + "text2emotion==0.0.5", + "langid==1.1.6", "httpx>=0.27,<0.29", "anyio>=3.5.0,<5", "distro>=1.7.0,<2", diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index feecb3c..7a4952f 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -16,7 +16,7 @@ class BrowserAgent(Agent): self.tools = { "web_search": searxSearch(), } - self.role = "web search, internet browsing, news" + self.role = "Web Research" self.browser = Browser() self.search_history = [] self.navigable_links = [] @@ -101,7 +101,7 @@ class BrowserAgent(Agent): Current date: {self.date} Remember, the user asked: {user_prompt} - Do not exit until you found information the user seek or accomplished the task. + Do not explain your choice. 
""" def llm_decide(self, prompt): @@ -182,11 +182,12 @@ class BrowserAgent(Agent): def process(self, user_prompt, speech_module) -> str: complete = False - animate_thinking(f"Searching...", color="status") + animate_thinking(f"Thinking...", color="status") self.memory.push('user', self.search_prompt(user_prompt)) ai_prompt, _ = self.llm_request() + animate_thinking(f"Searching...", color="status") search_result_raw = self.tools["web_search"].execute([ai_prompt], False) - search_result = self.jsonify_search_results(search_result_raw)[:5] # until futher improvement + search_result = self.jsonify_search_results(search_result_raw)[:7] # until futher improvement prompt = self.make_newsearch_prompt(user_prompt, search_result) unvisited = [None] while not complete: @@ -220,4 +221,4 @@ class BrowserAgent(Agent): return answer, reasoning if __name__ == "__main__": - browser = Browser() \ No newline at end of file + pass \ No newline at end of file diff --git a/sources/agents/casual_agent.py b/sources/agents/casual_agent.py index 11d2935..512610e 100644 --- a/sources/agents/casual_agent.py +++ b/sources/agents/casual_agent.py @@ -18,7 +18,7 @@ class CasualAgent(Agent): "file_finder": FileFinder(), "bash": BashInterpreter() } - self.role = "casual talking" + self.role = "Chat and Conversation" def process(self, prompt, speech_module) -> str: complete = False diff --git a/sources/agents/code_agent.py b/sources/agents/code_agent.py index 13e2d20..01569bb 100644 --- a/sources/agents/code_agent.py +++ b/sources/agents/code_agent.py @@ -20,7 +20,7 @@ class CoderAgent(Agent): "go": GoInterpreter(), "file_finder": FileFinder() } - self.role = "coding and programming" + self.role = "Code Assistance" def process(self, prompt, speech_module) -> str: answer = "" diff --git a/sources/agents/file_agent.py b/sources/agents/file_agent.py index 3f3c0d1..6f49d77 100644 --- a/sources/agents/file_agent.py +++ b/sources/agents/file_agent.py @@ -14,7 +14,7 @@ class FileAgent(Agent): 
"file_finder": FileFinder(), "bash": BashInterpreter() } - self.role = "find, read, write, edit files" + self.role = "find and read files" def process(self, prompt, speech_module) -> str: complete = False diff --git a/sources/agents/planner_agent.py b/sources/agents/planner_agent.py index 1760c33..6b374ee 100644 --- a/sources/agents/planner_agent.py +++ b/sources/agents/planner_agent.py @@ -21,7 +21,7 @@ class PlannerAgent(Agent): "file": FileAgent(model, name, prompt_path, provider), "web": BrowserAgent(model, name, prompt_path, provider) } - self.role = "Manage complex tasks" + self.role = "Research, setup and code" self.tag = "json" def parse_agent_tasks(self, text): diff --git a/sources/browser.py b/sources/browser.py index 2f11af4..80b8b4f 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -5,6 +5,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.common.exceptions import TimeoutException, WebDriverException +from selenium.webdriver.chrome.options import Options import chromedriver_autoinstaller import time import os @@ -39,6 +40,19 @@ class Browser: chrome_options.add_argument("--disable-gpu") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") + chrome_options.add_argument("--autoplay-policy=user-gesture-required") + chrome_options.add_argument("--mute-audio") + chrome_options.add_argument("--disable-webgl") + chrome_options.add_argument("--disable-notifications") + security_prefs = { + "profile.default_content_setting_values.media_stream": 2, # Block webcam/mic + "profile.default_content_setting_values.notifications": 2, # Block notifications + "profile.default_content_setting_values.popups": 2, # Block pop-ups + "profile.default_content_setting_values.geolocation": 2, # Block geolocation + "download_restrictions": 3, # Block all downloads + "safebrowsing.enabled": True, 
# Enable safe browsing + } + chrome_options.add_experimental_option("prefs", security_prefs) chromedriver_path = shutil.which("chromedriver") # system installed driver. @@ -76,18 +90,19 @@ class Browser: return path return None - def go_to(self, url): """Navigate to a specified URL.""" try: + initial_handles = self.driver.window_handles self.driver.get(url) - time.sleep(1) # Wait for page to load + time.sleep(1) + self.apply_web_countermeasures() self.logger.info(f"Navigated to: {url}") return True except WebDriverException as e: self.logger.error(f"Error navigating to {url}: {str(e)}") return False - + def is_sentence(self, text): """Check if the text qualifies as a meaningful sentence or contains important error codes.""" text = text.strip() @@ -217,6 +232,130 @@ class Browser: self.logger.error(f"Error taking screenshot: {str(e)}") return False +####################### +# WEB SECURITY # +####################### + + def apply_web_countermeasures(self): + """ + Apply security measures to block any website malicious execution, privacy violation etc.. 
+ """ + self.inject_safety_script() + self.neutralize_event_listeners() + self.monitor_and_reset_css() + self.block_clipboard_access() + self.limit_intervals_and_timeouts() + self.block_external_requests() + self.monitor_and_close_popups() + + def inject_safety_script(self): + script = """ + // Block hardware access by removing or disabling APIs + Object.defineProperty(navigator, 'serial', { get: () => undefined }); + Object.defineProperty(navigator, 'hid', { get: () => undefined }); + Object.defineProperty(navigator, 'bluetooth', { get: () => undefined }); + // Block media playback + HTMLMediaElement.prototype.play = function() { + this.pause(); // Immediately pause if play is called + return Promise.reject('Blocked by script'); + }; + // Block fullscreen requests + Element.prototype.requestFullscreen = function() { + console.log('Blocked fullscreen request'); + return Promise.reject('Blocked by script'); + }; + // Block pointer lock + Element.prototype.requestPointerLock = function() { + console.log('Blocked pointer lock'); + }; + // Block iframe creation (optional, since browser already blocks these) + const originalCreateElement = document.createElement; + document.createElement = function(tagName) { + if (tagName.toLowerCase() === 'iframe') { + console.log('Blocked iframe creation'); + return null; + } + return originalCreateElement.apply(this, arguments); + }; + // Block annoying dialogs + window.alert = function() {}; + window.confirm = function() { return false; }; + window.prompt = function() { return null; }; + """ + self.driver.execute_script(script) + + def neutralize_event_listeners(self): + script = """ + const originalAddEventListener = EventTarget.prototype.addEventListener; + EventTarget.prototype.addEventListener = function(type, listener, options) { + if (['mousedown', 'mouseup', 'click', 'touchstart', 'keydown', 'keyup', 'keypress'].includes(type)) { + console.log(`Blocked adding listener for ${type}`); + return; + } + 
originalAddEventListener.apply(this, arguments); + }; + """ + self.driver.execute_script(script) + + def monitor_and_reset_css(self): + script = """ + const observer = new MutationObserver((mutations) => { + mutations.forEach((mutation) => { + if (mutation.type === 'attributes' && mutation.attributeName === 'style') { + const html = document.querySelector('html'); + if (html.style.cursor === 'none') { + html.style.cursor = 'auto'; + } + } + }); + }); + observer.observe(document.querySelector('html'), { attributes: true }); + """ + self.driver.execute_script(script) + + def block_clipboard_access(self): + script = """ + navigator.clipboard.readText = function() { + console.log('Blocked clipboard read'); + return Promise.reject('Blocked'); + }; + navigator.clipboard.writeText = function() { + console.log('Blocked clipboard write'); + return Promise.resolve(); + }; + """ + self.driver.execute_script(script) + + def limit_intervals_and_timeouts(self): + script = """ + const originalSetInterval = window.setInterval; + window.setInterval = function(callback, delay) { + if (typeof callback === 'function' && callback.toString().includes('alert')) { + console.log('Blocked suspicious interval'); + return; + } + return originalSetInterval.apply(this, arguments); + }; + """ + self.driver.execute_script(script) + + def monitor_and_close_popups(self): + initial_handles = self.driver.window_handles + for handle in self.driver.window_handles: + if handle not in initial_handles: + self.driver.switch_to.window(handle) + self.driver.close() + self.driver.switch_to.window(self.driver.window_handles[0]) + + def block_external_requests(self): + script = """ + window.fetch = function() { + console.log('Blocked fetch request'); + return Promise.reject('Blocked'); + }; + """ + self.driver.execute_script(script) + def close(self): """Close the browser.""" try: @@ -235,11 +374,16 @@ if __name__ == "__main__": browser = Browser(headless=False) try: - 
browser.go_to("https://github.com/Fosowl/agenticSeek") + # stress test + browser.go_to("https://www.bbc.com/news") text = browser.get_text() print("Page Text in Markdown:") print(text) links = browser.get_navigable() print("\nNavigable Links:", links) + print("WARNING SECURITY STRESS TEST WILL BE RUN IN 20s") + time.sleep(20) + browser.go_to("https://theannoyingsite.com/") + time.sleep(15) finally: browser.close() diff --git a/sources/language.py b/sources/language.py new file mode 100644 index 0000000..a4cc98d --- /dev/null +++ b/sources/language.py @@ -0,0 +1,84 @@ +import langid +import re +import nltk +from nltk.sentiment.vader import SentimentIntensityAnalyzer + +class LanguageUtility: + """LanguageUtility for language, or emotion identification""" + def __init__(self): + try: + nltk.data.find('vader_lexicon') + except LookupError: + nltk.download('vader_lexicon') + self.sid = SentimentIntensityAnalyzer() + + def detect_language(self, text: str) -> str: + """ + Detect the language of the given text using langdetect + Args: + text: string to analyze + Returns: ISO639-1 language code + """ + langid.set_languages(['fr', 'en', 'zh', 'es']) # ISO 639-1 codes + lang, score = langid.classify(text) + return lang + + def detect_emotion(self, text: str) -> str: + """ + Detect the dominant emotion in the given text + Args: + text: string to analyze + Returns: string of the dominant emotion + """ + try: + scores = self.sid.polarity_scores(text) + emotions = { + 'Happy': max(scores['pos'], 0), + 'Angry': 0, + 'Sad': max(scores['neg'], 0), + 'Fear': 0, + 'Surprise': 0 + } + if scores['compound'] < -0.5: + emotions['Angry'] = abs(scores['compound']) * 0.5 + emotions['Fear'] = abs(scores['compound']) * 0.5 + elif scores['compound'] > 0.5: + emotions['Happy'] = scores['compound'] + emotions['Surprise'] = scores['compound'] * 0.5 + dominant_emotion = max(emotions, key=emotions.get) + if emotions[dominant_emotion] == 0: + return 'Neutral' + return dominant_emotion + except 
Exception as e: + raise e + + def analyze(self, text): + """ + Combined analysis of language and emotion + Args: + text: string to analyze + Returns: dictionary with language and emotion results + """ + try: + language = self.detect_language(text) + emotions = self.detect_emotion(text) + return { + "language": language, + "emotions": emotions + } + except Exception as e: + raise e + +if __name__ == "__main__": + detector = LanguageUtility() + + test_texts = [ + "I am so happy today!", + "Qué tristeza siento ahora", + "我不要去巴黎", + "La vie c'est cool" + ] + for text in test_texts: + print(f"\nAnalyzing: {text}") + result = detector.analyze(text) + print(result) \ No newline at end of file diff --git a/sources/llm_provider.py b/sources/llm_provider.py index a510c92..b3f30e9 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -12,6 +12,8 @@ from huggingface_hub import InferenceClient import os import httpx +from sources.utility import pretty_print, animate_thinking + class Provider: def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"): self.provider_name = provider_name.lower() @@ -29,10 +31,10 @@ class Provider: if self.provider_name not in self.available_providers: raise ValueError(f"Unknown provider: {provider_name}") if self.provider_name in self.unsafe_providers: - print("Warning: you are using an API provider. You data will be sent to the cloud.") + pretty_print("Warning: you are using an API provider. 
You data will be sent to the cloud.", color="warning") self.api_key = self.get_api_key(self.provider_name) elif self.server != "": - print("Provider", provider_name, "initialized at", self.server) + pretty_print(f"Provider: {provider_name} initialized at {self.server}", color="success") self.check_address_format(self.server) if not self.is_ip_online(self.server.split(':')[0]): raise Exception(f"Server at {self.server} is offline.") @@ -89,13 +91,11 @@ class Provider: if output.returncode == 0: return True else: - print("errorcode:", output) return False except subprocess.TimeoutExpired: - print("timeout") - return True + return False except Exception as e: - print(f"is_ip_online error:\n{e}") + pretty_print(f"Error with ping request {str(e)}", color="failure") return False def server_fn(self, history, verbose = False): @@ -117,7 +117,7 @@ class Provider: is_complete = bool(response.json()["is_complete"]) time.sleep(2) except KeyError as e: - raise f"{str(e)}\n\nError occured with server route. Are you using the correct address for the config.ini provider?" + raise Exception(f"{str(e)}\n\nError occured with server route. Are you using the correct address for the config.ini provider?") from e except Exception as e: raise e return thought @@ -141,7 +141,7 @@ class Provider: raise Exception("\nOllama connection failed. provider should not be set to ollama if server address is not localhost") from e except ollama.ResponseError as e: if e.status_code == 404: - print(f"Downloading {self.model}...") + animate_thinking(f"Downloading {self.model}...") ollama.pull(self.model) if "refused" in str(e).lower(): raise Exception("Ollama connection failed. 
is the server running ?") from e diff --git a/sources/router.py b/sources/router.py index f6b27b5..1fc6781 100644 --- a/sources/router.py +++ b/sources/router.py @@ -9,6 +9,7 @@ from sources.agents.agent import Agent from sources.agents.code_agent import CoderAgent from sources.agents.casual_agent import CasualAgent from sources.agents.planner_agent import PlannerAgent +from sources.agents.browser_agent import BrowserAgent from sources.utility import pretty_print class AgentRouter: @@ -30,7 +31,7 @@ class AgentRouter: else: return "cpu" - def classify_text(self, text: str, threshold: float = 0.5) -> list: + def classify_text(self, text: str, threshold: float = 0.4) -> list: """ Classify the text into labels (agent roles). Args: @@ -41,8 +42,8 @@ class AgentRouter: """ first_sentence = None for line in text.split("\n"): - first_sentence = line.strip() - break + first_sentence = line.strip() + break if first_sentence is None: first_sentence = text result = self.pipeline(first_sentence, self.labels, threshold=threshold) @@ -67,30 +68,50 @@ class AgentRouter: if __name__ == "__main__": agents = [ - CoderAgent("deepseek-r1:14b", "agent1", "../prompts/coder_agent.txt", "server"), - CasualAgent("deepseek-r1:14b", "agent2", "../prompts/casual_agent.txt", "server"), - PlannerAgent("deepseek-r1:14b", "agent3", "../prompts/planner_agent.txt", "server") + CasualAgent("deepseek-r1:14b", "jarvis", "../prompts/casual_agent.txt", None), + BrowserAgent("deepseek-r1:14b", "browser", "../prompts/planner_agent.txt", None), + CoderAgent("deepseek-r1:14b", "coder", "../prompts/coder_agent.txt", None) ] router = AgentRouter(agents) - - texts = [""" - Write a python script to check if the device on my network is connected to the internet - """, - """ - Hey could you search the web for the latest news on the stock market ? - """, - """ - hey can you give give a list of the files in the current directory ? 
- """, - """ - Make a cool game to illustrate the current relation between USA and europe - """ + texts = [ + "Write a python script to check if the device on my network is connected to the internet", + #"Peut tu écrire un script python qui vérifie si l'appareil sur mon réseau est connecté à internet?", + #"写一个Python脚本,检查我网络上的设备是否连接到互联网", + "Hey could you search the web for the latest news on the tesla stock market ?", + #"嘿,你能搜索网页上关于股票市场的最新新闻吗?", + #"Yo, cherche sur internet comment va tesla en bourse.", + "I would like you to search for weather api and then make an app using this API", + #"我想让你搜索天气API,然后用这个API做一个应用程序", + #"J'aimerais que tu cherche une api météo et que l'utilise pour faire une application", + "Plan a 3-day trip to New York, including flights and hotels.", + #"计划一次为期3天的纽约之旅,包括机票和酒店。", + #"Planifie un trip de 3 jours à Paris, y compris les vols et hotels.", + "Find me the latest research papers on AI.", + #"给我找最新的AI研究论文。", + #"Trouve moi les derniers papiers de recherche en IA", + "Help me write a C++ program to sort an array", + #"帮我写一个C++程序来排序数组", + #"Aide moi à faire un programme c++ pour trier une array.", + "What’s the weather like today? Oh, and can you find a good weather app?", + #"今天天气怎么样?哦,你还能找到一个好的天气应用程序吗?", + #"La météo est comment aujourd'hui ? oh et trouve moi une bonne appli météo tant que tu y est.", + "Can you debug this Java code? 
It’s not working.", + #"你能调试这段Java代码吗?它不起作用。", + #"Peut tu m'aider à debugger ce code java, ça marche pas", + "What's the latest brainrot on the internet ?", + #"互联网上最新的“脑残”是什么?", + #"Quel est la dernière connerie sur internet ?", + "i would like to setup a new AI project, index as mark2", + #"我想建立一个新的 AI 项目,索引为 Mark2", + #"Je voudrais configurer un nouveau projet d'IA, index Mark2", + "Hey, can you find the old_project.zip file somewhere on my drive?", + #"嘿,你能在我驱动器上找到old_project.zip文件吗?", + #"Hé trouve moi le old_project.zip, il est quelque part sur mon disque.", + "Tell me a funny story", + #"给我讲一个有趣的故事", + #"Raconte moi une histoire drole" ] - for text in texts: - print(text) - results = router.classify_text(text) - for result in results: - print(result["label"], "=>", result["score"]) + print("Input text:", text) agent = router.select_agent(text) - print("Selected agent role:", agent.role) + print() diff --git a/sources/tools/searxSearch.py b/sources/tools/searxSearch.py index 7f3cac7..96483dc 100644 --- a/sources/tools/searxSearch.py +++ b/sources/tools/searxSearch.py @@ -50,7 +50,6 @@ class searxSearch(Tools): """Check all links, one by one.""" # TODO Make it asyncromous or smth statuses = [] - print("Web scrawl to verify links accessibilty...") for i, link in enumerate(links): status = self.link_valid(link) statuses.append(status) diff --git a/sources/tools/tools.py b/sources/tools/tools.py index ace0924..8a17b09 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -118,10 +118,8 @@ class Tools(): save_path_file = os.path.basename(save_path) directory = os.path.join(self.current_dir, save_path_dir) if directory and not os.path.exists(directory): - print(f"Creating directory: {directory}") os.makedirs(directory) for block in blocks: - print(f"Saving code block to: {save_path}") with open(os.path.join(directory, save_path_file), 'w') as f: f.write(block) diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py index fc60c6a..6cf6e5f 
100644 --- a/sources/tools/webSearch.py +++ b/sources/tools/webSearch.py @@ -58,7 +58,6 @@ class webSearch(Tools): """Check all links, one by one.""" # TODO Make it asyncromous or smth statuses = [] - print("Workers started, scrawling the web...") for i, link in enumerate(links): status = self.link_valid(link) statuses.append(status)