diff --git a/README.md b/README.md index d6d9d64..ad4f0bc 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,13 @@ ![alt text](./media/whale_readme.jpg) +> *Do a web search to find tech startups in Japan working on cutting-edge AI research* +> *Make a snake game in Python* + +> *Scan my network with nmap, find out who is connected?* + +> *Hey, can you find where contract.pdf is?* ## Features: @@ -34,6 +40,7 @@ - **Web Browsing**: Autonomous web navigation is underway. + ### Searching the web with agenticSeek : ![alt text](./media/exemples/search_startup.png) @@ -126,6 +133,77 @@ Run the assistant: python3 main.py ``` +*See the **Usage** section if you don't understand how to use it* + +*See the **Config** section for more information on the config* + +--- + +## Usage + +致中文使用者的重要提示:目前根据您的提示选择最佳AI代理的系统(代理路由)在处理中文文本时效果不佳,这是因为我们使用的是基于英文文本训练的一次性分类模型。我们正在努力解决这个问题。请暂时使用英文。 + +Warning: the system that chooses the best AI agent for your prompt (agent routing) currently works poorly with non-English text. This is because the agent routing currently uses a model that was trained on English text. We are working hard to fix this. Please use English for now. + + +Make sure the services are up and running with `./start_services.sh` and run agenticSeek with `python3 main.py` + +```sh +./start_services.sh +python3 main.py +``` + +You will be prompted with `>>> ` +This indicates that agenticSeek is waiting for you to type your instructions. +You can also use speech to text by setting `listen = True` in the config. 
+ +Here are some usage examples: + +### Coding/Bash + +> *Help me with matrix multiplication in Golang* + +> *Scan my network with nmap, find if any suspicious devices are connected* + +> *Make a snake game in python* + +### Web search + +> *Do a web search to find cool tech startups in Japan working on cutting-edge AI research* + +> *Can you find on the internet who created agenticSeek?* + +> *Can you find on which website I can buy an RTX 4090 for cheap* + +### File system + +> *Hey can you find where is million_dollars_contract.pdf i lost it* + +> *Show me how much space I have left on my disk* + +> *Find and read the README.md and follow the install instructions* + +### Casual + +> *Tell me a joke* + +> *Where is flight ABC777 ? my mom is on that plane* + +> *what is the meaning of life ?* + + +After you type your query, agenticSeek will allocate the best agent for the task. + +Because this is an early prototype, the agent routing system might not always allocate the right agent based on your query. + +Therefore, you should be very explicit about what you want and how the AI should proceed. For example, if you want it to conduct a web search, do not say: + +`Do you know some good countries for solo-travel?` + +Instead, ask: + +`Do a web search and find out which are the best countries for solo-travel` + --- ## **Run the LLM on your own server** @@ -195,6 +273,30 @@ python3 main.py --- + + +## Speech to Text + +Speech to text is disabled by default. You can enable it by setting listen to True in the config.ini: + +``` +listen = True +``` + +Speech to text waits for the AI name as a trigger keyword before it starts listening. You can change the AI name by changing the agent_name in the config.ini: + +``` +agent_name = Friday +``` + +It will work better if you use a common English name like John or Emma. 
+ +After hearing it's name agenticSeek will listen until it hear one of the following keyword for confirmation: + +``` +"do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?" +``` + ## Providers The table below show the available providers: diff --git a/media/exemples/search_jobs.png b/media/exemples/search_jobs.png deleted file mode 100644 index 3a64988..0000000 Binary files a/media/exemples/search_jobs.png and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 81751aa..d2eae9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,8 @@ pyaudio==0.2.14 librosa==0.10.2.post1 selenium==4.29.0 markdownify==1.1.0 +text2emotion==0.0.5 +langid==1.1.6 chromedriver-autoinstaller==0.6.4 httpx>=0.27,<0.29 anyio>=3.5.0,<5 diff --git a/setup.py b/setup.py index 56ee537..a309b45 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,8 @@ setup( "librosa==0.10.2.post1", "selenium==4.29.0", "markdownify==1.1.0", + "text2emotion==0.0.5", + "langid==1.1.6", "httpx>=0.27,<0.29", "anyio>=3.5.0,<5", "distro>=1.7.0,<2", diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 65540d4..7a4952f 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -16,7 +16,7 @@ class BrowserAgent(Agent): self.tools = { "web_search": searxSearch(), } - self.role = "web search, internet browsing, news" + self.role = "Web Research" self.browser = Browser() self.search_history = [] self.navigable_links = [] @@ -101,7 +101,7 @@ class BrowserAgent(Agent): Current date: {self.date} Remember, the user asked: {user_prompt} - Do not exit until you found information the user seek or accomplished the task. + Do not explain your choice. 
""" def llm_decide(self, prompt): @@ -182,9 +182,10 @@ class BrowserAgent(Agent): def process(self, user_prompt, speech_module) -> str: complete = False - animate_thinking(f"Searching...", color="status") + animate_thinking(f"Thinking...", color="status") self.memory.push('user', self.search_prompt(user_prompt)) ai_prompt, _ = self.llm_request() + animate_thinking(f"Searching...", color="status") search_result_raw = self.tools["web_search"].execute([ai_prompt], False) search_result = self.jsonify_search_results(search_result_raw)[:7] # until futher improvement prompt = self.make_newsearch_prompt(user_prompt, search_result) @@ -220,4 +221,4 @@ class BrowserAgent(Agent): return answer, reasoning if __name__ == "__main__": - browser = Browser() \ No newline at end of file + pass \ No newline at end of file diff --git a/sources/agents/casual_agent.py b/sources/agents/casual_agent.py index 11d2935..512610e 100644 --- a/sources/agents/casual_agent.py +++ b/sources/agents/casual_agent.py @@ -18,7 +18,7 @@ class CasualAgent(Agent): "file_finder": FileFinder(), "bash": BashInterpreter() } - self.role = "casual talking" + self.role = "Chat and Conversation" def process(self, prompt, speech_module) -> str: complete = False diff --git a/sources/agents/code_agent.py b/sources/agents/code_agent.py index 13e2d20..01569bb 100644 --- a/sources/agents/code_agent.py +++ b/sources/agents/code_agent.py @@ -20,7 +20,7 @@ class CoderAgent(Agent): "go": GoInterpreter(), "file_finder": FileFinder() } - self.role = "coding and programming" + self.role = "Code Assistance" def process(self, prompt, speech_module) -> str: answer = "" diff --git a/sources/agents/file_agent.py b/sources/agents/file_agent.py index 3f3c0d1..6f49d77 100644 --- a/sources/agents/file_agent.py +++ b/sources/agents/file_agent.py @@ -14,7 +14,7 @@ class FileAgent(Agent): "file_finder": FileFinder(), "bash": BashInterpreter() } - self.role = "find, read, write, edit files" + self.role = "find and read files" def 
class LanguageUtility:
    """Language identification and coarse emotion detection for short texts.

    Language detection is delegated to ``langid`` (restricted to a small set
    of candidate languages); emotion detection maps NLTK's VADER polarity
    scores onto a handful of emotion labels.
    """

    # ISO 639-1 codes that langid is restricted to when classifying.
    SUPPORTED_LANGUAGES = ('fr', 'en', 'zh', 'es')

    def __init__(self):
        """Ensure the VADER lexicon is available and build the analyzer."""
        try:
            # NLTK resources are looked up by "<category>/<name>"; the bare
            # name 'vader_lexicon' is never found, which forced a download
            # attempt on every instantiation.
            nltk.data.find('sentiment/vader_lexicon.zip')
        except LookupError:
            nltk.download('vader_lexicon')
        self.sid = SentimentIntensityAnalyzer()

    def detect_language(self, text: str) -> str:
        """
        Detect the language of the given text using langid
        Args:
            text: string to analyze
        Returns: ISO639-1 language code (one of SUPPORTED_LANGUAGES)
        """
        langid.set_languages(list(self.SUPPORTED_LANGUAGES))
        lang, _score = langid.classify(text)
        return lang

    def detect_emotion(self, text: str) -> str:
        """
        Detect the dominant emotion in the given text
        Args:
            text: string to analyze
        Returns: one of 'Happy', 'Angry', 'Sad', 'Fear', 'Surprise', or
                 'Neutral' when every emotion scores zero
        """
        scores = self.sid.polarity_scores(text)
        compound = scores['compound']
        # Insertion order matters: max() returns the first key on ties.
        emotions = {
            'Happy': max(scores['pos'], 0),
            'Angry': 0,
            'Sad': max(scores['neg'], 0),
            'Fear': 0,
            'Surprise': 0
        }
        if compound < -0.5:
            # Strongly negative text: split the intensity between anger and fear.
            emotions['Angry'] = abs(compound) * 0.5
            emotions['Fear'] = abs(compound) * 0.5
        elif compound > 0.5:
            # Strongly positive text: boost happiness, with a share of surprise.
            emotions['Happy'] = compound
            emotions['Surprise'] = compound * 0.5
        dominant_emotion = max(emotions, key=emotions.get)
        if emotions[dominant_emotion] == 0:
            return 'Neutral'
        return dominant_emotion

    def analyze(self, text: str) -> dict:
        """
        Combined analysis of language and emotion
        Args:
            text: string to analyze
        Returns: dictionary with keys "language" (ISO639-1 code) and
                 "emotions" (dominant emotion label)
        """
        return {
            "language": self.detect_language(text),
            "emotions": self.detect_emotion(text)
        }
You data will be sent to the cloud.", color="warning") self.api_key = self.get_api_key(self.provider_name) elif self.server != "": - print("Provider", provider_name, "initialized at", self.server) + pretty_print(f"Provider: {provider_name} initialized at {self.server}", color="success") self.check_address_format(self.server) if not self.is_ip_online(self.server.split(':')[0]): raise Exception(f"Server at {self.server} is offline.") @@ -89,13 +91,11 @@ class Provider: if output.returncode == 0: return True else: - print("errorcode:", output) return False except subprocess.TimeoutExpired: - print("timeout") - return True + return False except Exception as e: - print(f"is_ip_online error:\n{e}") + pretty_print(f"Error with ping request {str(e)}", color="failure") return False def server_fn(self, history, verbose = False): @@ -117,7 +117,7 @@ class Provider: is_complete = bool(response.json()["is_complete"]) time.sleep(2) except KeyError as e: - raise f"{str(e)}\n\nError occured with server route. Are you using the correct address for the config.ini provider?" + raise Exception(f"{str(e)}\n\nError occured with server route. Are you using the correct address for the config.ini provider?") from e except Exception as e: raise e return thought @@ -141,7 +141,7 @@ class Provider: raise Exception("\nOllama connection failed. provider should not be set to ollama if server address is not localhost") from e except ollama.ResponseError as e: if e.status_code == 404: - print(f"Downloading {self.model}...") + animate_thinking(f"Downloading {self.model}...") ollama.pull(self.model) if "refused" in str(e).lower(): raise Exception("Ollama connection failed. 
is the server running ?") from e diff --git a/sources/router.py b/sources/router.py index f6b27b5..1fc6781 100644 --- a/sources/router.py +++ b/sources/router.py @@ -9,6 +9,7 @@ from sources.agents.agent import Agent from sources.agents.code_agent import CoderAgent from sources.agents.casual_agent import CasualAgent from sources.agents.planner_agent import PlannerAgent +from sources.agents.browser_agent import BrowserAgent from sources.utility import pretty_print class AgentRouter: @@ -30,7 +31,7 @@ class AgentRouter: else: return "cpu" - def classify_text(self, text: str, threshold: float = 0.5) -> list: + def classify_text(self, text: str, threshold: float = 0.4) -> list: """ Classify the text into labels (agent roles). Args: @@ -41,8 +42,8 @@ class AgentRouter: """ first_sentence = None for line in text.split("\n"): - first_sentence = line.strip() - break + first_sentence = line.strip() + break if first_sentence is None: first_sentence = text result = self.pipeline(first_sentence, self.labels, threshold=threshold) @@ -67,30 +68,50 @@ class AgentRouter: if __name__ == "__main__": agents = [ - CoderAgent("deepseek-r1:14b", "agent1", "../prompts/coder_agent.txt", "server"), - CasualAgent("deepseek-r1:14b", "agent2", "../prompts/casual_agent.txt", "server"), - PlannerAgent("deepseek-r1:14b", "agent3", "../prompts/planner_agent.txt", "server") + CasualAgent("deepseek-r1:14b", "jarvis", "../prompts/casual_agent.txt", None), + BrowserAgent("deepseek-r1:14b", "browser", "../prompts/planner_agent.txt", None), + CoderAgent("deepseek-r1:14b", "coder", "../prompts/coder_agent.txt", None) ] router = AgentRouter(agents) - - texts = [""" - Write a python script to check if the device on my network is connected to the internet - """, - """ - Hey could you search the web for the latest news on the stock market ? - """, - """ - hey can you give give a list of the files in the current directory ? 
- """, - """ - Make a cool game to illustrate the current relation between USA and europe - """ + texts = [ + "Write a python script to check if the device on my network is connected to the internet", + #"Peut tu écrire un script python qui vérifie si l'appareil sur mon réseau est connecté à internet?", + #"写一个Python脚本,检查我网络上的设备是否连接到互联网", + "Hey could you search the web for the latest news on the tesla stock market ?", + #"嘿,你能搜索网页上关于股票市场的最新新闻吗?", + #"Yo, cherche sur internet comment va tesla en bourse.", + "I would like you to search for weather api and then make an app using this API", + #"我想让你搜索天气API,然后用这个API做一个应用程序", + #"J'aimerais que tu cherche une api météo et que l'utilise pour faire une application", + "Plan a 3-day trip to New York, including flights and hotels.", + #"计划一次为期3天的纽约之旅,包括机票和酒店。", + #"Planifie un trip de 3 jours à Paris, y compris les vols et hotels.", + "Find me the latest research papers on AI.", + #"给我找最新的AI研究论文。", + #"Trouve moi les derniers papiers de recherche en IA", + "Help me write a C++ program to sort an array", + #"帮我写一个C++程序来排序数组", + #"Aide moi à faire un programme c++ pour trier une array.", + "What’s the weather like today? Oh, and can you find a good weather app?", + #"今天天气怎么样?哦,你还能找到一个好的天气应用程序吗?", + #"La météo est comment aujourd'hui ? oh et trouve moi une bonne appli météo tant que tu y est.", + "Can you debug this Java code? 
It’s not working.", + #"你能调试这段Java代码吗?它不起作用。", + #"Peut tu m'aider à debugger ce code java, ça marche pas", + "What's the latest brainrot on the internet ?", + #"互联网上最新的“脑残”是什么?", + #"Quel est la dernière connerie sur internet ?", + "i would like to setup a new AI project, index as mark2", + #"我想建立一个新的 AI 项目,索引为 Mark2", + #"Je voudrais configurer un nouveau projet d'IA, index Mark2", + "Hey, can you find the old_project.zip file somewhere on my drive?", + #"嘿,你能在我驱动器上找到old_project.zip文件吗?", + #"Hé trouve moi le old_project.zip, il est quelque part sur mon disque.", + "Tell me a funny story", + #"给我讲一个有趣的故事", + #"Raconte moi une histoire drole" ] - for text in texts: - print(text) - results = router.classify_text(text) - for result in results: - print(result["label"], "=>", result["score"]) + print("Input text:", text) agent = router.select_agent(text) - print("Selected agent role:", agent.role) + print() diff --git a/sources/tools/searxSearch.py b/sources/tools/searxSearch.py index 7f3cac7..96483dc 100644 --- a/sources/tools/searxSearch.py +++ b/sources/tools/searxSearch.py @@ -50,7 +50,6 @@ class searxSearch(Tools): """Check all links, one by one.""" # TODO Make it asyncromous or smth statuses = [] - print("Web scrawl to verify links accessibilty...") for i, link in enumerate(links): status = self.link_valid(link) statuses.append(status) diff --git a/sources/tools/tools.py b/sources/tools/tools.py index ace0924..8a17b09 100644 --- a/sources/tools/tools.py +++ b/sources/tools/tools.py @@ -118,10 +118,8 @@ class Tools(): save_path_file = os.path.basename(save_path) directory = os.path.join(self.current_dir, save_path_dir) if directory and not os.path.exists(directory): - print(f"Creating directory: {directory}") os.makedirs(directory) for block in blocks: - print(f"Saving code block to: {save_path}") with open(os.path.join(directory, save_path_file), 'w') as f: f.write(block) diff --git a/sources/tools/webSearch.py b/sources/tools/webSearch.py index fc60c6a..6cf6e5f 
100644 --- a/sources/tools/webSearch.py +++ b/sources/tools/webSearch.py @@ -58,7 +58,6 @@ class webSearch(Tools): """Check all links, one by one.""" # TODO Make it asyncromous or smth statuses = [] - print("Workers started, scrawling the web...") for i, link in enumerate(links): status = self.link_valid(link) statuses.append(status)