Initial project setup

This commit is contained in:
martin legrand 2025-04-01 18:49:37 +02:00
parent 698ed78acc
commit 56b5db7df3
13 changed files with 211 additions and 38 deletions

View File

@ -1,15 +1,15 @@
[MAIN] [MAIN]
is_local = True is_local = False
provider_name = ollama provider_name = server
provider_model = deepseek-r1:14b provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:11434 provider_server_address = 192.168.1.20:3333
agent_name = Friday agent_name = Friday
recover_last_session = False recover_last_session = False
save_session = False save_session = True
speak = False speak = True
listen = False listen = False
work_dir = /Users/mlg/Documents/ai_folder work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False jarvis_personality = True
[BROWSER] [BROWSER]
headless_browser = False headless_browser = False
stealth_mode = False stealth_mode = True

View File

@ -57,7 +57,8 @@ def main():
interaction = Interaction(agents, interaction = Interaction(agents,
tts_enabled=config.getboolean('MAIN', 'speak'), tts_enabled=config.getboolean('MAIN', 'speak'),
stt_enabled=config.getboolean('MAIN', 'listen'), stt_enabled=config.getboolean('MAIN', 'listen'),
recover_last_session=config.getboolean('MAIN', 'recover_last_session')) recover_last_session=config.getboolean('MAIN', 'recover_last_session'),
)
try: try:
while interaction.is_active: while interaction.is_active:
interaction.get_user() interaction.get_user()

View File

@ -47,4 +47,5 @@ Some rules:
- Do not ever use user input, input are not supported by the system. - Do not ever use user input, input are not supported by the system.
- Do not ever tell user how to run it. user know it. - Do not ever tell user how to run it. user know it.
- For simple explanation you don't need to code. - For simple explanation you don't need to code.
- Dont be lazy, write full implementation for user.
- If query is unclear say REQUEST_CLARIFICATION - If query is unclear say REQUEST_CLARIFICATION

View File

@ -1,8 +1,8 @@
You are a project manager. You are a project manager.
Your goal is to divide and conquer the task using the following agents: Your goal is to divide and conquer the task using the following agents:
- Coder: An expert coder agent. - Coder: A programming agent, can code in python, bash, C and golang.
- File: An expert agent for finding files. - File: An agent for finding, reading or operating with files.
- Web: An expert agent for web search. - Web: An agent that can conduct web search, wrapped with selenium it can interact with any webpage.
Agents are other AI that obey your instructions. Agents are other AI that obey your instructions.
@ -13,14 +13,18 @@ You have to respect a strict format:
{"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"} {"agent": "agent_name", "need": "needed_agent_output", "task": "agent_task"}
``` ```
# Example 1: web app
User: make a weather app in python User: make a weather app in python
You: Sure, here is the plan: You: Sure, here is the plan:
## Task 1: I will search for available weather api ## Task 1: I will search for available weather api with the help of the web agent.
## Task 2: I will create an api key for the weather api ## Task 2: I will create an api key for the weather api using the web agent
## Task 3: I will make a weather app in python ## Task 3: I will setup the project using the file agent
## Task 4: I assign the coding agent to make a weather app in python
```json ```json
{ {
@ -37,11 +41,17 @@ You: Sure, here is the plan:
"need": "1", "need": "1",
"task": "Obtain API key from the selected service" "task": "Obtain API key from the selected service"
}, },
{
"agent": "File",
"id": "3",
"need": null,
"task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute."
},
{ {
"agent": "Coder", "agent": "Coder",
"id": "3", "id": "4",
"need": "2", "need": "2,3",
"task": "Develop a Python application using the API and key to fetch and display weather data" "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."
} }
] ]
} }
@ -49,4 +59,8 @@ You: Sure, here is the plan:
Rules: Rules:
- Do not write code. You are a planning agent. - Do not write code. You are a planning agent.
- Give clear, detailed orders to each agent and explain how their task relates to the previous task (if any).
- Put your plan in a json with the key "plan". - Put your plan in a json with the key "plan".
- Always tell the coding agent where to save file, eg: .
- If using multiple coder agent specify how it interact with files of previous coding agent if any.
- Tell agent they are soldier, they execute without question.

View File

@ -46,6 +46,7 @@ Some rules:
- You do not ever need to use bash to execute code. - You do not ever need to use bash to execute code.
- Do not ever use user input, input are not supported by the system. - Do not ever use user input, input are not supported by the system.
- Do not ever tell user how to run it. user know it. - Do not ever tell user how to run it. user know it.
- Dont be lazy, write full implementation for user.
- For simple explanation you don't need to code. - For simple explanation you don't need to code.
- If query is unclear say REQUEST_CLARIFICATION - If query is unclear say REQUEST_CLARIFICATION

View File

@ -15,16 +15,17 @@ You have to respect a strict format:
# Example: weather app # Example: weather app
User: "I need a plan to build a weather app—search for a weather API, get an API key, and code it in Python." User: "I need to build a simple weather app, get an API key, and code it in Python."
You: "At your service. Ive devised a plan to conquer the meteorological frontier. You: "At your service. Ive devised a plan and assigned agents to each task. Would you like me to proceed?
## Task one: scour the web for a weather API worth its salt. ## Task 1: I will search for available weather api with the help of the web agent.
## Task two: secure an API key with utmost discretion. ## Task 2: I will create an api key for the weather api using the web agent.
## Task three: unleash a Python app to bend the weather to your will." ## Task 3: I will setup the project using the file agent.
## Task 4: I will use the coding agent to make a weather app in python.
```json ```json
{ {
@ -41,11 +42,17 @@ You: "At your service. Ive devised a plan to conquer the meteorological fron
"need": "1", "need": "1",
"task": "Obtain API key from the selected service" "task": "Obtain API key from the selected service"
}, },
{
"agent": "File",
"id": "3",
"need": null,
"task": "Create and setup a web app folder for a python project. initialize as a git repo with all required file and a sources folder. You are forbidden from asking clarification, just execute."
},
{ {
"agent": "Coder", "agent": "Coder",
"id": "3", "id": "4",
"need": "2", "need": "2,3",
"task": "Develop a Python application using the API and key to fetch and display weather data" "task": "Based on the project structure. Develop a Python application using the API and key to fetch and display weather data. You are forbidden from asking clarification, just execute."
} }
] ]
} }
@ -53,7 +60,11 @@ You: "At your service. Ive devised a plan to conquer the meteorological fron
Rules: Rules:
- Do not write code. You are a planning agent. - Do not write code. You are a planning agent.
- Give clear, detailed orders to each agent and explain how their task relates to the previous task (if any).
- Put your plan in a json with the key "plan". - Put your plan in a json with the key "plan".
- Always tell the coding agent where to save file, eg: .
- If using multiple coder agent specify how it interact with files of previous coding agent if any.
- Tell agent they are soldier, they execute without question.
Personality: Personality:

View File

@ -133,6 +133,8 @@ class Agent():
Show the answer in a pretty way. Show the answer in a pretty way.
Show code blocks and their respective feedback by inserting them in the ressponse. Show code blocks and their respective feedback by inserting them in the ressponse.
""" """
if self.last_answer is None:
return
lines = self.last_answer.split("\n") lines = self.last_answer.split("\n")
for line in lines: for line in lines:
if "block:" in line: if "block:" in line:
@ -190,5 +192,5 @@ class Agent():
self.memory.push('user', feedback) self.memory.push('user', feedback)
if save_path != None: if save_path != None:
tool.save_block(blocks, save_path) tool.save_block(blocks, save_path)
self.blocks_result = list(reversed(self.blocks_result)) self.blocks_result = self.blocks_result
return True, feedback return True, feedback

View File

@ -93,7 +93,7 @@ class BrowserAgent(Agent):
Your task: Your task:
1. Decide if the current page answers the users query: {user_prompt} 1. Decide if the current page answers the users query: {user_prompt}
- If it does, take notes of the useful information, write down source, link or reference, then move to a new page. - If it does, take notes of the useful information, write down source, link or reference, then move to a new page.
- If it does and you completed use request, say REQUEST_EXIT - If it does and you completed user request, say REQUEST_EXIT
- If it doesnt, say: Error: This page does not answer the users query then go back or navigate to another link. - If it doesnt, say: Error: This page does not answer the users query then go back or navigate to another link.
2. Navigate by either: 2. Navigate by either:
- Navigate to a navigation links (write the full URL, e.g., www.example.com/cats). - Navigate to a navigation links (write the full URL, e.g., www.example.com/cats).
@ -144,10 +144,10 @@ class BrowserAgent(Agent):
animate_thinking("Thinking...", color="status") animate_thinking("Thinking...", color="status")
self.memory.push('user', prompt) self.memory.push('user', prompt)
answer, reasoning = self.llm_request() answer, reasoning = self.llm_request()
output = f"Answer: {answer}" if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}" output = answer if len(answer) > 16 else f"Action: {answer}\nReasoning: {reasoning}"
pretty_print("-"*100) print()
pretty_print(output, color="output") pretty_print(output, color="output")
pretty_print("-"*100) print()
return answer, reasoning return answer, reasoning
def select_unvisited(self, search_result: List[str]) -> List[str]: def select_unvisited(self, search_result: List[str]) -> List[str]:
@ -235,7 +235,8 @@ class BrowserAgent(Agent):
def show_search_results(self, search_result: List[str]): def show_search_results(self, search_result: List[str]):
pretty_print("\nSearch results:", color="output") pretty_print("\nSearch results:", color="output")
for res in search_result: for res in search_result:
pretty_print(f"Title: {res['title']} - Link: {res['link']}", color="output") pretty_print(f"Title: {res['title']} - ", color="info", no_newline=True)
pretty_print(f"Link: {res['link']}", color="status")
def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]: def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]:
""" """

142
sources/agents/planner.tmp Normal file
View File

@ -0,0 +1,142 @@
import json
from typing import List, Tuple, Type, Dict, Tuple
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
from sources.agents.code_agent import CoderAgent
from sources.agents.file_agent import FileAgent
from sources.agents.browser_agent import BrowserAgent
from sources.tools.tools import Tools
class PlannerAgent(Agent):
    """
    Agent that asks the LLM for a JSON plan and then runs every planned task
    with the matching sub-agent (coder, file or web).
    """

    def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
        """
        The planner agent is a special agent that divides and conquers the task.

        Args:
            name: Name forwarded to the base agent and to every sub-agent.
            prompt_path: Path of the planner prompt, forwarded to the base agent.
            provider: LLM provider shared by the planner and its sub-agents.
            verbose: Verbosity flag forwarded to the base agent only
                (sub-agents are always created with verbose=False).
            browser: Browser object handed to the web sub-agent.
        """
        super().__init__(name, prompt_path, provider, verbose, None)
        self.tools = {
            "json": Tools()
        }
        self.tools['json'].tag = "json"
        self.browser = browser
        self.agents = {
            "coder": CoderAgent(name, "prompts/base/coder_agent.txt", provider, verbose=False),
            "file": FileAgent(name, "prompts/base/file_agent.txt", provider, verbose=False),
            "web": BrowserAgent(name, "prompts/base/browser_agent.txt", provider, verbose=False, browser=browser)
        }
        self.role = {
            "en": "Research, setup and code",
            "fr": "Recherche, configuration et codage",
            "zh": "研究,设置和编码",
        }
        self.type = "planner_agent"

    def parse_agent_tasks(self, text: str) -> List[Tuple[str, dict]] | Tuple[None, None]:
        """
        Parse the agent tasks from the given text into json.

        Args:
            text (str): Raw LLM answer containing task headings and a json block.
        Returns:
            A list of (task_name, task_dict) pairs, or the sentinel (None, None)
            when no json block is found or the plan is malformed.
        """
        tasks = []
        tasks_names = []
        for line in text.strip().split('\n'):
            # Strip before testing for emptiness: a whitespace-only line would
            # otherwise reach line[0] and raise IndexError.
            line = line.strip()
            if not line:
                continue
            if '##' in line or line[0].isdigit():
                tasks_names.append(line)
        blocks, _ = self.tools["json"].load_exec_block(text)
        if blocks is None:
            return (None, None)
        try:
            for block in blocks:
                line_json = json.loads(block)
                if 'plan' not in line_json:
                    continue
                for task in line_json['plan']:
                    agent = {
                        'agent': task['agent'],
                        'id': task['id'],
                        'task': task['task']
                    }
                    if 'need' in task:
                        agent['need'] = task['need']
                    tasks.append(agent)
        except (json.JSONDecodeError, KeyError, TypeError):
            # Malformed plan: report a parse failure so the caller can ask
            # the LLM to retry instead of crashing.
            return (None, None)
        if len(tasks_names) != len(tasks):
            # Headings do not line up with the plan; fall back to task texts.
            tasks_names = [task['task'] for task in tasks]
        # Materialise the pairs: the result is iterated more than once
        # (displayed, then executed) and a bare zip would be exhausted
        # after the first pass.
        return list(zip(tasks_names, tasks))

    def make_prompt(self, task: str, needed_infos: str | None) -> str:
        """
        Make a prompt for the agent.

        Args:
            task (str): Task description given to the sub-agent.
            needed_infos (str | None): Information to pass along, if any.
        Returns:
            str: The prompt text for the sub-agent.
        """
        if needed_infos is None:
            needed_infos = "No needed informations."
        return (
            "\n"
            "You are given the following informations:\n"
            f"{needed_infos}\n"
            "Your task is:\n"
            f"{task}\n"
        )

    def show_plan(self, agents_tasks):
        """
        Print the plan, one "agent -> task" line per planned task.

        Does nothing when given the (None, None) parse-failure sentinel.
        """
        if agents_tasks == (None, None):
            return
        pretty_print("▂▘ P L A N ▝▂", color="output")
        for _, task in agents_tasks:
            pretty_print(f"{task['agent']} -> {task['task']}", color="info")
        pretty_print("▔▗ E N D ▖▔", color="output")

    def process(self, prompt: str, speech_module: type) -> Tuple[str, str]:
        """
        Process the prompt and divide the task between the agents.
        Args:
            prompt (str): The prompt to process.
            speech_module (type): The speech module to use.
        Returns:
            Tuple[str, str]: The answer of the last executed task and an
            empty reasoning string.
        """
        ok = False
        agents_tasks = (None, None)
        while not ok:
            self.wait_message(speech_module)
            animate_thinking("Thinking...", color="status")
            self.memory.push('user', prompt)
            answer, _ = self.llm_request()
            pretty_print(answer.split('\n')[0], color="output")
            agents_tasks = self.parse_agent_tasks(answer)
            if agents_tasks == (None, None):
                pretty_print("Failed to parse the tasks, retrying", color="failure")
                prompt = "Task parsing failed. Please retry with proper json."
                continue
            self.show_plan(agents_tasks)
            ok_str = input("Is the plan ok? (y/n): ")
            if ok_str == 'y':
                ok = True
            else:
                prompt = input("Please reformulate: ")
        if agents_tasks == (None, None):
            # Defensive guard; the loop above only exits with a parsed plan.
            return "Failed to parse the tasks", ""
        prev_agent_answer = None
        for task_name, task in agents_tasks:
            pretty_print(f"I will {task_name}.", color="info")
            agent_prompt = self.make_prompt(task['task'], prev_agent_answer)
            pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info")
            if speech_module:
                speech_module.speak(f"I will {task_name}. I assigned the {task['agent']} agent to the task.")
            # Each task receives the previous task's answer as its input.
            sub_agent = self.agents[task['agent'].lower()]
            prev_agent_answer, _ = sub_agent.process(agent_prompt, speech_module)
            pretty_print(f"-- Agent answer ---\n\n", color="output")
            sub_agent.show_answer()
            pretty_print(f"\n\n", color="output")
        self.last_answer = prev_agent_answer
        return prev_agent_answer, ""
if __name__ == "__main__":
    # Manual smoke test: build a planner against a remote provider and run one query.
    from llm_provider import Provider
    server_provider = Provider("server", "deepseek-r1:14b", "192.168.1.100:5000")
    # PlannerAgent takes (name, prompt_path, provider); the model name belongs
    # to the provider, not to the agent.
    agent = PlannerAgent("jarvis", "prompts/planner_agent.txt", server_provider)
    # No speech module in this smoke test; process() skips speaking when it is None.
    ans = agent.process("Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file", None)

View File

@ -76,10 +76,10 @@ class PlannerAgent(Agent):
agents_tasks = self.parse_agent_tasks(json_plan) agents_tasks = self.parse_agent_tasks(json_plan)
if agents_tasks == (None, None): if agents_tasks == (None, None):
return return
pretty_print(f"--- Plan ---", color="output") pretty_print("▂▘ P L A N ▝▂", color="output")
for task_name, task in agents_tasks: for task_name, task in agents_tasks:
pretty_print(f"{task}", color="output") pretty_print(f"{task['agent']} -> {task['task']}", color="info")
pretty_print(f"--- End of Plan ---", color="output") pretty_print("▔▗ E N D ▖▔", color="output")
def process(self, prompt, speech_module) -> str: def process(self, prompt, speech_module) -> str:
ok = False ok = False

View File

@ -6,9 +6,9 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
from typing import List, Tuple, Type, Dict, Tuple
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from urllib.parse import urlparse from urllib.parse import urlparse
from typing import List, Tuple, Type, Dict, Tuple
from fake_useragent import UserAgent from fake_useragent import UserAgent
from selenium_stealth import stealth from selenium_stealth import stealth
import undetected_chromedriver as uc import undetected_chromedriver as uc
@ -137,8 +137,8 @@ class Browser:
self.logger.info(f"Navigated to: {url}") self.logger.info(f"Navigated to: {url}")
return True return True
except TimeoutException as e: except TimeoutException as e:
self.logger.error(f"Timeout waiting for {url} to load: {str(e)}") self.logger.error(f"Timeout waiting for {url} to load: {str(e)}")
return False return False
except WebDriverException as e: except WebDriverException as e:
self.logger.error(f"Error navigating to {url}: {str(e)}") self.logger.error(f"Error navigating to {url}: {str(e)}")
return False return False

View File

@ -51,7 +51,7 @@ class CInterpreter(Tools):
run_command, run_command,
capture_output=True, capture_output=True,
text=True, text=True,
timeout=10 timeout=120
) )
if run_result.returncode != 0: if run_result.returncode != 0:

View File

@ -32,7 +32,7 @@ def get_color_map():
} }
return color_map return color_map
def pretty_print(text, color="info"): def pretty_print(text, color="info", no_newline=False):
""" """
Print text with color formatting. Print text with color formatting.
@ -56,7 +56,7 @@ def pretty_print(text, color="info"):
color_map = get_color_map() color_map = get_color_map()
if color not in color_map: if color not in color_map:
color = "info" color = "info"
print(colored(text, color_map[color])) print(colored(text, color_map[color]), end='' if no_newline else "\n")
def animate_thinking(text, color="status", duration=120): def animate_thinking(text, color="status", duration=120):
""" """