diff --git a/README.md b/README.md index e91ba6e..724bf62 100644 --- a/README.md +++ b/README.md @@ -10,24 +10,16 @@ ![alt text](./media/whale_readme.jpg) -> *Do a web search to find tech startup in Japan working on cutting edge AI research* +> *Do a deep search of AI startups in Osaka and Tokyo, find at least 5, then save them in the research_japan.txt file* > *Can you make a tetris game in C ?* -> *Can you find where is contract.pdf*? +> *I would like to set up a new project file index as mark2.* -### Browse the web +### Plan & Execute tasks -![alt text](./media/exemples/search_startup.png) - -### Code hand free - -![alt text](./media/exemples/matmul_golang.png) - -### Plan and execute with agents (Experimental) - -![alt text](./media/exemples/plan_weather_app.png) +![alt text](./media/exemples/startup_search.png) *See media/examples for other use case screenshots.* @@ -222,7 +214,11 @@ ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1 Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP address. -Clone the repository and then, run the script `stream_llm.py` in `server/` +**If you wish to use an openai-based provider, follow the *Run with an API* section.** + +Make sure ollama is installed (Currently our script only supports ollama) + +Run our server script. ```sh python3 server_ollama.py --model "deepseek-r1:32b" ``` @@ -232,8 +228,6 @@ python3 server_ollama.py --model "deepseek-r1:32b" Now on your personal computer: -Clone the repository. - Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:14b`. Set the `provider_server_address` to the ip address of the machine that will run the model. @@ -254,18 +248,22 @@ python3 main.py ## **Run with an API** -Clone the repository. 
- Set the desired provider in the `config.ini` ```sh [MAIN] is_local = False provider_name = openai -provider_model = gpt4-o -provider_server_address = 127.0.0.1:5000 # can be set to anything, not used +provider_model = gpt-4o +provider_server_address = 127.0.0.1:5000 ``` +WARNING: Make sure there is no trailing space in the config. + +Set `is_local` to True if using a local openai-based api. + +Change the IP address if your openai-based api runs on your own server. + Run the assistant: ```sh diff --git a/main.py b/main.py index 0ff444e..4f763ad 100755 --- a/main.py +++ b/main.py @@ -22,12 +22,10 @@ def handleInterrupt(signum, frame): def main(): signal.signal(signal.SIGINT, handler=handleInterrupt) - if config.getboolean('MAIN', 'is_local'): - provider = Provider(config["MAIN"]["provider_name"], config["MAIN"]["provider_model"], config["MAIN"]["provider_server_address"]) - else: - provider = Provider(provider_name=config["MAIN"]["provider_name"], - model=config["MAIN"]["provider_model"], - server_address=config["MAIN"]["provider_server_address"]) + provider = Provider(provider_name=config["MAIN"]["provider_name"], + model=config["MAIN"]["provider_model"], + server_address=config["MAIN"]["provider_server_address"], + is_local=config.getboolean('MAIN', 'is_local')) browser = Browser(create_driver(), headless=config.getboolean('MAIN', 'headless_browser')) diff --git a/media/exemples/failure_recover.png b/media/exemples/failure_recover.png deleted file mode 100644 index 99f792f..0000000 Binary files a/media/exemples/failure_recover.png and /dev/null differ diff --git a/media/exemples/files_interaction.png b/media/exemples/files_interaction.png deleted file mode 100644 index ac57958..0000000 Binary files a/media/exemples/files_interaction.png and /dev/null differ diff --git a/media/exemples/plan_flight_app.png b/media/exemples/plan_flight_app.png deleted file mode 100644 index 0b24ec5..0000000 Binary files a/media/exemples/plan_flight_app.png and /dev/null differ diff 
--git a/media/exemples/search_startup.png b/media/exemples/search_startup.png deleted file mode 100644 index 180317f..0000000 Binary files a/media/exemples/search_startup.png and /dev/null differ diff --git a/media/exemples/startup_search.png b/media/exemples/startup_search.png new file mode 100644 index 0000000..4be3e29 Binary files /dev/null and b/media/exemples/startup_search.png differ diff --git a/sources/agents/agent.py b/sources/agents/agent.py index 636d6a5..3095212 100644 --- a/sources/agents/agent.py +++ b/sources/agents/agent.py @@ -33,7 +33,6 @@ class Agent(): def __init__(self, name: str, prompt_path:str, provider, - recover_last_session=True, verbose=False, browser=None) -> None: """ @@ -53,7 +52,7 @@ class Agent(): self.current_directory = os.getcwd() self.llm = provider self.memory = Memory(self.load_prompt(prompt_path), - recover_last_session=recover_last_session, + recover_last_session=False, # session recovery is handled by the interaction class memory_compression=False) self.tools = {} self.blocks_result = [] diff --git a/sources/agents/browser_agent.py b/sources/agents/browser_agent.py index 487f65b..62e2380 100644 --- a/sources/agents/browser_agent.py +++ b/sources/agents/browser_agent.py @@ -80,6 +80,7 @@ class BrowserAgent(Agent): remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, do a new search." inputs_form = self.browser.get_form_inputs() inputs_form_text = '\n'.join(inputs_form) + notes = '\n'.join(self.notes) return f""" You are a web browser. @@ -132,6 +133,8 @@ class BrowserAgent(Agent): {inputs_form_text} Remember, the user asked: {user_prompt} + So far you took these notes: + {notes} You are currently on page : {self.current_page} Do not explain your choice. Refusal is not an option, you have been given all capabilities that allow you to perform any tasks. 
@@ -260,7 +263,6 @@ class BrowserAgent(Agent): self.navigable_links = self.browser.get_navigable() prompt = self.make_navigation_prompt(user_prompt, page_text) - self.browser.close() prompt = self.conclude_prompt(user_prompt) self.memory.push('user', prompt) answer, reasoning = self.llm_request() diff --git a/sources/agents/planner_agent.py b/sources/agents/planner_agent.py index ce674d0..8a46335 100644 --- a/sources/agents/planner_agent.py +++ b/sources/agents/planner_agent.py @@ -112,7 +112,7 @@ class PlannerAgent(Agent): except Exception as e: raise e self.last_answer = prev_agent_answer - return prev_agent_answer, reasoning + return prev_agent_answer, "" if __name__ == "__main__": from llm_provider import Provider diff --git a/sources/browser.py b/sources/browser.py index d713125..c870be9 100644 --- a/sources/browser.py +++ b/sources/browser.py @@ -356,18 +356,6 @@ class Browser: script = self.load_js("inject_safety_script.js") input_elements = self.driver.execute_script(script) - def close(self): - """Close the browser.""" - try: - self.driver.quit() - self.logger.info("Browser closed") - except Exception as e: - self.logger.error(f"Error closing browser: {str(e)}") - - def __del__(self): - """Destructor to ensure browser is closed.""" - self.close() - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) diff --git a/sources/interaction.py b/sources/interaction.py index 9b0e5f6..a25a95c 100644 --- a/sources/interaction.py +++ b/sources/interaction.py @@ -28,7 +28,7 @@ class Interaction: if tts_enabled: self.speech.speak("Hello, we are online and ready. What can I do for you ?") if recover_last_session: - self.recover_last_session() + self.load_last_session() def find_ai_name(self) -> str: """Find the name of the default AI. 
It is required for STT as a trigger word.""" @@ -39,7 +39,7 @@ class Interaction: break return ai_name - def recover_last_session(self): + def load_last_session(self): """Recover the last session.""" for agent in self.agents: agent.memory.load_memory(agent.type) diff --git a/sources/llm_provider.py b/sources/llm_provider.py index 2e74cd2..66addbf 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -15,9 +15,10 @@ import httpx from sources.utility import pretty_print, animate_thinking class Provider: - def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"): + def __init__(self, provider_name, model, server_address = "127.0.0.1:5000", is_local=False): self.provider_name = provider_name.lower() self.model = model + self.is_local = is_local self.server = self.check_address_format(server_address) self.available_providers = { "ollama": self.ollama_fn, @@ -169,11 +170,15 @@ class Provider: """ Use openai to generate text. """ - client = OpenAI(api_key=self.api_key) + if self.is_local: + client = OpenAI(api_key=self.api_key, base_url=base_url) + else: + client = OpenAI(api_key=self.api_key) + try: response = client.chat.completions.create( model=self.model, - messages=history + messages=history, ) thought = response.choices[0].message.content if verbose: diff --git a/sources/memory.py b/sources/memory.py index 893a2f2..018c49a 100644 --- a/sources/memory.py +++ b/sources/memory.py @@ -25,8 +25,10 @@ class Memory(): self.session_time = datetime.datetime.now() self.session_id = str(uuid.uuid4()) self.conversation_folder = f"conversations/" + self.session_recovered = False if recover_last_session: self.load_memory() + self.session_recovered = True # memory compression system self.model = "pszemraj/led-base-book-summary" self.device = self.get_cuda_device() @@ -65,6 +67,8 @@ class Memory(): def load_memory(self, agent_type: str = "casual_agent") -> None: """Load the memory from the last session.""" + if self.session_recovered == True: + 
return save_path = os.path.join(self.conversation_folder, agent_type) if not os.path.exists(save_path): return