Merge pull request #77 from Fosowl/dev

Local openai based api support + readme update + fix crash&bug
This commit is contained in:
Martin 2025-03-27 12:58:08 +01:00 committed by GitHub
commit f58e7f04f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 40 additions and 46 deletions

View File

@ -10,24 +10,16 @@
![alt text](./media/whale_readme.jpg)
> *Do a web search to find tech startups in Japan working on cutting edge AI research*
> *Do a deep search of AI startups in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file*
> *Can you make a tetris game in C ?*
> *Can you find where contract.pdf is?*
> *I would like to setup a new project file index as mark2.*
### Browse the web
### Plan & Execute tasks
![alt text](./media/exemples/search_startup.png)
### Code hands-free
![alt text](./media/exemples/matmul_golang.png)
### Plan and execute with agents (Experimental)
![alt text](./media/exemples/plan_weather_app.png)
![alt text](./media/exemples/startup_search.png)
*See media/examples for other use case screenshots.*
@ -222,7 +214,11 @@ ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1
Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP address.
Clone the repository and then, run the script `stream_llm.py` in `server/`
**If you wish to use openai based provider follow the *Run with an API* section.**
Make sure ollama is installed (Currently our script only supports ollama)
Run our server script.
```sh
python3 server_ollama.py --model "deepseek-r1:32b"
@ -232,8 +228,6 @@ python3 server_ollama.py --model "deepseek-r1:32b"
Now on your personal computer:
Clone the repository.
Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:14b`.
Set the `provider_server_address` to the ip address of the machine that will run the model.
@ -254,18 +248,22 @@ python3 main.py
## **Run with an API**
Clone the repository.
Set the desired provider in the `config.ini`
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt4-o
provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
WARNING: Make sure there are no trailing spaces in the config.
Set `is_local` to True if using a local OpenAI-based API.
Change the IP address if your OpenAI-based API runs on your own server.
Run the assistant:
```sh

10
main.py
View File

@ -22,12 +22,10 @@ def handleInterrupt(signum, frame):
def main():
signal.signal(signal.SIGINT, handler=handleInterrupt)
if config.getboolean('MAIN', 'is_local'):
provider = Provider(config["MAIN"]["provider_name"], config["MAIN"]["provider_model"], config["MAIN"]["provider_server_address"])
else:
provider = Provider(provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"])
provider = Provider(provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"],
is_local=config.getboolean('MAIN', 'is_local'))
browser = Browser(create_driver(), headless=config.getboolean('MAIN', 'headless_browser'))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 520 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 366 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

View File

@ -33,7 +33,6 @@ class Agent():
def __init__(self, name: str,
prompt_path:str,
provider,
recover_last_session=True,
verbose=False,
browser=None) -> None:
"""
@ -53,7 +52,7 @@ class Agent():
self.current_directory = os.getcwd()
self.llm = provider
self.memory = Memory(self.load_prompt(prompt_path),
recover_last_session=recover_last_session,
recover_last_session=False, # session recovery is handled by the interaction class
memory_compression=False)
self.tools = {}
self.blocks_result = []

View File

@ -80,6 +80,7 @@ class BrowserAgent(Agent):
remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, do a new search."
inputs_form = self.browser.get_form_inputs()
inputs_form_text = '\n'.join(inputs_form)
notes = '\n'.join(self.notes)
return f"""
You are a web browser.
@ -132,6 +133,8 @@ class BrowserAgent(Agent):
{inputs_form_text}
Remember, the user asked: {user_prompt}
So far you took these notes:
{notes}
You are currently on page : {self.current_page}
Do not explain your choice.
Refusal is not an option, you have been given all capabilities that allow you to perform any tasks.
@ -260,7 +263,6 @@ class BrowserAgent(Agent):
self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text)
self.browser.close()
prompt = self.conclude_prompt(user_prompt)
self.memory.push('user', prompt)
answer, reasoning = self.llm_request()

View File

@ -112,7 +112,7 @@ class PlannerAgent(Agent):
except Exception as e:
raise e
self.last_answer = prev_agent_answer
return prev_agent_answer, reasoning
return prev_agent_answer, ""
if __name__ == "__main__":
from llm_provider import Provider

View File

@ -356,18 +356,6 @@ class Browser:
script = self.load_js("inject_safety_script.js")
input_elements = self.driver.execute_script(script)
def close(self):
"""Close the browser."""
try:
self.driver.quit()
self.logger.info("Browser closed")
except Exception as e:
self.logger.error(f"Error closing browser: {str(e)}")
def __del__(self):
"""Destructor to ensure browser is closed."""
self.close()
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

View File

@ -28,7 +28,7 @@ class Interaction:
if tts_enabled:
self.speech.speak("Hello, we are online and ready. What can I do for you ?")
if recover_last_session:
self.recover_last_session()
self.load_last_session()
def find_ai_name(self) -> str:
"""Find the name of the default AI. It is required for STT as a trigger word."""
@ -39,7 +39,7 @@ class Interaction:
break
return ai_name
def recover_last_session(self):
def load_last_session(self):
"""Recover the last session."""
for agent in self.agents:
agent.memory.load_memory(agent.type)

View File

@ -15,9 +15,10 @@ import httpx
from sources.utility import pretty_print, animate_thinking
class Provider:
def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"):
def __init__(self, provider_name, model, server_address = "127.0.0.1:5000", is_local=False):
self.provider_name = provider_name.lower()
self.model = model
self.is_local = is_local
self.server = self.check_address_format(server_address)
self.available_providers = {
"ollama": self.ollama_fn,
@ -169,11 +170,15 @@ class Provider:
"""
Use openai to generate text.
"""
client = OpenAI(api_key=self.api_key)
if self.is_local:
client = OpenAI(api_key=self.api_key, base_url=base_url)
else:
client = OpenAI(api_key=self.api_key)
try:
response = client.chat.completions.create(
model=self.model,
messages=history
messages=history,
)
thought = response.choices[0].message.content
if verbose:

View File

@ -25,8 +25,10 @@ class Memory():
self.session_time = datetime.datetime.now()
self.session_id = str(uuid.uuid4())
self.conversation_folder = f"conversations/"
self.session_recovered = False
if recover_last_session:
self.load_memory()
self.session_recovered = True
# memory compression system
self.model = "pszemraj/led-base-book-summary"
self.device = self.get_cuda_device()
@ -65,6 +67,8 @@ class Memory():
def load_memory(self, agent_type: str = "casual_agent") -> None:
"""Load the memory from the last session."""
if self.session_recovered == True:
return
save_path = os.path.join(self.conversation_folder, agent_type)
if not os.path.exists(save_path):
return