Merge pull request #77 from Fosowl/dev

Local OpenAI-based API support + README update + crash & bug fixes
Authored by Martin on 2025-03-27 12:58:08 +01:00, committed by GitHub
commit f58e7f04f1
14 changed files with 40 additions and 46 deletions


@@ -10,24 +10,16 @@
 ![alt text](./media/whale_readme.jpg)
-> *Do a web search to find tech startup in Japan working on cutting edge AI research*
+> *Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file*
 > *Can you make a tetris game in C ?*
-> *Can you find where is contract.pdf*?
+> *I would like to setup a new project file index as mark2.*
-### Browse the web
-![alt text](./media/exemples/search_startup.png)
-### Code hand free
-![alt text](./media/exemples/matmul_golang.png)
-### Plan and execute with agents (Experimental)
-![alt text](./media/exemples/plan_weather_app.png)
+### Plan & Execute tasks
+![alt text](./media/exemples/startup_search.png)
 *See media/examples for other use case screenshots.*
@@ -222,7 +214,11 @@ ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1
 Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP address.
-Clone the repository and then, run the script `stream_llm.py` in `server/`
+**If you wish to use openai based provider follow the *Run with an API* section.**
+Make sure ollama is installed (Currently our script only support ollama)
+Run our server script.
 ```sh
 python3 server_ollama.py --model "deepseek-r1:32b"
@@ -232,8 +228,6 @@ python3 server_ollama.py --model "deepseek-r1:32b"
 Now on your personal computer:
-Clone the repository.
 Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:14b`.
 Set the `provider_server_address` to the ip address of the machine that will run the model.
@@ -254,18 +248,22 @@ python3 main.py
 ## **Run with an API**
+Clone the repository.
 Set the desired provider in the `config.ini`
 ```sh
 [MAIN]
 is_local = False
 provider_name = openai
-provider_model = gpt4-o
-provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
+provider_model = gpt-4o
+provider_server_address = 127.0.0.1:5000
 ```
+WARNING: Make sure there is not trailing space in the config.
+Set `is_local` to True if using a local openai-based api.
+Change the IP address if your openai-based api run on your own server.
 Run the assistant:
 ```sh
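As a quick sanity check, the `[MAIN]` keys shown in this README hunk are read in `main.py` (the next file in this diff). A minimal Python sketch, not part of the repository, that reads them the same way; the commented values are only examples:

```python
import configparser

config = configparser.ConfigParser()
config.read("config.ini")

# Mirrors how main.py consumes the [MAIN] section after this PR.
provider_name = config["MAIN"]["provider_name"]             # e.g. "openai"
provider_model = config["MAIN"]["provider_model"]           # e.g. "gpt-4o"
server_address = config["MAIN"]["provider_server_address"]  # e.g. "127.0.0.1:5000"
is_local = config.getboolean("MAIN", "is_local")            # True for a local openai-based api

print(provider_name, provider_model, server_address, is_local)
```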

main.py

@@ -22,12 +22,10 @@ def handleInterrupt(signum, frame):
 def main():
     signal.signal(signal.SIGINT, handler=handleInterrupt)
-    if config.getboolean('MAIN', 'is_local'):
-        provider = Provider(config["MAIN"]["provider_name"], config["MAIN"]["provider_model"], config["MAIN"]["provider_server_address"])
-    else:
-        provider = Provider(provider_name=config["MAIN"]["provider_name"],
-                            model=config["MAIN"]["provider_model"],
-                            server_address=config["MAIN"]["provider_server_address"])
+    provider = Provider(provider_name=config["MAIN"]["provider_name"],
+                        model=config["MAIN"]["provider_model"],
+                        server_address=config["MAIN"]["provider_server_address"],
+                        is_local=config.getboolean('MAIN', 'is_local'))
     browser = Browser(create_driver(), headless=config.getboolean('MAIN', 'headless_browser'))
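For reference, a hedged sketch of calling this unified constructor directly, using only the signature visible in the `llm_provider.py` hunk further down; the concrete values are illustrative, not taken from the diff:

```python
from llm_provider import Provider  # same import used in the planner's __main__ block

# Remote/API provider: is_local stays False, the address is passed through unchanged.
api_provider = Provider(provider_name="openai",
                        model="gpt-4o",
                        server_address="127.0.0.1:5000",
                        is_local=False)

# Local openai-compatible endpoint: only the flag changes; Provider picks the client setup.
local_provider = Provider(provider_name="openai",
                          model="deepseek-r1:14b",
                          server_address="127.0.0.1:5000",
                          is_local=True)
```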

Binary image files not shown: four removed (520 KiB, 178 KiB, 366 KiB and 1.3 MiB) and one added (2.1 MiB).


@@ -33,7 +33,6 @@ class Agent():
     def __init__(self, name: str,
                  prompt_path:str,
                  provider,
-                 recover_last_session=True,
                  verbose=False,
                  browser=None) -> None:
         """
@@ -53,7 +52,7 @@ class Agent():
         self.current_directory = os.getcwd()
         self.llm = provider
         self.memory = Memory(self.load_prompt(prompt_path),
-                             recover_last_session=recover_last_session,
+                             recover_last_session=False, # session recovery in handled by the interaction class
                              memory_compression=False)
         self.tools = {}
         self.blocks_result = []


@@ -80,6 +80,7 @@ class BrowserAgent(Agent):
         remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, do a new search."
         inputs_form = self.browser.get_form_inputs()
         inputs_form_text = '\n'.join(inputs_form)
+        notes = '\n'.join(self.notes)
         return f"""
         You are a web browser.
@@ -132,6 +133,8 @@ class BrowserAgent(Agent):
         {inputs_form_text}
         Remember, the user asked: {user_prompt}
+        So far you took these notes:
+        {notes}
         You are currently on page : {self.current_page}
         Do not explain your choice.
         Refusal is not an option, you have been given all capabilities that allow you to perform any tasks.
@@ -260,7 +263,6 @@ class BrowserAgent(Agent):
             self.navigable_links = self.browser.get_navigable()
             prompt = self.make_navigation_prompt(user_prompt, page_text)
-            self.browser.close()
         prompt = self.conclude_prompt(user_prompt)
         self.memory.push('user', prompt)
         answer, reasoning = self.llm_request()


@@ -112,7 +112,7 @@ class PlannerAgent(Agent):
             except Exception as e:
                 raise e
             self.last_answer = prev_agent_answer
-        return prev_agent_answer, reasoning
+        return prev_agent_answer, ""
 if __name__ == "__main__":
     from llm_provider import Provider


@@ -356,18 +356,6 @@ class Browser:
         script = self.load_js("inject_safety_script.js")
         input_elements = self.driver.execute_script(script)
-    def close(self):
-        """Close the browser."""
-        try:
-            self.driver.quit()
-            self.logger.info("Browser closed")
-        except Exception as e:
-            self.logger.error(f"Error closing browser: {str(e)}")
-    def __del__(self):
-        """Destructor to ensure browser is closed."""
-        self.close()
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)


@@ -28,7 +28,7 @@ class Interaction:
         if tts_enabled:
             self.speech.speak("Hello, we are online and ready. What can I do for you ?")
         if recover_last_session:
-            self.recover_last_session()
+            self.load_last_session()
     def find_ai_name(self) -> str:
         """Find the name of the default AI. It is required for STT as a trigger word."""
@@ -39,7 +39,7 @@ class Interaction:
                 break
         return ai_name
-    def recover_last_session(self):
+    def load_last_session(self):
         """Recover the last session."""
         for agent in self.agents:
             agent.memory.load_memory(agent.type)


@@ -15,9 +15,10 @@ import httpx
 from sources.utility import pretty_print, animate_thinking
 class Provider:
-    def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"):
+    def __init__(self, provider_name, model, server_address = "127.0.0.1:5000", is_local=False):
         self.provider_name = provider_name.lower()
         self.model = model
+        self.is_local = is_local
         self.server = self.check_address_format(server_address)
         self.available_providers = {
             "ollama": self.ollama_fn,
@@ -169,11 +170,15 @@ class Provider:
         """
         Use openai to generate text.
         """
-        client = OpenAI(api_key=self.api_key)
+        if self.is_local:
+            client = OpenAI(api_key=self.api_key, base_url=base_url)
+        else:
+            client = OpenAI(api_key=self.api_key)
         try:
             response = client.chat.completions.create(
                 model=self.model,
-                messages=history
+                messages=history,
             )
             thought = response.choices[0].message.content
             if verbose:
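The `is_local` branch above targets an OpenAI-compatible endpoint via `base_url`, which is not defined in this hunk. A minimal sketch of what that path amounts to with the `openai` client; the host, port and `/v1` suffix are assumptions, not taken from the diff:

```python
from openai import OpenAI

# Assumed address of a local openai-compatible server (ollama, LM Studio, etc.).
base_url = "http://127.0.0.1:5000/v1"
client = OpenAI(api_key="dummy-key", base_url=base_url)  # local servers usually ignore the key

response = client.chat.completions.create(
    model="deepseek-r1:14b",  # model name as configured in config.ini
    messages=[{"role": "user", "content": "Hello, are you online?"}],
)
print(response.choices[0].message.content)
```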


@@ -25,8 +25,10 @@ class Memory():
         self.session_time = datetime.datetime.now()
         self.session_id = str(uuid.uuid4())
         self.conversation_folder = f"conversations/"
+        self.session_recovered = False
         if recover_last_session:
             self.load_memory()
+            self.session_recovered = True
         # memory compression system
         self.model = "pszemraj/led-base-book-summary"
         self.device = self.get_cuda_device()
@@ -65,6 +67,8 @@ class Memory():
     def load_memory(self, agent_type: str = "casual_agent") -> None:
         """Load the memory from the last session."""
+        if self.session_recovered == True:
+            return
         save_path = os.path.join(self.conversation_folder, agent_type)
         if not os.path.exists(save_path):
             return
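Taken together with the Agent and Interaction hunks above, the intent is that a session is recovered at most once. A minimal sketch of the guard, assuming only the constructor arguments already visible in this diff (the import path is a guess):

```python
from sources.memory import Memory  # module path assumed

memory = Memory("You are a helpful assistant.",  # system prompt, as in Agent.__init__
                recover_last_session=True,       # loads the previous session in __init__
                memory_compression=False)

# __init__ sets session_recovered = True after the load, so a later call such as
# Interaction.load_last_session() -> agent.memory.load_memory(agent.type) returns early
# instead of reloading the already-recovered history.
memory.load_memory(agent_type="casual_agent")    # no-op thanks to the new guard
```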