Merge pull request #77 from Fosowl/dev

Local openai based api support + readme update + fix crash&bug
This commit is contained in:
Martin 2025-03-27 12:58:08 +01:00 committed by GitHub
commit f58e7f04f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 40 additions and 46 deletions

View File

@ -10,24 +10,16 @@
![alt text](./media/whale_readme.jpg)
> *Do a web search to find tech startups in Japan working on cutting edge AI research*
> *Do a deep search of AI startups in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file*
> *Can you make a tetris game in C ?*
> *Can you find where contract.pdf is?*
> *I would like to setup a new project file index as mark2.*
### Browse the web
### Plan & Execute tasks
![alt text](./media/exemples/search_startup.png)
### Code hands-free
![alt text](./media/exemples/matmul_golang.png)
### Plan and execute with agents (Experimental)
![alt text](./media/exemples/plan_weather_app.png)
![alt text](./media/exemples/startup_search.png)
*See media/examples for other use case screenshots.*
@ -222,7 +214,11 @@ ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1
Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP address.
Clone the repository and then, run the script `stream_llm.py` in `server/`
**If you wish to use openai based provider follow the *Run with an API* section.**
Make sure ollama is installed (Currently our script only supports ollama)
Run our server script.
```sh
python3 server_ollama.py --model "deepseek-r1:32b"
@ -232,8 +228,6 @@ python3 server_ollama.py --model "deepseek-r1:32b"
Now on your personal computer:
Clone the repository.
Change the `config.ini` file to set the `provider_name` to `server` and `provider_model` to `deepseek-r1:14b`.
Set the `provider_server_address` to the ip address of the machine that will run the model.
@ -254,18 +248,22 @@ python3 main.py
## **Run with an API**
Clone the repository.
Set the desired provider in the `config.ini`
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt4-o
provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
WARNING: Make sure there are no trailing spaces in the config.
Set `is_local` to True if using a local OpenAI-based API.
Change the IP address if your OpenAI-based API runs on your own server.
Run the assistant:
```sh

10
main.py
View File

@ -22,12 +22,10 @@ def handleInterrupt(signum, frame):
def main():
signal.signal(signal.SIGINT, handler=handleInterrupt)
if config.getboolean('MAIN', 'is_local'):
provider = Provider(config["MAIN"]["provider_name"], config["MAIN"]["provider_model"], config["MAIN"]["provider_server_address"])
else:
provider = Provider(provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"])
provider = Provider(provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"],
is_local=config.getboolean('MAIN', 'is_local'))
browser = Browser(create_driver(), headless=config.getboolean('MAIN', 'headless_browser'))

Binary file not shown.

Before

Width:  |  Height:  |  Size: 520 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 366 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

View File

@ -33,7 +33,6 @@ class Agent():
def __init__(self, name: str,
prompt_path:str,
provider,
recover_last_session=True,
verbose=False,
browser=None) -> None:
"""
@ -53,7 +52,7 @@ class Agent():
self.current_directory = os.getcwd()
self.llm = provider
self.memory = Memory(self.load_prompt(prompt_path),
recover_last_session=recover_last_session,
recover_last_session=False, # session recovery is handled by the interaction class
memory_compression=False)
self.tools = {}
self.blocks_result = []

View File

@ -80,6 +80,7 @@ class BrowserAgent(Agent):
remaining_links_text = remaining_links if remaining_links is not None else "No links remaining, do a new search."
inputs_form = self.browser.get_form_inputs()
inputs_form_text = '\n'.join(inputs_form)
notes = '\n'.join(self.notes)
return f"""
You are a web browser.
@ -132,6 +133,8 @@ class BrowserAgent(Agent):
{inputs_form_text}
Remember, the user asked: {user_prompt}
So far you took these notes:
{notes}
You are currently on page : {self.current_page}
Do not explain your choice.
Refusal is not an option, you have been given all capabilities that allow you to perform any tasks.
@ -260,7 +263,6 @@ class BrowserAgent(Agent):
self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text)
self.browser.close()
prompt = self.conclude_prompt(user_prompt)
self.memory.push('user', prompt)
answer, reasoning = self.llm_request()

View File

@ -112,7 +112,7 @@ class PlannerAgent(Agent):
except Exception as e:
raise e
self.last_answer = prev_agent_answer
return prev_agent_answer, reasoning
return prev_agent_answer, ""
if __name__ == "__main__":
from llm_provider import Provider

View File

@ -356,18 +356,6 @@ class Browser:
script = self.load_js("inject_safety_script.js")
input_elements = self.driver.execute_script(script)
def close(self):
"""Close the browser."""
try:
self.driver.quit()
self.logger.info("Browser closed")
except Exception as e:
self.logger.error(f"Error closing browser: {str(e)}")
def __del__(self):
"""Destructor to ensure browser is closed."""
self.close()
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)

View File

@ -28,7 +28,7 @@ class Interaction:
if tts_enabled:
self.speech.speak("Hello, we are online and ready. What can I do for you ?")
if recover_last_session:
self.recover_last_session()
self.load_last_session()
def find_ai_name(self) -> str:
"""Find the name of the default AI. It is required for STT as a trigger word."""
@ -39,7 +39,7 @@ class Interaction:
break
return ai_name
def recover_last_session(self):
def load_last_session(self):
"""Recover the last session."""
for agent in self.agents:
agent.memory.load_memory(agent.type)

View File

@ -15,9 +15,10 @@ import httpx
from sources.utility import pretty_print, animate_thinking
class Provider:
def __init__(self, provider_name, model, server_address = "127.0.0.1:5000"):
def __init__(self, provider_name, model, server_address = "127.0.0.1:5000", is_local=False):
self.provider_name = provider_name.lower()
self.model = model
self.is_local = is_local
self.server = self.check_address_format(server_address)
self.available_providers = {
"ollama": self.ollama_fn,
@ -169,11 +170,15 @@ class Provider:
"""
Use openai to generate text.
"""
client = OpenAI(api_key=self.api_key)
if self.is_local:
client = OpenAI(api_key=self.api_key, base_url=base_url)
else:
client = OpenAI(api_key=self.api_key)
try:
response = client.chat.completions.create(
model=self.model,
messages=history
messages=history,
)
thought = response.choices[0].message.content
if verbose:

View File

@ -25,8 +25,10 @@ class Memory():
self.session_time = datetime.datetime.now()
self.session_id = str(uuid.uuid4())
self.conversation_folder = f"conversations/"
self.session_recovered = False
if recover_last_session:
self.load_memory()
self.session_recovered = True
# memory compression system
self.model = "pszemraj/led-base-book-summary"
self.device = self.get_cuda_device()
@ -65,6 +67,8 @@ class Memory():
def load_memory(self, agent_type: str = "casual_agent") -> None:
"""Load the memory from the last session."""
if self.session_recovered == True:
return
save_path = os.path.join(self.conversation_folder, agent_type)
if not os.path.exists(save_path):
return