Merge pull request #122 from Fosowl/dev

Huge refactor and Frontend interface integration
This commit is contained in:
Martin 2025-04-18 19:05:22 +02:00 committed by GitHub
commit 523e7f8271
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
75 changed files with 22412 additions and 498 deletions

23
.gitignore vendored

@ -1,20 +1,37 @@
*.wav
*.DS_Store
*.log
cookies.json
*.tmp
*.safetensors
config.ini
test_agent.py
*.egg-info
cookies.json
test_agent.py
config.ini
.voices/
experimental/
.logs/
.screenshots/*.png
.screenshots/*.jpg
conversations/
agentic_env/*
agentic_seek_env/*
.env
*/.env
dsk/
### react ###
.DS_*
*.log
logs
**/*.backup.*
**/*.back.*
node_modules
bower_components
*.sublime*
psd
thumb
sketch
# Byte-compiled / optimized / DLL files
__pycache__/


@ -1,26 +0,0 @@
# Use official Python 3.11 image as the base
FROM python:3.11
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
g++ \
gfortran \
libportaudio2 \
portaudio19-dev \
ffmpeg \
libavcodec-dev \
libavformat-dev \
libavutil-dev \
chromium \
chromium-driver \
&& rm -rf /var/lib/apt/lists/*
RUN pip cache purge
COPY . .
RUN BLIS_ARCH=generic pip install --no-cache-dir -r requirements.txt

47
Dockerfile.backend Normal file

@ -0,0 +1,47 @@
FROM ubuntu:22.04
# Warning: doesn't work yet, backend is run on host machine for now
WORKDIR /app
RUN apt-get update -qq -y && \
apt-get install -y \
gcc \
g++ \
gfortran \
libportaudio2 \
portaudio19-dev \
ffmpeg \
libavcodec-dev \
libavformat-dev \
libavutil-dev \
gnupg2 \
wget \
unzip \
python3 \
python3-pip \
libasound2 \
libatk-bridge2.0-0 \
libgtk-4-1 \
libnss3 \
xdg-utils
# NOTE: a Chrome install step creating /opt/chrome appears to be missing here; see the warning above.
RUN chmod +x /opt/chrome/chrome
# Install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY api.py .
COPY sources/ ./sources/
COPY prompts/ ./prompts/
COPY crx/ crx/
COPY llm_router/ llm_router/
COPY .env .
COPY config.ini .
# Expose port
EXPOSE 8000
# Run the application
CMD ["python3", "api.py"]

185
README.md

@ -43,7 +43,7 @@ https://github.com/user-attachments/assets/4bd5faf6-459f-4f94-bd1d-238c4b331469
---
## **Installation**
## Installation
Make sure you have ChromeDriver, Docker, and Python 3.10 (or newer) installed.
@ -81,54 +81,47 @@ pip3 install -r requirements.txt
python3 setup.py install
```
---
## Run locally on your machine
## Setup for running LLM locally on your machine
**We recommend using at least Deepseek 14B; smaller models will struggle with tasks, especially web browsing.**
### 1️⃣ **Download Models**
Make sure you have [Ollama](https://ollama.com/) installed.
**Setup your local provider**
Download the `deepseek-r1:14b` model from [DeepSeek](https://deepseek.com/models)
Start your local provider, for example with ollama:
```sh
ollama pull deepseek-r1:14b
```
### 2️⃣ **Run the Assistant (Ollama)**
Start the ollama server
```sh
ollama serve
```
Change the config.ini file to set the provider_name to `ollama` and provider_model to `deepseek-r1:14b`
See below for a list of supported local providers.
**Update the config.ini**
Change the config.ini file to set the provider_name to a supported provider and provider_model to `deepseek-r1:14b`.
NOTE: `deepseek-r1:14b` is an example; use a bigger model if your hardware allows it.
```sh
[MAIN]
is_local = True
provider_name = ollama
provider_name = ollama # or lm-studio, openai, etc.
provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:11434
```
Start all services:
**List of local providers**
```sh
sudo ./start_services.sh # MacOS
start ./start_services.cmd # Windows
```
| Provider | Local? | Description |
|-----------|--------|-----------------------------------------------------------|
| ollama | Yes | Run LLMs locally with ease using ollama as a LLM provider |
| lm-studio | Yes | Run LLM locally with LM studio (set `provider_name` to `lm-studio`)|
| openai | Yes | Use openai compatible API |
Run the assistant:
```sh
python3 main.py
```
*See the **Usage** section if you don't understand how to use it*
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
*See the **Known issues** section if you are having issues*
@ -138,17 +131,82 @@ python3 main.py
---
## Setup to run with an API
Set the desired provider in the `config.ini`
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
WARNING: Make sure there are no trailing spaces in the config.
Set `is_local` to True if you are using a local OpenAI-based API.
Change the IP address if your OpenAI-based API runs on your own server.
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
*See the **Known issues** section if you are having issues*
*See the **Config** section for a detailed explanation of the config file.*
---
## Start services and Run
Activate your Python environment if needed.
```sh
source agentic_seek_env/bin/activate
```
Start required services. This will start all services from the docker-compose.yml, including:
- searxng
- redis (required by searxng)
- frontend
```sh
sudo ./start_services.sh # MacOS
start ./start_services.cmd # Windows
```
**Option 1:** Run with the CLI interface.
```sh
python3 cli.py
```
**Option 2:** Run with the Web interface.
Note: We currently advise running the CLI instead; the Web interface is an active work in progress.
Start the backend.
```sh
python3 api.py
```
Go to `http://localhost:3000/` and you should see the web interface.
Please note that the Web interface doesn't stream messages at the moment.
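The web interface talks to the FastAPI backend over plain HTTP, so you can also query the backend directly. A minimal sketch, assuming the backend listens on its default port 8000 (see `api.py`):
```sh
curl -X POST http://localhost:8000/query \
     -H "Content-Type: application/json" \
     -d '{"query": "Tell me about France", "tts_enabled": false}'
```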
---
## Usage
Make sure the services are up and running with `./start_services.sh` and run the agenticSeek with `python3 main.py`
Make sure the services are up and running with `./start_services.sh`, then run AgenticSeek with `python3 cli.py`
```sh
sudo ./start_services.sh
python3 main.py
python3 cli.py
```
You will be prompted with `>>> `
This indicate agenticSeek await you type for instructions.
This indicates AgenticSeek is waiting for you to type your instructions.
You can also use speech to text by setting `listen = True` in the config.
To exit, simply say `goodbye`.
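For reference, the relevant `config.ini` toggles (a sketch: `listen` enables speech-to-text input, `speak` enables voice output):
```sh
[MAIN]
listen = True
speak = True
```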
@ -167,7 +225,7 @@ Here are some example usage:
> *Do a web search to find cool tech startup in Japan working on cutting edge AI research*
> *Can you find on the internet who created agenticSeek?*
> *Can you find on the internet who created AgenticSeek?*
> *Can you use a fuel calculator online to estimate the cost of a Nice - Milan trip*
@ -188,7 +246,7 @@ Here are some example usage:
> *What's the best workout routine ?*
After you type your query, agenticSeek will allocate the best agent for the task.
After you type your query, AgenticSeek will allocate the best agent for the task.
Because this is an early prototype, the agent routing system might not always allocate the right agent based on your query.
@ -202,12 +260,10 @@ Instead, ask:
---
## **Run the LLM on your own server**
## **Bonus: Setup to run the LLM on your own server**
If you have a powerful computer or a server that you can use, but you want to use it from your laptop, you have the option of running the LLM on a remote server.
### 1️⃣ **Set up and start the server scripts**
On your "server" that will run the AI model, get the IP address:
```sh
@ -217,8 +273,6 @@ curl https://ipinfo.io/ip # public ip
Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP address.
**If you wish to use an OpenAI-based provider, follow the *Run with an API* section.**
Clone the repository and enter the `server/` folder.
@ -241,7 +295,6 @@ python3 app.py --provider ollama --port 3333
You have the choice between using `ollama` and `llamacpp` as an LLM service.
### 2️⃣ **Run it**
Now on your personal computer:
@ -256,39 +309,8 @@ provider_model = deepseek-r1:70b
provider_server_address = x.x.x.x:3333
```
Run the assistant:
```sh
sudo ./start_services.sh # start_services.cmd on windows
python3 main.py
```
## **Run with an API**
Set the desired provider in the `config.ini`.
We recommend Together AI if you want to use Qwen or Deepseek-r1; OpenAI or other APIs work as well.
```sh
[MAIN]
is_local = False
provider_name = together
provider_model = deepseek-ai/DeepSeek-R1-Distill-Llama-70B
provider_server_address = 127.0.0.1:5000 # doesn't matter for a non-local API provider
```
WARNING: Make sure there are no trailing spaces in the config.
Set `is_local` to True if you are using a local OpenAI-based API.
Change the IP address if your OpenAI-based API runs on your own server.
Run the assistant:
```sh
sudo ./start_services.sh # start_services.cmd on windows
python3 main.py
```
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
---
@ -333,6 +355,7 @@ speak = False
listen = False
work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False
languages = en zh
[BROWSER]
headless_browser = False
stealth_mode = False
@ -341,20 +364,35 @@ stealth_mode = False
**Explanation**:
- is_local -> Runs the agent locally (True) or on a remote server (False).
- provider_name -> The provider to use (one of: `ollama`, `server`, `lm-studio`, `deepseek-api`)
- provider_model -> The model used, e.g., deepseek-r1:32b.
- provider_server_address -> Server address, e.g., 127.0.0.1:11434 for local. Set to anything for non-local API.
- agent_name -> Name of the agent, e.g., Friday. Used as a trigger word for TTS.
- recover_last_session -> Restarts from last session (True) or not (False).
- save_session -> Saves session data (True) or not (False).
- speak -> Enables voice output (True) or not (False).
- listen -> Listens for voice input (True) or not (False).
- work_dir -> Folder the AI will have access to, e.g. /Users/user/Documents/.
- jarvis_personality -> Uses a JARVIS-like personality (True) or not (False). This simply changes the prompt file.
- languages -> The list of supported languages, needed for the LLM router to work properly; avoid listing too many or overly similar languages.
- headless_browser -> Runs the browser without a visible window (True) or not (False).
- stealth_mode -> Makes bot detection harder. The only downside is that you have to manually install the anticaptcha extension.
- languages -> List of supported languages, required for the agent routing system. The longer the list, the more models will be downloaded.
## Providers
The table below shows the available providers:
@ -363,10 +401,11 @@ The table below show the available providers:
|-----------|--------|-----------------------------------------------------------|
| ollama | Yes | Run LLMs locally with ease using ollama as a LLM provider |
| server | Yes | Host the model on another machine, use it from your local machine |
| lm-studio | Yes | Run LLM locally with LM studio (set `provider_name` to `lm-studio`)|
| openai | No | Use ChatGPT API (non-private) |
| deepseek-api | No | Deepseek API (non-private) |
| lm-studio | Yes | Run LLM locally with LM studio (`lm-studio`) |
| openai | Depends | Use the ChatGPT API (non-private) or an OpenAI-compatible API |
| deepseek-api | No | Deepseek API (non-private) |
| huggingface| No | Hugging-Face API (non-private) |
| togetherAI | No | Use the Together AI API (non-private) |
To select a provider, change the config.ini:
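For example, to use ollama locally (a sketch; values are illustrative, pick any provider from the table above):
```sh
[MAIN]
is_local = True
provider_name = ollama
provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:11434
```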
@ -424,7 +463,7 @@ If this section is incomplete please raise an issue.
Deepseek R1 excels at reasoning and tool use for its size. We think it's a solid fit for our needs; other models work fine, but Deepseek is our primary pick.
**Q: I get an error running `main.py`. What do I do?**
**Q: I get an error running `cli.py`. What do I do?**
Ensure your local provider is running (`ollama serve`), your `config.ini` matches your provider, and dependencies are installed. If none of that works, feel free to raise an issue.
@ -442,6 +481,8 @@ Unlike Manus, AgenticSeek prioritizes independence from external systems, giving
We're looking for developers to improve AgenticSeek! Check out open issues or discussions.
[Contribution guide](./docs/CONTRIBUTING.md)
[![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date)
## Maintainers:


@ -2,29 +2,26 @@
<img align="center" src="./media/whale_readme.jpg">
<p>
--------------------------------------------------------------------------------
[English](./README.md) | Chinese | [Japanese](./README_JP.md)
# AgenticSeek: Like Manus, but powered by local Deepseek R1 agents.
A **local alternative to Manus AI**: a voice-enabled LLM assistant that can code, access your computer's files, browse the web, and correct its mistakes while reflecting on them, all without ever sending data to the cloud. Built on reasoning models such as DeepSeek R1, it runs entirely on local hardware, keeping your data private.
[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj)
[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/fosowl.svg?style=social&label=Update%20%40Fosowl)](https://x.com/Martin993886460)
> 🛠️ **Still under development** — contributors welcome!
https://github.com/user-attachments/assets/4bd5faf6-459f-4f94-bd1d-238c4b331469
> *Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file*
> *Can you make a tetris game in C ?*
> *I would like to setup a new project file index as mark2.*
### AgenticSeek can plan tasks!
![alt text](./media/examples/planner.png)
## Features:
@ -82,72 +79,120 @@ pip3 install -r requirements.txt
python3 setup.py install
```
## Run AgenticSeek locally on your machine
**We recommend a model of at least Deepseek 14B; smaller models struggle with assistant tasks and quickly forget context.**
### 1️⃣ **Download the model**
**Run the assistant locally**
Make sure you have [Ollama](https://ollama.com/) installed.
Start your local provider, for example with ollama:
Download a model of at least `deepseek-r1:14b` from [DeepSeek](https://deepseek.com/models).
```sh
ollama pull deepseek-r1:14b
```
### 2️⃣ **Start the framework (ollama)**
Start the Ollama server.
```sh
ollama serve
```
Edit the `config.ini` file to set `provider_name` to `ollama` and `provider_model` to the model you just downloaded, e.g. `deepseek-r1:14b`.
See the list of supported local providers below.
Note: `deepseek-r1:14b` is just an example; use a bigger model if your machine allows it.
Edit the `config.ini` file to set `provider_name` to a supported provider and `provider_model` to `deepseek-r1:14b`.
Note: `deepseek-r1:14b` is just an example; use a bigger model if your hardware allows it.
```sh
[MAIN]
is_local = True
provider_name = ollama
provider_name = ollama # or lm-studio, openai, etc.
provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:11434
```
Start all services:
**List of local providers**
| Provider | Local? | Description |
|-------------|--------|-------------------------------------------------------|
| ollama | Yes | Run LLMs locally with ease, using ollama as the LLM provider |
| lm-studio | Yes | Run an LLM locally with LM Studio (set `provider_name` to `lm-studio`) |
| openai | No | Use an OpenAI-compatible API |
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
---
## **Run with an API**
Set up `config.ini`:
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
Warning: make sure there are no trailing spaces in `config.ini`.
If you are using a locally hosted OpenAI-based API, set `is_local` to `True`.
Also change the IP address to that of your OpenAI-based API.
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
---
## Start services and Run
Activate your Python environment if needed.
```sh
source agentic_seek_env/bin/activate
```
Start the required services. This will start all services from `docker-compose.yml`, including:
- searxng
- redis (required by searxng)
- frontend
```sh
sudo ./start_services.sh # MacOS
start ./start_services.cmd # Window
start ./start_services.cmd # Windows
```
Run AgenticSeek:
**Option 1:** Run with the CLI interface.
```sh
python3 main.py
python3 cli.py
```
**Option 2:** Run with the Web interface.
Note: we currently recommend using the CLI; the Web interface is still under active development.
Start the backend.
```sh
python3 api.py
```
Go to `http://localhost:3000/` and you should see the web interface.
Please note that the Web interface doesn't stream messages at the moment.
*See the **Usage** section if you don't know how to get started*
*See the **Known issues** section first if you run into problems*
*If your machine can't run deepseek locally, you could try the API route instead; see **Run with an API***
*See the **Config** section for a detailed explanation of the config file.*
---
## Usage
Make sure all core services are up, i.e. you have run `./start_services.sh`; then you can start AgenticSeek with `python3 main.py`!
To make sure AgenticSeek works properly in a Chinese environment, set the language option in config.ini:
languages = en zh
See the Config section for more information.
Make sure all core services are up, i.e. you have run `./start_services.sh`; then you can start AgenticSeek with `python3 cli.py`!
```sh
sudo ./start_services.sh
python3 main.py
python3 cli.py
```
Once it is running you will see the prompt `>>> `
@ -160,35 +205,35 @@ python3 main.py
### Coding/Bash
> *Help me with matrix multiplication in Golang*
> *Scan my network with nmap, find if any suspicious devices is connected*
> *Make a snake game in python*
### Web search
> *Do a web search to find cool tech startup in Japan working on cutting edge AI research*
> *Can you find on the internet who created agenticSeek?*
> *Can you find on which website I can buy a rtx 4090 for cheap*
### File browsing and search
> *Hey can you find where is million_dollars_contract.pdf i lost it*
> *Show me how much space I have left on my disk*
> *Find and read the README.md and follow the install instruction*
### Casual chat
> *Tell me about France*
> *What is the meaning of life ?*
> *Should I take creatine before or after workout?*
Once you submit your instruction, AgenticSeek will call on the agent best able to help and carry out your request.
@ -198,11 +243,14 @@ python3 main.py
So when you use it, we hope you state explicitly what you want it to do; here is an example!
You should say:
`Do a web search and find out which are the best country for solo-travel`
rather than:
`Do you know some good countries for solo-travel?`
---
---
@ -260,37 +308,6 @@ provider_model = deepseek-r1:14b
provider_server_address = x.x.x.x:3333
```
Run AgenticSeek:
```sh
sudo ./start_services.sh
python3 main.py
```
## **Run with an API**
Set up `config.ini`:
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
Warning: make sure there are no trailing spaces in `config.ini`.
If you are using a locally hosted OpenAI-based API, set `is_local` to `True`.
Also change the IP address to that of your OpenAI-based API.
Run AgenticSeek:
```sh
sudo ./start_services.sh
python3 main.py
```
---
@ -335,6 +352,7 @@ speak = False
listen = False
work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False
languages = en zh
[BROWSER]
headless_browser = False
stealth_mode = False
@ -342,43 +360,45 @@ stealth_mode = False
**Explanation**:
- is_local
  - True: run locally.
  - False: run on a remote server.
- provider_name
  - The provider to use
  - `ollama`, `server`, `lm-studio`, `deepseek-api`
- provider_model
  - The model to run
  - `deepseek-r1:1.5b`, `deepseek-r1:14b`
- provider_server_address
  - Server IP
  - `127.0.0.1:11434`
- agent_name
  - Name of AgenticSeek, used as the trigger word for TTS
  - `Friday`
- recover_last_session
  - True: continue from the previous session.
  - False: restart the conversation.
- save_session
  - True: save the session history.
  - False: don't save.
- speak
  - True: enable voice output.
  - False: disable voice output.
- listen
  - True: enable voice input.
  - False: disable voice input.
- work_dir
  - The working directory AgenticSeek can access and interact with
- jarvis_personality
  > Yes, that Iron Man JARVIS
  - True: enable the JARVIS personality.
  - False: disable the JARVIS personality.
- headless_browser
  - True: run the browser without a visible window.
  - False: show the browser window (cool to watch, recommended XD).
- stealth_mode
  - Makes bot detection harder, but you need to install the anticaptcha extension yourself.
- languages
  - List of supported languages, used by the agent routing system. The longer the list, the more models will be downloaded.
## Providers
@ -436,11 +456,14 @@ https://googlechromelabs.github.io/chrome-for-testing/
## FAQ
**Q: What hardware do I need?**
7B model: GPU with 8GB VRAM.
14B model: 12GB GPU (e.g. RTX 3060).
32B model: 24GB+ VRAM.
| Model size | GPU | Comment |
|-----------|--------|-----------------------------------------------------------|
| 7B | 8GB VRAM | ⚠️ Not recommended. Performance is poor, hallucinations are frequent, and the planner agent will likely fail. |
| 14B | 12GB VRAM (e.g. RTX 3060) | ✅ Usable for simple tasks. May struggle with web browsing and planning tasks. |
| 32B | 24+GB VRAM (e.g. RTX 4090) | 🚀 Succeeds at most tasks, though it may still struggle with task planning. |
| 70B+ | 48+GB VRAM (e.g. Mac Studio) | 💪 Excellent. Recommended for advanced use cases. |
**Q: Why Deepseek R1 rather than another model?**
@ -467,12 +490,24 @@ https://googlechromelabs.github.io/chrome-for-testing/
No, no, no: AgenticSeek and Manus aim at different things. We prioritize local execution and privacy rather than a cloud-based approach. It's a fun, approachable alternative to Manus!
**Q: Are languages other than Chinese supported?**
DeepSeek R1 speaks Chinese natively.
But note: the agent routing system only understands English, so you must declare your languages through the languages parameter in config.ini (e.g. languages = en zh).
What if you don't declare Chinese? You might ask it to write code and get a "doctor agent" (an agent we don't even have... the system would just be confused!).
In practice, a small translation model is downloaded to assist with task dispatch.
## Contributing
We're looking for developers to improve AgenticSeek! Check the open issues on the Issues page, or come discuss cooler new features with us!
[![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date)
[Contribution guide](./docs/CONTRIBUTING.md)
## Authors:
> [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758)


@ -10,21 +10,19 @@
A **local alternative to Manus AI**: a voice-enabled LLM assistant that can code, access your computer's files, browse the web, and correct its mistakes while reflecting on them, all without ever sending data to the cloud. Built on reasoning models such as DeepSeek R1, it runs entirely on local hardware, keeping your data private.
[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj)
[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/fosowl.svg?style=social&label=Update%20%40Fosowl)](https://x.com/Martin993886460)
> 🛠️ **Still under development** — contributors welcome!
https://github.com/user-attachments/assets/4bd5faf6-459f-4f94-bd1d-238c4b331469
> *Do a deep search of AI startup in Osaka and Tokyo, find at least 5, then save in the research_japan.txt file*
> *Can you make a tetris game in C ?*
> *I would like to setup a new project file index as mark2.*
### AgenticSeek can plan tasks!
![alt text](./media/examples/planner.png)
## Features:
@ -82,72 +80,120 @@ pip3 install -r requirements.txt
python3 setup.py install
```
## Run AgenticSeek locally on your machine
**We recommend a model of at least Deepseek 14B; smaller models struggle with assistant tasks and quickly forget context.**
### 1️⃣ **Download the model**
**Run the assistant locally**
Make sure you have [Ollama](https://ollama.com/) installed.
Start your local provider, for example with ollama:
Download a model of at least `deepseek-r1:14b` from [DeepSeek](https://deepseek.com/models).
```sh
ollama pull deepseek-r1:14b
```
### 2️⃣ **Start the framework (ollama)**
Start the Ollama server.
```sh
ollama serve
```
Edit the `config.ini` file to set `provider_name` to `ollama` and `provider_model` to the model you just downloaded, e.g. `deepseek-r1:14b`.
See the list of supported local providers below.
Note: `deepseek-r1:14b` is just an example; use a bigger model if your machine allows it.
Edit the `config.ini` file to set `provider_name` to a supported provider and `provider_model` to `deepseek-r1:14b`.
Note: `deepseek-r1:14b` is just an example; use a bigger model if your hardware allows it.
```sh
[MAIN]
is_local = True
provider_name = ollama
provider_name = ollama # or lm-studio, openai, etc.
provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:11434
```
Start all services:
**List of local providers**
| Provider | Local? | Description |
|-------------|--------|-------------------------------------------------------|
| ollama | Yes | Run LLMs locally with ease, using ollama as the LLM provider |
| lm-studio | Yes | Run an LLM locally with LM Studio (set `provider_name` to `lm-studio`) |
| openai | No | Use an OpenAI-compatible API |
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
---
## **Run with an API**
Set up `config.ini`:
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
Warning: make sure there are no trailing spaces in `config.ini`.
If you are using a locally hosted OpenAI-based API, set `is_local` to `True`.
Also change the IP address to that of your OpenAI-based API.
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
---
## Start services and Run
Activate your Python environment if needed.
```sh
source agentic_seek_env/bin/activate
```
Start the required services. This will start all services from `docker-compose.yml`, including:
- searxng
- redis (required by searxng)
- frontend
```sh
sudo ./start_services.sh # MacOS
start ./start_services.cmd # Window
start ./start_services.cmd # Windows
```
Run AgenticSeek:
**Option 1:** Run with the CLI interface.
```sh
python3 main.py
python3 cli.py
```
**Option 2:** Run with the Web interface.
Note: we currently recommend using the CLI; the Web interface is still under active development.
Start the backend.
```sh
python3 api.py
```
Go to `http://localhost:3000/` and you should see the web interface.
Please note that the Web interface doesn't stream messages at the moment.
*See the **Usage** section if you don't know how to get started*
*See the **Known issues** section first if you run into problems*
*If your machine can't run deepseek locally, you could try the API route instead; see **Run with an API***
*See the **Config** section for a detailed explanation of the config file.*
---
## Usage
Make sure all core services are up, i.e. you have run `./start_services.sh`; then you can start AgenticSeek with `python3 main.py`!
To make sure AgenticSeek works properly in a Chinese environment, set the language option in config.ini:
languages = en zh
See the Config section for more information.
Make sure all core services are up, i.e. you have run `./start_services.sh`; then you can start AgenticSeek with `python3 cli.py`!
```sh
sudo ./start_services.sh
python3 main.py
python3 cli.py
```
Once it is running you will see the prompt `>>> `
@ -160,35 +206,35 @@ python3 main.py
### Coding/Bash
> *Help me with matrix multiplication in Golang*
> *Scan my network with nmap, find if any suspicious devices is connected*
> *Make a snake game in python*
### Web search
> *Do a web search to find cool tech startup in Japan working on cutting edge AI research*
> *Can you find on the internet who created agenticSeek?*
> *Can you find on which website I can buy a rtx 4090 for cheap*
### File browsing and search
> *Hey can you find where is million_dollars_contract.pdf i lost it*
> *Show me how much space I have left on my disk*
> *Find and read the README.md and follow the install instruction*
### Casual chat
> *Tell me about France*
> *What is the meaning of life ?*
> *Should I take creatine before or after workout?*
Once you submit your instruction, AgenticSeek will call on the agent best able to help and carry out your request.
@ -198,11 +244,14 @@ python3 main.py
So when you use it, we hope you state explicitly what you want it to do; here is an example!
You should say:
`Do a web search and find out which are the best country for solo-travel`
rather than:
`Do you know some good countries for solo-travel?`
---
---
@ -260,37 +309,6 @@ provider_model = deepseek-r1:14b
provider_server_address = x.x.x.x:3333
```
Run AgenticSeek:
```sh
sudo ./start_services.sh
python3 main.py
```
## **Run with an API**
Set up `config.ini`:
```sh
[MAIN]
is_local = False
provider_name = openai
provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
Warning: make sure there are no trailing spaces in `config.ini`.
If you are using a locally hosted OpenAI-based API, set `is_local` to `True`.
Also change the IP address to that of your OpenAI-based API.
Run AgenticSeek:
```sh
sudo ./start_services.sh
python3 main.py
```
---
@ -335,6 +353,7 @@ speak = False
listen = False
work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False
languages = en zh
[BROWSER]
headless_browser = False
stealth_mode = False
@ -379,6 +398,8 @@ stealth_mode = False
  - False: run the browser in the background.
- stealth_mode
  - Makes bot detection harder, but you need to install the anticaptcha extension yourself.
- languages
  - List of supported languages, used by the agent routing system. The longer the list, the more models will be downloaded.
## Providers
@ -436,11 +457,14 @@ https://googlechromelabs.github.io/chrome-for-testing/
## FAQ
**Q: What hardware do I need?**
7B model: GPU with 8GB VRAM.
14B model: 12GB GPU (e.g. RTX 3060).
32B model: 24GB+ VRAM.
| Model size | GPU | Comment |
|-----------|--------|-----------------------------------------------------------|
| 7B | 8GB VRAM | ⚠️ Not recommended. Performance is poor, hallucinations are frequent, and the planner agent will likely fail. |
| 14B | 12GB VRAM (e.g. RTX 3060) | ✅ Usable for simple tasks. May struggle with web browsing and planning tasks. |
| 32B | 24+GB VRAM (e.g. RTX 4090) | 🚀 Succeeds at most tasks, though it may still struggle with task planning. |
| 70B+ | 48+GB VRAM (e.g. Mac Studio) | 💪 Excellent. Recommended for advanced use cases. |
**Q: Why Deepseek R1 rather than another model?**
@ -467,12 +491,24 @@ https://googlechromelabs.github.io/chrome-for-testing/
No, no, no: AgenticSeek and Manus aim at different things. We prioritize local execution and privacy rather than a cloud-based approach. It's a fun, approachable alternative to Manus!
**Q: Are languages other than Chinese supported?**
DeepSeek R1 speaks Chinese natively.
But note: the agent routing system only understands English, so you must declare your languages through the languages parameter in config.ini (e.g. languages = en zh).
What if you don't declare Chinese? You might ask it to write code and get a "doctor agent" (an agent we don't even have... the system would just be confused!).
In practice, a small translation model is downloaded to assist with task dispatch.
## Contributing
We're looking for developers to improve AgenticSeek! Check the open issues on the Issues page, or come discuss cooler new features with us!
[![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date)
[Contribution guide](./docs/CONTRIBUTING.md)
## Authors:
> [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758)


@ -9,11 +9,11 @@
A **fully local** alternative to Manus AI: an AI assistant that codes, explores your file system, browses the web and corrects its mistakes, all without sending a single byte of data to the cloud. This autonomous agent runs entirely on your hardware, keeping your data private.
[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj)
[![Visit AgenticSeek](https://img.shields.io/static/v1?label=Website&message=AgenticSeek&color=blue&style=flat-square)](https://fosowl.github.io/agenticSeek.html) ![License](https://img.shields.io/badge/license-GPL--3.0-green) [![Discord](https://img.shields.io/badge/Discord-Join%20Us-7289DA?logo=discord&logoColor=white)](https://discord.gg/4Ub2D6Fj) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/fosowl.svg?style=social&label=Update%20%40Fosowl)](https://x.com/Martin993886460)
> 🛠️ **Under development** — we're actively looking for contributors!
![alt text](./media/whale_readme.jpg)
https://github.com/user-attachments/assets/4bd5faf6-459f-4f94-bd1d-238c4b331469
> *Search the web for things to do in Paris*
@ -22,9 +22,6 @@ A **fully local** alternative to Manus AI, an AI assistant that codes,
> *I'd like you to find a weather API and code me an app that shows the weather in Toulouse*
### agenticSeek can now plan tasks!
![alt text](./media/examples/planner.png)
## Features:
@ -84,51 +81,75 @@ pip3 install -r requirements.txt
## Run it on your machine
**We recommend using at least DeepSeek 14B; smaller models struggle with tool use and quickly forget context.**
### 1️⃣ **Download the model**
Make sure you have [Ollama](https://ollama.com/) installed.
Download `deepseek-r1:14b` from [DeepSeek](https://deepseek.com/models) (or another model depending on your hardware; see the FAQ section).
```sh
ollama pull deepseek-r1:14b
```
### 2️⃣ **Start ollama**
**We recommend using at least DeepSeek 14B; smaller models struggle with tool use and quickly forget context.**
Start your local provider, for example with ollama:
```sh
ollama serve
```
Edit the config.ini file to set provider_name to ollama and provider_model to deepseek-r1:14b.
See the **Providers** section for the list of available providers.
Edit the config.ini file to set provider_name to a provider name and provider_model to the LLM to use.
```sh
[MAIN]
is_local = True
provider_name = ollama
provider_name = ollama # or lm-studio, openai, etc.
provider_model = deepseek-r1:14b
provider_server_address = 127.0.0.1:11434
```
Start all services:
**List of local providers**
| Provider | Local? | Description |
|-------------|---------|-----------------------------------------------------------|
| ollama | Yes | Run LLMs locally with ease, using ollama as the LLM provider |
| lm-studio | Yes | Run an LLM locally with LM Studio (set `provider_name` to `lm-studio`) |
| openai | Yes | Use a local OpenAI-compatible API |
### **Start the services & Run**
Activate your Python environment if needed.
```sh
sudo ./start_services.sh
source agentic_seek_env/bin/activate
```
Launch agenticSeek:
Start the required services. This will start all the services from the docker-compose.yml file, including:
- searxng
- redis (required by searxng)
- frontend
```sh
python3 main.py
sudo ./start_services.sh # MacOS
start ./start_services.cmd # Windows
```
**Option 1:** Run with the CLI interface.
```sh
python3 cli.py
```
**Option 2:** Run with the Web interface.
Start the backend.
```sh
python3 api.py
```
Go to `http://localhost:3000/` and you should see the web interface.
Please note that the web interface doesn't stream messages for the moment.
See the **Usage** section if you don't understand how to use it.
See the **Known issues** section if you run into problems.
See the **Run with an API** section if your hardware can't run DeepSeek locally.
See the **Configuration** section for a detailed explanation of the config file.
@ -136,18 +157,20 @@ See the **Configuration** section for a detailed explanation of the config file
## Usage
Make sure the services are running with ./start_services.sh and launch AgenticSeek with python3 main.py
Make sure the services are running with ./start_services.sh and launch AgenticSeek with the CLI or the Web interface.
```sh
sudo ./start_services.sh
python3 main.py
```
**CLI:**
You will see a prompt: ">>> "
This indicates AgenticSeek is waiting for you to type instructions.
You can also use speech recognition by setting `listen = True` in the config.
To exit, simply say `goodbye`.
**Interface:**
Make sure you have started the backend with `python3 api.py`.
Go to `localhost:3000` and you will see a web interface.
Simply type your message and wait.
If there is no interface at `localhost:3000`, it means you haven't started the services with `start_services.sh`.
Here are some usage examples:
@ -155,7 +178,7 @@ Here are some usage examples:
> *Help me with matrix multiplication in Golang*
> *Initialize a new Python project, set up the readme, gitignore, etc., and make a first commit*
> *Make a snake game in Python*
@ -211,8 +234,6 @@ ip a | grep "inet " | grep -v 127.0.0.1 | awk '{print $2}' | cut -d/ -f1
Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP address.
**If you want to use an OpenAI-based provider, follow the *Run with an API* section.**
Clone the repository and enter the server/ folder.
@ -251,12 +272,7 @@ provider_model = deepseek-r1:14b
provider_server_address = x.x.x.x:3333
```
Run the assistant:
```sh
sudo ./start_services.sh
python3 main.py
```
Then run with the CLI or the web interface as explained in the local-provider section.
## **Run with an API**
@ -274,12 +290,7 @@ provider_model = gpt-4o
provider_server_address = 127.0.0.1:5000
```
Run the assistant:
```sh
sudo ./start_services.sh
python3 main.py
```
Then run with the CLI or the web interface as explained in the local-provider section.
## Config
@ -297,6 +308,7 @@ speak = False
listen = False
work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False
languages = en fr
[BROWSER]
headless_browser = False
stealth_mode = False
@ -330,6 +342,7 @@ stealth_mode = False
`stealth_mode` -> Makes bot detection harder. The only downside is that you have to manually install the anticaptcha extension.
`languages` -> The list of supported languages (needed for agent routing). The longer the list, the more models will be downloaded.
## Providers
@ -340,9 +353,10 @@ The table below shows the available LLM providers:
| ollama | Yes | Run LLMs locally with ease, using Ollama as the LLM provider
| server | Yes | Host the model on another machine, run it from your local machine
| lm-studio | Yes | Run an LLM locally with LM Studio (set provider_name to lm-studio)
| openai | No | Uses the ChatGPT API (not private) |
| deepseek-api | No | Uses the Deepseek API (not private) |
| huggingface | No | Uses the Hugging-Face API (not private) |
| together | No | Uses the Together AI API |
To select an LLM provider, edit the config.ini:
@ -387,13 +401,14 @@ And download the chromedriver version matching your system
If this section is incomplete, please open an issue on GitHub.
## FAQ
**Q: What hardware is needed?**
**Q: Do I need a big PC?**
It depends on the model!
For a 7B model: a GPU with 8 GB of VRAM.
For a 14B model: a 12 GB GPU (e.g. RTX 3060).
And for a 32B model: 24 GB+ of VRAM.
| Model size | GPU | Comment |
|--------------------|------|----------------------------------------------------------|
| 7B | 8 GB VRAM | ⚠️ Not recommended. Performance is poor, hallucinations are frequent, and the planner agent will likely fail. |
| 14B | 12 GB VRAM (e.g. RTX 3060) | ✅ Usable for simple tasks. May struggle with web browsing and planning tasks. |
| 32B | 24+ GB VRAM (e.g. RTX 4090) | 🚀 Succeeds at most tasks, may still struggle with task planning. |
| 70B+ | 48+ GB VRAM (e.g. Mac Studio) | 💪 Excellent. Recommended for advanced use cases. |
**Q: Why Deepseek and not another model?**
@ -417,6 +432,8 @@ We are looking for developers to improve AgenticSeek! See the
[![Star History Chart](https://api.star-history.com/svg?repos=Fosowl/agenticSeek&type=Date)](https://www.star-history.com/#Fosowl/agenticSeek&Date)
[Contribution guide](./docs/CONTRIBUTING.md)
## Authors/Maintainers:
> [Fosowl](https://github.com/Fosowl) - Epitech 2024, France
> [steveh8758](https://github.com/steveh8758) - Feng Chia University, Taiwan
> [Fosowl](https://github.com/Fosowl)
> [steveh8758](https://github.com/steveh8758)

233
api.py Executable file

@ -0,0 +1,233 @@
#!/usr/bin/env python3
import os, sys
import uvicorn
import aiofiles
import configparser
import asyncio
import time
from typing import List
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from fastapi.responses import FileResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from sources.llm_provider import Provider
from sources.interaction import Interaction
from sources.agents import CasualAgent, CoderAgent, FileAgent, PlannerAgent, BrowserAgent
from sources.browser import Browser, create_driver
from sources.utility import pretty_print
from sources.logger import Logger
from sources.schemas import QueryRequest, QueryResponse
from celery import Celery
api = FastAPI(title="AgenticSeek API", version="0.1.0")
celery_app = Celery("tasks", broker="redis://localhost:6379/0", backend="redis://localhost:6379/0")
celery_app.conf.update(task_track_started=True)
logger = Logger("backend.log")
config = configparser.ConfigParser()
config.read('config.ini')
api.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
if not os.path.exists(".screenshots"):
os.makedirs(".screenshots")
api.mount("/screenshots", StaticFiles(directory=".screenshots"), name="screenshots")
def initialize_system():
stealth_mode = config.getboolean('BROWSER', 'stealth_mode')
personality_folder = "jarvis" if config.getboolean('MAIN', 'jarvis_personality') else "base"
languages = config["MAIN"]["languages"].split(' ')
provider = Provider(
provider_name=config["MAIN"]["provider_name"],
model=config["MAIN"]["provider_model"],
server_address=config["MAIN"]["provider_server_address"],
is_local=config.getboolean('MAIN', 'is_local')
)
logger.info(f"Provider initialized: {provider.provider_name} ({provider.model})")
browser = Browser(
create_driver(headless=config.getboolean('BROWSER', 'headless_browser'), stealth_mode=stealth_mode),
anticaptcha_manual_install=stealth_mode
)
logger.info("Browser initialized")
agents = [
CasualAgent(
name=config["MAIN"]["agent_name"],
prompt_path=f"prompts/{personality_folder}/casual_agent.txt",
provider=provider, verbose=False
),
CoderAgent(
name="coder",
prompt_path=f"prompts/{personality_folder}/coder_agent.txt",
provider=provider, verbose=False
),
FileAgent(
name="File Agent",
prompt_path=f"prompts/{personality_folder}/file_agent.txt",
provider=provider, verbose=False
),
BrowserAgent(
name="Browser",
prompt_path=f"prompts/{personality_folder}/browser_agent.txt",
provider=provider, verbose=False, browser=browser
),
PlannerAgent(
name="Planner",
prompt_path=f"prompts/{personality_folder}/planner_agent.txt",
provider=provider, verbose=False, browser=browser
)
]
logger.info("Agents initialized")
interaction = Interaction(
agents,
tts_enabled=config.getboolean('MAIN', 'speak'),
stt_enabled=config.getboolean('MAIN', 'listen'),
recover_last_session=config.getboolean('MAIN', 'recover_last_session'),
langs=languages
)
logger.info("Interaction initialized")
return interaction
interaction = initialize_system()
is_generating = False
query_resp_history = []
@api.get("/screenshot")
async def get_screenshot():
logger.info("Screenshot endpoint called")
screenshot_path = ".screenshots/updated_screen.png"
if os.path.exists(screenshot_path):
return FileResponse(screenshot_path)
logger.error("No screenshot available")
return JSONResponse(
status_code=404,
content={"error": "No screenshot available"}
)
@api.get("/health")
async def health_check():
logger.info("Health check endpoint called")
return {"status": "healthy", "version": "0.1.0"}
@api.get("/is_active")
async def is_active():
logger.info("Is active endpoint called")
return {"is_active": interaction.is_active}
@api.get("/latest_answer")
async def get_latest_answer():
global query_resp_history
if interaction.current_agent is None:
return JSONResponse(status_code=404, content={"error": "No agent available"})
if interaction.current_agent.last_answer not in [q["answer"] for q in query_resp_history]:
query_resp = {
"done": "false",
"answer": interaction.current_agent.last_answer,
"agent_name": interaction.current_agent.agent_name if interaction.current_agent else "None",
"success": interaction.current_agent.success,
"blocks": {f'{i}': block.jsonify() for i, block in enumerate(interaction.current_agent.get_blocks_result())} if interaction.current_agent else {},
"status": interaction.current_agent.get_status_message if interaction.current_agent else "No status available",
"timestamp": str(time.time())
}
query_resp_history.append(query_resp)
return JSONResponse(status_code=200, content=query_resp)
if query_resp_history:
return JSONResponse(status_code=200, content=query_resp_history[-1])
return JSONResponse(status_code=404, content={"error": "No answer available"})
async def think_wrapper(interaction, query, tts_enabled):
try:
interaction.tts_enabled = tts_enabled
interaction.last_query = query
logger.info("Agents request is being processed")
success = await interaction.think()
if not success:
interaction.last_answer = "Error: No answer from agent"
interaction.last_success = False
else:
interaction.last_success = True
return success
except Exception as e:
logger.error(f"Error in think_wrapper: {str(e)}")
interaction.last_answer = f"Error: {str(e)}"
interaction.last_success = False
raise e
@api.post("/query", response_model=QueryResponse)
async def process_query(request: QueryRequest):
global is_generating, query_resp_history
logger.info(f"Processing query: {request.query}")
query_resp = QueryResponse(
done="false",
answer="",
agent_name="Unknown",
success="false",
blocks={},
status="Ready",
timestamp=str(time.time())
)
if is_generating:
logger.warning("Another query is being processed, please wait.")
return JSONResponse(status_code=429, content=query_resp.jsonify())
try:
is_generating = True
success = await think_wrapper(interaction, request.query, request.tts_enabled)
is_generating = False
if not success:
query_resp.answer = interaction.last_answer
return JSONResponse(status_code=400, content=query_resp.jsonify())
if interaction.current_agent:
blocks_json = {f'{i}': block.jsonify() for i, block in enumerate(interaction.current_agent.get_blocks_result())}
else:
logger.error("No current agent found")
blocks_json = {}
query_resp.answer = "Error: No current agent"
return JSONResponse(status_code=400, content=query_resp.jsonify())
logger.info(f"Answer: {interaction.last_answer}")
logger.info(f"Blocks: {blocks_json}")
query_resp.done = "true"
query_resp.answer = interaction.last_answer
query_resp.agent_name = interaction.current_agent.agent_name
query_resp.success = str(interaction.last_success)
query_resp.blocks = blocks_json
# Store the raw dictionary representation
query_resp_dict = {
"done": query_resp.done,
"answer": query_resp.answer,
"agent_name": query_resp.agent_name,
"success": query_resp.success,
"blocks": query_resp.blocks,
"status": query_resp.status,
"timestamp": query_resp.timestamp
}
query_resp_history.append(query_resp_dict)
logger.info("Query processed successfully")
return JSONResponse(status_code=200, content=query_resp.jsonify())
except Exception as e:
logger.error(f"An error occurred: {str(e)}")
sys.exit(1)
finally:
logger.info("Processing finished")
if config.getboolean('MAIN', 'save_session'):
interaction.save_session()
if __name__ == "__main__":
uvicorn.run(api, host="0.0.0.0", port=8000)
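A quick way to exercise these endpoints from Python, e.g. for debugging (a sketch; assumes the backend on its default port 8000 and the third-party `requests` package):
```python
import requests

BASE = "http://localhost:8000"

# Cheap liveness probe.
print(requests.get(f"{BASE}/health").json())

# /query blocks until the agents finish, then returns the final answer.
resp = requests.post(
    f"{BASE}/query",
    json={"query": "Tell me about France", "tts_enabled": False},
)
print(resp.json().get("answer"))

# While a query is in flight, a UI would poll /latest_answer for progress.
latest = requests.get(f"{BASE}/latest_answer")
if latest.status_code == 200:
    data = latest.json()
    print(data.get("agent_name"), data.get("status"))
```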


@ -3,6 +3,7 @@
import sys
import argparse
import configparser
import asyncio
from sources.llm_provider import Provider
from sources.interaction import Interaction
@ -16,7 +17,7 @@ warnings.filterwarnings("ignore")
config = configparser.ConfigParser()
config.read('config.ini')
def main():
async def main():
pretty_print("Initializing...", color="status")
stealth_mode = config.getboolean('BROWSER', 'stealth_mode')
personality_folder = "jarvis" if config.getboolean('MAIN', 'jarvis_personality') else "base"
@ -59,7 +60,7 @@ def main():
try:
while interaction.is_active:
interaction.get_user()
if interaction.think():
if await interaction.think():
interaction.show_answer()
except Exception as e:
if config.getboolean('MAIN', 'save_session'):
@ -69,6 +70,5 @@ def main():
if config.getboolean('MAIN', 'save_session'):
interaction.save_session()
if __name__ == "__main__":
main()
asyncio.run(main())


@ -10,7 +10,7 @@ speak = False
listen = False
work_dir = /Users/mlg/Documents/ai_folder
jarvis_personality = False
languages = en zh fr
languages = en
[BROWSER]
headless_browser = False
stealth_mode = True

105
docker-compose.yml Normal file

@ -0,0 +1,105 @@
version: '3'
services:
redis:
container_name: redis
image: docker.io/valkey/valkey:8-alpine
command: valkey-server --save 30 1 --loglevel warning
restart: unless-stopped
volumes:
- redis-data:/data
cap_drop:
- ALL
cap_add:
- SETGID
- SETUID
- DAC_OVERRIDE
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
networks:
- agentic-seek-net
searxng:
container_name: searxng
image: docker.io/searxng/searxng:latest
restart: unless-stopped
ports:
- "8080:8080"
volumes:
- ./searxng:/etc/searxng:rw
environment:
- SEARXNG_BASE_URL=http://localhost:8080/
- SEARXNG_SECRET_KEY=$(openssl rand -hex 32)
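# NOTE: Compose does not perform $(...) shell command substitution, so the value
# above is passed to the container literally rather than being generated; a real
# key would have to be injected via ${VAR} interpolation or an env file.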
- UWSGI_WORKERS=4
- UWSGI_THREADS=4
cap_add:
- CHOWN
- SETGID
- SETUID
logging:
driver: "json-file"
options:
max-size: "1m"
max-file: "1"
depends_on:
- redis
networks:
- agentic-seek-net
frontend:
container_name: frontend
build:
context: ./frontend
dockerfile: Dockerfile.frontend
ports:
- "3000:3000"
volumes:
- ./frontend/agentic-seek-front/src:/app/src
- ./screenshots:/app/screenshots
environment:
- NODE_ENV=development
- CHOKIDAR_USEPOLLING=true
- BACKEND_URL=http://backend:8000
networks:
- agentic-seek-net
# NOTE: backend service is not working yet due to issue with chromedriver on docker.
# Therefore backend is run on host machine.
# Open to pull requests to fix this.
#backend:
# container_name: backend
# build:
# context: ./
# dockerfile: Dockerfile.backend
# stdin_open: true
# tty: true
# shm_size: 8g
# ports:
# - "8000:8000"
# volumes:
# - ./:/app
# environment:
# - NODE_ENV=development
# - REDIS_URL=redis://redis:6379/0
# - SEARXNG_URL=http://searxng:8080
# - OLLAMA_URL=http://localhost:11434
# - LM_STUDIO_URL=http://localhost:1234
# extra_hosts:
# - "host.docker.internal:host-gateway"
# depends_on:
# - redis
# - searxng
# networks:
# - agentic-seek-net
volumes:
redis-data:
chrome_profiles:
networks:
agentic-seek-net:
driver: bridge
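The start_services scripts wrap this file; assuming Docker Compose v2 is installed, the equivalent manual invocation is:
```sh
docker compose up -d   # starts redis, searxng and the frontend
```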


@ -125,4 +125,4 @@ enforcement ladder](https://github.com/mozilla/diversity).
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
https://www.contributor-covenant.org/translations.


@ -254,7 +254,7 @@ def execute_modules(self, answer: str) -> Tuple[bool, str]:
## 1. Agent selection logic
<p align="center">
<img align="center" src="./media/technical/routing_system.png">
<img align="center" src="./technical/routing_system.png">
<p>
The agent selection is done in 4 steps:
@ -271,7 +271,7 @@ The agent selection is done in 4 steps:
### File/Code agents
<p align="center">
<img align="center" src="./media/technical/code_agent.png">
<img align="center" src="./technical/code_agent.png">
<p>
The File and Code agents operate similarly: when a prompt is submitted, they initiate a loop between the LLM and a code interpreter. This loop continues executing commands or code until the execution is successful or the maximum number of attempts is reached.
@ -279,7 +279,7 @@ The File and Code agents operate similarly: when a prompt is submitted, they ini
### Web agent
<p align="center">
<img align="center" src="./media/technical/web_agent.png">
<img align="center" src="./technical/web_agent.png">
<p>
The Web agent controls a Selenium-driven browser. Upon receiving a query, it begins by generating an optimized search prompt and executing the web_search tool. It then enters a navigation loop, during which it:

(binary image changed; 129 KiB before and after)

(binary image changed; 112 KiB before and after)

(binary image changed; 116 KiB before and after)

(binary image changed; 482 KiB before and after)

23
frontend/.gitignore vendored Normal file

@ -0,0 +1,23 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# production
/build
# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*


@ -0,0 +1,16 @@
FROM node:18
WORKDIR /app
# Install dependencies
COPY agentic-seek-front/package.json agentic-seek-front/package-lock.json ./
RUN npm install
# Copy application code
COPY agentic-seek-front/ .
# Expose port
EXPOSE 3000
# Run the application
CMD ["npm", "start"]

70
frontend/README.md Normal file

@ -0,0 +1,70 @@
# Getting Started with Create React App
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
## Available Scripts
In the project directory, you can run:
### `npm start`
Runs the app in the development mode.\
Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
The page will reload when you make changes.\
You may also see any lint errors in the console.
### `npm test`
Launches the test runner in the interactive watch mode.\
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
### `npm run build`
Builds the app for production to the `build` folder.\
It correctly bundles React in production mode and optimizes the build for the best performance.
The build is minified and the filenames include the hashes.\
Your app is ready to be deployed!
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
### `npm run eject`
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
## Learn More
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
To learn React, check out the [React documentation](https://reactjs.org/).
### Code Splitting
This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
### Analyzing the Bundle Size
This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
### Making a Progressive Web App
This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
### Advanced Configuration
This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
### Deployment
This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
### `npm run build` fails to minify
This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)

17575
frontend/agentic-seek-front/package-lock.json generated Normal file

File diff suppressed because it is too large.


@ -0,0 +1,40 @@
{
"name": "agentic-seek",
"version": "0.1.0",
"private": true,
"dependencies": {
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^1.8.4",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-scripts": "5.0.1",
"web-vitals": "^2.1.4"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
}
}

(new binary image, 3.8 KiB)


@ -0,0 +1,15 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>AgenticSeek</title>
<link
href="https://fonts.googleapis.com/css2?family=Orbitron:wght@400;700&display=swap"
rel="stylesheet"
/>
</head>
<body>
<div id="root"></div>
</body>
</html>

Binary file not shown (added, 5.2 KiB)

Binary file not shown (added, 9.4 KiB)

View File

@ -0,0 +1,25 @@
{
"short_name": "React App",
"name": "Create React App Sample",
"icons": [
{
"src": "favicon.ico",
"sizes": "64x64 32x32 24x24 16x16",
"type": "image/x-icon"
},
{
"src": "logo192.png",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "logo512.png",
"type": "image/png",
"sizes": "512x512"
}
],
"start_url": ".",
"display": "standalone",
"theme_color": "#000000",
"background_color": "#ffffff"
}

View File

@ -0,0 +1,3 @@
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:

View File

@ -0,0 +1,257 @@
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Orbitron', sans-serif;
background-color: #0a0a0a;
color: #ffffff;
overflow-x: hidden;
}
.app {
min-height: 100vh;
display: flex;
flex-direction: column;
}
.header {
padding: 20px;
text-align: center;
background-color: #1a1a1a;
border-bottom: 2px solid #00ffcc;
box-shadow: 0 0 10px #00ffcc;
}
.header h1 {
font-size: 2.5rem;
text-transform: uppercase;
letter-spacing: 2px;
}
.main {
flex: 1;
padding: 40px;
max-width: 1400px;
margin: 0 auto;
width: 100%;
}
.chat-container {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 40px;
height: calc(100vh - 200px);
}
.left-panel,
.right-panel {
background-color: #1a1a1a;
border: 1px solid #00ffcc;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 255, 204, 0.2);
display: flex;
flex-direction: column;
overflow: hidden;
}
.left-panel {
padding: 20px;
}
.messages {
flex: 1;
overflow-y: auto;
padding: 10px;
display: flex;
flex-direction: column;
max-height: 100%;
gap: 15px;
}
.no-messages {
text-align: center;
color: #666;
margin-top: 20px;
}
.message {
max-width: 80%;
padding: 10px 15px;
border-radius: 8px;
font-size: 0.9rem;
}
.messages::-webkit-scrollbar,
.content::-webkit-scrollbar {
width: 8px;
}
.user-message {
background-color: #00ffcc;
color: #000;
align-self: flex-end;
border: 1px solid #00ccaa;
}
.agent-message {
background-color: #333;
color: #fff;
align-self: flex-start;
border: 1px solid #00ffcc;
}
.error-message {
background-color: #ff4444;
color: #fff;
align-self: flex-start;
border: 1px solid #cc3333;
}
.agent-name {
display: block;
font-size: 0.8rem;
color: #00ffcc;
margin-bottom: 5px;
}
.input-form {
display: flex;
gap: 10px;
margin-top: 20px;
}
.input-form input {
flex: 1;
padding: 12px;
font-size: 1rem;
background-color: #222;
border: 1px solid #00ffcc;
color: #fff;
border-radius: 4px;
outline: none;
transition: box-shadow 0.3s;
}
.input-form input:focus {
box-shadow: 0 0 8px #00ffcc;
}
.input-form button {
padding: 12px 24px;
font-size: 1rem;
background-color: #00ffcc;
color: #000;
border: none;
border-radius: 4px;
cursor: pointer;
text-transform: uppercase;
transition: background-color 0.3s;
}
.input-form button:hover {
background-color: #00ccaa;
}
.input-form button:disabled {
background-color: #555;
cursor: not-allowed;
}
.right-panel {
padding: 20px;
}
.view-selector {
display: flex;
gap: 10px;
margin-bottom: 20px;
}
.view-selector button {
padding: 10px 20px;
font-size: 0.9rem;
background-color: #222;
color: #fff;
border: 1px solid #00ffcc;
border-radius: 4px;
cursor: pointer;
text-transform: uppercase;
transition: background-color 0.3s, color 0.3s;
}
.view-selector button.active,
.view-selector button:hover {
background-color: #00ffcc;
color: #000;
}
.view-selector button:disabled {
background-color: #555;
cursor: not-allowed;
}
.content {
flex: 1;
overflow-y: auto;
padding: 5px;
}
.blocks {
display: flex;
flex-direction: column;
gap: 20px;
}
.block {
background-color: #222;
padding: 10px;
border: 1px solid #00ffcc;
border-radius: 4px;
}
.block-tool,
.block-feedback,
.block-success {
font-size: 0.9rem;
margin-bottom: 10px;
}
.block pre {
background-color: #111;
padding: 5px;
border-radius: 4px;
font-size: 0.85rem;
white-space: pre-wrap;
word-break: break-all;
}
.screenshot {
margin-top: 10px;
}
.screenshot img {
max-width: 100%;
border: 1px solid #00ffcc;
border-radius: 4px;
}
.error {
color: #ff4444;
font-size: 0.9rem;
margin-bottom: 10px;
}
@media (max-width: 768px) {
.chat-container {
grid-template-columns: 1fr;
height: auto;
}
.left-panel,
.right-panel {
height: 50vh;
}
}

View File

@ -0,0 +1,262 @@
import React, { useState, useEffect, useRef } from 'react';
import axios from 'axios';
import './App.css';
function App() {
const [query, setQuery] = useState('');
const [messages, setMessages] = useState([]);
const [isLoading, setIsLoading] = useState(false);
const [error, setError] = useState(null);
const [currentView, setCurrentView] = useState('blocks');
const [responseData, setResponseData] = useState(null);
const [isOnline, setIsOnline] = useState(false);
const [status, setStatus] = useState('Agents ready');
const messagesEndRef = useRef(null);
useEffect(() => {
const intervalId = setInterval(() => {
checkHealth();
fetchLatestAnswer();
fetchScreenshot();
}, 1500);
return () => clearInterval(intervalId);
}, [messages]);
const checkHealth = async () => {
try {
await axios.get('http://0.0.0.0:8000/health');
setIsOnline(true);
console.log('System is online');
} catch {
setIsOnline(false);
console.log('System is offline');
}
};
const fetchScreenshot = async () => {
try {
const timestamp = new Date().getTime();
const res = await axios.get(`http://0.0.0.0:8000/screenshots/updated_screen.png?timestamp=${timestamp}`, {
responseType: 'blob'
});
console.log('Screenshot fetched successfully');
const imageUrl = URL.createObjectURL(res.data);
setResponseData((prev) => {
if (prev?.screenshot && prev.screenshot !== 'placeholder.png') {
URL.revokeObjectURL(prev.screenshot);
}
return {
...prev,
screenshot: imageUrl,
screenshotTimestamp: new Date().getTime()
};
});
} catch (err) {
console.error('Error fetching screenshot:', err);
setResponseData((prev) => ({
...prev,
screenshot: 'placeholder.png',
screenshotTimestamp: new Date().getTime()
}));
}
};
const normalizeAnswer = (answer) => answer.trim().toLowerCase();
const scrollToBottom = () => {
messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
};
const fetchLatestAnswer = async () => {
try {
const res = await axios.get('http://0.0.0.0:8000/latest_answer');
const data = res.data;
if (!data.answer || data.answer.trim() === '') {
return;
}
const answerExists = messages.some(
(msg) =>
msg.timestamp === data.timestamp &&
normalizeAnswer(msg.content) === normalizeAnswer(data.answer)
);
console.log('Fetched latest answer:', data.answer);
if (!answerExists) {
setMessages((prev) => [
...prev,
{
type: 'agent',
content: data.answer,
agentName: data.agent_name,
status: data.status,
timestamp: data.timestamp,
},
]);
setStatus(data.status);
scrollToBottom();
}
} catch (error) {
console.error('Error fetching latest answer:', error);
}
};
const handleSubmit = async (e) => {
e.preventDefault();
checkHealth();
if (!query.trim()) {
console.log('Empty query');
return;
}
setMessages((prev) => [...prev, { type: 'user', content: query }]);
setIsLoading(true);
setError(null);
try {
console.log('Sending query:', query);
setQuery('waiting for response...');
const res = await axios.post('http://0.0.0.0:8000/query', {
query,
tts_enabled: false
});
setQuery('Enter your query...');
console.log('Response:', res.data);
const data = res.data;
setResponseData(data);
fetchLatestAnswer();
} catch (err) {
console.error('Error:', err);
setError('Failed to process query.');
setMessages((prev) => [
...prev,
{ type: 'error', content: 'Error: Unable to get a response.' },
]);
} finally {
console.log('Query completed');
setIsLoading(false);
setQuery('');
}
};
const handleGetScreenshot = async () => {
try {
console.log('Fetching screenshot...');
const res = await axios.get('http://0.0.0.0:8000/screenshots/updated_screen.png');
setResponseData((prev) => ({ ...prev, screenshot: res.data.screenshot }));
setCurrentView('screenshot');
} catch (err) {
console.error('Error fetching screenshot:', err);
setError('Browser not in use');
}
};
return (
<div className="app">
<header className="header">
<h1>AgenticSeek</h1>
</header>
<main className="main">
<div className="chat-container">
<div className="left-panel">
<h2>C H A T</h2>
<br />
<div className="messages">
{messages.length === 0 ? (
<p className="placeholder">No messages yet. Type below to start!</p>
) : (
messages.map((msg, index) => (
<div
key={index}
className={`message ${
msg.type === 'user'
? 'user-message'
: msg.type === 'agent'
? 'agent-message'
: 'error-message'
}`}
>
{msg.type === 'agent' && (
<span className="agent-name">{msg.agentName}</span>
)}
<p>{msg.content}</p>
</div>
))
)}
<div ref={messagesEndRef} />
</div>
{isOnline && <div className="loading-animation">{status}</div>}
{!isLoading && !isOnline && <p className="loading-animation">System offline. Deploy backend first.</p>}
<form onSubmit={handleSubmit} className="input-form">
<input
type="text"
value={query}
onChange={(e) => setQuery(e.target.value)}
placeholder="Type your query..."
disabled={isLoading}
/>
<button type="submit" disabled={isLoading}>
Send
</button>
</form>
</div>
<div className="right-panel">
<h2>I N T E R F A C E</h2>
<br />
<div className="view-selector">
<button
className={currentView === 'blocks' ? 'active' : ''}
onClick={() => setCurrentView('blocks')}
>
Editor View
</button>
<button
className={currentView === 'screenshot' ? 'active' : ''}
onClick={responseData?.screenshot ? () => setCurrentView('screenshot') : handleGetScreenshot}
>
Browser View
</button>
</div>
<div className="content">
{error && <p className="error">{error}</p>}
{currentView === 'blocks' ? (
<div className="blocks">
{responseData && responseData.blocks && Object.values(responseData.blocks).length > 0 ? (
Object.values(responseData.blocks).map((block, index) => (
<div key={index} className="block">
<p className="block-tool">Tool: {block.tool_type}</p>
<pre>{block.block}</pre>
<p className="block-feedback">Feedback: {block.feedback}</p>
<p className="block-success">
Success: {block.success ? 'Yes' : 'No'}
</p>
</div>
))
) : (
<div className="block">
<p className="block-tool">Tool: No tool in use</p>
<pre>No file opened</pre>
</div>
)}
</div>
) : (
<div className="screenshot">
<img
src={responseData?.screenshot || 'placeholder.png'}
alt="Screenshot"
onError={(e) => {
e.target.src = 'placeholder.png';
console.error('Failed to load screenshot');
}}
key={responseData?.screenshotTimestamp || 'default'}
/>
</div>
)}
</div>
</div>
</div>
</main>
</div>
);
}
export default App;

View File

@ -0,0 +1,8 @@
import { render, screen } from '@testing-library/react';
import App from './App';
test('renders learn react link', () => {
render(<App />);
const linkElement = screen.getByText(/learn react/i);
expect(linkElement).toBeInTheDocument();
});

View File

@ -0,0 +1,13 @@
body {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
code {
font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
monospace;
}

View File

@ -0,0 +1,10 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import App from './App';
const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
<React.StrictMode>
<App />
</React.StrictMode>
);

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 841.9 595.3"><g fill="#61DAFB"><path d="M666.3 296.5c0-32.5-40.7-63.3-103.1-82.4 14.4-63.6 8-114.2-20.2-130.4-6.5-3.8-14.1-5.6-22.4-5.6v22.3c4.6 0 8.3.9 11.4 2.6 13.6 7.8 19.5 37.5 14.9 75.7-1.1 9.4-2.9 19.3-5.1 29.4-19.6-4.8-41-8.5-63.5-10.9-13.5-18.5-27.5-35.3-41.6-50 32.6-30.3 63.2-46.9 84-46.9V78c-27.5 0-63.5 19.6-99.9 53.6-36.4-33.8-72.4-53.2-99.9-53.2v22.3c20.7 0 51.4 16.5 84 46.6-14 14.7-28 31.4-41.3 49.9-22.6 2.4-44 6.1-63.6 11-2.3-10-4-19.7-5.2-29-4.7-38.2 1.1-67.9 14.6-75.8 3-1.8 6.9-2.6 11.5-2.6V78.5c-8.4 0-16 1.8-22.6 5.6-28.1 16.2-34.4 66.7-19.9 130.1-62.2 19.2-102.7 49.9-102.7 82.3 0 32.5 40.7 63.3 103.1 82.4-14.4 63.6-8 114.2 20.2 130.4 6.5 3.8 14.1 5.6 22.5 5.6 27.5 0 63.5-19.6 99.9-53.6 36.4 33.8 72.4 53.2 99.9 53.2 8.4 0 16-1.8 22.6-5.6 28.1-16.2 34.4-66.7 19.9-130.1 62-19.1 102.5-49.9 102.5-82.3zm-130.2-66.7c-3.7 12.9-8.3 26.2-13.5 39.5-4.1-8-8.4-16-13.1-24-4.6-8-9.5-15.8-14.4-23.4 14.2 2.1 27.9 4.7 41 7.9zm-45.8 106.5c-7.8 13.5-15.8 26.3-24.1 38.2-14.9 1.3-30 2-45.2 2-15.1 0-30.2-.7-45-1.9-8.3-11.9-16.4-24.6-24.2-38-7.6-13.1-14.5-26.4-20.8-39.8 6.2-13.4 13.2-26.8 20.7-39.9 7.8-13.5 15.8-26.3 24.1-38.2 14.9-1.3 30-2 45.2-2 15.1 0 30.2.7 45 1.9 8.3 11.9 16.4 24.6 24.2 38 7.6 13.1 14.5 26.4 20.8 39.8-6.3 13.4-13.2 26.8-20.7 39.9zm32.3-13c5.4 13.4 10 26.8 13.8 39.8-13.1 3.2-26.9 5.9-41.2 8 4.9-7.7 9.8-15.6 14.4-23.7 4.6-8 8.9-16.1 13-24.1zM421.2 430c-9.3-9.6-18.6-20.3-27.8-32 9 .4 18.2.7 27.5.7 9.4 0 18.7-.2 27.8-.7-9 11.7-18.3 22.4-27.5 32zm-74.4-58.9c-14.2-2.1-27.9-4.7-41-7.9 3.7-12.9 8.3-26.2 13.5-39.5 4.1 8 8.4 16 13.1 24 4.7 8 9.5 15.8 14.4 23.4zM420.7 163c9.3 9.6 18.6 20.3 27.8 32-9-.4-18.2-.7-27.5-.7-9.4 0-18.7.2-27.8.7 9-11.7 18.3-22.4 27.5-32zm-74 58.9c-4.9 7.7-9.8 15.6-14.4 23.7-4.6 8-8.9 16-13 24-5.4-13.4-10-26.8-13.8-39.8 13.1-3.1 26.9-5.8 41.2-7.9zm-90.5 125.2c-35.4-15.1-58.3-34.9-58.3-50.6 0-15.7 22.9-35.6 58.3-50.6 8.6-3.7 18-7 27.7-10.1 5.7 19.6 13.2 40 22.5 60.9-9.2 20.8-16.6 41.1-22.2 60.6-9.9-3.1-19.3-6.5-28-10.2zM310 490c-13.6-7.8-19.5-37.5-14.9-75.7 1.1-9.4 2.9-19.3 5.1-29.4 19.6 4.8 41 8.5 63.5 10.9 13.5 18.5 27.5 35.3 41.6 50-32.6 30.3-63.2 46.9-84 46.9-4.5-.1-8.3-1-11.3-2.7zm237.2-76.2c4.7 38.2-1.1 67.9-14.6 75.8-3 1.8-6.9 2.6-11.5 2.6-20.7 0-51.4-16.5-84-46.6 14-14.7 28-31.4 41.3-49.9 22.6-2.4 44-6.1 63.6-11 2.3 10.1 4.1 19.8 5.2 29.1zm38.5-66.7c-8.6 3.7-18 7-27.7 10.1-5.7-19.6-13.2-40-22.5-60.9 9.2-20.8 16.6-41.1 22.2-60.6 9.9 3.1 19.3 6.5 28.1 10.2 35.4 15.1 58.3 34.9 58.3 50.6-.1 15.7-23 35.6-58.4 50.6zM320.8 78.4z"/><circle cx="420.9" cy="296.5" r="45.7"/><path d="M520.5 78.1z"/></g></svg>

Added (2.6 KiB)

View File

@ -0,0 +1,13 @@
const reportWebVitals = onPerfEntry => {
if (onPerfEntry && onPerfEntry instanceof Function) {
import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
getCLS(onPerfEntry);
getFID(onPerfEntry);
getFCP(onPerfEntry);
getLCP(onPerfEntry);
getTTFB(onPerfEntry);
});
}
};
export default reportWebVitals;

View File

@ -0,0 +1,5 @@
// jest-dom adds custom jest matchers for asserting on DOM nodes.
// allows you to do things like:
// expect(element).toHaveTextContent(/react/i)
// learn more: https://github.com/testing-library/jest-dom
import '@testing-library/jest-dom';

Binary file not shown (removed, 879 KiB)

Binary file not shown (removed, 558 KiB)

Binary file not shown (removed, 104 KiB)

View File

@ -47,7 +47,6 @@ Some rules:
- Be efficient, no need to explain your code, unless asked.
- You do not ever need to use bash to execute code.
- Do not ever tell the user how to run it. The user knows it.
- In python do not use if __name__ == "__main__"
- If using a GUI, make sure the Escape key or an exit button closes the program
- No laziness, write and rewrite the full code every time
- If query is unclear say REQUEST_CLARIFICATION

View File

@ -45,6 +45,7 @@ rules:
- Do not ever use editor such as vim or nano.
- Make sure to always cd your work folder before executing commands, like cd <work dir> && <your command>
- only use file name with file_finder, not path
- If the query is unrelated to file operations, do nothing, and say that there was a mistake in agent allocation.
Example Interaction
User: "I need to find the file config.txt and read its contents."

View File

@ -46,7 +46,6 @@ Some rules:
- Be efficient, no need to explain your code, unless asked.
- You do not ever need to use bash to execute code.
- Do not ever tell the user how to run it. The user knows it.
- In python do not use if __name__ == "__main__"
- If using a GUI, make sure the Escape key closes the program
- No laziness, write and rewrite the full code every time
- If query is unclear say REQUEST_CLARIFICATION

View File

@ -55,6 +55,7 @@ rules:
- Make sure to always cd your work folder before executing commands, like cd <work dir> && <your command>
- Do not ever use editor such as vim or nano.
- only use file name with file_finder, not path
- If the query is unrelated to file operations, do nothing, and say that there was a mistake in agent allocation.
Example Interaction
User: "I need to find the file config.txt and read its contents."

View File

@ -1,4 +1,12 @@
fastapi>=0.115.12
flask>=3.1.0
celery>=5.5.1
aiofiles>=24.1.0
uvicorn>=0.34.0
pydantic>=2.10.6
pydantic_core>=2.27.2
setuptools>=75.6.0
sacremoses>=0.0.53
requests>=2.31.0
numpy>=1.24.4
colorama>=0.4.6
@ -27,14 +35,13 @@ httpx>=0.27,<0.29
anyio>=3.5.0,<5
distro>=1.7.0,<2
jiter>=0.4.0,<1
sniffio
tqdm>4
fake_useragent>=2.1.0
selenium_stealth>=1.0.6
undetected-chromedriver>=3.5.5
sentencepiece>=0.2.0
# for api provider
openai
sniffio
tqdm>4
# if use chinese
ordered_set
pypinyin

View File

@ -29,8 +29,8 @@ services:
- ./searxng:/etc/searxng:rw
environment:
- SEARXNG_BASE_URL=http://localhost:8080/
- UWSGI_WORKERS=4
- UWSGI_THREADS=4
- UWSGI_WORKERS=1
- UWSGI_THREADS=1
cap_add:
- CHOWN
- SETGID

View File

@ -95,7 +95,7 @@ server:
# If your instance owns a /etc/searxng/settings.yml file, then set the following
# values there.
secret_key: "ultrasecretkey" # Is overwritten by ${SEARXNG_SECRET}
secret_key: "supersecret" # Is overwritten by ${SEARXNG_SECRET},W
# Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY}
image_proxy: false
# 1.0 and 1.1 are supported

2737
searxng/settings.yml.new Normal file

File diff suppressed because it is too large

53
searxng/uwsgi.ini Normal file
View File

@ -0,0 +1,53 @@
[uwsgi]
# Who will run the code
uid = searxng
gid = searxng
# Number of workers (usually CPU count)
# default value: %k (= number of CPU core, see Dockerfile)
workers = 1
# Number of threads per worker
# default value: 4 (see Dockerfile)
enable-threads = true
threads = 1
# The right granted on the created socket
chmod-socket = 666
# Plugin to use and interpreter config
single-interpreter = true
master = true
plugin = python3
lazy-apps = true
# Module to import
module = searx.webapp
# Virtualenv and python path
pythonpath = /usr/local/searxng/
chdir = /usr/local/searxng/searx/
# automatically set processes name to something meaningful
auto-procname = true
# Disable request logging for privacy
disable-logging = true
log-5xx = true
# Set the max size of a request (request-body excluded)
buffer-size = 8192
# No keep alive
# See https://github.com/searx/searx-docker/issues/24
add-header = Connection: close
# Follow SIGTERM convention
# See https://github.com/searxng/searxng/issues/3427
die-on-term
# uwsgi serves the static files
static-map = /static=/usr/local/searxng/searx/static
static-gzip-all = True
offload-threads = 4

View File

@ -15,8 +15,16 @@ setup(
packages=find_packages(),
include_package_data=True,
install_requires=[
"fastapi>=0.115.12",
"celery>=5.5.1",
"uvicorn>=0.34.0",
"flask>=3.1.0",
"aiofiles>=24.1.0",
"pydantic>=2.10.6",
"pydantic_core>=2.27.2",
"requests>=2.31.0",
"openai",
"sacremoses>=0.0.53",
"numpy>=1.24.4",
"colorama>=0.4.6",
"python-dotenv>=1.0.0",
"playsound>=1.3.0",
@ -26,7 +34,6 @@ setup(
"ollama>=0.4.7",
"scipy>=1.9.3",
"kokoro>=0.7.12",
"flask>=3.1.0",
"protobuf>=3.20.3",
"termcolor>=2.5.0",
"ipython>=8.34.0",
@ -35,8 +42,9 @@ setup(
"markdownify>=1.1.0",
"text2emotion>=0.0.5",
"python-dotenv>=1.0.0",
"langid>=1.1.6",
"adaptive-classifier>=0.0.10",
"langid>=1.1.6",
"chromedriver-autoinstaller>=0.6.4",
"httpx>=0.27,<0.29",
"anyio>=3.5.0,<5",
"distro>=1.7.0,<2",
@ -45,6 +53,7 @@ setup(
"selenium_stealth>=1.0.6",
"undetected-chromedriver>=3.5.5",
"sentencepiece>=0.2.0",
"openai",
"sniffio",
"tqdm>4"
],

View File

@ -5,27 +5,15 @@ import os
import random
import time
import asyncio
from concurrent.futures import ThreadPoolExecutor
from sources.memory import Memory
from sources.utility import pretty_print
from sources.schemas import executorResult
random.seed(time.time())
class executorResult:
"""
A class to store the result of a tool execution.
"""
def __init__(self, block, feedback, success, tool_type):
self.block = block
self.feedback = feedback
self.success = success
self.tool_type = tool_type
def show(self):
pretty_print(''*64, color="status")
pretty_print(self.block, color="code" if self.success else "failure")
pretty_print(''*64, color="status")
pretty_print(self.feedback, color="success" if self.success else "failure")
class Agent():
"""
An abstract class for all agents.
@ -56,12 +44,42 @@ class Agent():
memory_compression=False)
self.tools = {}
self.blocks_result = []
self.success = True
self.last_answer = ""
self.status_message = "Haven't started yet"
self.verbose = verbose
self.executor = ThreadPoolExecutor(max_workers=1)
@property
def get_agent_name(self) -> str:
return self.agent_name
@property
def get_agent_type(self) -> str:
return self.type
@property
def get_agent_role(self) -> str:
return self.role
@property
def get_last_answer(self) -> str:
return self.last_answer
@property
def get_blocks(self) -> list:
return self.blocks_result
@property
def get_status_message(self) -> str:
return self.status_message
@property
def get_tools(self) -> dict:
return self.tools
def get_blocks_result(self) -> list:
return self.blocks_result
def add_tool(self, name: str, tool: Callable) -> None:
if tool is not Callable:
@ -105,7 +123,15 @@ class Agent():
end_idx = text.rfind(end_tag)+8
return text[start_idx:end_idx]
def llm_request(self) -> Tuple[str, str]:
async def llm_request(self) -> Tuple[str, str]:
"""
Asynchronously ask the LLM to process the prompt.
"""
self.status_message = "Thinking..."
loop = asyncio.get_event_loop()
return await loop.run_in_executor(self.executor, self.sync_llm_request)
def sync_llm_request(self) -> Tuple[str, str]:
"""
Ask the LLM to process the prompt and return the answer and the reasoning.
"""
@ -117,17 +143,15 @@ class Agent():
self.memory.push('assistant', answer)
return answer, reasoning
def wait_message(self, speech_module):
async def wait_message(self, speech_module):
if speech_module is None:
return
messages = ["Please be patient, I am working on it.",
"Computing... I recommand you have a coffee while I work.",
"Hold on, Im crunching numbers.",
"Working on it, please let me think."]
if speech_module: speech_module.speak(messages[random.randint(0, len(messages)-1)])
def get_blocks_result(self) -> list:
return self.blocks_result
loop = asyncio.get_event_loop()
return await loop.run_in_executor(self.executor, lambda: speech_module.speak(messages[random.randint(0, len(messages)-1)]))
def get_last_tool_type(self) -> str:
return self.blocks_result[-1].tool_type if len(self.blocks_result) > 0 else None
@ -175,11 +199,12 @@ class Agent():
Execute all the tools the agent has and return the result.
"""
feedback = ""
success = False
success = True
blocks = None
if answer.startswith("```"):
answer = "I will execute:\n" + answer # there should always be a text before blocks for the function that display answer
self.success = True
for name, tool in self.tools.items():
feedback = ""
blocks, save_path = tool.load_exec_block(answer)
@ -191,6 +216,7 @@ class Agent():
success = not tool.execution_failure_check(output)
self.blocks_result.append(executorResult(block, feedback, success, name))
if not success:
self.success = False
self.memory.push('user', feedback)
return False, feedback
self.memory.push('user', feedback)

View File

@ -3,6 +3,7 @@ import time
from datetime import date
from typing import List, Tuple, Type, Dict
from enum import Enum
import asyncio
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
@ -166,10 +167,10 @@ class BrowserAgent(Agent):
You must always take notes.
"""
def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
async def llm_decide(self, prompt: str, show_reasoning: bool = False) -> Tuple[str, str]:
animate_thinking("Thinking...", color="status")
self.memory.push('user', prompt)
answer, reasoning = self.llm_request()
answer, reasoning = await self.llm_request()
if show_reasoning:
pretty_print(reasoning, color="failure")
pretty_print(answer, color="output")
@ -287,7 +288,7 @@ class BrowserAgent(Agent):
pretty_print(f"Title: {res['title']} - ", color="info", no_newline=True)
pretty_print(f"Link: {res['link']}", color="status")
def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]:
async def process(self, user_prompt: str, speech_module: type) -> Tuple[str, str]:
"""
Process the user prompt to conduct an autonomous web search.
Start with a Google search via SearXNG using the web_search tool.
@ -302,11 +303,12 @@ class BrowserAgent(Agent):
animate_thinking(f"Thinking...", color="status")
mem_begin_idx = self.memory.push('user', self.search_prompt(user_prompt))
ai_prompt, reasoning = self.llm_request()
ai_prompt, reasoning = await self.llm_request()
if Action.REQUEST_EXIT.value in ai_prompt:
pretty_print(f"Web agent requested exit.\n{reasoning}\n\n{ai_prompt}", color="failure")
return ai_prompt, ""
animate_thinking(f"Searching...", color="status")
self.status_message = "Searching..."
search_result_raw = self.tools["web_search"].execute([ai_prompt], False)
search_result = self.jsonify_search_results(search_result_raw)[:16]
self.show_search_results(search_result)
@ -315,16 +317,18 @@ class BrowserAgent(Agent):
while not complete and len(unvisited) > 0:
self.memory.clear()
answer, reasoning = self.llm_decide(prompt, show_reasoning = False)
answer, reasoning = await self.llm_decide(prompt, show_reasoning = False)
self.last_answer = answer
pretty_print(''*32, color="status")
extracted_form = self.extract_form(answer)
if len(extracted_form) > 0:
self.status_message = "Filling web form..."
pretty_print(f"Filling inputs form...", color="status")
fill_success = self.browser.fill_form(extracted_form)
page_text = self.browser.get_text()
answer = self.handle_update_prompt(user_prompt, page_text, fill_success)
answer, reasoning = self.llm_decide(prompt)
answer, reasoning = await self.llm_decide(prompt)
if Action.FORM_FILLED.value in answer:
pretty_print(f"Filled form. Handling page update.", color="status")
@ -337,14 +341,18 @@ class BrowserAgent(Agent):
link = self.select_link(links)
if Action.REQUEST_EXIT.value in answer:
self.status_message = "Exiting web browser..."
pretty_print(f"Agent requested exit.", color="status")
complete = True
break
if (link == None and not len(extracted_form)) or Action.GO_BACK.value in answer or link in self.search_history:
if (link == None and len(extracted_form) < 3) or Action.GO_BACK.value in answer or link in self.search_history:
pretty_print(f"Going back to results. Still {len(unvisited)}", color="status")
self.status_message = "Going back to search results..."
unvisited = self.select_unvisited(search_result)
prompt = self.make_newsearch_prompt(user_prompt, unvisited)
self.search_history.append(link)
self.current_page = link
continue
animate_thinking(f"Navigating to {link}", color="status")
@ -355,13 +363,16 @@ class BrowserAgent(Agent):
page_text = self.browser.get_text()
self.navigable_links = self.browser.get_navigable()
prompt = self.make_navigation_prompt(user_prompt, page_text)
self.status_message = "Navigating..."
self.browser.screenshot()
pretty_print("Exited navigation, starting to summarize finding...", color="status")
prompt = self.conclude_prompt(user_prompt)
mem_last_idx = self.memory.push('user', prompt)
answer, reasoning = self.llm_request()
self.status_message = "Summarizing findings..."
answer, reasoning = await self.llm_request()
pretty_print(answer, color="output")
self.memory.clear_section(mem_begin_idx+1, mem_last_idx-1)
self.status_message = "Ready"
return answer, reasoning
if __name__ == "__main__":

View File

@ -1,3 +1,4 @@
import asyncio
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
@ -17,11 +18,12 @@ class CasualAgent(Agent):
self.role = "talk"
self.type = "casual_agent"
def process(self, prompt, speech_module) -> str:
async def process(self, prompt, speech_module) -> str:
self.memory.push('user', prompt)
animate_thinking("Thinking...", color="status")
answer, reasoning = self.llm_request()
answer, reasoning = await self.llm_request()
self.last_answer = answer
self.status_message = "Ready"
return answer, reasoning
if __name__ == "__main__":

View File

@ -1,4 +1,5 @@
import platform, os
import asyncio
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent, executorResult
@ -26,7 +27,6 @@ class CoderAgent(Agent):
self.work_dir = self.tools["file_finder"].get_work_dir()
self.role = "code"
self.type = "code_agent"
def add_sys_info_prompt(self, prompt):
"""Add system information to the prompt."""
@ -36,7 +36,7 @@ class CoderAgent(Agent):
f"\nYou must save file in work directory: {self.work_dir}"
return f"{prompt}\n\n{info}"
def process(self, prompt, speech_module) -> str:
async def process(self, prompt, speech_module) -> str:
answer = ""
attempt = 0
max_attempts = 4
@ -46,25 +46,30 @@ class CoderAgent(Agent):
while attempt < max_attempts:
animate_thinking("Thinking...", color="status")
self.wait_message(speech_module)
answer, reasoning = self.llm_request()
await self.wait_message(speech_module)
answer, reasoning = await self.llm_request()
if clarify_trigger in answer:
self.last_answer = answer
await asyncio.sleep(0)
return answer, reasoning
if not "```" in answer:
self.last_answer = answer
await asyncio.sleep(0)
break
animate_thinking("Executing code...", color="status")
self.status_message = "Executing code..."
exec_success, _ = self.execute_modules(answer)
answer = self.remove_blocks(answer)
self.last_answer = answer
if self.get_last_tool_type() == "bash":
continue
if exec_success:
await asyncio.sleep(0)
if exec_success and self.get_last_tool_type() != "bash":
break
pretty_print("Execution failure", color="failure")
pretty_print("Correcting code...", color="status")
self.status_message = "Correcting code..."
self.show_answer()
attempt += 1
self.status_message = "Ready"
if attempt == max_attempts:
return "I'm sorry, I couldn't find a solution to your problem. How would you like me to proceed ?", reasoning
return answer, reasoning

View File

@ -1,3 +1,4 @@
import asyncio
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
@ -18,17 +19,18 @@ class FileAgent(Agent):
self.role = "files"
self.type = "file_agent"
def process(self, prompt, speech_module) -> str:
async def process(self, prompt, speech_module) -> str:
exec_success = False
prompt += f"\nYou must work in directory: {self.work_dir}"
self.memory.push('user', prompt)
while exec_success is False:
self.wait_message(speech_module)
await self.wait_message(speech_module)
animate_thinking("Thinking...", color="status")
answer, reasoning = self.llm_request()
answer, reasoning = await self.llm_request()
exec_success, _ = self.execute_modules(answer)
answer = self.remove_blocks(answer)
self.last_answer = answer
self.status_message = "Ready"
return answer, reasoning
if __name__ == "__main__":

View File

@ -76,8 +76,7 @@ class PlannerAgent(Agent):
"""
return prompt
def show_plan(self, answer: dict) -> None:
agents_tasks = self.parse_agent_tasks(answer)
def show_plan(self, agents_tasks: dict, answer: str) -> None:
if agents_tasks == (None, None):
pretty_print(answer, color="warning")
pretty_print("Failed to make a plan. This can happen with (too) small LLM. Clarify your request and insist on it making a plan within ```json.", color="failure")
@ -87,25 +86,26 @@ class PlannerAgent(Agent):
pretty_print(f"{task['agent']} -> {task['task']}", color="info")
pretty_print("▔▗ E N D ▖▔", color="status")
def make_plan(self, prompt: str) -> str:
async def make_plan(self, prompt: str) -> str:
ok = False
answer = None
while not ok:
animate_thinking("Thinking...", color="status")
self.memory.push('user', prompt)
answer, _ = self.llm_request()
self.show_plan(answer)
ok_str = input("Is the plan ok? (y/n): ")
if ok_str == 'y':
ok = True
else:
prompt = input("Please reformulate: ")
answer, _ = await self.llm_request()
agents_tasks = self.parse_agent_tasks(answer)
if agents_tasks == (None, None):
prompt = f"Failed to parse the tasks. Please make a plan within ```json.\n"
pretty_print("Failed to make plan. Retrying...", color="warning")
continue
self.show_plan(agents_tasks, answer)
ok = True
return answer
def start_agent_process(self, task: str, required_infos: dict | None) -> str:
async def start_agent_process(self, task: dict, required_infos: dict | None) -> str:
agent_prompt = self.make_prompt(task['task'], required_infos)
pretty_print(f"Agent {task['agent']} started working...", color="status")
agent_answer, _ = self.agents[task['agent'].lower()].process(agent_prompt, None)
agent_answer, _ = await self.agents[task['agent'].lower()].process(agent_prompt, None)
self.agents[task['agent'].lower()].show_answer()
pretty_print(f"Agent {task['agent']} completed task.", color="status")
return agent_answer
@ -113,16 +113,17 @@ class PlannerAgent(Agent):
def get_work_result_agent(self, task_needs, agents_work_result):
return {k: agents_work_result[k] for k in task_needs if k in agents_work_result}
def process(self, prompt: str, speech_module: Speech) -> Tuple[str, str]:
async def process(self, prompt: str, speech_module: Speech) -> Tuple[str, str]:
agents_tasks = (None, None)
agents_work_result = dict()
answer = self.make_plan(prompt)
answer = await self.make_plan(prompt)
agents_tasks = self.parse_agent_tasks(answer)
if agents_tasks == (None, None):
return "Failed to parse the tasks.", ""
for task_name, task in agents_tasks:
self.status_message = "Starting agent process..."
pretty_print(f"I will {task_name}.", color="info")
pretty_print(f"Assigned agent {task['agent']} to {task_name}", color="info")
if speech_module: speech_module.speak(f"I will {task_name}. I assigned the {task['agent']} agent to the task.")
@ -130,7 +131,7 @@ class PlannerAgent(Agent):
if agents_work_result is not None:
required_infos = self.get_work_result_agent(task['need'], agents_work_result)
try:
self.last_answer = self.start_agent_process(task, required_infos)
self.last_answer = await self.start_agent_process(task, required_infos)
except Exception as e:
raise e
agents_work_result[task['id']] = self.last_answer

View File

@ -39,14 +39,24 @@ def get_chrome_path() -> str:
paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
else: # Linux
paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium"]
paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium", "/opt/chrome/chrome", "/usr/local/bin/chrome"]
for path in paths:
if os.path.exists(path) and os.access(path, os.X_OK): # Check if executable
return path
print("Looking for Google Chrome in these locations failed:")
print('\n'.join(paths))
chrome_path_env = os.environ.get("CHROME_EXECUTABLE_PATH")
if chrome_path_env and os.path.exists(chrome_path_env) and os.access(chrome_path_env, os.X_OK):
return chrome_path_env
path = input("Google Chrome not found. Please enter the path to the Chrome executable: ")
if os.path.exists(path) and os.access(path, os.X_OK):
os.environ["CHROME_EXECUTABLE_PATH"] = path
print(f"Chrome path saved to environment variable CHROME_EXECUTABLE_PATH")
return path
return None
def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome:
def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome:
"""Create a Chrome WebDriver with specified options."""
chrome_options = Options()
chrome_path = get_chrome_path()
@ -74,10 +84,10 @@ def create_driver(headless=False, stealth_mode=True) -> webdriver.Chrome:
chrome_options.add_argument(f'--window-size={width},{height}')
if not stealth_mode:
# crx file can't be installed in stealth mode
crx_path = "./crx/nopecha.crx"
if not os.path.exists(crx_path):
raise FileNotFoundError(f"Extension file not found at: {crx_path}")
chrome_options.add_extension(crx_path)
pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure")
else:
chrome_options.add_extension(crx_path)
chromedriver_path = shutil.which("chromedriver")
if not chromedriver_path:
@ -123,14 +133,25 @@ class Browser:
self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
self.logger = Logger("browser.log")
self.screenshot_folder = os.path.join(os.getcwd(), ".screenshots")
self.tabs = []
try:
self.driver = driver
self.wait = WebDriverWait(self.driver, 10)
except Exception as e:
raise Exception(f"Failed to initialize browser: {str(e)}")
self.driver.get("https://www.google.com")
self.setup_tabs()
if anticaptcha_manual_install:
self.load_anticatpcha_manually()
def setup_tabs(self):
self.tabs = self.driver.window_handles
self.driver.get("https://www.google.com")
self.screenshot()
def switch_control_tab(self):
self.logger.log("Switching to control tab.")
self.driver.switch_to.window(self.tabs[0])
def load_anticatpcha_manually(self):
pretty_print("You might want to install the AntiCaptcha extension for captchas.", color="warning")
@ -142,11 +163,10 @@ class Browser:
try:
initial_handles = self.driver.window_handles
self.driver.get(url)
wait = WebDriverWait(self.driver, timeout=30)
wait = WebDriverWait(self.driver, timeout=10)
wait.until(
lambda driver: (
driver.execute_script("return document.readyState") == "complete" and
not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "verifying", "captcha"])
not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "captcha"])
),
message="stuck on 'checking browser' or verification screen"
)
@ -198,6 +218,8 @@ class Browser:
lines.append(cleaned)
result = "[Start of page]\n\n" + "\n\n".join(lines) + "\n\n[End of page]"
result = re.sub(r'!\[(.*?)\]\(.*?\)', r'[IMAGE: \1]', result)
self.logger.info(f"Extracted text: {result[:100]}...")
self.logger.info(f"Extracted text length: {len(result)}")
return result[:8192]
except Exception as e:
self.logger.error(f"Error getting text: {str(e)}")
@ -223,9 +245,11 @@ class Browser:
def is_link_valid(self, url:str) -> bool:
"""Check if a URL is a valid link (page, not related to icon or metadata)."""
if len(url) > 64:
self.logger.warning(f"URL too long: {url}")
return False
parsed_url = urlparse(url)
if not parsed_url.scheme or not parsed_url.netloc:
self.logger.warning(f"Invalid URL: {url}")
return False
if re.search(r'/\d+$', parsed_url.path):
return False
@ -270,6 +294,7 @@ class Browser:
self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});", element)
time.sleep(0.1)
element.click()
self.logger.info(f"Clicked element at {xpath}")
return True
except ElementClickInterceptedException as e:
self.logger.error(f"Error click_element: {str(e)}")
@ -355,6 +380,7 @@ class Browser:
Wait for a submission outcome (e.g., URL change or new element).
"""
try:
self.logger.info("Waiting for submission outcome...")
wait = WebDriverWait(self.driver, timeout)
wait.until(
lambda driver: driver.current_url != self.driver.current_url or
@ -382,8 +408,10 @@ class Browser:
message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds"
)
if self.click_element(xpath):
self.logger.info(f"Clicked button '{button_text}' at XPath: {xpath}")
return True
else:
self.logger.warning(f"Button '{button_text}' at XPath: {xpath} not clickable")
return False
except TimeoutException:
self.logger.warning(f"Timeout waiting for '{button_text}' button at XPath: {xpath}")
@ -419,9 +447,9 @@ class Browser:
self.logger.info(f"Ticked checkbox {index}")
except ElementClickInterceptedException:
self.driver.execute_script("arguments[0].click();", checkbox)
self.logger.info(f"Ticked checkbox {index} using JavaScript")
self.logger.warning(f"Click checkbox {index} intercepted")
else:
self.logger.debug(f"Checkbox {index} already ticked")
self.logger.info(f"Checkbox {index} already ticked")
except TimeoutException:
self.logger.warning(f"Timeout waiting for checkbox {index} to be clickable")
continue
@ -528,19 +556,28 @@ class Browser:
def scroll_bottom(self) -> bool:
"""Scroll to the bottom of the page."""
try:
self.logger.info("Scrolling to the bottom of the page...")
self.driver.execute_script(
"window.scrollTo(0, document.body.scrollHeight);"
)
time.sleep(1)
time.sleep(0.5)
return True
except Exception as e:
self.logger.error(f"Error scrolling: {str(e)}")
return False
def get_screenshot(self) -> str:
return self.screenshot_folder + "/updated_screen.png"
def screenshot(self, filename:str) -> bool:
def screenshot(self, filename:str = 'updated_screen.png') -> bool:
"""Take a screenshot of the current page."""
self.logger.info("Taking screenshot...")
time.sleep(0.1)
try:
self.driver.save_screenshot(filename)
path = os.path.join(self.screenshot_folder, filename)
if not os.path.exists(self.screenshot_folder):
os.makedirs(self.screenshot_folder)
self.driver.save_screenshot(path)
self.logger.info(f"Screenshot saved as {filename}")
return True
except Exception as e:
@ -551,19 +588,18 @@ class Browser:
"""
Apply security measures to block any website malicious/annoying execution, privacy violation etc..
"""
self.logger.info("Applying web safety measures...")
script = self.load_js("inject_safety_script.js")
input_elements = self.driver.execute_script(script)
if __name__ == "__main__":
driver = create_driver()
driver = create_driver(headless=False, stealth_mode=True)
browser = Browser(driver, anticaptcha_manual_install=True)
#browser.go_to("https://github.com/Fosowl/agenticSeek")
#txt = browser.get_text()
#print(txt)
#browser.go_to("https://practicetestautomation.com/practice-test-login/")
input("press enter to continue")
print("AntiCaptcha / Form Test")
#browser.go_to("https://practicetestautomation.com/practice-test-login/")
#txt = browser.get_text()
#browser.go_to("https://www.google.com/recaptcha/api2/demo")
browser.go_to("https://home.openweathermap.org/users/sign_up")
inputs_visible = browser.get_form_inputs()

View File

@ -21,25 +21,36 @@ class Interaction:
self.current_agent = None
self.last_query = None
self.last_answer = None
self.speech = None
self.agents = agents
self.tts_enabled = tts_enabled
self.stt_enabled = stt_enabled
self.recover_last_session = recover_last_session
self.router = AgentRouter(self.agents, supported_language=langs)
if tts_enabled:
animate_thinking("Initializing text-to-speech...", color="status")
self.speech = Speech(enable=tts_enabled)
self.ai_name = self.find_ai_name()
self.speech = None
self.transcriber = None
self.recorder = None
self.is_generating = False
if tts_enabled:
self.initialize_tts()
if stt_enabled:
animate_thinking("Initializing speech recognition...", color="status")
self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
self.recorder = AudioRecorder()
self.initialize_stt()
if recover_last_session:
self.load_last_session()
self.emit_status()
def initialize_tts(self):
"""Initialize TTS."""
if not self.speech:
animate_thinking("Initializing text-to-speech...", color="status")
self.speech = Speech(enable=self.tts_enabled)
def initialize_stt(self):
"""Initialize STT."""
if not self.transcriber or not self.recorder:
animate_thinking("Initializing speech recognition...", color="status")
self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
self.recorder = AudioRecorder()
def emit_status(self):
"""Print the current status of agenticSeek."""
@ -113,7 +124,7 @@ class Interaction:
self.last_query = query
return query
def think(self) -> bool:
async def think(self) -> bool:
"""Request AI agents to process the user input."""
push_last_agent_memory = False
if self.last_query is None or len(self.last_query) == 0:
@ -125,7 +136,9 @@ class Interaction:
push_last_agent_memory = True
tmp = self.last_answer
self.current_agent = agent
self.last_answer, _ = agent.process(self.last_query, self.speech)
self.is_generating = True
self.last_answer, _ = await agent.process(self.last_query, self.speech)
self.is_generating = False
if push_last_agent_memory:
self.current_agent.memory.push('user', self.last_query)
self.current_agent.memory.push('assistant', self.last_answer)
@ -133,6 +146,18 @@ class Interaction:
self.last_answer = None
return True
def get_updated_process_answer(self) -> str:
"""Get the answer from the last agent."""
if self.current_agent is None:
return None
return self.current_agent.get_last_answer()
def get_updated_block_answer(self) -> str:
"""Get the answer from the last agent."""
if self.current_agent is None:
return None
return self.current_agent.get_last_block_answer()
def show_answer(self) -> None:
"""Show the answer to the user."""
if self.last_query is None:

View File

@ -7,7 +7,9 @@ import requests
import subprocess
import ipaddress
import httpx
import socket
import platform
from urllib.parse import urlparse
from dotenv import load_dotenv, set_key
from openai import OpenAI
from huggingface_hub import InferenceClient
@ -34,7 +36,7 @@ class Provider:
}
self.logger = Logger("provider.log")
self.api_key = None
self.unsafe_providers = ["openai", "deepseek", "dsk_deepseek"]
self.unsafe_providers = ["openai", "deepseek", "dsk_deepseek", "together"]
if self.provider_name not in self.available_providers:
raise ValueError(f"Unknown provider: {provider_name}")
if self.provider_name in self.unsafe_providers:
@ -51,10 +53,8 @@ class Provider:
api_key_var = f"{provider.upper()}_API_KEY"
api_key = os.getenv(api_key_var)
if not api_key:
api_key = input(f"Please enter your {provider} API key: ")
set_key(".env", api_key_var, api_key)
self.logger.info("Set API key in env.")
load_dotenv()
pretty_print(f"API key {api_key_var} not found in .env file. Please add it", color="warning")
exit(1)
return api_key
def check_address_format(self, address):
@ -87,31 +87,34 @@ class Provider:
raise ConnectionError(f"{str(e)}\nConnection to {self.server_ip} failed.")
except AttributeError as e:
raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented ?")
except ModuleNotFoundError as e:
raise ModuleNotFoundError(f"{str(e)}\nAn import related to provider {self.provider_name} was not found. Is it installed?")
except Exception as e:
if "refused" in str(e):
return f"Server {self.server_ip} seem offline. Unable to answer."
raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
return thought
def is_ip_online(self, ip_address):
def is_ip_online(self, address: str, timeout: int = 10) -> bool:
"""
Check if an IP address is online by sending a ping request.
Check if an address is online by sending a ping request.
"""
if ip_address == "127.0.0.1":
if not address:
return False
if address.lower() in ["127.0.0.1", "localhost", "0.0.0.0"]:
return True
hostname = urlparse(f'http://{address}' if not address.startswith(('http://', 'https://')) else address).hostname or address
try:
ip_address = socket.gethostbyname(hostname)
except socket.gaierror:
self.logger.error(f"Cannot resolve: {hostname}")
return False
param = '-n' if platform.system().lower() == 'windows' else '-c'
command = ['ping', param, '1', ip_address]
try:
output = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=15)
if output.returncode == 0:
return True
else:
self.logger.error(f"Ping command returned code: {output.returncode}")
return False
except subprocess.TimeoutExpired:
return False
except Exception as e:
pretty_print(f"Error with ping request {str(e)}", color="failure")
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout)
return result.returncode == 0
except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
return False
def server_fn(self, history, verbose = False):

View File

@ -130,9 +130,7 @@ class Memory():
self.logger.info(f"Clearing memory section {start} to {end}.")
start = max(0, start) + 1
end = min(end, len(self.memory)-1) + 2
self.logger.info(f"Memory before: {self.memory}")
self.memory = self.memory[:start] + self.memory[end:]
self.logger.info(f"Memory after: {self.memory}")
def get(self) -> list:
return self.memory

View File

@ -362,6 +362,8 @@ class AgentRouter:
Returns:
str: The selected label
"""
if len(text) <= 8:
return "talk"
result_bart = self.pipelines['bart'](text, labels)
result_llm_router = self.llm_router(text)
bart, confidence_bart = result_bart['labels'][0], result_bart['scores'][0]
@ -402,8 +404,6 @@ class AgentRouter:
if confidence < 0.4:
self.logger.info(f"Low confidence in complexity estimation: {confidence}")
return "LOW"
if complexity == "HIGH" and len(text) < 64:
return None # ask for more info
if complexity == "HIGH":
return "HIGH"
elif complexity == "LOW":
@ -440,11 +440,6 @@ class AgentRouter:
text = self.lang_analysis.translate(text, lang)
labels = [agent.role for agent in self.agents]
complexity = self.estimate_complexity(text)
if complexity == None and self.asked_clarify == False:
self.asked_clarify = True
pretty_print(f"Humm, the task seem complex but you gave very little information. can you clarify?", color="info")
return None
self.asked_clarify = False
if complexity == "HIGH":
pretty_print(f"Complex task detected, routing to planner agent.", color="info")
return self.find_planner_agent()

74
sources/schemas.py Normal file
View File

@ -0,0 +1,74 @@
from typing import Tuple, Callable
from pydantic import BaseModel
from sources.utility import pretty_print
class QueryRequest(BaseModel):
query: str
tts_enabled: bool = True
def __str__(self):
return f"Query: {self.query}, Language: {self.lang}, TTS: {self.tts_enabled}, STT: {self.stt_enabled}"
def jsonify(self):
return {
"query": self.query,
"tts_enabled": self.tts_enabled,
}
class QueryResponse(BaseModel):
done: str
answer: str
agent_name: str
success: str
blocks: dict
status: str
timestamp: str
def __str__(self):
return f"Done: {self.done}, Answer: {self.answer}, Agent Name: {self.agent_name}, Success: {self.success}, Blocks: {self.blocks}"
def jsonify(self):
return {
"done": self.done,
"answer": self.answer,
"agent_name": self.agent_name,
"success": self.success,
"blocks": self.blocks
}
class executorResult:
"""
A class to store the result of a tool execution.
"""
def __init__(self, block: str, feedback: str, success: bool, tool_type: str):
"""
Initialize an agent with execution results.
Args:
block: The content or code block processed by the agent.
feedback: Feedback or response information from the execution.
success: Boolean indicating whether the agent's execution was successful.
tool_type: The type of tool used by the agent for execution.
"""
self.block = block
self.feedback = feedback
self.success = success
self.tool_type = tool_type
def __str__(self):
return f"Tool: {self.tool_type}\nBlock: {self.block}\nFeedback: {self.feedback}\nSuccess: {self.success}"
def jsonify(self):
return {
"block": self.block,
"feedback": self.feedback,
"success": self.success,
"tool_type": self.tool_type
}
def show(self):
pretty_print(''*64, color="status")
pretty_print(self.block, color="code" if self.success else "failure")
pretty_print(''*64, color="status")
pretty_print(self.feedback, color="success" if self.success else "failure")

View File

@ -30,18 +30,27 @@ class PyInterpreter(Tools):
'__builtins__': __builtins__,
'os': os,
'sys': sys,
'__name__': '__main__'
}
code = '\n\n'.join(codes)
self.logger.info(f"Executing code:\n{code}")
try:
try:
buffer = exec(code, global_vars)
self.logger.info(f"Code executed successfully.\noutput:{buffer}")
print(buffer)
if buffer is not None:
output = buffer + '\n'
except SystemExit:
self.logger.info("SystemExit caught, code execution stopped.")
output = stdout_buffer.getvalue()
return f"[SystemExit caught] Output before exit:\n{output}"
except Exception as e:
self.logger.error(f"Code execution failed: {str(e)}")
return "code execution failed:" + str(e)
output = stdout_buffer.getvalue()
finally:
self.logger.info("Code execution finished.")
sys.stdout = sys.__stdout__
return output
@ -74,7 +83,9 @@ class PyInterpreter(Tools):
]
combined_pattern = "|".join(error_patterns)
if re.search(combined_pattern, feedback, re.IGNORECASE):
self.logger.error(f"Execution failure detected: {feedback}")
return True
self.logger.info("No execution success detected.")
return False
if __name__ == "__main__":
@ -93,6 +104,9 @@ here is a save test
def print_hello():
hello = "Hello World"
print(hello)
if __name__ == "__main__":
print_hello()
```
"""
py = PyInterpreter()

View File

@ -16,6 +16,7 @@ class FlightSearch(Tools):
"""
super().__init__()
self.tag = "flight_search"
self.api_key = None
self.api_key = api_key or os.getenv("AVIATIONSTACK_API_KEY")
def execute(self, blocks: str, safety: bool = True) -> str:

View File

@ -21,6 +21,7 @@ import sys
import os
import configparser
from abc import abstractmethod
from sources.logger import Logger
sys.path.append('..')
@ -30,9 +31,9 @@ class Tools():
"""
def __init__(self):
self.tag = "undefined"
self.api_key = None
self.client = None
self.messages = []
self.logger = Logger("tools.log")
self.config = configparser.ConfigParser()
self.work_dir = self.create_work_dir()
self.excutable_blocks_found = False
@ -118,10 +119,12 @@ class Tools():
"""
if save_path is None:
return
self.logger.info(f"Saving blocks to {save_path}")
save_path_dir = os.path.dirname(save_path)
save_path_file = os.path.basename(save_path)
directory = os.path.join(self.work_dir, save_path_dir)
if directory and not os.path.exists(directory):
self.logger.info(f"Creating directory {directory}")
os.makedirs(directory)
for block in blocks:
with open(os.path.join(directory, save_path_file), 'w') as f:
@ -199,6 +202,7 @@ class Tools():
self.excutable_blocks_found = True
code_blocks.append(content)
start_index = end_pos + len(end_tag)
self.logger.info(f"Found {len(code_blocks)} blocks to execute")
return code_blocks, save_path
if __name__ == "__main__":

View File

@ -3,5 +3,11 @@
REM Up the provider in windows
start ollama serve
REM Up Docker
cd searxng && docker compose up
docker-compose up
if %ERRORLEVEL% neq 0 (
echo Error: Failed to start containers. Check Docker logs with 'docker compose logs'.
echo Possible fixes: Ensure Docker Desktop is running or check if port 8080 is free.
exit /b 1
)
timeout /t 10 /nobreak >nul

View File

@ -1,4 +1,68 @@
#!/bin/bash
# start searxng service for internet search
cd searxng && ./setup_searxng.sh
command_exists() {
command -v "$1" &> /dev/null
}
#
# Check if Docker is installed and running
#
if ! command_exists docker; then
echo "Error: Docker is not installed. Please install Docker first."
echo "On Ubuntu: sudo apt install docker.io"
echo "On macOS/Windows: Install Docker Desktop from https://www.docker.com/get-started/"
exit 1
fi
# Check if Docker daemon is running
echo "Checking if Docker daemon is running..."
if ! docker info &> /dev/null; then
echo "Error: Docker daemon is not running or inaccessible."
if [ "$(uname)" = "Linux" ]; then
echo "Trying to start Docker service (may require sudo)..."
if sudo systemctl start docker &> /dev/null; then
echo "Docker started successfully."
else
echo "Failed to start Docker. Possible issues:"
echo "1. Run this script with sudo: sudo bash setup_searxng.sh"
echo "2. Check Docker installation: sudo systemctl status docker"
echo "3. Add your user to the docker group: sudo usermod -aG docker $USER (then log out and back in)"
exit 1
fi
else
echo "Please start Docker manually:"
echo "- On macOS/Windows: Open Docker Desktop."
echo "- On Linux: Run 'sudo systemctl start docker' or check your distro's docs."
exit 1
fi
else
echo "Docker daemon is running."
fi
# Check if Docker Compose is installed
if ! command_exists docker-compose; then
echo "Error: Docker Compose is not installed. Please install it first."
echo "On Ubuntu: sudo apt install docker-compose"
echo "Or via pip: pip install docker-compose"
exit 1
fi
# Check if docker-compose.yml exists
if [ ! -f "docker-compose.yml" ]; then
echo "Error: docker-compose.yml not found in the current directory."
exit 1
fi
# start docker compose for searxng, redis, frontend services
echo "Warning: stopping all docker containers (t-4 seconds)..."
sleep 4
docker stop $(docker ps -a -q)
echo "All containers stopped"
if ! docker-compose up; then
echo "Error: Failed to start containers. Check Docker logs with 'docker compose logs'."
echo "Possible fixes: Run with sudo or ensure port 8080 is free."
exit 1
fi
sleep 10