mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-06 11:05:26 +00:00
Merge pull request #66 from Fosowl/dev
Fix : AudioRecorder issue, Improve readme, more flexible requirements + python_requires to 3.9
This commit is contained in:
commit
489dac5488
31
README.md
31
README.md
@ -38,8 +38,7 @@
|
||||
|
||||
- **Memory**: Remembers what’s useful, your preferences and past session conversations.
|
||||
|
||||
- **Web Browsing**: Autonomous web navigation is underway.
|
||||
|
||||
- **Web Browsing**: Autonomous web navigation.
|
||||
|
||||
|
||||
### Searching the web with agenticSeek :
|
||||
@ -52,7 +51,7 @@
|
||||
|
||||
## **Installation**
|
||||
|
||||
Make sure you have chrome driver and docker installed.
|
||||
Make sure you have chrome driver, docker and python3.10 (or newer) installed.
|
||||
|
||||
For issues related to chrome driver, see the **Chromedriver** section.
|
||||
|
||||
@ -125,7 +124,7 @@ provider_server_address = 127.0.0.1:11434
|
||||
start all services :
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
sudo ./start_services.sh
|
||||
```
|
||||
|
||||
Run the assistant:
|
||||
@ -150,7 +149,7 @@ Warning: currently the system that choose the best AI agent routing system will
|
||||
Make sure the services are up and running with `./start_services.sh` and run the agenticSeek with `python3 main.py`
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
sudo ./start_services.sh
|
||||
python3 main.py
|
||||
```
|
||||
|
||||
@ -247,7 +246,7 @@ provider_server_address = x.x.x.x:5000
|
||||
Run the assistant:
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
sudo ./start_services.sh
|
||||
python3 main.py
|
||||
```
|
||||
|
||||
@ -268,7 +267,7 @@ provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
|
||||
Run the assistant:
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
sudo ./start_services.sh
|
||||
python3 main.py
|
||||
```
|
||||
|
||||
@ -278,22 +277,25 @@ python3 main.py
|
||||
|
||||
## Speech to Text
|
||||
|
||||
The speech to text is disabled by default, you can enable it by setting listen to true in the config.ini:
|
||||
The speech-to-text functionality is disabled by default. To enable it, set the listen option to True in the config.ini file:
|
||||
|
||||
```
|
||||
listen = True
|
||||
```
|
||||
|
||||
The speech to text will await for a AI name as a trigger keyword before it start listening, you can change the AI name by changing the agent_name in the config.ini:
|
||||
When enabled, the speech-to-text feature listens for a trigger keyword, which is the agent's name, before it begins processing your input. You can customize the agent's name by updating the `agent_name` value in the *config.ini* file:
|
||||
|
||||
```
|
||||
agent_name = Friday
|
||||
```
|
||||
|
||||
It will work better if you use a common english name like John or Emma.
|
||||
For optimal recognition, we recommend using a common English name like "John" or "Emma" as the agent name.
|
||||
|
||||
After hearing it's name agenticSeek will listen until it hear one of the following keyword for confirmation:
|
||||
Once you see the transcript start to appear, say the agent's name aloud to wake it up (e.g., "Friday").
|
||||
|
||||
Speak your query clearly.
|
||||
|
||||
End your request with a confirmation phrase to signal the system to proceed. Examples of confirmation phrases include:
|
||||
```
|
||||
"do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"
|
||||
```
|
||||
@ -321,7 +323,7 @@ provider_server_address = 127.0.0.1:5000
|
||||
```
|
||||
`is_local`: should be True for any locally running LLM, otherwise False.
|
||||
|
||||
`provider_name`: Select the provider to use by its name, see the provider list above.
|
||||
`provider_name`: Select the provider to use by its name, see the provider list above.
|
||||
|
||||
`provider_model`: Set the model to use by the agent.
|
||||
|
||||
@ -351,6 +353,7 @@ And download the chromedriver version matching your OS.
|
||||

|
||||
|
||||
## FAQ
|
||||
|
||||
**Q: What hardware do I need?**
|
||||
|
||||
7B Model: GPU with 8GB VRAM.
|
||||
@ -365,10 +368,6 @@ Deepseek R1 excels at reasoning and tool use for its size. We think it’s a sol
|
||||
|
||||
Ensure Ollama is running (`ollama serve`), your `config.ini` matches your provider, and dependencies are installed. If none work feel free to raise an issue.
|
||||
|
||||
**Q: How to join the discord ?**
|
||||
|
||||
Ask in the Community section for an invite.
|
||||
|
||||
**Q: Can it really run 100% locally?**
|
||||
|
||||
Yes, with Ollama or Server providers, all speech-to-text, LLM and text-to-speech models run locally. Non-local options (OpenAI or other APIs) are optional.
|
||||
|
@ -1,33 +1,35 @@
|
||||
requests==2.31.0
|
||||
openai==1.61.1
|
||||
colorama==0.4.6
|
||||
python-dotenv==1.0.0
|
||||
playsound==1.3.0
|
||||
soundfile==0.13.1
|
||||
transformers==4.48.3
|
||||
torch==2.5.1
|
||||
ollama==0.4.7
|
||||
scipy==1.15.1
|
||||
kokoro==0.7.12
|
||||
flask==3.1.0
|
||||
soundfile==0.13.1
|
||||
protobuf==3.20.3
|
||||
termcolor==2.5.0
|
||||
ipython==8.34.0
|
||||
gliclass==0.1.8
|
||||
pyaudio==0.2.14
|
||||
librosa==0.10.2.post1
|
||||
selenium==4.29.0
|
||||
markdownify==1.1.0
|
||||
text2emotion==0.0.5
|
||||
langid==1.1.6
|
||||
chromedriver-autoinstaller==0.6.4
|
||||
requests>=2.31.0
|
||||
colorama>=0.4.6
|
||||
python-dotenv>=1.0.0
|
||||
playsound>=1.3.0
|
||||
soundfile>=0.13.1
|
||||
transformers>=4.46.3
|
||||
torch>=2.4.1
|
||||
python-dotenv>=1.0.0
|
||||
ollama>=0.4.7
|
||||
scipy>=1.15.1
|
||||
kokoro>=0.7.12
|
||||
flask>=3.1.0
|
||||
soundfile>=0.13.1
|
||||
protobuf>=3.20.3
|
||||
termcolor>=2.5.0
|
||||
ipython>=8.34.0
|
||||
gliclass>=0.1.8
|
||||
pyaudio>=0.2.14
|
||||
librosa>=0.10.2.post1
|
||||
selenium>=4.29.0
|
||||
markdownify>=1.1.0
|
||||
text2emotion>=0.0.5
|
||||
langid>=1.1.6
|
||||
chromedriver-autoinstaller>=0.6.4
|
||||
httpx>=0.27,<0.29
|
||||
anyio>=3.5.0,<5
|
||||
distro>=1.7.0,<2
|
||||
jiter>=0.4.0,<1
|
||||
sniffio
|
||||
tqdm>4
|
||||
# for api provider
|
||||
openai
|
||||
# if use chinese
|
||||
ordered_set
|
||||
pypinyin
|
||||
|
@ -5,6 +5,8 @@ echo "Starting installation for Linux..."
|
||||
# Update package list
|
||||
sudo apt-get update
|
||||
|
||||
pip install --upgrade pip
|
||||
|
||||
# Install Python dependencies from requirements.txt
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
version: '3'
|
||||
services:
|
||||
redis:
|
||||
container_name: redis
|
||||
|
47
setup.py
47
setup.py
@ -8,34 +8,35 @@ setup(
|
||||
version="0.1.0",
|
||||
author="Fosowl",
|
||||
author_email="mlg.fcu@gmail.com",
|
||||
description="A Python project for agentic search and processing",
|
||||
description="The open, local alternative to ManusAI",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://github.com/Fosowl/agenticSeek",
|
||||
packages=find_packages(),
|
||||
include_package_data=True,
|
||||
install_requires=[
|
||||
"requests==2.31.0",
|
||||
"openai==1.61.1",
|
||||
"colorama==0.4.6",
|
||||
"python-dotenv==1.0.0",
|
||||
"playsound==1.3.0",
|
||||
"soundfile==0.13.1",
|
||||
"transformers==4.48.3",
|
||||
"torch==2.5.1",
|
||||
"ollama==0.4.7",
|
||||
"scipy==1.15.1",
|
||||
"kokoro==0.7.12",
|
||||
"flask==3.1.0",
|
||||
"protobuf==3.20.3",
|
||||
"termcolor==2.5.0",
|
||||
"gliclass==0.1.8",
|
||||
"ipython==8.34.0",
|
||||
"librosa==0.10.2.post1",
|
||||
"selenium==4.29.0",
|
||||
"markdownify==1.1.0",
|
||||
"text2emotion==0.0.5",
|
||||
"langid==1.1.6",
|
||||
"requests>=2.31.0",
|
||||
"openai",
|
||||
"colorama>=0.4.6",
|
||||
"python-dotenv>=1.0.0",
|
||||
"playsound>=1.3.0",
|
||||
"soundfile>=0.13.1",
|
||||
"transformers>=4.46.3",
|
||||
"torch>=2.4.1",
|
||||
"ollama>=0.4.7",
|
||||
"scipy>=1.15.1",
|
||||
"kokoro>=0.7.12",
|
||||
"flask>=3.1.0",
|
||||
"protobuf>=3.20.3",
|
||||
"termcolor>=2.5.0",
|
||||
"gliclass>=0.1.8",
|
||||
"ipython>=8.34.0",
|
||||
"librosa>=0.10.2.post1",
|
||||
"selenium>=4.29.0",
|
||||
"markdownify>=1.1.0",
|
||||
"text2emotion>=0.0.5",
|
||||
"python-dotenv>=1.0.0",
|
||||
"langid>=1.1.6",
|
||||
"httpx>=0.27,<0.29",
|
||||
"anyio>=3.5.0,<5",
|
||||
"distro>=1.7.0,<2",
|
||||
@ -61,5 +62,5 @@ setup(
|
||||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Operating System :: OS Independent",
|
||||
],
|
||||
python_requires=">=3.6",
|
||||
python_requires=">=3.9",
|
||||
)
|
||||
|
@ -33,7 +33,7 @@ class Provider:
|
||||
if self.provider_name in self.unsafe_providers:
|
||||
pretty_print("Warning: you are using an API provider. You data will be sent to the cloud.", color="warning")
|
||||
self.api_key = self.get_api_key(self.provider_name)
|
||||
elif self.server != "":
|
||||
elif self.server != "ollama":
|
||||
pretty_print(f"Provider: {provider_name} initialized at {self.server}", color="success")
|
||||
self.check_address_format(self.server)
|
||||
if not self.is_ip_online(self.server.split(':')[0]):
|
||||
@ -54,6 +54,7 @@ class Provider:
|
||||
Validate if the address is valid IP.
|
||||
"""
|
||||
try:
|
||||
address = address.replace('http://', '')
|
||||
ip, port = address.rsplit(":", 1)
|
||||
if all(c.lower() in ".:abcdef0123456789" for c in ip):
|
||||
ipaddress.ip_address(ip)
|
||||
@ -143,6 +144,7 @@ class Provider:
|
||||
if e.status_code == 404:
|
||||
animate_thinking(f"Downloading {self.model}...")
|
||||
ollama.pull(self.model)
|
||||
self.ollama_fn(history, verbose)
|
||||
if "refused" in str(e).lower():
|
||||
raise Exception("Ollama connection failed. is the server running ?") from e
|
||||
raise e
|
||||
|
@ -6,6 +6,7 @@ import torch
|
||||
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
||||
import time
|
||||
import librosa
|
||||
import pyaudio
|
||||
|
||||
audio_queue = queue.Queue()
|
||||
done = False
|
||||
@ -14,7 +15,7 @@ class AudioRecorder:
|
||||
"""
|
||||
AudioRecorder is a class that records audio from the microphone and adds it to the audio queue.
|
||||
"""
|
||||
def __init__(self, format: int, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False):
|
||||
def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False):
|
||||
import pyaudio
|
||||
self.format = format
|
||||
self.channels = channels
|
||||
|
Loading…
x
Reference in New Issue
Block a user