diff --git a/README.md b/README.md index 4af823f..ab8826f 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,7 @@ - **Memory**: Remembers what’s useful, your preferences and past sessions conversation. -- **Web Browsing**: Autonomous web navigation is underway. - +- **Web Browsing**: Autonomous web navigation. ### Searching the web with agenticSeek : @@ -52,7 +51,7 @@ ## **Installation** -Make sure you have chrome driver and docker installed. +Make sure you have chrome driver, docker and python3.10 (or newer) installed. For issues related to chrome driver, see the **Chromedriver** section. @@ -125,7 +124,7 @@ provider_server_address = 127.0.0.1:11434 start all services : ```sh -./start_services.sh +sudo ./start_services.sh ``` Run the assistant: @@ -150,7 +149,7 @@ Warning: currently the system that choose the best AI agent routing system will Make sure the services are up and running with `./start_services.sh` and run the agenticSeek with `python3 main.py` ```sh -./start_services.sh +sudo ./start_services.sh python3 main.py ``` @@ -247,7 +246,7 @@ provider_server_address = x.x.x.x:5000 Run the assistant: ```sh -./start_services.sh +sudo ./start_services.sh python3 main.py ``` @@ -268,7 +267,7 @@ provider_server_address = 127.0.0.1:5000 # can be set to anything, not used Run the assistant: ```sh -./start_services.sh +sudo ./start_services.sh python3 main.py ``` @@ -278,22 +277,25 @@ python3 main.py ## Speech to Text -The speech to text is disabled by default, you can enable it by setting listen to true in the config.ini: +The speech-to-text functionality is disabled by default. 
To enable it, set the listen option to True in the config.ini file: ``` listen = True ``` -The speech to text will await for a AI name as a trigger keyword before it start listening, you can change the AI name by changing the agent_name in the config.ini: +When enabled, the speech-to-text feature listens for a trigger keyword, which is the agent's name, before it begins processing your input. You can customize the agent's name by updating the `agent_name` value in the *config.ini* file: ``` agent_name = Friday ``` -It will work better if you use a common english name like John or Emma. +For optimal recognition, we recommend using a common English name like "John" or "Emma" as the agent name. -After hearing it's name agenticSeek will listen until it hear one of the following keyword for confirmation: +Once you see the transcript start to appear, say the agent's name aloud to wake it up (e.g., "Friday"). +Speak your query clearly. + +End your request with a confirmation phrase to signal the system to proceed. Examples of confirmation phrases include: ``` "do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?" ``` @@ -321,7 +323,7 @@ provider_server_address = 127.0.0.1:5000 ``` `is_local`: should be True for any locally running LLM, otherwise False. -`provider_name`: Select the provider to use by its name, see the provider list above. +`provider_name`: Select the provider to use by its name, see the provider list above. `provider_model`: Set the model to use by the agent. @@ -351,6 +353,7 @@ And download the chromedriver version matching your OS. ![alt text](./media/chromedriver_readme.png) ## FAQ + **Q: What hardware do I need?** 7B Model: GPU with 8GB VRAM. @@ -365,10 +368,6 @@ Deepseek R1 excels at reasoning and tool use for its size. 
We think it’s a sol Ensure Ollama is running (`ollama serve`), your `config.ini` matches your provider, and dependencies are installed. If none work feel free to raise an issue. -**Q: How to join the discord ?** - -Ask in the Community section for an invite. - **Q: Can it really run 100% locally?** Yes with Ollama or Server providers, all speech to text, LLM and text to speech model run locally. Non-local options (OpenAI or others API) are optional. diff --git a/requirements.txt b/requirements.txt index d2eae9e..db9b52c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,35 @@ -requests==2.31.0 -openai==1.61.1 -colorama==0.4.6 -python-dotenv==1.0.0 -playsound==1.3.0 -soundfile==0.13.1 -transformers==4.48.3 -torch==2.5.1 -ollama==0.4.7 -scipy==1.15.1 -kokoro==0.7.12 -flask==3.1.0 -soundfile==0.13.1 -protobuf==3.20.3 -termcolor==2.5.0 -ipython==8.34.0 -gliclass==0.1.8 -pyaudio==0.2.14 -librosa==0.10.2.post1 -selenium==4.29.0 -markdownify==1.1.0 -text2emotion==0.0.5 -langid==1.1.6 -chromedriver-autoinstaller==0.6.4 +requests>=2.31.0 +colorama>=0.4.6 +python-dotenv>=1.0.0 +playsound>=1.3.0 +soundfile>=0.13.1 +transformers>=4.46.3 +torch>=2.4.1 +python-dotenv>=1.0.0 +ollama>=0.4.7 +scipy>=1.15.1 +kokoro>=0.7.12 +flask>=3.1.0 +soundfile>=0.13.1 +protobuf>=3.20.3 +termcolor>=2.5.0 +ipython>=8.34.0 +gliclass>=0.1.8 +pyaudio>=0.2.14 +librosa>=0.10.2.post1 +selenium>=4.29.0 +markdownify>=1.1.0 +text2emotion>=0.0.5 +langid>=1.1.6 +chromedriver-autoinstaller>=0.6.4 httpx>=0.27,<0.29 anyio>=3.5.0,<5 distro>=1.7.0,<2 jiter>=0.4.0,<1 sniffio tqdm>4 +# for api provider +openai # if use chinese ordered_set pypinyin diff --git a/scripts/linux_install.sh b/scripts/linux_install.sh index 8222e20..c43a41e 100644 --- a/scripts/linux_install.sh +++ b/scripts/linux_install.sh @@ -5,6 +5,8 @@ echo "Starting installation for Linux..." 
# Update package list sudo apt-get update +pip install --upgrade pip + # Install Python dependencies from requirements.txt pip3 install -r requirements.txt diff --git a/searxng/docker-compose.yml b/searxng/docker-compose.yml index d3b4920..156b73f 100644 --- a/searxng/docker-compose.yml +++ b/searxng/docker-compose.yml @@ -1,3 +1,4 @@ +version: '3' services: redis: container_name: redis diff --git a/setup.py b/setup.py index a309b45..002f842 100644 --- a/setup.py +++ b/setup.py @@ -8,34 +8,35 @@ setup( version="0.1.0", author="Fosowl", author_email="mlg.fcu@gmail.com", - description="A Python project for agentic search and processing", + description="The open, local alternative to ManusAI", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/Fosowl/agenticSeek", packages=find_packages(), include_package_data=True, install_requires=[ - "requests==2.31.0", - "openai==1.61.1", - "colorama==0.4.6", - "python-dotenv==1.0.0", - "playsound==1.3.0", - "soundfile==0.13.1", - "transformers==4.48.3", - "torch==2.5.1", - "ollama==0.4.7", - "scipy==1.15.1", - "kokoro==0.7.12", - "flask==3.1.0", - "protobuf==3.20.3", - "termcolor==2.5.0", - "gliclass==0.1.8", - "ipython==8.34.0", - "librosa==0.10.2.post1", - "selenium==4.29.0", - "markdownify==1.1.0", - "text2emotion==0.0.5", - "langid==1.1.6", + "requests>=2.31.0", + "openai", + "colorama>=0.4.6", + "python-dotenv>=1.0.0", + "playsound>=1.3.0", + "soundfile>=0.13.1", + "transformers>=4.46.3", + "torch>=2.4.1", + "ollama>=0.4.7", + "scipy>=1.15.1", + "kokoro>=0.7.12", + "flask>=3.1.0", + "protobuf>=3.20.3", + "termcolor>=2.5.0", + "gliclass>=0.1.8", + "ipython>=8.34.0", + "librosa>=0.10.2.post1", + "selenium>=4.29.0", + "markdownify>=1.1.0", + "text2emotion>=0.0.5", + "python-dotenv>=1.0.0", + "langid>=1.1.6", "httpx>=0.27,<0.29", "anyio>=3.5.0,<5", "distro>=1.7.0,<2", @@ -61,5 +62,5 @@ setup( "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating 
System :: OS Independent", ], - python_requires=">=3.6", + python_requires=">=3.9", ) diff --git a/sources/llm_provider.py b/sources/llm_provider.py index b3f30e9..2e74cd2 100644 --- a/sources/llm_provider.py +++ b/sources/llm_provider.py @@ -33,7 +33,7 @@ class Provider: if self.provider_name in self.unsafe_providers: pretty_print("Warning: you are using an API provider. You data will be sent to the cloud.", color="warning") self.api_key = self.get_api_key(self.provider_name) - elif self.server != "": + elif self.server != "ollama": pretty_print(f"Provider: {provider_name} initialized at {self.server}", color="success") self.check_address_format(self.server) if not self.is_ip_online(self.server.split(':')[0]): @@ -54,6 +54,7 @@ class Provider: Validate if the address is valid IP. """ try: + address = address.replace('http://', '') ip, port = address.rsplit(":", 1) if all(c.lower() in ".:abcdef0123456789" for c in ip): ipaddress.ip_address(ip) @@ -143,6 +144,7 @@ class Provider: if e.status_code == 404: animate_thinking(f"Downloading {self.model}...") ollama.pull(self.model) + self.ollama_fn(history, verbose) if "refused" in str(e).lower(): raise Exception("Ollama connection failed. is the server running ?") from e raise e diff --git a/sources/speech_to_text.py b/sources/speech_to_text.py index 776dce3..2d76c11 100644 --- a/sources/speech_to_text.py +++ b/sources/speech_to_text.py @@ -6,6 +6,7 @@ import torch from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline import time import librosa +import pyaudio audio_queue = queue.Queue() done = False @@ -14,7 +15,7 @@ class AudioRecorder: """ AudioRecorder is a class that records audio from the microphone and adds it to the audio queue. 
""" - def __init__(self, format: int, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False): + def __init__(self, format: int = pyaudio.paInt16, channels: int = 1, rate: int = 4096, chunk: int = 8192, record_seconds: int = 5, verbose: bool = False): import pyaudio self.format = format self.channels = channels