Merge branch 'Fosowl:main' into main

This commit is contained in:
ganesh nikhil 2025-03-18 00:41:19 +05:30 committed by GitHub
commit 477a145712
16 changed files with 5786 additions and 32 deletions

View File

@ -1,3 +1,2 @@
-OPENAI_API_KEY='dont share this, not needed for local providers'
-SERPAPI_KEY='dont share this, needed for internet search'
-AVIATIONSTACK_API_KEY='not needed if you dont search for flight'
+SEARXNG_BASE_URL="http://127.0.0.1:8080"
+OPENAI_API_KEY='dont share this, not needed for local providers'
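
The new `searxSearch` tool reads `SEARXNG_BASE_URL` from the environment at construction time (see `sources/tools/searxSearch.py` below). A minimal sketch of loading and validating the value, assuming python-dotenv is installed and a `.env` file sits in the working directory:

```python
# Minimal sketch: load SEARXNG_BASE_URL from .env, as the searxSearch tool expects.
# Assumes python-dotenv is installed and .env follows the example above.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

base_url = os.getenv("SEARXNG_BASE_URL")
if not base_url:
    raise ValueError("SEARXNG_BASE_URL is not set; add it to your .env file.")
print(f"SearxNG instance: {base_url}")
```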

View File

@ -74,11 +74,13 @@ Push your changes to your fork and submit a pull request to the main branch of the repository.
Here are some high-priority tasks and areas where we need contributions:
-- Web Browsing: Implement autonomous web browsing capabilities for the assistant.
-- Multi-Agent System: Enhance the multi-agent functionality on the dev branch.
- Memory & Recovery: Improve conversation compression.
+- Web Browsing: Improve the autonomous web browsing capabilities of the assistant.
+- Graphical interface: a web graphical interface (please ask first).
+- Multi-Agent System: Enhance the planner agent for divide-and-conquer task handling (please ask first).
- New Tools: Add support for additional programming languages or APIs.
-- Testing: Write comprehensive tests for existing and new features.
+- Multi-language support for text-to-speech & speech-to-text (English, Chinese, Spanish first).
+- Testing: Write comprehensive tests for existing features.
+- Better README image: make a better README image (a robot whale that uses tools, Ghibli or anime style; inspiration could be https://sakana.ai/assets/ai-scientist/cover.jpeg).
If you're unsure where to start, feel free to reach out by opening an issue or joining our community discussions.

View File

@ -44,11 +44,12 @@
## **Installation**
-### 1️⃣ **Clone the repository**
+### 1️⃣ **Clone the repository and setup**
```sh
git clone https://github.com/Fosowl/agenticSeek.git
cd agenticSeek
mv .env.example .env
```
### 2️⃣ **Create a virtual env**
@ -108,6 +109,12 @@ provider_name = ollama
provider_model = deepseek-r1:7b
```
Start all services:
```sh
./start_services.sh
```
Run the assistant:
```sh
@ -156,6 +163,7 @@ provider_server_address = x.x.x.x:5000
Run the assistant:
```sh
./start_services.sh
python3 main.py
```
@ -176,6 +184,7 @@ provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
Run the assistant:
```sh
./start_services.sh
python3 main.py
```

View File

@ -0,0 +1,44 @@
services:
  redis:
    container_name: redis
    image: docker.io/valkey/valkey:8-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    volumes:
      - redis-data:/data
    cap_drop:
      - ALL
    cap_add:
      - SETGID
      - SETUID
      - DAC_OVERRIDE
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"

  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - ./searxng:/etc/searxng:rw
    environment:
      - SEARXNG_BASE_URL=http://localhost:8080/
      - UWSGI_WORKERS=4
      - UWSGI_THREADS=4
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"

volumes:
  redis-data:
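
Once the stack is up, a quick way to confirm the SearxNG container is serving is to request the base URL and check the status code. A minimal sketch, assuming the port mapping above (`8080:8080`) and the `requests` library:

```python
# Minimal health-check sketch: confirm SearxNG answers on the mapped port.
# Assumes the compose stack above is running and `requests` is installed.
import requests

try:
    response = requests.get("http://127.0.0.1:8080", timeout=5)
    print(f"SearxNG reachable, HTTP {response.status_code}")
except requests.exceptions.RequestException as exc:
    print(f"SearxNG not reachable: {exc}")
```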

searxng/searxng/settings.yml Normal file (2669 lines)

File diff suppressed because it is too large

searxng/searxng/uwsgi.ini Normal file (52 lines)
View File

@ -0,0 +1,52 @@
[uwsgi]
# Who will run the code
uid = searxng
gid = searxng
# Number of workers (usually CPU count)
# default value: %k (= number of CPU core, see Dockerfile)
workers = 4
# Number of threads per worker
# default value: 4 (see Dockerfile)
threads = 4
# The rights granted on the created socket
chmod-socket = 666
# Plugin to use and interpreter config
single-interpreter = true
master = true
plugin = python3
lazy-apps = true
enable-threads = 4
# Module to import
module = searx.webapp
# Virtualenv and python path
pythonpath = /usr/local/searxng/
chdir = /usr/local/searxng/searx/
# automatically set processes name to something meaningful
auto-procname = true
# Disable request logging for privacy
disable-logging = true
log-5xx = true
# Set the max size of a request (request-body excluded)
buffer-size = 8192
# No keep alive
# See https://github.com/searx/searx-docker/issues/24
add-header = Connection: close
# Follow SIGTERM convention
# See https://github.com/searxng/searxng/issues/3427
die-on-term
# uwsgi serves the static files
static-map = /static=/usr/local/searxng/searx/static
static-gzip-all = True
offload-threads = 4
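
For sanity-checking edits to this file from Python, note that keys like `die-on-term` carry no value, so a plain `configparser` read fails without `allow_no_value=True`. A small sketch (an illustration, not part of the commit; the path assumes the layout shown in this diff):

```python
# Sketch: inspect the uwsgi.ini values from Python.
# allow_no_value=True is needed because keys like `die-on-term` have no value;
# interpolation=None avoids surprises with any literal % characters.
from configparser import ConfigParser

config = ConfigParser(allow_no_value=True, interpolation=None)
config.read("searxng/searxng/uwsgi.ini")
print("workers:", config.get("uwsgi", "workers"))
print("threads:", config.get("uwsgi", "threads"))
```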

searxng/settings.yml Normal file (2669 lines)

File diff suppressed because it is too large

searxng/setup_searxng.sh Executable file (91 lines)
View File

@ -0,0 +1,91 @@
#!/bin/bash

# Script to automate SearXNG setup and deployment with Docker Compose

command_exists() {
    command -v "$1" &> /dev/null
}

# Check if Docker is installed
if ! command_exists docker; then
    echo "Error: Docker is not installed. Please install Docker first."
    echo "On Ubuntu: sudo apt install docker.io"
    echo "On macOS/Windows: Install Docker Desktop from https://www.docker.com/get-started/"
    exit 1
fi

# Check if Docker daemon is running
echo "Checking if Docker daemon is running..."
if ! docker info &> /dev/null; then
    echo "Error: Docker daemon is not running or inaccessible."
    if [ "$(uname)" = "Linux" ]; then
        echo "Trying to start Docker service (may require sudo)..."
        if sudo systemctl start docker &> /dev/null; then
            echo "Docker started successfully."
        else
            echo "Failed to start Docker. Possible issues:"
            echo "1. Run this script with sudo: sudo bash setup_searxng.sh"
            echo "2. Check Docker installation: sudo systemctl status docker"
            echo "3. Add your user to the docker group: sudo usermod -aG docker $USER (then log out and back in)"
            exit 1
        fi
    else
        echo "Please start Docker manually:"
        echo "- On macOS/Windows: Open Docker Desktop."
        echo "- On Linux: Run 'sudo systemctl start docker' or check your distro's docs."
        exit 1
    fi
else
    echo "Docker daemon is running."
fi

# Check if Docker Compose is installed
if ! command_exists docker-compose; then
    echo "Error: Docker Compose is not installed. Please install it first."
    echo "On Ubuntu: sudo apt install docker-compose"
    echo "Or via pip: pip install docker-compose"
    exit 1
fi

# Create a directory for SearXNG config if it doesn't exist
mkdir -p searxng
cd . || exit

# Check if docker-compose.yml exists
if [ ! -f "docker-compose.yml" ]; then
    echo "Error: docker-compose.yml not found in the current directory."
    echo "Please create it before running this script."
    exit 1
fi

# Start containers to generate initial config files
echo "Starting containers for initial setup..."
if ! docker-compose up -d; then
    echo "Error: Failed to start containers. Check Docker logs with 'docker compose logs'."
    echo "Possible fixes: Run with sudo or ensure port 8080 is free."
    exit 1
fi
sleep 10

# Generate a secret key and update settings
SECRET_KEY=$(openssl rand -hex 32)
if [ -f "searxng/settings.yml" ]; then
    if [ "$(uname)" = "Darwin" ]; then
        sed -i '' "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    else
        sed -i "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    fi
else
    echo "Error: settings.yml not found. Initial setup may have failed."
    docker-compose logs searxng
    exit 1
fi

# Display status and access instructions
echo "SearXNG setup complete!"
docker ps -a --filter "name=searxng" --filter "name=redis"
echo "Access SearXNG at: http://localhost:8080"

View File

@ -93,4 +93,4 @@ if __name__ == '__main__':
    config.load()
    config.validate_model(config.model_name)
    state.model = config.model_name
-    app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)
\ No newline at end of file
+    app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)

View File

@ -3,8 +3,9 @@ import time
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
-from sources.tools.webSearch import webSearch
+from sources.tools.searxSearch import searxSearch
from sources.browser import Browser

class BrowserAgent(Agent):
    def __init__(self, model, name, prompt_path, provider):
        """
@ -12,7 +13,7 @@ class BrowserAgent(Agent):
        """
        super().__init__(model, name, prompt_path, provider)
        self.tools = {
-            "web_search": webSearch(),
+            "web_search": searxSearch(),
        }
        self.role = "deep research and web search"
        self.browser = Browser()
@ -65,23 +66,25 @@ class BrowserAgent(Agent):
        You can navigate to these links:
        {remaining_links}
-        You must choose a link (write it down) to navigate to, or go back.
-        For exemple you can say: i want to go to www.wikipedia.org/cats
-        Follow up with a summary of the page content (of the current page, not of the link), for example:
-        Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net"
-        The summary should include any useful finding that are useful in answering user query.
-        If a website does not have usefull information say Error, for exemple:
-        Error: This forum does not discus anything that can answer the user query
-        Be short, concise, direct.
-        If no link seem appropriate, please say "GO_BACK".
        Remember, you seek the information the user wants.
        The user query was: {user_prompt}
+        You must choose a link (write it down) to navigate to, or go back.
+        For example you can say: I want to go to www.wikipedia.org/cats
+        Always end with a sentence that summarizes when useful information is found, for example:
+        Summary: According to https://karpathy.github.io/ LeCun net is the earliest real-world application of a neural net
+        Do not say "according to this page", always write down the whole link.
+        If a website does not have useful information say Error, for example:
+        Error: This forum does not discuss anything that can answer the user query
+        Do not explain your choice, be short, concise.
        """
    def llm_decide(self, prompt):
        animate_thinking("Thinking...", color="status")
        self.memory.push('user', prompt)
-        answer, reasoning = self.llm_request(prompt)
+        answer, reasoning = self.llm_request()
        pretty_print("-"*100)
        pretty_print(answer, color="output")
        pretty_print("-"*100)
@ -119,16 +122,20 @@ class BrowserAgent(Agent):
    def save_notes(self, text):
        lines = text.split('\n')
        for line in lines:
-            if "summary:" in line.lower():
+            if "summary" in line.lower():
                self.notes.append(line)

    def conclude_prompt(self, user_query):
-        search_note = '\n -'.join(self.notes)
+        annotated_notes = [f"{i+1}: {note.lower().replace('summary:', '')}" for i, note in enumerate(self.notes)]
+        search_note = '\n'.join(annotated_notes)
+        print("AI research notes:\n", search_note)
        return f"""
-        Following a web search about:
+        Following a human request:
        {user_query}
-        Write a conclusion based on these notes:
+        A web AI made the following findings across different pages:
        {search_note}
+        Summarize the findings and provide a conclusion that answers the request.
        """

    def process(self, user_prompt, speech_module) -> str:
@ -136,8 +143,7 @@ class BrowserAgent(Agent):
animate_thinking(f"Searching...", color="status")
search_result_raw = self.tools["web_search"].execute([user_prompt], False)
search_result = self.jsonify_search_results(search_result_raw)
search_result = search_result[:10] # until futher improvement
search_result = self.jsonify_search_results(search_result_raw)[:5] # until futher improvement
prompt = self.make_newsearch_prompt(user_prompt, search_result)
unvisited = [None]
while not complete:
@ -147,14 +153,14 @@ class BrowserAgent(Agent):
                complete = True
                break
            links = self.extract_links(answer)
-            if len(unvisited) == 0:
-                break
            if len(links) == 0 or "GO_BACK" in answer:
                unvisited = self.select_unvisited(search_result)
                prompt = self.make_newsearch_prompt(user_prompt, unvisited)
                pretty_print(f"Going back to results. Still {len(unvisited)}", color="warning")
                links = []
                continue
+            if len(unvisited) == 0:
+                break
            animate_thinking(f"Navigating to {links[0]}", color="status")
            speech_module.speak(f"Navigating to {links[0]}")
            self.browser.go_to(links[0])
@ -163,11 +169,12 @@ class BrowserAgent(Agent):
            self.navigable_links = self.browser.get_navigable()
            prompt = self.make_navigation_prompt(user_prompt, page_text)
            speech_module.speak(answer)
+        self.browser.close()
        prompt = self.conclude_prompt(user_prompt)
        self.memory.push('user', prompt)
        answer, reasoning = self.llm_request(prompt)
        pretty_print(answer, color="output")
        speech_module.speak(answer)
        return answer, reasoning
if __name__ == "__main__":

View File

@ -1,7 +1,7 @@
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
-from sources.tools.webSearch import webSearch
+from sources.tools.searxSearch import searxSearch
from sources.tools.flightSearch import FlightSearch
from sources.tools.fileFinder import FileFinder
from sources.tools.BashInterpreter import BashInterpreter
@ -13,7 +13,7 @@ class CasualAgent(Agent):
        """
        super().__init__(model, name, prompt_path, provider)
        self.tools = {
-            "web_search": webSearch(),
+            "web_search": searxSearch(),
            "flight_search": FlightSearch(),
            "file_finder": FileFinder(),
            "bash": BashInterpreter()

View File

@ -27,7 +27,7 @@ class Interaction:
        self.transcriber = AudioTranscriber(self.ai_name, verbose=False)
        self.recorder = AudioRecorder()
        if tts_enabled:
-            self.speech.speak("Hello Sir, we are online and ready. What can I do for you ?")
+            self.speech.speak("Hello, we are online and ready. What can I do for you ?")
        if recover_last_session:
            self.recover_last_session()

View File

@ -0,0 +1,116 @@
import requests
from bs4 import BeautifulSoup
import os

if __name__ == "__main__":
    from tools import Tools
else:
    from sources.tools.tools import Tools

class searxSearch(Tools):
    def __init__(self, base_url: str = None):
        """
        A tool for searching a SearxNG instance and extracting URLs and titles.
        """
        super().__init__()
        self.tag = "web_search"
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL") # Requires a SearxNG base URL
        self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
        self.paywall_keywords = [
            "Member-only", "access denied", "restricted content", "404", "this page is not working"
        ]
        if not self.base_url:
            raise ValueError("SearxNG base URL must be provided either as an argument or via the SEARXNG_BASE_URL environment variable.")

    def link_valid(self, link):
        """Check if a link is valid."""
        # TODO: find a better way
        if not link.startswith("http"):
            return "Status: Invalid URL"
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        try:
            response = requests.get(link, headers=headers, timeout=5)
            status = response.status_code
            if status == 200:
                content = response.text.lower()
                if any(keyword in content for keyword in self.paywall_keywords):
                    return "Status: Possible Paywall"
                return "Status: OK"
            elif status == 404:
                return "Status: 404 Not Found"
            elif status == 403:
                return "Status: 403 Forbidden"
            else:
                return f"Status: {status} {response.reason}"
        except requests.exceptions.RequestException as e:
            return f"Error: {str(e)}"

    def check_all_links(self, links):
        """Check all links, one by one."""
        # TODO: make it asynchronous or similar
        statuses = []
        print("Web crawl to verify link accessibility...")
        for i, link in enumerate(links):
            status = self.link_valid(link)
            statuses.append(status)
        return statuses

    def execute(self, blocks: list, safety: bool = False) -> str:
        """Executes a search query against a SearxNG instance using POST and extracts URLs and titles."""
        if not blocks:
            return "Error: No search query provided."
        query = blocks[0].strip()
        if not query:
            return "Error: Empty search query provided."
        search_url = f"{self.base_url}/search"
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Pragma': 'no-cache',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent
        }
        data = f"q={query}&categories=general&language=auto&time_range=&safesearch=0&theme=simple"
        try:
            response = requests.post(search_url, headers=headers, data=data, verify=False)
            response.raise_for_status()
            html_content = response.text
            soup = BeautifulSoup(html_content, 'html.parser')
            results = []
            for article in soup.find_all('article', class_='result'):
                url_header = article.find('a', class_='url_header')
                if url_header:
                    url = url_header['href']
                    title = article.find('h3').text.strip() if article.find('h3') else "No Title"
                    description = article.find('p', class_='content').text.strip() if article.find('p', class_='content') else "No Description"
                    results.append(f"Title:{title}\nSnippet:{description}\nLink:{url}")
            if len(results) == 0:
                raise Exception("Searx search failed. Did you run start_services.sh? Did Docker die?")
            return "\n\n".join(results) # Return results as a single string, entries separated by blank lines
        except requests.exceptions.RequestException as e:
            return f"Error during search: {str(e)}"

    def execution_failure_check(self, output: str) -> bool:
        """
        Checks if the execution failed based on the output.
        """
        return "Error" in output

    def interpreter_feedback(self, output: str) -> str:
        """
        Feedback of web search to agent.
        """
        if self.execution_failure_check(output):
            return f"Web search failed: {output}"
        return f"Web search result:\n{output}"

if __name__ == "__main__":
    search_tool = searxSearch(base_url="http://127.0.0.1:8080")
    result = search_tool.execute(["are dogs better than cats?"])
    print(result)
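
The TODO in `check_all_links` asks for an asynchronous version. A minimal sketch of one way to do that with a thread pool, preserving result order (an illustration, not part of the commit):

```python
# Sketch: concurrent version of check_all_links using a thread pool.
# `link_valid` is the method defined above; executor.map preserves input order.
from concurrent.futures import ThreadPoolExecutor

def check_all_links_concurrent(search_tool: searxSearch, links: list) -> list:
    with ThreadPoolExecutor(max_workers=8) as executor:
        return list(executor.map(search_tool.link_valid, links))
```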

View File

@ -14,6 +14,11 @@ else:
    from sources.tools.tools import Tools

from sources.utility import animate_thinking, pretty_print

+"""
+WARNING
+webSearch is fully deprecated and is being replaced by searxSearch for web search.
+"""
+
class webSearch(Tools):
    def __init__(self, api_key: str = None):
        """

start_services.sh Executable file (4 lines)
View File

@ -0,0 +1,4 @@
#!/bin/bash
# start searxng service for internet search
cd searxng && ./setup_searxng.sh

View File

@ -0,0 +1,87 @@
import unittest
import os
import sys

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Add project root to Python path

from sources.tools.searxSearch import searxSearch
from dotenv import load_dotenv
import requests # Import the requests module

load_dotenv()

class TestSearxSearch(unittest.TestCase):
    def setUp(self):
        os.environ['SEARXNG_BASE_URL'] = "http://127.0.0.1:8080" # Set the environment variable
        self.base_url = os.getenv("SEARXNG_BASE_URL")
        self.search_tool = searxSearch(base_url=self.base_url)
        self.valid_query = "test query"
        self.invalid_query = ""

    def test_initialization_with_env_variable(self):
        # Ensure the tool initializes correctly with the base URL from the environment variable
        os.environ['SEARXNG_BASE_URL'] = "http://test.example.com"
        search_tool = searxSearch()
        self.assertEqual(search_tool.base_url, "http://test.example.com")
        del os.environ['SEARXNG_BASE_URL']

    def test_initialization_no_base_url(self):
        # Ensure the tool raises an error if no base URL is provided
        # Remove the environment variable to ensure the ValueError is raised
        if 'SEARXNG_BASE_URL' in os.environ:
            del os.environ['SEARXNG_BASE_URL']
        with self.assertRaises(ValueError):
            searxSearch(base_url=None)
        # Restore the environment variable after the test
        os.environ['SEARXNG_BASE_URL'] = "http://searx.lan"

    def test_execute_valid_query(self):
        # Execute the search and verify the result
        result = self.search_tool.execute([self.valid_query])
        print(f"Output from test_execute_valid_query: {result}")
        self.assertTrue(isinstance(result, str), "Result should be a string.")
        self.assertNotEqual(result, "", "Result should not be empty. Check SearxNG instance.")

    def test_execute_empty_query(self):
        # Test with an empty query
        result = self.search_tool.execute([""])
        print(f"Output from test_execute_empty_query: {result}")
        self.assertEqual(result, "Error: Empty search query provided.")

    def test_execute_no_query(self):
        # Test with no query provided
        result = self.search_tool.execute([])
        print(f"Output from test_execute_no_query: {result}")
        self.assertEqual(result, "Error: No search query provided.")

    def test_execute_request_exception(self):
        # Test a request exception by temporarily modifying the base_url to an invalid one
        original_base_url = self.search_tool.base_url
        self.search_tool.base_url = "http://invalid_url"
        try:
            result = self.search_tool.execute([self.valid_query])
            print(f"Output from test_execute_request_exception: {result}")
            self.assertTrue("Error during search" in result)
        finally:
            self.search_tool.base_url = original_base_url # Restore the original base_url

    def test_execute_no_results(self):
        # Execute the search and verify that an empty string is handled correctly
        result = self.search_tool.execute(["nonexistent query that should return no results"])
        print(f"Output from test_execute_no_results: {result}")
        self.assertTrue(isinstance(result, str), "Result should be a string.")
        # Allow empty results, but print a warning
        if result == "":
            print("Warning: SearxNG returned no results for a query that should have returned no results.")

    def test_execution_failure_check_error(self):
        # Test when the output contains an error
        output = "Error: Something went wrong"
        self.assertTrue(self.search_tool.execution_failure_check(output))

    def test_execution_failure_check_no_error(self):
        # Test when the output does not contain an error
        output = "Search completed successfully"
        self.assertFalse(self.search_tool.execution_failure_check(output))

if __name__ == '__main__':
    unittest.main()
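
Most of these tests require a live SearxNG instance. For CI without one, a mocked variant is possible; a sketch using `unittest.mock` to fake the HTTP layer, building on the imports in the file above (the HTML snippet is invented to match the `article`/`url_header` structure that `execute()` parses):

```python
# Sketch: an offline test that patches requests.post so no SearxNG instance is needed.
from unittest.mock import patch, MagicMock

class TestSearxSearchOffline(unittest.TestCase):
    @patch('sources.tools.searxSearch.requests.post')
    def test_execute_parses_results(self, mock_post):
        mock_response = MagicMock()
        mock_response.text = (
            '<article class="result">'
            '<a class="url_header" href="https://example.org"></a>'
            '<h3>Example</h3><p class="content">An example snippet.</p>'
            '</article>'
        )
        mock_response.raise_for_status.return_value = None
        mock_post.return_value = mock_response
        tool = searxSearch(base_url="http://127.0.0.1:8080")
        result = tool.execute(["query"])
        self.assertIn("Link:https://example.org", result)
```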