Feat : integration of searxng for private, api free web search

This commit is contained in:
martin legrand 2025-03-17 13:31:43 +01:00
parent e358fd4df7
commit dee4db53fe
15 changed files with 5763 additions and 8 deletions

View File

@ -1,3 +1,2 @@
OPENAI_API_KEY='dont share this, not needed for local providers'
SERPAPI_KEY='dont share this, needed for internet search'
AVIATIONSTACK_API_KEY='not needed if you dont search for flight'
SEARXNG_BASE_URL="http://127.0.0.1:8080"
OPENAI_API_KEY='dont share this, not needed for local providers'

View File

@ -108,6 +108,12 @@ provider_name = ollama
provider_model = deepseek-r1:7b
```
Start all services:
```sh
./start_services.sh
```
Run the assistant:
```sh
@ -156,6 +162,7 @@ provider_server_address = x.x.x.x:5000
Run the assistant:
```sh
./start_services.sh
python3 main.py
```
@ -176,6 +183,7 @@ provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
Run the assistant:
```sh
./start_services.sh
python3 main.py
```

1
searxng/.searxng.env Normal file
View File

@ -0,0 +1 @@
SEARXNG_BASE_URL="http://127.0.0.1:8080"

View File

@ -0,0 +1,44 @@
# Docker Compose stack: a SearXNG container plus a Valkey (Redis-compatible) container.
services:
  # Valkey server; the service and container keep the name "redis".
  # NOTE(review): presumably SearXNG reaches it through settings.yml, which is not visible here; confirm.
  redis:
    container_name: redis
    image: docker.io/valkey/valkey:8-alpine
    # --save 30 1: write a snapshot every 30 seconds if at least one key changed.
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    volumes:
      # Named volume (declared at the bottom of this file) holding the snapshots.
      - redis-data:/data
    # Drop every Linux capability, then add back only the three listed below.
    cap_drop:
      - ALL
    cap_add:
      - SETGID
      - SETUID
      - DAC_OVERRIDE
    # Keep container logs small: a single json-file log capped at 1 MB.
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
  # SearXNG web application, published on host port 8080.
  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      # Host directory ./searxng is mounted read-write as the container's /etc/searxng.
      - ./searxng:/etc/searxng:rw
    environment:
      - SEARXNG_BASE_URL=http://localhost:8080/
      - UWSGI_WORKERS=4
      - UWSGI_THREADS=4
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
    # Same log limits as the redis service.
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
volumes:
  redis-data:

2669
searxng/searxng/settings.yml Normal file

File diff suppressed because it is too large Load Diff

52
searxng/searxng/uwsgi.ini Normal file
View File

@ -0,0 +1,52 @@
[uwsgi]
# Who will run the code
uid = searxng
gid = searxng
# Number of workers (usually CPU count)
# default value: %k (= number of CPU core, see Dockerfile)
workers = 4
# Number of threads per worker
# default value: 4 (see Dockerfile)
threads = 4
# The right granted on the created socket
chmod-socket = 666
# Plugin to use and interpreter config
single-interpreter = true
master = true
plugin = python3
lazy-apps = true
enable-threads = 4
# Module to import
module = searx.webapp
# Virtualenv and python path
pythonpath = /usr/local/searxng/
chdir = /usr/local/searxng/searx/
# automatically set processes name to something meaningful
auto-procname = true
# Disable request logging for privacy
disable-logging = true
log-5xx = true
# Set the max size of a request (request-body excluded)
buffer-size = 8192
# No keep alive
# See https://github.com/searx/searx-docker/issues/24
add-header = Connection: close
# Follow SIGTERM convention
# See https://github.com/searxng/searxng/issues/3427
die-on-term
# uwsgi serves the static files
static-map = /static=/usr/local/searxng/searx/static
static-gzip-all = True
offload-threads = 4

2669
searxng/settings.yml Normal file

File diff suppressed because it is too large Load Diff

102
searxng/setup_searxng.sh Executable file
View File

@ -0,0 +1,102 @@
#!/bin/bash
# Script to automate SearXNG setup and deployment with Docker Compose.
#
# Steps:
#   1. Verify that Docker, the Docker daemon, docker-compose and openssl are usable.
#   2. Start the containers once.
#   3. Replace the placeholder "ultrasecretkey" in searxng/settings.yml with a
#      freshly generated random key.
#   4. Restart the containers so the new key is picked up.
#
# Exits with status 1 on any unrecoverable error.

# Return success (0) if the command named by $1 can be found, failure otherwise.
command_exists() {
    command -v "$1" &> /dev/null
}

# Check if Docker is installed
if ! command_exists docker; then
    echo "Error: Docker is not installed. Please install Docker first."
    echo "On Ubuntu: sudo apt install docker.io"
    echo "On macOS/Windows: Install Docker Desktop from https://www.docker.com/get-started/"
    exit 1
fi

# Check if Docker daemon is running
echo "Checking if Docker daemon is running..."
if ! docker info &> /dev/null; then
    echo "Error: Docker daemon is not running or inaccessible."
    if [ "$(uname)" = "Linux" ]; then
        # On Linux we can try to start the service ourselves via systemd.
        echo "Trying to start Docker service (may require sudo)..."
        if sudo systemctl start docker &> /dev/null; then
            echo "Docker started successfully."
        else
            echo "Failed to start Docker. Possible issues:"
            echo "1. Run this script with sudo: sudo bash setup_searxng.sh"
            echo "2. Check Docker installation: sudo systemctl status docker"
            echo "3. Add your user to the docker group: sudo usermod -aG docker $USER (then log out and back in)"
            exit 1
        fi
    else
        echo "Please start Docker manually:"
        echo "- On macOS/Windows: Open Docker Desktop."
        echo "- On Linux: Run 'sudo systemctl start docker' or check your distro's docs."
        exit 1
    fi
else
    echo "Docker daemon is running."
fi

# Check if Docker Compose is installed
if ! command_exists docker-compose; then
    echo "Error: Docker Compose is not installed. Please install it first."
    echo "On Ubuntu: sudo apt install docker-compose"
    echo "Or via pip: pip install docker-compose"
    exit 1
fi

# Check if openssl is installed. Without it the generated key would be empty
# and the sed substitution below would blank out the secret key in settings.yml.
if ! command_exists openssl; then
    echo "Error: openssl is not installed. It is required to generate the SearXNG secret key."
    exit 1
fi

# Work from the directory containing this script so that docker-compose.yml and
# searxng/settings.yml resolve no matter where the script is invoked from.
cd "$(dirname "$0")" || exit 1

# Create a directory for SearXNG config if it doesn't exist
mkdir -p searxng

# Check if docker-compose.yml exists
if [ ! -f "docker-compose.yml" ]; then
    echo "Error: docker-compose.yml not found in the current directory."
    echo "Please create it before running this script."
    exit 1
fi

# Start containers to generate initial config files
echo "Starting containers for initial setup..."
if ! docker-compose up -d; then
    echo "Error: Failed to start containers. Check Docker logs with 'docker-compose logs'."
    echo "Possible fixes: Run with sudo or ensure port 8080 is free."
    exit 1
fi

# Give the containers time to come up before touching the settings file.
sleep 10

# Generate a secret key and update settings
SECRET_KEY=$(openssl rand -hex 32)
if [ -f "searxng/settings.yml" ]; then
    # BSD sed (macOS) requires an explicit, possibly empty, backup suffix after -i.
    if [ "$(uname)" = "Darwin" ]; then
        sed -i '' "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    else
        sed -i "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    fi
else
    echo "Error: settings.yml not found. Initial setup may have failed."
    docker-compose logs searxng
    exit 1
fi

# Stop containers
echo "Stopping containers to apply security settings..."
docker-compose down

# Start containers again with secure settings
echo "Deploying SearXNG with secure settings..."
if ! docker-compose up -d; then
    echo "Error: Failed to deploy SearXNG. Check logs with 'docker-compose logs'."
    exit 1
fi

# Display status and access instructions
echo "SearXNG setup complete!"
docker ps -a --filter "name=searxng" --filter "name=redis"
echo "Access SearXNG at: http://localhost:8080"

View File

@ -93,4 +93,4 @@ if __name__ == '__main__':
config.load()
config.validate_model(config.model_name)
state.model = config.model_name
app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)
app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)

View File

@ -3,8 +3,9 @@ import time
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
from sources.tools.webSearch import webSearch
from sources.tools.searxSearch import searxSearch
from sources.browser import Browser
class BrowserAgent(Agent):
def __init__(self, model, name, prompt_path, provider):
"""
@ -12,7 +13,7 @@ class BrowserAgent(Agent):
"""
super().__init__(model, name, prompt_path, provider)
self.tools = {
"web_search": webSearch(),
"web_search": searxSearch(),
}
self.role = "deep research and web search"
self.browser = Browser()

View File

@ -1,7 +1,7 @@
from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent
from sources.tools.webSearch import webSearch
from sources.tools.searxSearch import searxSearch
from sources.tools.flightSearch import FlightSearch
from sources.tools.fileFinder import FileFinder
from sources.tools.BashInterpreter import BashInterpreter
@ -13,7 +13,7 @@ class CasualAgent(Agent):
"""
super().__init__(model, name, prompt_path, provider)
self.tools = {
"web_search": webSearch(),
"web_search": searxSearch(),
"flight_search": FlightSearch(),
"file_finder": FileFinder(),
"bash": BashInterpreter()

View File

@ -0,0 +1,114 @@
import requests
from bs4 import BeautifulSoup
import os
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
class searxSearch(Tools):
    """Web search tool that queries a SearxNG instance and scrapes its HTML results."""

    def __init__(self, base_url: str = None):
        """
        A tool for searching a SearxNG instance and extracting URLs and titles.

        Args:
            base_url: Base URL of the SearxNG instance. When omitted, the
                SEARXNG_BASE_URL environment variable is used instead.

        Raises:
            ValueError: If neither the argument nor the environment variable
                provides a base URL.
        """
        super().__init__()
        self.tag = "web_search"
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL")  # Requires a SearxNG base URL
        self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
        self.paywall_keywords = [
            "Member-only", "access denied", "restricted content", "404", "this page is not working"
        ]
        if not self.base_url:
            raise ValueError("SearxNG base URL must be provided either as an argument or via the SEARXNG_BASE_URL environment variable.")

    def link_valid(self, link):
        """
        Check whether a link is reachable and does not look paywalled.

        Args:
            link: URL to probe with an HTTP GET.

        Returns:
            A human-readable status string: "Status: ..." describing the HTTP
            outcome, or "Error: ..." when the request itself failed.
        """
        # TODO find a better way
        if not link.startswith("http"):
            return "Status: Invalid URL"
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        try:
            response = requests.get(link, headers=headers, timeout=5)
            status = response.status_code
            if status == 200:
                content = response.text.lower()
                # The page text is lower-cased, so the keywords must be too;
                # otherwise mixed-case entries such as "Member-only" never match.
                if any(keyword.lower() in content for keyword in self.paywall_keywords):
                    return "Status: Possible Paywall"
                return "Status: OK"
            elif status == 404:
                return "Status: 404 Not Found"
            elif status == 403:
                return "Status: 403 Forbidden"
            else:
                return f"Status: {status} {response.reason}"
        except requests.exceptions.RequestException as e:
            return f"Error: {str(e)}"

    def check_all_links(self, links):
        """
        Check all links, one by one.

        Args:
            links: Iterable of URLs.

        Returns:
            List of status strings from link_valid, in the same order as links.
        """
        # TODO make this asynchronous
        print("Web scrawl to verify links accessibilty...")
        return [self.link_valid(link) for link in links]

    def execute(self, blocks: list, safety: bool = False) -> str:
        """
        Executes a search query against a SearxNG instance using POST and extracts URLs and titles.

        Args:
            blocks: List whose first element is the search query.
            safety: Unused by this tool.

        Returns:
            The results joined by blank lines, each formatted as
            "Title:...\\nSnippet:...\\nLink:...". The string is empty when no
            result was parsed, and starts with "Error" when the query is
            missing or the HTTP request failed.
        """
        if not blocks:
            return "Error: No search query provided."
        query = blocks[0].strip()
        if not query:
            return "Error: Empty search query provided."
        # Tolerate a base URL configured with a trailing slash.
        search_url = f"{self.base_url.rstrip('/')}/search"
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Pragma': 'no-cache',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent
        }
        # Pass the form as a dict so requests URL-encodes every value; a query
        # containing '&', '=', '+' or '%' would otherwise corrupt the body.
        data = {
            'q': query,
            'categories': 'general',
            'language': 'auto',
            'time_range': '',
            'safesearch': '0',
            'theme': 'simple',
        }
        try:
            # NOTE(review): verify=False disables TLS certificate verification.
            # It is kept so instances with self-signed certificates keep working,
            # but it should be made configurable.
            # The timeout stops an unresponsive instance from blocking the agent forever.
            response = requests.post(search_url, headers=headers, data=data, verify=False, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return f"Error during search: {str(e)}"
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for article in soup.find_all('article', class_='result'):
            url_header = article.find('a', class_='url_header')
            if url_header is None:
                continue
            url = url_header.get('href')
            if not url:
                # A result without a link target is useless to the agent.
                continue
            heading = article.find('h3')
            title = heading.text.strip() if heading is not None else "No Title"
            snippet = article.find('p', class_='content')
            description = snippet.text.strip() if snippet is not None else "No Description"
            results.append(f"Title:{title}\nSnippet:{description}\nLink:{url}")
        return "\n\n".join(results)  # Return results as a single string, separated by newlines

    def execution_failure_check(self, output: str) -> bool:
        """
        Checks if the execution failed based on the output.

        Every failure message produced by execute starts with "Error". Only the
        prefix is tested, so a successful search whose titles or snippets
        merely contain the word "Error" is not reported as a failure.
        """
        return output.startswith("Error")

    def interpreter_feedback(self, output: str) -> str:
        """
        Feedback of web search to agent.

        Args:
            output: String returned by execute.

        Returns:
            The output prefixed with a failure or success header.
        """
        if self.execution_failure_check(output):
            return f"Web search failed: {output}"
        return f"Web search result:\n{output}"
# Manual smoke test: run one query against a SearxNG instance on localhost
# and print the raw string returned by execute.
if __name__ == "__main__":
    search_tool = searxSearch(base_url="http://127.0.0.1:8080")
    result = search_tool.execute(["are dog better than cat?"])
    print(result)

View File

@ -14,6 +14,11 @@ else:
from sources.tools.tools import Tools
from sources.utility import animate_thinking, pretty_print
"""
WARNING
webSearch is fully deprecated and is being replaced by searxSearch for web search.
"""
class webSearch(Tools):
def __init__(self, api_key: str = None):
"""

4
start_services.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# Start the SearXNG service used for internet search.
# The searxng directory is resolved relative to this script, so the script can
# be launched from any working directory, not only from the repository root.
cd "$(dirname "$0")/searxng" && ./setup_searxng.sh

View File

@ -0,0 +1,87 @@
import unittest
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Add project root to Python path
from sources.tools.searxSearch import searxSearch
from dotenv import load_dotenv
import requests # Import the requests module
load_dotenv()
class TestSearxSearch(unittest.TestCase):
    """Tests for the searxSearch tool.

    The test_execute_* cases that send a real query are integration tests:
    they expect a SearxNG instance listening on http://127.0.0.1:8080.
    """

    def setUp(self):
        # Remember the caller's value so tearDown can restore it; the tests
        # below overwrite or delete this environment variable.
        self._saved_base_url = os.environ.get('SEARXNG_BASE_URL')
        os.environ['SEARXNG_BASE_URL'] = "http://127.0.0.1:8080"  # Set the environment variable
        self.base_url = os.getenv("SEARXNG_BASE_URL")
        self.search_tool = searxSearch(base_url=self.base_url)
        self.valid_query = "test query"
        self.invalid_query = ""

    def tearDown(self):
        # Runs even when a test fails, so the environment never leaks
        # test-only values into later tests or the calling process.
        if self._saved_base_url is None:
            os.environ.pop('SEARXNG_BASE_URL', None)
        else:
            os.environ['SEARXNG_BASE_URL'] = self._saved_base_url

    def test_initialization_with_env_variable(self):
        # Ensure the tool initializes correctly with the base URL from the environment variable
        os.environ['SEARXNG_BASE_URL'] = "http://test.example.com"
        search_tool = searxSearch()
        self.assertEqual(search_tool.base_url, "http://test.example.com")

    def test_initialization_no_base_url(self):
        # Ensure the tool raises an error if no base URL is provided.
        # Remove the environment variable so the ValueError path is reached.
        os.environ.pop('SEARXNG_BASE_URL', None)
        with self.assertRaises(ValueError):
            searxSearch(base_url=None)

    def test_execute_valid_query(self):
        # Execute the search and verify the result
        result = self.search_tool.execute([self.valid_query])
        print(f"Output from test_execute_valid_query: {result}")
        self.assertIsInstance(result, str, "Result should be a string.")
        self.assertNotEqual(result, "", "Result should not be empty. Check SearxNG instance.")

    def test_execute_empty_query(self):
        # Test with an empty query
        result = self.search_tool.execute([self.invalid_query])
        print(f"Output from test_execute_empty_query: {result}")
        self.assertEqual(result, "Error: Empty search query provided.")

    def test_execute_no_query(self):
        # Test with no query provided
        result = self.search_tool.execute([])
        print(f"Output from test_execute_no_query: {result}")
        self.assertEqual(result, "Error: No search query provided.")

    def test_execute_request_exception(self):
        # Force a request exception by pointing the tool at an unresolvable host.
        # setUp builds a fresh tool for every test, so nothing needs restoring.
        self.search_tool.base_url = "http://invalid_url"
        result = self.search_tool.execute([self.valid_query])
        print(f"Output from test_execute_request_exception: {result}")
        self.assertIn("Error during search", result)

    def test_execute_no_results(self):
        # Execute the search and verify that an empty string is handled correctly
        result = self.search_tool.execute(["nonexistent query that should return no results"])
        print(f"Output from test_execute_no_results: {result}")
        self.assertIsInstance(result, str, "Result should be a string.")
        # Allow empty results, but print a warning
        if result == "":
            print("Warning: SearxNG returned no results for a query that should have returned no results.")

    def test_execution_failure_check_error(self):
        # Test when the output contains an error
        output = "Error: Something went wrong"
        self.assertTrue(self.search_tool.execution_failure_check(output))

    def test_execution_failure_check_no_error(self):
        # Test when the output does not contain an error
        output = "Search completed successfully"
        self.assertFalse(self.search_tool.execution_failure_check(output))
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()