Feat: integration of SearXNG for private, API-free web search

This commit is contained in:
martin legrand 2025-03-17 13:31:43 +01:00
parent e358fd4df7
commit dee4db53fe
15 changed files with 5763 additions and 8 deletions

View File

@ -1,3 +1,2 @@
OPENAI_API_KEY='dont share this, not needed for local providers' SEARXNG_BASE_URL="http://127.0.0.1:8080"
SERPAPI_KEY='dont share this, needed for internet search' OPENAI_API_KEY='dont share this, not needed for local providers'
AVIATIONSTACK_API_KEY='not needed if you dont search for flight'

View File

@ -108,6 +108,12 @@ provider_name = ollama
provider_model = deepseek-r1:7b provider_model = deepseek-r1:7b
``` ```
Start all services:
```sh
./start_services.sh
```
Run the assistant: Run the assistant:
```sh ```sh
@ -156,6 +162,7 @@ provider_server_address = x.x.x.x:5000
Run the assistant: Run the assistant:
```sh ```sh
./start_services.sh
python3 main.py python3 main.py
``` ```
@ -176,6 +183,7 @@ provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
Run the assistant: Run the assistant:
```sh ```sh
./start_services.sh
python3 main.py python3 main.py
``` ```

1
searxng/.searxng.env Normal file
View File

@ -0,0 +1 @@
SEARXNG_BASE_URL="http://127.0.0.1:8080"

View File

@ -0,0 +1,44 @@
# Docker Compose stack for the local SearXNG instance used by the web_search
# tool: a Valkey cache (named "redis") plus the SearXNG web application.
services:
  # Cache/storage backend. The service and container are named "redis" but
  # the image actually run is Valkey.
  redis:
    container_name: redis
    image: docker.io/valkey/valkey:8-alpine
    command: valkey-server --save 30 1 --loglevel warning
    restart: unless-stopped
    volumes:
      # Named volume (declared at the bottom of this file) mounted at /data.
      - redis-data:/data
    # Drop every capability, then add back only the three listed below.
    cap_drop:
      - ALL
    cap_add:
      - SETGID
      - SETUID
      - DAC_OVERRIDE
    # Keep container logs tiny: a single json-file of at most 1 MB.
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
  # The SearXNG web application itself.
  searxng:
    container_name: searxng
    image: docker.io/searxng/searxng:latest
    restart: unless-stopped
    ports:
      # Published on host port 8080; this is the port the
      # SEARXNG_BASE_URL values elsewhere in the project point at.
      - "8080:8080"
    volumes:
      # The local ./searxng directory is mounted read-write as the
      # container's /etc/searxng configuration directory.
      - ./searxng:/etc/searxng:rw
    environment:
      - SEARXNG_BASE_URL=http://localhost:8080/
      # NOTE(review): uwsgi.ini also sets workers = 4 / threads = 4;
      # presumably these variables feed the same settings - confirm.
      - UWSGI_WORKERS=4
      - UWSGI_THREADS=4
    # NOTE(review): unlike the redis service there is no cap_drop here, so
    # these capabilities are added on top of the defaults - confirm intended.
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
    logging:
      driver: "json-file"
      options:
        max-size: "1m"
        max-file: "1"
# Named volumes used by the services above.
volumes:
  redis-data:

2669
searxng/searxng/settings.yml Normal file

File diff suppressed because it is too large Load Diff

52
searxng/searxng/uwsgi.ini Normal file
View File

@ -0,0 +1,52 @@
# uWSGI configuration for the SearXNG web application (module searx.webapp).
# The worker and thread counts below use the same values (4 and 4) as the
# UWSGI_WORKERS / UWSGI_THREADS variables in the compose file.
[uwsgi]
# Who will run the code
uid = searxng
gid = searxng
# Number of workers (usually CPU count)
# default value: %k (= number of CPU core, see Dockerfile)
workers = 4
# Number of threads per worker
# default value: 4 (see Dockerfile)
threads = 4
# The right granted on the created socket
chmod-socket = 666
# Plugin to use and interpreter config
single-interpreter = true
master = true
plugin = python3
lazy-apps = true
# NOTE(review): a number is given here rather than true/false; presumably it
# mirrors the thread count above - confirm against the uWSGI documentation.
enable-threads = 4
# Module to import
module = searx.webapp
# Virtualenv and python path
pythonpath = /usr/local/searxng/
chdir = /usr/local/searxng/searx/
# automatically set processes name to something meaningful
auto-procname = true
# Disable request logging for privacy
disable-logging = true
# NOTE(review): presumably keeps logging of 5xx responses enabled despite
# disable-logging above - confirm.
log-5xx = true
# Set the max size of a request (request-body excluded)
buffer-size = 8192
# No keep alive
# See https://github.com/searx/searx-docker/issues/24
add-header = Connection: close
# Follow SIGTERM convention
# See https://github.com/searxng/searxng/issues/3427
die-on-term
# uwsgi serves the static files
static-map = /static=/usr/local/searxng/searx/static
static-gzip-all = True
offload-threads = 4

2669
searxng/settings.yml Normal file

File diff suppressed because it is too large Load Diff

102
searxng/setup_searxng.sh Executable file
View File

@ -0,0 +1,102 @@
#!/bin/bash
# Script to automate SearXNG setup and deployment with Docker Compose.
#
# Run it from the directory that contains docker-compose.yml. It:
#   1. checks that Docker and Docker Compose are available,
#   2. starts the containers once,
#   3. replaces the "ultrasecretkey" placeholder in searxng/settings.yml
#      with a freshly generated random key,
#   4. restarts the containers so the new key is picked up.

# Succeed when the command named by $1 can be found.
command_exists() {
    command -v "$1" &> /dev/null
}

# Check if Docker is installed
if ! command_exists docker; then
    echo "Error: Docker is not installed. Please install Docker first."
    echo "On Ubuntu: sudo apt install docker.io"
    echo "On macOS/Windows: Install Docker Desktop from https://www.docker.com/get-started/"
    exit 1
fi

# Check if Docker daemon is running
echo "Checking if Docker daemon is running..."
if ! docker info &> /dev/null; then
    echo "Error: Docker daemon is not running or inaccessible."
    if [ "$(uname)" = "Linux" ]; then
        echo "Trying to start Docker service (may require sudo)..."
        if sudo systemctl start docker &> /dev/null; then
            echo "Docker started successfully."
        else
            echo "Failed to start Docker. Possible issues:"
            echo "1. Run this script with sudo: sudo bash setup_searxng.sh"
            echo "2. Check Docker installation: sudo systemctl status docker"
            echo "3. Add your user to the docker group: sudo usermod -aG docker $USER (then log out and back in)"
            exit 1
        fi
    else
        echo "Please start Docker manually:"
        echo "- On macOS/Windows: Open Docker Desktop."
        echo "- On Linux: Run 'sudo systemctl start docker' or check your distro's docs."
        exit 1
    fi
else
    echo "Docker daemon is running."
fi

# Check if Docker Compose is installed.
# The standalone "docker-compose" binary is preferred (it is what this script
# has always used); the "docker compose" plugin is accepted as a fallback so
# that installations shipping only Compose v2 are not rejected.
if command_exists docker-compose; then
    COMPOSE=(docker-compose)
elif docker compose version &> /dev/null; then
    COMPOSE=(docker compose)
else
    echo "Error: Docker Compose is not installed. Please install it first."
    echo "On Ubuntu: sudo apt install docker-compose"
    echo "Or via pip: pip install docker-compose"
    exit 1
fi

# Create a directory for SearXNG config if it doesn't exist
mkdir -p searxng
# Stay in the current directory; this only fails if it is no longer accessible.
cd . || exit

# Check if docker-compose.yml exists
if [ ! -f "docker-compose.yml" ]; then
    echo "Error: docker-compose.yml not found in the current directory."
    echo "Please create it before running this script."
    exit 1
fi

# Start containers to generate initial config files
echo "Starting containers for initial setup..."
if ! "${COMPOSE[@]}" up -d; then
    echo "Error: Failed to start containers. Check Docker logs with '${COMPOSE[*]} logs'."
    echo "Possible fixes: Run with sudo or ensure port 8080 is free."
    exit 1
fi
# Give the containers a moment to come up before touching the config.
sleep 10

# Generate a secret key and update settings
if [ -f "searxng/settings.yml" ]; then
    # Only do the work while the placeholder is still present; on later runs
    # the key has already been replaced and there is nothing to substitute.
    if grep -q "ultrasecretkey" searxng/settings.yml; then
        # Refuse to continue with an empty key: substituting "" would wipe
        # the placeholder and leave the instance without any secret.
        if ! SECRET_KEY=$(openssl rand -hex 32) || [ -z "$SECRET_KEY" ]; then
            echo "Error: Failed to generate a secret key. Is openssl installed?"
            exit 1
        fi
        # macOS sed needs an explicit (empty) backup suffix after -i.
        if [ "$(uname)" = "Darwin" ]; then
            SED_INPLACE=(sed -i '')
        else
            SED_INPLACE=(sed -i)
        fi
        "${SED_INPLACE[@]}" "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    fi
else
    echo "Error: settings.yml not found. Initial setup may have failed."
    "${COMPOSE[@]}" logs searxng
    exit 1
fi

# Stop containers
echo "Stopping containers to apply security settings..."
"${COMPOSE[@]}" down

# Start containers again with secure settings
echo "Deploying SearXNG with secure settings..."
if ! "${COMPOSE[@]}" up -d; then
    echo "Error: Failed to deploy SearXNG. Check logs with '${COMPOSE[*]} logs'."
    exit 1
fi

# Display status and access instructions
echo "SearXNG setup complete!"
docker ps -a --filter "name=searxng" --filter "name=redis"
echo "Access SearXNG at: http://localhost:8080"

View File

@ -93,4 +93,4 @@ if __name__ == '__main__':
config.load() config.load()
config.validate_model(config.model_name) config.validate_model(config.model_name)
state.model = config.model_name state.model = config.model_name
app.run(host='0.0.0.0', port=5000, debug=False, threaded=True) app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)

View File

@ -3,8 +3,9 @@ import time
from sources.utility import pretty_print, animate_thinking from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent from sources.agents.agent import Agent
from sources.tools.webSearch import webSearch from sources.tools.searxSearch import searxSearch
from sources.browser import Browser from sources.browser import Browser
class BrowserAgent(Agent): class BrowserAgent(Agent):
def __init__(self, model, name, prompt_path, provider): def __init__(self, model, name, prompt_path, provider):
""" """
@ -12,7 +13,7 @@ class BrowserAgent(Agent):
""" """
super().__init__(model, name, prompt_path, provider) super().__init__(model, name, prompt_path, provider)
self.tools = { self.tools = {
"web_search": webSearch(), "web_search": searxSearch(),
} }
self.role = "deep research and web search" self.role = "deep research and web search"
self.browser = Browser() self.browser = Browser()

View File

@ -1,7 +1,7 @@
from sources.utility import pretty_print, animate_thinking from sources.utility import pretty_print, animate_thinking
from sources.agents.agent import Agent from sources.agents.agent import Agent
from sources.tools.webSearch import webSearch from sources.tools.searxSearch import searxSearch
from sources.tools.flightSearch import FlightSearch from sources.tools.flightSearch import FlightSearch
from sources.tools.fileFinder import FileFinder from sources.tools.fileFinder import FileFinder
from sources.tools.BashInterpreter import BashInterpreter from sources.tools.BashInterpreter import BashInterpreter
@ -13,7 +13,7 @@ class CasualAgent(Agent):
""" """
super().__init__(model, name, prompt_path, provider) super().__init__(model, name, prompt_path, provider)
self.tools = { self.tools = {
"web_search": webSearch(), "web_search": searxSearch(),
"flight_search": FlightSearch(), "flight_search": FlightSearch(),
"file_finder": FileFinder(), "file_finder": FileFinder(),
"bash": BashInterpreter() "bash": BashInterpreter()

View File

@ -0,0 +1,114 @@
import requests
from bs4 import BeautifulSoup
import os
if __name__ == "__main__":
from tools import Tools
else:
from sources.tools.tools import Tools
class searxSearch(Tools):
    """Web-search tool that queries a SearxNG instance and scrapes its HTML results."""

    def __init__(self, base_url: str = None):
        """
        A tool for searching a SearxNG instance and extracting URLs and titles.

        Args:
            base_url: Root URL of the SearxNG instance. When omitted, the
                SEARXNG_BASE_URL environment variable is used instead.

        Raises:
            ValueError: If neither the argument nor the environment variable
                provides a base URL.
        """
        super().__init__()
        self.tag = "web_search"
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL")  # Requires a SearxNG base URL
        self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
        self.paywall_keywords = [
            "Member-only", "access denied", "restricted content", "404", "this page is not working"
        ]
        if not self.base_url:
            raise ValueError("SearxNG base URL must be provided either as an argument or via the SEARXNG_BASE_URL environment variable.")

    def link_valid(self, link):
        """
        Check if a link is valid.

        Fetches the link and returns a short human-readable status string
        ("Status: ..." or "Error: ...") instead of raising.
        """
        # TODO find a better way
        if not link.startswith("http"):
            return "Status: Invalid URL"
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        try:
            response = requests.get(link, headers=headers, timeout=5)
        except requests.exceptions.RequestException as e:
            return f"Error: {str(e)}"
        status = response.status_code
        if status == 200:
            content = response.text.lower()
            # The page text is lower-cased, so the keywords must be too;
            # otherwise mixed-case entries such as "Member-only" never match.
            if any(keyword.lower() in content for keyword in self.paywall_keywords):
                return "Status: Possible Paywall"
            return "Status: OK"
        if status == 404:
            return "Status: 404 Not Found"
        if status == 403:
            return "Status: 403 Forbidden"
        return f"Status: {status} {response.reason}"

    def check_all_links(self, links):
        """Check all links, one by one, and return their status strings in order."""
        # TODO Make it asynchronous or similar
        print("Web scrawl to verify links accessibilty...")
        return [self.link_valid(link) for link in links]

    def _parse_results(self, html_content: str) -> list:
        """Extract "Title/Snippet/Link" entries from a SearxNG HTML result page."""
        soup = BeautifulSoup(html_content, 'html.parser')
        results = []
        for article in soup.find_all('article', class_='result'):
            url_header = article.find('a', class_='url_header')
            # Skip entries without a usable link rather than raising KeyError.
            url = url_header.get('href') if url_header else None
            if not url:
                continue
            heading = article.find('h3')
            title = heading.text.strip() if heading else "No Title"
            snippet = article.find('p', class_='content')
            description = snippet.text.strip() if snippet else "No Description"
            results.append(f"Title:{title}\nSnippet:{description}\nLink:{url}")
        return results

    def execute(self, blocks: list, safety: bool = False) -> str:
        """
        Executes a search query against a SearxNG instance using POST and extracts URLs and titles.

        Args:
            blocks: List whose first element is the search query.
            safety: Unused by this tool.

        Returns:
            The results joined by blank lines (an empty string when nothing
            was found), or a message starting with "Error" on failure.
        """
        if not blocks:
            return "Error: No search query provided."
        query = blocks[0].strip()
        if not query:
            return "Error: Empty search query provided."
        # Tolerate a base URL configured with a trailing slash.
        search_url = f"{self.base_url.rstrip('/')}/search"
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Pragma': 'no-cache',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent
        }
        # Passed as a mapping so requests form-encodes every value; a query
        # containing '&', '=', '+' or '%' would otherwise corrupt the body.
        form = {
            'q': query,
            'categories': 'general',
            'language': 'auto',
            'time_range': '',
            'safesearch': '0',
            'theme': 'simple',
        }
        try:
            # NOTE(review): verify=False disables TLS certificate checks. It is
            # kept so self-signed instances keep working; consider making it
            # configurable.
            # The timeout keeps a stalled instance from blocking the caller forever.
            response = requests.post(search_url, headers=headers, data=form, verify=False, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return f"Error during search: {str(e)}"
        # Return results as a single string, separated by blank lines
        return "\n\n".join(self._parse_results(response.text))

    def execution_failure_check(self, output: str) -> bool:
        """
        Checks if the execution failed based on the output.

        Every failure message produced by execute() starts with "Error".
        Only the prefix is checked so that successful results whose titles
        or snippets merely mention "Error" are not reported as failures.
        """
        return output.lstrip().startswith("Error")

    def interpreter_feedback(self, output: str) -> str:
        """
        Feedback of web search to agent.
        """
        if self.execution_failure_check(output):
            return f"Web search failed: {output}"
        return f"Web search result:\n{output}"
if __name__ == "__main__":
    # Manual smoke test: run one query against a local SearxNG instance
    # and print whatever the tool returns.
    local_tool = searxSearch(base_url="http://127.0.0.1:8080")
    print(local_tool.execute(["are dog better than cat?"]))

View File

@ -14,6 +14,11 @@ else:
from sources.tools.tools import Tools from sources.tools.tools import Tools
from sources.utility import animate_thinking, pretty_print from sources.utility import animate_thinking, pretty_print
"""
WARNING
webSearch is fully deprecated and is being replaced by searxSearch for web search.
"""
class webSearch(Tools): class webSearch(Tools):
def __init__(self, api_key: str = None): def __init__(self, api_key: str = None):
""" """

4
start_services.sh Executable file
View File

@ -0,0 +1,4 @@
#!/bin/bash
# start searxng service for internet search
# The searxng directory is resolved relative to this script's own location,
# so the script also works when it is invoked from another directory.
cd "$(dirname "${BASH_SOURCE[0]}")/searxng" && ./setup_searxng.sh

View File

@ -0,0 +1,87 @@
import unittest
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Add project root to Python path
from sources.tools.searxSearch import searxSearch
from dotenv import load_dotenv
import requests # Import the requests module
load_dotenv()
class TestSearxSearch(unittest.TestCase):
    """Tests for the searxSearch tool.

    The query tests send real requests to http://127.0.0.1:8080, so they
    need a SearxNG instance listening there. SEARXNG_BASE_URL is changed by
    several tests and is restored to its pre-test value after each one.
    """

    def setUp(self):
        # Register the restore first so it runs even if the rest of setUp fails.
        self.addCleanup(self._restore_env, os.environ.get('SEARXNG_BASE_URL'))
        os.environ['SEARXNG_BASE_URL'] = "http://127.0.0.1:8080"  # Set the environment variable
        self.base_url = os.getenv("SEARXNG_BASE_URL")
        self.search_tool = searxSearch(base_url=self.base_url)
        self.valid_query = "test query"
        self.invalid_query = ""

    @staticmethod
    def _restore_env(previous_value):
        """Put SEARXNG_BASE_URL back to the value it had before the test."""
        if previous_value is None:
            os.environ.pop('SEARXNG_BASE_URL', None)
        else:
            os.environ['SEARXNG_BASE_URL'] = previous_value

    def test_initialization_with_env_variable(self):
        # Ensure the tool initializes correctly with the base URL from the environment variable
        os.environ['SEARXNG_BASE_URL'] = "http://test.example.com"
        search_tool = searxSearch()
        self.assertEqual(search_tool.base_url, "http://test.example.com")

    def test_initialization_no_base_url(self):
        # Ensure the tool raises an error if no base URL is provided.
        # Remove the environment variable so the ValueError is raised.
        os.environ.pop('SEARXNG_BASE_URL', None)
        with self.assertRaises(ValueError):
            searxSearch(base_url=None)

    def test_execute_valid_query(self):
        # Execute the search and verify the result
        result = self.search_tool.execute([self.valid_query])
        print(f"Output from test_execute_valid_query: {result}")
        self.assertIsInstance(result, str, "Result should be a string.")
        self.assertNotEqual(result, "", "Result should not be empty. Check SearxNG instance.")

    def test_execute_empty_query(self):
        # Test with an empty query
        result = self.search_tool.execute([self.invalid_query])
        print(f"Output from test_execute_empty_query: {result}")
        self.assertEqual(result, "Error: Empty search query provided.")

    def test_execute_no_query(self):
        # Test with no query provided
        result = self.search_tool.execute([])
        print(f"Output from test_execute_no_query: {result}")
        self.assertEqual(result, "Error: No search query provided.")

    def test_execute_request_exception(self):
        # Test a request exception by pointing the tool at an invalid base URL.
        # setUp builds a fresh tool for every test, so nothing needs restoring.
        self.search_tool.base_url = "http://invalid_url"
        result = self.search_tool.execute([self.valid_query])
        print(f"Output from test_execute_request_exception: {result}")
        self.assertIn("Error during search", result)

    def test_execute_no_results(self):
        # Execute the search and verify that an empty string is handled correctly
        result = self.search_tool.execute(["nonexistent query that should return no results"])
        print(f"Output from test_execute_no_results: {result}")
        self.assertIsInstance(result, str, "Result should be a string.")
        # Allow empty results, but print a warning
        if result == "":
            print("Warning: SearxNG returned no results for a query that should have returned no results.")

    def test_execution_failure_check_error(self):
        # Test when the output contains an error
        output = "Error: Something went wrong"
        self.assertTrue(self.search_tool.execution_failure_check(output))

    def test_execution_failure_check_no_error(self):
        # Test when the output does not contain an error
        output = "Search completed successfully"
        self.assertFalse(self.search_tool.execution_failure_check(output))
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()