mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-03 01:30:11 +00:00
Feat : integration of searxng for private, api free web search
This commit is contained in:
parent
e358fd4df7
commit
dee4db53fe
@ -1,3 +1,2 @@
|
||||
OPENAI_API_KEY='dont share this, not needed for local providers'
|
||||
SERPAPI_KEY='dont share this, needed for internet search'
|
||||
AVIATIONSTACK_API_KEY='not needed if you dont search for flight'
|
||||
SEARXNG_BASE_URL="http://127.0.0.1:8080"
|
||||
OPENAI_API_KEY='dont share this, not needed for local providers'
|
@ -108,6 +108,12 @@ provider_name = ollama
|
||||
provider_model = deepseek-r1:7b
|
||||
```
|
||||
|
||||
Start all services:
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
```
|
||||
|
||||
Run the assistant:
|
||||
|
||||
```sh
|
||||
@ -156,6 +162,7 @@ provider_server_address = x.x.x.x:5000
|
||||
Run the assistant:
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
python3 main.py
|
||||
```
|
||||
|
||||
@ -176,6 +183,7 @@ provider_server_address = 127.0.0.1:5000 # can be set to anything, not used
|
||||
Run the assistant:
|
||||
|
||||
```sh
|
||||
./start_services.sh
|
||||
python3 main.py
|
||||
```
|
||||
|
||||
|
1
searxng/.searxng.env
Normal file
1
searxng/.searxng.env
Normal file
@ -0,0 +1 @@
|
||||
SEARXNG_BASE_URL="http://127.0.0.1:8080"
|
44
searxng/docker-compose.yml
Normal file
44
searxng/docker-compose.yml
Normal file
@ -0,0 +1,44 @@
|
||||
services:
|
||||
redis:
|
||||
container_name: redis
|
||||
image: docker.io/valkey/valkey:8-alpine
|
||||
command: valkey-server --save 30 1 --loglevel warning
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
cap_drop:
|
||||
- ALL
|
||||
cap_add:
|
||||
- SETGID
|
||||
- SETUID
|
||||
- DAC_OVERRIDE
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "1m"
|
||||
max-file: "1"
|
||||
|
||||
searxng:
|
||||
container_name: searxng
|
||||
image: docker.io/searxng/searxng:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- ./searxng:/etc/searxng:rw
|
||||
environment:
|
||||
- SEARXNG_BASE_URL=http://localhost:8080/
|
||||
- UWSGI_WORKERS=4
|
||||
- UWSGI_THREADS=4
|
||||
cap_add:
|
||||
- CHOWN
|
||||
- SETGID
|
||||
- SETUID
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "1m"
|
||||
max-file: "1"
|
||||
|
||||
volumes:
|
||||
redis-data:
|
2669
searxng/searxng/settings.yml
Normal file
2669
searxng/searxng/settings.yml
Normal file
File diff suppressed because it is too large
Load Diff
52
searxng/searxng/uwsgi.ini
Normal file
52
searxng/searxng/uwsgi.ini
Normal file
@ -0,0 +1,52 @@
|
||||
[uwsgi]
|
||||
# Who will run the code
|
||||
uid = searxng
|
||||
gid = searxng
|
||||
|
||||
# Number of workers (usually CPU count)
|
||||
# default value: %k (= number of CPU core, see Dockerfile)
|
||||
workers = 4
|
||||
|
||||
# Number of threads per worker
|
||||
# default value: 4 (see Dockerfile)
|
||||
threads = 4
|
||||
|
||||
# The right granted on the created socket
|
||||
chmod-socket = 666
|
||||
|
||||
# Plugin to use and interpreter config
|
||||
single-interpreter = true
|
||||
master = true
|
||||
plugin = python3
|
||||
lazy-apps = true
|
||||
enable-threads = 4
|
||||
|
||||
# Module to import
|
||||
module = searx.webapp
|
||||
|
||||
# Virtualenv and python path
|
||||
pythonpath = /usr/local/searxng/
|
||||
chdir = /usr/local/searxng/searx/
|
||||
|
||||
# automatically set processes name to something meaningful
|
||||
auto-procname = true
|
||||
|
||||
# Disable request logging for privacy
|
||||
disable-logging = true
|
||||
log-5xx = true
|
||||
|
||||
# Set the max size of a request (request-body excluded)
|
||||
buffer-size = 8192
|
||||
|
||||
# No keep alive
|
||||
# See https://github.com/searx/searx-docker/issues/24
|
||||
add-header = Connection: close
|
||||
|
||||
# Follow SIGTERM convention
|
||||
# See https://github.com/searxng/searxng/issues/3427
|
||||
die-on-term
|
||||
|
||||
# uwsgi serves the static files
|
||||
static-map = /static=/usr/local/searxng/searx/static
|
||||
static-gzip-all = True
|
||||
offload-threads = 4
|
2669
searxng/settings.yml
Normal file
2669
searxng/settings.yml
Normal file
File diff suppressed because it is too large
Load Diff
102
searxng/setup_searxng.sh
Executable file
102
searxng/setup_searxng.sh
Executable file
@ -0,0 +1,102 @@
|
||||
#!/bin/bash

# Script to automate SearXNG setup and deployment with Docker Compose.
#
# Steps:
#   1. Verify Docker is installed and the daemon is reachable.
#   2. Locate a Docker Compose command (legacy binary or v2 plugin).
#   3. Start the containers once so the initial config exists.
#   4. Replace the placeholder secret key in searxng/settings.yml.
#   5. Restart the containers so the new key is picked up.

# Return success when the given command name is available on PATH.
command_exists() {
    command -v "$1" &> /dev/null
}

# Check if Docker is installed
if ! command_exists docker; then
    echo "Error: Docker is not installed. Please install Docker first."
    echo "On Ubuntu: sudo apt install docker.io"
    echo "On macOS/Windows: Install Docker Desktop from https://www.docker.com/get-started/"
    exit 1
fi

# Check if Docker daemon is running
echo "Checking if Docker daemon is running..."
if ! docker info &> /dev/null; then
    echo "Error: Docker daemon is not running or inaccessible."
    if [ "$(uname)" = "Linux" ]; then
        echo "Trying to start Docker service (may require sudo)..."
        if sudo systemctl start docker &> /dev/null; then
            echo "Docker started successfully."
        else
            echo "Failed to start Docker. Possible issues:"
            echo "1. Run this script with sudo: sudo bash setup_searxng.sh"
            echo "2. Check Docker installation: sudo systemctl status docker"
            echo "3. Add your user to the docker group: sudo usermod -aG docker $USER (then log out and back in)"
            exit 1
        fi
    else
        echo "Please start Docker manually:"
        echo "- On macOS/Windows: Open Docker Desktop."
        echo "- On Linux: Run 'sudo systemctl start docker' or check your distro's docs."
        exit 1
    fi
else
    echo "Docker daemon is running."
fi

# Check if Docker Compose is installed.
# Prefer the legacy standalone binary (original behaviour) and fall back to
# the Compose v2 plugin, which is invoked as "docker compose".
if command_exists docker-compose; then
    COMPOSE=(docker-compose)
elif docker compose version &> /dev/null; then
    COMPOSE=(docker compose)
else
    echo "Error: Docker Compose is not installed. Please install it first."
    echo "On Ubuntu: sudo apt install docker-compose"
    echo "Or via pip: pip install docker-compose"
    exit 1
fi

# Work from the directory that contains this script so the relative paths
# below (docker-compose.yml, searxng/settings.yml) resolve no matter where
# the script was launched from.
cd "$(dirname "$0")" || exit 1

# Create a directory for SearXNG config if it doesn’t exist
mkdir -p searxng

# Check if docker-compose.yml exists
if [ ! -f "docker-compose.yml" ]; then
    echo "Error: docker-compose.yml not found in the current directory."
    echo "Please create it before running this script."
    exit 1
fi

# Start containers to generate initial config files
echo "Starting containers for initial setup..."
if ! "${COMPOSE[@]}" up -d; then
    echo "Error: Failed to start containers. Check Docker logs with 'docker compose logs'."
    echo "Possible fixes: Run with sudo or ensure port 8080 is free."
    exit 1
fi
# Give the containers time to write their initial configuration.
sleep 10

# Generate a secret key and update settings
SECRET_KEY=$(openssl rand -hex 32)
if [ -f "searxng/settings.yml" ]; then
    # BSD sed (macOS) requires an explicit, possibly empty, backup suffix
    # after -i; GNU sed does not accept one as a separate argument.
    if [ "$(uname)" = "Darwin" ]; then
        sed -i '' "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    else
        sed -i "s/ultrasecretkey/$SECRET_KEY/g" searxng/settings.yml || {
            echo "Warning: Failed to update settings.yml with secret key. Please check the file manually."
        }
    fi
else
    echo "Error: settings.yml not found. Initial setup may have failed."
    "${COMPOSE[@]}" logs searxng
    exit 1
fi

# Stop containers
echo "Stopping containers to apply security settings..."
"${COMPOSE[@]}" down

# Start containers again with secure settings
echo "Deploying SearXNG with secure settings..."
if ! "${COMPOSE[@]}" up -d; then
    echo "Error: Failed to deploy SearXNG. Check logs with 'docker compose logs'."
    exit 1
fi

# Display status and access instructions
echo "SearXNG setup complete!"
docker ps -a --filter "name=searxng" --filter "name=redis"
echo "Access SearXNG at: http://localhost:8080"
|
@ -93,4 +93,4 @@ if __name__ == '__main__':
|
||||
config.load()
|
||||
config.validate_model(config.model_name)
|
||||
state.model = config.model_name
|
||||
app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)
|
||||
app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)
|
||||
|
@ -3,8 +3,9 @@ import time
|
||||
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
from sources.agents.agent import Agent
|
||||
from sources.tools.webSearch import webSearch
|
||||
from sources.tools.searxSearch import searxSearch
|
||||
from sources.browser import Browser
|
||||
|
||||
class BrowserAgent(Agent):
|
||||
def __init__(self, model, name, prompt_path, provider):
|
||||
"""
|
||||
@ -12,7 +13,7 @@ class BrowserAgent(Agent):
|
||||
"""
|
||||
super().__init__(model, name, prompt_path, provider)
|
||||
self.tools = {
|
||||
"web_search": webSearch(),
|
||||
"web_search": searxSearch(),
|
||||
}
|
||||
self.role = "deep research and web search"
|
||||
self.browser = Browser()
|
||||
|
@ -1,7 +1,7 @@
|
||||
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
from sources.agents.agent import Agent
|
||||
from sources.tools.webSearch import webSearch
|
||||
from sources.tools.searxSearch import searxSearch
|
||||
from sources.tools.flightSearch import FlightSearch
|
||||
from sources.tools.fileFinder import FileFinder
|
||||
from sources.tools.BashInterpreter import BashInterpreter
|
||||
@ -13,7 +13,7 @@ class CasualAgent(Agent):
|
||||
"""
|
||||
super().__init__(model, name, prompt_path, provider)
|
||||
self.tools = {
|
||||
"web_search": webSearch(),
|
||||
"web_search": searxSearch(),
|
||||
"flight_search": FlightSearch(),
|
||||
"file_finder": FileFinder(),
|
||||
"bash": BashInterpreter()
|
||||
|
114
sources/tools/searxSearch.py
Normal file
114
sources/tools/searxSearch.py
Normal file
@ -0,0 +1,114 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class searxSearch(Tools):
    def __init__(self, base_url: str = None):
        """
        A tool for searching a SearxNG instance and extracting URLs and titles.

        Args:
            base_url: Root URL of the SearxNG instance. When omitted, the
                SEARXNG_BASE_URL environment variable is used instead.

        Raises:
            ValueError: If neither the argument nor the environment variable
                provides a base URL.
        """
        super().__init__()
        self.tag = "web_search"
        self.base_url = base_url or os.getenv("SEARXNG_BASE_URL")  # Requires a SearxNG base URL
        self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"
        self.paywall_keywords = [
            "Member-only", "access denied", "restricted content", "404", "this page is not working"
        ]
        if not self.base_url:
            raise ValueError("SearxNG base URL must be provided either as an argument or via the SEARXNG_BASE_URL environment variable.")

    def link_valid(self, link):
        """
        Check if a link is valid.

        Fetches the link with a 5 second timeout and returns a status string:
        "Status: OK", "Status: Possible Paywall", "Status: <code> ...",
        "Status: Invalid URL" for non-http links, or "Error: ..." when the
        request itself fails.
        """
        # TODO find a better way
        if not link.startswith("http"):
            return "Status: Invalid URL"

        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        try:
            response = requests.get(link, headers=headers, timeout=5)
        except requests.exceptions.RequestException as e:
            return f"Error: {str(e)}"

        status = response.status_code
        if status == 200:
            content = response.text.lower()
            # The page text is lower-cased, so the keywords must be too;
            # otherwise mixed-case entries such as "Member-only" never match.
            if any(keyword.lower() in content for keyword in self.paywall_keywords):
                return "Status: Possible Paywall"
            return "Status: OK"
        if status == 404:
            return "Status: 404 Not Found"
        if status == 403:
            return "Status: 403 Forbidden"
        return f"Status: {status} {response.reason}"

    def check_all_links(self, links):
        """Check all links, one by one, and return their status strings in order."""
        # TODO make this asynchronous
        print("Web scrawl to verify links accessibilty...")
        return [self.link_valid(link) for link in links]

    def execute(self, blocks: list, safety: bool = False) -> str:
        """
        Executes a search query against a SearxNG instance using POST and extracts URLs and titles.

        Args:
            blocks: List whose first element is the search query.
            safety: Unused here; kept for interface compatibility.

        Returns:
            Results formatted as "Title:/Snippet:/Link:" entries separated by
            blank lines (empty string when nothing was found), or a string
            starting with "Error" when the query is missing or the request fails.
        """
        if not blocks:
            return "Error: No search query provided."

        query = blocks[0].strip()
        if not query:
            return "Error: Empty search query provided."

        # Tolerate a trailing slash on the configured base URL so the request
        # does not go to ".../​/search".
        search_url = f"{self.base_url.rstrip('/')}/search"
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'en-US,en;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Pragma': 'no-cache',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': self.user_agent
        }
        # Pass the form as a dict so requests URL-encodes every value; a query
        # containing "&", "=", "+" or "#" would otherwise corrupt the form body.
        data = {
            'q': query,
            'categories': 'general',
            'language': 'auto',
            'time_range': '',
            'safesearch': '0',
            'theme': 'simple',
        }
        try:
            # NOTE(review): verify=False disables TLS certificate checks; it is
            # kept for self-signed local instances but is unsafe for remote ones.
            # The timeout keeps an unresponsive instance from hanging the agent.
            response = requests.post(search_url, headers=headers, data=data, verify=False, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return f"Error during search: {str(e)}"

        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for article in soup.find_all('article', class_='result'):
            url_header = article.find('a', class_='url_header')
            url = url_header.get('href') if url_header else None
            if not url:
                # Skip entries without a usable link instead of raising KeyError.
                continue
            title_tag = article.find('h3')
            title = title_tag.text.strip() if title_tag else "No Title"
            content_tag = article.find('p', class_='content')
            description = content_tag.text.strip() if content_tag else "No Description"
            results.append(f"Title:{title}\nSnippet:{description}\nLink:{url}")
        return "\n\n".join(results)  # Return results as a single string, separated by newlines

    def execution_failure_check(self, output: str) -> bool:
        """
        Checks if the execution failed based on the output.

        Every failure message produced by this tool starts with "Error", so
        only the prefix is tested; a successful result that merely mentions
        the word "Error" in a title or snippet is not treated as a failure.
        """
        return output.lstrip().startswith("Error")

    def interpreter_feedback(self, output: str) -> str:
        """
        Feedback of web search to agent.
        """
        if self.execution_failure_check(output):
            return f"Web search failed: {output}"
        return f"Web search result:\n{output}"
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run one query against a local SearxNG instance
    # and print whatever the tool returns.
    demo_tool = searxSearch(base_url="http://127.0.0.1:8080")
    print(demo_tool.execute(["are dog better than cat?"]))
|
@ -14,6 +14,11 @@ else:
|
||||
from sources.tools.tools import Tools
|
||||
from sources.utility import animate_thinking, pretty_print
|
||||
|
||||
"""
|
||||
WARNING
|
||||
webSearch is fully deprecated and is being replaced by searxSearch for web search.
|
||||
"""
|
||||
|
||||
class webSearch(Tools):
|
||||
def __init__(self, api_key: str = None):
|
||||
"""
|
||||
|
4
start_services.sh
Executable file
4
start_services.sh
Executable file
@ -0,0 +1,4 @@
|
||||
#!/bin/bash

# start searxng service for internet search
# Resolve the searxng directory relative to this script so the command works
# from any working directory, not only from the repository root.
cd "$(dirname "$0")/searxng" && ./setup_searxng.sh
|
87
tests/test_searx_search.py
Normal file
87
tests/test_searx_search.py
Normal file
@ -0,0 +1,87 @@
|
||||
import unittest
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Add project root to Python path
|
||||
from sources.tools.searxSearch import searxSearch
|
||||
from dotenv import load_dotenv
|
||||
import requests # Import the requests module
|
||||
|
||||
load_dotenv()
|
||||
|
||||
class TestSearxSearch(unittest.TestCase):
    """
    Tests for the searxSearch tool.

    The tests that call execute() with a real query talk to the instance at
    http://127.0.0.1:8080 and therefore need SearxNG to be running.
    """

    ENV_VAR = 'SEARXNG_BASE_URL'

    def setUp(self):
        # Remember the caller's value and restore it after every test, even
        # when an assertion fails, so tests cannot leak environment changes.
        self.addCleanup(self._restore_env, os.environ.get(self.ENV_VAR))
        os.environ[self.ENV_VAR] = "http://127.0.0.1:8080"  # Set the environment variable
        self.base_url = os.getenv(self.ENV_VAR)
        self.search_tool = searxSearch(base_url=self.base_url)
        self.valid_query = "test query"
        self.invalid_query = ""

    @classmethod
    def _restore_env(cls, previous):
        """Put SEARXNG_BASE_URL back to the value it had before the test."""
        if previous is None:
            os.environ.pop(cls.ENV_VAR, None)
        else:
            os.environ[cls.ENV_VAR] = previous

    def test_initialization_with_env_variable(self):
        # Ensure the tool initializes correctly with the base URL from the environment variable
        os.environ[self.ENV_VAR] = "http://test.example.com"
        search_tool = searxSearch()
        self.assertEqual(search_tool.base_url, "http://test.example.com")

    def test_initialization_no_base_url(self):
        # Ensure the tool raises an error if no base URL is provided.
        # Remove the environment variable so the ValueError is raised;
        # the cleanup registered in setUp restores it afterwards.
        os.environ.pop(self.ENV_VAR, None)
        with self.assertRaises(ValueError):
            searxSearch(base_url=None)

    def test_execute_valid_query(self):
        # Execute the search and verify the result
        result = self.search_tool.execute([self.valid_query])
        print(f"Output from test_execute_valid_query: {result}")
        self.assertIsInstance(result, str, "Result should be a string.")
        self.assertNotEqual(result, "", "Result should not be empty. Check SearxNG instance.")

    def test_execute_empty_query(self):
        # Test with an empty query
        result = self.search_tool.execute([self.invalid_query])
        print(f"Output from test_execute_empty_query: {result}")
        self.assertEqual(result, "Error: Empty search query provided.")

    def test_execute_no_query(self):
        # Test with no query provided
        result = self.search_tool.execute([])
        print(f"Output from test_execute_no_query: {result}")
        self.assertEqual(result, "Error: No search query provided.")

    def test_execute_request_exception(self):
        # Test a request exception by temporarily modifying the base_url to an invalid one
        original_base_url = self.search_tool.base_url
        self.search_tool.base_url = "http://invalid_url"
        try:
            result = self.search_tool.execute([self.valid_query])
            print(f"Output from test_execute_request_exception: {result}")
            self.assertIn("Error during search", result)
        finally:
            self.search_tool.base_url = original_base_url  # Restore the original base_url

    def test_execute_no_results(self):
        # Execute the search and verify that an empty string is handled correctly
        result = self.search_tool.execute(["nonexistent query that should return no results"])
        print(f"Output from test_execute_no_results: {result}")
        self.assertIsInstance(result, str, "Result should be a string.")
        # Allow empty results, but print a warning
        if result == "":
            print("Warning: SearxNG returned no results for a query that should have returned no results.")

    def test_execution_failure_check_error(self):
        # Test when the output contains an error
        output = "Error: Something went wrong"
        self.assertTrue(self.search_tool.execution_failure_check(output))

    def test_execution_failure_check_no_error(self):
        # Test when the output does not contain an error
        output = "Search completed successfully"
        self.assertFalse(self.search_tool.execution_failure_check(output))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
x
Reference in New Issue
Block a user