Mirror of https://github.com/tcsenpai/agenticSeek.git, synced 2025-06-07 03:25:32 +00:00

Compare commits: b9c61d3573 ... dd46d67573 (23 commits)
Commits in this range:

dd46d67573
f79c3f10da
bdd7e34b8c
84c618678a
eda277aa54
d6ec56b5df
365a552726
b70fcf5d71
8607514c05
fa2852d3e7
1c4ebefae4
96a6dd368a
ed4f04b19c
f325865869
a15dd998f3
f17dc0550b
ed76c8415b
9f2c105074
ccef61b2b9
3cf1cab68f
0579fd3bb6
c6688355a7
68ed1834a9
@@ -1,4 +1,5 @@
 SEARXNG_BASE_URL="http://127.0.0.1:8080"
 OPENAI_API_KEY='xxxxx'
 DEEPSEEK_API_KEY='xxxxx'
 BACKEND_PORT=8000
+WORK_DIR="/tmp/"
.github/FUNDING.yml (new file, vendored, 4 lines)
@@ -0,0 +1,4 @@
+# These are supported funding model platforms
+
+github: [Fosowl] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+
Dockerfile.python-env (new file, 35 lines)
@@ -0,0 +1,35 @@
FROM ubuntu:22.04

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && \
    apt-get install -y \
    python3-dev \
    python3-pip \
    python3-wheel \
    build-essential \
    alsa-utils \
    portaudio19-dev \
    libgtk-3-dev \
    libnotify-dev \
    libgconf-2-4 \
    libnss3 \
    libxss1 && \
    rm -rf /var/lib/apt/lists/*

# Upgrade pip and install Python packages
RUN pip3 install --upgrade pip setuptools wheel && \
    pip3 install selenium

# Copy requirements.txt first to leverage Docker cache
COPY requirements.txt .
RUN pip3 install -r requirements.txt --no-cache-dir

# Copy the rest of the application
COPY . .

# Set environment variables for Chrome paths with defaults
ENV CHROME_EXECUTABLE_PATH=/app/chrome_bundle/chrome136/chrome-linux64/chrome
ENV CHROMEDRIVER_PATH=/app/chrome_bundle/chrome136/chromedriver
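The two ENV defaults above point at a Chrome bundle baked into the image, and the browser-module change further down in this comparison reads the same variables. A minimal sketch of that resolution, assuming only the variable names above (the unset-variable guard is added here for illustration and is not part of the diff):

```python
import os
import shutil

def resolve_chromedriver() -> str | None:
    """Prefer CHROMEDRIVER_PATH from the environment, else fall back to PATH."""
    path = os.environ.get("CHROMEDRIVER_PATH")
    if path and os.path.exists(path):
        return path
    return shutil.which("chromedriver")  # may still be None if nothing is installed

if __name__ == "__main__":
    print(resolve_chromedriver())
```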
README.md (10 lines changed)
@@ -159,9 +159,9 @@ Set the desired provider in the `config.ini`. See below for a list of API providers.
 ```sh
 [MAIN]
 is_local = False
-provider_name = openai
-provider_model = gpt-4o
-provider_server_address = 127.0.0.1:5000
+provider_name = google
+provider_model = gemini-2.0-flash
+provider_server_address = 127.0.0.1:5000 # doesn't matter
 ```
 Warning: Make sure there is not trailing space in the config.

@@ -179,6 +179,10 @@ Example: export `TOGETHER_API_KEY="xxxxx"`
 | togetherAI | No | Use together AI API (non-private) |
 | google | No | Use google gemini API (non-private) |
+
+*We advice against using gpt-4o or other closedAI models*, performance are poor for web browsing and task planning.
+
+Please also note that coding/bash might fail with gemini, it seem to ignore our prompt for format to respect, which are optimized for deepseek r1.

 Next step: [Start services and run AgenticSeek](#Start-services-and-Run)

 *See the **Known issues** section if you are having issues*
config.ini
@@ -8,9 +8,9 @@ recover_last_session = False
 save_session = False
 speak = False
 listen = False
-work_dir = /Users/mlg/Documents/ai_folder
+work_dir = ${WORK_DIR}
 jarvis_personality = False
 languages = en
 [BROWSER]
-headless_browser = False
+headless_browser = True
 stealth_mode = True
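The new `${WORK_DIR}` value implies the work directory is now injected from the environment (it is set in the env-file and docker-compose changes shown above). A minimal sketch of how such a placeholder could be resolved, assuming a config.ini with the [MAIN] section shown here; the project's actual loading code is not part of this comparison:

```python
import configparser
import os

config = configparser.ConfigParser()
config.read("config.ini")

raw = config.get("MAIN", "work_dir", fallback="${WORK_DIR}")
work_dir = os.path.expandvars(raw)      # "${WORK_DIR}" -> value of $WORK_DIR if set
if not work_dir or "${" in work_dir:    # variable missing from the environment
    work_dir = "/tmp/"                  # same default as the env file above

print(f"work_dir resolved to: {work_dir}")
```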
docker-compose.yml
@@ -65,10 +65,51 @@ services:
       - REACT_APP_BACKEND_URL=http://0.0.0.0:${BACKEND_PORT:-8000}
     networks:
       - agentic-seek-net
+
+  python-env:
+    container_name: python-env
+    build:
+      context: .
+      dockerfile: Dockerfile.python-env
+    ports:
+      - ${BACKEND_PORT:-8000}:${BACKEND_PORT:-8000}
+    volumes:
+      - ./:/app
+      - ${WORK_DIR}:${WORK_DIR}
+    command: python3 api.py
+    environment:
+      - SEARXNG_URL=http://localhost:8080
+      - WORK_DIR=${WORK_DIR}
+    network_mode: "host"
+
 # NOTE: backend service is not working yet due to issue with chromedriver on docker.
 # Therefore backend is run on host machine.
 # Open to pull requests to fix this.
+#backend:
+#  container_name: backend
+#  build:
+#    context: ./
+#    dockerfile: Dockerfile.backend
+#  stdin_open: true
+#  tty: true
+#  shm_size: 8g
+#  ports:
+#    - "8000:8000"
+#  volumes:
+#    - ./:/app
+#  environment:
+#    - NODE_ENV=development
+#    - REDIS_URL=redis://redis:6379/0
+#    - SEARXNG_URL=http://searxng:8080
+#    - OLLAMA_URL=http://localhost:11434
+#    - LM_STUDIO_URL=http://localhost:1234
+#  extra_hosts:
+#    - "host.docker.internal:host-gateway"
+#  depends_on:
+#    - redis
+#    - searxng
+#  networks:
+#    - agentic-seek-net
+
 volumes:
requirements.txt
@@ -1,3 +1,5 @@
+kokoro==0.9.4
+certifi==2025.4.26
 fastapi>=0.115.12
 flask>=3.1.0
 celery>=5.5.1
@@ -18,10 +20,10 @@ torch>=2.4.1
 python-dotenv>=1.0.0
 ollama>=0.4.7
 scipy>=1.9.3
-kokoro>=0.7.12
 soundfile>=0.13.1
 protobuf>=3.20.3
 termcolor>=2.4.0
+pypdf>=5.4.0
 ipython>=8.13.0
 pyaudio>=0.2.14
 librosa>=0.10.2.post1
@@ -39,6 +41,7 @@ fake_useragent>=2.1.0
 selenium_stealth>=1.0.6
 undetected-chromedriver>=3.5.5
 sentencepiece>=0.2.0
+tqdm>4
 openai
 sniffio
 tqdm>4
@@ -46,5 +49,3 @@ python-dotenv>=1.0.0
 # if use chinese
 ordered_set
 pypinyin
-cn2an
-jieba
@@ -123,6 +123,8 @@ class Agent():
         """
         start_tag = "<think>"
         end_tag = "</think>"
+        if text is None:
+            return None
         start_idx = text.find(start_tag)
         end_idx = text.rfind(end_tag)+8
         return text[start_idx:end_idx]
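For reference, the guarded extraction this hunk produces, written out as one self-contained function (the surrounding method and class come only from the hunk header, the name used here is illustrative, and 8 is simply len("</think>")):

```python
def extract_reasoning_text(text: str | None) -> str | None:
    """Return the <think>...</think> block of an LLM answer, or None."""
    start_tag = "<think>"
    end_tag = "</think>"
    if text is None:
        return None
    start_idx = text.find(start_tag)
    end_idx = text.rfind(end_tag) + len(end_tag)  # the diff writes this as +8
    return text[start_idx:end_idx]

# extract_reasoning_text("<think>plan</think>answer") -> "<think>plan</think>"
```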
@@ -151,7 +151,7 @@ class PlannerAgent(Agent):
                 return []
             agents_tasks = self.parse_agent_tasks(answer)
             if agents_tasks == []:
-                prompt = f"Failed to parse the tasks. Please make a plan within ```json. Do not ask for clarification.\n"
+                prompt = f"Failed to parse the tasks. Please write down your task followed by a json plan within ```json. Do not ask for clarification.\n"
                 pretty_print("Failed to make plan. Retrying...", color="warning")
                 continue
             self.show_plan(agents_tasks, answer)
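The retried prompt now asks the model to restate the task and emit its plan inside a ```json fence. A hedged sketch of pulling such a block out of an answer (parse_agent_tasks() itself is not shown in this comparison, so this illustrates the expected format rather than the project's parser):

```python
import json
import re

def extract_json_plan(answer: str):
    """Return the parsed content of the first ```json ... ``` block, or None."""
    match = re.search(r"```json\s*(.*?)```", answer, re.DOTALL)
    if not match:
        return None
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return None

# extract_json_plan('Task: demo\n```json\n{"plan": []}\n```') -> {"plan": []}
```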
@@ -13,6 +13,8 @@ from fake_useragent import UserAgent
 from selenium_stealth import stealth
 import undetected_chromedriver as uc
 import chromedriver_autoinstaller
+import certifi
+import ssl
 import time
 import random
 import os
@@ -27,53 +29,85 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from sources.utility import pretty_print, animate_thinking
 from sources.logger import Logger
This hunk reformats get_chrome_path(), get_random_user_agent() and the create_driver() signature to Black style: the Windows/macOS/Linux candidate-path lists and the user-agent dictionaries are split one entry per line, the CHROME_EXECUTABLE_PATH check and the input()/print() prompts are wrapped, and single quotes become double quotes. The candidate paths, the three user agents and the default arguments are unchanged.

@@ -90,31 +124,40 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
-    chromedriver_path = shutil.which("chromedriver")
-    if not chromedriver_path:
+    chromedriver_path = os.environ.get("CHROMEDRIVER_PATH")
+    if not os.path.exists(chromedriver_path):
         chromedriver_path = chromedriver_autoinstaller.install()

     if not chromedriver_path:
-        raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
+        raise FileNotFoundError(
+            "ChromeDriver not found. Please install it or add it to your PATH."
+        )

     service = Service(chromedriver_path)
The rest of this hunk is formatting only: the window-size argument, the anti-captcha CRX check and the stealth() setup keep their logic but are re-quoted and wrapped across lines.

Hunks @@ -127,13 +170,16 @@ through @@ -571,11 +691,11 @@ class Browser:
The remaining hunks in this file apply the same Black-style reformatting across the Browser class (go_to, is_sentence, get_text, clean_url, is_link_valid, get_navigable, click_element, load_js, form and checkbox handling, find_and_click_btn, find_and_click_submission, fill_form_inputs, screenshot): single quotes become double quotes, and long pretty_print(), WebDriverWait, execute_script() and logger calls, as well as multi-item lists such as possible_submissions and the image/metadata extension lists, are wrapped one element per line.

@@ -598,18 +718,19 @@ class Browser:
In the __main__ test block, the commented-out browser.go_to() / get_text() / fill_form() examples gain a space after "#"; the openweathermap sign-up form test itself is unchanged.
@ -72,6 +72,8 @@ class Provider:
|
|||||||
except ModuleNotFoundError as e:
|
except ModuleNotFoundError as e:
|
||||||
raise ModuleNotFoundError(f"{str(e)}\nA import related to provider {self.provider_name} was not found. Is it installed ?")
|
raise ModuleNotFoundError(f"{str(e)}\nA import related to provider {self.provider_name} was not found. Is it installed ?")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
if "try again later" in str(e).lower():
|
||||||
|
return f"{self.provider_name} server is overloaded. Please try again later."
|
||||||
if "refused" in str(e):
|
if "refused" in str(e):
|
||||||
return f"Server {self.server_ip} seem offline. Unable to answer."
|
return f"Server {self.server_ip} seem offline. Unable to answer."
|
||||||
raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
|
raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
|
||||||
@ -214,7 +216,7 @@ class Provider:
|
|||||||
"""
|
"""
|
||||||
base_url = self.server_ip
|
base_url = self.server_ip
|
||||||
if self.is_local:
|
if self.is_local:
|
||||||
raise Exception("Google Gemini is not available for local use.")
|
raise Exception("Google Gemini is not available for local use. Change config.ini")
|
||||||
|
|
||||||
client = OpenAI(api_key=self.api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
client = OpenAI(api_key=self.api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
||||||
try:
|
try:
|
||||||
@ -237,6 +239,8 @@ class Provider:
|
|||||||
"""
|
"""
|
||||||
from together import Together
|
from together import Together
|
||||||
client = Together(api_key=self.api_key)
|
client = Together(api_key=self.api_key)
|
||||||
|
if self.is_local:
|
||||||
|
raise Exception("Together AI is not available for local use. Change config.ini")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
@ -257,6 +261,8 @@ class Provider:
|
|||||||
Use deepseek api to generate text.
|
Use deepseek api to generate text.
|
||||||
"""
|
"""
|
||||||
client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
|
client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
|
||||||
|
if self.is_local:
|
||||||
|
raise Exception("Deepseek (API) is not available for local use. Change config.ini")
|
||||||
try:
|
try:
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
model="deepseek-chat",
|
model="deepseek-chat",
|
||||||
|
@ -9,7 +9,10 @@ from kokoro import KPipeline
|
|||||||
from IPython.display import display, Audio
|
from IPython.display import display, Audio
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
|
|
||||||
from sources.utility import pretty_print, animate_thinking
|
if __name__ == "__main__":
|
||||||
|
from utility import pretty_print, animate_thinking
|
||||||
|
else:
|
||||||
|
from sources.utility import pretty_print, animate_thinking
|
||||||
|
|
||||||
class Speech():
|
class Speech():
|
||||||
"""
|
"""
|
||||||
@ -140,6 +143,7 @@ class Speech():
|
|||||||
return sentence
|
return sentence
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
# TODO add info message for cn2an, jieba chinese related import
|
||||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
speech = Speech()
|
speech = Speech()
|
||||||
tosay_en = """
|
tosay_en = """
|
||||||
|
@ -1,15 +1,14 @@
|
|||||||
|
|
||||||
import sys
|
import os, sys
|
||||||
import re
|
import re
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__": # if running as a script for individual testing
|
||||||
from tools import Tools
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
from safety import is_unsafe
|
|
||||||
else:
|
from sources.tools.tools import Tools
|
||||||
from sources.tools.tools import Tools
|
from sources.tools.safety import is_unsafe
|
||||||
from sources.tools.safety import is_unsafe
|
|
||||||
|
|
||||||
class BashInterpreter(Tools):
|
class BashInterpreter(Tools):
|
||||||
"""
|
"""
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os, sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import re
|
import re
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__": # if running as a script for individual testing
|
||||||
from tools import Tools
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
else:
|
|
||||||
from sources.tools.tools import Tools
|
from sources.tools.tools import Tools
|
||||||
|
|
||||||
class CInterpreter(Tools):
|
class CInterpreter(Tools):
|
||||||
"""
|
"""
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os, sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import re
|
import re
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__": # if running as a script for individual testing
|
||||||
from tools import Tools
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
else:
|
|
||||||
from sources.tools.tools import Tools
|
from sources.tools.tools import Tools
|
||||||
|
|
||||||
class GoInterpreter(Tools):
|
class GoInterpreter(Tools):
|
||||||
"""
|
"""
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os, sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import re
|
import re
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__": # if running as a script for individual testing
|
||||||
from tools import Tools
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
else:
|
|
||||||
from sources.tools.tools import Tools
|
from sources.tools.tools import Tools
|
||||||
|
|
||||||
class JavaInterpreter(Tools):
|
class JavaInterpreter(Tools):
|
||||||
"""
|
"""
|
||||||
|
@ -4,10 +4,10 @@ import os
|
|||||||
import re
|
import re
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__": # if running as a script for individual testing
|
||||||
from tools import Tools
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
else:
|
|
||||||
from sources.tools.tools import Tools
|
from sources.tools.tools import Tools
|
||||||
|
|
||||||
class PyInterpreter(Tools):
|
class PyInterpreter(Tools):
|
||||||
"""
|
"""
|
||||||
|
@ -1,13 +1,12 @@
|
|||||||
import os
|
import os, sys
|
||||||
import stat
|
import stat
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import configparser
|
import configparser
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__": # if running as a script for individual testing
|
||||||
from tools import Tools
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
else:
|
|
||||||
from sources.tools.tools import Tools
|
|
||||||
|
|
||||||
|
from sources.tools.tools import Tools
|
||||||
|
|
||||||
class FileFinder(Tools):
|
class FileFinder(Tools):
|
||||||
"""
|
"""
|
||||||
@@ -30,14 +29,46 @@ class FileFinder(Tools):
                 return file.read()
         except Exception as e:
             return f"Error reading file: {e}"
 
+    def read_arbitrary_file(self, file_path: str, file_type: str) -> str:
+        """
+        Reads the content of a file with arbitrary encoding.
+        Args:
+            file_path (str): The path to the file to read
+        Returns:
+            str: The content of the file in markdown format
+        """
+        mime_type, _ = mimetypes.guess_type(file_path)
+        if mime_type:
+            if mime_type.startswith(('image/', 'video/', 'audio/')):
+                return "can't read file type: image, video, or audio files are not supported."
+        content_raw = self.read_file(file_path)
+        if "text" in file_type:
+            content = content_raw
+        elif "pdf" in file_type:
+            from pypdf import PdfReader
+            reader = PdfReader(file_path)
+            content = '\n'.join([pt.extract_text() for pt in reader.pages])
+        elif "binary" in file_type:
+            content = content_raw.decode('utf-8', errors='replace')
+        else:
+            content = content_raw
+        return content
+
     def get_file_info(self, file_path: str) -> str:
+        """
+        Gets information about a file, including its name, path, type, content, and permissions.
+        Args:
+            file_path (str): The path to the file
+        Returns:
+            str: A dictionary containing the file information
+        """
         if os.path.exists(file_path):
             stats = os.stat(file_path)
             permissions = oct(stat.S_IMODE(stats.st_mode))
             file_type, _ = mimetypes.guess_type(file_path)
             file_type = file_type if file_type else "Unknown"
-            content = self.read_file(file_path)
+            content = self.read_arbitrary_file(file_path, file_type)
 
             result = {
                 "filename": os.path.basename(file_path),
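The new `read_arbitrary_file` helper dispatches on file type: image, video, and audio files are rejected, PDFs are extracted through `pypdf`, and binary content is decoded with replacement characters. A quick usage sketch (the module path and the `report.pdf` filename are assumptions for illustration, not taken from this diff):

```python
# Illustrative only: guess the MIME type and dispatch the same way get_file_info() now does.
import mimetypes
from sources.tools.fileFinder import FileFinder  # module path assumed from the repo layout

finder = FileFinder()
path = "report.pdf"  # placeholder file
file_type = mimetypes.guess_type(path)[0] or "Unknown"
print(finder.read_arbitrary_file(path, file_type))  # PDFs are extracted page by page via pypdf
```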
@@ -1,13 +1,13 @@
-import os
+import os, sys
 import requests
 import dotenv
 
 dotenv.load_dotenv()
 
-if __name__ == "__main__":
-    from tools import Tools
-else:
-    from sources.tools.tools import Tools
+if __name__ == "__main__": # if running as a script for individual testing
+    sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from sources.tools.tools import Tools
 
 class FlightSearch(Tools):
     def __init__(self, api_key: str = None):
@@ -1,12 +1,13 @@
-import os
+import os, sys
 import requests
 from urllib.parse import urljoin
 from typing import Dict, Any, Optional
 
-if __name__ == "__main__":
-    from tools import Tools
-else:
-    from sources.tools.tools import Tools
+from sources.tools.tools import Tools
+
+if __name__ == "__main__": # if running as a script for individual testing
+    sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
 
 class MCP_finder(Tools):
     """
@@ -2,10 +2,10 @@ import requests
 from bs4 import BeautifulSoup
 import os
 
-if __name__ == "__main__":
-    from tools import Tools
-else:
-    from sources.tools.tools import Tools
+if __name__ == "__main__": # if running as a script for individual testing
+    sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from sources.tools.tools import Tools
 
 class searxSearch(Tools):
     def __init__(self, base_url: str = None):
@@ -14,13 +14,17 @@ For example:
 print("Hello world")
 ```
 This is then executed by the tool with its own class implementation of execute().
-A tool is not just for code tool but also API, internet, etc..
+A tool is not just for code tool but also API, internet search, MCP, etc..
 """
 
 import sys
 import os
 import configparser
 from abc import abstractmethod
 
+if __name__ == "__main__": # if running as a script for individual testing
+    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 from sources.logger import Logger
 
 class Tools():
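The docstring touched in this hunk describes the core contract: the model emits a fenced block, the tool pulls it out and feeds it to its own `execute()` implementation. A rough, hypothetical sketch of that extraction step (the function name and regex are illustrative, not the repo's actual parser):

```python
import re
from typing import List

FENCE = "`" * 3  # the triple-backtick marker, built programmatically to keep this example readable

def extract_blocks(llm_output: str, tag: str) -> List[str]:
    """Return the bodies of fenced blocks labelled with `tag` found in the model output."""
    pattern = rf"{FENCE}{tag}\n(.*?){FENCE}"
    return [m.strip() for m in re.findall(pattern, llm_output, re.DOTALL)]

sample = f'Here you go:\n{FENCE}python\nprint("Hello world")\n{FENCE}'
print(extract_blocks(sample, "python"))  # ['print("Hello world")']
# Each recovered block would then be handed to the matching tool's execute() implementation.
```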
@@ -5,14 +5,8 @@ import dotenv
 
 dotenv.load_dotenv()
 
-if __name__ == "__main__":
-    import sys
-    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-    from utility import animate_thinking, pretty_print
-    from tools import Tools
-else:
-    from sources.tools.tools import Tools
-    from sources.utility import animate_thinking, pretty_print
+from sources.tools.tools import Tools
+from sources.utility import animate_thinking, pretty_print
 
 """
 WARNING
@@ -1,8 +1,76 @@
 @echo off
 
-REM Up the provider in windows
-start ollama serve
+REM Check if Docker is running
+docker info >nul 2>&1
+if %ERRORLEVEL% neq 0 (
+    echo Error: Docker daemon is not running or inaccessible.
+    echo Please ensure Docker Desktop is running.
+    exit /b 1
+)
+
+REM Check if docker-compose.yml exists
+if not exist docker-compose.yml (
+    echo Error: docker-compose.yml not found in the current directory.
+    exit /b 1
+)
+
+REM Check and download Chrome bundle if not present
+echo Checking Chrome bundle...
+if not exist chrome_bundle\chrome136 (
+    echo Chrome bundle not found. Downloading...
+    if not exist chrome_bundle mkdir chrome_bundle
+    curl -L https://github.com/tcsenpai/agenticSeek/releases/download/utility/chrome136.zip -o %TEMP%\chrome136.zip
+    if %ERRORLEVEL% neq 0 (
+        echo Error: Failed to download Chrome bundle
+        exit /b 1
+    )
+    powershell -Command "Expand-Archive -Path '%TEMP%\chrome136.zip' -DestinationPath 'chrome_bundle' -Force"
+    if %ERRORLEVEL% neq 0 (
+        echo Error: Failed to extract Chrome bundle
+        exit /b 1
+    )
+    del %TEMP%\chrome136.zip
+    echo Chrome bundle downloaded and extracted successfully
+) else (
+    echo Chrome bundle already exists
+)
+
+REM Stop only containers in our project's network
+echo Stopping project containers...
+docker-compose down
+
+REM Start Ollama in the background
+echo Starting Ollama...
+start /B ollama serve
+
+REM First start python-env
+echo Starting python-env service...
+docker-compose up -d python-env
+if %ERRORLEVEL% neq 0 (
+    echo Error: Failed to start python-env container.
+    exit /b 1
+)
+
+REM Wait for python-env to be healthy
+echo Waiting for python-env to be ready...
+set /a count=0
+:wait_loop
+docker inspect -f "{{.State.Running}}" python-env | findstr "true" >nul
+if %ERRORLEVEL% neq 0 (
+    set /a count+=1
+    if %count% gtr 30 (
+        echo Error: python-env failed to start properly after 30 seconds
+        docker-compose logs python-env
+        exit /b 1
+    )
+    timeout /t 1 /nobreak >nul
+    goto wait_loop
+)
+
+echo python-env is ready!
+
+REM Now start the rest of the services
+echo Starting remaining services...
 docker-compose up
 if %ERRORLEVEL% neq 0 (
     echo Error: Failed to start containers. Check Docker logs with 'docker compose logs'.
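The Windows script now mirrors the shell script in the next hunk: verify Docker, fetch the Chrome bundle on first run, bring down only the project's containers, start `python-env` first, and wait for it before launching the rest. For prototyping the bundle check outside a batch file, a rough Python equivalent (same release URL and directory names as the script; error handling kept minimal):

```python
# Sketch of the "download Chrome bundle if missing" step, mirroring the batch/shell scripts.
import os
import urllib.request
import zipfile

BUNDLE_DIR = "chrome_bundle"
BUNDLE_URL = "https://github.com/tcsenpai/agenticSeek/releases/download/utility/chrome136.zip"

def ensure_chrome_bundle() -> None:
    if os.path.isdir(os.path.join(BUNDLE_DIR, "chrome136")):
        print("Chrome bundle already exists")
        return
    os.makedirs(BUNDLE_DIR, exist_ok=True)
    archive = os.path.join(BUNDLE_DIR, "chrome136.zip")
    print("Chrome bundle not found. Downloading...")
    urllib.request.urlretrieve(BUNDLE_URL, archive)  # download the release asset
    with zipfile.ZipFile(archive) as zf:
        zf.extractall(BUNDLE_DIR)                    # unpack into chrome_bundle/
    os.remove(archive)
    print("Chrome bundle downloaded and extracted successfully")

if __name__ == "__main__":
    ensure_chrome_bundle()
```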
@@ -63,17 +63,60 @@ if [ ! -f "docker-compose.yml" ]; then
     exit 1
 fi
 
-# start docker compose for searxng, redis, frontend services
-echo "Warning: stopping all docker containers (t-4 seconds)..."
-sleep 4
-docker stop $(docker ps -a -q)
-echo "All containers stopped"
+# Download and extract Chrome bundle if not present
+echo "Checking Chrome bundle..."
+if [ ! -d "chrome_bundle/chrome136" ]; then
+    echo "Chrome bundle not found. Downloading..."
+    mkdir -p chrome_bundle
+    curl -L https://github.com/tcsenpai/agenticSeek/releases/download/utility/chrome136.zip -o /tmp/chrome136.zip
+    if [ $? -ne 0 ]; then
+        echo "Error: Failed to download Chrome bundle"
+        exit 1
+    fi
+    unzip -q /tmp/chrome136.zip -d chrome_bundle/
+    if [ $? -ne 0 ]; then
+        echo "Error: Failed to extract Chrome bundle"
+        exit 1
+    fi
+    rm /tmp/chrome136.zip
+    echo "Chrome bundle downloaded and extracted successfully"
+else
+    echo "Chrome bundle already exists"
+fi
+
+# Stop only containers in our project's network
+echo "Stopping project containers..."
+$COMPOSE_CMD down
+
+# First start python-env and wait for it to be healthy
+echo "Starting python-env service..."
+if ! $COMPOSE_CMD up -d python-env; then
+    echo "Error: Failed to start python-env container."
+    exit 1
+fi
+
+# Wait for python-env to be healthy (check if it's running and not restarting)
+echo "Waiting for python-env to be ready..."
+for i in {1..30}; do
+    if [ "$(docker inspect -f '{{.State.Running}}' python-env)" = "true" ] && \
+       [ "$(docker inspect -f '{{.State.Restarting}}' python-env)" = "false" ]; then
+        echo "python-env is ready!"
+        break
+    fi
+    if [ $i -eq 30 ]; then
+        echo "Error: python-env failed to start properly after 30 seconds"
+        $COMPOSE_CMD logs python-env
+        exit 1
+    fi
+    sleep 1
+done
+
+# Now start the rest of the services
+echo "Starting remaining services..."
 if ! $COMPOSE_CMD up; then
     echo "Error: Failed to start containers. Check Docker logs with '$COMPOSE_CMD logs'."
     echo "Possible fixes: Run with sudo or ensure port 8080 is free."
     exit 1
 fi
 
 
 sleep 10
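The readiness loop in both scripts polls `docker inspect` until the container reports `Running` and not `Restarting`, giving up after 30 attempts. A standalone Python sketch of the same polling logic (function name and defaults are illustrative):

```python
# Sketch of the readiness poll: ask Docker whether the python-env container is
# running (and not restarting), retrying once per second for up to 30 seconds.
import json
import subprocess
import time

def wait_for_container(name: str = "python-env", timeout: int = 30) -> bool:
    for _ in range(timeout):
        try:
            out = subprocess.run(
                ["docker", "inspect", name],
                capture_output=True, text=True, check=True,
            ).stdout
            state = json.loads(out)[0]["State"]
            if state.get("Running") and not state.get("Restarting"):
                return True
        except (subprocess.CalledProcessError, json.JSONDecodeError, IndexError, KeyError):
            pass  # container may not exist yet
        time.sleep(1)
    return False

if __name__ == "__main__":
    print("python-env is ready!" if wait_for_container() else "python-env failed to start in time")
```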
@@ -17,13 +17,13 @@ class TestBrowserAgentParsing(unittest.TestCase):
         # Test various link formats
         test_text = """
         Check this out: https://thriveonai.com/15-ai-startups-in-japan-to-take-note-of, and www.google.com!
-        Also try https://test.org/about?page=1, hey this one as well bro https://weatherstack.com/documentation.
+        Also try https://test.org/about?page=1, hey this one as well bro https://weatherstack.com/documentation/.
         """
         expected = [
             "https://thriveonai.com/15-ai-startups-in-japan-to-take-note-of",
             "www.google.com",
             "https://test.org/about?page=1",
-            "https://weatherstack.com/documentation"
+            "https://weatherstack.com/documentation",
         ]
         result = self.agent.extract_links(test_text)
         self.assertEqual(result, expected)
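The updated test expects trailing punctuation and a trailing slash to be stripped from captured URLs. A self-contained regex sketch that reproduces those particular expectations (not the project's actual `extract_links` implementation):

```python
import re

def extract_links_sketch(text: str) -> list:
    """Toy link extractor: grab http(s):// and www. tokens, then trim trailing punctuation and '/'."""
    candidates = re.findall(r'(?:https?://|www\.)[^\s]+', text)
    return [c.rstrip('.,!?;:').rstrip('/') for c in candidates]

sample = "Check www.google.com! and https://weatherstack.com/documentation/."
print(extract_links_sketch(sample))
# ['www.google.com', 'https://weatherstack.com/documentation']
```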