mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-05 18:45:27 +00:00
Compare commits
23 Commits
b9c61d3573
...
dd46d67573
Author | SHA1 | Date | |
---|---|---|---|
![]() |
dd46d67573 | ||
![]() |
f79c3f10da | ||
![]() |
bdd7e34b8c | ||
![]() |
84c618678a | ||
![]() |
eda277aa54 | ||
![]() |
d6ec56b5df | ||
![]() |
365a552726 | ||
![]() |
b70fcf5d71 | ||
![]() |
8607514c05 | ||
![]() |
fa2852d3e7 | ||
![]() |
1c4ebefae4 | ||
![]() |
96a6dd368a | ||
![]() |
ed4f04b19c | ||
![]() |
f325865869 | ||
![]() |
a15dd998f3 | ||
![]() |
f17dc0550b | ||
![]() |
ed76c8415b | ||
![]() |
9f2c105074 | ||
![]() |
ccef61b2b9 | ||
![]() |
3cf1cab68f | ||
![]() |
0579fd3bb6 | ||
![]() |
c6688355a7 | ||
![]() |
68ed1834a9 |
@ -1,4 +1,5 @@
|
||||
SEARXNG_BASE_URL="http://127.0.0.1:8080"
|
||||
OPENAI_API_KEY='xxxxx'
|
||||
DEEPSEEK_API_KEY='xxxxx'
|
||||
BACKEND_PORT=8000
|
||||
BACKEND_PORT=8000
|
||||
WORK_DIR="/tmp/"
|
4
.github/FUNDING.yml
vendored
Normal file
4
.github/FUNDING.yml
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: [Fosowl ]# Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
|
||||
|
35
Dockerfile.python-env
Normal file
35
Dockerfile.python-env
Normal file
@ -0,0 +1,35 @@
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
python3-wheel \
|
||||
build-essential \
|
||||
alsa-utils \
|
||||
portaudio19-dev \
|
||||
libgtk-3-dev \
|
||||
libnotify-dev \
|
||||
libgconf-2-4 \
|
||||
libnss3 \
|
||||
libxss1 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Upgrade pip and install Python packages
|
||||
RUN pip3 install --upgrade pip setuptools wheel && \
|
||||
pip3 install selenium
|
||||
|
||||
# Copy requirements.txt first to leverage Docker cache
|
||||
COPY requirements.txt .
|
||||
RUN pip3 install -r requirements.txt --no-cache-dir
|
||||
|
||||
# Copy the rest of the application
|
||||
COPY . .
|
||||
|
||||
# Set environment variables for Chrome paths with defaults
|
||||
ENV CHROME_EXECUTABLE_PATH=/app/chrome_bundle/chrome136/chrome-linux64/chrome
|
||||
ENV CHROMEDRIVER_PATH=/app/chrome_bundle/chrome136/chromedriver
|
10
README.md
10
README.md
@ -159,9 +159,9 @@ Set the desired provider in the `config.ini`. See below for a list of API provid
|
||||
```sh
|
||||
[MAIN]
|
||||
is_local = False
|
||||
provider_name = openai
|
||||
provider_model = gpt-4o
|
||||
provider_server_address = 127.0.0.1:5000
|
||||
provider_name = google
|
||||
provider_model = gemini-2.0-flash
|
||||
provider_server_address = 127.0.0.1:5000 # doesn't matter
|
||||
```
|
||||
Warning: Make sure there are no trailing spaces in the config.
|
||||
|
||||
@ -179,6 +179,10 @@ Example: export `TOGETHER_API_KEY="xxxxx"`
|
||||
| togetherAI | No | Use together AI API (non-private) |
|
||||
| google | No | Use google gemini API (non-private) |
|
||||
|
||||
*We advise against using gpt-4o or other closedAI models*; their performance is poor for web browsing and task planning.
|
||||
|
||||
Please also note that coding/bash might fail with gemini: it seems to ignore the output format required by our prompts, which are optimized for deepseek r1.
|
||||
|
||||
Next step: [Start services and run AgenticSeek](#Start-services-and-Run)
|
||||
|
||||
*See the **Known issues** section if you are having issues*
|
||||
|
@ -8,9 +8,9 @@ recover_last_session = False
|
||||
save_session = False
|
||||
speak = False
|
||||
listen = False
|
||||
work_dir = /Users/mlg/Documents/ai_folder
|
||||
work_dir = ${WORK_DIR}
|
||||
jarvis_personality = False
|
||||
languages = en
|
||||
[BROWSER]
|
||||
headless_browser = False
|
||||
stealth_mode = True
|
||||
headless_browser = True
|
||||
stealth_mode = True
|
||||
|
@ -65,10 +65,51 @@ services:
|
||||
- REACT_APP_BACKEND_URL=http://0.0.0.0:${BACKEND_PORT:-8000}
|
||||
networks:
|
||||
- agentic-seek-net
|
||||
|
||||
|
||||
python-env:
|
||||
container_name: python-env
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.python-env
|
||||
ports:
|
||||
- ${BACKEND_PORT:-8000}:${BACKEND_PORT:-8000}
|
||||
volumes:
|
||||
- ./:/app
|
||||
- ${WORK_DIR}:${WORK_DIR}
|
||||
command: python3 api.py
|
||||
environment:
|
||||
- SEARXNG_URL=http://localhost:8080
|
||||
- WORK_DIR=${WORK_DIR}
|
||||
network_mode: "host"
|
||||
|
||||
# NOTE: the backend service is not working yet due to an issue with chromedriver on docker.
|
||||
# Therefore backend is run on host machine.
|
||||
# Open to pull requests to fix this.
|
||||
#backend:
|
||||
# container_name: backend
|
||||
# build:
|
||||
# context: ./
|
||||
# dockerfile: Dockerfile.backend
|
||||
# stdin_open: true
|
||||
# tty: true
|
||||
# shm_size: 8g
|
||||
# ports:
|
||||
# - "8000:8000"
|
||||
# volumes:
|
||||
# - ./:/app
|
||||
# environment:
|
||||
# - NODE_ENV=development
|
||||
# - REDIS_URL=redis://redis:6379/0
|
||||
# - SEARXNG_URL=http://searxng:8080
|
||||
# - OLLAMA_URL=http://localhost:11434
|
||||
# - LM_STUDIO_URL=http://localhost:1234
|
||||
# extra_hosts:
|
||||
# - "host.docker.internal:host-gateway"
|
||||
# depends_on:
|
||||
# - redis
|
||||
# - searxng
|
||||
# networks:
|
||||
# - agentic-seek-net
|
||||
|
||||
|
||||
volumes:
|
||||
|
@ -1,3 +1,5 @@
|
||||
kokoro==0.9.4
|
||||
certifi==2025.4.26
|
||||
fastapi>=0.115.12
|
||||
flask>=3.1.0
|
||||
celery>=5.5.1
|
||||
@ -18,10 +20,10 @@ torch>=2.4.1
|
||||
python-dotenv>=1.0.0
|
||||
ollama>=0.4.7
|
||||
scipy>=1.9.3
|
||||
kokoro>=0.7.12
|
||||
soundfile>=0.13.1
|
||||
protobuf>=3.20.3
|
||||
termcolor>=2.4.0
|
||||
pypdf>=5.4.0
|
||||
ipython>=8.13.0
|
||||
pyaudio>=0.2.14
|
||||
librosa>=0.10.2.post1
|
||||
@ -39,6 +41,7 @@ fake_useragent>=2.1.0
|
||||
selenium_stealth>=1.0.6
|
||||
undetected-chromedriver>=3.5.5
|
||||
sentencepiece>=0.2.0
|
||||
tqdm>4
|
||||
openai
|
||||
sniffio
|
||||
tqdm>4
|
||||
@ -46,5 +49,3 @@ python-dotenv>=1.0.0
|
||||
# if use chinese
|
||||
ordered_set
|
||||
pypinyin
|
||||
cn2an
|
||||
jieba
|
||||
|
@ -123,6 +123,8 @@ class Agent():
|
||||
"""
|
||||
start_tag = "<think>"
|
||||
end_tag = "</think>"
|
||||
if text is None:
|
||||
return None
|
||||
start_idx = text.find(start_tag)
|
||||
end_idx = text.rfind(end_tag)+8
|
||||
return text[start_idx:end_idx]
|
||||
|
@ -151,7 +151,7 @@ class PlannerAgent(Agent):
|
||||
return []
|
||||
agents_tasks = self.parse_agent_tasks(answer)
|
||||
if agents_tasks == []:
|
||||
prompt = f"Failed to parse the tasks. Please make a plan within ```json. Do not ask for clarification.\n"
|
||||
prompt = f"Failed to parse the tasks. Please write down your task followed by a json plan within ```json. Do not ask for clarification.\n"
|
||||
pretty_print("Failed to make plan. Retrying...", color="warning")
|
||||
continue
|
||||
self.show_plan(agents_tasks, answer)
|
||||
|
@ -13,6 +13,8 @@ from fake_useragent import UserAgent
|
||||
from selenium_stealth import stealth
|
||||
import undetected_chromedriver as uc
|
||||
import chromedriver_autoinstaller
|
||||
import certifi
|
||||
import ssl
|
||||
import time
|
||||
import random
|
||||
import os
|
||||
@ -27,53 +29,85 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
from sources.logger import Logger
|
||||
|
||||
|
||||
|
||||
def get_chrome_path() -> str:
|
||||
"""Get the path to the Chrome executable."""
|
||||
if sys.platform.startswith("win"):
|
||||
paths = [
|
||||
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
|
||||
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
|
||||
os.path.join(os.environ.get("LOCALAPPDATA", ""), "Google\\Chrome\\Application\\chrome.exe") # User install
|
||||
os.path.join(
|
||||
os.environ.get("LOCALAPPDATA", ""),
|
||||
"Google\\Chrome\\Application\\chrome.exe",
|
||||
), # User install
|
||||
]
|
||||
elif sys.platform.startswith("darwin"): # macOS
|
||||
paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
|
||||
paths = [
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
|
||||
]
|
||||
else: # Linux
|
||||
paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium", "/opt/chrome/chrome", "/usr/local/bin/chrome"]
|
||||
paths = [
|
||||
"/usr/bin/google-chrome",
|
||||
"/usr/bin/chromium-browser",
|
||||
"/usr/bin/chromium",
|
||||
"/opt/chrome/chrome",
|
||||
"/usr/local/bin/chrome",
|
||||
]
|
||||
|
||||
for path in paths:
|
||||
if os.path.exists(path) and os.access(path, os.X_OK): # Check if executable
|
||||
return path
|
||||
print("Looking for Google Chrome in these locations failed:")
|
||||
print('\n'.join(paths))
|
||||
print("\n".join(paths))
|
||||
chrome_path_env = os.environ.get("CHROME_EXECUTABLE_PATH")
|
||||
if chrome_path_env and os.path.exists(chrome_path_env) and os.access(chrome_path_env, os.X_OK):
|
||||
if (
|
||||
chrome_path_env
|
||||
and os.path.exists(chrome_path_env)
|
||||
and os.access(chrome_path_env, os.X_OK)
|
||||
):
|
||||
return chrome_path_env
|
||||
path = input("Google Chrome not found. Please enter the path to the Chrome executable: ")
|
||||
path = input(
|
||||
"Google Chrome not found. Please enter the path to the Chrome executable: "
|
||||
)
|
||||
if os.path.exists(path) and os.access(path, os.X_OK):
|
||||
os.environ["CHROME_EXECUTABLE_PATH"] = path
|
||||
print(f"Chrome path saved to environment variable CHROME_EXECUTABLE_PATH")
|
||||
return path
|
||||
return None
|
||||
|
||||
|
||||
def get_random_user_agent() -> str:
|
||||
"""Get a random user agent string with associated vendor."""
|
||||
user_agents = [
|
||||
{"ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36", "vendor": "Google Inc."},
|
||||
{"ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15", "vendor": "Apple Inc."},
|
||||
{"ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0", "vendor": ""},
|
||||
{
|
||||
"ua": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36",
|
||||
"vendor": "Google Inc.",
|
||||
},
|
||||
{
|
||||
"ua": "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_6_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
|
||||
"vendor": "Apple Inc.",
|
||||
},
|
||||
{
|
||||
"ua": "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
|
||||
"vendor": "",
|
||||
},
|
||||
]
|
||||
return random.choice(user_agents)
|
||||
|
||||
def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx") -> webdriver.Chrome:
|
||||
|
||||
def create_driver(
|
||||
headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx"
|
||||
) -> webdriver.Chrome:
|
||||
"""Create a Chrome WebDriver with specified options."""
|
||||
chrome_options = Options()
|
||||
chrome_path = get_chrome_path()
|
||||
|
||||
|
||||
if not chrome_path:
|
||||
raise FileNotFoundError("Google Chrome not found. Please install it.")
|
||||
chrome_options.binary_location = chrome_path
|
||||
|
||||
|
||||
if headless:
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--disable-gpu")
|
||||
@ -90,31 +124,40 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
|
||||
chrome_options.add_argument(f'user-agent={user_agent["ua"]}')
|
||||
resolutions = [(1920, 1080), (1366, 768), (1440, 900)]
|
||||
width, height = random.choice(resolutions)
|
||||
chrome_options.add_argument(f'--window-size={width},{height}')
|
||||
chrome_options.add_argument(f"--window-size={width},{height}")
|
||||
if not stealth_mode:
|
||||
# crx file can't be installed in stealth mode
|
||||
if not os.path.exists(crx_path):
|
||||
pretty_print(f"Anti-captcha CRX not found at {crx_path}.", color="failure")
|
||||
else:
|
||||
chrome_options.add_extension(crx_path)
|
||||
|
||||
chromedriver_path = shutil.which("chromedriver")
|
||||
if not chromedriver_path:
|
||||
|
||||
chromedriver_path = os.environ.get("CHROMEDRIVER_PATH")
|
||||
if not os.path.exists(chromedriver_path):
|
||||
chromedriver_path = chromedriver_autoinstaller.install()
|
||||
|
||||
|
||||
if not chromedriver_path:
|
||||
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
|
||||
|
||||
raise FileNotFoundError(
|
||||
"ChromeDriver not found. Please install it or add it to your PATH."
|
||||
)
|
||||
|
||||
service = Service(chromedriver_path)
|
||||
if stealth_mode:
|
||||
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
|
||||
driver = uc.Chrome(service=service, options=chrome_options)
|
||||
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
||||
chrome_version = driver.capabilities['browserVersion']
|
||||
stealth(driver,
|
||||
driver.execute_script(
|
||||
"Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
|
||||
)
|
||||
chrome_version = driver.capabilities["browserVersion"]
|
||||
stealth(
|
||||
driver,
|
||||
languages=["en-US", "en"],
|
||||
vendor=user_agent["vendor"],
|
||||
platform="Win64" if "Windows" in user_agent["ua"] else "MacIntel" if "Macintosh" in user_agent["ua"] else "Linux x86_64",
|
||||
platform=(
|
||||
"Win64"
|
||||
if "Windows" in user_agent["ua"]
|
||||
else "MacIntel" if "Macintosh" in user_agent["ua"] else "Linux x86_64"
|
||||
),
|
||||
webgl_vendor="Intel Inc.",
|
||||
renderer="Intel Iris OpenGL Engine",
|
||||
fix_hairline=True,
|
||||
@ -127,13 +170,16 @@ def create_driver(headless=False, stealth_mode=True, crx_path="./crx/nopecha.crx
|
||||
}
|
||||
chrome_options.add_experimental_option("prefs", security_prefs)
|
||||
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||
chrome_options.add_experimental_option('useAutomationExtension', False)
|
||||
chrome_options.add_experimental_option("useAutomationExtension", False)
|
||||
return webdriver.Chrome(service=service, options=chrome_options)
|
||||
|
||||
|
||||
class Browser:
|
||||
def __init__(self, driver, anticaptcha_manual_install=False):
|
||||
"""Initialize the browser with optional AntiCaptcha installation."""
|
||||
self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
||||
self.js_scripts_folder = (
|
||||
"./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
||||
)
|
||||
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
|
||||
self.logger = Logger("browser.log")
|
||||
self.screenshot_folder = os.path.join(os.getcwd(), ".screenshots")
|
||||
@ -146,23 +192,26 @@ class Browser:
|
||||
self.setup_tabs()
|
||||
if anticaptcha_manual_install:
|
||||
self.load_anticatpcha_manually()
|
||||
|
||||
|
||||
def setup_tabs(self):
|
||||
self.tabs = self.driver.window_handles
|
||||
self.driver.get("https://www.google.com")
|
||||
self.screenshot()
|
||||
|
||||
|
||||
def switch_control_tab(self):
|
||||
self.logger.log("Switching to control tab.")
|
||||
self.driver.switch_to.window(self.tabs[0])
|
||||
|
||||
|
||||
def load_anticatpcha_manually(self):
|
||||
pretty_print("You might want to install the AntiCaptcha extension for captchas.", color="warning")
|
||||
pretty_print(
|
||||
"You might want to install the AntiCaptcha extension for captchas.",
|
||||
color="warning",
|
||||
)
|
||||
self.driver.get(self.anticaptcha)
|
||||
|
||||
def go_to(self, url:str) -> bool:
|
||||
def go_to(self, url: str) -> bool:
|
||||
"""Navigate to a specified URL."""
|
||||
time.sleep(random.uniform(0.4, 2.5)) # more human behavior
|
||||
time.sleep(random.uniform(0.4, 2.5)) # more human behavior
|
||||
try:
|
||||
initial_handles = self.driver.window_handles
|
||||
self.driver.get(url)
|
||||
@ -170,12 +219,17 @@ class Browser:
|
||||
wait = WebDriverWait(self.driver, timeout=10)
|
||||
wait.until(
|
||||
lambda driver: (
|
||||
not any(keyword in driver.page_source.lower() for keyword in ["checking your browser", "captcha"])
|
||||
not any(
|
||||
keyword in driver.page_source.lower()
|
||||
for keyword in ["checking your browser", "captcha"]
|
||||
)
|
||||
),
|
||||
message="stuck on 'checking browser' or verification screen"
|
||||
message="stuck on 'checking browser' or verification screen",
|
||||
)
|
||||
except TimeoutException:
|
||||
self.logger.warning("Timeout while waiting for page to bypass 'checking your browser'")
|
||||
self.logger.warning(
|
||||
"Timeout while waiting for page to bypass 'checking your browser'"
|
||||
)
|
||||
self.apply_web_safety()
|
||||
self.logger.log(f"Navigated to: {url}")
|
||||
return True
|
||||
@ -189,30 +243,33 @@ class Browser:
|
||||
self.logger.error(f"Fatal error with go_to method on {url}:\n{str(e)}")
|
||||
raise e
|
||||
|
||||
def is_sentence(self, text:str) -> bool:
|
||||
def is_sentence(self, text: str) -> bool:
|
||||
"""Check if the text qualifies as a meaningful sentence or contains important error codes."""
|
||||
text = text.strip()
|
||||
|
||||
if any(c.isdigit() for c in text):
|
||||
return True
|
||||
words = re.findall(r'\w+', text, re.UNICODE)
|
||||
words = re.findall(r"\w+", text, re.UNICODE)
|
||||
word_count = len(words)
|
||||
has_punctuation = any(text.endswith(p) for p in ['.', ',', ',', '!', '?', '。', '!', '?', '।', '۔'])
|
||||
has_punctuation = any(
|
||||
text.endswith(p)
|
||||
for p in [".", ",", ",", "!", "?", "。", "!", "?", "।", "۔"]
|
||||
)
|
||||
is_long_enough = word_count > 4
|
||||
return (word_count >= 5 and (has_punctuation or is_long_enough))
|
||||
return word_count >= 5 and (has_punctuation or is_long_enough)
|
||||
|
||||
def get_text(self) -> str | None:
|
||||
"""Get page text as formatted Markdown"""
|
||||
try:
|
||||
soup = BeautifulSoup(self.driver.page_source, 'html.parser')
|
||||
for element in soup(['script', 'style', 'noscript', 'meta', 'link']):
|
||||
soup = BeautifulSoup(self.driver.page_source, "html.parser")
|
||||
for element in soup(["script", "style", "noscript", "meta", "link"]):
|
||||
element.decompose()
|
||||
markdown_converter = markdownify.MarkdownConverter(
|
||||
heading_style="ATX",
|
||||
strip=['a'],
|
||||
strip=["a"],
|
||||
autolinks=False,
|
||||
bullets='•',
|
||||
strong_em_symbol='*',
|
||||
bullets="•",
|
||||
strong_em_symbol="*",
|
||||
default_title=False,
|
||||
)
|
||||
markdown_text = markdown_converter.convert(str(soup.body))
|
||||
@ -220,35 +277,43 @@ class Browser:
|
||||
for line in markdown_text.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped and self.is_sentence(stripped):
|
||||
cleaned = ' '.join(stripped.split())
|
||||
cleaned = " ".join(stripped.split())
|
||||
lines.append(cleaned)
|
||||
result = "[Start of page]\n\n" + "\n\n".join(lines) + "\n\n[End of page]"
|
||||
result = re.sub(r'!\[(.*?)\]\(.*?\)', r'[IMAGE: \1]', result)
|
||||
result = re.sub(r"!\[(.*?)\]\(.*?\)", r"[IMAGE: \1]", result)
|
||||
self.logger.info(f"Extracted text: {result[:100]}...")
|
||||
self.logger.info(f"Extracted text length: {len(result)}")
|
||||
return result[:8192]
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error getting text: {str(e)}")
|
||||
return None
|
||||
|
||||
def clean_url(self, url:str) -> str:
|
||||
|
||||
def clean_url(self, url: str) -> str:
|
||||
"""Clean URL to keep only the part needed for navigation to the page"""
|
||||
clean = url.split('#')[0]
|
||||
parts = clean.split('?', 1)
|
||||
clean = url.split("#")[0]
|
||||
parts = clean.split("?", 1)
|
||||
base_url = parts[0]
|
||||
if len(parts) > 1:
|
||||
query = parts[1]
|
||||
essential_params = []
|
||||
for param in query.split('&'):
|
||||
if param.startswith('_skw=') or param.startswith('q=') or param.startswith('s='):
|
||||
for param in query.split("&"):
|
||||
if (
|
||||
param.startswith("_skw=")
|
||||
or param.startswith("q=")
|
||||
or param.startswith("s=")
|
||||
):
|
||||
essential_params.append(param)
|
||||
elif param.startswith('_') or param.startswith('hash=') or param.startswith('itmmeta='):
|
||||
elif (
|
||||
param.startswith("_")
|
||||
or param.startswith("hash=")
|
||||
or param.startswith("itmmeta=")
|
||||
):
|
||||
break
|
||||
if essential_params:
|
||||
return f"{base_url}?{'&'.join(essential_params)}"
|
||||
return base_url
|
||||
|
||||
def is_link_valid(self, url:str) -> bool:
|
||||
|
||||
def is_link_valid(self, url: str) -> bool:
|
||||
"""Check if a URL is a valid link (page, not related to icon or metadata)."""
|
||||
if len(url) > 72:
|
||||
self.logger.warning(f"URL too long: {url}")
|
||||
@ -257,10 +322,10 @@ class Browser:
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
self.logger.warning(f"Invalid URL: {url}")
|
||||
return False
|
||||
if re.search(r'/\d+$', parsed_url.path):
|
||||
if re.search(r"/\d+$", parsed_url.path):
|
||||
return False
|
||||
image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp']
|
||||
metadata_extensions = ['.ico', '.xml', '.json', '.rss', '.atom']
|
||||
image_extensions = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".webp"]
|
||||
metadata_extensions = [".ico", ".xml", ".json", ".rss", ".atom"]
|
||||
for ext in image_extensions + metadata_extensions:
|
||||
if url.lower().endswith(ext):
|
||||
return False
|
||||
@ -271,18 +336,24 @@ class Browser:
|
||||
try:
|
||||
links = []
|
||||
elements = self.driver.find_elements(By.TAG_NAME, "a")
|
||||
|
||||
|
||||
for element in elements:
|
||||
href = element.get_attribute("href")
|
||||
if href and href.startswith(("http", "https")):
|
||||
links.append({
|
||||
"url": href,
|
||||
"text": element.text.strip(),
|
||||
"is_displayed": element.is_displayed()
|
||||
})
|
||||
|
||||
links.append(
|
||||
{
|
||||
"url": href,
|
||||
"text": element.text.strip(),
|
||||
"is_displayed": element.is_displayed(),
|
||||
}
|
||||
)
|
||||
|
||||
self.logger.info(f"Found {len(links)} navigable links")
|
||||
return [self.clean_url(link['url']) for link in links if (link['is_displayed'] == True and self.is_link_valid(link['url']))]
|
||||
return [
|
||||
self.clean_url(link["url"])
|
||||
for link in links
|
||||
if (link["is_displayed"] == True and self.is_link_valid(link["url"]))
|
||||
]
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error getting navigable links: {str(e)}")
|
||||
return []
|
||||
@ -297,7 +368,10 @@ class Browser:
|
||||
return False
|
||||
try:
|
||||
self.logger.error(f"Scrolling to element for click_element.")
|
||||
self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});", element)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});",
|
||||
element,
|
||||
)
|
||||
time.sleep(0.1)
|
||||
element.click()
|
||||
self.logger.info(f"Clicked element at {xpath}")
|
||||
@ -311,13 +385,13 @@ class Browser:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Unexpected error clicking element at {xpath}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def load_js(self, file_name: str) -> str:
|
||||
"""Load javascript from script folder to inject to page."""
|
||||
path = os.path.join(self.js_scripts_folder, file_name)
|
||||
self.logger.info(f"Loading js at {path}")
|
||||
try:
|
||||
with open(path, 'r') as f:
|
||||
with open(path, "r") as f:
|
||||
return f.read()
|
||||
except FileNotFoundError as e:
|
||||
raise Exception(f"Could not find: {path}") from e
|
||||
@ -349,17 +423,22 @@ class Browser:
|
||||
form_strings = []
|
||||
for element in input_elements:
|
||||
input_type = element.get("type") or "text"
|
||||
if input_type in ["hidden", "submit", "button", "image"] or not element["displayed"]:
|
||||
if (
|
||||
input_type in ["hidden", "submit", "button", "image"]
|
||||
or not element["displayed"]
|
||||
):
|
||||
continue
|
||||
input_name = element.get("text") or element.get("id") or input_type
|
||||
if input_type == "checkbox" or input_type == "radio":
|
||||
try:
|
||||
checked_status = "checked" if element.is_selected() else "unchecked"
|
||||
checked_status = (
|
||||
"checked" if element.is_selected() else "unchecked"
|
||||
)
|
||||
except Exception as e:
|
||||
continue
|
||||
form_strings.append(f"[{input_name}]({checked_status})")
|
||||
else:
|
||||
form_strings.append(f"[{input_name}]("")")
|
||||
form_strings.append(f"[{input_name}](" ")")
|
||||
return form_strings
|
||||
|
||||
except Exception as e:
|
||||
@ -369,13 +448,18 @@ class Browser:
|
||||
"""
|
||||
Find buttons and return their type and xpath.
|
||||
"""
|
||||
buttons = self.driver.find_elements(By.TAG_NAME, "button") + \
|
||||
self.driver.find_elements(By.XPATH, "//input[@type='submit']")
|
||||
buttons = self.driver.find_elements(
|
||||
By.TAG_NAME, "button"
|
||||
) + self.driver.find_elements(By.XPATH, "//input[@type='submit']")
|
||||
result = []
|
||||
for i, button in enumerate(buttons):
|
||||
if not button.is_displayed() or not button.is_enabled():
|
||||
continue
|
||||
text = (button.text or button.get_attribute("value") or "").lower().replace(' ', '')
|
||||
text = (
|
||||
(button.text or button.get_attribute("value") or "")
|
||||
.lower()
|
||||
.replace(" ", "")
|
||||
)
|
||||
xpath = f"(//button | //input[@type='submit'])[{i + 1}]"
|
||||
result.append((text, xpath))
|
||||
result.sort(key=lambda x: len(x[0]))
|
||||
@ -389,8 +473,8 @@ class Browser:
|
||||
self.logger.info("Waiting for submission outcome...")
|
||||
wait = WebDriverWait(self.driver, timeout)
|
||||
wait.until(
|
||||
lambda driver: driver.current_url != self.driver.current_url or
|
||||
driver.find_elements(By.XPATH, "//*[contains(text(), 'success')]")
|
||||
lambda driver: driver.current_url != self.driver.current_url
|
||||
or driver.find_elements(By.XPATH, "//*[contains(text(), 'success')]")
|
||||
)
|
||||
self.logger.info("Detected submission outcome")
|
||||
return True
|
||||
@ -398,7 +482,7 @@ class Browser:
|
||||
self.logger.warning("No submission outcome detected")
|
||||
return False
|
||||
|
||||
def find_and_click_btn(self, btn_type: str = 'login', timeout: int = 5) -> bool:
|
||||
def find_and_click_btn(self, btn_type: str = "login", timeout: int = 5) -> bool:
|
||||
"""Find and click a submit button matching the specified type."""
|
||||
buttons = self.get_buttons_xpath()
|
||||
if not buttons:
|
||||
@ -406,24 +490,35 @@ class Browser:
|
||||
return False
|
||||
|
||||
for button_text, xpath in buttons:
|
||||
if btn_type.lower() in button_text.lower() or btn_type.lower() in xpath.lower():
|
||||
if (
|
||||
btn_type.lower() in button_text.lower()
|
||||
or btn_type.lower() in xpath.lower()
|
||||
):
|
||||
try:
|
||||
wait = WebDriverWait(self.driver, timeout)
|
||||
element = wait.until(
|
||||
EC.element_to_be_clickable((By.XPATH, xpath)),
|
||||
message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds"
|
||||
message=f"Button with XPath '{xpath}' not clickable within {timeout} seconds",
|
||||
)
|
||||
if self.click_element(xpath):
|
||||
self.logger.info(f"Clicked button '{button_text}' at XPath: {xpath}")
|
||||
self.logger.info(
|
||||
f"Clicked button '{button_text}' at XPath: {xpath}"
|
||||
)
|
||||
return True
|
||||
else:
|
||||
self.logger.warning(f"Button '{button_text}' at XPath: {xpath} not clickable")
|
||||
self.logger.warning(
|
||||
f"Button '{button_text}' at XPath: {xpath} not clickable"
|
||||
)
|
||||
return False
|
||||
except TimeoutException:
|
||||
self.logger.warning(f"Timeout waiting for '{button_text}' button at XPath: {xpath}")
|
||||
self.logger.warning(
|
||||
f"Timeout waiting for '{button_text}' button at XPath: {xpath}"
|
||||
)
|
||||
return False
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error clicking button '{button_text}' at XPath: {xpath} - {str(e)}")
|
||||
self.logger.error(
|
||||
f"Error clicking button '{button_text}' at XPath: {xpath} - {str(e)}"
|
||||
)
|
||||
return False
|
||||
self.logger.warning(f"No button matching '{btn_type}' found")
|
||||
return False
|
||||
@ -434,7 +529,9 @@ class Browser:
|
||||
Returns True if successful, False if any issues occur.
|
||||
"""
|
||||
try:
|
||||
checkboxes = self.driver.find_elements(By.XPATH, "//input[@type='checkbox']")
|
||||
checkboxes = self.driver.find_elements(
|
||||
By.XPATH, "//input[@type='checkbox']"
|
||||
)
|
||||
if not checkboxes:
|
||||
self.logger.info("No checkboxes found on the page")
|
||||
return True
|
||||
@ -445,19 +542,24 @@ class Browser:
|
||||
EC.element_to_be_clickable(checkbox)
|
||||
)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollIntoView({block: 'center', inline: 'center'});", checkbox
|
||||
"arguments[0].scrollIntoView({block: 'center', inline: 'center'});",
|
||||
checkbox,
|
||||
)
|
||||
if not checkbox.is_selected():
|
||||
try:
|
||||
checkbox.click()
|
||||
self.logger.info(f"Ticked checkbox {index}")
|
||||
except ElementClickInterceptedException:
|
||||
self.driver.execute_script("arguments[0].click();", checkbox)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].click();", checkbox
|
||||
)
|
||||
self.logger.warning(f"Click checkbox {index} intercepted")
|
||||
else:
|
||||
self.logger.info(f"Checkbox {index} already ticked")
|
||||
except TimeoutException:
|
||||
self.logger.warning(f"Timeout waiting for checkbox {index} to be clickable")
|
||||
self.logger.warning(
|
||||
f"Timeout waiting for checkbox {index} to be clickable"
|
||||
)
|
||||
continue
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error ticking checkbox {index}: {str(e)}")
|
||||
@ -468,16 +570,28 @@ class Browser:
|
||||
return False
|
||||
|
||||
def find_and_click_submission(self, timeout: int = 10) -> bool:
|
||||
possible_submissions = ["login", "submit", "register", "continue", "apply",
|
||||
"ok", "confirm", "proceed", "accept",
|
||||
"done", "finish", "start", "calculate"]
|
||||
possible_submissions = [
|
||||
"login",
|
||||
"submit",
|
||||
"register",
|
||||
"continue",
|
||||
"apply",
|
||||
"ok",
|
||||
"confirm",
|
||||
"proceed",
|
||||
"accept",
|
||||
"done",
|
||||
"finish",
|
||||
"start",
|
||||
"calculate",
|
||||
]
|
||||
for submission in possible_submissions:
|
||||
if self.find_and_click_btn(submission, timeout):
|
||||
self.logger.info(f"Clicked on submission button: {submission}")
|
||||
return True
|
||||
self.logger.warning("No submission button found")
|
||||
return False
|
||||
|
||||
|
||||
def find_input_xpath_by_name(self, inputs, name: str) -> str | None:
|
||||
for field in inputs:
|
||||
if name in field["text"]:
|
||||
@ -492,7 +606,7 @@ class Browser:
|
||||
inputs = self.find_all_inputs()
|
||||
try:
|
||||
for input_str in input_list:
|
||||
match = re.match(r'\[(.*?)\]\((.*?)\)', input_str)
|
||||
match = re.match(r"\[(.*?)\]\((.*?)\)", input_str)
|
||||
if not match:
|
||||
self.logger.warning(f"Invalid format for input: {input_str}")
|
||||
continue
|
||||
@ -509,11 +623,17 @@ class Browser:
|
||||
EC.element_to_be_clickable((By.XPATH, xpath))
|
||||
)
|
||||
except TimeoutException:
|
||||
self.logger.error(f"Timeout waiting for element '{name}' to be clickable")
|
||||
self.logger.error(
|
||||
f"Timeout waiting for element '{name}' to be clickable"
|
||||
)
|
||||
continue
|
||||
self.driver.execute_script("arguments[0].scrollIntoView(true);", element)
|
||||
self.driver.execute_script(
|
||||
"arguments[0].scrollIntoView(true);", element
|
||||
)
|
||||
if not element.is_displayed() or not element.is_enabled():
|
||||
self.logger.warning(f"Element '{name}' is not interactable (not displayed or disabled)")
|
||||
self.logger.warning(
|
||||
f"Element '{name}' is not interactable (not displayed or disabled)"
|
||||
)
|
||||
continue
|
||||
input_type = (element.get_attribute("type") or "text").lower()
|
||||
if input_type in ["checkbox", "radio"]:
|
||||
@ -531,7 +651,7 @@ class Browser:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error filling form inputs: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def fill_form(self, input_list: List[str]) -> bool:
|
||||
"""Fill form inputs based on a list of [name](value) and submit."""
|
||||
if not isinstance(input_list, list):
|
||||
@ -571,11 +691,11 @@ class Browser:
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error scrolling: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def get_screenshot(self) -> str:
|
||||
return self.screenshot_folder + "/updated_screen.png"
|
||||
|
||||
def screenshot(self, filename:str = 'updated_screen.png') -> bool:
|
||||
def screenshot(self, filename: str = "updated_screen.png") -> bool:
|
||||
"""Take a screenshot of the current page."""
|
||||
self.logger.info("Taking screenshot...")
|
||||
time.sleep(0.1)
|
||||
@ -598,18 +718,19 @@ class Browser:
|
||||
script = self.load_js("inject_safety_script.js")
|
||||
input_elements = self.driver.execute_script(script)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
driver = create_driver(headless=False, stealth_mode=True)
|
||||
browser = Browser(driver, anticaptcha_manual_install=True)
|
||||
|
||||
|
||||
input("press enter to continue")
|
||||
print("AntiCaptcha / Form Test")
|
||||
#browser.go_to("https://www.browserscan.net/bot-detection")
|
||||
#txt = browser.get_text()
|
||||
#browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
||||
# browser.go_to("https://www.browserscan.net/bot-detection")
|
||||
# txt = browser.get_text()
|
||||
# browser.go_to("https://www.google.com/recaptcha/api2/demo")
|
||||
browser.go_to("https://home.openweathermap.org/users/sign_up")
|
||||
inputs_visible = browser.get_form_inputs()
|
||||
print("inputs:", inputs_visible)
|
||||
#inputs_fill = ['[q](checked)', '[q](checked)', '[user[username]](mlg)', '[user[email]](mlg.fcu@gmail.com)', '[user[password]](placeholder_P@ssw0rd123)', '[user[password_confirmation]](placeholder_P@ssw0rd123)']
|
||||
#browser.fill_form(inputs_fill)
|
||||
# inputs_fill = ['[q](checked)', '[q](checked)', '[user[username]](mlg)', '[user[email]](mlg.fcu@gmail.com)', '[user[password]](placeholder_P@ssw0rd123)', '[user[password_confirmation]](placeholder_P@ssw0rd123)']
|
||||
# browser.fill_form(inputs_fill)
|
||||
input("press enter to exit")
|
||||
|
@ -72,6 +72,8 @@ class Provider:
|
||||
except ModuleNotFoundError as e:
|
||||
raise ModuleNotFoundError(f"{str(e)}\nA import related to provider {self.provider_name} was not found. Is it installed ?")
|
||||
except Exception as e:
|
||||
if "try again later" in str(e).lower():
|
||||
return f"{self.provider_name} server is overloaded. Please try again later."
|
||||
if "refused" in str(e):
|
||||
return f"Server {self.server_ip} seem offline. Unable to answer."
|
||||
raise Exception(f"Provider {self.provider_name} failed: {str(e)}") from e
|
||||
@ -214,7 +216,7 @@ class Provider:
|
||||
"""
|
||||
base_url = self.server_ip
|
||||
if self.is_local:
|
||||
raise Exception("Google Gemini is not available for local use.")
|
||||
raise Exception("Google Gemini is not available for local use. Change config.ini")
|
||||
|
||||
client = OpenAI(api_key=self.api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
|
||||
try:
|
||||
@ -237,6 +239,8 @@ class Provider:
|
||||
"""
|
||||
from together import Together
|
||||
client = Together(api_key=self.api_key)
|
||||
if self.is_local:
|
||||
raise Exception("Together AI is not available for local use. Change config.ini")
|
||||
|
||||
try:
|
||||
response = client.chat.completions.create(
|
||||
@ -257,6 +261,8 @@ class Provider:
|
||||
Use deepseek api to generate text.
|
||||
"""
|
||||
client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
|
||||
if self.is_local:
|
||||
raise Exception("Deepseek (API) is not available for local use. Change config.ini")
|
||||
try:
|
||||
response = client.chat.completions.create(
|
||||
model="deepseek-chat",
|
||||
|
@ -9,7 +9,10 @@ from kokoro import KPipeline
|
||||
from IPython.display import display, Audio
|
||||
import soundfile as sf
|
||||
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
if __name__ == "__main__":
|
||||
from utility import pretty_print, animate_thinking
|
||||
else:
|
||||
from sources.utility import pretty_print, animate_thinking
|
||||
|
||||
class Speech():
|
||||
"""
|
||||
@ -140,6 +143,7 @@ class Speech():
|
||||
return sentence
|
||||
|
||||
if __name__ == "__main__":
|
||||
# TODO add info message for cn2an, jieba chinese related import
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
speech = Speech()
|
||||
tosay_en = """
|
||||
|
@ -1,15 +1,14 @@
|
||||
|
||||
import sys
|
||||
import os, sys
|
||||
import re
|
||||
from io import StringIO
|
||||
import subprocess
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
from safety import is_unsafe
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
from sources.tools.safety import is_unsafe
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
from sources.tools.safety import is_unsafe
|
||||
|
||||
class BashInterpreter(Tools):
|
||||
"""
|
||||
|
@ -1,12 +1,12 @@
|
||||
import subprocess
|
||||
import os
|
||||
import os, sys
|
||||
import tempfile
|
||||
import re
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class CInterpreter(Tools):
|
||||
"""
|
||||
|
@ -1,12 +1,12 @@
|
||||
import subprocess
|
||||
import os
|
||||
import os, sys
|
||||
import tempfile
|
||||
import re
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class GoInterpreter(Tools):
|
||||
"""
|
||||
|
@ -1,12 +1,12 @@
|
||||
import subprocess
|
||||
import os
|
||||
import os, sys
|
||||
import tempfile
|
||||
import re
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class JavaInterpreter(Tools):
|
||||
"""
|
||||
|
@ -4,10 +4,10 @@ import os
|
||||
import re
|
||||
from io import StringIO
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class PyInterpreter(Tools):
|
||||
"""
|
||||
|
@ -1,13 +1,12 @@
|
||||
import os
|
||||
import os, sys
|
||||
import stat
|
||||
import mimetypes
|
||||
import configparser
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class FileFinder(Tools):
|
||||
"""
|
||||
@ -30,14 +29,46 @@ class FileFinder(Tools):
|
||||
return file.read()
|
||||
except Exception as e:
|
||||
return f"Error reading file: {e}"
|
||||
|
||||
def read_arbitrary_file(self, file_path: str, file_type: str) -> str:
|
||||
"""
|
||||
Reads the content of a file with arbitrary encoding.
|
||||
Args:
|
||||
file_path (str): The path to the file to read
|
||||
Returns:
|
||||
str: The content of the file in markdown format
|
||||
"""
|
||||
mime_type, _ = mimetypes.guess_type(file_path)
|
||||
if mime_type:
|
||||
if mime_type.startswith(('image/', 'video/', 'audio/')):
|
||||
return "can't read file type: image, video, or audio files are not supported."
|
||||
content_raw = self.read_file(file_path)
|
||||
if "text" in file_type:
|
||||
content = content_raw
|
||||
elif "pdf" in file_type:
|
||||
from pypdf import PdfReader
|
||||
reader = PdfReader(file_path)
|
||||
content = '\n'.join([pt.extract_text() for pt in reader.pages])
|
||||
elif "binary" in file_type:
|
||||
content = content_raw.decode('utf-8', errors='replace')
|
||||
else:
|
||||
content = content_raw
|
||||
return content
|
||||
|
||||
def get_file_info(self, file_path: str) -> str:
|
||||
"""
|
||||
Gets information about a file, including its name, path, type, content, and permissions.
|
||||
Args:
|
||||
file_path (str): The path to the file
|
||||
Returns:
|
||||
str: A dictionary containing the file information
|
||||
"""
|
||||
if os.path.exists(file_path):
|
||||
stats = os.stat(file_path)
|
||||
permissions = oct(stat.S_IMODE(stats.st_mode))
|
||||
file_type, _ = mimetypes.guess_type(file_path)
|
||||
file_type = file_type if file_type else "Unknown"
|
||||
content = self.read_file(file_path)
|
||||
content = self.read_arbitrary_file(file_path, file_type)
|
||||
|
||||
result = {
|
||||
"filename": os.path.basename(file_path),
|
||||
|
@ -1,13 +1,13 @@
|
||||
import os
|
||||
import os, sys
|
||||
import requests
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class FlightSearch(Tools):
|
||||
def __init__(self, api_key: str = None):
|
||||
|
@ -1,12 +1,13 @@
|
||||
import os
|
||||
import os, sys
|
||||
import requests
|
||||
from urllib.parse import urljoin
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
|
||||
class MCP_finder(Tools):
|
||||
"""
|
||||
|
@ -2,10 +2,10 @@ import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
|
||||
if __name__ == "__main__":
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class searxSearch(Tools):
|
||||
def __init__(self, base_url: str = None):
|
||||
|
@ -14,13 +14,17 @@ For example:
|
||||
print("Hello world")
|
||||
```
|
||||
This is then executed by the tool with its own class implementation of execute().
|
||||
A tool is not just for code tool but also API, internet, etc..
|
||||
A tool is not just for code tool but also API, internet search, MCP, etc..
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import configparser
|
||||
from abc import abstractmethod
|
||||
|
||||
if __name__ == "__main__": # if running as a script for individual testing
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from sources.logger import Logger
|
||||
|
||||
class Tools():
|
||||
|
@ -5,14 +5,8 @@ import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from utility import animate_thinking, pretty_print
|
||||
from tools import Tools
|
||||
else:
|
||||
from sources.tools.tools import Tools
|
||||
from sources.utility import animate_thinking, pretty_print
|
||||
from sources.tools.tools import Tools
|
||||
from sources.utility import animate_thinking, pretty_print
|
||||
|
||||
"""
|
||||
WARNING
|
||||
|
@ -1,8 +1,76 @@
|
||||
@echo off
|
||||
|
||||
REM Up the provider in windows
|
||||
start ollama serve
|
||||
REM Check if Docker is running
|
||||
docker info >nul 2>&1
|
||||
if %ERRORLEVEL% neq 0 (
|
||||
echo Error: Docker daemon is not running or inaccessible.
|
||||
echo Please ensure Docker Desktop is running.
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
REM Check if docker-compose.yml exists
|
||||
if not exist docker-compose.yml (
|
||||
echo Error: docker-compose.yml not found in the current directory.
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
REM Check and download Chrome bundle if not present
|
||||
echo Checking Chrome bundle...
|
||||
if not exist chrome_bundle\chrome136 (
|
||||
echo Chrome bundle not found. Downloading...
|
||||
if not exist chrome_bundle mkdir chrome_bundle
|
||||
curl -L https://github.com/tcsenpai/agenticSeek/releases/download/utility/chrome136.zip -o %TEMP%\chrome136.zip
|
||||
if %ERRORLEVEL% neq 0 (
|
||||
echo Error: Failed to download Chrome bundle
|
||||
exit /b 1
|
||||
)
|
||||
powershell -Command "Expand-Archive -Path '%TEMP%\chrome136.zip' -DestinationPath 'chrome_bundle' -Force"
|
||||
if %ERRORLEVEL% neq 0 (
|
||||
echo Error: Failed to extract Chrome bundle
|
||||
exit /b 1
|
||||
)
|
||||
del %TEMP%\chrome136.zip
|
||||
echo Chrome bundle downloaded and extracted successfully
|
||||
) else (
|
||||
echo Chrome bundle already exists
|
||||
)
|
||||
|
||||
REM Stop only containers in our project's network
|
||||
echo Stopping project containers...
|
||||
docker-compose down
|
||||
|
||||
REM Start Ollama in the background
|
||||
echo Starting Ollama...
|
||||
start /B ollama serve
|
||||
|
||||
REM First start python-env
|
||||
echo Starting python-env service...
|
||||
docker-compose up -d python-env
|
||||
if %ERRORLEVEL% neq 0 (
|
||||
echo Error: Failed to start python-env container.
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
REM Wait for python-env to be healthy
|
||||
echo Waiting for python-env to be ready...
|
||||
set /a count=0
|
||||
:wait_loop
|
||||
docker inspect -f "{{.State.Running}}" python-env | findstr "true" >nul
|
||||
if %ERRORLEVEL% neq 0 (
|
||||
set /a count+=1
|
||||
if %count% gtr 30 (
|
||||
echo Error: python-env failed to start properly after 30 seconds
|
||||
docker-compose logs python-env
|
||||
exit /b 1
|
||||
)
|
||||
timeout /t 1 /nobreak >nul
|
||||
goto wait_loop
|
||||
)
|
||||
|
||||
echo python-env is ready!
|
||||
|
||||
REM Now start the rest of the services
|
||||
echo Starting remaining services...
|
||||
docker-compose up
|
||||
if %ERRORLEVEL% neq 0 (
|
||||
echo Error: Failed to start containers. Check Docker logs with 'docker compose logs'.
|
||||
|
@ -63,17 +63,60 @@ if [ ! -f "docker-compose.yml" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# start docker compose for searxng, redis, frontend services
|
||||
echo "Warning: stopping all docker containers (t-4 seconds)..."
|
||||
sleep 4
|
||||
docker stop $(docker ps -a -q)
|
||||
echo "All containers stopped"
|
||||
# Download and extract Chrome bundle if not present
|
||||
echo "Checking Chrome bundle..."
|
||||
if [ ! -d "chrome_bundle/chrome136" ]; then
|
||||
echo "Chrome bundle not found. Downloading..."
|
||||
mkdir -p chrome_bundle
|
||||
curl -L https://github.com/tcsenpai/agenticSeek/releases/download/utility/chrome136.zip -o /tmp/chrome136.zip
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: Failed to download Chrome bundle"
|
||||
exit 1
|
||||
fi
|
||||
unzip -q /tmp/chrome136.zip -d chrome_bundle/
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Error: Failed to extract Chrome bundle"
|
||||
exit 1
|
||||
fi
|
||||
rm /tmp/chrome136.zip
|
||||
echo "Chrome bundle downloaded and extracted successfully"
|
||||
else
|
||||
echo "Chrome bundle already exists"
|
||||
fi
|
||||
|
||||
# Stop only containers in our project's network
|
||||
echo "Stopping project containers..."
|
||||
$COMPOSE_CMD down
|
||||
|
||||
# First start python-env and wait for it to be healthy
|
||||
echo "Starting python-env service..."
|
||||
if ! $COMPOSE_CMD up -d python-env; then
|
||||
echo "Error: Failed to start python-env container."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Wait for python-env to be healthy (check if it's running and not restarting)
|
||||
echo "Waiting for python-env to be ready..."
|
||||
for i in {1..30}; do
|
||||
if [ "$(docker inspect -f '{{.State.Running}}' python-env)" = "true" ] && \
|
||||
[ "$(docker inspect -f '{{.State.Restarting}}' python-env)" = "false" ]; then
|
||||
echo "python-env is ready!"
|
||||
break
|
||||
fi
|
||||
if [ $i -eq 30 ]; then
|
||||
echo "Error: python-env failed to start properly after 30 seconds"
|
||||
$COMPOSE_CMD logs python-env
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
# Now start the rest of the services
|
||||
echo "Starting remaining services..."
|
||||
if ! $COMPOSE_CMD up; then
|
||||
echo "Error: Failed to start containers. Check Docker logs with '$COMPOSE_CMD logs'."
|
||||
echo "Possible fixes: Run with sudo or ensure port 8080 is free."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
sleep 10
|
@ -17,13 +17,13 @@ class TestBrowserAgentParsing(unittest.TestCase):
|
||||
# Test various link formats
|
||||
test_text = """
|
||||
Check this out: https://thriveonai.com/15-ai-startups-in-japan-to-take-note-of, and www.google.com!
|
||||
Also try https://test.org/about?page=1, hey this one as well bro https://weatherstack.com/documentation.
|
||||
Also try https://test.org/about?page=1, hey this one as well bro https://weatherstack.com/documentation/.
|
||||
"""
|
||||
expected = [
|
||||
"https://thriveonai.com/15-ai-startups-in-japan-to-take-note-of",
|
||||
"www.google.com",
|
||||
"https://test.org/about?page=1",
|
||||
"https://weatherstack.com/documentation"
|
||||
"https://weatherstack.com/documentation",
|
||||
]
|
||||
result = self.agent.extract_links(test_text)
|
||||
self.assertEqual(result, expected)
|
||||
|
Loading…
x
Reference in New Issue
Block a user