mirror of
https://github.com/tcsenpai/agenticSeek.git
synced 2025-06-06 11:05:26 +00:00
fix : browser duplication, isolate driver creation
This commit is contained in:
parent
9080697dc0
commit
8c425f62b6
14
main.py
14
main.py
@ -8,6 +8,7 @@ import configparser
|
||||
from sources.llm_provider import Provider
|
||||
from sources.interaction import Interaction
|
||||
from sources.agents import Agent, CoderAgent, CasualAgent, FileAgent, PlannerAgent, BrowserAgent
|
||||
from sources.browser import Browser, create_driver
|
||||
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
@ -28,6 +29,8 @@ def main():
|
||||
model=config["MAIN"]["provider_model"],
|
||||
server_address=config["MAIN"]["provider_server_address"])
|
||||
|
||||
browser = Browser(create_driver(), headless=False)
|
||||
|
||||
agents = [
|
||||
CasualAgent(name=config["MAIN"]["agent_name"],
|
||||
prompt_path="prompts/casual_agent.txt",
|
||||
@ -40,16 +43,17 @@ def main():
|
||||
provider=provider, verbose=False),
|
||||
BrowserAgent(name="Browser",
|
||||
prompt_path="prompts/browser_agent.txt",
|
||||
provider=provider, verbose=False),
|
||||
provider=provider, verbose=False, browser=browser),
|
||||
# Planner agent is experimental, might work poorly, especially with model < 32b
|
||||
PlannerAgent(name="Planner",
|
||||
prompt_path="prompts/planner_agent.txt",
|
||||
provider=provider, verbose=False)
|
||||
provider=provider, verbose=False, browser=browser)
|
||||
]
|
||||
|
||||
interaction = Interaction(agents, tts_enabled=config.getboolean('MAIN', 'speak'),
|
||||
stt_enabled=config.getboolean('MAIN', 'listen'),
|
||||
recover_last_session=config.getboolean('MAIN', 'recover_last_session'))
|
||||
interaction = Interaction(agents,
|
||||
tts_enabled=config.getboolean('MAIN', 'speak'),
|
||||
stt_enabled=config.getboolean('MAIN', 'listen'),
|
||||
recover_last_session=config.getboolean('MAIN', 'recover_last_session'))
|
||||
try:
|
||||
while interaction.is_active:
|
||||
interaction.get_user()
|
||||
|
@ -34,7 +34,8 @@ class Agent():
|
||||
prompt_path:str,
|
||||
provider,
|
||||
recover_last_session=True,
|
||||
verbose=False) -> None:
|
||||
verbose=False,
|
||||
browser=None) -> None:
|
||||
"""
|
||||
Args:
|
||||
name (str): Name of the agent.
|
||||
@ -42,9 +43,11 @@ class Agent():
|
||||
provider: The provider for the LLM.
|
||||
recover_last_session (bool, optional): Whether to recover the last conversation.
|
||||
verbose (bool, optional): Enable verbose logging if True. Defaults to False.
|
||||
browser: The browser class for web navigation (only for browser agent).
|
||||
"""
|
||||
|
||||
self.agent_name = name
|
||||
self.browser = browser
|
||||
self.role = None
|
||||
self.type = None
|
||||
self.current_directory = os.getcwd()
|
||||
|
@ -9,11 +9,11 @@ from datetime import date
|
||||
from typing import List, Tuple
|
||||
|
||||
class BrowserAgent(Agent):
|
||||
def __init__(self, name, prompt_path, provider, verbose=False):
|
||||
def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
|
||||
"""
|
||||
The Browser agent is an agent that navigate the web autonomously in search of answer
|
||||
"""
|
||||
super().__init__(name, prompt_path, provider, verbose)
|
||||
super().__init__(name, prompt_path, provider, verbose, browser)
|
||||
self.tools = {
|
||||
"web_search": searxSearch(),
|
||||
}
|
||||
@ -24,7 +24,7 @@ class BrowserAgent(Agent):
|
||||
"es": "web"
|
||||
}
|
||||
self.type = "browser_agent"
|
||||
self.browser = Browser()
|
||||
self.browser = browser
|
||||
self.current_page = ""
|
||||
self.search_history = []
|
||||
self.navigable_links = []
|
||||
|
@ -11,7 +11,7 @@ class CasualAgent(Agent):
|
||||
"""
|
||||
The casual agent is a special for casual talk to the user without specific tasks.
|
||||
"""
|
||||
super().__init__(name, prompt_path, provider, verbose)
|
||||
super().__init__(name, prompt_path, provider, verbose, None)
|
||||
self.tools = {
|
||||
} # No tools for the casual agent
|
||||
self.role = {
|
||||
|
@ -12,7 +12,7 @@ class CoderAgent(Agent):
|
||||
The code agent is an agent that can write and execute code.
|
||||
"""
|
||||
def __init__(self, name, prompt_path, provider, verbose=False):
|
||||
super().__init__(name, prompt_path, provider, verbose)
|
||||
super().__init__(name, prompt_path, provider, verbose, None)
|
||||
self.tools = {
|
||||
"bash": BashInterpreter(),
|
||||
"python": PyInterpreter(),
|
||||
|
@ -9,7 +9,7 @@ class FileAgent(Agent):
|
||||
"""
|
||||
The file agent is a special agent for file operations.
|
||||
"""
|
||||
super().__init__(name, prompt_path, provider, verbose)
|
||||
super().__init__(name, prompt_path, provider, verbose, None)
|
||||
self.tools = {
|
||||
"file_finder": FileFinder(),
|
||||
"bash": BashInterpreter()
|
||||
|
@ -7,19 +7,20 @@ from sources.agents.browser_agent import BrowserAgent
|
||||
from sources.tools.tools import Tools
|
||||
|
||||
class PlannerAgent(Agent):
|
||||
def __init__(self, name, prompt_path, provider, verbose=False):
|
||||
def __init__(self, name, prompt_path, provider, verbose=False, browser=None):
|
||||
"""
|
||||
The planner agent is a special agent that divides and conquers the task.
|
||||
"""
|
||||
super().__init__(name, prompt_path, provider, verbose)
|
||||
super().__init__(name, prompt_path, provider, verbose, None)
|
||||
self.tools = {
|
||||
"json": Tools()
|
||||
}
|
||||
self.tools['json'].tag = "json"
|
||||
self.browser = browser
|
||||
self.agents = {
|
||||
"coder": CoderAgent(name, "prompts/coder_agent.txt", provider, verbose=False),
|
||||
"file": FileAgent(name, "prompts/file_agent.txt", provider, verbose=False),
|
||||
"web": BrowserAgent(name, "prompts/browser_agent.txt", provider, verbose=False)
|
||||
"web": BrowserAgent(name, "prompts/browser_agent.txt", provider, verbose=False, browser=browser)
|
||||
}
|
||||
self.role = {
|
||||
"en": "Research, setup and code",
|
||||
|
@ -21,80 +21,75 @@ from urllib.parse import urlparse
|
||||
|
||||
from sources.utility import pretty_print
|
||||
|
||||
def get_chrome_path() -> str:
|
||||
if sys.platform.startswith("win"):
|
||||
paths = [
|
||||
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
|
||||
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
|
||||
os.path.join(os.environ.get("LOCALAPPDATA", ""), "Google\\Chrome\\Application\\chrome.exe") # User install
|
||||
]
|
||||
elif sys.platform.startswith("darwin"): # macOS
|
||||
paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
|
||||
else: # Linux
|
||||
paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium"]
|
||||
|
||||
for path in paths:
|
||||
if os.path.exists(path) and os.access(path, os.X_OK): # Check if executable
|
||||
return path
|
||||
return None
|
||||
|
||||
def create_driver(headless=False):
|
||||
chrome_options = Options()
|
||||
chrome_path = get_chrome_path()
|
||||
|
||||
if not chrome_path:
|
||||
raise FileNotFoundError("Google Chrome not found. Please install it.")
|
||||
chrome_options.binary_location = chrome_path
|
||||
|
||||
if headless:
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--disable-gpu")
|
||||
chrome_options.add_argument("--no-sandbox")
|
||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||
chrome_options.add_argument("--autoplay-policy=user-gesture-required")
|
||||
chrome_options.add_argument("--mute-audio")
|
||||
chrome_options.add_argument("--disable-webgl")
|
||||
chrome_options.add_argument("--disable-notifications")
|
||||
security_prefs = {
|
||||
"profile.default_content_setting_values.media_stream": 2,
|
||||
"profile.default_content_setting_values.notifications": 2,
|
||||
"profile.default_content_setting_values.popups": 2,
|
||||
"profile.default_content_setting_values.geolocation": 2,
|
||||
"safebrowsing.enabled": True,
|
||||
}
|
||||
chrome_options.add_experimental_option("prefs", security_prefs)
|
||||
|
||||
chromedriver_path = shutil.which("chromedriver")
|
||||
if not chromedriver_path:
|
||||
chromedriver_path = chromedriver_autoinstaller.install()
|
||||
|
||||
if not chromedriver_path:
|
||||
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
|
||||
|
||||
service = Service(chromedriver_path)
|
||||
return webdriver.Chrome(service=service, options=chrome_options)
|
||||
|
||||
class Browser:
|
||||
def __init__(self, headless=False, anticaptcha_install=False):
|
||||
def __init__(self, driver, headless=False, anticaptcha_install=True):
|
||||
"""Initialize the browser with optional headless mode."""
|
||||
self.headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'Referer': 'https://www.google.com/',
|
||||
}
|
||||
self.js_scripts_folder = "./sources/web_scripts/" if not __name__ == "__main__" else "./web_scripts/"
|
||||
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
|
||||
try:
|
||||
chrome_options = Options()
|
||||
chrome_path = self.get_chrome_path()
|
||||
|
||||
if not chrome_path:
|
||||
raise FileNotFoundError("Google Chrome not found. Please install it.")
|
||||
chrome_options.binary_location = chrome_path
|
||||
|
||||
if headless:
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--disable-gpu")
|
||||
chrome_options.add_argument("--no-sandbox")
|
||||
chrome_options.add_argument("--disable-dev-shm-usage")
|
||||
chrome_options.add_argument("--autoplay-policy=user-gesture-required")
|
||||
chrome_options.add_argument("--mute-audio")
|
||||
chrome_options.add_argument("--disable-webgl")
|
||||
chrome_options.add_argument("--disable-notifications")
|
||||
security_prefs = {
|
||||
"profile.default_content_setting_values.media_stream": 2, # Block webcam/mic
|
||||
"profile.default_content_setting_values.notifications": 2, # Block notifications
|
||||
"profile.default_content_setting_values.popups": 2, # Block pop-ups
|
||||
"profile.default_content_setting_values.geolocation": 2, # Block geolocation
|
||||
"safebrowsing.enabled": True, # Enable safe browsing
|
||||
}
|
||||
chrome_options.add_experimental_option("prefs", security_prefs)
|
||||
|
||||
chromedriver_path = shutil.which("chromedriver") # system installed driver.
|
||||
|
||||
#If not found, try auto-installing the correct version
|
||||
if not chromedriver_path:
|
||||
chromedriver_path = chromedriver_autoinstaller.install()
|
||||
|
||||
if not chromedriver_path:
|
||||
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
|
||||
|
||||
service = Service(chromedriver_path)
|
||||
self.driver = webdriver.Chrome(service=service, options=chrome_options)
|
||||
self.driver = driver
|
||||
self.wait = WebDriverWait(self.driver, 10)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.logger.info("Browser initialized successfully")
|
||||
except Exception as e:
|
||||
raise Exception(f"Failed to initialize browser: {str(e)}")
|
||||
self.load_anticatpcha()
|
||||
if anticaptcha_install:
|
||||
self.load_anticatpcha()
|
||||
|
||||
@staticmethod
|
||||
def get_chrome_path() -> str:
|
||||
if sys.platform.startswith("win"):
|
||||
paths = [
|
||||
"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
|
||||
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
|
||||
os.path.join(os.environ.get("LOCALAPPDATA", ""), "Google\\Chrome\\Application\\chrome.exe") # User install
|
||||
]
|
||||
elif sys.platform.startswith("darwin"): # macOS
|
||||
paths = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta"]
|
||||
else: # Linux
|
||||
paths = ["/usr/bin/google-chrome", "/usr/bin/chromium-browser", "/usr/bin/chromium"]
|
||||
|
||||
for path in paths:
|
||||
if os.path.exists(path) and os.access(path, os.X_OK): # Check if executable
|
||||
return path
|
||||
return None
|
||||
|
||||
def load_anticatpcha(self):
|
||||
print("You might want to install the AntiCaptcha extension for captchas.")
|
||||
self.driver.get(self.anticaptcha)
|
||||
|
@ -12,7 +12,6 @@ class Interaction:
|
||||
tts_enabled: bool = True,
|
||||
stt_enabled: bool = True,
|
||||
recover_last_session: bool = False):
|
||||
self.tts_enabled = tts_enabled
|
||||
self.agents = agents
|
||||
self.current_agent = None
|
||||
self.router = AgentRouter(self.agents)
|
||||
|
@ -142,6 +142,7 @@ class AgentRouter:
|
||||
("i would like to setup a new AI project, index as mark2", "files"),
|
||||
("Hey, can you find the old_project.zip file somewhere on my drive?", "files"),
|
||||
("Tell me a funny story", "talk"),
|
||||
("can you make a snake game in python", "code"),
|
||||
("Can you locate the backup folder I created last month on my system?", "files"),
|
||||
("Share a random fun fact about space.", "talk"),
|
||||
("Write a script to rename all files in a directory to lowercase.", "files"),
|
||||
|
Loading…
x
Reference in New Issue
Block a user