Merge pull request #48 from ganeshnikhil/main

update browser.py multi-os support
This commit is contained in:
Martin 2025-03-17 20:52:00 +01:00 committed by GitHub
commit d51f17fdad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -6,6 +6,8 @@ from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.common.exceptions import TimeoutException, WebDriverException
import time import time
import os
import shutil
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import markdownify import markdownify
import logging import logging
@ -23,17 +25,47 @@ class Browser:
self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related" self.anticaptcha = "https://chrome.google.com/webstore/detail/nopecha-captcha-solver/dknlfmjaanfblgfdfebhijalfmhmjjjo/related"
try: try:
chrome_options = Options() chrome_options = Options()
chrome_path = self.get_chrome_path()
if not chrome_path:
raise FileNotFoundError("Google Chrome not found. Please install it.")
chrome_options.binary_location = chrome_path
if headless: if headless:
chrome_options.add_argument("--headless") chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu") chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--disable-dev-shm-usage")
self.driver = webdriver.Chrome(options=chrome_options) # Automatically find ChromeDriver path
chromedriver_path = shutil.which("chromedriver")
if not chromedriver_path:
raise FileNotFoundError("ChromeDriver not found. Please install it or add it to your PATH.")
service = Service(chromedriver_path)
self.driver = webdriver.Chrome(service=service, options=chrome_options)
self.wait = WebDriverWait(self.driver, 10) self.wait = WebDriverWait(self.driver, 10)
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)
self.logger.info("Browser initialized successfully") self.logger.info("Browser initialized successfully")
except Exception as e: except Exception as e:
raise Exception(f"Failed to initialize browser: {str(e)}") raise Exception(f"Failed to initialize browser: {str(e)}")
@staticmethod
def get_chrome_path():
    """Locate a Chrome/Chromium executable for the current platform.

    The well-known per-OS install locations are probed first, in order.
    If none of them is an executable file, the function falls back to
    searching ``PATH`` for the usual Chrome/Chromium command names, so
    installs outside the default locations are still detected.

    Returns:
        str | None: Path of the first executable found, or ``None`` when
        no browser binary could be located.
    """
    # Imported locally: no module-level ``import sys`` is visible in the
    # import block next to ``os``/``shutil``, so do not depend on one.
    import sys

    if sys.platform.startswith("win"):
        candidates = [
            "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
            "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
        ]
        # Per-user install. Skip it when LOCALAPPDATA is unset; otherwise
        # the join yields a relative path probed against the current
        # working directory.
        local_app_data = os.environ.get("LOCALAPPDATA")
        if local_app_data:
            candidates.append(
                os.path.join(local_app_data, "Google\\Chrome\\Application\\chrome.exe")
            )
        path_names = ("chrome",)
    elif sys.platform.startswith("darwin"):  # macOS
        candidates = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"]
        path_names = ("google-chrome", "chromium")
    else:  # Linux and other POSIX systems
        candidates = [
            "/usr/bin/google-chrome",
            "/usr/bin/chromium-browser",
            "/usr/bin/chromium",
        ]
        path_names = (
            "google-chrome",
            "google-chrome-stable",
            "chromium-browser",
            "chromium",
        )

    for candidate in candidates:
        # isfile (not exists): a directory usually passes the X_OK check
        # but can never be used as the browser binary.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate

    # Fallback: let the OS search PATH (covers snap, /usr/local/bin, ...).
    for name in path_names:
        resolved = shutil.which(name)
        if resolved:
            return resolved
    return None
def go_to(self, url): def go_to(self, url):
"""Navigate to a specified URL.""" """Navigate to a specified URL."""
@ -185,4 +217,4 @@ if __name__ == "__main__":
links = browser.get_navigable() links = browser.get_navigable()
print("\nNavigable Links:", links) print("\nNavigable Links:", links)
finally: finally:
browser.close() browser.close()