# 2024-12-13 15:42:25 +01:00

# 77 lines
# 1.8 KiB
# Python

# 29.06.24
import logging
import shutil
import tempfile

# External library
from bs4 import BeautifulSoup
from seleniumbase import Driver

# Internal utilities
from StreamingCommunity.Util._jsonConfig import config_manager
# Config
# Headless flag for the browser, looked up once at import time with
# config_manager.get_bool("BROWSER", "headless") and passed as `headless=`
# to the SeleniumBase Driver created by WebAutomation.
USE_HEADLESS = config_manager.get_bool("BROWSER", "headless")
class WebAutomation:
    """
    A class for automating web interactions using SeleniumBase and BeautifulSoup.

    Each instance owns one SeleniumBase Driver and one private temporary
    browser profile directory. Call quit() when done, or use the instance as
    a context manager, so the browser is closed and the directory is deleted.
    """

    def __init__(self):
        """
        Initializes the WebAutomation instance with a SeleniumBase Driver.

        The constructor takes no arguments; headless mode is taken from the
        module-level USE_HEADLESS setting.

        Raises:
            Whatever Driver(...) raises. The temporary profile directory is
            removed before the exception propagates.
        """
        logging.getLogger('seleniumbase').setLevel(logging.ERROR)

        # mkdtemp() never deletes what it creates, so remember the path and
        # remove it ourselves in quit().
        self._user_data_dir = tempfile.mkdtemp()
        try:
            self.driver = Driver(
                uc=True,
                uc_cdp_events=True,
                headless=USE_HEADLESS,
                user_data_dir=self._user_data_dir,
                chromium_arg="--disable-search-engine-choice-screen",
            )
        except BaseException:
            # No driver was created, so quit() will never run for this
            # instance: clean up here, then let the error propagate.
            self._remove_user_data_dir()
            raise

    def __enter__(self):
        """
        Enters a `with` block.

        Returns:
            WebAutomation: This instance.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Leaves a `with` block by calling quit(). Exceptions are not suppressed.
        """
        self.quit()

    def _remove_user_data_dir(self):
        """
        Deletes the temporary profile directory, if one is still recorded.

        Best effort: deletion errors are ignored, and calling this again
        after the directory is gone does nothing.
        """
        # getattr keeps this safe on instances built without __init__.
        user_data_dir = getattr(self, "_user_data_dir", None)
        self._user_data_dir = None
        if user_data_dir is not None:
            shutil.rmtree(user_data_dir, ignore_errors=True)

    def quit(self):
        """
        Quits the WebDriver instance and deletes its temporary profile directory.
        """
        try:
            self.driver.quit()
        finally:
            # Remove the profile even when closing the browser fails.
            self._remove_user_data_dir()

    def get_page(self, url):
        """
        Navigates the browser to the specified URL.

        Parameters:
            url (str): The URL to navigate to.
        """
        self.driver.get(url)

    def retrieve_soup(self):
        """
        Retrieves the BeautifulSoup object for the current page's HTML content.

        Returns:
            BeautifulSoup object: The current page source parsed with
            Python's built-in 'html.parser'.
        """
        html_content = self.driver.page_source
        return BeautifulSoup(html_content, 'html.parser')

    def get_content(self):
        """
        Returns the HTML content of the current page.

        Returns:
            str: The driver's current page source.
        """
        return self.driver.page_source