mirror of
https://github.com/Arrowar/StreamingCommunity.git
synced 2025-06-06 19:45:24 +00:00
75 lines
1.7 KiB
Python
75 lines
1.7 KiB
Python
# 29.06.24
|
|
|
|
import tempfile
|
|
import logging
|
|
|
|
|
|
# External library
|
|
from bs4 import BeautifulSoup
|
|
from seleniumbase import Driver
|
|
|
|
|
|
# Internal utilities
|
|
from Src.Util._jsonConfig import config_manager
|
|
|
|
|
|
# Config
|
|
# Boolean read once at import time via config_manager.get_bool("BROWSER", "headless");
# WebAutomation passes it as the `headless` argument of the SeleniumBase Driver.
USE_HEADLESS = config_manager.get_bool("BROWSER", "headless")
|
|
|
|
|
|
class WebAutomation:
    """
    A class for automating web interactions using SeleniumBase and BeautifulSoup.

    Each instance owns one SeleniumBase Driver (exposed as `self.driver`) that
    runs against a private, temporary browser profile directory. Call `quit()`
    when done: it closes the browser and deletes that temporary directory.
    """

    def __init__(self):
        """
        Initializes the WebAutomation instance with a SeleniumBase Driver.

        The constructor takes no arguments. Headless mode is controlled by the
        module-level USE_HEADLESS setting, and the browser profile lives in a
        freshly created temporary directory.

        Raises:
            Exception: Whatever the Driver constructor raises is propagated
                after the temporary profile directory has been removed.
        """
        # Silence SeleniumBase log output below ERROR level.
        logging.getLogger('seleniumbase').setLevel(logging.ERROR)

        # Remember the profile path so it can be deleted later; mkdtemp()
        # leaves cleanup to the caller.
        self._user_data_dir = tempfile.mkdtemp()
        try:
            self.driver = Driver(
                uc=True,
                uc_cdp_events=True,
                headless=USE_HEADLESS,
                user_data_dir=self._user_data_dir
            )
        except Exception:
            # Do not leave an orphaned profile directory if startup fails.
            self._remove_user_data_dir()
            raise

    def _remove_user_data_dir(self):
        """
        Deletes the temporary browser profile directory, if one is still recorded.

        Safe to call more than once; deletion errors are ignored because this
        is best-effort cleanup.
        """
        # Local import keeps this block self-contained without touching the
        # file-level import section.
        import shutil

        # getattr guards against instances created without running __init__.
        profile_dir = getattr(self, '_user_data_dir', None)
        if profile_dir is not None:
            shutil.rmtree(profile_dir, ignore_errors=True)
            self._user_data_dir = None

    def quit(self):
        """
        Quits the WebDriver instance and removes its temporary profile directory.

        The directory is removed even if quitting the driver raises.
        """
        try:
            self.driver.quit()
        finally:
            self._remove_user_data_dir()

    def get_page(self, url):
        """
        Navigates the browser to the specified URL.

        Args:
            url (str): The URL to navigate to.
        """
        self.driver.get(url)

    def retrieve_soup(self):
        """
        Retrieves the BeautifulSoup object for the current page's HTML content.

        Returns:
            BeautifulSoup object: The current page source parsed with 'html.parser'.
        """
        html_content = self.driver.page_source
        soup = BeautifulSoup(html_content, 'html.parser')
        return soup

    def get_content(self):
        """
        Returns the HTML content of the current page.

        Returns:
            str: The HTML content of the current page.
        """
        return self.driver.page_source
|
|
|