diff --git a/README.md b/README.md index 6bb3962..3bd8f76 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ You can change some behaviors by tweaking the configuration file. - **Default Value**: `false` * **proxy**: The proxy to use for requests. (Note: This parameter works only with HTTP and HTTPS protocols.) - - **Example Value**: `[{'protocol': 'http', 'ip': '123.45.67.89', 'port': '8080', 'username': 'your_username', 'password': 'your_password'}, {'protocol': 'https', 'ip': '123.45.67.89', 'port': '8080', 'username': 'your_username', 'password': 'your_password'}]` + - **Example Value**: `["http://user:pass@38.154.227.167:5868"]`
diff --git a/Src/Lib/Hls/proxyes.py b/Src/Lib/Hls/proxyes.py
new file mode 100644
index 0000000..8a0fda3
--- /dev/null
+++ b/Src/Lib/Hls/proxyes.py
@@ -0,0 +1,115 @@
+# 09.06.24
+
+import time
+import logging
+from concurrent.futures import ThreadPoolExecutor
+
+
+# External libraries
+import requests
+
+
+# Internal utilities
+from Src.Util._jsonConfig import config_manager
+
+
+class ProxyManager:
+    def __init__(self, proxy_list=None, url=None):
+        """
+        Initialize ProxyManager with a list of proxies and a test URL.
+
+        Args:
+            - proxy_list: List of proxy URL strings (e.g. "http://user:pass@host:port")
+            - url: URL requested through each proxy to check that it works
+        """
+        self.proxy_list = proxy_list or []
+        self.verified_proxies = []
+        self.failed_proxies = {}  # proxy string -> time.time() of the failed check
+        self.timeout = config_manager.get_float('REQUESTS', 'timeout')
+        self.url = url
+
+    @staticmethod
+    def _as_requests_proxies(proxy):
+        """
+        Build the 'proxies' mapping that requests expects for one proxy URL.
+
+        requests picks a proxy by the scheme of the target URL, not by the scheme
+        of the proxy URL, so the proxy is registered for both schemes. Keying it
+        only by its own scheme would leave an "http://" proxy unused for every
+        "https://" request.
+
+        Args:
+            - proxy: Proxy URL string
+
+        Returns:
+            - Dict mapping 'http' and 'https' to the proxy URL
+        """
+        return {'http': proxy, 'https': proxy}
+
+    def _check_proxy(self, proxy):
+        """
+        Check if a single proxy is working by requesting self.url through it.
+
+        Args:
+            - proxy: Proxy string to be checked
+
+        Returns:
+            - Proxy string if the response status is 200, None otherwise
+        """
+        try:
+            response = requests.get(self.url, proxies=self._as_requests_proxies(proxy), timeout=self.timeout)
+
+        except requests.RequestException as e:
+            logging.error(f"Proxy {proxy} failed: {e}")
+            self.failed_proxies[proxy] = time.time()
+            return None
+
+        if response.status_code == 200:
+            logging.info(f"Proxy {proxy} is working.")
+            return proxy
+
+        # A non-200 answer is a failed check too: record it instead of dropping it silently
+        logging.error(f"Proxy {proxy} failed: status code {response.status_code}")
+        self.failed_proxies[proxy] = time.time()
+        return None
+
+    def verify_proxies(self):
+        """
+        Verify all proxies in the list and store the working ones.
+        """
+        logging.info("Starting proxy verification...")
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            results = executor.map(self._check_proxy, self.proxy_list)
+            self.verified_proxies = [proxy for proxy in results if proxy]
+        logging.info(f"Verification complete. {len(self.verified_proxies)} proxies are working.")
+
+    def get_verified_proxies(self):
+        """
+        Get the verified proxies, each as a 'proxies' mapping ready for requests.
+
+        Returns:
+            - List of dicts, one per verified proxy, as built by _as_requests_proxies
+        """
+        return [self._as_requests_proxies(proxy) for proxy in self.verified_proxies]
+
+
+def main_test_proxy(url_test):
+    """
+    Verify the proxies listed in the configuration against a test URL.
+
+    Args:
+        - url_test: URL requested through each proxy during verification
+
+    Returns:
+        - List of 'proxies' dicts for the proxies that passed verification
+    """
+
+    # Get list of proxy from config.json
+    proxy_list = config_manager.get_list('REQUESTS', 'proxy')
+
+    # Verify proxy
+    manager = ProxyManager(proxy_list, url_test)
+    manager.verify_proxies()
+
+    # Return valid proxy
+    return manager.get_verified_proxies()
diff --git a/Src/Lib/Hls/segments.py b/Src/Lib/Hls/segments.py index f1dee38..421ecea 100644 --- a/Src/Lib/Hls/segments.py +++ b/Src/Lib/Hls/segments.py @@ -32,6 +32,7 @@ from ..M3U8 import ( M3U8_Parser, M3U8_UrlFix ) +from .proxyes import main_test_proxy # Warning @@ -41,10 +42,10 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # Config TQDM_MAX_WORKER = config_manager.get_int('M3U8_DOWNLOAD', 'tdqm_workers') +TQDM_DELAY_WORKER = config_manager.get_float('M3U8_DOWNLOAD', 'tqdm_delay') TQDM_USE_LARGE_BAR = config_manager.get_int('M3U8_DOWNLOAD', 'tqdm_use_large_bar') REQUEST_TIMEOUT = config_manager.get_float('REQUESTS', 'timeout') -PROXY_LIST = config_manager.get_list('REQUESTS', 'proxy') -START_THREAD_DELAY = 0.05 +THERE_IS_PROXY_LIST = len(config_manager.get_list('REQUESTS', 'proxy')) > 0 # Variable @@ -152,6 +153,15 @@ class M3U8_Segments: self.class_ts_estimator.total_segments = len(self.segments) logging.info(f"Segmnets to donwload: [{len(self.segments)}]") + # Proxy + if THERE_IS_PROXY_LIST: + console.log("[red]Validate proxy.") + self.valid_proxy = main_test_proxy(self.segments[0]) + console.log(f"[cyan]N. Valid ip: [red]{len(self.valid_proxy)}") + + if len(self.valid_proxy) == 0: + sys.exit(0) + def get_info(self) -> None: """ Makes a request to the index M3U8 file to get information about segments. 
@@ -170,38 +180,6 @@ class M3U8_Segments: # Parse the text from the M3U8 index file self.parse_data(response.text) - def get_proxy(self, index): - """ - Returns the proxy configuration for the given index. - - Args: - - index (int): The index to select the proxy from the PROXY_LIST. - - Returns: - - dict: A dictionary containing the proxy scheme and proxy URL. - """ - try: - - # Select the proxy from the list using the index - new_proxy = PROXY_LIST[index % len(PROXY_LIST)] - proxy_scheme = new_proxy["protocol"] - - # Construct the proxy URL based on the presence of user and pass keys - if "user" in new_proxy and "pass" in new_proxy: - proxy_url = f"{proxy_scheme}://{new_proxy['user']}:{new_proxy['pass']}@{new_proxy['ip']}:{new_proxy['port']}" - elif "user" in new_proxy: - proxy_url = f"{proxy_scheme}://{new_proxy['user']}@{new_proxy['ip']}:{new_proxy['port']}" - else: - proxy_url = f"{proxy_scheme}://{new_proxy['ip']}:{new_proxy['port']}" - - logging.info(f"Proxy URL generated: {proxy_url}") - return {proxy_scheme: proxy_url} - - except KeyError as e: - logging.error(f"KeyError: Missing required key {e} in proxy configuration.") - except Exception as e: - logging.error(f"An unexpected error occurred while generating proxy URL: {e}") - def make_requests_stream(self, ts_url: str, index: int, progress_bar: tqdm) -> None: """ Downloads a TS segment and adds it to the segment queue. @@ -211,8 +189,6 @@ class M3U8_Segments: - index (int): The index of the segment. - progress_bar (tqdm): Progress counter for tracking download progress. 
""" - global START_THREAD_DELAY - try: # Generate headers @@ -220,8 +196,9 @@ class M3U8_Segments: headers_segments['user-agent'] = get_headers() # Make request to get content - if len(PROXY_LIST) > 0: - proxy = self.get_proxy(index) + if THERE_IS_PROXY_LIST: + proxy = self.valid_proxy[index % len(self.valid_proxy)] + logging.info(f"Use proxy: {proxy}") response = session.get(ts_url, headers=headers_segments, timeout=REQUEST_TIMEOUT, proxies=proxy) else: response = session.get(ts_url, headers=headers_segments, timeout=REQUEST_TIMEOUT) @@ -286,8 +263,6 @@ class M3U8_Segments: Args: - add_desc (str): Additional description for the progress bar. """ - global START_THREAD_DELAY - if TQDM_USE_LARGE_BAR: bar_format=f"{Colors.YELLOW}Downloading {Colors.WHITE}({add_desc}{Colors.WHITE}): {Colors.RED}{{percentage:.2f}}% {Colors.MAGENTA}{{bar}} {Colors.WHITE}[ {Colors.YELLOW}{{n_fmt}}{Colors.WHITE} / {Colors.RED}{{total_fmt}} {Colors.WHITE}] {Colors.YELLOW}{{elapsed}} {Colors.WHITE}< {Colors.CYAN}{{remaining}}{{postfix}} {Colors.WHITE}]" else: @@ -308,7 +283,7 @@ class M3U8_Segments: # Start all workers with ThreadPoolExecutor(max_workers=TQDM_MAX_WORKER) as executor: for index, segment_url in enumerate(self.segments): - time.sleep(START_THREAD_DELAY) + time.sleep(TQDM_DELAY_WORKER) executor.submit(self.make_requests_stream, segment_url, index, progress_bar) # Wait for all tasks to complete diff --git a/Test/t_get_server_ip_sc.py b/Test/t_get_server_ip_sc.py deleted file mode 100644 index d89ebac..0000000 --- a/Test/t_get_server_ip_sc.py +++ /dev/null @@ -1,86 +0,0 @@ -# 13.05.24 - -import socket -import logging -import urllib3 -from urllib.parse import urlparse, urlunparse - - -import warnings -warnings.filterwarnings("ignore", category=urllib3.exceptions.InsecureRequestWarning) - - -# Variable -url_test = "https://sc-b1-18.scws-content.net/hls/100/b/d3/bd3a430d-0a13-4bec-8fcc-ea41af183555/audio/ita/0010-0100.ts?token=CiEPTIyvEoTkGk3szgDu9g&expires=1722801022" - - -def 
get_ip_from_url(url): - """ - Extracts the IP address from a given URL. - - Args: - url (str): The URL from which to extract the IP address. - - Returns: - str or None: The extracted IP address if successful, otherwise None. - """ - try: - parsed_url = urlparse(url) - if not parsed_url.hostname: - logging.error(f"Invalid URL: {url}") - return None - - ip_address = socket.gethostbyname(parsed_url.hostname) - return ip_address - - except Exception as e: - logging.error(f"Error: {e}") - return None - -def replace_random_number(url, random_number): - """ - Replaces a random number in the URL. - - Args: - url (str): The URL in which to replace the random number. - random_number (int): The random number to replace in the URL. - - Returns: - str: The modified URL with the random number replaced. - """ - parsed_url = urlparse(url) - parts = parsed_url.netloc.split('.') - prefix = None - - for i, part in enumerate(parts): - if '-' in part and part.startswith("sc-"): - prefix = part.split('-')[0] + '-' + part.split('-')[1] + '-' - new_part = prefix + f"{random_number:02d}" - parts[i] = new_part - break - - new_netloc = '.'.join(parts) - return urlunparse((parsed_url.scheme, new_netloc, parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment)) - -def main(): - """ - Main function to test the URL manipulation. - """ - valid_ip = [] - - for i in range(1, 36): - try: - ip = get_ip_from_url(replace_random_number(url_test, i)) - - if ip: - valid_ip.append(ip) - - except Exception as e: - logging.error(f"Error: {e}") - pass - - print(f"Valid IP addresses: {sorted(valid_ip, reverse=True)}") - - -if __name__ == '__main__': - main() diff --git a/config.json b/config.json index cedc3a0..3ac1b38 100644 --- a/config.json +++ b/config.json @@ -18,6 +18,7 @@ }, "M3U8_DOWNLOAD": { "tdqm_workers": 4, + "tqdm_delay": 0.01, "tqdm_use_large_bar": true, "download_video": true, "download_audio": true,