mirror of
https://github.com/Arrowar/StreamingCommunity.git
synced 2025-06-07 20:15:24 +00:00
Add new auto find new domain.
This commit is contained in:
parent
678e39cc46
commit
6036bbeb20
192
Src/Api/Template/Util/get_domain.py
Normal file
192
Src/Api/Template/Util/get_domain.py
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
# 18.06.24
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
|
|
||||||
|
# External libraries
|
||||||
|
import httpx
|
||||||
|
import psutil
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
# Internal utilities
|
||||||
|
from Src.Util.color import Colors
|
||||||
|
from Src.Util.headers import get_headers
|
||||||
|
from Src.Util.console import console
|
||||||
|
from Src.Util._jsonConfig import config_manager
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def check_url_for_content(url: str, content: str, timeout: int = 1) -> bool:
    """
    Check whether the response body of a URL contains a given string.

    Args:
        url (str): The URL to check.
        content (str): The content to search for in the response.
        timeout (int): Timeout for the request in seconds.

    Returns:
        bool: True if the request succeeds and the content is found,
        False on any request/HTTP error or when the content is absent.
    """
    try:
        response = httpx.get(url, timeout=timeout, headers={'user-agent': get_headers()})
        logging.info(f"Testing site to extract domain: {url}, response: {response.status_code}")

        # Raise an error if the status is not successful
        response.raise_for_status()

        # The membership test already yields the boolean we need —
        # no `if ...: return True` fall-through required.
        return content in response.text

    except httpx.RequestError as e:
        # Network-level failure (DNS, connect, timeout, ...)
        logging.warning(f"Request error for {url}: {e}")

    except httpx.HTTPStatusError as e:
        # Non-2xx response raised by raise_for_status()
        logging.warning(f"HTTP status error for {url}: {e}")

    except Exception as e:
        # Deliberate best-effort catch-all: a probe failure must never
        # abort the whole TLD scan.
        logging.warning(f"Error for {url}: {e}")

    return False
|
||||||
|
|
||||||
|
def get_top_level_domain(base_url: str, target_content: str, max_workers: int = os.cpu_count(), timeout: int = 2, retries: int = 1) -> str:
    """
    Probe `base_url` combined with every TLD from a local list and return the
    first TLD whose page contains `target_content`.

    Args:
        - base_url (str): The base URL (scheme + host, no TLD) used to construct complete URLs.
        - target_content (str): The content to search for in the response.
        - max_workers (int): Maximum number of threads.
          NOTE: the default is evaluated once at import time via os.cpu_count().
        - timeout (int): Timeout for each request in seconds.
        - retries (int): Number of attempts per URL before marking it failed.

    Returns:
        str: The found TLD, or None when no candidate matched
        (annotation says `str`, but the final branch returns None).

    Raises:
        FileNotFoundError: if Test/data/TLD/tld_list_complete.txt is missing.
    """
    results = []          # TLDs that matched target_content
    failed_urls = []      # URLs that exhausted all retries (appended from worker threads)
    path_file = os.path.join("Test", "data", "TLD", "tld_list_complete.txt")
    logging.info(f"Loading file: {path_file}")

    if not os.path.exists(path_file):
        raise FileNotFoundError("The file 'tld_list_complete.txt' does not exist.")

    # Read TLDs from file and create candidate URLs to test
    with open(path_file, "r") as file:
        urls = [f"{base_url}.{x.strip().lower()}" for x in file]
        urls = list(set(urls))  # Remove duplicates

    start_time = time.time()

    # tqdm format string built with ANSI color placeholders from Colors
    bar_format=f"{Colors.YELLOW}Testing URLS{Colors.WHITE}: {Colors.RED}{{percentage:.2f}}% {Colors.MAGENTA}{{bar}} {Colors.WHITE}[ {Colors.YELLOW}{{n_fmt}}{Colors.WHITE} / {Colors.RED}{{total_fmt}} {Colors.WHITE}] {Colors.YELLOW}{{elapsed}} {Colors.WHITE}< {Colors.CYAN}{{remaining}}{Colors.GREEN}{{postfix}} {Colors.WHITE}]"
    progress_bar = tqdm(
        total=len(urls),
        unit='url',
        ascii='░▒█',
        bar_format=bar_format
    )

    # Event to signal all workers to stop once a match is found
    stop_event = threading.Event()

    def url_checker(url: str):
        # Worker run in the thread pool; closes over stop_event, progress_bar,
        # failed_urls. Returns the matched TLD or None.
        for attempt in range(retries):
            # Another worker already found a match — bail out early.
            # NOTE(review): this early return does not update the progress bar,
            # so the bar may not reach 100% after a hit.
            if stop_event.is_set():
                return None

            if check_url_for_content(url, target_content, timeout):
                stop_event.set()
                progress_bar.update(1)
                # TLD is everything after the last dot of the probed URL
                return url.split(".")[-1]

            logging.info(f"Retrying {url} ({attempt+1}/{retries})")

        # All retries exhausted; list.append is atomic under the GIL,
        # so this is safe from multiple threads.
        failed_urls.append(url)
        progress_bar.update(1)
        return None

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(url_checker, url): url for url in urls}

        for future in as_completed(futures):
            tld = future.result()

            if tld:
                results.append(tld)
                # A match was found — stop consuming results; remaining workers
                # exit quickly via the stop_event check.
                if stop_event.is_set():
                    break

            # Update the progress bar with CPU usage info
            progress_bar.set_postfix(cpu_usage=f"{psutil.cpu_percent()}%")

    progress_bar.close()

    end_time = time.time()
    total_time = end_time - start_time
    avg_time_per_url = total_time / len(urls) if urls else 0

    logging.info(f"Tested {len(urls)} URLs: {len(results)} passed, {len(failed_urls)} failed.")
    logging.info(f"Total time: {total_time:.2f} seconds, Average time per URL: {avg_time_per_url:.2f} seconds.")

    if results:
        # In practice at most one result is collected before the break above.
        return results[-1]
    else:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def search_domain(site_name: str, target_content: str, base_url: str):
    """
    Search for a valid domain for the given site name and base URL.

    Tries the domain stored in config.json first; if it is unreachable,
    scans the TLD list for a working replacement and persists it.

    Args:
        site_name (str): The name of the site to search the domain for.
        target_content (str): The content to search for in the response.
        base_url (str): The base URL to construct complete URLs.

    Returns:
        tuple: The found domain and the complete URL.
        Exits the process when no working domain can be found.
    """
    # Extract config domain
    domain = config_manager.get("SITE", site_name)
    console.print(f"[cyan]Test site[white]: [red]{base_url}.{domain}")

    try:
        # Test the current domain
        response = httpx.get(f"{base_url}.{domain}", headers={'user-agent': get_headers()}, timeout=2)
        console.print(f"[cyan]Test response site[white]: [red]{response.status_code}")
        response.raise_for_status()

        # Return config domain
        console.print(f"[cyan]Use domain: [red]{domain}")
        return domain, f"{base_url}.{domain}"

    except Exception as e:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow it and record why the domain failed.
        logging.info(f"Configured domain check failed for {base_url}.{domain}: {e}")

        # If the current domain fails, find a new one
        print()
        console.print("[red]Extract new DOMAIN from TLD list.")
        new_domain = get_top_level_domain(base_url=base_url, target_content=target_content)

        if new_domain is not None:

            # Update domain in config.json
            config_manager.set_key('SITE', site_name, new_domain)
            config_manager.write_config()

            # Return new config domain
            console.print(f"[cyan]Use domain: [red]{new_domain}")
            return new_domain, f"{base_url}.{new_domain}"

        else:
            logging.error(f"Failed to find a new domain for: {base_url}")
            # NOTE(review): exits with code 0 on failure — a nonzero code would
            # better signal the error to callers; kept for compatibility.
            sys.exit(0)
|
1
Src/Api/Template/__init__.py
Normal file
1
Src/Api/Template/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from .Util.get_domain import search_domain
|
@ -11,9 +11,10 @@ from unidecode import unidecode
|
|||||||
|
|
||||||
|
|
||||||
# Internal utilities
|
# Internal utilities
|
||||||
from Src.Util.table import TVShowManager
|
|
||||||
from Src.Util.console import console
|
|
||||||
from Src.Util.headers import get_headers
|
from Src.Util.headers import get_headers
|
||||||
|
from Src.Util.console import console
|
||||||
|
from Src.Util.table import TVShowManager
|
||||||
|
from ..Template import search_domain
|
||||||
|
|
||||||
|
|
||||||
# Logic class
|
# Logic class
|
||||||
@ -38,8 +39,11 @@ def title_search(title_search: str) -> int:
|
|||||||
int: The number of titles found.
|
int: The number of titles found.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Find new domain if prev dont work
|
||||||
|
domain_to_use, _ = search_domain(SITE_NAME, '<meta name="generator" content="altadefinizione">', f"https://{SITE_NAME}")
|
||||||
|
|
||||||
# Send request to search for titles
|
# Send request to search for titles
|
||||||
response = httpx.get(f"https://{SITE_NAME}.{DOMAIN_NOW}/page/1/?story={unidecode(title_search.replace(' ', '+'))}&do=search&subaction=search&titleonly=3", headers={'user-agent': get_headers()})
|
response = httpx.get(f"https://{SITE_NAME}.{domain_to_use}/page/1/?story={unidecode(title_search.replace(' ', '+'))}&do=search&subaction=search&titleonly=3", headers={'user-agent': get_headers()})
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
# Create soup and find table
|
# Create soup and find table
|
||||||
|
@ -11,9 +11,10 @@ from unidecode import unidecode
|
|||||||
|
|
||||||
|
|
||||||
# Internal utilities
|
# Internal utilities
|
||||||
from Src.Util.table import TVShowManager
|
|
||||||
from Src.Util.console import console
|
from Src.Util.console import console
|
||||||
from Src.Util._jsonConfig import config_manager
|
from Src.Util._jsonConfig import config_manager
|
||||||
|
from Src.Util.table import TVShowManager
|
||||||
|
from ..Template import search_domain
|
||||||
|
|
||||||
|
|
||||||
# Logic class
|
# Logic class
|
||||||
@ -65,28 +66,6 @@ def get_token(site_name: str, domain: str) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def update_domain():
|
|
||||||
"""
|
|
||||||
Update the domain for the anime streaming site.
|
|
||||||
|
|
||||||
This function tests the accessibility of the current anime streaming site.
|
|
||||||
If the current domain is inaccessible, it attempts to obtain and set a new domain.
|
|
||||||
It uses the 'light' method to extract a new domain from Anime Unity.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Test current site's accessibility
|
|
||||||
try:
|
|
||||||
|
|
||||||
console.log(f"[cyan]Test site: [red]https://{SITE_NAME}.{DOMAIN_NOW}")
|
|
||||||
response = httpx.get(f"https://www.{SITE_NAME}.{DOMAIN_NOW}")
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
|
|
||||||
console.log("[red]Upload domain")
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
def get_real_title(record):
|
def get_real_title(record):
|
||||||
"""
|
"""
|
||||||
Get the real title from a record.
|
Get the real title from a record.
|
||||||
@ -122,12 +101,9 @@ def title_search(title: str) -> int:
|
|||||||
- int: A number containing the length of media search manager.
|
- int: A number containing the length of media search manager.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Update domain
|
|
||||||
update_domain()
|
|
||||||
|
|
||||||
# Get token and session value from configuration
|
# Get token and session value from configuration
|
||||||
url_domain = config_manager.get('SITE', SITE_NAME)
|
domain_to_use, _ = search_domain(SITE_NAME, '<meta name="author" content="AnimeUnity Staff">', f"https://www.{SITE_NAME}")
|
||||||
data = get_token(SITE_NAME, url_domain)
|
data = get_token(SITE_NAME, domain_to_use)
|
||||||
|
|
||||||
# Prepare cookies to be used in the request
|
# Prepare cookies to be used in the request
|
||||||
cookies = {
|
cookies = {
|
||||||
@ -148,7 +124,7 @@ def title_search(title: str) -> int:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Send a POST request to the API endpoint for live search
|
# Send a POST request to the API endpoint for live search
|
||||||
response = httpx.post(f'https://www.{SITE_NAME}.{url_domain}/livesearch', cookies=cookies, headers=headers, json=json_data)
|
response = httpx.post(f'https://www.{SITE_NAME}.{domain_to_use}/livesearch', cookies=cookies, headers=headers, json=json_data)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
# Process each record returned in the response
|
# Process each record returned in the response
|
||||||
|
@ -15,9 +15,10 @@ from unidecode import unidecode
|
|||||||
|
|
||||||
# Internal utilities
|
# Internal utilities
|
||||||
from Src.Util.headers import get_headers
|
from Src.Util.headers import get_headers
|
||||||
from Src.Util._jsonConfig import config_manager
|
|
||||||
from Src.Util.console import console
|
from Src.Util.console import console
|
||||||
from Src.Util.table import TVShowManager
|
from Src.Util.table import TVShowManager
|
||||||
|
from ..Template import search_domain
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Logic class
|
# Logic class
|
||||||
@ -75,45 +76,15 @@ def get_version(text: str) -> tuple[str, list]:
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
def get_version_and_domain(new_domain = None) -> Tuple[str, str]:
|
def get_version_and_domain() -> Tuple[str, str]:
|
||||||
"""
|
|
||||||
Retrieves the version and domain of the streaming website.
|
|
||||||
|
|
||||||
This function retrieves the version and domain of the streaming website.
|
# Find new domain if prev dont work
|
||||||
It first checks the accessibility of the current site.
|
domain_to_use, base_url = search_domain(SITE_NAME, '<meta name="author" content="StreamingCommunity">', f"https://{SITE_NAME}")
|
||||||
If the site is accessible, it extracts the version from the response.
|
|
||||||
If configured to do so, it also scrapes and prints the titles of the moments.
|
|
||||||
If the site is inaccessible, it attempts to obtain a new domain using the 'insta' method.
|
|
||||||
|
|
||||||
Returns:
|
# Extract version from the response
|
||||||
Tuple[str, str]: A tuple containing the version and domain.
|
version, list_title_top_10 = get_version(httpx.get(base_url, headers={'user-agent': get_headers()}).text)
|
||||||
"""
|
|
||||||
|
|
||||||
# Get the current domain from the configuration
|
return version, domain_to_use
|
||||||
if new_domain is None:
|
|
||||||
config_domain = config_manager.get('SITE', SITE_NAME)
|
|
||||||
else:
|
|
||||||
config_domain = new_domain
|
|
||||||
|
|
||||||
# Test the accessibility of the current site
|
|
||||||
try:
|
|
||||||
|
|
||||||
# Make requests to site to get text
|
|
||||||
console.print(f"[cyan]Test site[white]: [red]https://{SITE_NAME}.{config_domain}")
|
|
||||||
response = httpx.get(f"https://{SITE_NAME}.{config_domain}")
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
console.print(f"[cyan]Test respost site[white]: [red]{response.status_code} \n")
|
|
||||||
|
|
||||||
# Extract version from the response
|
|
||||||
version, list_title_top_10 = get_version(response.text)
|
|
||||||
|
|
||||||
return version, config_domain
|
|
||||||
|
|
||||||
except:
|
|
||||||
|
|
||||||
console.log("[red]Upload domain.")
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
|
|
||||||
def title_search(title_search: str, domain: str) -> int:
|
def title_search(title_search: str, domain: str) -> int:
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
# 29.04.24
|
# 29.04.24
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import json
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
# URL of the webpage containing the table
|
# URL of the webpage containing the table
|
||||||
url = 'https://icannwiki.org/New_gTLD_Generic_Applications'
|
url = 'https://icannwiki.org/All_New_gTLD_Applications'
|
||||||
|
|
||||||
|
|
||||||
# List to store scraped data
|
# List to store scraped data
|
||||||
@ -78,8 +77,9 @@ def main():
|
|||||||
print(len(data))
|
print(len(data))
|
||||||
|
|
||||||
# Write the scraped data to a JSON file
|
# Write the scraped data to a JSON file
|
||||||
with open('data.json', 'w') as json_file:
|
with open('data.txt', 'w') as json_file:
|
||||||
json.dump(data, json_file)
|
for find_tld in data:
|
||||||
|
json_file.write(find_tld['application_id'] + "\n")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
1490
Test/data/TLD/tld_list_complete.txt
Normal file
1490
Test/data/TLD/tld_list_complete.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user