Mirror of https://github.com/Arrowar/StreamingCommunity.git (synced 2025-06-07 20:15:24 +00:00)

Commit 02edd4d5cd — "Add light and strong search domain" (@inklook)
Parent: a60dcb2b79
@@ -18,6 +18,7 @@ You can chat, help improve this repo, or just hang around for some fun in the **
 * [CONFIGURATION](#Configuration)
 * [DOCKER](#docker)
 * [TUTORIAL](#tutorial)
+* [TO DO](#to-do)

 ## Requirement

@@ -232,3 +233,7 @@ docker run -it -p 8000:8000 -v /path/to/download:/app/Video streaming-community-
 ## Tutorial
 For a detailed walkthrough, refer to the [video tutorial](https://www.youtube.com/watch?v=Ok7hQCgxqLg&ab_channel=Nothing)

+## To do
+
+- Add a gui
+- Add a website api
@@ -1,6 +1,5 @@
 # 01.03.24

-import requests
 import re
 import json
 import binascii

@@ -9,12 +8,12 @@ from urllib.parse import urljoin, urlencode, quote


 # External libraries
-import requests
 from bs4 import BeautifulSoup


 # Internal utilities
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests
 from .SeriesType import TitleManager
 from .EpisodeType import EpisodeManager
 from .WindowType import WindowVideo, WindowParameter
Src/Api/Util/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 29.04.24

from .extract_domain import grab_sc_top_level_domain as get_sc_domain
Src/Api/Util/extract_domain.py (new file, 128 lines)
@@ -0,0 +1,128 @@
# 29.04.24

import threading
import logging
import os


# Internal utilities
from Src.Lib.Google import search as google_search
from Src.Lib.Request import requests


def check_url_for_content(url: str, content: str) -> bool:
    """
    Check if a URL contains specific content.

    Args:
        url (str): The URL to check.
        content (str): The content to search for in the response.

    Returns:
        bool: True if the content is found, False otherwise.
    """
    try:
        r = requests.get(url, timeout = 1)
        if r.status_code == 200 and content in r.text:
            return True
    except Exception as e:
        pass
    return False


def grab_top_level_domain(base_url: str, target_content: str) -> str:
    """
    Get the top-level domain (TLD) from a list of URLs.

    Args:
        base_url (str): The base URL to construct complete URLs.
        target_content (str): The content to search for in the response.

    Returns:
        str: The found TLD, if any.
    """
    results = []
    threads = []

    def url_checker(url: str):
        if check_url_for_content(url, target_content):
            results.append(url.split(".")[-1])

    if not os.path.exists("tld_list.txt"):
        raise FileNotFoundError("The file 'tld_list.txt' does not exist.")

    urls = [f"{base_url}.{x.strip().lower()}" for x in open("tld_list.txt", "r")]

    for url in urls:
        thread = threading.Thread(target=url_checker, args=(url,))
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    if results:
        return results[-1]


def grab_top_level_domain_light(query: str) -> str:
    """
    Get the top-level domain (TLD) using a light method via Google search.

    Args:
        query (str): The search query for Google search.

    Returns:
        str: The found TLD, if any.
    """
    for result in google_search(query, num=1, stop=1, pause=2):
        return result.split(".", 2)[-1].replace("/", "")


def grab_sc_top_level_domain(method: str) -> str:
    """
    Get the top-level domain (TLD) for the streaming community.

    Args:
        method (str): The method to use to obtain the TLD ("light" or "strong").

    Returns:
        str: The found TLD, if any.
    """
    if method == "light":
        return grab_top_level_domain_light("streaming community")
    elif method == "strong":
        return grab_top_level_domain("https://streamingcommunity", '<meta name="author" content="StreamingCommunity">')


def grab_au_top_level_domain(method: str) -> str:
    """
    Get the top-level domain (TLD) for Anime Unity.

    Args:
        method (str): The method to use to obtain the TLD ("light" or "strong").

    Returns:
        str: The found TLD, if any.
    """
    if method == "light":
        return grab_top_level_domain_light("animeunity")
    elif method == "strong":
        return grab_top_level_domain("https://www.animeunity", '<meta name="author" content="AnimeUnity Staff">')


def compose_both_top_level_domains(method: str) -> dict:
    """
    Compose TLDs for both the streaming community and Anime Unity.

    Args:
        method (str): The method to use to obtain the TLD ("light" or "strong").

    Returns:
        dict: A dictionary containing the TLDs for the streaming community and Anime Unity.
    """
    sc_tld = grab_sc_top_level_domain(method)
    au_tld = grab_au_top_level_domain(method)

    if not sc_tld:
        sc_tld = grab_sc_top_level_domain("strong")

    if not au_tld:
        au_tld = grab_au_top_level_domain("strong")

    return {"streaming_community": sc_tld, "anime_unity": au_tld}
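For orientation, a minimal hypothetical caller for the new module is sketched below. The function name `get_sc_domain` and the "light"/"strong" strings come from the file above; the wrapper function, the fallback order, and the printed URL format are illustrative only.

# Hypothetical usage sketch: try the quick Google-based lookup ("light") first,
# then fall back to the exhaustive threaded scan over tld_list.txt ("strong").
from Src.Api.Util import get_sc_domain

def resolve_streaming_domain() -> str:
    tld = get_sc_domain("light")         # single Google query
    if not tld:
        tld = get_sc_domain("strong")    # checks every TLD listed in tld_list.txt
    return f"https://streamingcommunity.{tld}"

if __name__ == "__main__":
    print(resolve_streaming_domain())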
@@ -4,13 +4,10 @@ import os
 import logging


-# External libraries
-import requests
-

 # Internal utilities
 from Src.Util.console import console, msg
 from Src.Util.config import config_manager
+from Src.Lib.Request import requests
 from Src.Lib.FFmpeg.my_m3u8 import Downloader
 from Src.Util.message import start_message
 from .Class import VideoSource
@@ -7,15 +7,16 @@ from typing import Tuple


 # External libraries
-import requests
 from bs4 import BeautifulSoup


 # Internal utilities
 from Src.Util.table import TVShowManager
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests
 from Src.Util.console import console
 from Src.Util.config import config_manager
+from .Util import get_sc_domain
 from .Class import MediaManager, MediaItem


@@ -109,10 +110,6 @@ def get_moment_titles(domain: str, version: str, prefix: str):
     return None


-def get_domain() -> str:
-    pass
-
-
 def test_site(domain: str) -> str:
     """
     Tests the availability of a website.

@@ -180,7 +177,7 @@ def get_version_and_domain() -> Tuple[str, str]:
     response_test_site = test_site(config_domain)

     if response_test_site is None:
-        config_domain = get_domain()
+        config_domain = get_sc_domain('light')
         response_test_site = test_site(config_domain)

     if response_test_site:
@@ -14,7 +14,6 @@ warnings.filterwarnings("ignore", category=UserWarning, module="cryptography")


 # External libraries
-import requests
 from tqdm.rich import tqdm
 from concurrent.futures import ThreadPoolExecutor, as_completed


@@ -22,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 # Internal utilities
 from Src.Util.console import console
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests
 from Src.Util.config import config_manager
 from Src.Util.os import (
     remove_folder,
@@ -5,10 +5,10 @@ import logging

 # Internal utilities
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests


 # External libraries
-import requests
 from m3u8 import M3U8

Src/Lib/Google/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 29.04.24

from .page import search
Src/Lib/Google/page.py (new file, 140 lines)
@@ -0,0 +1,140 @@
# 29.04.24

import time
import ssl
from urllib.request import Request, urlopen
from urllib.parse import quote_plus, urlparse, parse_qs
from bs4 import BeautifulSoup

from typing import Generator, Optional


# Internal utilities
from Src.Lib.Request import requests


def get_page(url: str) -> bytes:
    """
    Fetches the HTML content of a webpage given its URL.

    Args:
        url (str): The URL of the webpage.

    Returns:
        bytes: The HTML content of the webpage.
    """
    response = requests.get(url)
    return response.text


def filter_result(link: str) -> Optional[str]:
    """
    Filters search result links to remove unwanted ones.

    Args:
        link (str): The URL of the search result.

    Returns:
        Optional[str]: The filtered URL if valid, None otherwise.
    """
    try:
        if link.startswith('/url?'):

            # Extract the actual URL from Google's redirect link
            o = urlparse(link, 'http')
            link = parse_qs(o.query)['q'][0]

        o = urlparse(link, 'http')

        # Filter out Google links
        if o.netloc and 'google' not in o.netloc:
            return link

    except Exception:
        pass


def search(query: str, num: int = 10, stop: Optional[int] = None, pause: float = 2.0) -> Generator[str, None, None]:
    """
    Performs a Google search and yields the URLs of search results.

    Args:
        query (str): The search query.
        num (int): Number of results to fetch per request. Default is 10.
        stop (int, optional): Total number of results to retrieve. Default is None.
        pause (float): Pause duration between requests. Default is 2.0.

    Yields:
        str: The URL of a search result.

    Example:
        >>> for url in search("Python tutorials", num=5, stop=10):
        ...     print(url)
        ...
        https://www.python.org/about/gettingstarted/
    """

    # Set to store unique URLs
    hashes = set()

    # Counter for the number of fetched URLs
    count = 0

    # Encode the query for URL
    query = quote_plus(query)

    while not stop or count < stop:
        last_count = count

        # Construct the Google search URL
        url = f"https://www.google.com/search?client=opera&q={query}&sourceid=opera&oe=UTF-8"

        # Pause before making the request
        time.sleep(pause)

        # Fetch the HTML content of the search page
        html = get_page(url)
        soup = BeautifulSoup(html, 'html.parser')

        try:
            # Find all anchor tags containing search result links
            anchors = soup.find(id='search').findAll('a')

        except AttributeError:
            # Handle cases where search results are not found in the usual div
            gbar = soup.find(id='gbar')
            if gbar:
                gbar.clear()
            anchors = soup.findAll('a')

        # Iterate over each anchor tag
        for a in anchors:
            try:
                link = a['href']
            except KeyError:
                continue

            # Filter out unwanted links
            link = filter_result(link)
            if not link:
                continue

            # Check for duplicate URLs
            h = hash(link)
            if h in hashes:
                continue
            hashes.add(h)

            # Yield the valid URL
            yield link

            # Increment the counter
            count += 1

            # Check if the desired number of URLs is reached
            if stop and count >= stop:
                return

        # Break the loop if no new URLs are found
        if last_count == count:
            break
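A brief, illustrative use of the new search generator follows; the query string and the host extraction are examples, not part of the commit.

# Hypothetical usage sketch for Src/Lib/Google/page.py: fetch the first
# Google result for a query and print its host name.
from urllib.parse import urlparse
from Src.Lib.Google import search

first_hit = next(search("streaming community", num=1, stop=1, pause=2), None)
if first_hit is not None:
    print(urlparse(first_hit).netloc)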
Src/Lib/Request/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 21.04.24

from .my_requests import requests
@@ -9,7 +9,8 @@ import re
 import urllib.parse
 import urllib.request
 import urllib.error
-from typing import Dict, Optional, Union
+from typing import Dict, Optional, Union, TypedDict, Any
+
 try:
     from typing import Unpack

@@ -22,9 +23,13 @@ except ImportError:
         "Please make sure you have the necessary libraries installed.")


+# External library
+from bs4 import BeautifulSoup
+
+
 # Constants
-HTTP_TIMEOUT = 4
-HTTP_RETRIES = 2
+HTTP_TIMEOUT = 3
+HTTP_RETRIES = 1
 HTTP_DELAY = 1


@@ -95,12 +100,43 @@ class Response:
             raise RequestError(f"Request failed with status code {self.status_code}")

     def json(self):
-        """Return the response content as JSON if it is JSON."""
+        """
+        Return the response content as JSON if it is JSON.
+
+        Returns:
+            dict or list or None: A Python dictionary or list parsed from JSON if the response content is JSON, otherwise None.
+        """
         if self.is_json:
             return json.loads(self.text)
         else:
             return None

+    def get_redirects(self):
+        """
+        Extracts unique site URLs from HTML <link> elements within the <head> section.
+
+        Returns:
+            list or None: A list of unique site URLs if found, otherwise None.
+        """
+
+        site_find = []
+
+        if self.text:
+            soup = BeautifulSoup(self.text, "html.parser")
+
+            for links in soup.find("head").find_all('link'):
+                if links is not None:
+                    parsed_url = urllib.parse.urlparse(links.get('href'))
+                    site = parsed_url.scheme + "://" + parsed_url.netloc
+
+                    if site not in site_find:
+                        site_find.append(site)
+
+        if site_find:
+            return site_find
+        else:
+            return None
+
+
 class ManageRequests:
     """Class for managing HTTP requests."""

@@ -116,6 +152,7 @@ class ManageRequests:
         auth: Optional[tuple] = None,
         proxy: Optional[str] = None,
         cookies: Optional[Dict[str, str]] = None,
+        json_data: Optional[Dict[str, Any]] = None,
         redirection_handling: bool = True,
     ):
         """

@@ -136,7 +173,7 @@ class ManageRequests:
         """
         self.url = url
         self.method = method
-        self.headers = headers or {'User-Agent': 'Mozilla/5.0'}
+        self.headers = headers or {}
         self.timeout = timeout
         self.retries = retries
         self.params = params

@@ -144,6 +181,7 @@ class ManageRequests:
         self.auth = auth
         self.proxy = proxy
         self.cookies = cookies
+        self.json_data = json_data
         self.redirection_handling = redirection_handling

     def add_header(self, key: str, value: str) -> None:

@@ -152,6 +190,7 @@ class ManageRequests:

     def send(self) -> Response:
         """Send the HTTP request."""
+
         start_time = time.time()
         self.attempt = 0
         redirect_url = None

@@ -160,24 +199,53 @@ class ManageRequests:
             try:
                 req = self._build_request()
                 response = self._perform_request(req)
+
                 return self._process_response(response, start_time, redirect_url)
+
             except (urllib.error.URLError, urllib.error.HTTPError) as e:
                 self._handle_error(e)
-                attempt += 1
+                self.attempt += 1

     def _build_request(self) -> urllib.request.Request:
         """Build the urllib Request object."""
         headers = self.headers.copy()
+
         if self.params:
             url = self.url + '?' + urllib.parse.urlencode(self.params)
         else:
             url = self.url
+
         req = urllib.request.Request(url, headers=headers, method=self.method)
+
+        if self.json_data:
+            req.add_header('Content-Type', 'application/json')
+            req.body = json.dumps(self.json_data).encode('utf-8')
+        else:
+            req = urllib.request.Request(url, headers=headers, method=self.method)
+
         if self.auth:
             req.add_header('Authorization', 'Basic ' + base64.b64encode(f"{self.auth[0]}:{self.auth[1]}".encode()).decode())

         if self.cookies:
             cookie_str = '; '.join([f"{name}={value}" for name, value in self.cookies.items()])
             req.add_header('Cookie', cookie_str)

+        if self.headers:
+            for key, value in self.headers.items():
+                req.add_header(key, value)
+
+        # Add default user agent
+        if True:
+            there_is_agent = False
+
+            for key, value in self.headers.items():
+                if str(key).lower() == 'user-agent':
+                    there_is_agent = True
+
+            if not there_is_agent:
+                default_user_agent = 'Mozilla/5.0'
+                req.add_header('user-agent', default_user_agent)
+
         return req

     def _perform_request(self, req: urllib.request.Request) -> urllib.response.addinfourl:

@@ -186,26 +254,30 @@ class ManageRequests:
             proxy_handler = urllib.request.ProxyHandler({'http': self.proxy, 'https': self.proxy})
             opener = urllib.request.build_opener(proxy_handler)
             urllib.request.install_opener(opener)

         if not self.verify_ssl:
             ssl_context = ssl.create_default_context()
             ssl_context.check_hostname = False
             ssl_context.verify_mode = ssl.CERT_NONE
             response = urllib.request.urlopen(req, timeout=self.timeout, context=ssl_context)
+
         else:
             response = urllib.request.urlopen(req, timeout=self.timeout)

         return response

     def _process_response(self, response: urllib.response.addinfourl, start_time: float, redirect_url: Optional[str]) -> Response:
         """Process the HTTP response."""
         response_data = response.read()
         content_type = response.headers.get('Content-Type', '').lower()
-        is_response_api = "json" in content_type
+
         if self.redirection_handling and response.status in (301, 302, 303, 307, 308):
             location = response.headers.get('Location')
             logging.info(f"Redirecting to: {location}")
             redirect_url = location
             self.url = location
             return self.send()

         return self._build_response(response, response_data, start_time, redirect_url, content_type)

     def _build_response(self, response: urllib.response.addinfourl, response_data: bytes, start_time: float, redirect_url: Optional[str], content_type: str) -> Response:

@@ -216,7 +288,7 @@ class ManageRequests:

         for cookie in response.headers.get_all('Set-Cookie', []):
             cookie_parts = cookie.split(';')
-            cookie_name, cookie_value = cookie_parts[0].split('=')
+            cookie_name, cookie_value = cookie_parts[0].split('=', 1)
             response_cookies[cookie_name.strip()] = cookie_value.strip()

         return Response(

@@ -234,9 +306,11 @@ class ManageRequests:
     def _handle_error(self, e: Union[urllib.error.URLError, urllib.error.HTTPError]) -> None:
         """Handle request error."""
         logging.error(f"Request failed for URL '{self.url}': {str(e)}")
+
         if self.attempt < self.retries:
             logging.info(f"Retrying request for URL '{self.url}' (attempt {self.attempt}/{self.retries})")
             time.sleep(HTTP_DELAY)
+
         else:
             logging.error(f"Maximum retries reached for URL '{self.url}'")
             raise RequestError(str(e))

@@ -251,9 +325,7 @@ class ValidateRequest:
             r'^(?:http|ftp)s?://'  # http:// or https://
             r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'  # domain...
             r'localhost|'  # localhost...
-            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or IP
-            r'(?::\d+)?'  # optional port
-            r'(?:/?|[/?]\S+)$', re.IGNORECASE)
+            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', re.IGNORECASE)
         return re.match(url_regex, url) is not None

     @staticmethod

@@ -287,14 +359,14 @@ class SSLHandler:
         ssl_context.verify_mode = ssl.CERT_NONE


-class KwargsRequest():
-    """Class representing keyword arguments for a request."""
+class KwargsRequest(TypedDict, total = False):
     url: str
     headers: Optional[Dict[str, str]] = None
     timeout: float = HTTP_TIMEOUT
     retries: int = HTTP_RETRIES
     params: Optional[Dict[str, str]] = None
     cookies: Optional[Dict[str, str]] = None
+    json_data: Optional[Dict[str, Any]] = None


 class Request:

@@ -302,7 +374,7 @@ class Request:
     def __init__(self) -> None:
         pass

-    def get(self, url: str, **kwargs: Unpack[KwargsRequest]):
+    def get(self, url: str, **kwargs: Unpack[KwargsRequest]) -> 'Response':
         """
         Send a GET request.


@@ -315,7 +387,7 @@ class Request:
         """
         return self._send_request(url, 'GET', **kwargs)

-    def post(self, url: str, **kwargs: Unpack[KwargsRequest]):
+    def post(self, url: str, **kwargs: Unpack[KwargsRequest]) -> 'Response':
         """
         Send a POST request.


@@ -328,35 +400,8 @@ class Request:
         """
         return self._send_request(url, 'POST', **kwargs)

-    def put(self, url: str, **kwargs: Unpack[KwargsRequest]):
-        """
-        Send a PUT request.
-
-        Args:
-            url (str): The URL to which the request will be sent.
-            **kwargs: Additional keyword arguments for the request.
-
-        Returns:
-            Response: The response object.
-        """
-        return self._send_request(url, 'PUT', **kwargs)
-
-    def delete(self, url: str, **kwargs: Unpack[KwargsRequest]):
-        """
-        Send a DELETE request.
-
-        Args:
-            url (str): The URL to which the request will be sent.
-            **kwargs: Additional keyword arguments for the request.
-
-        Returns:
-            Response: The response object.
-        """
-        return self._send_request(url, 'DELETE', **kwargs)
-
-    def _send_request(self, url: str, method: str, **kwargs: Unpack[KwargsRequest]):
+    def _send_request(self, url: str, method: str, **kwargs: Unpack[KwargsRequest]) -> 'Response':
         """Send an HTTP request."""
-        # Add validation checks for URL and headers
         if not ValidateRequest.validate_url(url):
             raise ValueError("Invalid URL format")

@@ -365,6 +410,5 @@ class Request:
         return ManageRequests(url, method, **kwargs).send()


 # Out
-request = Request()
+requests: Request = Request()
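To make the scope of these changes concrete, a hypothetical snippet using the reworked wrapper is shown below; example.com is a placeholder URL, while json_data and get_redirects() are the additions introduced in this diff.

# Hypothetical usage sketch for the rewritten Src/Lib/Request/my_requests.py.
from Src.Lib.Request import requests   # module-level singleton now exported as "requests"

# GET with query parameters; a default User-Agent is injected only when
# the caller did not supply one.
resp = requests.get("https://example.com/api", params={"q": "test"}, timeout=3)
print(resp.status_code, resp.json())

# POST with a JSON body through the new json_data keyword.
resp = requests.post("https://example.com/api", json_data={"key": "value"})

# List the site URLs referenced by <link> tags in the response <head>.
print(resp.get_redirects())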
Src/Lib/UserAgent/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 21.04.24

from .user_agent import ua
@@ -7,10 +7,12 @@ import random
 import threading
 import json
 import tempfile
+
 from typing import Dict, List


 # Internal utilities
-from .my_requests import request
+from ..Request import requests
+

 def get_browser_user_agents_online(browser: str) -> List[str]:

@@ -28,7 +30,7 @@ def get_browser_user_agents_online(browser: str) -> List[str]:
     try:

         # Make request and find all user agents
-        html = request.get(url).text
+        html = requests.get(url).text
         browser_user_agents = re.findall(r"<a href=\'/.*?>(.+?)</a>", html, re.UNICODE)
         return [ua for ua in browser_user_agents if "more" not in ua.lower()]

@@ -103,4 +105,4 @@ class UserAgentManager:


 # Output
-ua = UserAgentManager()
+ua: UserAgentManager = UserAgentManager()
@@ -1,13 +1,13 @@
 # 01.03.2023

 import os
-import requests
 import time


 # Internal utilities
 from .version import __version__
 from Src.Util.console import console
+from Src.Lib.Request import requests


 # Variable
@@ -4,7 +4,7 @@ import logging


 # Internal utilities
-from Src.Lib.Request.user_agent import ua
+from Src.Lib.UserAgent import ua


 def get_headers() -> str:
requirements.txt (binary file; changes not shown)