Mirror of https://github.com/Arrowar/StreamingCommunity.git (synced 2025-06-07 20:15:24 +00:00)

Commit 02edd4d5cd — "Add light and strong search domain" (@inklook)
Parent: a60dcb2b79
@@ -18,6 +18,7 @@ You can chat, help improve this repo, or just hang around for some fun in the **
 * [CONFIGURATION](#Configuration)
 * [DOCKER](#docker)
 * [TUTORIAL](#tutorial)
+* [TO DO](#to-do)

 ## Requirement

@@ -232,3 +233,7 @@ docker run -it -p 8000:8000 -v /path/to/download:/app/Video streaming-community-
 ## Tutorial
 For a detailed walkthrough, refer to the [video tutorial](https://www.youtube.com/watch?v=Ok7hQCgxqLg&ab_channel=Nothing)

+## To do
+
+- Add a gui
+- Add a website api
@@ -1,6 +1,5 @@
 # 01.03.24

-import requests
 import re
 import json
 import binascii

@@ -9,12 +8,12 @@ from urllib.parse import urljoin, urlencode, quote


 # External libraries
-import requests
 from bs4 import BeautifulSoup


 # Internal utilities
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests
 from .SeriesType import TitleManager
 from .EpisodeType import EpisodeManager
 from .WindowType import WindowVideo, WindowParameter
Src/Api/Util/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 29.04.24

from .extract_domain import grab_sc_top_level_domain as get_sc_domain
Src/Api/Util/extract_domain.py (new file, 128 lines)
@@ -0,0 +1,128 @@
# 29.04.24

import threading
import logging
import os


# Internal utilities
from Src.Lib.Google import search as google_search
from Src.Lib.Request import requests


def check_url_for_content(url: str, content: str) -> bool:
    """
    Check if a URL contains specific content.

    Args:
        url (str): The URL to check.
        content (str): The content to search for in the response.

    Returns:
        bool: True if the content is found, False otherwise.
    """
    try:
        r = requests.get(url, timeout = 1)
        if r.status_code == 200 and content in r.text:
            return True
    except Exception as e:
        pass
    return False


def grab_top_level_domain(base_url: str, target_content: str) -> str:
    """
    Get the top-level domain (TLD) from a list of URLs.

    Args:
        base_url (str): The base URL to construct complete URLs.
        target_content (str): The content to search for in the response.

    Returns:
        str: The found TLD, if any.
    """
    results = []
    threads = []

    def url_checker(url: str):
        if check_url_for_content(url, target_content):
            results.append(url.split(".")[-1])

    if not os.path.exists("tld_list.txt"):
        raise FileNotFoundError("The file 'tld_list.txt' does not exist.")

    urls = [f"{base_url}.{x.strip().lower()}" for x in open("tld_list.txt", "r")]

    for url in urls:
        thread = threading.Thread(target=url_checker, args=(url,))
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    if results:
        return results[-1]


def grab_top_level_domain_light(query: str) -> str:
    """
    Get the top-level domain (TLD) using a light method via Google search.

    Args:
        query (str): The search query for Google search.

    Returns:
        str: The found TLD, if any.
    """
    for result in google_search(query, num=1, stop=1, pause=2):
        return result.split(".", 2)[-1].replace("/", "")


def grab_sc_top_level_domain(method: str) -> str:
    """
    Get the top-level domain (TLD) for the streaming community.

    Args:
        method (str): The method to use to obtain the TLD ("light" or "strong").

    Returns:
        str: The found TLD, if any.
    """
    if method == "light":
        return grab_top_level_domain_light("streaming community")
    elif method == "strong":
        return grab_top_level_domain("https://streamingcommunity", '<meta name="author" content="StreamingCommunity">')


def grab_au_top_level_domain(method: str) -> str:
    """
    Get the top-level domain (TLD) for Anime Unity.

    Args:
        method (str): The method to use to obtain the TLD ("light" or "strong").

    Returns:
        str: The found TLD, if any.
    """
    if method == "light":
        return grab_top_level_domain_light("animeunity")
    elif method == "strong":
        return grab_top_level_domain("https://www.animeunity", '<meta name="author" content="AnimeUnity Staff">')


def compose_both_top_level_domains(method: str) -> dict:
    """
    Compose TLDs for both the streaming community and Anime Unity.

    Args:
        method (str): The method to use to obtain the TLD ("light" or "strong").

    Returns:
        dict: A dictionary containing the TLDs for the streaming community and Anime Unity.
    """
    sc_tld = grab_sc_top_level_domain(method)
    au_tld = grab_au_top_level_domain(method)

    if not sc_tld:
        sc_tld = grab_sc_top_level_domain("strong")

    if not au_tld:
        au_tld = grab_au_top_level_domain("strong")

    return {"streaming_community": sc_tld, "anime_unity": au_tld}
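For orientation, a minimal hypothetical caller for the new module is sketched below. The function name `get_sc_domain` and the "light"/"strong" strings come from the file above; the wrapper function, the fallback order, and the printed URL format are illustrative only.

# Hypothetical usage sketch: try the quick Google-based lookup ("light") first,
# then fall back to the exhaustive threaded scan over tld_list.txt ("strong").
from Src.Api.Util import get_sc_domain

def resolve_streaming_domain() -> str:
    tld = get_sc_domain("light")         # single Google query
    if not tld:
        tld = get_sc_domain("strong")    # checks every TLD listed in tld_list.txt
    return f"https://streamingcommunity.{tld}"

if __name__ == "__main__":
    print(resolve_streaming_domain())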
@@ -4,13 +4,10 @@ import os
 import logging


-# External libraries
-import requests
-

 # Internal utilities
 from Src.Util.console import console, msg
 from Src.Util.config import config_manager
+from Src.Lib.Request import requests
 from Src.Lib.FFmpeg.my_m3u8 import Downloader
 from Src.Util.message import start_message
 from .Class import VideoSource
@@ -7,15 +7,16 @@ from typing import Tuple


 # External libraries
-import requests
 from bs4 import BeautifulSoup


 # Internal utilities
 from Src.Util.table import TVShowManager
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests
 from Src.Util.console import console
 from Src.Util.config import config_manager
+from .Util import get_sc_domain
 from .Class import MediaManager, MediaItem


@@ -109,10 +110,6 @@ def get_moment_titles(domain: str, version: str, prefix: str):
     return None


-def get_domain() -> str:
-    pass
-
-
 def test_site(domain: str) -> str:
     """
     Tests the availability of a website.

@@ -180,7 +177,7 @@ def get_version_and_domain() -> Tuple[str, str]:
     response_test_site = test_site(config_domain)

     if response_test_site is None:
-        config_domain = get_domain()
+        config_domain = get_sc_domain('light')
         response_test_site = test_site(config_domain)

     if response_test_site:
@@ -14,7 +14,6 @@ warnings.filterwarnings("ignore", category=UserWarning, module="cryptography")


 # External libraries
-import requests
 from tqdm.rich import tqdm
 from concurrent.futures import ThreadPoolExecutor, as_completed


@@ -22,6 +21,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 # Internal utilities
 from Src.Util.console import console
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests
 from Src.Util.config import config_manager
 from Src.Util.os import (
     remove_folder,
@@ -5,10 +5,10 @@ import logging

 # Internal utilities
 from Src.Util.headers import get_headers
+from Src.Lib.Request import requests


 # External libraries
-import requests
 from m3u8 import M3U8

Src/Lib/Google/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 29.04.24

from .page import search
Src/Lib/Google/page.py (new file, 140 lines)
@@ -0,0 +1,140 @@
# 29.04.24

import time
import ssl
from urllib.request import Request, urlopen
from urllib.parse import quote_plus, urlparse, parse_qs
from bs4 import BeautifulSoup

from typing import Generator, Optional


# Internal utilities
from Src.Lib.Request import requests


def get_page(url: str) -> bytes:
    """
    Fetches the HTML content of a webpage given its URL.

    Args:
        url (str): The URL of the webpage.

    Returns:
        bytes: The HTML content of the webpage.
    """
    response = requests.get(url)
    return response.text


def filter_result(link: str) -> Optional[str]:
    """
    Filters search result links to remove unwanted ones.

    Args:
        link (str): The URL of the search result.

    Returns:
        Optional[str]: The filtered URL if valid, None otherwise.
    """
    try:
        if link.startswith('/url?'):

            # Extract the actual URL from Google's redirect link
            o = urlparse(link, 'http')
            link = parse_qs(o.query)['q'][0]

        o = urlparse(link, 'http')

        # Filter out Google links
        if o.netloc and 'google' not in o.netloc:
            return link

    except Exception:
        pass


def search(query: str, num: int = 10, stop: Optional[int] = None, pause: float = 2.0) -> Generator[str, None, None]:
    """
    Performs a Google search and yields the URLs of search results.

    Args:
        query (str): The search query.
        num (int): Number of results to fetch per request. Default is 10.
        stop (int, optional): Total number of results to retrieve. Default is None.
        pause (float): Pause duration between requests. Default is 2.0.

    Yields:
        str: The URL of a search result.

    Example:
        >>> for url in search("Python tutorials", num=5, stop=10):
        ...     print(url)
        ...
        https://www.python.org/about/gettingstarted/
    """

    # Set to store unique URLs
    hashes = set()

    # Counter for the number of fetched URLs
    count = 0

    # Encode the query for URL
    query = quote_plus(query)

    while not stop or count < stop:
        last_count = count

        # Construct the Google search URL
        url = f"https://www.google.com/search?client=opera&q={query}&sourceid=opera&oe=UTF-8"

        # Pause before making the request
        time.sleep(pause)

        # Fetch the HTML content of the search page
        html = get_page(url)
        soup = BeautifulSoup(html, 'html.parser')

        try:
            # Find all anchor tags containing search result links
            anchors = soup.find(id='search').findAll('a')

        except AttributeError:
            # Handle cases where search results are not found in the usual div
            gbar = soup.find(id='gbar')
            if gbar:
                gbar.clear()
            anchors = soup.findAll('a')

        # Iterate over each anchor tag
        for a in anchors:
            try:
                link = a['href']
            except KeyError:
                continue

            # Filter out unwanted links
            link = filter_result(link)
            if not link:
                continue

            # Check for duplicate URLs
            h = hash(link)
            if h in hashes:
                continue
            hashes.add(h)

            # Yield the valid URL
            yield link

            # Increment the counter
            count += 1

            # Check if the desired number of URLs is reached
            if stop and count >= stop:
                return

        # Break the loop if no new URLs are found
        if last_count == count:
            break
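A brief, illustrative use of the new search generator follows; the query string and the host extraction are examples, not part of the commit.

# Hypothetical usage sketch for Src/Lib/Google/page.py: fetch the first
# Google result for a query and print its host name.
from urllib.parse import urlparse
from Src.Lib.Google import search

first_hit = next(search("streaming community", num=1, stop=1, pause=2), None)
if first_hit is not None:
    print(urlparse(first_hit).netloc)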
Src/Lib/Request/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 21.04.24

from .my_requests import requests
@@ -9,7 +9,8 @@ import re
 import urllib.parse
 import urllib.request
 import urllib.error
-from typing import Dict, Optional, Union
+from typing import Dict, Optional, Union, TypedDict, Any
+
 try:
     from typing import Unpack

@@ -22,9 +23,13 @@ except ImportError:
         "Please make sure you have the necessary libraries installed.")


+# External library
+from bs4 import BeautifulSoup
+
+
 # Constants
-HTTP_TIMEOUT = 4
-HTTP_RETRIES = 2
+HTTP_TIMEOUT = 3
+HTTP_RETRIES = 1
 HTTP_DELAY = 1


@@ -95,12 +100,43 @@ class Response:
             raise RequestError(f"Request failed with status code {self.status_code}")

     def json(self):
-        """Return the response content as JSON if it is JSON."""
+        """
+        Return the response content as JSON if it is JSON.
+
+        Returns:
+            dict or list or None: A Python dictionary or list parsed from JSON if the response content is JSON, otherwise None.
+        """
         if self.is_json:
             return json.loads(self.text)
         else:
             return None

+    def get_redirects(self):
+        """
+        Extracts unique site URLs from HTML <link> elements within the <head> section.
+
+        Returns:
+            list or None: A list of unique site URLs if found, otherwise None.
+        """
+
+        site_find = []
+
+        if self.text:
+            soup = BeautifulSoup(self.text, "html.parser")
+
+            for links in soup.find("head").find_all('link'):
+                if links is not None:
+                    parsed_url = urllib.parse.urlparse(links.get('href'))
+                    site = parsed_url.scheme + "://" + parsed_url.netloc
+
+                    if site not in site_find:
+                        site_find.append(site)
+
+        if site_find:
+            return site_find
+        else:
+            return None
+
+
 class ManageRequests:
     """Class for managing HTTP requests."""

@@ -116,6 +152,7 @@ class ManageRequests:
         auth: Optional[tuple] = None,
         proxy: Optional[str] = None,
         cookies: Optional[Dict[str, str]] = None,
+        json_data: Optional[Dict[str, Any]] = None,
         redirection_handling: bool = True,
     ):
         """

@@ -136,7 +173,7 @@ class ManageRequests:
         """
         self.url = url
         self.method = method
-        self.headers = headers or {'User-Agent': 'Mozilla/5.0'}
+        self.headers = headers or {}
         self.timeout = timeout
         self.retries = retries
         self.params = params

@@ -144,6 +181,7 @@ class ManageRequests:
         self.auth = auth
         self.proxy = proxy
         self.cookies = cookies
+        self.json_data = json_data
         self.redirection_handling = redirection_handling

     def add_header(self, key: str, value: str) -> None:

@@ -152,6 +190,7 @@ class ManageRequests:

     def send(self) -> Response:
         """Send the HTTP request."""
+
         start_time = time.time()
         self.attempt = 0
         redirect_url = None

@@ -160,24 +199,53 @@ class ManageRequests:
             try:
                 req = self._build_request()
                 response = self._perform_request(req)
+
                 return self._process_response(response, start_time, redirect_url)
+
             except (urllib.error.URLError, urllib.error.HTTPError) as e:
                 self._handle_error(e)
-                attempt += 1
+                self.attempt += 1

     def _build_request(self) -> urllib.request.Request:
         """Build the urllib Request object."""
         headers = self.headers.copy()
+
         if self.params:
             url = self.url + '?' + urllib.parse.urlencode(self.params)
         else:
             url = self.url
+
         req = urllib.request.Request(url, headers=headers, method=self.method)
+
+        if self.json_data:
+            req.add_header('Content-Type', 'application/json')
+            req.body = json.dumps(self.json_data).encode('utf-8')
+        else:
+            req = urllib.request.Request(url, headers=headers, method=self.method)
+
         if self.auth:
             req.add_header('Authorization', 'Basic ' + base64.b64encode(f"{self.auth[0]}:{self.auth[1]}".encode()).decode())

         if self.cookies:
             cookie_str = '; '.join([f"{name}={value}" for name, value in self.cookies.items()])
             req.add_header('Cookie', cookie_str)

+        if self.headers:
+            for key, value in self.headers.items():
+                req.add_header(key, value)
+
+        # Add default user agent
+        if True:
+            there_is_agent = False
+
+            for key, value in self.headers.items():
+                if str(key).lower() == 'user-agent':
+                    there_is_agent = True
+
+            if not there_is_agent:
+                default_user_agent = 'Mozilla/5.0'
+                req.add_header('user-agent', default_user_agent)
+
         return req

     def _perform_request(self, req: urllib.request.Request) -> urllib.response.addinfourl:

@@ -186,26 +254,30 @@ class ManageRequests:
             proxy_handler = urllib.request.ProxyHandler({'http': self.proxy, 'https': self.proxy})
             opener = urllib.request.build_opener(proxy_handler)
             urllib.request.install_opener(opener)

         if not self.verify_ssl:
             ssl_context = ssl.create_default_context()
             ssl_context.check_hostname = False
             ssl_context.verify_mode = ssl.CERT_NONE
             response = urllib.request.urlopen(req, timeout=self.timeout, context=ssl_context)
+
         else:
             response = urllib.request.urlopen(req, timeout=self.timeout)

         return response

     def _process_response(self, response: urllib.response.addinfourl, start_time: float, redirect_url: Optional[str]) -> Response:
         """Process the HTTP response."""
         response_data = response.read()
         content_type = response.headers.get('Content-Type', '').lower()
-        is_response_api = "json" in content_type
+
         if self.redirection_handling and response.status in (301, 302, 303, 307, 308):
             location = response.headers.get('Location')
             logging.info(f"Redirecting to: {location}")
             redirect_url = location
             self.url = location
             return self.send()

         return self._build_response(response, response_data, start_time, redirect_url, content_type)

     def _build_response(self, response: urllib.response.addinfourl, response_data: bytes, start_time: float, redirect_url: Optional[str], content_type: str) -> Response:

@@ -216,7 +288,7 @@ class ManageRequests:

         for cookie in response.headers.get_all('Set-Cookie', []):
             cookie_parts = cookie.split(';')
-            cookie_name, cookie_value = cookie_parts[0].split('=')
+            cookie_name, cookie_value = cookie_parts[0].split('=', 1)
             response_cookies[cookie_name.strip()] = cookie_value.strip()

         return Response(

@@ -234,9 +306,11 @@ class ManageRequests:
     def _handle_error(self, e: Union[urllib.error.URLError, urllib.error.HTTPError]) -> None:
         """Handle request error."""
         logging.error(f"Request failed for URL '{self.url}': {str(e)}")
+
         if self.attempt < self.retries:
             logging.info(f"Retrying request for URL '{self.url}' (attempt {self.attempt}/{self.retries})")
             time.sleep(HTTP_DELAY)
+
         else:
             logging.error(f"Maximum retries reached for URL '{self.url}'")
             raise RequestError(str(e))

@@ -251,9 +325,7 @@ class ValidateRequest:
             r'^(?:http|ftp)s?://'  # http:// or https://
             r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'  # domain...
             r'localhost|'  # localhost...
-            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or IP
-            r'(?::\d+)?'  # optional port
-            r'(?:/?|[/?]\S+)$', re.IGNORECASE)
+            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', re.IGNORECASE)
         return re.match(url_regex, url) is not None

     @staticmethod

@@ -287,14 +359,14 @@ class SSLHandler:
         ssl_context.verify_mode = ssl.CERT_NONE


-class KwargsRequest():
-    """Class representing keyword arguments for a request."""
+class KwargsRequest(TypedDict, total = False):
     url: str
     headers: Optional[Dict[str, str]] = None
     timeout: float = HTTP_TIMEOUT
     retries: int = HTTP_RETRIES
     params: Optional[Dict[str, str]] = None
     cookies: Optional[Dict[str, str]] = None
+    json_data: Optional[Dict[str, Any]] = None


 class Request:

@@ -302,7 +374,7 @@ class Request:
     def __init__(self) -> None:
         pass

-    def get(self, url: str, **kwargs: Unpack[KwargsRequest]):
+    def get(self, url: str, **kwargs: Unpack[KwargsRequest]) -> 'Response':
         """
         Send a GET request.


@@ -315,7 +387,7 @@ class Request:
         """
         return self._send_request(url, 'GET', **kwargs)

-    def post(self, url: str, **kwargs: Unpack[KwargsRequest]):
+    def post(self, url: str, **kwargs: Unpack[KwargsRequest]) -> 'Response':
         """
         Send a POST request.


@@ -328,35 +400,8 @@ class Request:
         """
         return self._send_request(url, 'POST', **kwargs)

-    def put(self, url: str, **kwargs: Unpack[KwargsRequest]):
-        """
-        Send a PUT request.
-
-        Args:
-            url (str): The URL to which the request will be sent.
-            **kwargs: Additional keyword arguments for the request.
-
-        Returns:
-            Response: The response object.
-        """
-        return self._send_request(url, 'PUT', **kwargs)
-
-    def delete(self, url: str, **kwargs: Unpack[KwargsRequest]):
-        """
-        Send a DELETE request.
-
-        Args:
-            url (str): The URL to which the request will be sent.
-            **kwargs: Additional keyword arguments for the request.
-
-        Returns:
-            Response: The response object.
-        """
-        return self._send_request(url, 'DELETE', **kwargs)
-
-    def _send_request(self, url: str, method: str, **kwargs: Unpack[KwargsRequest]):
+    def _send_request(self, url: str, method: str, **kwargs: Unpack[KwargsRequest]) -> 'Response':
         """Send an HTTP request."""
-        # Add validation checks for URL and headers
         if not ValidateRequest.validate_url(url):
             raise ValueError("Invalid URL format")

@@ -365,6 +410,5 @@ class Request:
         return ManageRequests(url, method, **kwargs).send()


 # Out
-request = Request()
+requests: Request = Request()
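To make the scope of these changes concrete, a hypothetical snippet using the reworked wrapper is shown below; example.com is a placeholder URL, while json_data and get_redirects() are the additions introduced in this diff.

# Hypothetical usage sketch for the rewritten Src/Lib/Request/my_requests.py.
from Src.Lib.Request import requests   # module-level singleton now exported as "requests"

# GET with query parameters; a default User-Agent is injected only when
# the caller did not supply one.
resp = requests.get("https://example.com/api", params={"q": "test"}, timeout=3)
print(resp.status_code, resp.json())

# POST with a JSON body through the new json_data keyword.
resp = requests.post("https://example.com/api", json_data={"key": "value"})

# List the site URLs referenced by <link> tags in the response <head>.
print(resp.get_redirects())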
Src/Lib/UserAgent/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
# 21.04.24

from .user_agent import ua
@@ -7,10 +7,12 @@ import random
 import threading
 import json
 import tempfile
+
 from typing import Dict, List


 # Internal utilities
-from .my_requests import request
+from ..Request import requests
+

 def get_browser_user_agents_online(browser: str) -> List[str]:

@@ -28,7 +30,7 @@ def get_browser_user_agents_online(browser: str) -> List[str]:
     try:

         # Make request and find all user agents
-        html = request.get(url).text
+        html = requests.get(url).text
         browser_user_agents = re.findall(r"<a href=\'/.*?>(.+?)</a>", html, re.UNICODE)
         return [ua for ua in browser_user_agents if "more" not in ua.lower()]

@@ -103,4 +105,4 @@ class UserAgentManager:


 # Output
-ua = UserAgentManager()
+ua: UserAgentManager = UserAgentManager()
@@ -1,13 +1,13 @@
 # 01.03.2023

 import os
-import requests
 import time


 # Internal utilities
 from .version import __version__
 from Src.Util.console import console
+from Src.Lib.Request import requests


 # Variable
@@ -4,7 +4,7 @@ import logging


 # Internal utilities
-from Src.Lib.Request.user_agent import ua
+from Src.Lib.UserAgent import ua


 def get_headers() -> str:
requirements.txt (binary file; changes not shown)