diff --git a/Src/Api/series.py b/Src/Api/series.py index 2b6b1fe..a37cb8e 100644 --- a/Src/Api/series.py +++ b/Src/Api/series.py @@ -3,6 +3,7 @@ import os import sys import logging +from unidecode import unidecode as transliterate from typing import List @@ -12,7 +13,6 @@ from Src.Util.config import config_manager from Src.Util.table import TVShowManager from Src.Util.message import start_message from Src.Util.os import remove_special_characters -from Src.Lib.Unidecode import transliterate from Src.Util.file_validation import can_create_file from Src.Lib.FFmpeg.my_m3u8 import Downloader from Src.Util.mapper import map_episode_title diff --git a/Src/Lib/Unidecode/__init__.py b/Src/Lib/Unidecode/__init__.py deleted file mode 100644 index 407692d..0000000 --- a/Src/Lib/Unidecode/__init__.py +++ /dev/null @@ -1,151 +0,0 @@ -# 04.04.24 - -import os -import logging -import importlib.util - - -# Variable -Cache = {} - - -class UnidecodeError(ValueError): - pass - - -def transliterate_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str: - """Transliterates non-ASCII characters in a string to their ASCII counterparts. - - Args: - string (str): The input string containing non-ASCII characters. - errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'. - replace_str (str): The replacement string used when errors='replace'. - - Returns: - str: The transliterated string with non-ASCII characters replaced. - """ - return _transliterate(string, errors, replace_str) - - -def _get_ascii_representation(char: str) -> str: - """Obtains the ASCII representation of a Unicode character. - - Args: - char (str): The Unicode character. - - Returns: - str: The ASCII representation of the character. - """ - codepoint = ord(char) - - # If the character is ASCII, return it as is - if codepoint < 0x80: - return str(char) - - # Ignore characters outside the BMP (Basic Multilingual Plane) - if codepoint > 0xeffff: - return None - - # Warn about surrogate characters - if 0xd800 <= codepoint <= 0xdfff: - logging.warning("Surrogate character %r will be ignored. " - "You might be using a narrow Python build.", char) - - # Calculate section and position - section = codepoint >> 8 - position = codepoint % 256 - - try: - # Look up the character in the cache - table = Cache[section] - - except KeyError: - try: - # Import the module corresponding to the section - module_name = f"x{section:03x}.py" - main = os.path.abspath(os.path.dirname(__file__)) - module_path = os.path.join(main, module_name) - spec = importlib.util.spec_from_file_location(module_name, module_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - except ImportError: - # If module import fails, set cache entry to None and return - Cache[section] = None - return None - - # Update cache with module data - Cache[section] = table = module.data - - # Return the ASCII representation if found, otherwise None - if table and len(table) > position: - return table[position] - else: - return None - - -def _transliterate(string: str, errors: str, replace_str: str) -> str: - """Main transliteration function. - - Args: - string (str): The input string. - errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'. - replace_str (str): The replacement string used when errors='replace'. - - Returns: - str: The transliterated string. - """ - retval = [] - - for char in string: - # Get the ASCII representation of the character - ascii_char = _get_ascii_representation(char) - - if ascii_char is None: - # Handle errors based on the specified policy - if errors == 'ignore': - ascii_char = '' - elif errors == 'strict': - logging.error(f'No replacement found for character {char!r}') - raise UnidecodeError(f'no replacement found for character {char!r}') - elif errors == 'replace': - ascii_char = replace_str - elif errors == 'preserve': - ascii_char = char - else: - logging.error(f'Invalid value for errors parameter {errors!r}') - raise UnidecodeError(f'invalid value for errors parameter {errors!r}') - - # Append the ASCII representation to the result - retval.append(ascii_char) - - return ''.join(retval) - - -def transliterate_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str: - """Transliterates non-ASCII characters in a string, expecting ASCII input. - - Args: - string (str): The input string containing non-ASCII characters. - errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'. - replace_str (str): The replacement string used when errors='replace'. - - Returns: - str: The transliterated string with non-ASCII characters replaced. - """ - try: - # Check if the string can be encoded as ASCII - string.encode('ASCII') - except UnicodeEncodeError: - # If encoding fails, fall back to transliteration - pass - else: - # If the string is already ASCII, return it as is - return string - - # Otherwise, transliterate non-ASCII characters - return _transliterate(string, errors, replace_str) - - -# Out -transliterate = transliterate_expect_ascii diff --git a/Src/Lib/Unidecode/x000.py b/Src/Lib/Unidecode/x000.py deleted file mode 100644 index 7ab4d76..0000000 --- a/Src/Lib/Unidecode/x000.py +++ /dev/null @@ -1,258 +0,0 @@ -data = ( -'\x00', # 0x00 -'\x01', # 0x01 -'\x02', # 0x02 -'\x03', # 0x03 -'\x04', # 0x04 -'\x05', # 0x05 -'\x06', # 0x06 -'\x07', # 0x07 -'\x08', # 0x08 -'\x09', # 0x09 -'\x0a', # 0x0a -'\x0b', # 0x0b -'\x0c', # 0x0c -'\x0d', # 0x0d -'\x0e', # 0x0e -'\x0f', # 0x0f -'\x10', # 0x10 -'\x11', # 0x11 -'\x12', # 0x12 -'\x13', # 0x13 -'\x14', # 0x14 -'\x15', # 0x15 -'\x16', # 0x16 -'\x17', # 0x17 -'\x18', # 0x18 -'\x19', # 0x19 -'\x1a', # 0x1a -'\x1b', # 0x1b -'\x1c', # 0x1c -'\x1d', # 0x1d -'\x1e', # 0x1e -'\x1f', # 0x1f -' ', # 0x20 -'!', # 0x21 -'"', # 0x22 -'#', # 0x23 -'$', # 0x24 -'%', # 0x25 -'&', # 0x26 -'\'', # 0x27 -'(', # 0x28 -')', # 0x29 -'*', # 0x2a -'+', # 0x2b -',', # 0x2c -'-', # 0x2d -'.', # 0x2e -'/', # 0x2f -'0', # 0x30 -'1', # 0x31 -'2', # 0x32 -'3', # 0x33 -'4', # 0x34 -'5', # 0x35 -'6', # 0x36 -'7', # 0x37 -'8', # 0x38 -'9', # 0x39 -':', # 0x3a -';', # 0x3b -'<', # 0x3c -'=', # 0x3d -'>', # 0x3e -'?', # 0x3f -'@', # 0x40 -'A', # 0x41 -'B', # 0x42 -'C', # 0x43 -'D', # 0x44 -'E', # 0x45 -'F', # 0x46 -'G', # 0x47 -'H', # 0x48 -'I', # 0x49 -'J', # 0x4a -'K', # 0x4b -'L', # 0x4c -'M', # 0x4d -'N', # 0x4e -'O', # 0x4f -'P', # 0x50 -'Q', # 0x51 -'R', # 0x52 -'S', # 0x53 -'T', # 0x54 -'U', # 0x55 -'V', # 0x56 -'W', # 0x57 -'X', # 0x58 -'Y', # 0x59 -'Z', # 0x5a -']', # 0x5b -'\\', # 0x5c -']', # 0x5d -'^', # 0x5e -'_', # 0x5f -'`', # 0x60 -'a', # 0x61 -'b', # 0x62 -'c', # 0x63 -'d', # 0x64 -'e', # 0x65 -'f', # 0x66 -'g', # 0x67 -'h', # 0x68 -'i', # 0x69 -'j', # 0x6a -'k', # 0x6b -'l', # 0x6c -'m', # 0x6d -'n', # 0x6e -'o', # 0x6f -'p', # 0x70 -'q', # 0x71 -'r', # 0x72 -'s', # 0x73 -'t', # 0x74 -'u', # 0x75 -'v', # 0x76 -'w', # 0x77 -'x', # 0x78 -'y', # 0x79 -'z', # 0x7a -'{', # 0x7b -'|', # 0x7c -'}', # 0x7d -'~', # 0x7e -'', # 0x7f -'', # 0x80 -'', # 0x81 -'', # 0x82 -'', # 0x83 -'', # 0x84 -'', # 0x85 -'', # 0x86 -'', # 0x87 -'', # 0x88 -'', # 0x89 -'', # 0x8a -'', # 0x8b -'', # 0x8c -'', # 0x8d -'', # 0x8e -'', # 0x8f -'', # 0x90 -'', # 0x91 -'', # 0x92 -'', # 0x93 -'', # 0x94 -'', # 0x95 -'', # 0x96 -'', # 0x97 -'', # 0x98 -'', # 0x99 -'', # 0x9a -'', # 0x9b -'', # 0x9c -'', # 0x9d -'', # 0x9e -'', # 0x9f -' ', # 0xa0 -'!', # 0xa1 -'C/', # 0xa2 -'PS', # 0xa3 -'$?', # 0xa4 -'Y=', # 0xa5 -'|', # 0xa6 -'SS', # 0xa7 -'"', # 0xa8 -'(c)', # 0xa9 -'a', # 0xaa -'<<', # 0xab -'!', # 0xac -'', # 0xad -'(r)', # 0xae -'-', # 0xaf -'deg', # 0xb0 -'+-', # 0xb1 -'2', # 0xb2 -'3', # 0xb3 -'\'', # 0xb4 -'u', # 0xb5 -'P', # 0xb6 -'*', # 0xb7 -',', # 0xb8 -'1', # 0xb9 -'o', # 0xba -'>>', # 0xbb -'1/4', # 0xbc -'1/2', # 0xbd -'3/4', # 0xbe -'?', # 0xbf -'A', # 0xc0 -'A', # 0xc1 -'A', # 0xc2 -'A', # 0xc3 -'A', # 0xc4 -'A', # 0xc5 -'AE', # 0xc6 -'C', # 0xc7 -'E', # 0xc8 -'E', # 0xc9 -'E', # 0xca -'E', # 0xcb -'I', # 0xcc -'I', # 0xcd -'I', # 0xce -'I', # 0xcf -'D', # 0xd0 -'N', # 0xd1 -'O', # 0xd2 -'O', # 0xd3 -'O', # 0xd4 -'O', # 0xd5 -'O', # 0xd6 -'x', # 0xd7 -'O', # 0xd8 -'U', # 0xd9 -'U', # 0xda -'U', # 0xdb -'U', # 0xdc -'U', # 0xdd -'Th', # 0xde -'ss', # 0xdf -'a', # 0xe0 -'a', # 0xe1 -'a', # 0xe2 -'a', # 0xe3 -'a', # 0xe4 -'a', # 0xe5 -'ae', # 0xe6 -'c', # 0xe7 -'e', # 0xe8 -'e', # 0xe9 -'e', # 0xea -'e', # 0xeb -'i', # 0xec -'i', # 0xed -'i', # 0xee -'i', # 0xef -'d', # 0xf0 -'n', # 0xf1 -'o', # 0xf2 -'o', # 0xf3 -'o', # 0xf4 -'o', # 0xf5 -'o', # 0xf6 -'/', # 0xf7 -'o', # 0xf8 -'u', # 0xf9 -'u', # 0xfa -'u', # 0xfb -'u', # 0xfc -'y', # 0xfd -'th', # 0xfe -'y', # 0xff -) \ No newline at end of file diff --git a/Src/Lib/Unidecode/x021.py b/Src/Lib/Unidecode/x021.py deleted file mode 100644 index 0c19908..0000000 --- a/Src/Lib/Unidecode/x021.py +++ /dev/null @@ -1,42 +0,0 @@ -data = ( -' ', # 0x00 -' ', # 0x01 -' ', # 0x02 -' ', # 0x03 -' ', # 0x04 -' ', # 0x05 -' ', # 0x06 -' ', # 0x07 -' ', # 0x08 -' ', # 0x09 -' ', # 0x0a -' ', # 0x0b -'', # 0x0c -'', # 0x0d -'', # 0x0e -'', # 0x0f -'-', # 0x10 -'-', # 0x11 -'-', # 0x12 -'-', # 0x13 -'--', # 0x14 -'--', # 0x15 -'||', # 0x16 -'_', # 0x17 -'\'', # 0x18 -'\'', # 0x19 -',', # 0x1a -'\'', # 0x1b -'"', # 0x1c -'"', # 0x1d -',,', # 0x1e -'"', # 0x1f -'+', # 0x20 -'++', # 0x21 -'*', # 0x22 -'*>', # 0x23 -'.', # 0x24 -'..', # 0x25 -'...', # 0x26 -'.', # 0x27 -) diff --git a/Src/Util/mapper.py b/Src/Util/mapper.py index 9066d7c..272911d 100644 --- a/Src/Util/mapper.py +++ b/Src/Util/mapper.py @@ -1,8 +1,9 @@ # 10.04.24 +from unidecode import unidecode as transliterate + # Internal utilities -from Src.Lib.Unidecode import transliterate from Src.Util.config import config_manager from Src.Api.Class.EpisodeType import Episode diff --git a/file_list.txt b/file_list.txt new file mode 100644 index 0000000..1fa7073 --- /dev/null +++ b/file_list.txt @@ -0,0 +1,13 @@ +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\0.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\1.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\2.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\3.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\4.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\5.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\6.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\7.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\8.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\9.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\10.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\11.ts' +file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\12.ts' diff --git a/requirements.txt b/requirements.txt index 3d213d5..ee74277 100644 Binary files a/requirements.txt and b/requirements.txt differ