fix ascii character

2025-07-23 02:20:04 +00:00 · 2024-05-16 18:25:00 +02:00 · 2024-05-16 18:25:00 +02:00 · 67bbb9a043
commit 67bbb9a043
parent f98fbe18e6
7 changed files with 16 additions and 453 deletions
--- a/Src/Api/series.py
+++ b/Src/Api/series.py
@@ -3,6 +3,7 @@
 import os
 import sys
 import logging
+from unidecode import unidecode as transliterate
 from typing import List


@@ -12,7 +13,6 @@ from Src.Util.config import config_manager
 from Src.Util.table import TVShowManager
 from Src.Util.message import start_message
 from Src.Util.os import remove_special_characters
-from Src.Lib.Unidecode import transliterate
 from Src.Util.file_validation import can_create_file
 from Src.Lib.FFmpeg.my_m3u8 import Downloader
 from Src.Util.mapper import map_episode_title
--- a/Src/Lib/Unidecode/__init__.py
+++ b/Src/Lib/Unidecode/__init__.py
@@ -1,151 +0,0 @@
-# 04.04.24
-
-import os
-import logging
-import importlib.util
-
-
-# Variable
-Cache = {}
-
-
-class UnidecodeError(ValueError):
-    pass
-
-
-def transliterate_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
-    """Transliterates non-ASCII characters in a string to their ASCII counterparts.
-
-    Args:
-        string (str): The input string containing non-ASCII characters.
-        errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'.
-        replace_str (str): The replacement string used when errors='replace'.
-
-    Returns:
-        str: The transliterated string with non-ASCII characters replaced.
-    """
-    return _transliterate(string, errors, replace_str)
-
-
-def _get_ascii_representation(char: str) -> str:
-    """Obtains the ASCII representation of a Unicode character.
-
-    Args:
-        char (str): The Unicode character.
-
-    Returns:
-        str: The ASCII representation of the character.
-    """
-    codepoint = ord(char)
-
-    # If the character is ASCII, return it as is
-    if codepoint < 0x80:
-        return str(char)
-
-    # Ignore characters outside the BMP (Basic Multilingual Plane)
-    if codepoint > 0xeffff:
-        return None
-
-    # Warn about surrogate characters
-    if 0xd800 <= codepoint <= 0xdfff:
-        logging.warning("Surrogate character %r will be ignored. "
-                        "You might be using a narrow Python build.", char)
-
-    # Calculate section and position
-    section = codepoint >> 8
-    position = codepoint % 256 
-
-    try:
-        # Look up the character in the cache
-        table = Cache[section]
-        
-    except KeyError:
-        try:
-            # Import the module corresponding to the section
-            module_name = f"x{section:03x}.py"
-            main = os.path.abspath(os.path.dirname(__file__))
-            module_path = os.path.join(main, module_name)
-            spec = importlib.util.spec_from_file_location(module_name, module_path)
-            module = importlib.util.module_from_spec(spec)
-            spec.loader.exec_module(module)
-
-        except ImportError:
-            # If module import fails, set cache entry to None and return
-            Cache[section] = None
-            return None
-
-        # Update cache with module data
-        Cache[section] = table = module.data
-
-    # Return the ASCII representation if found, otherwise None
-    if table and len(table) > position:
-        return table[position]
-    else:
-        return None
-
-
-def _transliterate(string: str, errors: str, replace_str: str) -> str:
-    """Main transliteration function.
-
-    Args:
-        string (str): The input string.
-        errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'.
-        replace_str (str): The replacement string used when errors='replace'.
-
-    Returns:
-        str: The transliterated string.
-    """
-    retval = []
-
-    for char in string:
-        # Get the ASCII representation of the character
-        ascii_char = _get_ascii_representation(char)
-
-        if ascii_char is None:
-            # Handle errors based on the specified policy
-            if errors == 'ignore':
-                ascii_char = ''
-            elif errors == 'strict':
-                logging.error(f'No replacement found for character {char!r}')
-                raise UnidecodeError(f'no replacement found for character {char!r}')
-            elif errors == 'replace':
-                ascii_char = replace_str
-            elif errors == 'preserve':
-                ascii_char = char
-            else:
-                logging.error(f'Invalid value for errors parameter {errors!r}')
-                raise UnidecodeError(f'invalid value for errors parameter {errors!r}')
-
-        # Append the ASCII representation to the result
-        retval.append(ascii_char)
-
-    return ''.join(retval)
-
-
-def transliterate_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
-    """Transliterates non-ASCII characters in a string, expecting ASCII input.
-
-    Args:
-        string (str): The input string containing non-ASCII characters.
-        errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'.
-        replace_str (str): The replacement string used when errors='replace'.
-
-    Returns:
-        str: The transliterated string with non-ASCII characters replaced.
-    """
-    try:
-        # Check if the string can be encoded as ASCII
-        string.encode('ASCII')
-    except UnicodeEncodeError:
-        # If encoding fails, fall back to transliteration
-        pass
-    else:
-        # If the string is already ASCII, return it as is
-        return string
-
-    # Otherwise, transliterate non-ASCII characters
-    return _transliterate(string, errors, replace_str)
-
-
-# Out
-transliterate = transliterate_expect_ascii
--- a/Src/Lib/Unidecode/x000.py
+++ b/Src/Lib/Unidecode/x000.py
@@ -1,258 +0,0 @@
-data = (
-'\x00',    # 0x00
-'\x01',    # 0x01
-'\x02',    # 0x02
-'\x03',    # 0x03
-'\x04',    # 0x04
-'\x05',    # 0x05
-'\x06',    # 0x06
-'\x07',    # 0x07
-'\x08',    # 0x08
-'\x09',    # 0x09
-'\x0a',    # 0x0a
-'\x0b',    # 0x0b
-'\x0c',    # 0x0c
-'\x0d',    # 0x0d
-'\x0e',    # 0x0e
-'\x0f',    # 0x0f
-'\x10',    # 0x10
-'\x11',    # 0x11
-'\x12',    # 0x12
-'\x13',    # 0x13
-'\x14',    # 0x14
-'\x15',    # 0x15
-'\x16',    # 0x16
-'\x17',    # 0x17
-'\x18',    # 0x18
-'\x19',    # 0x19
-'\x1a',    # 0x1a
-'\x1b',    # 0x1b
-'\x1c',    # 0x1c
-'\x1d',    # 0x1d
-'\x1e',    # 0x1e
-'\x1f',    # 0x1f
-' ',    # 0x20
-'!',    # 0x21
-'"',    # 0x22
-'#',    # 0x23
-'$',    # 0x24
-'%',    # 0x25
-'&',    # 0x26
-'\'',    # 0x27
-'(',    # 0x28
-')',    # 0x29
-'*',    # 0x2a
-'+',    # 0x2b
-',',    # 0x2c
-'-',    # 0x2d
-'.',    # 0x2e
-'/',    # 0x2f
-'0',    # 0x30
-'1',    # 0x31
-'2',    # 0x32
-'3',    # 0x33
-'4',    # 0x34
-'5',    # 0x35
-'6',    # 0x36
-'7',    # 0x37
-'8',    # 0x38
-'9',    # 0x39
-':',    # 0x3a
-';',    # 0x3b
-'<',    # 0x3c
-'=',    # 0x3d
-'>',    # 0x3e
-'?',    # 0x3f
-'@',    # 0x40
-'A',    # 0x41
-'B',    # 0x42
-'C',    # 0x43
-'D',    # 0x44
-'E',    # 0x45
-'F',    # 0x46
-'G',    # 0x47
-'H',    # 0x48
-'I',    # 0x49
-'J',    # 0x4a
-'K',    # 0x4b
-'L',    # 0x4c
-'M',    # 0x4d
-'N',    # 0x4e
-'O',    # 0x4f
-'P',    # 0x50
-'Q',    # 0x51
-'R',    # 0x52
-'S',    # 0x53
-'T',    # 0x54
-'U',    # 0x55
-'V',    # 0x56
-'W',    # 0x57
-'X',    # 0x58
-'Y',    # 0x59
-'Z',    # 0x5a
-']',    # 0x5b
-'\\',    # 0x5c
-']',    # 0x5d
-'^',    # 0x5e
-'_',    # 0x5f
-'`',    # 0x60
-'a',    # 0x61
-'b',    # 0x62
-'c',    # 0x63
-'d',    # 0x64
-'e',    # 0x65
-'f',    # 0x66
-'g',    # 0x67
-'h',    # 0x68
-'i',    # 0x69
-'j',    # 0x6a
-'k',    # 0x6b
-'l',    # 0x6c
-'m',    # 0x6d
-'n',    # 0x6e
-'o',    # 0x6f
-'p',    # 0x70
-'q',    # 0x71
-'r',    # 0x72
-'s',    # 0x73
-'t',    # 0x74
-'u',    # 0x75
-'v',    # 0x76
-'w',    # 0x77
-'x',    # 0x78
-'y',    # 0x79
-'z',    # 0x7a
-'{',    # 0x7b
-'|',    # 0x7c
-'}',    # 0x7d
-'~',    # 0x7e
-'',    # 0x7f
-'',    # 0x80
-'',    # 0x81
-'',    # 0x82
-'',    # 0x83
-'',    # 0x84
-'',    # 0x85
-'',    # 0x86
-'',    # 0x87
-'',    # 0x88
-'',    # 0x89
-'',    # 0x8a
-'',    # 0x8b
-'',    # 0x8c
-'',    # 0x8d
-'',    # 0x8e
-'',    # 0x8f
-'',    # 0x90
-'',    # 0x91
-'',    # 0x92
-'',    # 0x93
-'',    # 0x94
-'',    # 0x95
-'',    # 0x96
-'',    # 0x97
-'',    # 0x98
-'',    # 0x99
-'',    # 0x9a
-'',    # 0x9b
-'',    # 0x9c
-'',    # 0x9d
-'',    # 0x9e
-'',    # 0x9f
-' ',    # 0xa0
-'!',    # 0xa1
-'C/',    # 0xa2
-'PS',    # 0xa3
-'$?',    # 0xa4
-'Y=',    # 0xa5
-'|',    # 0xa6
-'SS',    # 0xa7
-'"',    # 0xa8
-'(c)',    # 0xa9
-'a',    # 0xaa
-'<<',    # 0xab
-'!',    # 0xac
-'',    # 0xad
-'(r)',    # 0xae
-'-',    # 0xaf
-'deg',    # 0xb0
-'+-',    # 0xb1
-'2',    # 0xb2
-'3',    # 0xb3
-'\'',    # 0xb4
-'u',    # 0xb5
-'P',    # 0xb6
-'*',    # 0xb7
-',',    # 0xb8
-'1',    # 0xb9
-'o',    # 0xba
-'>>',    # 0xbb
-'1/4',    # 0xbc
-'1/2',    # 0xbd
-'3/4',    # 0xbe
-'?',    # 0xbf
-'A',    # 0xc0
-'A',    # 0xc1
-'A',    # 0xc2
-'A',    # 0xc3
-'A',    # 0xc4
-'A',    # 0xc5
-'AE',    # 0xc6
-'C',    # 0xc7
-'E',    # 0xc8
-'E',    # 0xc9
-'E',    # 0xca
-'E',    # 0xcb
-'I',    # 0xcc
-'I',    # 0xcd
-'I',    # 0xce
-'I',    # 0xcf
-'D',    # 0xd0
-'N',    # 0xd1
-'O',    # 0xd2
-'O',    # 0xd3
-'O',    # 0xd4
-'O',    # 0xd5
-'O',    # 0xd6
-'x',    # 0xd7
-'O',    # 0xd8
-'U',    # 0xd9
-'U',    # 0xda
-'U',    # 0xdb
-'U',    # 0xdc
-'U',    # 0xdd
-'Th',    # 0xde
-'ss',    # 0xdf
-'a',    # 0xe0
-'a',    # 0xe1
-'a',    # 0xe2
-'a',    # 0xe3
-'a',    # 0xe4
-'a',    # 0xe5
-'ae',    # 0xe6
-'c',    # 0xe7
-'e',    # 0xe8
-'e',    # 0xe9
-'e',    # 0xea
-'e',    # 0xeb
-'i',    # 0xec
-'i',    # 0xed
-'i',    # 0xee
-'i',    # 0xef
-'d',    # 0xf0
-'n',    # 0xf1
-'o',    # 0xf2
-'o',    # 0xf3
-'o',    # 0xf4
-'o',    # 0xf5
-'o',    # 0xf6
-'/',    # 0xf7
-'o',    # 0xf8
-'u',    # 0xf9
-'u',    # 0xfa
-'u',    # 0xfb
-'u',    # 0xfc
-'y',    # 0xfd
-'th',    # 0xfe
-'y',    # 0xff
-)
--- a/Src/Lib/Unidecode/x021.py
+++ b/Src/Lib/Unidecode/x021.py
@@ -1,42 +0,0 @@
-data = (
-' ',    # 0x00
-' ',    # 0x01
-' ',    # 0x02
-' ',    # 0x03
-' ',    # 0x04
-' ',    # 0x05
-' ',    # 0x06
-' ',    # 0x07
-' ',    # 0x08
-' ',    # 0x09
-' ',    # 0x0a
-' ',    # 0x0b
-'',    # 0x0c
-'',    # 0x0d
-'',    # 0x0e
-'',    # 0x0f
-'-',    # 0x10
-'-',    # 0x11
-'-',    # 0x12
-'-',    # 0x13
-'--',    # 0x14
-'--',    # 0x15
-'||',    # 0x16
-'_',    # 0x17
-'\'',    # 0x18
-'\'',    # 0x19
-',',    # 0x1a
-'\'',    # 0x1b
-'"',    # 0x1c
-'"',    # 0x1d
-',,',    # 0x1e
-'"',    # 0x1f
-'+',    # 0x20
-'++',    # 0x21
-'*',    # 0x22
-'*>',    # 0x23
-'.',    # 0x24
-'..',    # 0x25
-'...',    # 0x26
-'.',    # 0x27
-)
--- a/Src/Util/mapper.py
+++ b/Src/Util/mapper.py
@@ -1,8 +1,9 @@
 # 10.04.24

+from unidecode import unidecode as transliterate
+

 # Internal utilities
-from Src.Lib.Unidecode import transliterate
 from Src.Util.config import config_manager
 from Src.Api.Class.EpisodeType import Episode

--- a/file_list.txt
+++ b/file_list.txt
@@ -0,0 +1,13 @@
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\0.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\1.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\2.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\3.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\4.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\5.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\6.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\7.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\8.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\9.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\10.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\11.ts'
+file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\12.ts'
--- a/requirements.txt
+++ b/requirements.txt