mirror of
https://github.com/Arrowar/StreamingCommunity.git
synced 2025-06-06 11:35:29 +00:00
fix ascii character
This commit is contained in:
parent
f98fbe18e6
commit
67bbb9a043
@ -3,6 +3,7 @@
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
from unidecode import unidecode as transliterate
|
||||
from typing import List
|
||||
|
||||
|
||||
@ -12,7 +13,6 @@ from Src.Util.config import config_manager
|
||||
from Src.Util.table import TVShowManager
|
||||
from Src.Util.message import start_message
|
||||
from Src.Util.os import remove_special_characters
|
||||
from Src.Lib.Unidecode import transliterate
|
||||
from Src.Util.file_validation import can_create_file
|
||||
from Src.Lib.FFmpeg.my_m3u8 import Downloader
|
||||
from Src.Util.mapper import map_episode_title
|
||||
|
@ -1,151 +0,0 @@
|
||||
# 04.04.24
|
||||
|
||||
import os
|
||||
import logging
|
||||
import importlib.util
|
||||
|
||||
|
||||
# Variable
|
||||
Cache = {}
|
||||
|
||||
|
||||
class UnidecodeError(ValueError):
    """Error raised by the transliteration helpers in this module.

    It is raised when a character has no ASCII replacement and the 'strict'
    policy is selected, and when the ``errors`` argument is not one of the
    supported policy names.
    """
|
||||
|
||||
|
||||
def transliterate_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
    """Transliterate a string to ASCII, always walking it character by character.

    Unlike the "expect ASCII" variant, no fast-path check is attempted first.

    Args:
        string (str): The input string, possibly containing non-ASCII characters.
        errors (str): Policy for characters without a replacement. One of
            'ignore', 'strict', 'replace' or 'preserve'.
        replace_str (str): The replacement string used when errors='replace'.

    Returns:
        str: The transliterated string.
    """
    return _transliterate(string, errors, replace_str)
|
||||
|
||||
|
||||
def _get_ascii_representation(char: str) -> str:
|
||||
"""Obtains the ASCII representation of a Unicode character.
|
||||
|
||||
Args:
|
||||
char (str): The Unicode character.
|
||||
|
||||
Returns:
|
||||
str: The ASCII representation of the character.
|
||||
"""
|
||||
codepoint = ord(char)
|
||||
|
||||
# If the character is ASCII, return it as is
|
||||
if codepoint < 0x80:
|
||||
return str(char)
|
||||
|
||||
# Ignore characters outside the BMP (Basic Multilingual Plane)
|
||||
if codepoint > 0xeffff:
|
||||
return None
|
||||
|
||||
# Warn about surrogate characters
|
||||
if 0xd800 <= codepoint <= 0xdfff:
|
||||
logging.warning("Surrogate character %r will be ignored. "
|
||||
"You might be using a narrow Python build.", char)
|
||||
|
||||
# Calculate section and position
|
||||
section = codepoint >> 8
|
||||
position = codepoint % 256
|
||||
|
||||
try:
|
||||
# Look up the character in the cache
|
||||
table = Cache[section]
|
||||
|
||||
except KeyError:
|
||||
try:
|
||||
# Import the module corresponding to the section
|
||||
module_name = f"x{section:03x}.py"
|
||||
main = os.path.abspath(os.path.dirname(__file__))
|
||||
module_path = os.path.join(main, module_name)
|
||||
spec = importlib.util.spec_from_file_location(module_name, module_path)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
except ImportError:
|
||||
# If module import fails, set cache entry to None and return
|
||||
Cache[section] = None
|
||||
return None
|
||||
|
||||
# Update cache with module data
|
||||
Cache[section] = table = module.data
|
||||
|
||||
# Return the ASCII representation if found, otherwise None
|
||||
if table and len(table) > position:
|
||||
return table[position]
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def _transliterate(string: str, errors: str, replace_str: str) -> str:
|
||||
"""Main transliteration function.
|
||||
|
||||
Args:
|
||||
string (str): The input string.
|
||||
errors (str): Specifies the treatment of errors. Can be 'ignore', 'strict', 'replace', or 'preserve'.
|
||||
replace_str (str): The replacement string used when errors='replace'.
|
||||
|
||||
Returns:
|
||||
str: The transliterated string.
|
||||
"""
|
||||
retval = []
|
||||
|
||||
for char in string:
|
||||
# Get the ASCII representation of the character
|
||||
ascii_char = _get_ascii_representation(char)
|
||||
|
||||
if ascii_char is None:
|
||||
# Handle errors based on the specified policy
|
||||
if errors == 'ignore':
|
||||
ascii_char = ''
|
||||
elif errors == 'strict':
|
||||
logging.error(f'No replacement found for character {char!r}')
|
||||
raise UnidecodeError(f'no replacement found for character {char!r}')
|
||||
elif errors == 'replace':
|
||||
ascii_char = replace_str
|
||||
elif errors == 'preserve':
|
||||
ascii_char = char
|
||||
else:
|
||||
logging.error(f'Invalid value for errors parameter {errors!r}')
|
||||
raise UnidecodeError(f'invalid value for errors parameter {errors!r}')
|
||||
|
||||
# Append the ASCII representation to the result
|
||||
retval.append(ascii_char)
|
||||
|
||||
return ''.join(retval)
|
||||
|
||||
|
||||
def transliterate_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
    """Transliterate a string to ASCII, optimised for input that is mostly ASCII.

    The string is first test-encoded as ASCII; when that succeeds it is
    returned unchanged and the per-character tables are never consulted.

    Args:
        string (str): The input string, possibly containing non-ASCII characters.
        errors (str): Policy for characters without a replacement. One of
            'ignore', 'strict', 'replace' or 'preserve'.
        replace_str (str): The replacement string used when errors='replace'.

    Returns:
        str: The input itself when it is pure ASCII, otherwise the
        transliterated string.
    """
    try:
        string.encode('ASCII')

    except UnicodeEncodeError:
        # At least one non-ASCII character: take the slow path
        return _transliterate(string, errors, replace_str)

    # Already pure ASCII, nothing to do
    return string
|
||||
|
||||
|
||||
# Out
|
||||
transliterate = transliterate_expect_ascii
|
@ -1,258 +0,0 @@
|
||||
# Replacement table for code points U+0000..U+00FF, indexed by the low byte.
# Entries below 0x80 are never consulted by the lookup code (ASCII is returned
# as-is before any table is loaded); they are kept so positions line up.
data = (
    '\x00',  # 0x00
    '\x01',  # 0x01
    '\x02',  # 0x02
    '\x03',  # 0x03
    '\x04',  # 0x04
    '\x05',  # 0x05
    '\x06',  # 0x06
    '\x07',  # 0x07
    '\x08',  # 0x08
    '\x09',  # 0x09
    '\x0a',  # 0x0a
    '\x0b',  # 0x0b
    '\x0c',  # 0x0c
    '\x0d',  # 0x0d
    '\x0e',  # 0x0e
    '\x0f',  # 0x0f
    '\x10',  # 0x10
    '\x11',  # 0x11
    '\x12',  # 0x12
    '\x13',  # 0x13
    '\x14',  # 0x14
    '\x15',  # 0x15
    '\x16',  # 0x16
    '\x17',  # 0x17
    '\x18',  # 0x18
    '\x19',  # 0x19
    '\x1a',  # 0x1a
    '\x1b',  # 0x1b
    '\x1c',  # 0x1c
    '\x1d',  # 0x1d
    '\x1e',  # 0x1e
    '\x1f',  # 0x1f
    ' ',  # 0x20
    '!',  # 0x21
    '"',  # 0x22
    '#',  # 0x23
    '$',  # 0x24
    '%',  # 0x25
    '&',  # 0x26
    '\'',  # 0x27
    '(',  # 0x28
    ')',  # 0x29
    '*',  # 0x2a
    '+',  # 0x2b
    ',',  # 0x2c
    '-',  # 0x2d
    '.',  # 0x2e
    '/',  # 0x2f
    '0',  # 0x30
    '1',  # 0x31
    '2',  # 0x32
    '3',  # 0x33
    '4',  # 0x34
    '5',  # 0x35
    '6',  # 0x36
    '7',  # 0x37
    '8',  # 0x38
    '9',  # 0x39
    ':',  # 0x3a
    ';',  # 0x3b
    '<',  # 0x3c
    '=',  # 0x3d
    '>',  # 0x3e
    '?',  # 0x3f
    '@',  # 0x40
    'A',  # 0x41
    'B',  # 0x42
    'C',  # 0x43
    'D',  # 0x44
    'E',  # 0x45
    'F',  # 0x46
    'G',  # 0x47
    'H',  # 0x48
    'I',  # 0x49
    'J',  # 0x4a
    'K',  # 0x4b
    'L',  # 0x4c
    'M',  # 0x4d
    'N',  # 0x4e
    'O',  # 0x4f
    'P',  # 0x50
    'Q',  # 0x51
    'R',  # 0x52
    'S',  # 0x53
    'T',  # 0x54
    'U',  # 0x55
    'V',  # 0x56
    'W',  # 0x57
    'X',  # 0x58
    'Y',  # 0x59
    'Z',  # 0x5a
    '[',  # 0x5b
    '\\',  # 0x5c
    ']',  # 0x5d
    '^',  # 0x5e
    '_',  # 0x5f
    '`',  # 0x60
    'a',  # 0x61
    'b',  # 0x62
    'c',  # 0x63
    'd',  # 0x64
    'e',  # 0x65
    'f',  # 0x66
    'g',  # 0x67
    'h',  # 0x68
    'i',  # 0x69
    'j',  # 0x6a
    'k',  # 0x6b
    'l',  # 0x6c
    'm',  # 0x6d
    'n',  # 0x6e
    'o',  # 0x6f
    'p',  # 0x70
    'q',  # 0x71
    'r',  # 0x72
    's',  # 0x73
    't',  # 0x74
    'u',  # 0x75
    'v',  # 0x76
    'w',  # 0x77
    'x',  # 0x78
    'y',  # 0x79
    'z',  # 0x7a
    '{',  # 0x7b
    '|',  # 0x7c
    '}',  # 0x7d
    '~',  # 0x7e
    '',  # 0x7f
    '',  # 0x80
    '',  # 0x81
    '',  # 0x82
    '',  # 0x83
    '',  # 0x84
    '',  # 0x85
    '',  # 0x86
    '',  # 0x87
    '',  # 0x88
    '',  # 0x89
    '',  # 0x8a
    '',  # 0x8b
    '',  # 0x8c
    '',  # 0x8d
    '',  # 0x8e
    '',  # 0x8f
    '',  # 0x90
    '',  # 0x91
    '',  # 0x92
    '',  # 0x93
    '',  # 0x94
    '',  # 0x95
    '',  # 0x96
    '',  # 0x97
    '',  # 0x98
    '',  # 0x99
    '',  # 0x9a
    '',  # 0x9b
    '',  # 0x9c
    '',  # 0x9d
    '',  # 0x9e
    '',  # 0x9f
    ' ',  # 0xa0
    '!',  # 0xa1
    'C/',  # 0xa2
    'PS',  # 0xa3
    '$?',  # 0xa4
    'Y=',  # 0xa5
    '|',  # 0xa6
    'SS',  # 0xa7
    '"',  # 0xa8
    '(c)',  # 0xa9
    'a',  # 0xaa
    '<<',  # 0xab
    '!',  # 0xac
    '',  # 0xad
    '(r)',  # 0xae
    '-',  # 0xaf
    'deg',  # 0xb0
    '+-',  # 0xb1
    '2',  # 0xb2
    '3',  # 0xb3
    '\'',  # 0xb4
    'u',  # 0xb5
    'P',  # 0xb6
    '*',  # 0xb7
    ',',  # 0xb8
    '1',  # 0xb9
    'o',  # 0xba
    '>>',  # 0xbb
    '1/4',  # 0xbc
    '1/2',  # 0xbd
    '3/4',  # 0xbe
    '?',  # 0xbf
    'A',  # 0xc0
    'A',  # 0xc1
    'A',  # 0xc2
    'A',  # 0xc3
    'A',  # 0xc4
    'A',  # 0xc5
    'AE',  # 0xc6
    'C',  # 0xc7
    'E',  # 0xc8
    'E',  # 0xc9
    'E',  # 0xca
    'E',  # 0xcb
    'I',  # 0xcc
    'I',  # 0xcd
    'I',  # 0xce
    'I',  # 0xcf
    'D',  # 0xd0
    'N',  # 0xd1
    'O',  # 0xd2
    'O',  # 0xd3
    'O',  # 0xd4
    'O',  # 0xd5
    'O',  # 0xd6
    'x',  # 0xd7
    'O',  # 0xd8
    'U',  # 0xd9
    'U',  # 0xda
    'U',  # 0xdb
    'U',  # 0xdc
    'Y',  # 0xdd
    'Th',  # 0xde
    'ss',  # 0xdf
    'a',  # 0xe0
    'a',  # 0xe1
    'a',  # 0xe2
    'a',  # 0xe3
    'a',  # 0xe4
    'a',  # 0xe5
    'ae',  # 0xe6
    'c',  # 0xe7
    'e',  # 0xe8
    'e',  # 0xe9
    'e',  # 0xea
    'e',  # 0xeb
    'i',  # 0xec
    'i',  # 0xed
    'i',  # 0xee
    'i',  # 0xef
    'd',  # 0xf0
    'n',  # 0xf1
    'o',  # 0xf2
    'o',  # 0xf3
    'o',  # 0xf4
    'o',  # 0xf5
    'o',  # 0xf6
    '/',  # 0xf7
    'o',  # 0xf8
    'u',  # 0xf9
    'u',  # 0xfa
    'u',  # 0xfb
    'u',  # 0xfc
    'y',  # 0xfd
    'th',  # 0xfe
    'y',  # 0xff
)
|
@ -1,42 +0,0 @@
|
||||
# Replacement table for code points U+2000..U+2027, indexed by the low byte.
# The table stops at 0x27; higher positions in this section have no entry and
# are treated by the lookup code as "no replacement".
data = (
    ' ',  # 0x00
    ' ',  # 0x01
    ' ',  # 0x02
    ' ',  # 0x03
    ' ',  # 0x04
    ' ',  # 0x05
    ' ',  # 0x06
    ' ',  # 0x07
    ' ',  # 0x08
    ' ',  # 0x09
    ' ',  # 0x0a
    ' ',  # 0x0b
    '',  # 0x0c
    '',  # 0x0d
    '',  # 0x0e
    '',  # 0x0f
    '-',  # 0x10
    '-',  # 0x11
    '-',  # 0x12
    '-',  # 0x13
    '--',  # 0x14
    '--',  # 0x15
    '||',  # 0x16
    '_',  # 0x17
    '\'',  # 0x18
    '\'',  # 0x19
    ',',  # 0x1a
    '\'',  # 0x1b
    '"',  # 0x1c
    '"',  # 0x1d
    ',,',  # 0x1e
    '"',  # 0x1f
    '+',  # 0x20
    '++',  # 0x21
    '*',  # 0x22
    '*>',  # 0x23
    '.',  # 0x24
    '..',  # 0x25
    '...',  # 0x26
    '.',  # 0x27
)
|
@ -1,8 +1,9 @@
|
||||
# 10.04.24
|
||||
|
||||
from unidecode import unidecode as transliterate
|
||||
|
||||
|
||||
# Internal utilities
|
||||
from Src.Lib.Unidecode import transliterate
|
||||
from Src.Util.config import config_manager
|
||||
from Src.Api.Class.EpisodeType import Episode
|
||||
|
||||
|
13
file_list.txt
Normal file
13
file_list.txt
Normal file
@ -0,0 +1,13 @@
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\0.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\1.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\2.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\3.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\4.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\5.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\6.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\7.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\8.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\9.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\10.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\11.ts'
|
||||
file 'Video\Series\pechino-express\S2\pechino-express_S02E01_Ha Noi - Vinh\tmp\video\12.ts'
|
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user