mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
59 lines
1.6 KiB
Python
59 lines
1.6 KiB
Python
import re
|
|
from copy import copy
|
|
|
|
|
|
class EPUBBookLoaderHelper:
    """Insert translated copies of paragraph nodes right after the originals.

    Nodes handed to this helper must expose a ``text`` attribute, a writable
    ``string`` attribute and an ``insert_after`` method, and must be
    shallow-copyable (presumably BeautifulSoup tags; confirm against caller).
    """

    def __init__(self, translate_model, accumulated_num):
        """Remember the translation backend and the accumulation setting.

        Args:
            translate_model: Object providing ``translate(text)`` and
                ``translate_list(nodes)``.
            accumulated_num: Stored unchanged; no method of this class
                reads it.
        """
        self.translate_model = translate_model
        self.accumulated_num = accumulated_num

    def deal_new(self, p, wait_p_list):
        """Flush the pending nodes, then translate ``p`` on its own.

        Args:
            p: Node to translate individually.
            wait_p_list: Pending nodes; translated in one batch and emptied
                before ``p`` is handled.
        """
        # Pending nodes are handled first, before this node is translated.
        self.deal_old(wait_p_list)

        translated_node = copy(p)
        translated_node.string = self.translate_model.translate(p.text)
        p.insert_after(translated_node)

    def deal_old(self, wait_p_list):
        """Translate all pending nodes in one batch and empty the list.

        Args:
            wait_p_list: Pending nodes. Cleared in place once processed; an
                empty list is a no-op and triggers no translation call.
        """
        if not wait_p_list:
            return

        translations = self.translate_model.translate_list(wait_p_list)

        # zip stops at the shorter sequence, so a node for which the model
        # returned no translation is simply left without a translated copy.
        for node, translated_text in zip(wait_p_list, translations):
            translated_node = copy(node)
            translated_node.string = translated_text
            node.insert_after(translated_node)

        wait_p_list.clear()
|
|
|
|
|
|
def is_text_link(text):
    """Return True when ``text``, stripped of whitespace, starts with a URL.

    Only the beginning of the text is checked: ``re.match`` anchors at the
    start, so anything following the URL does not affect the result.
    """
    candidate = text.strip()
    url_regex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    return re.match(url_regex, candidate) is not None
|
|
|
|
|
|
def is_text_tail_link(text, num=100):
    """Return True when a short piece of text ends with an http(s) URL.

    Args:
        text: Text to inspect; surrounding whitespace is ignored.
        num: Exclusive upper bound on the stripped text length.

    Returns:
        True only if the stripped text is shorter than ``num`` characters
        and a URL runs up to its very end; False otherwise.
    """
    stripped = text.strip()
    if len(stripped) >= num:
        return False

    tail_url_regex = r".*http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$"
    return re.match(tail_url_regex, stripped) is not None
|
|
|
|
|
|
def is_text_source(text):
    """Return True when the stripped text begins with ``"Source: "``.

    Stripping happens before the prefix check, so a text consisting only of
    ``"Source: "`` loses its trailing space and does not qualify.
    """
    stripped = text.strip()
    return stripped.startswith("Source: ")
|
|
|
|
|
|
def is_text_list(text, num=80):
    """Return True when ``text`` looks like a short "Listing N" caption.

    Args:
        text: Text to inspect; surrounding whitespace is ignored.
        num: Exclusive upper bound on the stripped text length.

    Returns:
        bool: True if the stripped text starts with ``Listing`` followed by
        optional whitespace and at least one digit, and is shorter than
        ``num`` characters. Always a real ``bool`` (never ``None``), which
        keeps the return type in line with the other ``is_text_*`` helpers.
    """
    text = text.strip()
    if len(text) >= num:
        return False
    return re.match(r"^Listing\s*\d+", text) is not None
|
|
|
|
|
|
def is_text_figure(text, num=80):
    """Return True when ``text`` looks like a short "Figure N" caption.

    Args:
        text: Text to inspect; surrounding whitespace is ignored.
        num: Exclusive upper bound on the stripped text length.

    Returns:
        bool: True if the stripped text starts with ``Figure`` followed by
        optional whitespace and at least one digit, and is shorter than
        ``num`` characters. Always a real ``bool`` (never ``None``), which
        keeps the return type in line with the other ``is_text_*`` helpers.
    """
    text = text.strip()
    if len(text) >= num:
        return False
    return re.match(r"^Figure\s*\d+", text) is not None
|