support single_translate in epub (#237)

* support no_bilingual

* use single_translate instead of no_bilingual
This commit is contained in:
hleft 2023-04-09 21:20:41 +08:00 committed by GitHub
parent 237ce5280a
commit a0c999a2e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 11 deletions

View File

@ -230,6 +230,11 @@ So you are close to reaching the limit. You have to choose your own value, there
python3 "make_book.py" --book_name "test_books/animal_farm.epub" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which' python3 "make_book.py" --book_name "test_books/animal_farm.epub" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which'
""", """,
) )
parser.add_argument(
"--single_translate",
action="store_true",
help="output translated book, no bilingual",
)
options = parser.parse_args() options = parser.parse_args()
@ -308,6 +313,7 @@ So you are close to reaching the limit. You have to choose your own value, there
is_test=options.test, is_test=options.test,
test_num=options.test_num, test_num=options.test_num,
prompt_config=parse_prompt_arg(options.prompt_arg), prompt_config=parse_prompt_arg(options.prompt_arg),
single_translate=options.single_translate,
) )
# other options # other options
if options.allow_navigable_strings: if options.allow_navigable_strings:

View File

@ -29,6 +29,7 @@ class EPUBBookLoader(BaseBookLoader):
is_test=False, is_test=False,
test_num=5, test_num=5,
prompt_config=None, prompt_config=None,
single_translate=False,
): ):
self.epub_name = epub_name self.epub_name = epub_name
self.new_epub = epub.EpubBook() self.new_epub = epub.EpubBook()
@ -51,6 +52,7 @@ class EPUBBookLoader(BaseBookLoader):
self.retranslate = None self.retranslate = None
self.exclude_filelist = "" self.exclude_filelist = ""
self.only_filelist = "" self.only_filelist = ""
self.single_translate = single_translate
# monkey patch for # 173 # monkey patch for # 173
def _write_items_patch(obj): def _write_items_patch(obj):
@ -131,7 +133,9 @@ class EPUBBookLoader(BaseBookLoader):
new_p.string = self.translate_model.translate(new_p.text) new_p.string = self.translate_model.translate(new_p.text)
self.p_to_save.append(new_p.text) self.p_to_save.append(new_p.text)
self.helper.insert_trans(p, new_p.string, self.translation_style) self.helper.insert_trans(
p, new_p.string, self.translation_style, self.single_translate
)
index += 1 index += 1
if index % 20 == 0: if index % 20 == 0:
@ -154,18 +158,18 @@ class EPUBBookLoader(BaseBookLoader):
[not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)] [not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]
): ):
if i == len(p_list) - 1: if i == len(p_list) - 1:
self.helper.deal_old(wait_p_list) self.helper.deal_old(wait_p_list, self.single_translate)
continue continue
length = num_tokens_from_text(temp_p.text) length = num_tokens_from_text(temp_p.text)
if length > send_num: if length > send_num:
self.helper.deal_new(p, wait_p_list) self.helper.deal_new(p, wait_p_list, self.single_translate)
continue continue
if i == len(p_list) - 1: if i == len(p_list) - 1:
if count + length < send_num: if count + length < send_num:
wait_p_list.append(p) wait_p_list.append(p)
self.helper.deal_old(wait_p_list) self.helper.deal_old(wait_p_list, self.single_translate)
else: else:
self.helper.deal_new(p, wait_p_list) self.helper.deal_new(p, wait_p_list, self.single_translate)
break break
if count + length < send_num: if count + length < send_num:
count += length count += length
@ -175,7 +179,7 @@ class EPUBBookLoader(BaseBookLoader):
# self.helper.deal_old(wait_p_list) # self.helper.deal_old(wait_p_list)
# count = 0 # count = 0
else: else:
self.helper.deal_old(wait_p_list) self.helper.deal_old(wait_p_list, self.single_translate)
wait_p_list.append(p) wait_p_list.append(p)
count = length count = length
@ -459,7 +463,10 @@ class EPUBBookLoader(BaseBookLoader):
else: else:
new_p.string = self.p_to_save[index] new_p.string = self.p_to_save[index]
self.helper.insert_trans( self.helper.insert_trans(
p, new_p.string, self.translation_style p,
new_p.string,
self.translation_style,
self.single_translate,
) )
index += 1 index += 1
else: else:

View File

@ -8,7 +8,7 @@ class EPUBBookLoaderHelper:
self.accumulated_num = accumulated_num self.accumulated_num = accumulated_num
self.translation_style = translation_style self.translation_style = translation_style
def insert_trans(self, p, text, translation_style=""): def insert_trans(self, p, text, translation_style="", single_translate=False):
if ( if (
p.string is not None p.string is not None
and p.string.replace(" ", "").strip() == text.replace(" ", "").strip() and p.string.replace(" ", "").strip() == text.replace(" ", "").strip()
@ -19,16 +19,19 @@ class EPUBBookLoaderHelper:
if translation_style != "": if translation_style != "":
new_p["style"] = translation_style new_p["style"] = translation_style
p.insert_after(new_p) p.insert_after(new_p)
if single_translate:
p.extract()
def deal_new(self, p, wait_p_list): def deal_new(self, p, wait_p_list, single_translate=False):
self.deal_old(wait_p_list) self.deal_old(wait_p_list, single_translate)
self.insert_trans( self.insert_trans(
p, p,
shorter_result_link(self.translate_model.translate(p.text)), shorter_result_link(self.translate_model.translate(p.text)),
self.translation_style, self.translation_style,
single_translate,
) )
def deal_old(self, wait_p_list): def deal_old(self, wait_p_list, single_translate=False):
if not wait_p_list: if not wait_p_list:
return return
@ -41,6 +44,7 @@ class EPUBBookLoaderHelper:
p, p,
shorter_result_link(result_txt_list[i]), shorter_result_link(result_txt_list[i]),
self.translation_style, self.translation_style,
single_translate,
) )
wait_p_list.clear() wait_p_list.clear()

View File

@ -18,6 +18,7 @@ class TXTBookLoader(BaseBookLoader):
is_test=False, is_test=False,
test_num=5, test_num=5,
prompt_config=None, prompt_config=None,
single_translate=False,
) -> None: ) -> None:
self.txt_name = txt_name self.txt_name = txt_name
self.translate_model = model( self.translate_model = model(