support single_translate in epub (#237)

* support no_bilingual

* use single_translate instead of no_bilingual
This commit is contained in:
hleft 2023-04-09 21:20:41 +08:00 committed by GitHub
parent 237ce5280a
commit a0c999a2e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 11 deletions

View File

@ -230,6 +230,11 @@ So you are close to reaching the limit. You have to choose your own value, there
python3 "make_book.py" --book_name "test_books/animal_farm.epub" --retranslate 'test_books/animal_farm_bilingual.epub' 'index_split_002.html' 'in spite of the present book shortage which'
""",
)
parser.add_argument(
"--single_translate",
action="store_true",
help="output translated book, no bilingual",
)
options = parser.parse_args()
@ -308,6 +313,7 @@ So you are close to reaching the limit. You have to choose your own value, there
is_test=options.test,
test_num=options.test_num,
prompt_config=parse_prompt_arg(options.prompt_arg),
single_translate=options.single_translate,
)
# other options
if options.allow_navigable_strings:

View File

@ -29,6 +29,7 @@ class EPUBBookLoader(BaseBookLoader):
is_test=False,
test_num=5,
prompt_config=None,
single_translate=False,
):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
@ -51,6 +52,7 @@ class EPUBBookLoader(BaseBookLoader):
self.retranslate = None
self.exclude_filelist = ""
self.only_filelist = ""
self.single_translate = single_translate
# monkey patch for # 173
def _write_items_patch(obj):
@ -131,7 +133,9 @@ class EPUBBookLoader(BaseBookLoader):
new_p.string = self.translate_model.translate(new_p.text)
self.p_to_save.append(new_p.text)
self.helper.insert_trans(p, new_p.string, self.translation_style)
self.helper.insert_trans(
p, new_p.string, self.translation_style, self.single_translate
)
index += 1
if index % 20 == 0:
@ -154,18 +158,18 @@ class EPUBBookLoader(BaseBookLoader):
[not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]
):
if i == len(p_list) - 1:
self.helper.deal_old(wait_p_list)
self.helper.deal_old(wait_p_list, self.single_translate)
continue
length = num_tokens_from_text(temp_p.text)
if length > send_num:
self.helper.deal_new(p, wait_p_list)
self.helper.deal_new(p, wait_p_list, self.single_translate)
continue
if i == len(p_list) - 1:
if count + length < send_num:
wait_p_list.append(p)
self.helper.deal_old(wait_p_list)
self.helper.deal_old(wait_p_list, self.single_translate)
else:
self.helper.deal_new(p, wait_p_list)
self.helper.deal_new(p, wait_p_list, self.single_translate)
break
if count + length < send_num:
count += length
@ -175,7 +179,7 @@ class EPUBBookLoader(BaseBookLoader):
# self.helper.deal_old(wait_p_list)
# count = 0
else:
self.helper.deal_old(wait_p_list)
self.helper.deal_old(wait_p_list, self.single_translate)
wait_p_list.append(p)
count = length
@ -459,7 +463,10 @@ class EPUBBookLoader(BaseBookLoader):
else:
new_p.string = self.p_to_save[index]
self.helper.insert_trans(
p, new_p.string, self.translation_style
p,
new_p.string,
self.translation_style,
self.single_translate,
)
index += 1
else:

View File

@ -8,7 +8,7 @@ class EPUBBookLoaderHelper:
self.accumulated_num = accumulated_num
self.translation_style = translation_style
def insert_trans(self, p, text, translation_style=""):
def insert_trans(self, p, text, translation_style="", single_translate=False):
if (
p.string is not None
and p.string.replace(" ", "").strip() == text.replace(" ", "").strip()
@ -19,16 +19,19 @@ class EPUBBookLoaderHelper:
if translation_style != "":
new_p["style"] = translation_style
p.insert_after(new_p)
if single_translate:
p.extract()
def deal_new(self, p, wait_p_list):
self.deal_old(wait_p_list)
def deal_new(self, p, wait_p_list, single_translate=False):
self.deal_old(wait_p_list, single_translate)
self.insert_trans(
p,
shorter_result_link(self.translate_model.translate(p.text)),
self.translation_style,
single_translate,
)
def deal_old(self, wait_p_list):
def deal_old(self, wait_p_list, single_translate=False):
if not wait_p_list:
return
@ -41,6 +44,7 @@ class EPUBBookLoaderHelper:
p,
shorter_result_link(result_txt_list[i]),
self.translation_style,
single_translate,
)
wait_p_list.clear()

View File

@ -18,6 +18,7 @@ class TXTBookLoader(BaseBookLoader):
is_test=False,
test_num=5,
prompt_config=None,
single_translate=False,
) -> None:
self.txt_name = txt_name
self.translate_model = model(