Support excluding files, translating only listed files, and excluding tags from translation (#216)

* support exclude_filelist

support only_filelist

* support exclude tags

* clean
This commit is contained in:
hleft 2023-04-02 17:09:52 +08:00 committed by GitHub
parent cc9e816c57
commit fd5782513a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 51 additions and 6 deletions

View File

@ -152,6 +152,20 @@ def main():
type=str,
help="specify base url other than the OpenAI's official API address",
)
parser.add_argument(
"--exclude_filelist",
dest="exclude_filelist",
type=str,
default="",
help="if you have more than one file to exclude, please use comma to split them, example: --exclude_filelist 'nav.xhtml,cover.xhtml'",
)
parser.add_argument(
"--only_filelist",
dest="only_filelist",
type=str,
default="",
help="if you only have a few files with translations, please use comma to split them, example: --only_filelist 'nav.xhtml,cover.xhtml'",
)
parser.add_argument(
"--translate-tags",
dest="translate_tags",
@ -159,6 +173,13 @@ def main():
default="p",
help="example --translate-tags p,blockquote",
)
parser.add_argument(
"--exclude_translate-tags",
dest="exclude_translate_tags",
type=str,
default="sup",
help="example --exclude_translate-tags table,sup",
)
parser.add_argument(
"--allow_navigable_strings",
dest="allow_navigable_strings",
@ -315,6 +336,12 @@ So you are close to reaching the limit. You have to choose your own value, there
e.allow_navigable_strings = True
if options.translate_tags:
e.translate_tags = options.translate_tags
if options.exclude_translate_tags:
e.exclude_translate_tags = options.exclude_translate_tags
if options.exclude_filelist:
e.exclude_filelist = options.exclude_filelist
if options.only_filelist:
e.only_filelist = options.only_filelist
if options.accumulated_num > 1:
e.accumulated_num = options.accumulated_num
if options.translation_style:

View File

@ -45,6 +45,7 @@ class EPUBBookLoader(BaseBookLoader):
self.is_test = is_test
self.test_num = test_num
self.translate_tags = "p"
self.exclude_translate_tags = "sup"
self.allow_navigable_strings = False
self.accumulated_num = 1
self.translation_style = ""
@ -52,6 +53,8 @@ class EPUBBookLoader(BaseBookLoader):
self.translate_model, self.accumulated_num, self.translation_style
)
self.retranslate = None
self.exclude_filelist = ""
self.only_filelist = ""
# monkey patch for # 173
def _write_items_patch(obj):
@ -118,14 +121,18 @@ class EPUBBookLoader(BaseBookLoader):
new_p = copy(p)
for p_exclude in self.exclude_translate_tags.split(","):
for pt in new_p.find_all(p_exclude):
pt.extract()
if self.resume and index < p_to_save_len:
new_p.string = self.p_to_save[index]
else:
if type(p) == NavigableString:
new_p = self.translate_model.translate(p.text)
new_p = self.translate_model.translate(new_p.text)
self.p_to_save.append(new_p)
else:
new_p.string = self.translate_model.translate(p.text)
new_p.string = self.translate_model.translate(new_p.text)
self.p_to_save.append(new_p.text)
self.helper.insert_trans(p, new_p.string, self.translation_style)
@ -142,8 +149,11 @@ class EPUBBookLoader(BaseBookLoader):
for i in range(len(p_list)):
p = p_list[i]
temp_p = copy(p)
for sup in temp_p.find_all("sup"):
sup.extract()
for p_exclude in self.exclude_translate_tags.split(","):
for pt in temp_p.find_all(p_exclude):
pt.extract()
if any(
[not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]
):
@ -304,6 +314,16 @@ class EPUBBookLoader(BaseBookLoader):
fixstart=None,
fixend=None,
):
if self.only_filelist != "" and not item.file_name in self.only_filelist.split(
","
):
return index
elif self.only_filelist == "" and item.file_name in self.exclude_filelist.split(
","
):
new_book.add_item(item)
return index
if not os.path.exists("log"):
os.makedirs("log")
@ -391,8 +411,6 @@ class EPUBBookLoader(BaseBookLoader):
new_book.add_item(item)
for item in self.origin_book.get_items_of_type(ITEM_DOCUMENT):
# if item.file_name != "OEBPS/ch01.xhtml":
# continue
index = self.process_item(
item, index, p_to_save_len, pbar, new_book, trans_taglist
)