mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
Support excluding files or translating only listed files, and excluding tags from translation (#216)
* Support exclude_filelist and only_filelist * Support excluding tags * Clean up
This commit is contained in:
parent
cc9e816c57
commit
fd5782513a
@ -152,6 +152,20 @@ def main():
|
|||||||
type=str,
|
type=str,
|
||||||
help="specify base url other than the OpenAI's official API address",
|
help="specify base url other than the OpenAI's official API address",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--exclude_filelist",
|
||||||
|
dest="exclude_filelist",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="if you have more than one file to exclude, please use comma to split them, example: --exclude_filelist 'nav.xhtml,cover.xhtml'",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--only_filelist",
|
||||||
|
dest="only_filelist",
|
||||||
|
type=str,
|
||||||
|
default="",
|
||||||
|
help="if you only have a few files with translations, please use comma to split them, example: --only_filelist 'nav.xhtml,cover.xhtml'",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--translate-tags",
|
"--translate-tags",
|
||||||
dest="translate_tags",
|
dest="translate_tags",
|
||||||
@ -159,6 +173,13 @@ def main():
|
|||||||
default="p",
|
default="p",
|
||||||
help="example --translate-tags p,blockquote",
|
help="example --translate-tags p,blockquote",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--exclude_translate-tags",
|
||||||
|
dest="exclude_translate_tags",
|
||||||
|
type=str,
|
||||||
|
default="sup",
|
||||||
|
help="example --exclude_translate-tags table,sup",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--allow_navigable_strings",
|
"--allow_navigable_strings",
|
||||||
dest="allow_navigable_strings",
|
dest="allow_navigable_strings",
|
||||||
@ -315,6 +336,12 @@ So you are close to reaching the limit. You have to choose your own value, there
|
|||||||
e.allow_navigable_strings = True
|
e.allow_navigable_strings = True
|
||||||
if options.translate_tags:
|
if options.translate_tags:
|
||||||
e.translate_tags = options.translate_tags
|
e.translate_tags = options.translate_tags
|
||||||
|
if options.exclude_translate_tags:
|
||||||
|
e.exclude_translate_tags = options.exclude_translate_tags
|
||||||
|
if options.exclude_filelist:
|
||||||
|
e.exclude_filelist = options.exclude_filelist
|
||||||
|
if options.only_filelist:
|
||||||
|
e.only_filelist = options.only_filelist
|
||||||
if options.accumulated_num > 1:
|
if options.accumulated_num > 1:
|
||||||
e.accumulated_num = options.accumulated_num
|
e.accumulated_num = options.accumulated_num
|
||||||
if options.translation_style:
|
if options.translation_style:
|
||||||
|
@ -45,6 +45,7 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
self.is_test = is_test
|
self.is_test = is_test
|
||||||
self.test_num = test_num
|
self.test_num = test_num
|
||||||
self.translate_tags = "p"
|
self.translate_tags = "p"
|
||||||
|
self.exclude_translate_tags = "sup"
|
||||||
self.allow_navigable_strings = False
|
self.allow_navigable_strings = False
|
||||||
self.accumulated_num = 1
|
self.accumulated_num = 1
|
||||||
self.translation_style = ""
|
self.translation_style = ""
|
||||||
@ -52,6 +53,8 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
self.translate_model, self.accumulated_num, self.translation_style
|
self.translate_model, self.accumulated_num, self.translation_style
|
||||||
)
|
)
|
||||||
self.retranslate = None
|
self.retranslate = None
|
||||||
|
self.exclude_filelist = ""
|
||||||
|
self.only_filelist = ""
|
||||||
|
|
||||||
# monkey patch for # 173
|
# monkey patch for # 173
|
||||||
def _write_items_patch(obj):
|
def _write_items_patch(obj):
|
||||||
@ -118,14 +121,18 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
|
|
||||||
new_p = copy(p)
|
new_p = copy(p)
|
||||||
|
|
||||||
|
for p_exclude in self.exclude_translate_tags.split(","):
|
||||||
|
for pt in new_p.find_all(p_exclude):
|
||||||
|
pt.extract()
|
||||||
|
|
||||||
if self.resume and index < p_to_save_len:
|
if self.resume and index < p_to_save_len:
|
||||||
new_p.string = self.p_to_save[index]
|
new_p.string = self.p_to_save[index]
|
||||||
else:
|
else:
|
||||||
if type(p) == NavigableString:
|
if type(p) == NavigableString:
|
||||||
new_p = self.translate_model.translate(p.text)
|
new_p = self.translate_model.translate(new_p.text)
|
||||||
self.p_to_save.append(new_p)
|
self.p_to_save.append(new_p)
|
||||||
else:
|
else:
|
||||||
new_p.string = self.translate_model.translate(p.text)
|
new_p.string = self.translate_model.translate(new_p.text)
|
||||||
self.p_to_save.append(new_p.text)
|
self.p_to_save.append(new_p.text)
|
||||||
|
|
||||||
self.helper.insert_trans(p, new_p.string, self.translation_style)
|
self.helper.insert_trans(p, new_p.string, self.translation_style)
|
||||||
@ -142,8 +149,11 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
for i in range(len(p_list)):
|
for i in range(len(p_list)):
|
||||||
p = p_list[i]
|
p = p_list[i]
|
||||||
temp_p = copy(p)
|
temp_p = copy(p)
|
||||||
for sup in temp_p.find_all("sup"):
|
|
||||||
sup.extract()
|
for p_exclude in self.exclude_translate_tags.split(","):
|
||||||
|
for pt in temp_p.find_all(p_exclude):
|
||||||
|
pt.extract()
|
||||||
|
|
||||||
if any(
|
if any(
|
||||||
[not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]
|
[not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]
|
||||||
):
|
):
|
||||||
@ -304,6 +314,16 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
fixstart=None,
|
fixstart=None,
|
||||||
fixend=None,
|
fixend=None,
|
||||||
):
|
):
|
||||||
|
if self.only_filelist != "" and not item.file_name in self.only_filelist.split(
|
||||||
|
","
|
||||||
|
):
|
||||||
|
return index
|
||||||
|
elif self.only_filelist == "" and item.file_name in self.exclude_filelist.split(
|
||||||
|
","
|
||||||
|
):
|
||||||
|
new_book.add_item(item)
|
||||||
|
return index
|
||||||
|
|
||||||
if not os.path.exists("log"):
|
if not os.path.exists("log"):
|
||||||
os.makedirs("log")
|
os.makedirs("log")
|
||||||
|
|
||||||
@ -391,8 +411,6 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
new_book.add_item(item)
|
new_book.add_item(item)
|
||||||
|
|
||||||
for item in self.origin_book.get_items_of_type(ITEM_DOCUMENT):
|
for item in self.origin_book.get_items_of_type(ITEM_DOCUMENT):
|
||||||
# if item.file_name != "OEBPS/ch01.xhtml":
|
|
||||||
# continue
|
|
||||||
index = self.process_item(
|
index = self.process_item(
|
||||||
item, index, p_to_save_len, pbar, new_book, trans_taglist
|
item, index, p_to_save_len, pbar, new_book, trans_taglist
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user