From fd5782513a7dd25c862b61c1bd051a450875d342 Mon Sep 17 00:00:00 2001 From: hleft <89069008+hleft@users.noreply.github.com> Date: Sun, 2 Apr 2023 17:09:52 +0800 Subject: [PATCH] support exclude or only translate filelist, exclude taglist (#216) * support exclude_filelist support only_filelist * support exclude tags * clean --- book_maker/cli.py | 27 +++++++++++++++++++++++++++ book_maker/loader/epub_loader.py | 30 ++++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/book_maker/cli.py b/book_maker/cli.py index 5aacafc..bc93fcf 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -152,6 +152,20 @@ def main(): type=str, help="specify base url other than the OpenAI's official API address", ) + parser.add_argument( + "--exclude_filelist", + dest="exclude_filelist", + type=str, + default="", + help="if you have more than one file to exclude, please use comma to split them, example: --exclude_filelist 'nav.xhtml,cover.xhtml'", + ) + parser.add_argument( + "--only_filelist", + dest="only_filelist", + type=str, + default="", + help="if you only have a few files with translations, please use comma to split them, example: --only_filelist 'nav.xhtml,cover.xhtml'", + ) parser.add_argument( "--translate-tags", dest="translate_tags", @@ -159,6 +173,13 @@ def main(): default="p", help="example --translate-tags p,blockquote", ) + parser.add_argument( + "--exclude_translate-tags", + dest="exclude_translate_tags", + type=str, + default="sup", + help="example --exclude_translate-tags table,sup", + ) parser.add_argument( "--allow_navigable_strings", dest="allow_navigable_strings", @@ -315,6 +336,12 @@ So you are close to reaching the limit. You have to choose your own value, there e.allow_navigable_strings = True if options.translate_tags: e.translate_tags = options.translate_tags + if options.exclude_translate_tags: + e.exclude_translate_tags = options.exclude_translate_tags + if options.exclude_filelist: + e.exclude_filelist = options.exclude_filelist + if options.only_filelist: + e.only_filelist = options.only_filelist if options.accumulated_num > 1: e.accumulated_num = options.accumulated_num if options.translation_style: diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index 3404843..689b90e 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -45,6 +45,7 @@ class EPUBBookLoader(BaseBookLoader): self.is_test = is_test self.test_num = test_num self.translate_tags = "p" + self.exclude_translate_tags = "sup" self.allow_navigable_strings = False self.accumulated_num = 1 self.translation_style = "" @@ -52,6 +53,8 @@ class EPUBBookLoader(BaseBookLoader): self.translate_model, self.accumulated_num, self.translation_style ) self.retranslate = None + self.exclude_filelist = "" + self.only_filelist = "" # monkey patch for # 173 def _write_items_patch(obj): @@ -118,14 +121,18 @@ class EPUBBookLoader(BaseBookLoader): new_p = copy(p) + for p_exclude in self.exclude_translate_tags.split(","): + for pt in new_p.find_all(p_exclude): + pt.extract() + if self.resume and index < p_to_save_len: new_p.string = self.p_to_save[index] else: if type(p) == NavigableString: - new_p = self.translate_model.translate(p.text) + new_p = self.translate_model.translate(new_p.text) self.p_to_save.append(new_p) else: - new_p.string = self.translate_model.translate(p.text) + new_p.string = self.translate_model.translate(new_p.text) self.p_to_save.append(new_p.text) self.helper.insert_trans(p, new_p.string, self.translation_style) @@ -142,8 +149,11 @@ class EPUBBookLoader(BaseBookLoader): for i in range(len(p_list)): p = p_list[i] temp_p = copy(p) - for sup in temp_p.find_all("sup"): - sup.extract() + + for p_exclude in self.exclude_translate_tags.split(","): + for pt in temp_p.find_all(p_exclude): + pt.extract() + if any( [not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)] ): @@ -304,6 +314,16 @@ class EPUBBookLoader(BaseBookLoader): fixstart=None, fixend=None, ): + if self.only_filelist != "" and not item.file_name in self.only_filelist.split( + "," + ): + return index + elif self.only_filelist == "" and item.file_name in self.exclude_filelist.split( + "," + ): + new_book.add_item(item) + return index + if not os.path.exists("log"): os.makedirs("log") @@ -391,8 +411,6 @@ class EPUBBookLoader(BaseBookLoader): new_book.add_item(item) for item in self.origin_book.get_items_of_type(ITEM_DOCUMENT): - # if item.file_name != "OEBPS/ch01.xhtml": - # continue index = self.process_item( item, index, p_to_save_len, pbar, new_book, trans_taglist )