Support excluding files, translating only listed files, and excluding tags from translation (#216)

* support exclude_filelist
* support only_filelist
* support exclude tags
* clean up
2025-06-05 19:15:34 +00:00 · 2023-04-02 17:09:52 +08:00 · 2023-04-02 17:09:52 +08:00 · fd5782513a
commit fd5782513a
parent cc9e816c57
2 changed files with 51 additions and 6 deletions
--- a/book_maker/cli.py
+++ b/book_maker/cli.py
@ -152,6 +152,20 @@ def main():
        type=str,
        help="specify base url other than the OpenAI's official API address",
    )
+    parser.add_argument(
+        "--exclude_filelist",
+        dest="exclude_filelist",
+        type=str,
+        default="",
+        help="if you have more than one file to exclude, please use comma to split them, example: --exclude_filelist 'nav.xhtml,cover.xhtml'",
+    )
+    parser.add_argument(
+        "--only_filelist",
+        dest="only_filelist",
+        type=str,
+        default="",
+        help="if you only have a few files with translations, please use comma to split them, example: --only_filelist 'nav.xhtml,cover.xhtml'",
+    )
    parser.add_argument(
        "--translate-tags",
        dest="translate_tags",
@ -159,6 +173,13 @@ def main():
        default="p",
        help="example --translate-tags p,blockquote",
    )
+    parser.add_argument(
+        "--exclude_translate-tags",
+        dest="exclude_translate_tags",
+        type=str,
+        default="sup",
+        help="example --exclude_translate-tags table,sup",
+    )
    parser.add_argument(
        "--allow_navigable_strings",
        dest="allow_navigable_strings",
@ -315,6 +336,12 @@ So you are close to reaching the limit. You have to choose your own value, there
        e.allow_navigable_strings = True
    if options.translate_tags:
        e.translate_tags = options.translate_tags
+    if options.exclude_translate_tags:
+        e.exclude_translate_tags = options.exclude_translate_tags
+    if options.exclude_filelist:
+        e.exclude_filelist = options.exclude_filelist
+    if options.only_filelist:
+        e.only_filelist = options.only_filelist
    if options.accumulated_num > 1:
        e.accumulated_num = options.accumulated_num
    if options.translation_style:
--- a/book_maker/loader/epub_loader.py
+++ b/book_maker/loader/epub_loader.py
@ -45,6 +45,7 @@ class EPUBBookLoader(BaseBookLoader):
        self.is_test = is_test
        self.test_num = test_num
        self.translate_tags = "p"
+        self.exclude_translate_tags = "sup"
        self.allow_navigable_strings = False
        self.accumulated_num = 1
        self.translation_style = ""
@ -52,6 +53,8 @@ class EPUBBookLoader(BaseBookLoader):
            self.translate_model, self.accumulated_num, self.translation_style
        )
        self.retranslate = None
+        self.exclude_filelist = ""
+        self.only_filelist = ""

        # monkey patch for # 173
        def _write_items_patch(obj):
@ -118,14 +121,18 @@ class EPUBBookLoader(BaseBookLoader):

        new_p = copy(p)

+        for p_exclude in self.exclude_translate_tags.split(","):
+            for pt in new_p.find_all(p_exclude):
+                pt.extract()
+
        if self.resume and index < p_to_save_len:
            new_p.string = self.p_to_save[index]
        else:
            if type(p) == NavigableString:
-                new_p = self.translate_model.translate(p.text)
+                new_p = self.translate_model.translate(new_p.text)
                self.p_to_save.append(new_p)
            else:
-                new_p.string = self.translate_model.translate(p.text)
+                new_p.string = self.translate_model.translate(new_p.text)
                self.p_to_save.append(new_p.text)

        self.helper.insert_trans(p, new_p.string, self.translation_style)
@ -142,8 +149,11 @@ class EPUBBookLoader(BaseBookLoader):
        for i in range(len(p_list)):
            p = p_list[i]
            temp_p = copy(p)
-            for sup in temp_p.find_all("sup"):
-                sup.extract()
+
+            for p_exclude in self.exclude_translate_tags.split(","):
+                for pt in temp_p.find_all(p_exclude):
+                    pt.extract()
+
            if any(
                [not p.text, self._is_special_text(temp_p.text), not_trans(temp_p.text)]
            ):
@ -304,6 +314,16 @@ class EPUBBookLoader(BaseBookLoader):
        fixstart=None,
        fixend=None,
    ):
+        if self.only_filelist != "" and not item.file_name in self.only_filelist.split(
+            ","
+        ):
+            return index
+        elif self.only_filelist == "" and item.file_name in self.exclude_filelist.split(
+            ","
+        ):
+            new_book.add_item(item)
+            return index
+
        if not os.path.exists("log"):
            os.makedirs("log")

@ -391,8 +411,6 @@ class EPUBBookLoader(BaseBookLoader):
                    new_book.add_item(item)

            for item in self.origin_book.get_items_of_type(ITEM_DOCUMENT):
-                # if item.file_name != "OEBPS/ch01.xhtml":
-                #     continue
                index = self.process_item(
                    item, index, p_to_save_len, pbar, new_book, trans_taglist
                )