fix(#92): add an argument to allow NavigableStrings to be translated (#126)

* fix(#92): add an argument to allow NavigableStrings to be translated --------- Co-authored-by: yihong0618 <zouzou0208@gmail.com>
2025-06-05 19:15:34 +00:00 · 2023-03-10 15:24:20 +08:00 · 2023-03-10 15:24:20 +08:00 · cbe165df19
commit cbe165df19
parent dfcf078028
4 changed files with 30 additions and 5 deletions
--- a/README-CN.md
+++ b/README-CN.md
@@ -32,6 +32,7 @@ bilingual_book_maker 是一个 AI 翻译工具，使用 ChatGPT 帮助用户制
   **请注意，此处你输入的api应该是'`https://xxxx/v1`'的字样，域名需要用引号包裹**
 11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
 12. 如果出现了错误或使用 `CTRL+C` 中断命令，不想接下来继续翻译了，会生成一本 ${book_name}_bilingual_temp.epub 的书，直接改成你想要的名字就可以了
+13. 如果你想要翻译电子书中的无标签字符串，可以使用 `--allow_navigable_strings` 参数，会将可遍历字符串加入翻译队列，**注意，在条件允许情况下，请寻找更规范的电子书**

 e.g.
 ```shell
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
   **Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
 11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated.
 12. If an error occurs or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
+13. If you want to translate strings in an e-book that are not wrapped in any tag, you can use the `--allow_navigable_strings` parameter. This will add those navigable strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**

 ### Examples

--- a/book_maker/cli.py
+++ b/book_maker/cli.py
@@ -89,6 +89,13 @@ def main():
        default="p",
        help="example --translate-tags p,blockquote",
    )
+    parser.add_argument(
+        "--allow_navigable_strings",
+        dest="allow_navigable_strings",
+        action="store_true",
+        default=False,
+        help="allow NavigableStrings to be translated",
+    )

    options = parser.parse_args()
    PROXY = options.proxy
@@ -96,9 +103,16 @@ def main():
        os.environ["http_proxy"] = PROXY
        os.environ["https_proxy"] = PROXY

-    OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
-    if not OPENAI_API_KEY:
-        raise Exception("OpenAI API key not provided, please google how to obtain it")
+    translate_model = MODEL_DICT.get(options.model)
+    assert translate_model is not None, "unsupported model"
+    if translate_model in ["gpt3", "chatgptapi"]:
+        OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
+        if not OPENAI_API_KEY:
+            raise Exception(
+                "OpenAI API key not provided, please google how to obtain it"
+            )
+    else:
+        OPENAI_API_KEY = ""

    book_type = options.book_name.split(".")[-1]
    support_type_list = list(BOOK_LOADER_DICT.keys())
@@ -106,8 +120,6 @@ def main():
        raise Exception(
            f"now only support files of these formats: {','.join(support_type_list)}"
        )
-    translate_model = MODEL_DICT.get(options.model)
-    assert translate_model is not None, "unsupported model"

    book_loader = BOOK_LOADER_DICT.get(book_type)
    assert book_loader is not None, "unsupported loader"
@@ -129,6 +141,7 @@ def main():
        is_test=options.test,
        test_num=options.test_num,
        translate_tags=options.translate_tags,
+        allow_navigable_strings=options.allow_navigable_strings,
    )
    e.make_bilingual_book()

--- a/book_maker/loader/epub_loader.py
+++ b/book_maker/loader/epub_loader.py
@@ -24,6 +24,7 @@ class EPUBBookLoader(BaseBookLoader):
        is_test=False,
        test_num=5,
        translate_tags="p",
+        allow_navigable_strings=False,
    ):
        self.epub_name = epub_name
        self.new_epub = epub.EpubBook()
@@ -31,6 +32,7 @@ class EPUBBookLoader(BaseBookLoader):
        self.is_test = is_test
        self.test_num = test_num
        self.translate_tags = translate_tags
+        self.allow_navigable_strings = allow_navigable_strings

        try:
            self.origin_book = epub.read_epub(self.epub_name)
@@ -77,6 +79,12 @@ class EPUBBookLoader(BaseBookLoader):
            else len(bs(i.content, "html.parser").findAll(trans_taglist))
            for i in all_items
        )
+        all_p_length += self.allow_navigable_strings * sum(
+            0
+            if i.get_type() != ITEM_DOCUMENT
+            else len(bs(i.content, "html.parser").findAll(text=True))
+            for i in all_items
+        )
        pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
        index = 0
        p_to_save_len = len(self.p_to_save)
@@ -85,6 +93,8 @@ class EPUBBookLoader(BaseBookLoader):
                if item.get_type() == ITEM_DOCUMENT:
                    soup = bs(item.content, "html.parser")
                    p_list = soup.findAll(trans_taglist)
+                    if self.allow_navigable_strings:
+                        p_list.extend(soup.findAll(text=True))
                    is_test_done = self.is_test and index > self.test_num
                    for p in p_list:
                        if is_test_done or not p.text or self._is_special_text(p.text):