diff --git a/README-CN.md b/README-CN.md index aa6be08..a7e77f9 100644 --- a/README-CN.md +++ b/README-CN.md @@ -32,6 +32,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 **请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹** 11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 +13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** e.g. ```shell diff --git a/README.md b/README.md index cbfc273..c91a72a 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u **Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.** 11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. 12. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. +13. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. 
**Note that it's best to look for e-books that are more standardized if possible.** ### Examples diff --git a/book_maker/cli.py b/book_maker/cli.py index 5ede85c..261fe54 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -89,6 +89,13 @@ def main(): default="p", help="example --translate-tags p,blockquote", ) + parser.add_argument( + "--allow_navigable_strings", + dest="allow_navigable_strings", + action="store_true", + default=False, + help="allow NavigableStrings to be translated", + ) options = parser.parse_args() PROXY = options.proxy @@ -96,9 +103,16 @@ def main(): os.environ["http_proxy"] = PROXY os.environ["https_proxy"] = PROXY - OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY") - if not OPENAI_API_KEY: - raise Exception("OpenAI API key not provided, please google how to obtain it") + translate_model = MODEL_DICT.get(options.model) + assert translate_model is not None, "unsupported model" + if translate_model in ["gpt3", "chatgptapi"]: + OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY") + if not OPENAI_API_KEY: + raise Exception( + "OpenAI API key not provided, please google how to obtain it" + ) + else: + OPENAI_API_KEY = "" book_type = options.book_name.split(".")[-1] support_type_list = list(BOOK_LOADER_DICT.keys()) @@ -106,8 +120,6 @@ def main(): raise Exception( f"now only support files of these formats: {','.join(support_type_list)}" ) - translate_model = MODEL_DICT.get(options.model) - assert translate_model is not None, "unsupported model" book_loader = BOOK_LOADER_DICT.get(book_type) assert book_loader is not None, "unsupported loader" @@ -129,6 +141,7 @@ def main(): is_test=options.test, test_num=options.test_num, translate_tags=options.translate_tags, + allow_navigable_strings=options.allow_navigable_strings, ) e.make_bilingual_book() diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index be740ec..3f04dde 100644 --- a/book_maker/loader/epub_loader.py +++ 
b/book_maker/loader/epub_loader.py @@ -24,6 +24,7 @@ class EPUBBookLoader(BaseBookLoader): is_test=False, test_num=5, translate_tags="p", + allow_navigable_strings=False, ): self.epub_name = epub_name self.new_epub = epub.EpubBook() @@ -31,6 +32,7 @@ class EPUBBookLoader(BaseBookLoader): self.is_test = is_test self.test_num = test_num self.translate_tags = translate_tags + self.allow_navigable_strings = allow_navigable_strings try: self.origin_book = epub.read_epub(self.epub_name) @@ -77,6 +79,12 @@ class EPUBBookLoader(BaseBookLoader): else len(bs(i.content, "html.parser").findAll(trans_taglist)) for i in all_items ) + all_p_length += self.allow_navigable_strings * sum( + 0 + if i.get_type() != ITEM_DOCUMENT + else len(bs(i.content, "html.parser").findAll(text=True)) + for i in all_items + ) pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length) index = 0 p_to_save_len = len(self.p_to_save) @@ -85,6 +93,8 @@ class EPUBBookLoader(BaseBookLoader): if item.get_type() == ITEM_DOCUMENT: soup = bs(item.content, "html.parser") p_list = soup.findAll(trans_taglist) + if self.allow_navigable_strings: + p_list.extend(soup.findAll(text=True)) is_test_done = self.is_test and index > self.test_num for p in p_list: if is_test_done or not p.text or self._is_special_text(p.text):