fix(#92): add an argument to allow NavigableStrings to be translated (#126)

* fix(#92): add an argument to allow NavigableStrings


---------

Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
InzamZ 2023-03-10 15:24:20 +08:00 committed by GitHub
parent dfcf078028
commit cbe165df19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 5 deletions

View File

@ -32,6 +32,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制
**请注意此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹**
11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
e.g.
```shell

View File

@ -36,6 +36,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
**Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` will be generated.
12. If an error occurs, or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
13. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**
### Examples

View File

@ -89,6 +89,13 @@ def main():
default="p",
help="example --translate-tags p,blockquote",
)
parser.add_argument(
"--allow_navigable_strings",
dest="allow_navigable_strings",
action="store_true",
default=False,
help="allow NavigableStrings to be translated",
)
options = parser.parse_args()
PROXY = options.proxy
@ -96,9 +103,16 @@ def main():
os.environ["http_proxy"] = PROXY
os.environ["https_proxy"] = PROXY
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
raise Exception("OpenAI API key not provided, please google how to obtain it")
translate_model = MODEL_DICT.get(options.model)
assert translate_model is not None, "unsupported model"
if translate_model in ["gpt3", "chatgptapi"]:
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
raise Exception(
"OpenAI API key not provided, please google how to obtain it"
)
else:
OPENAI_API_KEY = ""
book_type = options.book_name.split(".")[-1]
support_type_list = list(BOOK_LOADER_DICT.keys())
@ -106,8 +120,6 @@ def main():
raise Exception(
f"now only support files of these formats: {','.join(support_type_list)}"
)
translate_model = MODEL_DICT.get(options.model)
assert translate_model is not None, "unsupported model"
book_loader = BOOK_LOADER_DICT.get(book_type)
assert book_loader is not None, "unsupported loader"
@ -129,6 +141,7 @@ def main():
is_test=options.test,
test_num=options.test_num,
translate_tags=options.translate_tags,
allow_navigable_strings=options.allow_navigable_strings,
)
e.make_bilingual_book()

View File

@ -24,6 +24,7 @@ class EPUBBookLoader(BaseBookLoader):
is_test=False,
test_num=5,
translate_tags="p",
allow_navigable_strings=False,
):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
@ -31,6 +32,7 @@ class EPUBBookLoader(BaseBookLoader):
self.is_test = is_test
self.test_num = test_num
self.translate_tags = translate_tags
self.allow_navigable_strings = allow_navigable_strings
try:
self.origin_book = epub.read_epub(self.epub_name)
@ -77,6 +79,12 @@ class EPUBBookLoader(BaseBookLoader):
else len(bs(i.content, "html.parser").findAll(trans_taglist))
for i in all_items
)
all_p_length += self.allow_navigable_strings * sum(
0
if i.get_type() != ITEM_DOCUMENT
else len(bs(i.content, "html.parser").findAll(text=True))
for i in all_items
)
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
index = 0
p_to_save_len = len(self.p_to_save)
@ -85,6 +93,8 @@ class EPUBBookLoader(BaseBookLoader):
if item.get_type() == ITEM_DOCUMENT:
soup = bs(item.content, "html.parser")
p_list = soup.findAll(trans_taglist)
if self.allow_navigable_strings:
p_list.extend(soup.findAll(text=True))
is_test_done = self.is_test and index > self.test_num
for p in p_list:
if is_test_done or not p.text or self._is_special_text(p.text):