mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
* fix(#92): add an argument to allow NavigableStrings --------- Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
dfcf078028
commit
cbe165df19
@ -32,6 +32,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制
|
||||
**请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹**
|
||||
11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
|
||||
12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
|
||||
13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
|
||||
|
||||
e.g.
|
||||
```shell
|
||||
|
@ -36,6 +36,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
|
||||
**Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
|
||||
11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` will be generated.
|
||||
12. If an error occurs, or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
|
||||
13. If you want to translate text in an e-book that is not wrapped in any tag, you can use the `--allow_navigable_strings` parameter. This will add those untagged strings (NavigableStrings) to the translation queue. **Note that, if possible, it is better to look for a more standardized e-book.**
|
||||
|
||||
### Examples
|
||||
|
||||
|
@ -89,6 +89,13 @@ def main():
|
||||
default="p",
|
||||
help="example --translate-tags p,blockquote",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--allow_navigable_strings",
|
||||
dest="allow_navigable_strings",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="allow NavigableStrings to be translated",
|
||||
)
|
||||
|
||||
options = parser.parse_args()
|
||||
PROXY = options.proxy
|
||||
@ -96,9 +103,16 @@ def main():
|
||||
os.environ["http_proxy"] = PROXY
|
||||
os.environ["https_proxy"] = PROXY
|
||||
|
||||
translate_model = MODEL_DICT.get(options.model)
|
||||
assert translate_model is not None, "unsupported model"
|
||||
if translate_model in ["gpt3", "chatgptapi"]:
|
||||
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
|
||||
if not OPENAI_API_KEY:
|
||||
raise Exception("OpenAI API key not provided, please google how to obtain it")
|
||||
raise Exception(
|
||||
"OpenAI API key not provided, please google how to obtain it"
|
||||
)
|
||||
else:
|
||||
OPENAI_API_KEY = ""
|
||||
|
||||
book_type = options.book_name.split(".")[-1]
|
||||
support_type_list = list(BOOK_LOADER_DICT.keys())
|
||||
@ -106,8 +120,6 @@ def main():
|
||||
raise Exception(
|
||||
f"now only support files of these formats: {','.join(support_type_list)}"
|
||||
)
|
||||
translate_model = MODEL_DICT.get(options.model)
|
||||
assert translate_model is not None, "unsupported model"
|
||||
|
||||
book_loader = BOOK_LOADER_DICT.get(book_type)
|
||||
assert book_loader is not None, "unsupported loader"
|
||||
@ -129,6 +141,7 @@ def main():
|
||||
is_test=options.test,
|
||||
test_num=options.test_num,
|
||||
translate_tags=options.translate_tags,
|
||||
allow_navigable_strings=options.allow_navigable_strings,
|
||||
)
|
||||
e.make_bilingual_book()
|
||||
|
||||
|
@ -24,6 +24,7 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
is_test=False,
|
||||
test_num=5,
|
||||
translate_tags="p",
|
||||
allow_navigable_strings=False,
|
||||
):
|
||||
self.epub_name = epub_name
|
||||
self.new_epub = epub.EpubBook()
|
||||
@ -31,6 +32,7 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
self.is_test = is_test
|
||||
self.test_num = test_num
|
||||
self.translate_tags = translate_tags
|
||||
self.allow_navigable_strings = allow_navigable_strings
|
||||
|
||||
try:
|
||||
self.origin_book = epub.read_epub(self.epub_name)
|
||||
@ -77,6 +79,12 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
else len(bs(i.content, "html.parser").findAll(trans_taglist))
|
||||
for i in all_items
|
||||
)
|
||||
all_p_length += self.allow_navigable_strings * sum(
|
||||
0
|
||||
if i.get_type() != ITEM_DOCUMENT
|
||||
else len(bs(i.content, "html.parser").findAll(text=True))
|
||||
for i in all_items
|
||||
)
|
||||
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
|
||||
index = 0
|
||||
p_to_save_len = len(self.p_to_save)
|
||||
@ -85,6 +93,8 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
if item.get_type() == ITEM_DOCUMENT:
|
||||
soup = bs(item.content, "html.parser")
|
||||
p_list = soup.findAll(trans_taglist)
|
||||
if self.allow_navigable_strings:
|
||||
p_list.extend(soup.findAll(text=True))
|
||||
is_test_done = self.is_test and index > self.test_num
|
||||
for p in p_list:
|
||||
if is_test_done or not p.text or self._is_special_text(p.text):
|
||||
|
Loading…
x
Reference in New Issue
Block a user