mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-06 11:35:49 +00:00
* fix(#92): add an argument to allow NavigableStrings --------- Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
dfcf078028
commit
cbe165df19
@ -32,6 +32,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制
|
|||||||
**请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹**
|
**请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹**
|
||||||
11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
|
11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
|
||||||
12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
|
12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
|
||||||
|
13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
|
||||||
|
|
||||||
e.g.
|
e.g.
|
||||||
```shell
|
```shell
|
||||||
|
@ -36,6 +36,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
|
|||||||
**Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
|
**Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
|
||||||
11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` will be generated.
|
11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` will be generated.
|
||||||
12. If an error occurs, or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
|
12. If an error occurs, or you interrupt the translation by pressing `CTRL+C`, a book named `${book_name}_bilingual_temp.epub` will be generated. You can simply rename it to any desired name.
|
||||||
|
13. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
|
@ -89,6 +89,13 @@ def main():
|
|||||||
default="p",
|
default="p",
|
||||||
help="example --translate-tags p,blockquote",
|
help="example --translate-tags p,blockquote",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--allow_navigable_strings",
|
||||||
|
dest="allow_navigable_strings",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help="allow NavigableStrings to be translated",
|
||||||
|
)
|
||||||
|
|
||||||
options = parser.parse_args()
|
options = parser.parse_args()
|
||||||
PROXY = options.proxy
|
PROXY = options.proxy
|
||||||
@ -96,9 +103,16 @@ def main():
|
|||||||
os.environ["http_proxy"] = PROXY
|
os.environ["http_proxy"] = PROXY
|
||||||
os.environ["https_proxy"] = PROXY
|
os.environ["https_proxy"] = PROXY
|
||||||
|
|
||||||
|
translate_model = MODEL_DICT.get(options.model)
|
||||||
|
assert translate_model is not None, "unsupported model"
|
||||||
|
if translate_model in ["gpt3", "chatgptapi"]:
|
||||||
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
|
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
|
||||||
if not OPENAI_API_KEY:
|
if not OPENAI_API_KEY:
|
||||||
raise Exception("OpenAI API key not provided, please google how to obtain it")
|
raise Exception(
|
||||||
|
"OpenAI API key not provided, please google how to obtain it"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
OPENAI_API_KEY = ""
|
||||||
|
|
||||||
book_type = options.book_name.split(".")[-1]
|
book_type = options.book_name.split(".")[-1]
|
||||||
support_type_list = list(BOOK_LOADER_DICT.keys())
|
support_type_list = list(BOOK_LOADER_DICT.keys())
|
||||||
@ -106,8 +120,6 @@ def main():
|
|||||||
raise Exception(
|
raise Exception(
|
||||||
f"now only support files of these formats: {','.join(support_type_list)}"
|
f"now only support files of these formats: {','.join(support_type_list)}"
|
||||||
)
|
)
|
||||||
translate_model = MODEL_DICT.get(options.model)
|
|
||||||
assert translate_model is not None, "unsupported model"
|
|
||||||
|
|
||||||
book_loader = BOOK_LOADER_DICT.get(book_type)
|
book_loader = BOOK_LOADER_DICT.get(book_type)
|
||||||
assert book_loader is not None, "unsupported loader"
|
assert book_loader is not None, "unsupported loader"
|
||||||
@ -129,6 +141,7 @@ def main():
|
|||||||
is_test=options.test,
|
is_test=options.test,
|
||||||
test_num=options.test_num,
|
test_num=options.test_num,
|
||||||
translate_tags=options.translate_tags,
|
translate_tags=options.translate_tags,
|
||||||
|
allow_navigable_strings=options.allow_navigable_strings,
|
||||||
)
|
)
|
||||||
e.make_bilingual_book()
|
e.make_bilingual_book()
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
is_test=False,
|
is_test=False,
|
||||||
test_num=5,
|
test_num=5,
|
||||||
translate_tags="p",
|
translate_tags="p",
|
||||||
|
allow_navigable_strings=False,
|
||||||
):
|
):
|
||||||
self.epub_name = epub_name
|
self.epub_name = epub_name
|
||||||
self.new_epub = epub.EpubBook()
|
self.new_epub = epub.EpubBook()
|
||||||
@ -31,6 +32,7 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
self.is_test = is_test
|
self.is_test = is_test
|
||||||
self.test_num = test_num
|
self.test_num = test_num
|
||||||
self.translate_tags = translate_tags
|
self.translate_tags = translate_tags
|
||||||
|
self.allow_navigable_strings = allow_navigable_strings
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.origin_book = epub.read_epub(self.epub_name)
|
self.origin_book = epub.read_epub(self.epub_name)
|
||||||
@ -77,6 +79,12 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
else len(bs(i.content, "html.parser").findAll(trans_taglist))
|
else len(bs(i.content, "html.parser").findAll(trans_taglist))
|
||||||
for i in all_items
|
for i in all_items
|
||||||
)
|
)
|
||||||
|
all_p_length += self.allow_navigable_strings * sum(
|
||||||
|
0
|
||||||
|
if i.get_type() != ITEM_DOCUMENT
|
||||||
|
else len(bs(i.content, "html.parser").findAll(text=True))
|
||||||
|
for i in all_items
|
||||||
|
)
|
||||||
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
|
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
|
||||||
index = 0
|
index = 0
|
||||||
p_to_save_len = len(self.p_to_save)
|
p_to_save_len = len(self.p_to_save)
|
||||||
@ -85,6 +93,8 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
if item.get_type() == ITEM_DOCUMENT:
|
if item.get_type() == ITEM_DOCUMENT:
|
||||||
soup = bs(item.content, "html.parser")
|
soup = bs(item.content, "html.parser")
|
||||||
p_list = soup.findAll(trans_taglist)
|
p_list = soup.findAll(trans_taglist)
|
||||||
|
if self.allow_navigable_strings:
|
||||||
|
p_list.extend(soup.findAll(text=True))
|
||||||
is_test_done = self.is_test and index > self.test_num
|
is_test_done = self.is_test and index > self.test_num
|
||||||
for p in p_list:
|
for p in p_list:
|
||||||
if is_test_done or not p.text or self._is_special_text(p.text):
|
if is_test_done or not p.text or self._is_special_text(p.text):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user