From ae3e3ba558e21a186646bf7010ff8fb052a1906d Mon Sep 17 00:00:00 2001 From: Conan Date: Sun, 12 Mar 2023 01:48:24 -0500 Subject: [PATCH 01/10] Unify prompt config for `user` and `system` (#151) * fix list number in readmes * fix list number in readmes * unify prompt config for role user and system * update json sample file * update documents and add test * update readmes --- .github/workflows/make_test_ebook.yaml | 1 + README-CN.md | 20 ++++--- README.md | 23 +++++--- book_maker/cli.py | 53 +++++++++++++++--- book_maker/loader/epub_loader.py | 8 ++- book_maker/loader/txt_loader.py | 10 +++- .../translator/chatgptapi_translator.py | 55 ++++++++++++++----- book_maker/translator/google_translator.py | 2 +- book_maker/translator/gpt3_translator.py | 2 +- book_maker/utils.py | 8 +++ prompt_template_sample.json | 4 ++ 11 files changed, 140 insertions(+), 46 deletions(-) create mode 100644 prompt_template_sample.json diff --git a/.github/workflows/make_test_ebook.yaml b/.github/workflows/make_test_ebook.yaml index e1f6a77..8565ae8 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -43,6 +43,7 @@ jobs: run: | python3 make_book.py --book_name "test_books/lemo.epub" --test --test_num 5 --language zh-hans python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --model gpt3 --prompt prompt_template_sample.txt + python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --prompt prompt_template_sample.json - name: Rename and Upload ePub diff --git a/README-CN.md b/README-CN.md index dedb8ad..258c41b 100644 --- a/README-CN.md +++ b/README-CN.md @@ -17,7 +17,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 1. `pip install -r requirements.txt` 2. 使用 `--openai_key` 指定 OpenAI API key,如果有多个可以用英文逗号分隔(xxx,xxx,xxx),可以减少接口调用次数限制带来的错误。 - 或者,指定环境变量 `OPENAI_API_KEY` 来略过这个选项。 + 或者,指定环境变量 `BMM_OPENAI_API_KEY` 来略过这个选项。 3. 
本地放了一个 `test_books/animal_farm.epub` 给大家测试 4. 默认用了 [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis) 模型,也就是 ChatGPT 正在使用的模型,用 `--model gpt3` 来使用 gpt3 模型 5. 使用 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢) @@ -31,13 +31,17 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 10. 请使用 --book_from 选项指定电子阅读器类型(现在只有 kobo 可用),并使用 --device_path 指定挂载点。 11. 如果你遇到了墙需要用 Cloudflare Workers 替换 api_base 请使用 `--api_base ${url}` 来替换。 **请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹** -11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 -12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 -13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** -14. 如果你想调整 prompt,你可以使用 `--prompt` 参数。该参数可以是提示模板字符串,也可以是模板 `.txt` 文件的路径。有效的占位符包括 `{text}` 和 `{language}`。 -15. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 -16. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 -17. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** +12. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 +13. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 +14. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** +15. 
如果你想调整 prompt,你可以使用 `--prompt` 参数。有效的占位符包括 `{text}` 和 `{language}`。你可以用以下方式配置 prompt: + 如果您不需要设置 `system` 角色,可以这样:`--prompt "Translate {text} to {language}" 或者 `--prompt prompt_template_sample.txt`(示例文本文件可以在 [./prompt_template_sample.txt](./prompt_template_sample.txt) 找到)。 + 如果您需要设置 `system` 角色,可以使用以下方式配置:`--prompt '{"user":"Translate {text} to {language}", "system": "You are a professional translator."}'`,或者 `--prompt prompt_template_sample.json`(示例 JSON 文件可以在 [./prompt_template_sample.json](./prompt_template_sample.json) 找到)。 + 你也可以用环境以下环境变量来配置 `system` 和 `user` 角色 prompt:`BBM_CHATGPTAPI_USER_MSG_TEMPLATE` 和 `BBM_CHATGPTAPI_SYS_MSG`。 +该参数可以是提示模板字符串,也可以是模板 `.txt` 文件的路径。 +16. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 +17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 +18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** e.g. ```shell diff --git a/README.md b/README.md index a97eaf2..297f2ca 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u 1. `pip install -r requirements.txt` 2. Use `--openai_key` option to specify OpenAI API key. If you have multiple keys, separate them by commas (xxx,xxx,xxx) to reduce errors caused by API call limits. - Or, just set environment variable `OPENAI_API_KEY` to ignore this option. + Or, just set environment variable `BMM_OPENAI_API_KEY` instead. 3. A sample book, `test_books/animal_farm.epub`, is provided for testing purposes. 4. The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt3` to change the underlying model to `GPT3` 5. Use `--test` option to preview the result if you haven't paid for the service. Note that there is a limit and it may take some time. 
@@ -35,14 +35,16 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u 10. Use `--book_from` option to specify e-reader type (Now only `kobo` is available), and use `--device_path` to specify the mounting point. 11. If you want to change api_base like using Cloudflare Workers, use `--api_base ` to support it. **Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.** -11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. -12. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. -13. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** -14. To tweak the prompt, use the `--prompt` parameter. The parameter can be a prompt template string or a path to the template `.txt` file. Valid placeholders for the template include `{text}` and `{language}`. - -15. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. -16. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. -17. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** +12. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. +13. 
If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. +14. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** +15. To tweak the prompt, use the `--prompt` parameter. Valid placeholders for the `user` role template include `{text}` and `{language}`. It supports a few ways to configure the prompt: + If you don't need to set the `system` role content, you can simply set it up like this: `--prompt "Translate {text} to {language}."` or `--prompt prompt_template_sample.txt` (example of a text file can be found at [./prompt_template_sample.txt](./prompt_template_sample.txt)). + If you need to set the `system` role content, you can use the following format: `--prompt '{"user":"Translate {text} to {language}", "system": "You are a professional translator."}'` or `--prompt prompt_template_sample.json` (example of a JSON file can be found at [./prompt_template_sample.json](./prompt_template_sample.json)). + You can also set the `user` and `system` role prompt by setting environment variables: `BBM_CHATGPTAPI_USER_MSG_TEMPLATE` and `BBM_CHATGPTAPI_SYS_MSG`. +16. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. +17. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. +18. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. 
**Note that it's best to look for e-books that are more standardized if possible.** ### Eamples @@ -65,7 +67,10 @@ python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags di # Tweaking the prompt python3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.txt # or +python3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.json +# or python3 make_book.py --book_name test_books/animal_farm.epub --prompt "Please translate \`{text}\` to {language}" + # Translate books download from Rakuten Kobo on kobo e-reader python3 make_book.py --book_from kobo --device_path /tmp/kobo diff --git a/book_maker/cli.py b/book_maker/cli.py index bf5abba..aa6a3d4 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -1,6 +1,7 @@ import argparse import os from os import environ as env +import json from book_maker.loader import BOOK_LOADER_DICT from book_maker.translator import MODEL_DICT @@ -12,16 +13,42 @@ def parse_prompt_arg(prompt_arg): prompt = None if prompt_arg is None: return prompt - if not prompt_arg.endswith(".txt"): - prompt = prompt_arg + + if not any(prompt_arg.endswith(ext) for ext in [".json", ".txt"]): + try: + # user can define prompt by passing a json string + # eg: --prompt '{"system": "You are a professional translator who translates computer technology books", "user": "Translate \`{text}\` to {language}"}' + prompt = json.loads(prompt_arg) + except json.JSONDecodeError: + # if not a json string, treat it as a template string + prompt = {"user": prompt_arg} + else: if os.path.exists(prompt_arg): - with open(prompt_arg, "r") as f: - prompt = f.read() + if prompt_arg.endswith(".txt"): + # if it's a txt file, treat it as a template string + with open(prompt_arg, "r") as f: + prompt = {"user": f.read()} + elif prompt_arg.endswith(".json"): + # if it's a json file, treat it as a json object + # eg: --prompt prompt_template_sample.json + with open(prompt_arg, "r") as f: + prompt = 
json.load(f) else: raise FileNotFoundError(f"{prompt_arg} not found") - if prompt is None or not (all(c in prompt for c in ["{text}", "{language}"])): + + if prompt is None or not ( + all(c in prompt["user"] for c in ["{text}", "{language}"]) + ): raise ValueError("prompt must contain `{text}` and `{language}`") + + if "user" not in prompt: + raise ValueError("prompt must contain the key of `user`") + + if (prompt.keys() - {"user", "system"}) != set(): + raise ValueError("prompt can only contain the keys of `user` and `system`") + + print("prompt config:", prompt) return prompt @@ -124,9 +151,9 @@ def main(): ) parser.add_argument( "--prompt", - dest="prompt_template", + dest="prompt_arg", type=str, - metavar="PROMPT_TEMPLATE", + metavar="PROMPT_ARG", help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. The valid placeholders are `{text}` and `{language}`.", ) @@ -139,7 +166,15 @@ def main(): translate_model = MODEL_DICT.get(options.model) assert translate_model is not None, "unsupported model" if options.model in ["gpt3", "chatgptapi"]: - OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY") + OPENAI_API_KEY = ( + options.openai_key + or env.get( + "OPENAI_API_KEY" + ) # XXX: for backward compatability, deprecate soon + or env.get( + "BBM_OPENAI_API_KEY" + ) # suggest adding `BBM_` prefix for all the bilingual_book_maker ENVs. 
+ ) if not OPENAI_API_KEY: raise Exception( "OpenAI API key not provided, please google how to obtain it" @@ -183,7 +218,7 @@ def main(): test_num=options.test_num, translate_tags=options.translate_tags, allow_navigable_strings=options.allow_navigable_strings, - prompt_template=parse_prompt_arg(options.prompt_template), + prompt_config=parse_prompt_arg(options.prompt_arg), ) e.make_bilingual_book() diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index cdae750..eb81b45 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -10,6 +10,7 @@ from rich import print from tqdm import tqdm from .base_loader import BaseBookLoader +from book_maker.utils import prompt_config_to_kwargs class EPUBBookLoader(BaseBookLoader): @@ -25,12 +26,15 @@ class EPUBBookLoader(BaseBookLoader): test_num=5, translate_tags="p", allow_navigable_strings=False, - prompt_template=None, + prompt_config=None, ): self.epub_name = epub_name self.new_epub = epub.EpubBook() self.translate_model = model( - key, language, model_api_base, prompt_template=prompt_template + key, + language, + api_base=model_api_base, + **prompt_config_to_kwargs(prompt_config), ) self.is_test = is_test self.test_num = test_num diff --git a/book_maker/loader/txt_loader.py b/book_maker/loader/txt_loader.py index 93004a1..37eceb9 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -2,6 +2,7 @@ import sys from pathlib import Path from .base_loader import BaseBookLoader +from book_maker.utils import prompt_config_to_kwargs class TXTBookLoader(BaseBookLoader): @@ -17,10 +18,15 @@ class TXTBookLoader(BaseBookLoader): model_api_base=None, is_test=False, test_num=5, - prompt_template=None, + prompt_config=None, ): self.txt_name = txt_name - self.translate_model = model(key, language, model_api_base) + self.translate_model = model( + key, + language, + api_base=model_api_base, + **prompt_config_to_kwargs(prompt_config), + ) self.is_test = 
is_test self.p_to_save = [] self.bilingual_result = [] diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py index 9b278d5..0850b94 100644 --- a/book_maker/translator/chatgptapi_translator.py +++ b/book_maker/translator/chatgptapi_translator.py @@ -6,15 +6,39 @@ from os import environ from .base_translator import Base +PROMPT_ENV_MAP = { + "user": "BBM_CHATGPTAPI_USER_MSG_TEMPLATE", + "system": "BBM_CHATGPTAPI_SYS_MSG", +} + + class ChatGPTAPI(Base): - def __init__(self, key, language, api_base=None, prompt_template=None): + DEFAULT_PROMPT = "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text" + + def __init__( + self, + key, + language, + api_base=None, + prompt_template=None, + prompt_sys_msg=None, + **kwargs, + ): super().__init__(key, language) self.key_len = len(key.split(",")) if api_base: openai.api_base = api_base self.prompt_template = ( prompt_template - or "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text" + or environ.get(PROMPT_ENV_MAP["user"]) + or self.DEFAULT_PROMPT + ) + self.prompt_sys_msg = ( + prompt_sys_msg + or environ.get( + "OPENAI_API_SYS_MSG" + ) # XXX: for backward compatability, deprecate soon + or environ.get(PROMPT_ENV_MAP["system"]) ) def rotate_key(self): @@ -22,20 +46,23 @@ class ChatGPTAPI(Base): def get_translation(self, text): self.rotate_key() + messages = [] + if self.prompt_sys_msg: + messages.append( + {"role": "system", "content": self.prompt_sys_msg}, + ) + messages.append( + { + "role": "user", + "content": self.prompt_template.format( + text=text, language=self.language + ), + } + ) + completion = openai.ChatCompletion.create( model="gpt-3.5-turbo", - messages=[ - { - "role": "system", - "content": environ.get("OPENAI_API_SYS_MSG") or "", - }, - { - "role": "user", - "content": self.prompt_template.format( - text=text, 
language=self.language - ), - }, - ], + messages=messages, ) t_text = ( completion["choices"][0] diff --git a/book_maker/translator/google_translator.py b/book_maker/translator/google_translator.py index 357d61e..936a4b7 100644 --- a/book_maker/translator/google_translator.py +++ b/book_maker/translator/google_translator.py @@ -8,7 +8,7 @@ class Google(Base): google translate """ - def __init__(self, key, language, api_base=None, prompt_template=None): + def __init__(self, key, language, **kwargs): super().__init__(key, language) self.api_url = "https://translate.google.com/translate_a/single?client=it&dt=qca&dt=t&dt=rmt&dt=bd&dt=rms&dt=sos&dt=md&dt=gt&dt=ld&dt=ss&dt=ex&otf=2&dj=1&hl=en&ie=UTF-8&oe=UTF-8&sl=auto&tl=zh-CN" self.headers = { diff --git a/book_maker/translator/gpt3_translator.py b/book_maker/translator/gpt3_translator.py index 2d69ee0..c74de5c 100644 --- a/book_maker/translator/gpt3_translator.py +++ b/book_maker/translator/gpt3_translator.py @@ -5,7 +5,7 @@ from .base_translator import Base class GPT3(Base): - def __init__(self, key, language, api_base=None, prompt_template=None): + def __init__(self, key, language, api_base=None, prompt_template=None, **kwargs): super().__init__(key, language) self.api_url = ( f"{api_base}v1/completions" diff --git a/book_maker/utils.py b/book_maker/utils.py index acf4626..cfa74a4 100644 --- a/book_maker/utils.py +++ b/book_maker/utils.py @@ -117,3 +117,11 @@ TO_LANGUAGE_CODE = { "sinhalese": "si", "castilian": "es", } + + +def prompt_config_to_kwargs(prompt_config): + prompt_config = prompt_config or {} + return dict( + prompt_template=prompt_config.get("user", None), + prompt_sys_msg=prompt_config.get("system", None), + ) diff --git a/prompt_template_sample.json b/prompt_template_sample.json new file mode 100644 index 0000000..02b8aea --- /dev/null +++ b/prompt_template_sample.json @@ -0,0 +1,4 @@ +{ + "system": "You are a professional translator.", + "user": "Translate the given text to {language}. 
Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. If the text cannot be translated, return the original text as is. Do not translate person's name. Do not add any additional text in the translation. The text to be translated is:\n{text}" +} From 66716632ba42f26fc9c976af10ccfc691d867014 Mon Sep 17 00:00:00 2001 From: David Ye Date: Sat, 11 Mar 2023 22:51:48 -0800 Subject: [PATCH 02/10] Improving the READMEs (#152) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove the duplicate item [13,17] => [17] in README * correct the item numbers in README * fix typo * add 示范用例 section title to match the English README --------- Co-authored-by: yihong0618 --- README-CN.md | 3 ++- README.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README-CN.md b/README-CN.md index 258c41b..349c1f6 100644 --- a/README-CN.md +++ b/README-CN.md @@ -43,7 +43,8 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** -e.g. +### 示范用例 + ```shell # 如果你想快速测一下 python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test diff --git a/README.md b/README.md index 297f2ca..f30d000 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u 17. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. 18. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. 
**Note that it's best to look for e-books that are more standardized if possible.** -### Eamples +### Examples ```shell # Test quickly From f09c2717f121f1bc3258b0f57e6f2abd9778881a Mon Sep 17 00:00:00 2001 From: yihong Date: Sun, 12 Mar 2023 23:00:54 +0800 Subject: [PATCH 03/10] feat: pypi (#154) * feat: pypi --- README-CN.md | 4 +++- README.md | 8 +++---- book_maker/cli.py | 4 ++-- book_maker/loader/__init__.py | 1 - book_maker/loader/epub_loader.py | 3 ++- book_maker/loader/txt_loader.py | 3 ++- book_maker/obok.py | 22 +++++++++--------- .../translator/chatgptapi_translator.py | 5 ++-- setup.py | 23 +++++++++++++++++++ 9 files changed, 48 insertions(+), 25 deletions(-) create mode 100644 setup.py diff --git a/README-CN.md b/README-CN.md index 349c1f6..ea89315 100644 --- a/README-CN.md +++ b/README-CN.md @@ -15,7 +15,7 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 ## 使用 -1. `pip install -r requirements.txt` +1. `pip install -r requirements.txt` 或 `pip install -U bbook_maker` 2. 使用 `--openai_key` 指定 OpenAI API key,如果有多个可以用英文逗号分隔(xxx,xxx,xxx),可以减少接口调用次数限制带来的错误。 或者,指定环境变量 `BMM_OPENAI_API_KEY` 来略过这个选项。 3. 本地放了一个 `test_books/animal_farm.epub` 给大家测试 @@ -45,6 +45,8 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 ### 示范用例 +**如果使用 `pip install bbook_maker` 以下命令都可以改成 `bbook args`** + ```shell # 如果你想快速测一下 python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test diff --git a/README.md b/README.md index f30d000..7b55549 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,3 @@ -This forked added Google Translate support, only supported translate to `zh-CN`. -Usage: make sure to add `--model google` in the command. - - **[中文](./README-CN.md) | English** # bilingual_book_maker @@ -19,7 +15,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u ## Use -1. `pip install -r requirements.txt` +1. `pip install -r requirements.txt` or `pip install -U bbook_maker`(you can use) 2. 
Use `--openai_key` option to specify OpenAI API key. If you have multiple keys, separate them by commas (xxx,xxx,xxx) to reduce errors caused by API call limits. Or, just set environment variable `BMM_OPENAI_API_KEY` instead. 3. A sample book, `test_books/animal_farm.epub`, is provided for testing purposes. @@ -48,6 +44,8 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u ### Examples +**Note if use `pip install bbook_maker` all commands can change to `bbook args`** + ```shell # Test quickly python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --test --language zh-hans diff --git a/book_maker/cli.py b/book_maker/cli.py index aa6a3d4..67eeff3 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -1,12 +1,12 @@ import argparse +import json import os from os import environ as env -import json +import book_maker.obok as obok from book_maker.loader import BOOK_LOADER_DICT from book_maker.translator import MODEL_DICT from book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE -import book_maker.obok as obok def parse_prompt_arg(prompt_arg): diff --git a/book_maker/loader/__init__.py b/book_maker/loader/__init__.py index 98441ea..8b55bf0 100644 --- a/book_maker/loader/__init__.py +++ b/book_maker/loader/__init__.py @@ -1,5 +1,4 @@ from book_maker.loader.epub_loader import EPUBBookLoader - from book_maker.loader.txt_loader import TXTBookLoader BOOK_LOADER_DICT = { diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index eb81b45..462a5b5 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -9,9 +9,10 @@ from ebooklib import ITEM_DOCUMENT, epub from rich import print from tqdm import tqdm -from .base_loader import BaseBookLoader from book_maker.utils import prompt_config_to_kwargs +from .base_loader import BaseBookLoader + class EPUBBookLoader(BaseBookLoader): def __init__( diff --git a/book_maker/loader/txt_loader.py 
b/book_maker/loader/txt_loader.py index 37eceb9..b64c827 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -1,9 +1,10 @@ import sys from pathlib import Path -from .base_loader import BaseBookLoader from book_maker.utils import prompt_config_to_kwargs +from .base_loader import BaseBookLoader + class TXTBookLoader(BaseBookLoader): def __init__( diff --git a/book_maker/obok.py b/book_maker/obok.py index c945a99..350def0 100644 --- a/book_maker/obok.py +++ b/book_maker/obok.py @@ -164,19 +164,19 @@ from __future__ import print_function __version__ = "4.0.0" __about__ = "Obok v{0}\nCopyright © 2012-2020 Physisticated et al.".format(__version__) -import sys -import os -import subprocess -import sqlite3 import base64 import binascii -import re -import zipfile import hashlib -import xml.etree.ElementTree as ET -import string +import os +import re import shutil +import sqlite3 +import string +import subprocess +import sys import tempfile +import xml.etree.ElementTree as ET +import zipfile can_parse_xml = True try: @@ -199,14 +199,14 @@ def _load_crypto_libcrypto(): from ctypes import ( CDLL, POINTER, - c_void_p, + Structure, c_char_p, c_int, c_long, - Structure, c_ulong, - create_string_buffer, + c_void_p, cast, + create_string_buffer, ) from ctypes.util import find_library diff --git a/book_maker/translator/chatgptapi_translator.py b/book_maker/translator/chatgptapi_translator.py index 0850b94..28caba1 100644 --- a/book_maker/translator/chatgptapi_translator.py +++ b/book_maker/translator/chatgptapi_translator.py @@ -1,10 +1,9 @@ import time - -import openai from os import environ -from .base_translator import Base +import openai +from .base_translator import Base PROMPT_ENV_MAP = { "user": "BBM_CHATGPTAPI_USER_MSG_TEMPLATE", diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..8912e24 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +from setuptools import find_packages, setup + +setup( + 
name="bbook_maker", + description="The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.", + version="0.1.0", + license="MIT", + author="yihong0618", + author_email="zouzou0208@gmail.com", + packages=find_packages(), + url="https://github.com/yihong0618/bilingual_book_maker", + python_requires=">=3.7", + install_requires=["bs4", "openai", "requests", "ebooklib", "rich", "tqdm"], + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + entry_points={ + "console_scripts": ["bbook_maker = book_maker.cli:main"], + }, +) From 5d2c89a8414636da65465470dc1cc77fede0bf36 Mon Sep 17 00:00:00 2001 From: zstone12 <522089185@qq.com> Date: Sun, 12 Mar 2023 23:03:28 +0800 Subject: [PATCH 04/10] feat: batch translate txt file (#153) * feat: batch translate txt file * feat: batch size customed * reslove conflicts --- .github/workflows/make_test_ebook.yaml | 4 ++++ README-CN.md | 5 ++++- README.md | 3 +++ book_maker/cli.py | 8 ++++++++ book_maker/loader/epub_loader.py | 1 + book_maker/loader/txt_loader.py | 27 +++++++++++++++++++------- 6 files changed, 40 insertions(+), 8 deletions(-) diff --git a/.github/workflows/make_test_ebook.yaml b/.github/workflows/make_test_ebook.yaml index 8565ae8..90d0f42 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -37,6 +37,10 @@ jobs: run: | python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --test_num 20 --model google + - name: make txt book test with batch_size + run: | + python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model google + - name: make openai key ebook test if: env.OPENAI_API_KEY != null diff --git a/README-CN.md b/README-CN.md index ea89315..028139f 100644 --- a/README-CN.md +++ b/README-CN.md @@ -42,7 +42,7 @@ 
bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 16. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** - +19. 使用`--batch_size` 参数,指定批量翻译的行数(默认行数为10,目前只对txt生效) ### 示范用例 **如果使用 `pip install bbook_maker` 以下命令都可以改成 `bbook args`** @@ -72,6 +72,9 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo # 翻译 txt 文件 python3 make_book.py --book_name test_books/the_little_prince.txt --test +# 聚合多行翻译 txt 文件 +python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 + ``` 更加小白的示例 diff --git a/README.md b/README.md index 7b55549..0150d4f 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u 16. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. 17. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. 18. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** +19. Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files). 
### Examples @@ -74,6 +75,8 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo # translate txt file python3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans +# aggregated translation txt file +python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 ``` More understandable example diff --git a/book_maker/cli.py b/book_maker/cli.py index 67eeff3..8314a09 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -156,6 +156,13 @@ def main(): metavar="PROMPT_ARG", help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. The valid placeholders are `{text}` and `{language}`.", ) + parser.add_argument( + "--batch_size", + dest="batch_size", + type=int, + default=10, + help="how many lines will be translated by aggregated translation(This options currently only applies to txt files)", + ) options = parser.parse_args() PROXY = options.proxy @@ -219,6 +226,7 @@ def main(): translate_tags=options.translate_tags, allow_navigable_strings=options.allow_navigable_strings, prompt_config=parse_prompt_arg(options.prompt_arg), + batch_size=options.batch_size, ) e.make_bilingual_book() diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index 462a5b5..3587ae7 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -22,6 +22,7 @@ class EPUBBookLoader(BaseBookLoader): key, resume, language, + batch_size, model_api_base=None, is_test=False, test_num=5, diff --git a/book_maker/loader/txt_loader.py b/book_maker/loader/txt_loader.py index b64c827..f5238a7 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -14,6 +14,7 @@ class TXTBookLoader(BaseBookLoader): key, resume, language, + batch_size, translate_tags, allow_navigable_strings, model_api_base=None, @@ -33,6 +34,7 @@ class TXTBookLoader(BaseBookLoader): self.bilingual_result = [] self.bilingual_temp_result = [] 
self.test_num = test_num + self.batch_size = batch_size try: with open(f"{txt_name}", "r", encoding="utf-8") as f: @@ -58,17 +60,22 @@ class TXTBookLoader(BaseBookLoader): p_to_save_len = len(self.p_to_save) try: - for i in self.origin_book: - if self._is_special_text(i): + sliced_list = [ + self.origin_book[i : i + self.batch_size] + for i in range(0, len(self.origin_book), self.batch_size) + ] + for i in sliced_list: + batch_text = "".join(i) + if self._is_special_text(batch_text): continue if self.resume and index < p_to_save_len: pass else: - temp = self.translate_model.translate(i) + temp = self.translate_model.translate(batch_text) self.p_to_save.append(temp) - self.bilingual_result.append(i) + self.bilingual_result.append(batch_text) self.bilingual_result.append(temp) - index += 1 + index += self.batch_size if self.is_test and index > self.test_num: break @@ -86,8 +93,14 @@ class TXTBookLoader(BaseBookLoader): def _save_temp_book(self): index = 0 - for i in range(0, len(self.origin_book)): - self.bilingual_temp_result.append(self.origin_book[i]) + sliced_list = [ + self.origin_book[i : i + self.batch_size] + for i in range(0, len(self.origin_book), self.batch_size) + ] + + for i in range(0, len(sliced_list)): + batch_text = "".join(sliced_list[i]) + self.bilingual_temp_result.append(batch_text) if self._is_special_text(self.origin_book[i]): continue if index < len(self.p_to_save): From 0a1991d8ad6b30026924993b9488afc2441cee9d Mon Sep 17 00:00:00 2001 From: Hsieh Chin Fan Date: Mon, 13 Mar 2023 18:59:52 +0800 Subject: [PATCH 05/10] Fix library of crypto for current Windows (#159) * Fix library of crypto for current Windows * Modify obok import statement Only import this module when necessary --------- Co-authored-by: Hsieh Chin Fan --- book_maker/cli.py | 3 ++- book_maker/obok.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/book_maker/cli.py b/book_maker/cli.py index 8314a09..4af379f 100644 --- a/book_maker/cli.py +++ 
b/book_maker/cli.py @@ -3,7 +3,6 @@ import json import os from os import environ as env -import book_maker.obok as obok from book_maker.loader import BOOK_LOADER_DICT from book_maker.translator import MODEL_DICT from book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE @@ -190,6 +189,8 @@ def main(): OPENAI_API_KEY = "" if options.book_from == "kobo": + import book_maker.obok as obok + device_path = options.device_path if device_path is None: raise Exception( diff --git a/book_maker/obok.py b/book_maker/obok.py index 350def0..dd4eeb9 100644 --- a/book_maker/obok.py +++ b/book_maker/obok.py @@ -1,6 +1,9 @@ # The original code comes from: # https://github.com/apprenticeharper/DeDRM_tools +# Version 4.1.2 March 2023 +# Update library for crypto for current Windows + # Version 4.1.1 March 2023 # Make obok.py works as file selector @@ -161,7 +164,7 @@ """Manage all Kobo books, either encrypted or DRM-free.""" from __future__ import print_function -__version__ = "4.0.0" +__version__ = "4.1.2" __about__ = "Obok v{0}\nCopyright © 2012-2020 Physisticated et al.".format(__version__) import base64 @@ -211,7 +214,7 @@ def _load_crypto_libcrypto(): from ctypes.util import find_library if sys.platform.startswith("win"): - libcrypto = find_library("libeay32") + libcrypto = find_library("libcrypto") else: libcrypto = find_library("crypto") From 839c2ce6ad7c65a728ad06e9123387cec25ab5f0 Mon Sep 17 00:00:00 2001 From: Voyageyang <45915518+Voyageyang@users.noreply.github.com> Date: Mon, 13 Mar 2023 20:49:23 +0800 Subject: [PATCH 06/10] (#93) feat: add caiyun translator (#160) * feat: add caiyun translator * format code and update README-CN.md * fix: add caiyun_key args * fix: add raise --------- Co-authored-by: yihong0618 --- README-CN.md | 7 ++++ README.md | 9 +++++ book_maker/cli.py | 17 +++++++-- book_maker/translator/__init__.py | 4 +- book_maker/translator/caiyun_translator.py | 43 ++++++++++++++++++++++ 5 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 
book_maker/translator/caiyun_translator.py diff --git a/README-CN.md b/README-CN.md index 028139f..e65a9b8 100644 --- a/README-CN.md +++ b/README-CN.md @@ -75,6 +75,13 @@ python3 make_book.py --book_name test_books/the_little_prince.txt --test # 聚合多行翻译 txt 文件 python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 +# 使用彩云小译翻译(彩云api目前只支持: 简体中文 <-> 英文, 简体中文 <-> 日语) +# 彩云提供了测试token(3975l6lr5pcbvidl6jl2) +# 你可以参考这个教程申请自己的token (https://bobtranslate.com/service/translate/caiyun.html) +python3 make_book.py --model caiyun --openai_key 3975l6lr5pcbvidl6jl2 --book_name test_books/animal_farm.epub +# 可以在环境变量中设置BBM_CAIYUN_API_KEY,略过--openai_key +export BBM_CAIYUN_API_KEY=${your_api_key} + ``` 更加小白的示例 diff --git a/README.md b/README.md index 0150d4f..d24b4a7 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,15 @@ python3 make_book.py --book_from kobo --device_path /tmp/kobo python3 make_book.py --book_name test_books/the_little_prince.txt --test --language zh-hans # aggregated translation txt file python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch_size 20 + +# Using Caiyun model to translate +# (the api currently only support: simplified chinese <-> english, simplified chinese <-> japanese) +# the official Caiyun has provided a test token (3975l6lr5pcbvidl6jl2) +# you can apply your own token by following this tutorial(https://bobtranslate.com/service/translate/caiyun.html) +python3 make_book.py --model caiyun --openai_key 3975l6lr5pcbvidl6jl2 --book_name test_books/animal_farm.epub +# Set env BBM_CAIYUN_API_KEY to ignore option --openai_key +export BBM_CAIYUN_API_KEY=${your_api_key} + ``` More understandable example diff --git a/book_maker/cli.py b/book_maker/cli.py index 4af379f..7e04c53 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -100,7 +100,7 @@ def main(): dest="model", type=str, default="chatgptapi", - choices=["chatgptapi", "gpt3", "google"], # support DeepL later + choices=["chatgptapi", 
"gpt3", "google", "caiyun"], # support DeepL later metavar="MODEL", help="model to use, available: {%(choices)s}", ) @@ -162,6 +162,12 @@ def main(): default=10, help="how many lines will be translated by aggregated translation(This options currently only applies to txt files)", ) + parser.add_argument( + "--caiyun_key", + dest="caiyun_key", + type=str, + help="you can apply caiyun key from here (https://dashboard.caiyunapp.com/user/sign_in/)", + ) options = parser.parse_args() PROXY = options.proxy @@ -185,8 +191,13 @@ def main(): raise Exception( "OpenAI API key not provided, please google how to obtain it" ) + API_KEY = OPENAI_API_KEY + elif options.model == "caiyun": + API_KEY = options.caiyun_key or env.get("BBM_CAIYUN_API_KEY") + if not API_KEY: + raise Exception("Please provid caiyun key") else: - OPENAI_API_KEY = "" + API_KEY = "" if options.book_from == "kobo": import book_maker.obok as obok @@ -218,7 +229,7 @@ def main(): e = book_loader( options.book_name, translate_model, - OPENAI_API_KEY, + API_KEY, options.resume, language=language, model_api_base=model_api_base, diff --git a/book_maker/translator/__init__.py b/book_maker/translator/__init__.py index 9d0c7d9..3eb54d1 100644 --- a/book_maker/translator/__init__.py +++ b/book_maker/translator/__init__.py @@ -1,10 +1,12 @@ from book_maker.translator.chatgptapi_translator import ChatGPTAPI from book_maker.translator.google_translator import Google from book_maker.translator.gpt3_translator import GPT3 +from book_maker.translator.caiyun_translator import Caiyun MODEL_DICT = { "chatgptapi": ChatGPTAPI, "gpt3": GPT3, - "google": Google + "google": Google, + "caiyun": Caiyun # add more here } diff --git a/book_maker/translator/caiyun_translator.py b/book_maker/translator/caiyun_translator.py new file mode 100644 index 0000000..4d1c651 --- /dev/null +++ b/book_maker/translator/caiyun_translator.py @@ -0,0 +1,43 @@ +import json + +import requests + +from .base_translator import Base + + +class Caiyun(Base): + 
""" + caiyun translator + """ + + def __init__(self, key, language, **kwargs): + super().__init__(key, language) + self.api_url = "http://api.interpreter.caiyunai.com/v1/translator" + self.headers = { + "content-type": "application/json", + "x-authorization": "token " + key, + } + # caiyun api only supports: zh2en, zh2ja, en2zh, ja2zh + self.translate_type = "auto2zh" + if self.language == "english": + self.translate_type = "auto2en" + elif self.language == "japanese": + self.translate_type = "auto2ja" + + def rotate_key(self): + pass + + def translate(self, text): + print(text) + payload = { + "source": text, + "trans_type": self.translate_type, + "request_id": "demo", + "detect": True, + } + response = requests.request( + "POST", self.api_url, data=json.dumps(payload), headers=self.headers + ) + t_text = json.loads(response.text)["target"] + print(t_text) + return t_text From 8661ba1bae10309b624c65c99cf4da4aa8674144 Mon Sep 17 00:00:00 2001 From: yihong0618 Date: Mon, 13 Mar 2023 20:54:43 +0800 Subject: [PATCH 07/10] ci: add caiyun test --- .github/workflows/make_test_ebook.yaml | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/make_test_ebook.yaml b/.github/workflows/make_test_ebook.yaml index 90d0f42..a880177 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -10,6 +10,7 @@ on: env: ACTIONS_ALLOW_UNSECURE_COMMANDS: true OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + BBM_CAIYUN_API_KEY: ${{ secrets.BBM_CAIYUN_API_KEY }} jobs: testing: @@ -27,11 +28,15 @@ jobs: black . 
--check - name: install python requirements run: pip install -r requirements.txt - - - name: make normal ebook test using google translate + + - name: Test install run: | - python3 make_book.py --book_name "test_books/Liber_Esther.epub" --test --test_num 10 --model google --translate-tags div,p - python3 make_book.py --book_name "test_books/Liber_Esther.epub" --test --test_num 20 --model google + pip install . + + - name: make normal ebook test using google translate and cli + run: | + bbook_maker --book_name "test_books/Liber_Esther.epub" --test --test_num 10 --model google --translate-tags div,p + bbook_maker--book_name "test_books/Liber_Esther.epub" --test --test_num 20 --model google - name: make txt book test using google translate run: | @@ -40,7 +45,11 @@ jobs: - name: make txt book test with batch_size run: | python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model google - + + - name: make caiyun translator test + if: env.BBM_CAIYUN_API_KEY != null + run: | + python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model caiyun - name: make openai key ebook test if: env.OPENAI_API_KEY != null @@ -49,10 +58,10 @@ jobs: python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --model gpt3 --prompt prompt_template_sample.txt python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --prompt prompt_template_sample.json - - name: Rename and Upload ePub if: env.OPENAI_API_KEY != null uses: actions/upload-artifact@v2 with: name: epub_output path: "test_books/lemo_bilingual.epub" + From f0b226ff3006d846eaf714f13089741135be370d Mon Sep 17 00:00:00 2001 From: yihong0618 Date: Mon, 13 Mar 2023 20:56:30 +0800 Subject: [PATCH 08/10] fix: typo --- .github/workflows/make_test_ebook.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/make_test_ebook.yaml 
b/.github/workflows/make_test_ebook.yaml index a880177..23c0105 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -36,7 +36,7 @@ jobs: - name: make normal ebook test using google translate and cli run: | bbook_maker --book_name "test_books/Liber_Esther.epub" --test --test_num 10 --model google --translate-tags div,p - bbook_maker--book_name "test_books/Liber_Esther.epub" --test --test_num 20 --model google + bbook_maker --book_name "test_books/Liber_Esther.epub" --test --test_num 20 --model google - name: make txt book test using google translate run: | From 20b4d59b70fabe32eba5ee8053bb2345dcb30ce7 Mon Sep 17 00:00:00 2001 From: yihong Date: Mon, 13 Mar 2023 21:05:12 +0800 Subject: [PATCH 09/10] fix: #157 (#161) --- book_maker/loader/epub_loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index 3587ae7..2af96ab 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -144,6 +144,7 @@ class EPUBBookLoader(BaseBookLoader): raise Exception("can not load resume file") def _save_temp_book(self): + # TODO refactor this logic origin_book_temp = epub.read_epub(self.epub_name) new_temp_book = self._make_new_book(origin_book_temp) p_to_save_len = len(self.p_to_save) @@ -154,6 +155,8 @@ class EPUBBookLoader(BaseBookLoader): if item.get_type() == ITEM_DOCUMENT: soup = bs(item.content, "html.parser") p_list = soup.findAll(trans_taglist) + if self.allow_navigable_strings: + p_list.extend(soup.findAll(text=True)) for p in p_list: if not p.text or self._is_special_text(p.text): continue @@ -162,7 +165,6 @@ class EPUBBookLoader(BaseBookLoader): if index < p_to_save_len: new_p = copy(p) new_p.string = self.p_to_save[index] - print(new_p.string) p.insert_after(new_p) index += 1 else: From 0dd61e5b3e6a82b20b6fc6244e2878a2963556db Mon Sep 17 00:00:00 2001 From: yihong Date: Mon, 13 Mar 2023 23:13:04 +0800 
Subject: [PATCH 10/10] feat: support deepl (#162) --- .github/workflows/make_test_ebook.yaml | 8 ++- README-CN.md | 51 ++++++++------- README.md | 44 +++++++------ book_maker/cli.py | 27 ++++++-- book_maker/loader/epub_loader.py | 14 +++- book_maker/loader/txt_loader.py | 6 +- book_maker/translator/__init__.py | 4 +- book_maker/translator/deepl_translator.py | 80 ++++++++++++++++++++++- book_maker/utils.py | 1 + setup.py | 2 +- 10 files changed, 180 insertions(+), 57 deletions(-) diff --git a/.github/workflows/make_test_ebook.yaml b/.github/workflows/make_test_ebook.yaml index 23c0105..6c77353 100644 --- a/.github/workflows/make_test_ebook.yaml +++ b/.github/workflows/make_test_ebook.yaml @@ -11,6 +11,7 @@ env: ACTIONS_ALLOW_UNSECURE_COMMANDS: true OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} BBM_CAIYUN_API_KEY: ${{ secrets.BBM_CAIYUN_API_KEY }} + BBM_DEEPL_API_KEY: ${{ secrets.BBM_DEEPL_API_KEY }} jobs: testing: @@ -51,8 +52,13 @@ jobs: run: | python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model caiyun + - name: make deepl translator test + if: env.BBM_CAIYUN_API_KEY != null + run: | + python3 make_book.py --book_name "test_books/the_little_prince.txt" --test --batch_size 30 --test_num 20 --model deepl + - name: make openai key ebook test - if: env.OPENAI_API_KEY != null + if: env.BBM_DEEPL_API_KEY != null run: | python3 make_book.py --book_name "test_books/lemo.epub" --test --test_num 5 --language zh-hans python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --model gpt3 --prompt prompt_template_sample.txt diff --git a/README-CN.md b/README-CN.md index e65a9b8..58ba915 100644 --- a/README-CN.md +++ b/README-CN.md @@ -15,34 +15,39 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制 ## 使用 -1. `pip install -r requirements.txt` 或 `pip install -U bbook_maker` -2. 
使用 `--openai_key` 指定 OpenAI API key,如果有多个可以用英文逗号分隔(xxx,xxx,xxx),可以减少接口调用次数限制带来的错误。 +- `pip install -r requirements.txt` 或 `pip install -U bbook_maker` +- 使用 `--openai_key` 指定 OpenAI API key,如果有多个可以用英文逗号分隔(xxx,xxx,xxx),可以减少接口调用次数限制带来的错误。 或者,指定环境变量 `BMM_OPENAI_API_KEY` 来略过这个选项。 -3. 本地放了一个 `test_books/animal_farm.epub` 给大家测试 -4. 默认用了 [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis) 模型,也就是 ChatGPT 正在使用的模型,用 `--model gpt3` 来使用 gpt3 模型 -5. 使用 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢) -6. 使用 `--language` 指定目标语言,例如: `--language "Simplified Chinese"`,预设值为 `"Simplified Chinese"`. +- 本地放了一个 `test_books/animal_farm.epub` 给大家测试 +- 默认用了 [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis) 模型,也就是 ChatGPT 正在使用的模型,用 `--model gpt3` 来使用 gpt3 模型 +- 可以使用 DeepL 封装的 api 进行翻译,需要付费,[DeepL Translator](https://rapidapi.com/splintPRO/api/deepl-translator) 来获得 token `--model deepl --deepl_key ${deepl_key}` +- 可以使用 google 来翻译 `--model google` +- 可用使用彩云进行翻译 `--model caiyun --caiyun_key ${caiyun_key}` +- 使用 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢) +- 使用 `--language` 指定目标语言,例如: `--language "Simplified Chinese"`,预设值为 `"Simplified Chinese"`. 请阅读 helper message 来查找可用的目标语言: `python make_book.py --help` -7. 使用 `--proxy` 参数,方便中国大陆的用户在本地测试时使用代理,传入类似 `http://127.0.0.1:7890` 的字符串 -8. 使用 `--resume` 命令,可以手动中断后,加入命令继续执行。 -9. epub 由 html 文件组成。默认情况下,我们只翻译 `

` 中的内容。 +- 使用 `--proxy` 参数,方便中国大陆的用户在本地测试时使用代理,传入类似 `http://127.0.0.1:7890` 的字符串 +- 使用 `--resume` 命令,可以手动中断后,加入命令继续执行。 +- epub 由 html 文件组成。默认情况下,我们只翻译 `

` 中的内容。 使用 `--translate-tags` 指定需要翻译的标签。使用逗号分隔多个标签。例如: `--translate-tags h1,h2,h3,p,div` -10. 请使用 --book_from 选项指定电子阅读器类型(现在只有 kobo 可用),并使用 --device_path 指定挂载点。 -11. 如果你遇到了墙需要用 Cloudflare Workers 替换 api_base 请使用 `--api_base ${url}` 来替换。 +- 请使用 --book_from 选项指定电子阅读器类型(现在只有 kobo 可用),并使用 --device_path 指定挂载点。 +- 如果你遇到了墙需要用 Cloudflare Workers 替换 api_base 请使用 `--api_base ${url}` 来替换。 **请注意,此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹** -12. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 -13. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 -14. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** -15. 如果你想调整 prompt,你可以使用 `--prompt` 参数。有效的占位符包括 `{text}` 和 `{language}`。你可以用以下方式配置 prompt: +- 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 +- 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 +- 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** +- 如果你想调整 prompt,你可以使用 `--prompt` 参数。有效的占位符包括 `{text}` 和 `{language}`。你可以用以下方式配置 prompt: 如果您不需要设置 `system` 角色,可以这样:`--prompt "Translate {text} to {language}" 或者 `--prompt prompt_template_sample.txt`(示例文本文件可以在 [./prompt_template_sample.txt](./prompt_template_sample.txt) 找到)。 如果您需要设置 `system` 角色,可以使用以下方式配置:`--prompt '{"user":"Translate {text} to {language}", "system": "You are a professional translator."}'`,或者 `--prompt prompt_template_sample.json`(示例 JSON 文件可以在 [./prompt_template_sample.json](./prompt_template_sample.json) 找到)。 你也可以用环境以下环境变量来配置 `system` 和 `user` 角色 prompt:`BBM_CHATGPTAPI_USER_MSG_TEMPLATE` 和 `BBM_CHATGPTAPI_SYS_MSG`。 该参数可以是提示模板字符串,也可以是模板 `.txt` 文件的路径。 -16. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 -17. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 -18. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** -19. 
使用`--batch_size` 参数,指定批量翻译的行数(默认行数为10,目前只对txt生效) +- 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书 +- 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了 +- 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书** +- 使用`--batch_size` 参数,指定批量翻译的行数(默认行数为10,目前只对txt生效) + + ### 示范用例 **如果使用 `pip install bbook_maker` 以下命令都可以改成 `bbook args`** @@ -60,6 +65,10 @@ export OPENAI_API_KEY=${your_api_key} # 或使用 gpt3 模型 python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --language ja +# Use the DeepL model with Japanese +python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja + + # Translate contents in

and

python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p @@ -78,7 +87,7 @@ python3 make_book.py --book_name test_books/the_little_prince.txt --test --batch # 使用彩云小译翻译(彩云api目前只支持: 简体中文 <-> 英文, 简体中文 <-> 日语) # 彩云提供了测试token(3975l6lr5pcbvidl6jl2) # 你可以参考这个教程申请自己的token (https://bobtranslate.com/service/translate/caiyun.html) -python3 make_book.py --model caiyun --openai_key 3975l6lr5pcbvidl6jl2 --book_name test_books/animal_farm.epub +python3 make_book.py --model caiyun --caiyun_key 3975l6lr5pcbvidl6jl2 --book_name test_books/animal_farm.epub # 可以在环境变量中设置BBM_CAIYUN_API_KEY,略过--openai_key export BBM_CAIYUN_API_KEY=${your_api_key} @@ -96,8 +105,6 @@ python make_book.py --book_name 'animal_farm.epub' --openai_key sk-XXXXX --api_b 1. Free trail 的 API token 有所限制,如果想要更快的速度,可以考虑付费方案 2. 欢迎提交 PR -3. 尤其是 batch translate 做完效果会好很多 -4. DeepL 模型稍后更新 # 感谢 diff --git a/README.md b/README.md index d24b4a7..26f01c6 100644 --- a/README.md +++ b/README.md @@ -15,33 +15,34 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u ## Use -1. `pip install -r requirements.txt` or `pip install -U bbook_maker`(you can use) -2. Use `--openai_key` option to specify OpenAI API key. If you have multiple keys, separate them by commas (xxx,xxx,xxx) to reduce errors caused by API call limits. +- `pip install -r requirements.txt` or `pip install -U bbook_maker`(you can use) +- Use `--openai_key` option to specify OpenAI API key. If you have multiple keys, separate them by commas (xxx,xxx,xxx) to reduce errors caused by API call limits. Or, just set environment variable `BMM_OPENAI_API_KEY` instead. -3. A sample book, `test_books/animal_farm.epub`, is provided for testing purposes. -4. The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt3` to change the underlying model to `GPT3` -5. 
Use `--test` option to preview the result if you haven't paid for the service. Note that there is a limit and it may take some time. -6. Set the target language like `--language "Simplified Chinese"`. Default target language is `"Simplified Chinese"`. +- A sample book, `test_books/animal_farm.epub`, is provided for testing purposes. +- The default underlying model is [GPT-3.5-turbo](https://openai.com/blog/introducing-chatgpt-and-whisper-apis), which is used by ChatGPT currently. Use `--model gpt3` to change the underlying model to `GPT3` +- Support the DeepL model via [DeepL Translator](https://rapidapi.com/splintPRO/api/deepl-translator) (a paid token is required): use `--model deepl --deepl_key ${deepl_key}` +- Use `--test` option to preview the result if you haven't paid for the service. Note that there is a limit and it may take some time. +- Set the target language like `--language "Simplified Chinese"`. Default target language is `"Simplified Chinese"`. Read available languages by helper message: `python make_book.py --help` -7. Use `--proxy` option to specify proxy server for internet access. Enter a string such as `http://127.0.0.1:7890`. -8. Use `--resume` option to manually resume the process after an interruption. -9. epub is made of html files. By default, we only translate contents in `

`. +- Use `--proxy` option to specify proxy server for internet access. Enter a string such as `http://127.0.0.1:7890`. +- Use `--resume` option to manually resume the process after an interruption. +- epub is made of html files. By default, we only translate contents in `

`. Use `--translate-tags` to specify tags need for translation. Use comma to seperate multiple tags. For example: `--translate-tags h1,h2,h3,p,div` -10. Use `--book_from` option to specify e-reader type (Now only `kobo` is available), and use `--device_path` to specify the mounting point. -11. If you want to change api_base like using Cloudflare Workers, use `--api_base ` to support it. +- Use `--book_from` option to specify e-reader type (Now only `kobo` is available), and use `--device_path` to specify the mounting point. +- If you want to change api_base like using Cloudflare Workers, use `--api_base ` to support it. **Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.** -12. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. -13. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. -14. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** -15. To tweak the prompt, use the `--prompt` parameter. Valid placeholders for the `user` role template include `{text}` and `{language}`. It supports a few ways to configure the prompt: +- Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. +- If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. +- If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. 
**Note that it's best to look for e-books that are more standardized if possible.** +- To tweak the prompt, use the `--prompt` parameter. Valid placeholders for the `user` role template include `{text}` and `{language}`. It supports a few ways to configure the prompt: If you don't need to set the `system` role content, you can simply set it up like this: `--prompt "Translate {text} to {language}."` or `--prompt prompt_template_sample.txt` (example of a text file can be found at [./prompt_template_sample.txt](./prompt_template_sample.txt)). If you need to set the `system` role content, you can use the following format: `--prompt '{"user":"Translate {text} to {language}", "system": "You are a professional translator."}'` or `--prompt prompt_template_sample.json` (example of a JSON file can be found at [./prompt_template_sample.json](./prompt_template_sample.json)). You can also set the `user` and `system` role prompt by setting environment variables: `BBM_CHATGPTAPI_USER_MSG_TEMPLATE` and `BBM_CHATGPTAPI_SYS_MSG`. -16. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. -17. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. -18. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** -19. Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files). +- Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated. +- If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. 
A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name. +- If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.** +- Use the `--batch_size` parameter to specify the number of lines for batch translation (default is 10, currently only effective for txt files). ### Examples @@ -60,6 +61,10 @@ export OPENAI_API_KEY=${your_api_key} # Use the GPT-3 model with Japanese python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --language ja +# Use the DeepL model with Japanese +python3 make_book.py --book_name test_books/animal_farm.epub --model deepl --deepl_key ${deepl_key} --language ja + + # Translate contents in

and

python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p @@ -135,7 +140,6 @@ docker run --rm --name bilingual_book_maker --mount type=bind,source=/home/user/ 1. API token from free trial has limit. If you want to speed up the process, consider paying for the service or use multiple OpenAI tokens 2. PR is welcome -3. The DeepL model will be updated later. # Thanks diff --git a/book_maker/cli.py b/book_maker/cli.py index 7e04c53..a837f8d 100644 --- a/book_maker/cli.py +++ b/book_maker/cli.py @@ -52,6 +52,7 @@ def parse_prompt_arg(prompt_arg): def main(): + translate_model_list = list(MODEL_DICT.keys()) parser = argparse.ArgumentParser() parser.add_argument( "--book_name", @@ -73,6 +74,7 @@ def main(): type=str, help="Path of e-reader device", ) + ########## KEYS ########## parser.add_argument( "--openai_key", dest="openai_key", @@ -81,6 +83,19 @@ def main(): help="OpenAI api key,if you have more than one key, please use comma" " to split them to go beyond the rate limits", ) + parser.add_argument( + "--caiyun_key", + dest="caiyun_key", + type=str, + help="you can apply caiyun key from here (https://dashboard.caiyunapp.com/user/sign_in/)", + ) + parser.add_argument( + "--deepl_key", + dest="deepl_key", + type=str, + help="you can apply deepl key from here (https://rapidapi.com/splintPRO/api/deepl-translator", + ) + parser.add_argument( "--test", dest="test", @@ -100,7 +115,7 @@ def main(): dest="model", type=str, default="chatgptapi", - choices=["chatgptapi", "gpt3", "google", "caiyun"], # support DeepL later + choices=translate_model_list, # support DeepL later metavar="MODEL", help="model to use, available: {%(choices)s}", ) @@ -162,12 +177,6 @@ def main(): default=10, help="how many lines will be translated by aggregated translation(This options currently only applies to txt files)", ) - parser.add_argument( - "--caiyun_key", - dest="caiyun_key", - type=str, - help="you can apply caiyun key from here 
(https://dashboard.caiyunapp.com/user/sign_in/)", - ) options = parser.parse_args() PROXY = options.proxy @@ -196,6 +205,10 @@ def main(): API_KEY = options.caiyun_key or env.get("BBM_CAIYUN_API_KEY") if not API_KEY: raise Exception("Please provid caiyun key") + elif options.model == "deepl": + API_KEY = options.deepl_key or env.get("BBM_DEEPL_API_KEY") + if not API_KEY: + raise Exception("Please provid deepl key") else: API_KEY = "" diff --git a/book_maker/loader/epub_loader.py b/book_maker/loader/epub_loader.py index 2af96ab..92b811c 100644 --- a/book_maker/loader/epub_loader.py +++ b/book_maker/loader/epub_loader.py @@ -5,6 +5,7 @@ from copy import copy from pathlib import Path from bs4 import BeautifulSoup as bs +from bs4.element import NavigableString from ebooklib import ITEM_DOCUMENT, epub from rich import print from tqdm import tqdm @@ -114,8 +115,12 @@ class EPUBBookLoader(BaseBookLoader): if self.resume and index < p_to_save_len: new_p.string = self.p_to_save[index] else: - new_p.string = self.translate_model.translate(p.text) - self.p_to_save.append(new_p.text) + if type(p) == NavigableString: + new_p = self.translate_model.translate(p.text) + self.p_to_save.append(new_p) + else: + new_p.string = self.translate_model.translate(p.text) + self.p_to_save.append(new_p.text) p.insert_after(new_p) index += 1 if index % 20 == 0: @@ -164,7 +169,10 @@ class EPUBBookLoader(BaseBookLoader): # PR welcome here if index < p_to_save_len: new_p = copy(p) - new_p.string = self.p_to_save[index] + if type(p) == NavigableString: + new_p = self.p_to_save[index] + else: + new_p.string = self.p_to_save[index] p.insert_after(new_p) index += 1 else: diff --git a/book_maker/loader/txt_loader.py b/book_maker/loader/txt_loader.py index f5238a7..67b8937 100644 --- a/book_maker/loader/txt_loader.py +++ b/book_maker/loader/txt_loader.py @@ -71,7 +71,11 @@ class TXTBookLoader(BaseBookLoader): if self.resume and index < p_to_save_len: pass else: - temp = 
self.translate_model.translate(batch_text) + try: + temp = self.translate_model.translate(batch_text) + except Exception as e: + print(str(e)) + raise Exception("Something is wrong when translate") self.p_to_save.append(temp) self.bilingual_result.append(batch_text) self.bilingual_result.append(temp) diff --git a/book_maker/translator/__init__.py b/book_maker/translator/__init__.py index 3eb54d1..b345d46 100644 --- a/book_maker/translator/__init__.py +++ b/book_maker/translator/__init__.py @@ -2,11 +2,13 @@ from book_maker.translator.chatgptapi_translator import ChatGPTAPI from book_maker.translator.google_translator import Google from book_maker.translator.gpt3_translator import GPT3 from book_maker.translator.caiyun_translator import Caiyun +from book_maker.translator.deepl_translator import DeepL MODEL_DICT = { "chatgptapi": ChatGPTAPI, "gpt3": GPT3, "google": Google, - "caiyun": Caiyun + "caiyun": Caiyun, + "deepl": DeepL, # add more here } diff --git a/book_maker/translator/deepl_translator.py b/book_maker/translator/deepl_translator.py index b692769..817c3f1 100644 --- a/book_maker/translator/deepl_translator.py +++ b/book_maker/translator/deepl_translator.py @@ -1,5 +1,83 @@ +import json +import time + +import requests + +from book_maker.utils import TO_LANGUAGE_CODE, LANGUAGES from .base_translator import Base class DeepL(Base): - pass + """ + caiyun translator + """ + + def __init__(self, key, language, **kwargs): + super().__init__(key, language) + self.api_url = "https://deepl-translator.p.rapidapi.com/translate" + self.headers = { + "content-type": "application/json", + "X-RapidAPI-Key": "", + "X-RapidAPI-Host": "deepl-translator.p.rapidapi.com", + } + l = None + if language in LANGUAGES: + l = language + else: + l = TO_LANGUAGE_CODE.get(language) + if l not in [ + "bg", + "zh", + "cs", + "da", + "nl", + "en-US", + "en-GB", + "et", + "fi", + "fr", + "de", + "el", + "hu", + "id", + "it", + "ja", + "lv", + "lt", + "pl", + "pt-PT", + "pt-BR", + "ro", + 
"ru", + "sk", + "sl", + "es", + "sv", + "tr", + "uk", + "ko", + "nb", + ]: + raise Exception(f"DeepL do not support {l}") + self.language = l + + def rotate_key(self): + self.headers["X-RapidAPI-Key"] = f"{next(self.keys)}" + + def translate(self, text): + self.rotate_key() + print(text) + payload = {"text": text, "source": "EN", "target": self.language} + try: + response = requests.request( + "POST", self.api_url, data=json.dumps(payload), headers=self.headers + ) + except Exception as e: + print(str(e)) + time.sleep(30) + response = requests.request( + "POST", self.api_url, data=json.dumps(payload), headers=self.headers + ) + t_text = response.json().get("text", "") + print(t_text) + return t_text diff --git a/book_maker/utils.py b/book_maker/utils.py index cfa74a4..ca5ac97 100644 --- a/book_maker/utils.py +++ b/book_maker/utils.py @@ -2,6 +2,7 @@ LANGUAGES = { "en": "english", "zh-hans": "simplified chinese", + "zh": "simplified chinese", "zh-hant": "traditional chinese", "de": "german", "es": "spanish", diff --git a/setup.py b/setup.py index 8912e24..6ff077f 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import find_packages, setup setup( name="bbook_maker", description="The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist users in creating multi-language versions of epub/txt files and books.", - version="0.1.0", + version="0.2.0", license="MIT", author="yihong0618", author_email="zouzou0208@gmail.com",