added prompt template (#145)

* add prompt template

* format output for ChatGPTAPITranslator

* black format files

* fix: google txt loader failed

---------

Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
Conan 2023-03-11 08:51:29 -05:00 committed by GitHub
parent 2384fb3fe2
commit b1d62e8b30
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 73 additions and 14 deletions

View File

@ -42,7 +42,7 @@ jobs:
if: env.OPENAI_API_KEY != null
run: |
python3 make_book.py --book_name "test_books/lemo.epub" --test --test_num 5 --language zh-hans
python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --model gpt3
python3 make_book.py --book_name "test_books/animal_farm.epub" --test --test_num 5 --language ja --model gpt3 --prompt prompt_template_sample.txt
- name: Rename and Upload ePub

View File

@ -31,9 +31,13 @@ bilingual_book_maker 是一个 AI 翻译工具,使用 ChatGPT 帮助用户制
10. 请使用 --book_from 选项指定电子阅读器类型(现在只有 kobo 可用),并使用 --device_path 指定挂载点。
11. 如果你遇到了墙需要用 Cloudflare Workers 替换 api_base 请使用 `--api_base ${url}` 来替换。
**请注意此处你输入的api应该是'`https://xxxx/v1`'的字样,域名需要用引号包裹**
12. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
13. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
14. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
11. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
12. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
13. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
14. 如果你想调整 prompt,你可以使用 `--prompt` 参数。该参数可以是提示模板字符串,也可以是模板 `.txt` 文件的路径。有效的占位符包括 `{text}` 和 `{language}`。
15. 翻译完会生成一本 ${book_name}_bilingual.epub 的双语书
16. 如果出现了错误或使用 `CTRL+C` 中断命令,不想接下来继续翻译了,会生成一本 ${book_name}_bilingual_temp.epub 的书,直接改成你想要的名字就可以了
17. 如果你想要翻译电子书中的无标签字符串,可以使用 `--allow_navigable_strings` 参数,会将可遍历字符串加入翻译队列,**注意,在条件允许情况下,请寻找更规范的电子书**
e.g.
```shell
@ -52,6 +56,10 @@ python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --lang
# Translate contents in <div> and <p>
python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p
# 修改prompt
python3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.txt
# 或者
python3 make_book.py --book_name test_books/animal_farm.epub --prompt "Please translate \`{text}\` to {language}"
# 翻译 kobo e-reader 中,來自 Rakuten Kobo 的书籍
python3 make_book.py --book_from kobo --device_path /tmp/kobo

View File

@ -35,9 +35,14 @@ The bilingual_book_maker is an AI translation tool that uses ChatGPT to assist u
10. Use `--book_from` option to specify e-reader type (Now only `kobo` is available), and use `--device_path` to specify the mounting point.
11. If you want to change api_base like using Cloudflare Workers, use `--api_base <URL>` to support it.
**Note: the api url should be '`https://xxxx/v1`'. Quotation marks are required.**
12. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated.
13. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name.
14. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**
11. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated.
12. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name.
13. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**
14. To tweak the prompt, use the `--prompt` parameter. The parameter can be a prompt template string or a path to the template `.txt` file. Valid placeholders for the template include `{text}` and `{language}`.
15. Once the translation is complete, a bilingual book named `${book_name}_bilingual.epub` would be generated.
16. If there are any errors or you wish to interrupt the translation by pressing `CTRL+C`. A book named `${book_name}_bilingual_temp.epub` would be generated. You can simply rename it to any desired name.
17. If you want to translate strings in an e-book that aren't labeled with any tags, you can use the `--allow_navigable_strings` parameter. This will add the strings to the translation queue. **Note that it's best to look for e-books that are more standardized if possible.**
### Examples
@ -57,6 +62,10 @@ python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --lang
# Translate contents in <div> and <p>
python3 make_book.py --book_name test_books/animal_farm.epub --translate-tags div,p
# Tweaking the prompt
python3 make_book.py --book_name test_books/animal_farm.epub --prompt prompt_template_sample.txt
# or
python3 make_book.py --book_name test_books/animal_farm.epub --prompt "Please translate \`{text}\` to {language}"
# Translate books downloaded from Rakuten Kobo on kobo e-reader
python3 make_book.py --book_from kobo --device_path /tmp/kobo

View File

@ -8,6 +8,23 @@ from book_maker.utils import LANGUAGES, TO_LANGUAGE_CODE
import book_maker.obok as obok
def parse_prompt_arg(prompt_arg):
    """Resolve the ``--prompt`` CLI value into a validated prompt template.

    Args:
        prompt_arg: ``None`` when the option was not given, a literal
            template string, or a path ending in ``.txt`` whose contents
            are used as the template.

    Returns:
        The template string, or ``None`` when ``prompt_arg`` is ``None``.

    Raises:
        FileNotFoundError: ``prompt_arg`` ends in ``.txt`` but no such path
            exists.
        ValueError: the template lacks ``{text}`` or ``{language}``, or it
            cannot be rendered with ``str.format(text=..., language=...)``.
    """
    if prompt_arg is None:
        return None

    if prompt_arg.endswith(".txt"):
        if not os.path.exists(prompt_arg):
            raise FileNotFoundError(f"{prompt_arg} not found")
        # Decode explicitly as UTF-8 so a template containing non-ASCII text
        # loads the same way regardless of the platform's locale encoding.
        with open(prompt_arg, "r", encoding="utf-8") as f:
            prompt = f.read()
    else:
        prompt = prompt_arg

    if not all(
        placeholder in prompt for placeholder in ("{text}", "{language}")
    ):
        raise ValueError("prompt must contain `{text}` and `{language}`")

    # The template is later rendered with str.format(text=..., language=...).
    # Do a trial render now so a stray brace or an unknown placeholder is
    # reported at start-up instead of failing in the middle of a translation.
    try:
        prompt.format(text="text", language="language")
    except (KeyError, IndexError, AttributeError, ValueError) as err:
        raise ValueError(
            "prompt is not a valid template; only `{text}` and `{language}` "
            "placeholders are supported (escape literal braces as `{{` and `}}`)"
        ) from err

    return prompt
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
@ -105,6 +122,13 @@ def main():
default=False,
help="allow NavigableStrings to be translated",
)
parser.add_argument(
"--prompt",
dest="prompt_template",
type=str,
metavar="PROMPT_TEMPLATE",
help="used for customizing the prompt. It can be the prompt template string, or a path to the template file. The valid placeholders are `{text}` and `{language}`.",
)
options = parser.parse_args()
PROXY = options.proxy
@ -159,6 +183,7 @@ def main():
test_num=options.test_num,
translate_tags=options.translate_tags,
allow_navigable_strings=options.allow_navigable_strings,
prompt_template=parse_prompt_arg(options.prompt_template),
)
e.make_bilingual_book()

View File

@ -25,10 +25,13 @@ class EPUBBookLoader(BaseBookLoader):
test_num=5,
translate_tags="p",
allow_navigable_strings=False,
prompt_template=None,
):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
self.translate_model = model(key, language, model_api_base)
self.translate_model = model(
key, language, model_api_base, prompt_template=prompt_template
)
self.is_test = is_test
self.test_num = test_num
self.translate_tags = translate_tags

View File

@ -17,6 +17,7 @@ class TXTBookLoader(BaseBookLoader):
model_api_base=None,
is_test=False,
test_num=5,
prompt_template=None,
):
self.txt_name = txt_name
self.translate_model = model(key, language, model_api_base)

View File

@ -7,11 +7,15 @@ from .base_translator import Base
class ChatGPTAPI(Base):
def __init__(self, key, language, api_base=None):
def __init__(self, key, language, api_base=None, prompt_template=None):
super().__init__(key, language)
self.key_len = len(key.split(","))
if api_base:
openai.api_base = api_base
self.prompt_template = (
prompt_template
or "Please help me to translate,`{text}` to {language}, please return only translated content not include the origin text"
)
def rotate_key(self):
openai.api_key = next(self.keys)
@ -27,7 +31,9 @@ class ChatGPTAPI(Base):
},
{
"role": "user",
"content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
"content": self.prompt_template.format(
text=text, language=self.language
),
},
],
)
@ -58,5 +64,5 @@ class ChatGPTAPI(Base):
t_text = self.get_translation(text)
# todo: Determine whether to print according to the cli option
print(t_text)
print(t_text.strip())
return t_text

View File

@ -8,7 +8,7 @@ class Google(Base):
google translate
"""
def __init__(self, key, language, api_base=None):
def __init__(self, key, language, api_base=None, prompt_template=None):
super().__init__(key, language)
self.api_url = "https://translate.google.com/translate_a/single?client=it&dt=qca&dt=t&dt=rmt&dt=bd&dt=rms&dt=sos&dt=md&dt=gt&dt=ld&dt=ss&dt=ex&otf=2&dj=1&hl=en&ie=UTF-8&oe=UTF-8&sl=auto&tl=zh-CN"
self.headers = {

View File

@ -5,7 +5,7 @@ from .base_translator import Base
class GPT3(Base):
def __init__(self, key, language, api_base=None):
def __init__(self, key, language, api_base=None, prompt_template=None):
super().__init__(key, language)
self.api_url = (
f"{api_base}v1/completions"
@ -25,6 +25,9 @@ class GPT3(Base):
}
self.session = requests.session()
self.language = language
self.prompt_template = (
prompt_template or "Please help me to translate, `{text}` to {language}"
)
def rotate_key(self):
self.headers["Authorization"] = f"Bearer {next(self.keys)}"
@ -32,7 +35,9 @@ class GPT3(Base):
def translate(self, text):
print(text)
self.rotate_key()
self.data["prompt"] = f"Please help me to translate`{text}` to {self.language}"
self.data["prompt"] = self.prompt_template.format(
text=text, language=self.language
)
r = self.session.post(self.api_url, headers=self.headers, json=self.data)
if not r.ok:
return text

View File

@ -0,0 +1,2 @@
Translate the given text to {language}. Be faithful or accurate in translation. Make the translation readable or intelligible. Be elegant or natural in translation. If the text cannot be translated, return the original text as is. Do not translate person's name. Do not add any additional text in the translation. The text to be translated is:
{text}