Feat/add target lang choices (#12)

* Feat: add multiple language choices and a new `--language` argument to set the target language

Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
Weirenlan 2023-03-04 00:06:19 +08:00 committed by GitHub
parent ab3bb4b7ae
commit 843f21a227
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 152 additions and 13 deletions

View File

@ -26,4 +26,4 @@ jobs:
- name: make test ebook - name: make test ebook
env: env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: python3 make_book.py --book_name test_books/animal_farm.epub --no_limit --test --test_num 2 run: python3 make_book.py --book_name test_books/animal_farm.epub --no_limit --test --test_num 2 --language zh-hans

View File

@ -19,16 +19,19 @@ Make bilingual epub books Using AI translate
3. 本地放了一个 animal_farm.epub 给大家测试 3. 本地放了一个 animal_farm.epub 给大家测试
4. 默认用了 ChatGPT 模型,用 `--model gpt3` 来使用 gpt3 模型 4. 默认用了 ChatGPT 模型,用 `--model gpt3` 来使用 gpt3 模型
5. 加了 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢) 5. 加了 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢)
6. Set the target language with `--language`, e.g. `--language "Simplified Chinese"`.
Other supported values include `"Japanese"` / `"Traditional Chinese"` / `"German"` / `"French"` / `"Korean"`.
The default target language is `"Simplified Chinese"`. For the full list of supported languages, see `LANGUAGES` in [utils.py](./utils.py).
e.g. e.g.
```shell ```shell
# 如果你想快速测一下 # 如果你想快速测一下
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --no_limit --test python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --no_limit --test --language "Simplified Chinese"
# or do it # or do it
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language "Simplified Chinese"
# or 用 gpt3 模型 # or 用 gpt3 模型
export OPENAI_API_KEY=${your_api_key} export OPENAI_API_KEY=${your_api_key}
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --no_limit python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --no_limit --language "Simplified Chinese"
``` ```
## 注意 ## 注意

View File

@ -11,6 +11,7 @@ import requests
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs
from ebooklib import epub from ebooklib import epub
from rich import print from rich import print
from utils import LANGUAGES, TO_LANGUAGE_CODE
NO_LIMIT = False NO_LIMIT = False
IS_TEST = False IS_TEST = False
@ -18,7 +19,7 @@ RESUME = False
class Base: class Base:
def __init__(self, key): def __init__(self, key, language):
pass pass
@abstractmethod @abstractmethod
@ -27,7 +28,7 @@ class Base:
class GPT3(Base): class GPT3(Base):
def __init__(self, key): def __init__(self, key, language):
self.api_key = key self.api_key = key
self.api_url = "https://api.openai.com/v1/completions" self.api_url = "https://api.openai.com/v1/completions"
self.headers = { self.headers = {
@ -43,10 +44,11 @@ class GPT3(Base):
"top_p": 1, "top_p": 1,
} }
self.session = requests.session() self.session = requests.session()
self.language = language
def translate(self, text): def translate(self, text):
print(text) print(text)
self.data["prompt"] = f"Please help me to translate`{text}` to Chinese" self.data["prompt"] = f"Please help me to translate`{text}` to {self.language}"
r = self.session.post(self.api_url, headers=self.headers, json=self.data) r = self.session.post(self.api_url, headers=self.headers, json=self.data)
if not r.ok: if not r.ok:
return text return text
@ -64,12 +66,14 @@ class DeepL(Base):
class ChatGPT(Base): class ChatGPT(Base):
def __init__(self, key): def __init__(self, key, language):
super().__init__(key) super().__init__(key, language)
self.key = key self.key = key
self.language = language
def translate(self, text): def translate(self, text):
print(text) print(text)
print(self.language, "!!!")
openai.api_key = self.key openai.api_key = self.key
try: try:
completion = openai.ChatCompletion.create( completion = openai.ChatCompletion.create(
@ -78,7 +82,7 @@ class ChatGPT(Base):
{ {
"role": "user", "role": "user",
# english prompt here to save tokens # english prompt here to save tokens
"content": f"Please help me to translate`{text}` to Chinese, please return only translated content not include the origin text", "content": f"Please help me to translate`{text}` to {self.language}, please return only translated content not include the origin text",
} }
], ],
) )
@ -117,10 +121,10 @@ class ChatGPT(Base):
class BEPUB: class BEPUB:
def __init__(self, epub_name, model, key, resume): def __init__(self, epub_name, model, key, resume, language):
self.epub_name = epub_name self.epub_name = epub_name
self.new_epub = epub.EpubBook() self.new_epub = epub.EpubBook()
self.translate_model = model(key) self.translate_model = model(key, language)
self.origin_book = epub.read_epub(self.epub_name) self.origin_book = epub.read_epub(self.epub_name)
self.p_to_save = [] self.p_to_save = []
self.resume = resume self.resume = resume
@ -236,6 +240,14 @@ if __name__ == "__main__":
choices=["chatgpt", "gpt3"], # support DeepL later choices=["chatgpt", "gpt3"], # support DeepL later
help="Use which model", help="Use which model",
) )
parser.add_argument(
"--language",
type=str,
choices=sorted(LANGUAGES.keys())
+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
default="zh-hans",
help="language to translate to",
)
parser.add_argument( parser.add_argument(
"--resume", "--resume",
dest="resume", dest="resume",
@ -253,5 +265,10 @@ if __name__ == "__main__":
if not options.book_name.endswith(".epub"): if not options.book_name.endswith(".epub"):
raise Exception("please use epub file") raise Exception("please use epub file")
model = MODEL_DICT.get(options.model, "chatgpt") model = MODEL_DICT.get(options.model, "chatgpt")
e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME) language = options.language
if options.language in LANGUAGES:
# use the value for prompt
language = LANGUAGES.get(language, language)
e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME, language=language)
e.make_bilingual_book() e.make_bilingual_book()

119
utils.py Normal file
View File

@ -0,0 +1,119 @@
# Language table borrowed from https://github.com/openai/whisper
#
# Maps a short language code to the lowercase English name of that language.
# make_book.py accepts the codes as `--language` values and substitutes the
# corresponding name into the translation prompt.
LANGUAGES = dict(
    [
        ("en", "english"),
        ("zh-hans", "simplified chinese"),
        ("zh-hant", "traditional chinese"),
        ("de", "german"),
        ("es", "spanish"),
        ("ru", "russian"),
        ("ko", "korean"),
        ("fr", "french"),
        ("ja", "japanese"),
        ("pt", "portuguese"),
        ("tr", "turkish"),
        ("pl", "polish"),
        ("ca", "catalan"),
        ("nl", "dutch"),
        ("ar", "arabic"),
        ("sv", "swedish"),
        ("it", "italian"),
        ("id", "indonesian"),
        ("hi", "hindi"),
        ("fi", "finnish"),
        ("vi", "vietnamese"),
        ("he", "hebrew"),
        ("uk", "ukrainian"),
        ("el", "greek"),
        ("ms", "malay"),
        ("cs", "czech"),
        ("ro", "romanian"),
        ("da", "danish"),
        ("hu", "hungarian"),
        ("ta", "tamil"),
        ("no", "norwegian"),
        ("th", "thai"),
        ("ur", "urdu"),
        ("hr", "croatian"),
        ("bg", "bulgarian"),
        ("lt", "lithuanian"),
        ("la", "latin"),
        ("mi", "maori"),
        ("ml", "malayalam"),
        ("cy", "welsh"),
        ("sk", "slovak"),
        ("te", "telugu"),
        ("fa", "persian"),
        ("lv", "latvian"),
        ("bn", "bengali"),
        ("sr", "serbian"),
        ("az", "azerbaijani"),
        ("sl", "slovenian"),
        ("kn", "kannada"),
        ("et", "estonian"),
        ("mk", "macedonian"),
        ("br", "breton"),
        ("eu", "basque"),
        ("is", "icelandic"),
        ("hy", "armenian"),
        ("ne", "nepali"),
        ("mn", "mongolian"),
        ("bs", "bosnian"),
        ("kk", "kazakh"),
        ("sq", "albanian"),
        ("sw", "swahili"),
        ("gl", "galician"),
        ("mr", "marathi"),
        ("pa", "punjabi"),
        ("si", "sinhala"),
        ("km", "khmer"),
        ("sn", "shona"),
        ("yo", "yoruba"),
        ("so", "somali"),
        ("af", "afrikaans"),
        ("oc", "occitan"),
        ("ka", "georgian"),
        ("be", "belarusian"),
        ("tg", "tajik"),
        ("sd", "sindhi"),
        ("gu", "gujarati"),
        ("am", "amharic"),
        ("yi", "yiddish"),
        ("lo", "lao"),
        ("uz", "uzbek"),
        ("fo", "faroese"),
        ("ht", "haitian creole"),
        ("ps", "pashto"),
        ("tk", "turkmen"),
        ("nn", "nynorsk"),
        ("mt", "maltese"),
        ("sa", "sanskrit"),
        ("lb", "luxembourgish"),
        ("my", "myanmar"),
        ("bo", "tibetan"),
        ("tl", "tagalog"),
        ("mg", "malagasy"),
        ("as", "assamese"),
        ("tt", "tatar"),
        ("haw", "hawaiian"),
        ("ln", "lingala"),
        ("ha", "hausa"),
        ("ba", "bashkir"),
        ("jw", "javanese"),
        ("su", "sundanese"),
    ]
)
# Reverse lookup: lowercase language name -> language code.
# Start from the inverse of LANGUAGES, then layer a handful of alternative
# names (aliases) on top so they resolve to the same codes.
TO_LANGUAGE_CODE = dict(zip(LANGUAGES.values(), LANGUAGES.keys()))
TO_LANGUAGE_CODE.update(
    {
        "burmese": "my",
        "valencian": "ca",
        "flemish": "nl",
        "haitian": "ht",
        "letzeburgesch": "lb",
        "pushto": "ps",
        "panjabi": "pa",
        "moldavian": "ro",
        "moldovan": "ro",
        "sinhalese": "si",
        "castilian": "es",
    }
)