mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-06 11:35:49 +00:00
Feat/add target lang choices (#12)
* Feat: add multiple language choices and a new `--language` argument to set the target language Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
ab3bb4b7ae
commit
843f21a227
2
.github/workflows/make_test_ebook.yaml
vendored
2
.github/workflows/make_test_ebook.yaml
vendored
@ -26,4 +26,4 @@ jobs:
|
||||
- name: make test ebook
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: python3 make_book.py --book_name test_books/animal_farm.epub --no_limit --test --test_num 2
|
||||
run: python3 make_book.py --book_name test_books/animal_farm.epub --no_limit --test --test_num 2 --language zh-hans
|
||||
|
@ -19,16 +19,19 @@ Make bilingual epub books Using AI translate
|
||||
3. 本地放了一个 animal_farm.epub 给大家测试
|
||||
4. 默认用了 ChatGPT 模型,用 `--model gpt3` 来使用 gpt3 模型
|
||||
5. 加了 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢)
|
||||
6. Set the target language like `--language "Simplified Chinese"`.
|
||||
Support ` "Japanese" / "Traditional Chinese" / "German" / "French" / "Korean"`.
|
||||
The default target language is `"Simplified Chinese"`. For the full list of supported languages, see `LANGUAGES` in [utils.py](./utils.py).
|
||||
|
||||
e.g.
|
||||
```shell
|
||||
# 如果你想快速测一下
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --no_limit --test
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --no_limit --test --language "Simplified Chinese"
|
||||
# or do it
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key}
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --openai_key ${openai_key} --language "Simplified Chinese"
|
||||
# or 用 gpt3 模型
|
||||
export OPENAI_API_KEY=${your_api_key}
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --no_limit
|
||||
python3 make_book.py --book_name test_books/animal_farm.epub --model gpt3 --no_limit --language "Simplified Chinese"
|
||||
```
|
||||
|
||||
## 注意
|
||||
|
35
make_book.py
35
make_book.py
@ -11,6 +11,7 @@ import requests
|
||||
from bs4 import BeautifulSoup as bs
|
||||
from ebooklib import epub
|
||||
from rich import print
|
||||
from utils import LANGUAGES, TO_LANGUAGE_CODE
|
||||
|
||||
NO_LIMIT = False
|
||||
IS_TEST = False
|
||||
@ -18,7 +19,7 @@ RESUME = False
|
||||
|
||||
|
||||
class Base:
|
||||
def __init__(self, key):
|
||||
def __init__(self, key, language):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
@ -27,7 +28,7 @@ class Base:
|
||||
|
||||
|
||||
class GPT3(Base):
|
||||
def __init__(self, key):
|
||||
def __init__(self, key, language):
|
||||
self.api_key = key
|
||||
self.api_url = "https://api.openai.com/v1/completions"
|
||||
self.headers = {
|
||||
@ -43,10 +44,11 @@ class GPT3(Base):
|
||||
"top_p": 1,
|
||||
}
|
||||
self.session = requests.session()
|
||||
self.language = language
|
||||
|
||||
def translate(self, text):
|
||||
print(text)
|
||||
self.data["prompt"] = f"Please help me to translate,`{text}` to Chinese"
|
||||
self.data["prompt"] = f"Please help me to translate,`{text}` to {self.language}"
|
||||
r = self.session.post(self.api_url, headers=self.headers, json=self.data)
|
||||
if not r.ok:
|
||||
return text
|
||||
@ -64,12 +66,14 @@ class DeepL(Base):
|
||||
|
||||
|
||||
class ChatGPT(Base):
|
||||
def __init__(self, key):
|
||||
super().__init__(key)
|
||||
def __init__(self, key, language):
|
||||
super().__init__(key, language)
|
||||
self.key = key
|
||||
self.language = language
|
||||
|
||||
def translate(self, text):
|
||||
print(text)
|
||||
print(self.language, "!!!")
|
||||
openai.api_key = self.key
|
||||
try:
|
||||
completion = openai.ChatCompletion.create(
|
||||
@ -78,7 +82,7 @@ class ChatGPT(Base):
|
||||
{
|
||||
"role": "user",
|
||||
# english prompt here to save tokens
|
||||
"content": f"Please help me to translate,`{text}` to Chinese, please return only translated content not include the origin text",
|
||||
"content": f"Please help me to translate,`{text}` to {self.language}, please return only translated content not include the origin text",
|
||||
}
|
||||
],
|
||||
)
|
||||
@ -117,10 +121,10 @@ class ChatGPT(Base):
|
||||
|
||||
|
||||
class BEPUB:
|
||||
def __init__(self, epub_name, model, key, resume):
|
||||
def __init__(self, epub_name, model, key, resume, language):
|
||||
self.epub_name = epub_name
|
||||
self.new_epub = epub.EpubBook()
|
||||
self.translate_model = model(key)
|
||||
self.translate_model = model(key, language)
|
||||
self.origin_book = epub.read_epub(self.epub_name)
|
||||
self.p_to_save = []
|
||||
self.resume = resume
|
||||
@ -236,6 +240,14 @@ if __name__ == "__main__":
|
||||
choices=["chatgpt", "gpt3"], # support DeepL later
|
||||
help="Use which model",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--language",
|
||||
type=str,
|
||||
choices=sorted(LANGUAGES.keys())
|
||||
+ sorted([k.title() for k in TO_LANGUAGE_CODE.keys()]),
|
||||
default="zh-hans",
|
||||
help="language to translate to",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
dest="resume",
|
||||
@ -253,5 +265,10 @@ if __name__ == "__main__":
|
||||
if not options.book_name.endswith(".epub"):
|
||||
raise Exception("please use epub file")
|
||||
model = MODEL_DICT.get(options.model, "chatgpt")
|
||||
e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME)
|
||||
language = options.language
|
||||
if options.language in LANGUAGES:
|
||||
# use the value for prompt
|
||||
language = LANGUAGES.get(language, language)
|
||||
|
||||
e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME, language=language)
|
||||
e.make_bilingual_book()
|
||||
|
119
utils.py
Normal file
119
utils.py
Normal file
@ -0,0 +1,119 @@
|
||||
# Borrowed from : https://github.com/openai/whisper
|
||||
LANGUAGES = {
|
||||
"en": "english",
|
||||
"zh-hans": "simplified chinese",
|
||||
"zh-hant": "traditional chinese",
|
||||
"de": "german",
|
||||
"es": "spanish",
|
||||
"ru": "russian",
|
||||
"ko": "korean",
|
||||
"fr": "french",
|
||||
"ja": "japanese",
|
||||
"pt": "portuguese",
|
||||
"tr": "turkish",
|
||||
"pl": "polish",
|
||||
"ca": "catalan",
|
||||
"nl": "dutch",
|
||||
"ar": "arabic",
|
||||
"sv": "swedish",
|
||||
"it": "italian",
|
||||
"id": "indonesian",
|
||||
"hi": "hindi",
|
||||
"fi": "finnish",
|
||||
"vi": "vietnamese",
|
||||
"he": "hebrew",
|
||||
"uk": "ukrainian",
|
||||
"el": "greek",
|
||||
"ms": "malay",
|
||||
"cs": "czech",
|
||||
"ro": "romanian",
|
||||
"da": "danish",
|
||||
"hu": "hungarian",
|
||||
"ta": "tamil",
|
||||
"no": "norwegian",
|
||||
"th": "thai",
|
||||
"ur": "urdu",
|
||||
"hr": "croatian",
|
||||
"bg": "bulgarian",
|
||||
"lt": "lithuanian",
|
||||
"la": "latin",
|
||||
"mi": "maori",
|
||||
"ml": "malayalam",
|
||||
"cy": "welsh",
|
||||
"sk": "slovak",
|
||||
"te": "telugu",
|
||||
"fa": "persian",
|
||||
"lv": "latvian",
|
||||
"bn": "bengali",
|
||||
"sr": "serbian",
|
||||
"az": "azerbaijani",
|
||||
"sl": "slovenian",
|
||||
"kn": "kannada",
|
||||
"et": "estonian",
|
||||
"mk": "macedonian",
|
||||
"br": "breton",
|
||||
"eu": "basque",
|
||||
"is": "icelandic",
|
||||
"hy": "armenian",
|
||||
"ne": "nepali",
|
||||
"mn": "mongolian",
|
||||
"bs": "bosnian",
|
||||
"kk": "kazakh",
|
||||
"sq": "albanian",
|
||||
"sw": "swahili",
|
||||
"gl": "galician",
|
||||
"mr": "marathi",
|
||||
"pa": "punjabi",
|
||||
"si": "sinhala",
|
||||
"km": "khmer",
|
||||
"sn": "shona",
|
||||
"yo": "yoruba",
|
||||
"so": "somali",
|
||||
"af": "afrikaans",
|
||||
"oc": "occitan",
|
||||
"ka": "georgian",
|
||||
"be": "belarusian",
|
||||
"tg": "tajik",
|
||||
"sd": "sindhi",
|
||||
"gu": "gujarati",
|
||||
"am": "amharic",
|
||||
"yi": "yiddish",
|
||||
"lo": "lao",
|
||||
"uz": "uzbek",
|
||||
"fo": "faroese",
|
||||
"ht": "haitian creole",
|
||||
"ps": "pashto",
|
||||
"tk": "turkmen",
|
||||
"nn": "nynorsk",
|
||||
"mt": "maltese",
|
||||
"sa": "sanskrit",
|
||||
"lb": "luxembourgish",
|
||||
"my": "myanmar",
|
||||
"bo": "tibetan",
|
||||
"tl": "tagalog",
|
||||
"mg": "malagasy",
|
||||
"as": "assamese",
|
||||
"tt": "tatar",
|
||||
"haw": "hawaiian",
|
||||
"ln": "lingala",
|
||||
"ha": "hausa",
|
||||
"ba": "bashkir",
|
||||
"jw": "javanese",
|
||||
"su": "sundanese",
|
||||
}
|
||||
|
||||
# language code lookup by name, with a few language aliases
|
||||
TO_LANGUAGE_CODE = {
|
||||
**{language: code for code, language in LANGUAGES.items()},
|
||||
"burmese": "my",
|
||||
"valencian": "ca",
|
||||
"flemish": "nl",
|
||||
"haitian": "ht",
|
||||
"letzeburgesch": "lb",
|
||||
"pushto": "ps",
|
||||
"panjabi": "pa",
|
||||
"moldavian": "ro",
|
||||
"moldovan": "ro",
|
||||
"sinhalese": "si",
|
||||
"castilian": "es",
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user