diff --git a/README.md b/README.md
index acdd786..670f94f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,46 @@
 # bilingual_book_maker
 Make bilingual epub books Using AI translate
+
+
+## 准备
+
+1. ChatGPT or OpenAI token
+2. epub books
+3. 能正常联网的环境或 proxy
+4. python3.8+
+
+
+## 使用
+
+1. pip install -r requirements.txt
+2. openai token
+3. 本地放了一个 animal_farm.epub 给大家测试
+4. 默认用了 ChatGPT 模型,用 `--model gpt3` 来使用 gpt3 模型
+5. 加了 `--test` 命令如果大家没付费可以加上这个先看看效果(有 limit 稍微有些慢)
+
+e.g.
+```shell
+# 如果你想快速测一下
+python3 make.py --book_name animal_farm.epub --openai_key ${openai_key} --no_limit --test
+# or do it
+python3 make.py --book_name animal_farm.epub --openai_key ${openai_key}
+# or 用 gpt3 模型
+export OPENAI_API_KEY=${your_api_key}
+python3 make.py --book_name animal_farm.epub --model gpt3 --no_limit
+```
+
+## 注意
+
+1. 有 limit 如果想要速度可以付费
+2. 现在是 demo 版本有很多工作要做 PR welcome
+3. 尤其是 batch translate 做完效果会好很多
+4. DeepL 模型稍后更新
+
+
+# 感谢
+
+- @[yetone](https://github.com/yetone)
+
+## 赞赏
+
+谢谢就够了
diff --git a/animal_farm.epub b/animal_farm.epub
new file mode 100644
index 0000000..b26809e
Binary files /dev/null and b/animal_farm.epub differ
diff --git a/make.py b/make.py
new file mode 100644
index 0000000..c936dc8
--- /dev/null
+++ b/make.py
@@ -0,0 +1,239 @@
+"""Make a bilingual EPUB book by translating its paragraphs with OpenAI models.
+
+Every translatable ``<p>`` element of the source book is followed by a copy
+holding its translation, and the result is written next to the source file
+as ``<name>_bilingual.epub``.
+"""
+import argparse
+import time
+from abc import ABC, abstractmethod
+from copy import copy
+from os import environ as env
+from os.path import splitext
+
+import openai
+import requests
+from bs4 import BeautifulSoup as bs
+from ebooklib import ITEM_DOCUMENT, epub
+from rich import print
+
+# Both flags are overwritten from the command line options in ``__main__``.
+NO_LIMIT = False
+IS_TEST = False
+# How many paragraphs get translated when ``--test`` is given.
+TEST_PARAGRAPH_LIMIT = 20
+
+
+class Base(ABC):
+    """Interface shared by every translation backend."""
+
+    def __init__(self, key):
+        """Accept the API key; subclasses decide how to store and use it."""
+
+    @abstractmethod
+    def translate(self, text):
+        """Return the translation of ``text``."""
+
+
+class GPT3(Base):
+    """Backend that posts to the OpenAI completions HTTP endpoint."""
+
+    def __init__(self, key):
+        super().__init__(key)
+        self.api_key = key
+        self.api_url = "https://api.openai.com/v1/completions"
+        self.headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+        }
+        # TODO support more models here
+        self.data = {
+            "prompt": "",
+            "model": "text-davinci-003",
+            "max_tokens": 1024,
+            "temperature": 1,
+            "top_p": 1,
+        }
+        self.session = requests.session()
+
+    def translate(self, text):
+        """Return ``text`` translated to Chinese.
+
+        The original ``text`` is returned unchanged when the response status
+        is not OK or the response carries no completion.
+        """
+        print(text)
+        self.data["prompt"] = f"Please help me to translate,`{text}` to Chinese"
+        r = self.session.post(self.api_url, headers=self.headers, json=self.data)
+        if not r.ok:
+            return text
+        choices = r.json().get("choices")
+        if not choices:
+            # An OK response without choices would otherwise raise TypeError.
+            return text
+        t_text = choices[0].get("text", "").strip()
+        print(t_text)
+        return t_text
+
+
+class DeepL(Base):
+    """Placeholder for a future DeepL backend; not selectable from the CLI."""
+
+    def __init__(self, key):
+        # Same one-argument constructor as the other backends, because
+        # BEPUB instantiates its backend as ``model(key)``.
+        super().__init__(key)
+
+    def translate(self, text):
+        raise NotImplementedError("DeepL is not supported yet")
+
+
+class ChatGPT(Base):
+    """Backend that uses the ``gpt-3.5-turbo`` chat completion API."""
+
+    def __init__(self, key):
+        super().__init__(key)
+        self.key = key
+
+    def _request(self, text):
+        """Send one translation request and return the translated text."""
+        completion = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {
+                    "role": "user",
+                    # english prompt here to save tokens
+                    "content": f"Please help me to translate,`{text}` to Chinese, please return only translated content not include the origin text",
+                }
+            ],
+        )
+        return completion["choices"][0]["message"]["content"]
+
+    def translate(self, text):
+        """Return ``text`` translated to Chinese, retrying once on failure.
+
+        Any error of the first attempt is treated as a rate limit: the method
+        sleeps 60 seconds and sends the same request again. An error of the
+        second attempt propagates to the caller.
+        """
+        print(text)
+        openai.api_key = self.key
+        try:
+            t_text = self._request(text)
+        except Exception as e:
+            # TIME LIMIT for open api please pay
+            print(str(e), "will sleep 60 seconds")
+            time.sleep(60)
+            t_text = self._request(text)
+        if not NO_LIMIT:
+            # for time limit
+            time.sleep(3)
+        print(t_text)
+        return t_text
+
+
+class BEPUB:
+    """Build the bilingual edition of one EPUB file."""
+
+    def __init__(self, epub_name, model, key):
+        """Load ``epub_name`` and create the backend as ``model(key)``."""
+        self.epub_name = epub_name
+        # Not read by this class; kept so existing attribute access still works.
+        self.new_epub = epub.EpubBook()
+        self.translate_model = model(key)
+        self.origin_book = epub.read_epub(self.epub_name)
+
+    def make_bilingual_book(self):
+        """Translate the book and write ``<name>_bilingual.epub``.
+
+        Only ``<p>`` elements whose ``.string`` is set and is not purely
+        digits are translated. With ``IS_TEST`` set, translation stops after
+        ``TEST_PARAGRAPH_LIMIT`` paragraphs; every item is still copied.
+        """
+        new_book = epub.EpubBook()
+        new_book.metadata = self.origin_book.metadata
+        new_book.spine = self.origin_book.spine
+        new_book.toc = self.origin_book.toc
+        all_items = list(self.origin_book.get_items())
+        # we just translate tag p, and only document items are parsed as HTML
+        all_p_length = sum(
+            len(bs(i.content, "html.parser").find_all("p"))
+            for i in all_items
+            if i.get_type() == ITEM_DOCUMENT
+        )
+        print("TODO need process bar here: " + str(all_p_length))
+        index = 0
+        for i in all_items:
+            if i.get_type() == ITEM_DOCUMENT:
+                soup = bs(i.content, "html.parser")
+                for p in soup.find_all("p"):
+                    # Checked per paragraph so one long chapter cannot
+                    # overrun the test limit.
+                    if IS_TEST and index >= TEST_PARAGRAPH_LIMIT:
+                        break
+                    if p.string and not p.string.isdigit():
+                        new_p = copy(p)
+                        # TODO banch of p to translate then combine
+                        # PR welcome here
+                        new_p.string = self.translate_model.translate(p.string)
+                        p.insert_after(new_p)
+                        index += 1
+                i.content = soup.prettify().encode()
+            new_book.add_item(i)
+        # splitext keeps directories and dots inside the file name intact.
+        name, _ = splitext(self.epub_name)
+        epub.write_epub(f"{name}_bilingual.epub", new_book, {})
+
+
+if __name__ == "__main__":
+    MODEL_DICT = {"gpt3": GPT3, "chatgpt": ChatGPT}
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--book_name",
+        dest="book_name",
+        type=str,
+        # Without a book there is nothing to do; fail in argparse instead
+        # of crashing on ``None.endswith`` below.
+        required=True,
+        help="your epub book name",
+    )
+    parser.add_argument(
+        "--openai_key",
+        dest="openai_key",
+        type=str,
+        default="",
+        help="openai api key",
+    )
+    parser.add_argument(
+        "--no_limit",
+        dest="no_limit",
+        action="store_true",
+        help="if you pay add it",
+    )
+    parser.add_argument(
+        "--test",
+        dest="test",
+        action="store_true",
+        help="if test we only translat 20 contents you can easily check",
+    )
+    parser.add_argument(
+        "-m",
+        "--model",
+        dest="model",
+        type=str,
+        default="chatgpt",
+        choices=["chatgpt", "gpt3"],  # support DeepL later
+        help="Use which model",
+    )
+    options = parser.parse_args()
+    NO_LIMIT = options.no_limit
+    IS_TEST = options.test
+    OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
+    if not OPENAI_API_KEY:
+        raise Exception("Need openai API key, please google how to")
+    if not options.book_name.endswith(".epub"):
+        raise Exception("please use epub file")
+    # ``choices`` above guarantees the key exists.
+    model = MODEL_DICT[options.model]
+    e = BEPUB(options.book_name, model, OPENAI_API_KEY)
+    e.make_bilingual_book()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..34b74e6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+bs4
+openai
+requests
+ebooklib
+rich
\ No newline at end of file