mirror of https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
feat: init
This commit is contained in:
parent 5e2cbd5ffa
commit 7f09606aa4
43 README.md
@@ -1,2 +1,45 @@
# bilingual_book_maker

Make bilingual epub books using AI translation

## Preparation

1. A ChatGPT or OpenAI token
2. Some epub books
3. A network environment that can reach the API, or a proxy (see the sketch after this list)
4. Python 3.8+
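
If the API is only reachable through a proxy, one option is to export the standard `HTTP_PROXY`/`HTTPS_PROXY` variables in your shell, or set them from Python before any request is made, since `requests` (which make.py uses for its HTTP calls) honors them. This is only a minimal sketch, and the proxy address below is a placeholder:

```python
import os

# Placeholder address; point this at your own proxy.
PROXY = "http://127.0.0.1:7890"

# requests honors these standard variables, so HTTP calls made by make.py
# will go through the proxy as long as they are set before it runs.
os.environ["HTTP_PROXY"] = PROXY
os.environ["HTTPS_PROXY"] = PROXY
```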

## Usage

1. `pip install -r requirements.txt`
2. Get an OpenAI token
3. A copy of animal_farm.epub is included in the repo for testing
4. The ChatGPT model is used by default; pass `--model gpt3` to use the GPT-3 model
5. If you have not paid, add the `--test` flag to preview the result first (the rate limit makes it a bit slow)

e.g.

```shell
# if you just want a quick test
python3 make.py --book_name animal_farm.epub --openai_key ${openai_key} --no_limit --test

# or run the full translation
python3 make.py --book_name animal_farm.epub --openai_key ${openai_key}

# or use the gpt3 model
export OPENAI_API_KEY=${your_api_key}
python3 make.py --book_name animal_farm.epub --model gpt3 --no_limit
```
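
Before translating a whole book, you can also sanity-check your key and network from Python by calling the `ChatGPT` backend in `make.py` on a single sentence. A quick sketch, not part of the repo, assuming you run it from the repo root with `OPENAI_API_KEY` set:

```python
from os import environ as env

# make.py lives in the repo root, so run this from there.
from make import ChatGPT

model = ChatGPT(env["OPENAI_API_KEY"])
# Translates one sentence; expect a ~3 second pause from the rate-limit sleep.
print(model.translate("Animal Farm is a novella by George Orwell."))
```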

## Notes

1. There is a rate limit; you can pay OpenAI to speed things up
2. This is a demo version and there is still a lot of work to do, PRs welcome
3. Batch translation in particular will make the result much better once it is done (see the sketch after this list)
4. The DeepL model will be added later
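
For reference, batch translation could look roughly like this: join several paragraphs with a separator, translate them in one request, and split the reply back out. This is only a sketch of the idea, not code from this repo; `translate_batch`, the separator, and the batch size are illustrative choices, and it assumes any backend in `make.py` that exposes a `translate(text)` method:

```python
SEP = "\n@@@\n"  # arbitrary marker; assumed to survive translation unchanged

def translate_batch(model, texts, batch_size=10):
    """Translate a list of paragraph strings in batches with any .translate(text) backend."""
    results = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        translated = model.translate(SEP.join(chunk))
        parts = [p.strip() for p in translated.split("@@@")]
        if len(parts) != len(chunk):
            # The model mangled the marker; fall back to one request per paragraph.
            parts = [model.translate(t) for t in chunk]
        results.extend(parts)
    return results
```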

# Thanks

- @[yetone](https://github.com/yetone)

## Appreciation

A thank-you is enough
BIN animal_farm.epub Normal file
Binary file not shown.
202 make.py Normal file
@@ -0,0 +1,202 @@
import argparse
import time
from abc import abstractmethod
from copy import copy
from os import environ as env

import openai
import requests
from bs4 import BeautifulSoup as bs
from ebooklib import epub
from rich import print

NO_LIMIT = False
IS_TEST = False


class Base:
    # Common interface for the translation backends.
    def __init__(self, key):
        pass

    @abstractmethod
    def translate(self, text):
        pass


class GPT3(Base):
    # GPT-3 backend: calls the completions endpoint directly over HTTP.
    def __init__(self, key):
        self.api_key = key
        self.api_url = "https://api.openai.com/v1/completions"
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }
        # TODO support more models here
        self.data = {
            "prompt": "",
            "model": "text-davinci-003",
            "max_tokens": 1024,
            "temperature": 1,
            "top_p": 1,
        }
        self.session = requests.session()

    def translate(self, text):
        print(text)
        self.data["prompt"] = f"Please help me to translate,`{text}` to Chinese"
        r = self.session.post(self.api_url, headers=self.headers, json=self.data)
        if not r.ok:
            return text
        t_text = r.json().get("choices")[0].get("text", "").strip()
        print(t_text)
        return t_text


class DeepL(Base):
    # Placeholder: DeepL support is not implemented yet.
    def __init__(self, key):
        super().__init__(key)

    def translate(self, text):
        return super().translate(text)


class ChatGPT(Base):
    def __init__(self, key):
        super().__init__(key)
        self.key = key

    def translate(self, text):
        print(text)
        openai.api_key = self.key
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        # English prompt here to save tokens
                        "content": f"Please help me to translate,`{text}` to Chinese, please return only translated content not include the origin text",
                    }
                ],
            )
            t_text = (
                completion["choices"][0]
                .get("message")
                .get("content")
                .encode("utf8")
                .decode()
            )
            if not NO_LIMIT:
                # free-tier rate limit: pause between requests
                time.sleep(3)
        except Exception as e:
            print(str(e), "will sleep 60 seconds")
            # hit the OpenAI rate limit; wait it out and retry once
            time.sleep(60)
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {
                        "role": "user",
                        "content": f"Please help me to translate,`{text}` to Simplified Chinese, please return only translated content not include the origin text",
                    }
                ],
            )
            t_text = (
                completion["choices"][0]
                .get("message")
                .get("content")
                .encode("utf8")
                .decode()
            )
        print(t_text)
        return t_text


class BEPUB:
    def __init__(self, epub_name, model, key):
        self.epub_name = epub_name
        self.new_epub = epub.EpubBook()
        self.translate_model = model(key)
        self.origin_book = epub.read_epub(self.epub_name)

    def make_bilingual_book(self):
        new_book = epub.EpubBook()
        new_book.metadata = self.origin_book.metadata
        new_book.spine = self.origin_book.spine
        new_book.toc = self.origin_book.toc
        all_items = list(self.origin_book.get_items())
        # we only translate <p> tags
        all_p_length = sum(
            [len(bs(i.content, "html.parser").findAll("p")) for i in all_items]
        )
        print("TODO need a progress bar here: " + str(all_p_length))
        index = 0
        for i in self.origin_book.get_items():
            # 9 is ebooklib's ITEM_DOCUMENT type (the XHTML chapters)
            if i.get_type() == 9:
                soup = bs(i.content, "html.parser")
                p_list = soup.findAll("p")
                is_test_done = IS_TEST and index > 20
                for p in p_list:
                    if not is_test_done:
                        if p.string and not p.string.isdigit():
                            new_p = copy(p)
                            # TODO send a batch of <p> tags to translate, then combine
                            # PR welcome here
                            new_p.string = self.translate_model.translate(p.string)
                            p.insert_after(new_p)
                            index += 1
                i.content = soup.prettify().encode()
                new_book.add_item(i)
        name = self.epub_name.split(".")[0]
        epub.write_epub(f"{name}_bilingual.epub", new_book, {})


if __name__ == "__main__":
    MODEL_DICT = {"gpt3": GPT3, "chatgpt": ChatGPT}
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--book_name",
        dest="book_name",
        type=str,
        help="your epub book name",
    )
    parser.add_argument(
        "--openai_key",
        dest="openai_key",
        type=str,
        default="",
        help="openai api key",
    )
    parser.add_argument(
        "--no_limit",
        dest="no_limit",
        action="store_true",
        help="set this if you have a paid plan and want to skip the rate-limit sleep",
    )
    parser.add_argument(
        "--test",
        dest="test",
        action="store_true",
        help="in test mode only about the first 20 paragraphs are translated so you can check the result quickly",
    )
    parser.add_argument(
        "-m",
        "--model",
        dest="model",
        type=str,
        default="chatgpt",
        choices=["chatgpt", "gpt3"],  # support DeepL later
        help="which model to use",
    )
    options = parser.parse_args()
    NO_LIMIT = options.no_limit
    IS_TEST = options.test
    OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
    if not OPENAI_API_KEY:
        raise Exception("Need an OpenAI API key, please google how to get one")
    if not options.book_name.endswith(".epub"):
        raise Exception("please use an epub file")
    model = MODEL_DICT.get(options.model, ChatGPT)
    e = BEPUB(options.book_name, model, OPENAI_API_KEY)
    e.make_bilingual_book()
5 requirements.txt Normal file
@@ -0,0 +1,5 @@
bs4
openai
requests
ebooklib
rich