diff --git a/.gitignore b/.gitignore
index 224c7ac..981c80a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,8 @@ share/python-wheels/
 *.egg
 MANIFEST
 .idea/
+.DS_Store
+test_books/
 
 # PyInstaller
 #  Usually these files are written by a python script from a template
diff --git a/make_book.py b/make_book.py
index d8bd7e9..aa77b02 100644
--- a/make_book.py
+++ b/make_book.py
@@ -1,8 +1,10 @@
 import argparse
+import pickle
 import time
 from abc import abstractmethod
 from copy import copy
 from os import environ as env
+from pathlib import Path
 
 import openai
 import requests
@@ -12,6 +14,7 @@ from rich import print
 
 NO_LIMIT = False
 IS_TEST = False
+RESUME = False
 
 
 class Base:
@@ -114,11 +117,16 @@ class ChatGPT(Base):
 
 
 class BEPUB:
-    def __init__(self, epub_name, model, key):
+    def __init__(self, epub_name, model, key, resume):
         self.epub_name = epub_name
         self.new_epub = epub.EpubBook()
         self.translate_model = model(key)
         self.origin_book = epub.read_epub(self.epub_name)
+        self.p_to_save = []
+        self.resume = resume
+        self.bin_path = f"{Path(epub_name).parent}/.{Path(epub_name).stem}.temp.bin"
+        if self.resume:
+            self.load_state()
 
     @staticmethod
     def _is_special_text(text):
@@ -136,26 +144,51 @@ class BEPUB:
         )
         print("TODO need process bar here: " + str(all_p_length))
         index = 0
-        for i in self.origin_book.get_items():
-            if i.get_type() == 9:
-                soup = bs(i.content, "html.parser")
-                p_list = soup.findAll("p")
-                is_test_done = IS_TEST and index > TEST_NUM
-                for p in p_list:
-                    if not is_test_done:
-                        if p.text and not self._is_special_text(p.text):
-                            new_p = copy(p)
-                            # TODO banch of p to translate then combine
-                            # PR welcome here
+        p_to_save_len = len(self.p_to_save)
+        try:
+            for i in self.origin_book.get_items():
+                if i.get_type() == 9:
+                    soup = bs(i.content, "html.parser")
+                    p_list = soup.findAll("p")
+                    is_test_done = IS_TEST and index > TEST_NUM
+                    for p in p_list:
+                        if is_test_done or not p.text or self._is_special_text(p.text):
+                            continue
+                        new_p = copy(p)
+                        # TODO banch of p to translate then combine
+                        # PR welcome here
+                        if self.resume and index < p_to_save_len:
+                            new_p.string = self.p_to_save[index]
+                        else:
                             new_p.string = self.translate_model.translate(p.text)
-                            p.insert_after(new_p)
-                            index += 1
-                            if IS_TEST and index > TEST_NUM:
-                                break
-                i.content = soup.prettify().encode()
-                new_book.add_item(i)
-        name = self.epub_name.split(".")[0]
-        epub.write_epub(f"{name}_bilingual.epub", new_book, {})
+                            self.p_to_save.append(new_p.text)
+                        p.insert_after(new_p)
+                        index += 1
+                        if IS_TEST and index > TEST_NUM:
+                            break
+                    i.content = soup.prettify().encode()
+                    new_book.add_item(i)
+            name = self.epub_name.split(".")[0]
+            epub.write_epub(f"{name}_bilingual.epub", new_book, {})
+        except (KeyboardInterrupt, Exception) as e:
+            print(e)
+            print("you can resume it next time")
+            self.save_progress()
+            exit(0)
+
+    def load_state(self):
+        try:
+            with open(self.bin_path, "rb") as f:
+                self.p_to_save = pickle.load(f)
+        except:
+            raise Exception("can not load resume file")
+
+    def save_progress(self):
+        try:
+            with open(self.bin_path, "wb") as f:
+                pickle.dump(self.p_to_save, f)
+        except:
+            raise Exception("can not save resume file")
 
 
 if __name__ == "__main__":
@@ -203,15 +236,22 @@ if __name__ == "__main__":
         choices=["chatgpt", "gpt3"],  # support DeepL later
         help="Use which model",
     )
+    parser.add_argument(
+        "--resume",
+        dest="resume",
+        action="store_true",
+        help="if program accidentally stop you can use this to resume",
+    )
     options = parser.parse_args()
     NO_LIMIT = options.no_limit
     IS_TEST = options.test
     TEST_NUM = options.test_num
     OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
+    RESUME = options.resume
    if not OPENAI_API_KEY:
         raise Exception("Need openai API key, please google how to")
     if not options.book_name.endswith(".epub"):
         raise Exception("please use epub file")
     model = MODEL_DICT.get(options.model, "chatgpt")
-    e = BEPUB(options.book_name, model, OPENAI_API_KEY)
+    e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME)
     e.make_bilingual_book()
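
A minimal sketch (not part of the patch) of the resume round-trip this diff introduces: finished translations are collected in a list, pickled to a hidden .temp.bin file when the run is interrupted, and replayed by index on the next run with --resume. The names STATE, demo_translate, and run below are invented for illustration only.

    # Illustrative only: mimics p_to_save / load_state() / save_progress()
    import pickle
    from pathlib import Path

    STATE = Path(".example.temp.bin")  # stands in for BEPUB.bin_path


    def demo_translate(text):
        # placeholder for translate_model.translate()
        return text.upper()


    def run(paragraphs):
        # like load_state(): reuse any translations saved by a previous run
        done = pickle.loads(STATE.read_bytes()) if STATE.exists() else []
        out = []
        try:
            for index, text in enumerate(paragraphs):
                if index < len(done):
                    out.append(done[index])  # already translated last time
                else:
                    translated = demo_translate(text)
                    done.append(translated)
                    out.append(translated)
        except KeyboardInterrupt:
            # like save_progress(): checkpoint the list before exiting
            STATE.write_bytes(pickle.dumps(done))
            raise
        return out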