[feature] Support interrupt resume function (#10)

* [feature] Support interrupt resume function
This commit is contained in:
Brikarl 2023-03-03 23:13:13 +08:00 committed by GitHub
parent ed6688f8a1
commit ab3bb4b7ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 21 deletions

2
.gitignore vendored
View File

@ -27,6 +27,8 @@ share/python-wheels/
*.egg *.egg
MANIFEST MANIFEST
.idea/ .idea/
.DS_Store
test_books/
# PyInstaller # PyInstaller
# Usually these files are written by a python script from a template # Usually these files are written by a python script from a template

View File

@ -1,8 +1,10 @@
import argparse import argparse
import pickle
import time import time
from abc import abstractmethod from abc import abstractmethod
from copy import copy from copy import copy
from os import environ as env from os import environ as env
from pathlib import Path
import openai import openai
import requests import requests
@ -12,6 +14,7 @@ from rich import print
NO_LIMIT = False NO_LIMIT = False
IS_TEST = False IS_TEST = False
RESUME = False
class Base: class Base:
@ -114,11 +117,16 @@ class ChatGPT(Base):
class BEPUB: class BEPUB:
def __init__(self, epub_name, model, key): def __init__(self, epub_name, model, key, resume):
self.epub_name = epub_name self.epub_name = epub_name
self.new_epub = epub.EpubBook() self.new_epub = epub.EpubBook()
self.translate_model = model(key) self.translate_model = model(key)
self.origin_book = epub.read_epub(self.epub_name) self.origin_book = epub.read_epub(self.epub_name)
self.p_to_save = []
self.resume = resume
self.bin_path = f"{Path(epub_name).parent}/.{Path(epub_name).stem}.temp.bin"
if self.resume:
self.load_state()
@staticmethod @staticmethod
def _is_special_text(text): def _is_special_text(text):
@ -136,26 +144,51 @@ class BEPUB:
) )
print("TODO need process bar here: " + str(all_p_length)) print("TODO need process bar here: " + str(all_p_length))
index = 0 index = 0
for i in self.origin_book.get_items(): p_to_save_len = len(self.p_to_save)
if i.get_type() == 9: try:
soup = bs(i.content, "html.parser") for i in self.origin_book.get_items():
p_list = soup.findAll("p") if i.get_type() == 9:
is_test_done = IS_TEST and index > TEST_NUM soup = bs(i.content, "html.parser")
for p in p_list: p_list = soup.findAll("p")
if not is_test_done: is_test_done = IS_TEST and index > TEST_NUM
if p.text and not self._is_special_text(p.text): for p in p_list:
new_p = copy(p) if is_test_done or not p.text or self._is_special_text(p.text):
# TODO banch of p to translate then combine continue
# PR welcome here new_p = copy(p)
# TODO banch of p to translate then combine
# PR welcome here
if self.resume and index < p_to_save_len:
new_p.string = self.p_to_save[index]
else:
new_p.string = self.translate_model.translate(p.text) new_p.string = self.translate_model.translate(p.text)
p.insert_after(new_p) self.p_to_save.append(new_p.text)
index += 1 p.insert_after(new_p)
if IS_TEST and index > TEST_NUM: index += 1
break if IS_TEST and index > TEST_NUM:
i.content = soup.prettify().encode() break
new_book.add_item(i) i.content = soup.prettify().encode()
name = self.epub_name.split(".")[0] new_book.add_item(i)
epub.write_epub(f"{name}_bilingual.epub", new_book, {}) name = self.epub_name.split(".")[0]
epub.write_epub(f"{name}_bilingual.epub", new_book, {})
except (KeyboardInterrupt, Exception) as e:
print(e)
print("you can resume it next time")
self.save_progress()
exit(0)
def load_state(self):
    """Restore previously translated paragraphs from the resume file.

    Reads the pickled object stored at ``self.bin_path`` and assigns it to
    ``self.p_to_save``.

    Raises:
        Exception: with the message "can not load resume file" when the file
            cannot be opened or unpickled; the underlying error is attached
            as ``__cause__``.
    """
    try:
        with open(self.bin_path, "rb") as f:
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # only acceptable because the file is expected to be one this
            # program wrote itself; never point it at an untrusted file.
            self.p_to_save = pickle.load(f)
    except Exception as e:
        # `except Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate untouched, and `from e`
        # keeps the real reason (missing file, corrupt data, ...) visible.
        raise Exception("can not load resume file") from e
def save_progress(self):
    """Persist the translated paragraphs so a later run can resume.

    Pickles ``self.p_to_save`` to ``self.bin_path``. The data is first
    written to a sibling ``<bin_path>.tmp`` file and then moved into place,
    so an existing resume file is never truncated unless the new one was
    written completely.

    Raises:
        Exception: with the message "can not save resume file" when writing
            or replacing the file fails; the underlying error is attached as
            ``__cause__``.
    """
    # Local import: only `environ` is imported from `os` at module level.
    import os

    tmp_path = f"{self.bin_path}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(self.p_to_save, f)
        # Atomic swap: the previous resume file stays intact until this point.
        os.replace(tmp_path, self.bin_path)
    except Exception as e:
        # `except Exception` (rather than a bare `except:`) lets
        # KeyboardInterrupt / SystemExit propagate untouched. A partially
        # written .tmp file may be left behind; the next save overwrites it.
        raise Exception("can not save resume file") from e
if __name__ == "__main__": if __name__ == "__main__":
@ -203,15 +236,22 @@ if __name__ == "__main__":
choices=["chatgpt", "gpt3"], # support DeepL later choices=["chatgpt", "gpt3"], # support DeepL later
help="Use which model", help="Use which model",
) )
parser.add_argument(
"--resume",
dest="resume",
action="store_true",
help="if program accidentally stop you can use this to resume",
)
options = parser.parse_args() options = parser.parse_args()
NO_LIMIT = options.no_limit NO_LIMIT = options.no_limit
IS_TEST = options.test IS_TEST = options.test
TEST_NUM = options.test_num TEST_NUM = options.test_num
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY") OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
RESUME = options.resume
if not OPENAI_API_KEY: if not OPENAI_API_KEY:
raise Exception("Need openai API key, please google how to") raise Exception("Need openai API key, please google how to")
if not options.book_name.endswith(".epub"): if not options.book_name.endswith(".epub"):
raise Exception("please use epub file") raise Exception("please use epub file")
model = MODEL_DICT.get(options.model, "chatgpt") model = MODEL_DICT.get(options.model, "chatgpt")
e = BEPUB(options.book_name, model, OPENAI_API_KEY) e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME)
e.make_bilingual_book() e.make_bilingual_book()