[feature] Support interrupt resume function (#10)

* [feature] Support interrupt resume function
This commit is contained in:
Brikarl 2023-03-03 23:13:13 +08:00 committed by GitHub
parent ed6688f8a1
commit ab3bb4b7ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 21 deletions

2
.gitignore vendored
View File

@ -27,6 +27,8 @@ share/python-wheels/
*.egg
MANIFEST
.idea/
.DS_Store
test_books/
# PyInstaller
# Usually these files are written by a python script from a template

View File

@ -1,8 +1,10 @@
import argparse
import pickle
import time
from abc import abstractmethod
from copy import copy
from os import environ as env
from pathlib import Path
import openai
import requests
@ -12,6 +14,7 @@ from rich import print
NO_LIMIT = False
IS_TEST = False
RESUME = False
class Base:
@ -114,11 +117,16 @@ class ChatGPT(Base):
class BEPUB:
def __init__(self, epub_name, model, key):
def __init__(self, epub_name, model, key, resume):
self.epub_name = epub_name
self.new_epub = epub.EpubBook()
self.translate_model = model(key)
self.origin_book = epub.read_epub(self.epub_name)
self.p_to_save = []
self.resume = resume
self.bin_path = f"{Path(epub_name).parent}/.{Path(epub_name).stem}.temp.bin"
if self.resume:
self.load_state()
@staticmethod
def _is_special_text(text):
@ -136,26 +144,51 @@ class BEPUB:
)
print("TODO need process bar here: " + str(all_p_length))
index = 0
for i in self.origin_book.get_items():
if i.get_type() == 9:
soup = bs(i.content, "html.parser")
p_list = soup.findAll("p")
is_test_done = IS_TEST and index > TEST_NUM
for p in p_list:
if not is_test_done:
if p.text and not self._is_special_text(p.text):
new_p = copy(p)
# TODO banch of p to translate then combine
# PR welcome here
p_to_save_len = len(self.p_to_save)
try:
for i in self.origin_book.get_items():
if i.get_type() == 9:
soup = bs(i.content, "html.parser")
p_list = soup.findAll("p")
is_test_done = IS_TEST and index > TEST_NUM
for p in p_list:
if is_test_done or not p.text or self._is_special_text(p.text):
continue
new_p = copy(p)
# TODO banch of p to translate then combine
# PR welcome here
if self.resume and index < p_to_save_len:
new_p.string = self.p_to_save[index]
else:
new_p.string = self.translate_model.translate(p.text)
p.insert_after(new_p)
index += 1
if IS_TEST and index > TEST_NUM:
break
i.content = soup.prettify().encode()
new_book.add_item(i)
name = self.epub_name.split(".")[0]
epub.write_epub(f"{name}_bilingual.epub", new_book, {})
self.p_to_save.append(new_p.text)
p.insert_after(new_p)
index += 1
if IS_TEST and index > TEST_NUM:
break
i.content = soup.prettify().encode()
new_book.add_item(i)
name = self.epub_name.split(".")[0]
epub.write_epub(f"{name}_bilingual.epub", new_book, {})
except (KeyboardInterrupt, Exception) as e:
print(e)
print("you can resume it next time")
self.save_progress()
exit(0)
def load_state(self):
try:
with open(self.bin_path, "rb") as f:
self.p_to_save = pickle.load(f)
except:
raise Exception("can not load resume file")
def save_progress(self):
try:
with open(self.bin_path, "wb") as f:
pickle.dump(self.p_to_save, f)
except:
raise Exception("can not save resume file")
if __name__ == "__main__":
@ -203,15 +236,22 @@ if __name__ == "__main__":
choices=["chatgpt", "gpt3"], # support DeepL later
help="Use which model",
)
parser.add_argument(
"--resume",
dest="resume",
action="store_true",
help="if program accidentally stop you can use this to resume",
)
options = parser.parse_args()
NO_LIMIT = options.no_limit
IS_TEST = options.test
TEST_NUM = options.test_num
OPENAI_API_KEY = options.openai_key or env.get("OPENAI_API_KEY")
RESUME = options.resume
if not OPENAI_API_KEY:
raise Exception("Need openai API key, please google how to")
if not options.book_name.endswith(".epub"):
raise Exception("please use epub file")
model = MODEL_DICT.get(options.model, "chatgpt")
e = BEPUB(options.book_name, model, OPENAI_API_KEY)
e = BEPUB(options.book_name, model, OPENAI_API_KEY, RESUME)
e.make_bilingual_book()