fix: fix the issue with the progress bar not updating properly (#85)

* feat: parse XHTML-based EPUBs by detecting them automatically

* fix: fix the issue with the progress bar not updating properly

* refactor: refactor the logic

---------

Co-authored-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
InzamZ 2023-03-07 13:02:31 +08:00 committed by GitHub
parent e9066d063b
commit 4184566ed5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -178,7 +178,6 @@ class BEPUB:
p_to_save_len = len(self.p_to_save) p_to_save_len = len(self.p_to_save)
try: try:
for item in self.origin_book.get_items(): for item in self.origin_book.get_items():
pbar.update(index)
if item.get_type() == ITEM_DOCUMENT: if item.get_type() == ITEM_DOCUMENT:
soup = bs(item.content, "html.parser") soup = bs(item.content, "html.parser")
p_list = soup.findAll("p") p_list = soup.findAll("p")
@ -198,6 +197,8 @@ class BEPUB:
index += 1 index += 1
if index % 50 == 0: if index % 50 == 0:
self._save_progress() self._save_progress()
# pbar.update(delta) not pbar.update(index)?
pbar.update(1)
if IS_TEST and index > TEST_NUM: if IS_TEST and index > TEST_NUM:
break break
item.content = soup.prettify().encode() item.content = soup.prettify().encode()
@ -230,7 +231,11 @@ class BEPUB:
try: try:
for item in self.origin_book.get_items(): for item in self.origin_book.get_items():
if item.get_type() == ITEM_DOCUMENT: if item.get_type() == ITEM_DOCUMENT:
soup = bs(item.content, "html.parser") soup = (
bs(item.content, "xml")
if item.file_name.endswith(".xhtml")
else bs(item.content, "html.parser")
)
p_list = soup.findAll("p") p_list = soup.findAll("p")
for p in p_list: for p in p_list:
if not p.text or self._is_special_text(p.text): if not p.text or self._is_special_text(p.text):