From 7200da7b0b80d765afc902a24c186748c9fb7013 Mon Sep 17 00:00:00 2001 From: InzamZ Date: Sun, 5 Mar 2023 21:40:01 +0800 Subject: [PATCH] fix: fix parser error of xml based epub (#50) * fix: fix parser error of xml based epub * fix: fix xhtml based epub error by detecting the file type automatically * docs: update README --- make_book.py | 9 ++++++--- requirements.txt | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/make_book.py b/make_book.py index d0783bb..3728741 100644 --- a/make_book.py +++ b/make_book.py @@ -162,9 +162,12 @@ class BEPUB: new_book.toc = self.origin_book.toc all_items = list(self.origin_book.get_items()) # we just translate tag p - all_p_length = sum( - [len(bs(i.content, "html.parser").findAll("p")) for i in all_items] - ) + all_p_length = 0 + for i in all_items: + if i.file_name.endswith(".xhtml"): + all_p_length += len(bs(i.content, "html.parser").findAll("p")) + else: + all_p_length += len(bs(i.content, "xml").findAll("p")) if IS_TEST: pbar = tqdm(total=TEST_NUM) else: diff --git a/requirements.txt b/requirements.txt index 3cca106..53a9d59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ openai requests ebooklib rich -tqdm +tqdm \ No newline at end of file