fix: bugs when using exclude_filelist and only_filelist (#324)

* fix: bugs when using exclude_filelist and only_filelist

1. progress bar: only count tags in files that will be included
2. temp_file: avoid mismatch between translated texts and original texts

* simplify logic
This commit is contained in:
lingyiy 2023-08-22 19:37:08 +08:00 committed by GitHub
parent b4817f2c9b
commit 358f6a8616
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -403,13 +403,27 @@ class EPUBBookLoader(BaseBookLoader):
trans_taglist = self.translate_tags.split(",") trans_taglist = self.translate_tags.split(",")
all_p_length = sum( all_p_length = sum(
0 0
if i.get_type() != ITEM_DOCUMENT if (
(i.get_type() != ITEM_DOCUMENT)
or (i.file_name in self.exclude_filelist.split(","))
or (
self.only_filelist
and i.file_name not in self.only_filelist.split(",")
)
)
else len(bs(i.content, "html.parser").findAll(trans_taglist)) else len(bs(i.content, "html.parser").findAll(trans_taglist))
for i in all_items for i in all_items
) )
all_p_length += self.allow_navigable_strings * sum( all_p_length += self.allow_navigable_strings * sum(
0 0
if i.get_type() != ITEM_DOCUMENT if (
(i.get_type() != ITEM_DOCUMENT)
or (i.file_name in self.exclude_filelist.split(","))
or (
self.only_filelist
and i.file_name not in self.only_filelist.split(",")
)
)
else len(bs(i.content, "html.parser").findAll(text=True)) else len(bs(i.content, "html.parser").findAll(text=True))
for i in all_items for i in all_items
) )
@ -464,7 +478,14 @@ class EPUBBookLoader(BaseBookLoader):
index = 0 index = 0
try: try:
for item in origin_book_temp.get_items(): for item in origin_book_temp.get_items():
if item.get_type() == ITEM_DOCUMENT: if (
item.get_type() == ITEM_DOCUMENT
and (item.file_name not in self.exclude_filelist.split(","))
and (
item.file_name in self.only_filelist.split(",")
or self.only_filelist != ""
)
):
soup = bs(item.content, "html.parser") soup = bs(item.content, "html.parser")
p_list = soup.findAll(trans_taglist) p_list = soup.findAll(trans_taglist)
if self.allow_navigable_strings: if self.allow_navigable_strings: