Fix prettify output and nested child tag handling

prettify().encode() inserts newlines inside <table> elements, which changes how they are styled; serialize with encode() instead.

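With --translate-tags "p,li", if p and li elements are nested inside each other, there is a bug; elements that contain another translatable tag are now filtered out before processing.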
This commit is contained in:
h 2023-03-22 22:39:43 +08:00
parent dfa1b9ada8
commit 1e7c8f9ce4
2 changed files with 23 additions and 5 deletions

View File

@ -5,7 +5,7 @@ import sys
from copy import copy from copy import copy
from pathlib import Path from pathlib import Path
from bs4 import BeautifulSoup as bs from bs4 import BeautifulSoup as bs, Tag
from bs4.element import NavigableString from bs4.element import NavigableString
from ebooklib import ITEM_DOCUMENT, epub from ebooklib import ITEM_DOCUMENT, epub
from rich import print from rich import print
@ -261,7 +261,7 @@ class EPUBBookLoader(BaseBookLoader):
if item.file_name != fixname: if item.file_name != fixname:
new_book.add_item(item) new_book.add_item(item)
complete_item.content = soup_complete.prettify().encode() complete_item.content = soup_complete.encode()
# ================================================= # =================================================
index = self.process_item( index = self.process_item(
@ -276,6 +276,19 @@ class EPUBBookLoader(BaseBookLoader):
) )
epub.write_epub(f"{name_fix}", new_book, {}) epub.write_epub(f"{name_fix}", new_book, {})
def has_nest_child(self, element, trans_taglist):
    """Report whether ``element`` contains a nested translatable tag.

    Args:
        element: Node to inspect. Anything that is not a ``Tag`` is
            treated as having no children.
        trans_taglist: Collection of tag names selected for translation.

    Returns:
        True if any descendant of ``element``, at any depth, has a name
        found in ``trans_taglist``; False otherwise.
    """
    if not isinstance(element, Tag):
        return False
    # ``descendants`` walks the subtree in document order, which is the
    # same visiting order as descending child by child.
    return any(node.name in trans_taglist for node in element.descendants)
def filter_nest_list(self, p_list, trans_taglist):
    """Return the elements of ``p_list`` that hold no nested translatable tag.

    Args:
        p_list: Iterable of candidate elements.
        trans_taglist: Collection of tag names selected for translation.

    Returns:
        A new list, in the original order, of the elements for which
        ``has_nest_child`` is falsy.
    """
    kept = []
    for candidate in p_list:
        # Skip wrappers: they contain another tag that is itself selected.
        if self.has_nest_child(candidate, trans_taglist):
            continue
        kept.append(candidate)
    return kept
def process_item( def process_item(
self, self,
item, item,
@ -293,6 +306,8 @@ class EPUBBookLoader(BaseBookLoader):
soup = bs(item.content, "html.parser") soup = bs(item.content, "html.parser")
p_list = soup.findAll(trans_taglist) p_list = soup.findAll(trans_taglist)
p_list = self.filter_nest_list(p_list, trans_taglist)
if self.retranslate: if self.retranslate:
new_p_list = [] new_p_list = []
@ -331,7 +346,7 @@ class EPUBBookLoader(BaseBookLoader):
if self.is_test and index >= self.test_num: if self.is_test and index >= self.test_num:
break break
item.content = soup.prettify().encode() item.content = soup.encode()
new_book.add_item(item) new_book.add_item(item)
return index return index
@ -430,7 +445,7 @@ class EPUBBookLoader(BaseBookLoader):
else: else:
break break
# for save temp book # for save temp book
item.content = soup.prettify().encode() item.content = soup.encode()
new_temp_book.add_item(item) new_temp_book.add_item(item)
name, _ = os.path.splitext(self.epub_name) name, _ = os.path.splitext(self.epub_name)
epub.write_epub(f"{name}_bilingual_temp.epub", new_temp_book, {}) epub.write_epub(f"{name}_bilingual_temp.epub", new_temp_book, {})

View File

@ -9,7 +9,10 @@ class EPUBBookLoaderHelper:
self.translation_style = translation_style self.translation_style = translation_style
def insert_trans(self, p, text, translation_style=""): def insert_trans(self, p, text, translation_style=""):
if p.string is not None and p.string.strip() == text.strip(): if (
p.string is not None
and p.string.replace(" ", "").strip() == text.replace(" ", "").strip()
):
return return
new_p = copy(p) new_p = copy(p)
new_p.string = text new_p.string = text