mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-02 09:30:24 +00:00
support config tags to translate (#107)
This commit is contained in:
parent
3472f3e673
commit
b25c4ca873
@ -82,6 +82,13 @@ def main():
|
||||
type=str,
|
||||
help="specify base url other than the OpenAI's official API address",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--translate-tags",
|
||||
dest="translate_tags",
|
||||
type=str,
|
||||
default="p",
|
||||
help="example --translate-tags p,blockquote",
|
||||
)
|
||||
|
||||
options = parser.parse_args()
|
||||
PROXY = options.proxy
|
||||
@ -121,6 +128,7 @@ def main():
|
||||
model_api_base=model_api_base,
|
||||
is_test=options.test,
|
||||
test_num=options.test_num,
|
||||
translate_tags=options.translate_tags,
|
||||
)
|
||||
e.make_bilingual_book()
|
||||
|
||||
|
@ -23,12 +23,14 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
model_api_base=None,
|
||||
is_test=False,
|
||||
test_num=5,
|
||||
translate_tags="p",
|
||||
):
|
||||
self.epub_name = epub_name
|
||||
self.new_epub = epub.EpubBook()
|
||||
self.translate_model = model(key, language, model_api_base)
|
||||
self.is_test = is_test
|
||||
self.test_num = test_num
|
||||
self.translate_tags = translate_tags
|
||||
|
||||
try:
|
||||
self.origin_book = epub.read_epub(self.epub_name)
|
||||
@ -68,10 +70,11 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
def make_bilingual_book(self):
|
||||
new_book = self._make_new_book(self.origin_book)
|
||||
all_items = list(self.origin_book.get_items())
|
||||
trans_taglist = self.translate_tags.split(",")
|
||||
all_p_length = sum(
|
||||
0
|
||||
if i.get_type() != ITEM_DOCUMENT
|
||||
else len(bs(i.content, "html.parser").findAll("p"))
|
||||
else len(bs(i.content, "html.parser").findAll(trans_taglist))
|
||||
for i in all_items
|
||||
)
|
||||
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
|
||||
@ -81,7 +84,7 @@ class EPUBBookLoader(BaseBookLoader):
|
||||
for item in self.origin_book.get_items():
|
||||
if item.get_type() == ITEM_DOCUMENT:
|
||||
soup = bs(item.content, "html.parser")
|
||||
p_list = soup.findAll("p")
|
||||
p_list = soup.findAll(trans_taglist)
|
||||
is_test_done = self.is_test and index > self.test_num
|
||||
for p in p_list:
|
||||
if is_test_done or not p.text or self._is_special_text(p.text):
|
||||
|
BIN
test_books/Liber_Esther.epub
Normal file
BIN
test_books/Liber_Esther.epub
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user