mirror of
https://github.com/yihong0618/bilingual_book_maker.git
synced 2025-06-05 19:15:34 +00:00
support config tags to translate (#107)
This commit is contained in:
parent
3472f3e673
commit
b25c4ca873
@ -82,6 +82,13 @@ def main():
|
|||||||
type=str,
|
type=str,
|
||||||
help="specify base url other than the OpenAI's official API address",
|
help="specify base url other than the OpenAI's official API address",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--translate-tags",
|
||||||
|
dest="translate_tags",
|
||||||
|
type=str,
|
||||||
|
default="p",
|
||||||
|
help="example --translate-tags p,blockquote",
|
||||||
|
)
|
||||||
|
|
||||||
options = parser.parse_args()
|
options = parser.parse_args()
|
||||||
PROXY = options.proxy
|
PROXY = options.proxy
|
||||||
@ -121,6 +128,7 @@ def main():
|
|||||||
model_api_base=model_api_base,
|
model_api_base=model_api_base,
|
||||||
is_test=options.test,
|
is_test=options.test,
|
||||||
test_num=options.test_num,
|
test_num=options.test_num,
|
||||||
|
translate_tags=options.translate_tags,
|
||||||
)
|
)
|
||||||
e.make_bilingual_book()
|
e.make_bilingual_book()
|
||||||
|
|
||||||
|
@ -23,12 +23,14 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
model_api_base=None,
|
model_api_base=None,
|
||||||
is_test=False,
|
is_test=False,
|
||||||
test_num=5,
|
test_num=5,
|
||||||
|
translate_tags="p",
|
||||||
):
|
):
|
||||||
self.epub_name = epub_name
|
self.epub_name = epub_name
|
||||||
self.new_epub = epub.EpubBook()
|
self.new_epub = epub.EpubBook()
|
||||||
self.translate_model = model(key, language, model_api_base)
|
self.translate_model = model(key, language, model_api_base)
|
||||||
self.is_test = is_test
|
self.is_test = is_test
|
||||||
self.test_num = test_num
|
self.test_num = test_num
|
||||||
|
self.translate_tags = translate_tags
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.origin_book = epub.read_epub(self.epub_name)
|
self.origin_book = epub.read_epub(self.epub_name)
|
||||||
@ -68,10 +70,11 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
def make_bilingual_book(self):
|
def make_bilingual_book(self):
|
||||||
new_book = self._make_new_book(self.origin_book)
|
new_book = self._make_new_book(self.origin_book)
|
||||||
all_items = list(self.origin_book.get_items())
|
all_items = list(self.origin_book.get_items())
|
||||||
|
trans_taglist = self.translate_tags.split(",")
|
||||||
all_p_length = sum(
|
all_p_length = sum(
|
||||||
0
|
0
|
||||||
if i.get_type() != ITEM_DOCUMENT
|
if i.get_type() != ITEM_DOCUMENT
|
||||||
else len(bs(i.content, "html.parser").findAll("p"))
|
else len(bs(i.content, "html.parser").findAll(trans_taglist))
|
||||||
for i in all_items
|
for i in all_items
|
||||||
)
|
)
|
||||||
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
|
pbar = tqdm(total=self.test_num) if self.is_test else tqdm(total=all_p_length)
|
||||||
@ -81,7 +84,7 @@ class EPUBBookLoader(BaseBookLoader):
|
|||||||
for item in self.origin_book.get_items():
|
for item in self.origin_book.get_items():
|
||||||
if item.get_type() == ITEM_DOCUMENT:
|
if item.get_type() == ITEM_DOCUMENT:
|
||||||
soup = bs(item.content, "html.parser")
|
soup = bs(item.content, "html.parser")
|
||||||
p_list = soup.findAll("p")
|
p_list = soup.findAll(trans_taglist)
|
||||||
is_test_done = self.is_test and index > self.test_num
|
is_test_done = self.is_test and index > self.test_num
|
||||||
for p in p_list:
|
for p in p_list:
|
||||||
if is_test_done or not p.text or self._is_special_text(p.text):
|
if is_test_done or not p.text or self._is_special_text(p.text):
|
||||||
|
BIN
test_books/Liber_Esther.epub
Normal file
BIN
test_books/Liber_Esther.epub
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user