add srt support (#247)

* feat: support srt translation
This commit is contained in:
Vincent Zhang 2023-04-16 22:31:46 +08:00 committed by GitHub
parent a0c999a2e6
commit 1720c95d44
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 721 additions and 2 deletions

View File

@ -1,8 +1,10 @@
from book_maker.loader.epub_loader import EPUBBookLoader
from book_maker.loader.txt_loader import TXTBookLoader
from book_maker.loader.srt_loader import SRTBookLoader
# Maps a format key ("epub", "txt", "srt") to the loader class that handles it.
BOOK_LOADER_DICT = {
    "epub": EPUBBookLoader,
    "txt": TXTBookLoader,
    "srt": SRTBookLoader,
    # TODO add more here
}

View File

@ -1 +1,278 @@
"""TODO"""
"""
inspired by: https://github.com/jesselau76/srt-gpt-translator, MIT License
"""
import re
import sys
from pathlib import Path
from book_maker.utils import prompt_config_to_kwargs
from .base_loader import BaseBookLoader
class SRTBookLoader(BaseBookLoader):
def __init__(
    self,
    srt_name,
    model,
    key,
    resume,
    language,
    model_api_base=None,
    is_test=False,
    test_num=5,
    prompt_config=None,
    single_translate=False,
) -> None:
    """Create the translator and the bookkeeping state for one SRT file.

    Args:
        srt_name: Path of the subtitle file to translate.
        model: Callable building the translator; invoked as
            ``model(key, language, api_base=..., **prompt_kwargs)``.
        key: Forwarded to ``model`` as its first positional argument.
        resume: When truthy, saved translations are loaded from
            ``bin_path`` via ``load_state``.
        language: Forwarded to ``model`` as its second positional argument.
        model_api_base: Forwarded to ``model`` as ``api_base``.
        is_test: When set, ``make_bilingual_book`` stops once more than
            ``test_num`` blocks have been handled.
        test_num: Block budget used together with ``is_test``.
        prompt_config: Not used by this loader; the built-in subtitle
            prompt below is always applied.
        single_translate: Not used by this loader.
    """
    srt_prompt = {
        "system": "You are a srt subtitle file translator.",
        "user": "Translate the following subtitle text into {language}, but keep the subtitle number and timeline and newlines unchanged: \n{text}",
    }

    self.srt_name = srt_name
    self.translate_model = model(
        key,
        language,
        api_base=model_api_base,
        **prompt_config_to_kwargs(srt_prompt),
    )

    self.is_test = is_test
    self.test_num = test_num
    self.resume = resume

    # One translated text per subtitle block, in block order.
    self.p_to_save = []
    self.bilingual_result = []
    self.bilingual_temp_result = []
    self.blocks = []
    # Character-length threshold used by _get_sliced_list when grouping
    # blocks into one request; with 1 every block forms its own slice.
    self.accumulated_num = 1

    # Hidden progress file stored next to the input file.
    source = Path(srt_name)
    self.bin_path = f"{source.parent}/.{source.stem}.temp.bin"

    if self.resume:
        self.load_state()
def _make_new_book(self, book):
pass
def _parse_srt(self, srt_text):
blocks = re.split("\n\s*\n", srt_text)
final_blocks = []
new_block = {}
for i in range(0, len(blocks)):
block = blocks[i]
if block.strip() == "":
continue
lines = block.strip().split("\n")
new_block["number"] = lines[0].strip()
timestamp = lines[1].strip()
new_block["time"] = timestamp
text = "\n".join(lines[2:]).strip()
new_block["text"] = text
final_blocks.append(new_block)
new_block = {}
return final_blocks
def _get_block_text(self, block):
return f"{block['number']}\n{block['time']}\n{block['text']}"
def _concat_blocks(self, sliced_text: str, text: str):
return f"{sliced_text}\n\n{text}" if sliced_text else text
def _get_block_translate(self, block):
return f"{block['number']}\n{block['text']}"
def _get_block_from(self, text):
text = text.strip()
if not text:
return {}
block = text.split("\n")
if len(block) < 2:
return {"number": block[0], "text": ""}
return {"number": block[0], "text": "\n".join(block[1:])}
def _get_blocks_from(self, translate: str):
if not translate:
return []
blocks = []
blocks_text = translate.strip().split("\n\n")
for text in blocks_text:
blocks.append(self._get_block_from(text))
return blocks
def _check_blocks(self, translate_blocks, origin_blocks):
"""
Check if the translated blocks match the original text, with only a simple check of the beginning numbers.
"""
if len(translate_blocks) != len(origin_blocks):
return False
for t in zip(translate_blocks, origin_blocks):
i = 0
try:
i = int(t[0].get("number", 0))
except ValueError:
m = re.search(r"\s*\d+", t[0].get("number"))
if m:
i = int(m.group())
j = int(t[1].get("number", -1))
if i != j:
print(f"check failed: {i}!={j}")
return False
return True
def _get_sliced_list(self):
sliced_list = []
sliced_text = ""
begin_index = 0
for i, block in enumerate(self.blocks):
text = self._get_block_translate(block)
if not text:
continue
if len(sliced_text + text) < self.accumulated_num:
sliced_text = self._concat_blocks(sliced_text, text)
else:
if sliced_text:
sliced_list.append((begin_index, i, sliced_text))
sliced_text = text
begin_index = i
sliced_list.append((begin_index, len(self.blocks), sliced_text))
return sliced_list
def make_bilingual_book(self):
    """Translate the subtitle file and write ``<stem>_bilingual.srt``.

    The file named by ``self.srt_name`` is parsed into blocks, the blocks
    are grouped into slices, and every slice is either translated or, when
    resuming, taken from ``self.p_to_save``. Each output entry is the
    original block followed by its translation.

    On any error or on Ctrl-C the progress and a temporary bilingual file
    are written and the process exits.

    Raises:
        Exception: If the source file cannot be read or parsed
            ("can not load file").
        SystemExit: After progress has been saved following a failure or
            an interrupt.
    """
    if self.accumulated_num > 512:
        print(f"{self.accumulated_num} is too large, shrink it to 512.")
        self.accumulated_num = 512

    try:
        with open(f"{self.srt_name}", encoding="utf-8") as f:
            self.blocks = self._parse_srt(f.read())
    except Exception as e:
        raise Exception("can not load file") from e

    # Number of blocks handled so far; slices are contiguous from 0, so
    # this equals the ``begin`` of the slice being processed.
    index = 0
    p_to_save_len = len(self.p_to_save)

    try:
        for begin, end, text in self._get_sliced_list():
            if begin == end:
                # Nothing to translate in an empty slice.
                continue
            origin_blocks = self.blocks[begin:end]

            if not self.resume or index + (end - begin) > p_to_save_len:
                if index < p_to_save_len:
                    # A slice that was only partly saved is redone as a whole.
                    self.p_to_save = self.p_to_save[:index]
                translations = self._translate_slice(origin_blocks, text)
                self.p_to_save.extend(translations)
            else:
                translations = self.p_to_save[begin:end]

            for block, translated in zip(origin_blocks, translations):
                self.bilingual_result.append(
                    f"{self._get_block_text(block)}\n{translated}"
                )

            index += end - begin
            if self.is_test and index > self.test_num:
                break

        self.save_file(
            f"{Path(self.srt_name).parent}/{Path(self.srt_name).stem}_bilingual.srt",
            self.bilingual_result,
        )
    except (KeyboardInterrupt, Exception) as e:
        print(e)
        print("you can resume it next time")
        self._save_progress()
        self._save_temp_book()
        # NOTE(review): exit status 0 is also used after a failure; kept
        # as-is because callers may rely on it - confirm before changing.
        sys.exit(0)


def _translate_text(self, text):
    """Call the translator, printing and wrapping any failure."""
    try:
        return self.translate_model.translate(text)
    except Exception as e:
        print(e)
        raise Exception("Something is wrong when translate") from e


def _translate_slice(self, origin_blocks, text):
    """Translate one slice and return exactly one text per source block.

    With ``accumulated_num`` > 1 the reply is validated with
    ``_check_blocks`` and, on mismatch, retried block by block. Otherwise
    the slice holds a single block; a reply that parses into zero or
    several blocks is merged into one entry so later translations cannot
    drift onto the wrong subtitle.
    """
    temp = self._translate_text(text)
    translated_blocks = self._get_blocks_from(temp)

    if self.accumulated_num > 1:
        if not self._check_blocks(translated_blocks, origin_blocks):
            # try to translate one by one, so don't accumulate too much
            print(
                f"retry it one by one: {origin_blocks[0]['number']} - {origin_blocks[-1]['number']}"
            )
            translated_blocks = [
                self._get_block_from(
                    self._translate_text(self._get_block_translate(block))
                )
                for block in origin_blocks
            ]
            if not self._check_blocks(translated_blocks, origin_blocks):
                raise Exception("retry failed, adjust the srt manually.")
    elif len(translated_blocks) != len(origin_blocks):
        if len(origin_blocks) != 1:
            raise Exception("translated block count does not match the source")
        merged = self._get_block_from(temp or "")
        # Blank lines inside a cue would end it early in the output file.
        kept = [
            line for line in merged.get("text", "").split("\n") if line.strip()
        ]
        translated_blocks = [
            {"number": merged.get("number", ""), "text": "\n".join(kept)}
        ]

    return [block.get("text", "") for block in translated_blocks]
def _save_temp_book(self):
for i, block in enumerate(self.blocks):
if i < len(self.p_to_save):
text = self.p_to_save[i]
self.bilingual_temp_result.append(
f"{self._get_block_text(block)}\n{text}"
)
else:
self.bilingual_temp_result.append(f"{self._get_block_text(block)}\n")
self.save_file(
f"{Path(self.srt_name).parent}/{Path(self.srt_name).stem}_bilingual_temp.srt",
self.bilingual_temp_result,
)
def _save_progress(self):
try:
with open(self.bin_path, "w", encoding="utf-8") as f:
f.write("===".join(self.p_to_save))
except:
raise Exception("can not save resume file")
def load_state(self):
    """Restore ``self.p_to_save`` from the progress file at ``self.bin_path``.

    The file content is split on "==="; an empty file gives an empty list.

    Raises:
        Exception: "can not load resume file", chained to the underlying
            error, when the file cannot be read.
    """
    try:
        saved = Path(self.bin_path).read_text(encoding="utf-8")
        self.p_to_save = saved.split("===") if saved else []
    except Exception as e:
        raise Exception("can not load resume file") from e
def save_file(self, book_path, content):
    """Write ``content`` to ``book_path`` as UTF-8, entries separated by a blank line.

    Args:
        book_path: Destination file path.
        content: Iterable of strings, one per subtitle entry.

    Raises:
        Exception: "can not save file", chained to the underlying error,
            when the file cannot be written.
    """
    try:
        with open(book_path, "w", encoding="utf-8") as f:
            f.write("\n\n".join(content))
    except Exception as e:
        # A bare ``except`` would also trap KeyboardInterrupt/SystemExit
        # and hide the real cause.
        raise Exception("can not save file") from e

View File

@ -0,0 +1,440 @@
1
00:00:00,000 --> 00:00:07,040
there's a broader question here, right? As we build socially and emotionally intelligent machines,
2
00:00:07,920 --> 00:00:12,640
what does that mean about our relationship with them and then more broadly our relationship with
3
00:00:12,640 --> 00:00:18,240
one another, right? Because this machine is going to be programmed to be amazing at empathy,
4
00:00:18,240 --> 00:00:22,560
by definition, right? It's going to always be there for you. It's not going to get bored.
5
00:00:23,440 --> 00:00:25,680
I don't know how I feel about that. I think about that a lot.
6
00:00:25,680 --> 00:00:30,320
TITO The following is a conversation with Rana
7
00:00:30,320 --> 00:00:36,080
L. Kliubi, a pioneer in the field of emotion recognition and human centric artificial
8
00:00:36,080 --> 00:00:43,920
intelligence. She is the founder of Effectiva, deputy CEO of SmartEye, author of Girl Decoded,
9
00:00:43,920 --> 00:00:49,200
and one of the most brilliant, kind, inspiring, and fun human beings I've gotten the chance to
10
00:00:49,200 --> 00:00:54,800
talk to. This is the Lex Friedman podcast. To support it, please check out our sponsors in
11
00:00:54,800 --> 00:01:02,400
the description. And now, dear friends, here's Rana L. Kliubi. You grew up in the Middle East,
12
00:01:02,400 --> 00:01:08,000
in Egypt. What is the memory from that time that makes you smile? Or maybe a memory that stands out
13
00:01:08,000 --> 00:01:12,320
as helping your mind take shape and helping you define yourself in this world?
14
00:01:12,320 --> 00:01:15,440
RANA L. KLIUBI So the memory that stands out is we used to
15
00:01:15,440 --> 00:01:21,680
live in my grandma's house. She used to have these mango trees in her garden. And in the summer,
16
00:01:21,680 --> 00:01:26,640
and so mango season was like July and August. And so in the summer, she would invite all my aunts
17
00:01:26,640 --> 00:01:31,680
and uncles and cousins. And it was just like maybe there were like 20 or 30 people in the house,
18
00:01:31,680 --> 00:01:38,080
and she would cook all this amazing food. And us, the kids, we would go down the garden,
19
00:01:38,080 --> 00:01:43,920
and we would pick all these mangoes. And I don't know, I think it's just the bringing people
20
00:01:43,920 --> 00:01:47,920
together that always stuck with me, the warmth. TITO Around the mango tree.
21
00:01:47,920 --> 00:01:52,800
RANA L. KLIUBI Yeah, around the mango tree. And there's just like the joy, the joy of being
22
00:01:52,800 --> 00:02:00,880
together around food. And I'm a terrible cook. So I guess that didn't, that memory didn't translate
23
00:02:00,880 --> 00:02:05,520
to me kind of doing the same. I love hosting people. TITO Do you remember colors, smells?
24
00:02:05,520 --> 00:02:10,560
Is that what, like what, how does memory work? Like what do you visualize? Do you visualize
25
00:02:10,560 --> 00:02:19,360
people's faces, smiles? Do you, is there colors? Is there like a theme to the colors? Is it smells
26
00:02:19,360 --> 00:02:23,360
because of food involved? RANA L. KLIUBI Yeah, I think that's a great question. So the,
27
00:02:23,360 --> 00:02:28,800
those Egyptian mangoes, there's a particular type that I love, and it's called Darwasi mangoes. And
28
00:02:28,800 --> 00:02:33,680
they're kind of, you know, they're oval, and they have a little red in them. So I kind of,
29
00:02:33,680 --> 00:02:39,600
they're red and mango colored on the outside. So I remember that. TITO Does red indicate like
30
00:02:39,600 --> 00:02:45,520
extra sweetness? Is that, is that, that means like it's nicely, yeah, it's nice and ripe and stuff.
31
00:02:45,520 --> 00:02:52,640
Yeah. What, what's like a definitive food of Egypt? You know, there's like these almost
32
00:02:52,640 --> 00:02:58,800
stereotypical foods in different parts of the world, like Ukraine invented borscht.
33
00:02:59,600 --> 00:03:04,800
Borscht is this beet soup with, that you put sour cream on. See, it's not, I can't see if you,
34
00:03:04,800 --> 00:03:10,880
if you know, if you know what it is, I think, you know, is delicious. But if I explain it,
35
00:03:10,880 --> 00:03:15,280
it's just not going to sound delicious. I feel like beet soup. This doesn't make any sense,
36
00:03:15,280 --> 00:03:19,600
but that's kind of, and you probably have actually seen pictures of it because it's one of the
37
00:03:19,600 --> 00:03:26,800
traditional foods in Ukraine, in Russia, in different parts of the Slavic world. So that's,
38
00:03:26,800 --> 00:03:31,520
but it's become so cliche and stereotypical that you almost don't mention it, but it's still
39
00:03:31,520 --> 00:03:35,440
delicious. Like I visited Ukraine, I eat that every single day, so.
40
00:03:35,440 --> 00:03:38,480
Do you, do you make it yourself? How hard is it to make?
41
00:03:38,480 --> 00:03:43,600
No, I don't know. I think to make it well, like anything, like Italians, they say, well,
42
00:03:44,320 --> 00:03:51,760
tomato sauce is easy to make, but to make it right, that's like a generational skill. So anyway,
43
00:03:51,760 --> 00:03:55,200
is there something like that in Egypt? Is there a culture of food?
44
00:03:55,200 --> 00:04:02,880
There is. And actually, we have a similar kind of soup. It's called molokhia, and it's, it's made
45
00:04:02,880 --> 00:04:07,520
of this green plant. It's like, it's somewhere between spinach and kale, and you mince it,
46
00:04:07,520 --> 00:04:13,360
and then you cook it in like chicken broth. And my grandma used to make, and my mom makes it really
47
00:04:13,360 --> 00:04:18,080
well, and I try to make it, but it's not as great. So we used to have that. And then we used to have
48
00:04:18,080 --> 00:04:23,520
it alongside stuffed pigeons. I'm pescetarian now, so I don't eat that anymore, but.
49
00:04:23,520 --> 00:04:24,480
Stuffed pigeons.
50
00:04:24,480 --> 00:04:27,600
Yeah, it's like, it was really yummy. It's the one thing I miss about,
51
00:04:28,480 --> 00:04:32,080
you know, now that I'm pescetarian and I don't eat.
52
00:04:32,080 --> 00:04:33,040
The stuffed pigeons?
53
00:04:33,040 --> 00:04:34,240
Yeah, the stuffed pigeons.
54
00:04:35,440 --> 00:04:39,920
Is it, what are they stuffed with? If that doesn't bother you too much to describe.
55
00:04:39,920 --> 00:04:46,000
No, no, it's stuffed with a lot of like just rice and, yeah, it's just rice. Yeah, so.
56
00:04:46,000 --> 00:04:51,120
And you also, you said that your first, in your book, that your first computer
57
00:04:51,120 --> 00:04:54,880
was an Atari, and Space Invaders was your favorite game.
58
00:04:56,000 --> 00:04:58,800
Is that when you first fell in love with computers, would you say?
59
00:04:58,800 --> 00:05:00,160
Yeah, I would say so.
60
00:05:00,160 --> 00:05:04,160
Video games, or just the computer itself? Just something about the machine.
61
00:05:04,160 --> 00:05:07,840
Ooh, this thing, there's magic in here.
62
00:05:07,840 --> 00:05:12,080
Yeah, I think the magical moment is definitely like playing video games with my,
63
00:05:12,080 --> 00:05:17,120
I have two younger sisters, and we would just like had fun together, like playing games.
64
00:05:17,120 --> 00:05:22,240
But the other memory I have is my first code, the first code I wrote.
65
00:05:22,240 --> 00:05:26,720
I wrote, I drew a Christmas tree, and I'm Muslim, right?
66
00:05:26,720 --> 00:05:32,000
So it's kind of, it was kind of funny that the first thing I did was like this Christmas tree.
67
00:05:32,000 --> 00:05:38,320
So, yeah, and that's when I realized, wow, you can write code to do all sorts of like
68
00:05:38,320 --> 00:05:42,720
really cool stuff. I must have been like six or seven at the time.
69
00:05:42,720 --> 00:05:48,560
So you can write programs, and the programs do stuff for you. That's power.
70
00:05:48,560 --> 00:05:50,880
That's, if you think about it, that's empowering.
71
00:05:50,880 --> 00:05:51,600
It's AI.
72
00:05:51,600 --> 00:05:55,120
Yeah, I know what it is. I don't know if that, you see like,
73
00:05:56,400 --> 00:05:59,520
I don't know if many people think of it that way when they first learned to program.
74
00:05:59,520 --> 00:06:02,880
They just love the puzzle of it. Like, ooh, this is cool. This is pretty.
75
00:06:02,880 --> 00:06:05,600
It's a Christmas tree, but like, it's power.
76
00:06:05,600 --> 00:06:06,960
It is power.
77
00:06:06,960 --> 00:06:11,040
Eventually, I guess you couldn't at the time, but eventually this thing,
78
00:06:11,040 --> 00:06:14,640
if it's interesting enough, if it's a pretty enough Christmas tree,
79
00:06:14,640 --> 00:06:19,280
it can be run by millions of people and bring them joy, like that little thing.
80
00:06:19,280 --> 00:06:21,760
And then because it's digital, it's easy to spread.
81
00:06:22,400 --> 00:06:26,560
So like you just created something that's easily spreadable to millions of people.
82
00:06:26,560 --> 00:06:27,120
Totally.
83
00:06:28,160 --> 00:06:29,840
It's hard to think that way when you're six.
84
00:06:30,800 --> 00:06:37,040
In the book, you write, I am who I am because I was raised by a particular set of parents,
85
00:06:37,040 --> 00:06:41,200
both modern and conservative, forward thinking, yet locked in tradition.
86
00:06:41,760 --> 00:06:46,000
I'm a Muslim and I feel I'm stronger, more centered for it.
87
00:06:46,000 --> 00:06:50,960
I adhere to the values of my religion, even if I'm not as dutiful as I once was.
88
00:06:50,960 --> 00:06:55,040
And I am a new American and I'm thriving on the energy,
89
00:06:55,040 --> 00:06:58,720
vitality and entrepreneurial spirit of this great country.
90
00:06:59,840 --> 00:07:01,520
So let me ask you about your parents.
91
00:07:01,520 --> 00:07:05,280
What have you learned about life from them, especially when you were young?
92
00:07:05,280 --> 00:07:09,920
So both my parents, they're Egyptian, but they moved to Kuwait right out.
93
00:07:09,920 --> 00:07:11,680
Actually, there's a cute story about how they met.
94
00:07:11,680 --> 00:07:14,960
So my dad taught COBOL in the 70s.
95
00:07:14,960 --> 00:07:15,680
Nice.
96
00:07:15,680 --> 00:07:18,240
And my mom decided to learn programming.
97
00:07:18,240 --> 00:07:21,120
So she signed up to take his COBOL programming class.
98
00:07:22,400 --> 00:07:26,640
And he tried to date her and she was like, no, no, no, I don't date.
99
00:07:26,640 --> 00:07:28,240
And so he's like, okay, I'll propose.
100
00:07:28,240 --> 00:07:29,680
And that's how they got married.
101
00:07:29,680 --> 00:07:30,960
Whoa, strong move.
102
00:07:30,960 --> 00:07:32,240
Right, exactly, right.
103
00:07:32,240 --> 00:07:34,640
That's really impressive.
104
00:07:35,760 --> 00:07:38,800
Those COBOL guys know how to impress a lady.
105
00:07:40,640 --> 00:07:43,520
So yeah, so what have you learned from them?
106
00:07:43,520 --> 00:07:44,720
So definitely grit.
107
00:07:44,720 --> 00:07:47,360
One of the core values in our family is just hard work.
108
00:07:48,320 --> 00:07:50,080
There were no slackers in our family.
109
00:07:50,720 --> 00:07:54,160
And that's something that's definitely stayed with me,
110
00:07:55,920 --> 00:07:58,480
both as a professional, but also in my personal life.