"""
inspired by: https://github.com/jesselau76/srt-gpt-translator, MIT License
"""
import re
import sys
from pathlib import Path

from book_maker.utils import prompt_config_to_kwargs

from .base_loader import BaseBookLoader


class SRTBookLoader(BaseBookLoader):
    """Translate an SRT subtitle file block by block into a bilingual SRT.

    The source file is parsed into blocks of ``number`` / ``time`` / ``text``.
    Only the number and the text are sent to the translation model; the
    timeline is re-attached when the bilingual file is written.  Translated
    texts are collected in ``p_to_save`` (one entry per source block, in
    order) so an interrupted run can be resumed from ``bin_path``.
    """

    # One or more blank (or whitespace-only) lines separate subtitle blocks.
    _BLOCK_SEPARATOR = re.compile(r"\n\s*\n")

    def __init__(
        self,
        srt_name,
        model,
        key,
        resume,
        language,
        model_api_base=None,
        is_test=False,
        test_num=5,
        prompt_config=None,
        single_translate=False,
    ) -> None:
        """Create the loader and, when ``resume`` is truthy, load saved progress.

        Args:
            srt_name: Path of the ``.srt`` file to translate.
            model: Callable building the translator; it is invoked as
                ``model(key, language, api_base=..., **prompt_kwargs)``.
            key: Credential forwarded to ``model``.
            resume: When truthy, previously saved translations are loaded
                from ``bin_path`` and reused.
            language: Target language forwarded to ``model``.
            model_api_base: Forwarded to ``model`` as ``api_base``.
            is_test: When truthy, translation stops early (see ``test_num``).
            test_num: Block-count threshold used in test mode.
            prompt_config: Accepted but not read by this loader; a fixed
                subtitle-specific prompt is always used.
            single_translate: Accepted but not read by this loader.
        """
        self.srt_name = srt_name
        self.translate_model = model(
            key,
            language,
            api_base=model_api_base,
            **prompt_config_to_kwargs(
                {
                    "system": "You are a srt subtitle file translator.",
                    "user": "Translate the following subtitle text into {language}, but keep the subtitle number and timeline and newlines unchanged: \n{text}",
                }
            ),
        )
        self.is_test = is_test
        self.p_to_save = []
        self.bilingual_result = []
        self.bilingual_temp_result = []
        self.test_num = test_num
        self.accumulated_num = 1
        self.blocks = []

        self.resume = resume
        self.bin_path = f"{Path(srt_name).parent}/.{Path(srt_name).stem}.temp.bin"
        if self.resume:
            self.load_state()

    def _make_new_book(self, book):
        """No-op; this loader does not build a book object."""
        pass

    def _parse_srt(self, srt_text):
        """Split SRT text into a list of ``{"number", "time", "text"}`` dicts.

        Raises:
            ValueError: If a non-empty block has no second (timeline) line.
        """
        final_blocks = []
        for raw_block in self._BLOCK_SEPARATOR.split(srt_text):
            raw_block = raw_block.strip()
            if not raw_block:
                continue

            lines = raw_block.split("\n")
            if len(lines) < 2:
                # Fail with a message that shows the offending block instead
                # of an anonymous IndexError.
                raise ValueError(f"malformed srt block (no timeline): {raw_block!r}")

            final_blocks.append(
                {
                    "number": lines[0].strip(),
                    "time": lines[1].strip(),
                    "text": "\n".join(lines[2:]).strip(),
                }
            )

        return final_blocks

    def _get_block_text(self, block):
        """Render a parsed block back to ``number / time / text`` SRT form."""
        return f"{block['number']}\n{block['time']}\n{block['text']}"

    def _concat_blocks(self, sliced_text: str, text: str):
        """Append ``text`` to ``sliced_text`` separated by one blank line."""
        return f"{sliced_text}\n\n{text}" if sliced_text else text

    def _get_block_translate(self, block):
        """Return the ``number`` and ``text`` of a block, as sent to the model."""
        return f"{block['number']}\n{block['text']}"

    def _get_block_from(self, text):
        """Parse one translated chunk into ``{"number", "text"}``.

        The first line is taken as the number and everything after it as the
        text.  Blank input yields an empty dict.
        """
        text = text.strip()
        if not text:
            return {}

        number, _, body = text.partition("\n")
        return {"number": number, "text": body}

    def _get_blocks_from(self, translate: str):
        """Parse a model reply into a list of ``{"number", "text"}`` dicts."""
        if not translate:
            return []

        # Use the same separator as the parser so whitespace-only or repeated
        # blank lines in the reply do not create phantom or merged blocks.
        return [
            self._get_block_from(chunk)
            for chunk in self._BLOCK_SEPARATOR.split(translate.strip())
        ]

    def _check_blocks(self, translate_blocks, origin_blocks):
        """
        Check if the translated blocks match the original text, with only a simple check of the beginning numbers.
        """
        if len(translate_blocks) != len(origin_blocks):
            return False

        for translated, origin in zip(translate_blocks, origin_blocks):
            raw_number = translated.get("number", 0)
            try:
                i = int(raw_number)
            except ValueError:
                # The model may decorate the number; fall back to the first
                # run of digits, or 0 when there is none.
                m = re.search(r"\d+", raw_number)
                i = int(m.group()) if m else 0

            j = int(origin.get("number", -1))
            if i != j:
                print(f"check failed: {i}!={j}")
                return False

        return True

    def _get_sliced_list(self):
        """Group ``self.blocks`` into ``(begin, end, text)`` translation slices.

        Consecutive blocks are joined while the combined length stays below
        ``self.accumulated_num``; ``begin``/``end`` index ``self.blocks``.
        Returns an empty list when there is nothing to translate.
        """
        sliced_list = []
        sliced_text = ""
        begin_index = 0
        for i, block in enumerate(self.blocks):
            text = self._get_block_translate(block)
            if not text:
                continue

            if len(sliced_text + text) < self.accumulated_num:
                sliced_text = self._concat_blocks(sliced_text, text)
            else:
                if sliced_text:
                    sliced_list.append((begin_index, i, sliced_text))
                sliced_text = text
                begin_index = i

        # Do not emit an empty trailing slice (e.g. for an empty file), which
        # would send an empty string to the model.
        if sliced_text:
            sliced_list.append((begin_index, len(self.blocks), sliced_text))
        return sliced_list

    def _translate(self, text):
        """Call the translation model, printing and wrapping any failure."""
        try:
            return self.translate_model.translate(text)
        except Exception as e:
            print(e)
            raise Exception("Something is wrong when translate") from e

    def _translate_slice(self, begin, end, text):
        """Translate one slice and return one parsed block per source block.

        When ``accumulated_num > 1`` the reply is validated against the source
        numbers and, on mismatch, retried block by block.  Otherwise each
        slice is a single block and the reply is folded into exactly one
        block so results stay aligned with ``self.blocks``.
        """
        origin_blocks = self.blocks[begin:end]
        temp = self._translate(text)
        translated_blocks = self._get_blocks_from(temp)

        if self.accumulated_num > 1:
            if not self._check_blocks(translated_blocks, origin_blocks):
                # try to translate one by one, so don't accumulate too much
                print(
                    f"retry it one by one: {self.blocks[begin]['number']} - {self.blocks[end-1]['number']}"
                )
                translated_blocks = [
                    self._get_block_from(
                        self._translate(self._get_block_translate(block))
                    )
                    for block in origin_blocks
                ]

                if not self._check_blocks(translated_blocks, origin_blocks):
                    raise Exception("retry failed, adjust the srt manually.")
        elif len(origin_blocks) == 1 and len(translated_blocks) != 1:
            # Unchecked mode: an empty reply or one containing blank lines
            # must still produce exactly one entry, otherwise p_to_save and
            # the output drift out of step with self.blocks.
            collapsed = self._BLOCK_SEPARATOR.sub("\n", temp or "")
            translated_blocks = [self._get_block_from(collapsed)]

        return translated_blocks

    def make_bilingual_book(self):
        """Translate the subtitle file and write ``<stem>_bilingual.srt``.

        On any error or Ctrl-C during translation the progress and a
        partially translated ``<stem>_bilingual_temp.srt`` are saved and the
        process exits.

        Raises:
            Exception: ``"can not load file"`` if the source cannot be read
                or parsed.
        """
        if self.accumulated_num > 512:
            print(f"{self.accumulated_num} is too large, shrink it to 512.")
            self.accumulated_num = 512

        try:
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM,
            # which would otherwise stick to the first subtitle number.
            with open(self.srt_name, encoding="utf-8-sig") as f:
                self.blocks = self._parse_srt(f.read())
        except Exception as e:
            raise Exception("can not load file") from e

        index = 0
        p_to_save_len = len(self.p_to_save)

        try:
            for begin, end, text in self._get_sliced_list():
                if not self.resume or index + (end - begin) > p_to_save_len:
                    # Drop a partially saved slice before redoing it.
                    if index < p_to_save_len:
                        self.p_to_save = self.p_to_save[:index]

                    translated_texts = [
                        block.get("text", "")
                        for block in self._translate_slice(begin, end, text)
                    ]
                    self.p_to_save.extend(translated_texts)
                else:
                    # Slice was fully translated in a previous run.
                    translated_texts = self.p_to_save[begin:end]

                for offset, translated in enumerate(translated_texts):
                    self.bilingual_result.append(
                        f"{self._get_block_text(self.blocks[begin + offset])}\n{translated}"
                    )

                index += end - begin
                if self.is_test and index > self.test_num:
                    break

            self.save_file(
                f"{Path(self.srt_name).parent}/{Path(self.srt_name).stem}_bilingual.srt",
                self.bilingual_result,
            )

        except (KeyboardInterrupt, Exception) as e:
            print(e)
            print("you can resume it next time")
            self._save_progress()
            self._save_temp_book()
            # NOTE(review): exits with status 0 even though the run failed;
            # kept as-is because callers may rely on it - confirm.
            sys.exit(0)

    def _save_temp_book(self):
        """Write ``<stem>_bilingual_temp.srt`` with the translations so far.

        Blocks that have no saved translation yet are written with an empty
        translation line.
        """
        saved_count = len(self.p_to_save)
        temp_result = []
        for i, block in enumerate(self.blocks):
            translated = self.p_to_save[i] if i < saved_count else ""
            temp_result.append(f"{self._get_block_text(block)}\n{translated}")
        # Rebuild rather than append so a repeated call cannot duplicate blocks.
        self.bilingual_temp_result = temp_result

        self.save_file(
            f"{Path(self.srt_name).parent}/{Path(self.srt_name).stem}_bilingual_temp.srt",
            self.bilingual_temp_result,
        )

    def _save_progress(self):
        """Write the translated texts to ``bin_path``, joined by ``===``.

        Raises:
            Exception: ``"can not save resume file"`` if writing fails.
        """
        try:
            with open(self.bin_path, "w", encoding="utf-8") as f:
                f.write("===".join(self.p_to_save))
        except Exception as e:
            raise Exception("can not save resume file") from e

    def load_state(self):
        """Load translated texts from ``bin_path`` into ``p_to_save``.

        Raises:
            Exception: ``"can not load resume file"`` if reading fails.
        """
        try:
            with open(self.bin_path, encoding="utf-8") as f:
                text = f.read()
            self.p_to_save = text.split("===") if text else []
        except Exception as e:
            raise Exception("can not load resume file") from e

    def save_file(self, book_path, content):
        """Write ``content`` to ``book_path``, one blank line between items.

        Raises:
            Exception: ``"can not save file"`` if writing fails.
        """
        try:
            with open(book_path, "w", encoding="utf-8") as f:
                f.write("\n\n".join(content))
        except Exception as e:
            raise Exception("can not save file") from e
What is the memory from that time that makes you smile? Or maybe a memory that stands out + +13 +00:01:08,000 --> 00:01:12,320 +as helping your mind take shape and helping you define yourself in this world? + +14 +00:01:12,320 --> 00:01:15,440 +RANA L. KLIUBI So the memory that stands out is we used to + +15 +00:01:15,440 --> 00:01:21,680 +live in my grandma's house. She used to have these mango trees in her garden. And in the summer, + +16 +00:01:21,680 --> 00:01:26,640 +and so mango season was like July and August. And so in the summer, she would invite all my aunts + +17 +00:01:26,640 --> 00:01:31,680 +and uncles and cousins. And it was just like maybe there were like 20 or 30 people in the house, + +18 +00:01:31,680 --> 00:01:38,080 +and she would cook all this amazing food. And us, the kids, we would go down the garden, + +19 +00:01:38,080 --> 00:01:43,920 +and we would pick all these mangoes. And I don't know, I think it's just the bringing people + +20 +00:01:43,920 --> 00:01:47,920 +together that always stuck with me, the warmth. TITO Around the mango tree. + +21 +00:01:47,920 --> 00:01:52,800 +RANA L. KLIUBI Yeah, around the mango tree. And there's just like the joy, the joy of being + +22 +00:01:52,800 --> 00:02:00,880 +together around food. And I'm a terrible cook. So I guess that didn't, that memory didn't translate + +23 +00:02:00,880 --> 00:02:05,520 +to me kind of doing the same. I love hosting people. TITO Do you remember colors, smells? + +24 +00:02:05,520 --> 00:02:10,560 +Is that what, like what, how does memory work? Like what do you visualize? Do you visualize + +25 +00:02:10,560 --> 00:02:19,360 +people's faces, smiles? Do you, is there colors? Is there like a theme to the colors? Is it smells + +26 +00:02:19,360 --> 00:02:23,360 +because of food involved? RANA L. KLIUBI Yeah, I think that's a great question. 
So the, + +27 +00:02:23,360 --> 00:02:28,800 +those Egyptian mangoes, there's a particular type that I love, and it's called Darwasi mangoes. And + +28 +00:02:28,800 --> 00:02:33,680 +they're kind of, you know, they're oval, and they have a little red in them. So I kind of, + +29 +00:02:33,680 --> 00:02:39,600 +they're red and mango colored on the outside. So I remember that. TITO Does red indicate like + +30 +00:02:39,600 --> 00:02:45,520 +extra sweetness? Is that, is that, that means like it's nicely, yeah, it's nice and ripe and stuff. + +31 +00:02:45,520 --> 00:02:52,640 +Yeah. What, what's like a definitive food of Egypt? You know, there's like these almost + +32 +00:02:52,640 --> 00:02:58,800 +stereotypical foods in different parts of the world, like Ukraine invented borscht. + +33 +00:02:59,600 --> 00:03:04,800 +Borscht is this beet soup with, that you put sour cream on. See, it's not, I can't see if you, + +34 +00:03:04,800 --> 00:03:10,880 +if you know, if you know what it is, I think, you know, is delicious. But if I explain it, + +35 +00:03:10,880 --> 00:03:15,280 +it's just not going to sound delicious. I feel like beet soup. This doesn't make any sense, + +36 +00:03:15,280 --> 00:03:19,600 +but that's kind of, and you probably have actually seen pictures of it because it's one of the + +37 +00:03:19,600 --> 00:03:26,800 +traditional foods in Ukraine, in Russia, in different parts of the Slavic world. So that's, + +38 +00:03:26,800 --> 00:03:31,520 +but it's become so cliche and stereotypical that you almost don't mention it, but it's still + +39 +00:03:31,520 --> 00:03:35,440 +delicious. Like I visited Ukraine, I eat that every single day, so. + +40 +00:03:35,440 --> 00:03:38,480 +Do you, do you make it yourself? How hard is it to make? + +41 +00:03:38,480 --> 00:03:43,600 +No, I don't know. 
I think to make it well, like anything, like Italians, they say, well, + +42 +00:03:44,320 --> 00:03:51,760 +tomato sauce is easy to make, but to make it right, that's like a generational skill. So anyway, + +43 +00:03:51,760 --> 00:03:55,200 +is there something like that in Egypt? Is there a culture of food? + +44 +00:03:55,200 --> 00:04:02,880 +There is. And actually, we have a similar kind of soup. It's called molokhia, and it's, it's made + +45 +00:04:02,880 --> 00:04:07,520 +of this green plant. It's like, it's somewhere between spinach and kale, and you mince it, + +46 +00:04:07,520 --> 00:04:13,360 +and then you cook it in like chicken broth. And my grandma used to make, and my mom makes it really + +47 +00:04:13,360 --> 00:04:18,080 +well, and I try to make it, but it's not as great. So we used to have that. And then we used to have + +48 +00:04:18,080 --> 00:04:23,520 +it alongside stuffed pigeons. I'm pescetarian now, so I don't eat that anymore, but. + +49 +00:04:23,520 --> 00:04:24,480 +Stuffed pigeons. + +50 +00:04:24,480 --> 00:04:27,600 +Yeah, it's like, it was really yummy. It's the one thing I miss about, + +51 +00:04:28,480 --> 00:04:32,080 +you know, now that I'm pescetarian and I don't eat. + +52 +00:04:32,080 --> 00:04:33,040 +The stuffed pigeons? + +53 +00:04:33,040 --> 00:04:34,240 +Yeah, the stuffed pigeons. + +54 +00:04:35,440 --> 00:04:39,920 +Is it, what are they stuffed with? If that doesn't bother you too much to describe. + +55 +00:04:39,920 --> 00:04:46,000 +No, no, it's stuffed with a lot of like just rice and, yeah, it's just rice. Yeah, so. + +56 +00:04:46,000 --> 00:04:51,120 +And you also, you said that your first, in your book, that your first computer + +57 +00:04:51,120 --> 00:04:54,880 +was an Atari, and Space Invaders was your favorite game. + +58 +00:04:56,000 --> 00:04:58,800 +Is that when you first fell in love with computers, would you say? + +59 +00:04:58,800 --> 00:05:00,160 +Yeah, I would say so. 
+ +60 +00:05:00,160 --> 00:05:04,160 +Video games, or just the computer itself? Just something about the machine. + +61 +00:05:04,160 --> 00:05:07,840 +Ooh, this thing, there's magic in here. + +62 +00:05:07,840 --> 00:05:12,080 +Yeah, I think the magical moment is definitely like playing video games with my, + +63 +00:05:12,080 --> 00:05:17,120 +I have two younger sisters, and we would just like had fun together, like playing games. + +64 +00:05:17,120 --> 00:05:22,240 +But the other memory I have is my first code, the first code I wrote. + +65 +00:05:22,240 --> 00:05:26,720 +I wrote, I drew a Christmas tree, and I'm Muslim, right? + +66 +00:05:26,720 --> 00:05:32,000 +So it's kind of, it was kind of funny that the first thing I did was like this Christmas tree. + +67 +00:05:32,000 --> 00:05:38,320 +So, yeah, and that's when I realized, wow, you can write code to do all sorts of like + +68 +00:05:38,320 --> 00:05:42,720 +really cool stuff. I must have been like six or seven at the time. + +69 +00:05:42,720 --> 00:05:48,560 +So you can write programs, and the programs do stuff for you. That's power. + +70 +00:05:48,560 --> 00:05:50,880 +That's, if you think about it, that's empowering. + +71 +00:05:50,880 --> 00:05:51,600 +It's AI. + +72 +00:05:51,600 --> 00:05:55,120 +Yeah, I know what it is. I don't know if that, you see like, + +73 +00:05:56,400 --> 00:05:59,520 +I don't know if many people think of it that way when they first learned to program. + +74 +00:05:59,520 --> 00:06:02,880 +They just love the puzzle of it. Like, ooh, this is cool. This is pretty. + +75 +00:06:02,880 --> 00:06:05,600 +It's a Christmas tree, but like, it's power. + +76 +00:06:05,600 --> 00:06:06,960 +It is power. 
+ +77 +00:06:06,960 --> 00:06:11,040 +Eventually, I guess you couldn't at the time, but eventually this thing, + +78 +00:06:11,040 --> 00:06:14,640 +if it's interesting enough, if it's a pretty enough Christmas tree, + +79 +00:06:14,640 --> 00:06:19,280 +it can be run by millions of people and bring them joy, like that little thing. + +80 +00:06:19,280 --> 00:06:21,760 +And then because it's digital, it's easy to spread. + +81 +00:06:22,400 --> 00:06:26,560 +So like you just created something that's easily spreadable to millions of people. + +82 +00:06:26,560 --> 00:06:27,120 +Totally. + +83 +00:06:28,160 --> 00:06:29,840 +It's hard to think that way when you're six. + +84 +00:06:30,800 --> 00:06:37,040 +In the book, you write, I am who I am because I was raised by a particular set of parents, + +85 +00:06:37,040 --> 00:06:41,200 +both modern and conservative, forward thinking, yet locked in tradition. + +86 +00:06:41,760 --> 00:06:46,000 +I'm a Muslim and I feel I'm stronger, more centered for it. + +87 +00:06:46,000 --> 00:06:50,960 +I adhere to the values of my religion, even if I'm not as dutiful as I once was. + +88 +00:06:50,960 --> 00:06:55,040 +And I am a new American and I'm thriving on the energy, + +89 +00:06:55,040 --> 00:06:58,720 +vitality and entrepreneurial spirit of this great country. + +90 +00:06:59,840 --> 00:07:01,520 +So let me ask you about your parents. + +91 +00:07:01,520 --> 00:07:05,280 +What have you learned about life from them, especially when you were young? + +92 +00:07:05,280 --> 00:07:09,920 +So both my parents, they're Egyptian, but they moved to Kuwait right out. + +93 +00:07:09,920 --> 00:07:11,680 +Actually, there's a cute story about how they met. + +94 +00:07:11,680 --> 00:07:14,960 +So my dad taught COBOL in the 70s. + +95 +00:07:14,960 --> 00:07:15,680 +Nice. + +96 +00:07:15,680 --> 00:07:18,240 +And my mom decided to learn programming. 
+ +97 +00:07:18,240 --> 00:07:21,120 +So she signed up to take his COBOL programming class. + +98 +00:07:22,400 --> 00:07:26,640 +And he tried to date her and she was like, no, no, no, I don't date. + +99 +00:07:26,640 --> 00:07:28,240 +And so he's like, okay, I'll propose. + +100 +00:07:28,240 --> 00:07:29,680 +And that's how they got married. + +101 +00:07:29,680 --> 00:07:30,960 +Whoa, strong move. + +102 +00:07:30,960 --> 00:07:32,240 +Right, exactly, right. + +103 +00:07:32,240 --> 00:07:34,640 +That's really impressive. + +104 +00:07:35,760 --> 00:07:38,800 +Those COBOL guys know how to impress a lady. + +105 +00:07:40,640 --> 00:07:43,520 +So yeah, so what have you learned from them? + +106 +00:07:43,520 --> 00:07:44,720 +So definitely grit. + +107 +00:07:44,720 --> 00:07:47,360 +One of the core values in our family is just hard work. + +108 +00:07:48,320 --> 00:07:50,080 +There were no slackers in our family. + +109 +00:07:50,720 --> 00:07:54,160 +And that's something that's definitely stayed with me, + +110 +00:07:55,920 --> 00:07:58,480 +both as a professional, but also in my personal life. +