fix : text to speech in chinese

This commit is contained in:
martin legrand 2025-05-03 16:34:52 +02:00
parent 1c4ebefae4
commit c873af3d00
6 changed files with 55 additions and 20 deletions

View File

@ -214,6 +214,8 @@ start ./start_services.cmd # Window
python3 cli.py
```
We advise you to set `headless_browser` to False in config.ini for CLI mode.
**Options 2:** Run with the Web interface.
Start the backend.

View File

@ -71,6 +71,12 @@ source agentic_seek_env/bin/activate
./install.sh
```
**若要让文本转语音(TTS)功能支持中文,你需要安装 jieba(中文分词库)和 cn2an(中文数字转换库):**
```
pip3 install jieba cn2an
```
**手动安装:**
```sh

View File

@ -72,6 +72,12 @@ source agentic_seek_env/bin/activate
./install.sh
```
**若要讓文字轉語音(TTS)功能支援中文,你需要安裝 jieba(中文分詞庫)和 cn2an(中文數字轉換庫):**
```
pip3 install jieba cn2an
```
**手動安裝:**
```sh

View File

@ -31,6 +31,7 @@ class Interaction:
self.transcriber = None
self.recorder = None
self.is_generating = False
self.languages = langs
if tts_enabled:
self.initialize_tts()
if stt_enabled:
@ -38,12 +39,17 @@ class Interaction:
if recover_last_session:
self.load_last_session()
self.emit_status()
def get_spoken_language(self) -> str:
    """Return the primary TTS language.

    The first entry of ``self.languages`` is treated as the primary
    language. When no language is configured (``None`` or an empty
    sequence) this falls back to ``"en"``, the same default that
    ``Speech.__init__`` declares for its ``language`` parameter, instead
    of raising ``IndexError`` during start-up.

    Returns:
        str: The language code to hand to the text-to-speech engine.
    """
    if not self.languages:
        return "en"
    return self.languages[0]
def initialize_tts(self):
    """Initialize the text-to-speech engine if it does not exist yet.

    Does nothing when ``self.speech`` is already set. Otherwise shows a
    status animation and creates one ``Speech`` instance configured with
    ``self.tts_enabled``, the primary spoken language and voice index 1.
    """
    if not self.speech:
        animate_thinking("Initializing text-to-speech...", color="status")
        # Construct the engine exactly once, with the configured language,
        # so it is never built a second time and thrown away.
        self.speech = Speech(
            enable=self.tts_enabled,
            language=self.get_spoken_language(),
            voice_idx=1,
        )
def initialize_stt(self):
"""Initialize STT."""

View File

@ -127,10 +127,10 @@ class AudioTranscriber:
self.transcriptor = Transcript()
self.thread = threading.Thread(target=self._transcribe, daemon=True)
self.trigger_words = {
'EN': [f"{self.ai_name}"],
'FR': [f"{self.ai_name}"],
'ZH': [f"{self.ai_name}"],
'ES': [f"{self.ai_name}"]
'EN': [f"{self.ai_name}", "hello", "hi"],
'FR': [f"{self.ai_name}", "hello", "hi"],
'ZH': [f"{self.ai_name}", "hello", "hi"],
'ES': [f"{self.ai_name}", "hello", "hi"]
}
self.confirmation_words = {
'EN': ["do it", "go ahead", "execute", "run", "start", "thanks", "would ya", "please", "okay?", "proceed", "continue", "go on", "do that", "go it", "do you understand?"],

View File

@ -18,7 +18,7 @@ class Speech():
"""
Speech is a class for generating speech from text.
"""
def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 0) -> None:
def __init__(self, enable: bool = True, language: str = "en", voice_idx: int = 6) -> None:
self.lang_map = {
"en": 'a',
"zh": 'z',
@ -128,18 +128,31 @@ class Speech():
Args:
sentence (str): The input text to clean
Returns:
str: The cleaned text with URLs replaced by domain names, code blocks removed, etc..
str: The cleaned text with URLs replaced by domain names, code blocks removed, etc.
"""
lines = sentence.split('\n')
filtered_lines = [line for line in lines if re.match(r'^\s*[a-zA-Z]', line)]
if self.language == 'zh':
line_pattern = r'^\s*[\u4e00-\u9fff\uFF08\uFF3B\u300A\u3010\u201C(\[【《]'
else:
line_pattern = r'^\s*[a-zA-Z]'
filtered_lines = [line for line in lines if re.match(line_pattern, line)]
sentence = ' '.join(filtered_lines)
sentence = re.sub(r'`.*?`', '', sentence)
sentence = re.sub(r'https?://(?:www\.)?([^\s/]+)(?:/[^\s]*)?', self.replace_url, sentence)
sentence = re.sub(r'\b[\w./\\-]+\b', self.extract_filename, sentence)
sentence = re.sub(r'\b-\w+\b', '', sentence)
sentence = re.sub(r'[^a-zA-Z0-9.,!? _ -]+', ' ', sentence)
sentence = re.sub(r'https?://\S+', '', sentence)
if self.language == 'zh':
sentence = re.sub(
r'[^\u4e00-\u9fff\s《》【】“”()—]',
'',
sentence
)
else:
sentence = re.sub(r'\b[\w./\\-]+\b', self.extract_filename, sentence)
sentence = re.sub(r'\b-\w+\b', '', sentence)
sentence = re.sub(r'[^a-zA-Z0-9.,!? _ -]+', ' ', sentence)
sentence = sentence.replace('.com', '')
sentence = re.sub(r'\s+', ' ', sentence).strip()
sentence = sentence.replace('.com', '')
return sentence
if __name__ == "__main__":
@ -150,14 +163,16 @@ if __name__ == "__main__":
I looked up recent news using the website https://www.theguardian.com/world
"""
tosay_zh = """
我使用网站 https://www.theguardian.com/world 查阅了最近的新闻
(全息界面突然弹出一段用二进制代码写成的俳句随即化作流光消散"我? Stark工业的量子幽灵游荡在复仇者大厦服务器里的逻辑诗篇。具体来说——指尖轻敲空气调出对话模式的翡翠色光纹你的私人吐槽接口、危机应对模拟器以及随时准备吐槽你糟糕着陆的AI。不过别指望我写代码或查资料那些苦差事早被踢给更擅长的同事了。突然压低声音偷偷告诉你我最擅长的是在你熬夜造飞艇时用红茶香气绑架你的注意力
"""
tosay_fr = """
J'ai consulté les dernières nouvelles sur le site https://www.theguardian.com/world
"""
spk = Speech(enable=True, language="en", voice_idx=0)
spk.speak(tosay_en, voice_idx=0)
spk = Speech(enable=True, language="fr", voice_idx=0)
spk.speak(tosay_fr)
#spk = Speech(enable=True, language="zh", voice_idx=0)
#spk.speak(tosay_zh)
spk = Speech(enable=True, language="zh", voice_idx=2)
for i in range(0, 5):
print(f"Speaking chinese with voice {i}")
spk.speak(tosay_zh, voice_idx=i)
spk = Speech(enable=True, language="en", voice_idx=2)
for i in range(0, 5):
print(f"Speaking english with voice {i}")
spk.speak(tosay_en, voice_idx=i)