Add local fallback for TTS

Modified the speak portion so that, if you are not using AllTalk, it falls back to responding locally like a robot.
2025-07-23 18:40:15 +00:00 · 2024-08-27 23:29:00 -04:00 · 2024-08-27 23:29:00 -04:00 · 08fa93898f
commit 08fa93898f
parent 22d7bdc1c3
2 changed files with 59 additions and 56 deletions
--- a/main.py
+++ b/main.py
@ -15,5 +15,5 @@ while True:
    if text and "max" in text.lower():
        response = loop.run_until_complete(graph.invoke_agent(text))
        if response:
-            sp.glitch_stream_output(response)
+            sp.glitch_stream_output2(response)
--- a/modules/speak.py
+++ b/modules/speak.py
@ -58,10 +58,10 @@ class Speak:
        with self.microphone as source:
            #! Adjust for ambient noise
            self.recognizer.adjust_for_ambient_noise(source, duration=1)
            #! added 5 second timeout so ambient noise detection can compensate for music that started playing
            audio = self.recognizer.listen(source, timeout=5)
            try:
            try:
                #! added 5 second timeout so ambient noise detection can compensate for music that started playing
                audio = self.recognizer.listen(source, timeout=5)
                text = self.recognizer.recognize_google(audio)
                print("You said: ", text)
                return text
@ -207,58 +207,61 @@ class Speak:
        # Create the streaming URL
        streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"
        try:
            # Stream the audio data
            response = requests.get(streaming_url, stream=True)
-        # Stream the audio data
+            # Initialize PyAudio
-        response = requests.get(streaming_url, stream=True)
+            p = pyaudio.PyAudio()
            stream = None
-        # Initialize PyAudio
+            # Process the audio stream in chunks
-        p = pyaudio.PyAudio()
+            chunk_size = 1024 * 6  # Adjust chunk size if needed
        stream = None
        # Process the audio stream in chunks
        chunk_size = 1024 * 6  # Adjust chunk size if needed
        audio_buffer = b''
        for chunk in response.iter_content(chunk_size=chunk_size):
            audio_buffer += chunk
            if len(audio_buffer) < chunk_size:
                continue
            audio_segment = AudioSegment(
                data=audio_buffer,
                sample_width=2,  # 2 bytes for 16-bit audio
                frame_rate=24000,  # Assumed frame rate, adjust as necessary
                channels=1  # Assuming mono audio
            )
            # Randomly adjust pitch
            octaves = random.uniform(-1, 1)
            modified_chunk = change_pitch(audio_segment, octaves)
            if random.random() < 0.01:  # 1% chance to trigger stutter
                repeat_times = random.randint(2, 5)  # Repeat 2 to 5 times
                for _ in range(repeat_times):
                    stream.write(modified_chunk.raw_data)
            # Convert to PCM16 and 16kHz sample rate after the stutter effect
            modified_chunk = convert_audio_format(modified_chunk, target_sample_rate=16000)
            if stream is None:
                # Define stream parameters
                stream = p.open(format=pyaudio.paInt16,
                                channels=1,
                                rate=modified_chunk.frame_rate,
                                output=True)
            # Play the modified chunk
            stream.write(modified_chunk.raw_data)
            # Reset buffer
            audio_buffer = b''
-        # Final cleanup
+            for chunk in response.iter_content(chunk_size=chunk_size):
-        if stream:
+                audio_buffer += chunk
-            stream.stop_stream()
+
-            stream.close()
+                if len(audio_buffer) < chunk_size:
-        p.terminate()
+                    continue
                audio_segment = AudioSegment(
                    data=audio_buffer,
                    sample_width=2,  # 2 bytes for 16-bit audio
                    frame_rate=24000,  # Assumed frame rate, adjust as necessary
                    channels=1  # Assuming mono audio
                )
                # Randomly adjust pitch
                octaves = random.uniform(-1, 1)
                modified_chunk = change_pitch(audio_segment, octaves)
                if random.random() < 0.01:  # 1% chance to trigger stutter
                    repeat_times = random.randint(2, 5)  # Repeat 2 to 5 times
                    for _ in range(repeat_times):
                        stream.write(modified_chunk.raw_data)
                # Convert to PCM16 and 16kHz sample rate after the stutter effect
                modified_chunk = convert_audio_format(modified_chunk, target_sample_rate=16000)
                if stream is None:
                    # Define stream parameters
                    stream = p.open(format=pyaudio.paInt16,
                                    channels=1,
                                    rate=modified_chunk.frame_rate,
                                    output=True)
                # Play the modified chunk
                stream.write(modified_chunk.raw_data)
                # Reset buffer
                audio_buffer = b''
            # Final cleanup
            if stream:
                stream.stop_stream()
                stream.close()
            p.terminate()
        except:
            self.engine.say(text)
            self.engine.runAndWait()