mirror of
https://github.com/maglore9900/max_headroom.git
synced 2025-06-07 03:55:30 +00:00
fallback for TTS
Modified the speak portion so that, if you are not using AllTalk, it falls back to speaking the response locally in a robotic voice.
This commit is contained in:
parent
22d7bdc1c3
commit
08fa93898f
2
main.py
2
main.py
@ -15,5 +15,5 @@ while True:
|
|||||||
if text and "max" in text.lower():
|
if text and "max" in text.lower():
|
||||||
response = loop.run_until_complete(graph.invoke_agent(text))
|
response = loop.run_until_complete(graph.invoke_agent(text))
|
||||||
if response:
|
if response:
|
||||||
sp.glitch_stream_output(response)
|
sp.glitch_stream_output2(response)
|
||||||
|
|
||||||
|
111
modules/speak.py
111
modules/speak.py
@ -58,10 +58,10 @@ class Speak:
|
|||||||
with self.microphone as source:
|
with self.microphone as source:
|
||||||
#! Adjust for ambient noise
|
#! Adjust for ambient noise
|
||||||
self.recognizer.adjust_for_ambient_noise(source, duration=1)
|
self.recognizer.adjust_for_ambient_noise(source, duration=1)
|
||||||
#! added 5 second timeout so ambient noise detection can compensate for music that started playing
|
|
||||||
audio = self.recognizer.listen(source, timeout=5)
|
|
||||||
try:
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
#! added 5 second timeout so ambient noise detection can compensate for music that started playing
|
||||||
|
audio = self.recognizer.listen(source, timeout=5)
|
||||||
text = self.recognizer.recognize_google(audio)
|
text = self.recognizer.recognize_google(audio)
|
||||||
print("You said: ", text)
|
print("You said: ", text)
|
||||||
return text
|
return text
|
||||||
@ -207,58 +207,61 @@ class Speak:
|
|||||||
|
|
||||||
# Create the streaming URL
|
# Create the streaming URL
|
||||||
streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"
|
streaming_url = f"http://localhost:7851/api/tts-generate-streaming?text={encoded_text}&voice={voice}&language={language}&output_file={output_file}"
|
||||||
|
try:
|
||||||
|
# Stream the audio data
|
||||||
|
response = requests.get(streaming_url, stream=True)
|
||||||
|
|
||||||
# Stream the audio data
|
# Initialize PyAudio
|
||||||
response = requests.get(streaming_url, stream=True)
|
p = pyaudio.PyAudio()
|
||||||
|
stream = None
|
||||||
|
|
||||||
# Initialize PyAudio
|
# Process the audio stream in chunks
|
||||||
p = pyaudio.PyAudio()
|
chunk_size = 1024 * 6 # Adjust chunk size if needed
|
||||||
stream = None
|
|
||||||
|
|
||||||
# Process the audio stream in chunks
|
|
||||||
chunk_size = 1024 * 6 # Adjust chunk size if needed
|
|
||||||
audio_buffer = b''
|
|
||||||
|
|
||||||
for chunk in response.iter_content(chunk_size=chunk_size):
|
|
||||||
audio_buffer += chunk
|
|
||||||
|
|
||||||
if len(audio_buffer) < chunk_size:
|
|
||||||
continue
|
|
||||||
|
|
||||||
audio_segment = AudioSegment(
|
|
||||||
data=audio_buffer,
|
|
||||||
sample_width=2, # 2 bytes for 16-bit audio
|
|
||||||
frame_rate=24000, # Assumed frame rate, adjust as necessary
|
|
||||||
channels=1 # Assuming mono audio
|
|
||||||
)
|
|
||||||
|
|
||||||
# Randomly adjust pitch
|
|
||||||
octaves = random.uniform(-1, 1)
|
|
||||||
modified_chunk = change_pitch(audio_segment, octaves)
|
|
||||||
|
|
||||||
if random.random() < 0.01: # 1% chance to trigger stutter
|
|
||||||
repeat_times = random.randint(2, 5) # Repeat 2 to 5 times
|
|
||||||
for _ in range(repeat_times):
|
|
||||||
stream.write(modified_chunk.raw_data)
|
|
||||||
|
|
||||||
# Convert to PCM16 and 16kHz sample rate after the stutter effect
|
|
||||||
modified_chunk = convert_audio_format(modified_chunk, target_sample_rate=16000)
|
|
||||||
|
|
||||||
if stream is None:
|
|
||||||
# Define stream parameters
|
|
||||||
stream = p.open(format=pyaudio.paInt16,
|
|
||||||
channels=1,
|
|
||||||
rate=modified_chunk.frame_rate,
|
|
||||||
output=True)
|
|
||||||
|
|
||||||
# Play the modified chunk
|
|
||||||
stream.write(modified_chunk.raw_data)
|
|
||||||
|
|
||||||
# Reset buffer
|
|
||||||
audio_buffer = b''
|
audio_buffer = b''
|
||||||
|
|
||||||
# Final cleanup
|
for chunk in response.iter_content(chunk_size=chunk_size):
|
||||||
if stream:
|
audio_buffer += chunk
|
||||||
stream.stop_stream()
|
|
||||||
stream.close()
|
if len(audio_buffer) < chunk_size:
|
||||||
p.terminate()
|
continue
|
||||||
|
|
||||||
|
audio_segment = AudioSegment(
|
||||||
|
data=audio_buffer,
|
||||||
|
sample_width=2, # 2 bytes for 16-bit audio
|
||||||
|
frame_rate=24000, # Assumed frame rate, adjust as necessary
|
||||||
|
channels=1 # Assuming mono audio
|
||||||
|
)
|
||||||
|
|
||||||
|
# Randomly adjust pitch
|
||||||
|
octaves = random.uniform(-1, 1)
|
||||||
|
modified_chunk = change_pitch(audio_segment, octaves)
|
||||||
|
|
||||||
|
if random.random() < 0.01: # 1% chance to trigger stutter
|
||||||
|
repeat_times = random.randint(2, 5) # Repeat 2 to 5 times
|
||||||
|
for _ in range(repeat_times):
|
||||||
|
stream.write(modified_chunk.raw_data)
|
||||||
|
|
||||||
|
# Convert to PCM16 and 16kHz sample rate after the stutter effect
|
||||||
|
modified_chunk = convert_audio_format(modified_chunk, target_sample_rate=16000)
|
||||||
|
|
||||||
|
if stream is None:
|
||||||
|
# Define stream parameters
|
||||||
|
stream = p.open(format=pyaudio.paInt16,
|
||||||
|
channels=1,
|
||||||
|
rate=modified_chunk.frame_rate,
|
||||||
|
output=True)
|
||||||
|
|
||||||
|
# Play the modified chunk
|
||||||
|
stream.write(modified_chunk.raw_data)
|
||||||
|
|
||||||
|
# Reset buffer
|
||||||
|
audio_buffer = b''
|
||||||
|
|
||||||
|
# Final cleanup
|
||||||
|
if stream:
|
||||||
|
stream.stop_stream()
|
||||||
|
stream.close()
|
||||||
|
p.terminate()
|
||||||
|
except:
|
||||||
|
self.engine.say(text)
|
||||||
|
self.engine.runAndWait()
|
Loading…
x
Reference in New Issue
Block a user