mirror of
https://github.com/maglore9900/max_headroom.git
synced 2025-06-06 19:45:31 +00:00
dynamic noise detection/cancelling
This commit is contained in:
parent
264f2a811f
commit
04e15b0bb6
3
example_env.txt
Normal file
3
example_env.txt
Normal file
@ -0,0 +1,3 @@
|
||||
OPENAI_API_KEY='OPENAI API KEY HERE'
|
||||
OLLAMA_MODEL='llama3'
|
||||
OLLAMA_URL='https://localhost:11434'
|
2
main.py
2
main.py
@ -6,7 +6,7 @@ asyncio.set_event_loop(loop)
|
||||
graph = agent.Agent()
|
||||
|
||||
while True:
|
||||
text = graph.spk.listen2()
|
||||
text = graph.spk.listen3()
|
||||
if text and "hey" in text.lower() and "max " in text.lower() or text and "hey" in text.lower() and "mac " in text.lower():
|
||||
if "exit" in text.lower():
|
||||
break
|
||||
|
@ -86,6 +86,51 @@ class Speak:
|
||||
stream.close()
|
||||
p.terminate()
|
||||
|
||||
def dynamic_threshold(self, rms_values, factor=1.5, fallback=500.0):
    """Return a noise threshold derived from the median of observed RMS levels.

    Args:
        rms_values: Sequence of RMS levels collected so far.
        factor: Multiplier applied to the median RMS.
        fallback: Threshold returned when ``rms_values`` is empty, because
            the median of an empty sequence is NaN and NaN would make every
            later ``<`` comparison evaluate to False.

    Returns:
        The threshold as a plain Python float.
    """
    # len() rather than truthiness so NumPy arrays are accepted as well.
    if len(rms_values) == 0:
        return float(fallback)
    return float(np.median(rms_values)) * factor
|
||||
|
||||
def listen3(self, time_listen=15):
    """Capture microphone audio and return the first recognized phrase.

    Audio is read in chunks of 8000 frames at 16 kHz (0.5 s per chunk),
    passed through ``nr.reduce_noise``, and its RMS level is compared with a
    threshold derived from the RMS history via ``self.dynamic_threshold``.
    Chunks below the threshold are fed to ``self.recognizer``; chunks at or
    above it are reported as ambient noise and skipped.

    Args:
        time_listen: Maximum number of chunks to read before giving up.
            Each chunk is 0.5 s of audio, so the default of 15 is roughly
            7.5 s of listening, not 15 s.

    Returns:
        The recognized text, or None when nothing was recognized within
        ``time_listen`` chunks or the user interrupted with Ctrl+C.
    """
    sample_rate = 16000
    chunk_frames = 8000
    # Initial static threshold; it is replaced as soon as the first chunk
    # has been measured.
    noise_threshold = 500
    rms_values = []  # RMS history used to adapt the threshold

    p = pyaudio.PyAudio()
    stream = None
    try:
        # Opened inside the try block so that a failure while opening or
        # starting the stream still terminates the PyAudio instance.
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
                        input=True, frames_per_buffer=chunk_frames)
        stream.start_stream()
        print("Listening...")

        count = 0
        while count < time_listen:
            data = stream.read(chunk_frames, exception_on_overflow=False)
            samples = np.frombuffer(data, dtype=int16)
            filtered = nr.reduce_noise(y=samples, sr=sample_rate).astype(int16)
            filtered_data = filtered.tobytes()

            # Square in floating point: squaring int16 samples in place
            # wraps around and yields a meaningless (even NaN) RMS.
            rms_value = np.sqrt(np.mean(np.square(filtered.astype(float))))
            rms_values.append(rms_value)

            # NOTE(review): the history already contains the current chunk,
            # so the first chunk passes the check below whenever its RMS is
            # non-zero. Confirm this is the intended behaviour.
            noise_threshold = self.dynamic_threshold(rms_values)

            if rms_value < noise_threshold:
                # presumably a Vosk recognizer (AcceptWaveform/Result) —
                # verify against the class constructor.
                if self.recognizer.AcceptWaveform(filtered_data):
                    result = json.loads(self.recognizer.Result())
                    text = result.get("text")
                    if text:
                        print(f"Recognized: {text}")
                        return text
            else:
                print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {noise_threshold:.2f}")

            count += 1
    except KeyboardInterrupt:
        print("Stopping...")
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

    return None
|
||||
|
||||
|
||||
def stream_output(self, text):
|
||||
import urllib.parse
|
||||
# Example parameters
|
||||
|
Loading…
x
Reference in New Issue
Block a user