From 04e15b0bb6894177b4069f0408f698eb13ffece7 Mon Sep 17 00:00:00 2001
From: maglore9900
Date: Wed, 4 Sep 2024 19:33:55 -0400
Subject: [PATCH] dynamic noise detection/cancelling

---
NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation (assumed 4 spaces) and the position of blank context lines are a
best guess; run `git apply --check` (or apply with --ignore-whitespace) before
relying on it. The modules/speak.py hunk was also revised: the RMS is now
computed in float64 (squaring int16 samples wraps around), the unused initial
threshold was dropped, and listen3 gained a docstring, so its line counts
differ from the original commit.
NOTE(review): listen3 only feeds chunks whose RMS is *below* the dynamic
threshold to the recognizer; confirm this is the intended direction.

 example_env.txt  |  3 +++
 main.py          |  2 +-
 modules/speak.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 example_env.txt

diff --git a/example_env.txt b/example_env.txt
new file mode 100644
index 0000000..581c71f
--- /dev/null
+++ b/example_env.txt
@@ -0,0 +1,3 @@
+OPENAI_API_KEY='OPENAI API KEY HERE'
+OLLAMA_MODEL='llama3'
+OLLAMA_URL='https://localhost:11434'
\ No newline at end of file
diff --git a/main.py b/main.py
index c6d41d4..d0e2a0d 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ asyncio.set_event_loop(loop)
 graph = agent.Agent()
 
 while True:
-    text = graph.spk.listen2()
+    text = graph.spk.listen3()
     if text and "hey" in text.lower() and "max " in text.lower() or text and "hey" in text.lower() and "mac " in text.lower():
         if "exit" in text.lower():
             break
diff --git a/modules/speak.py b/modules/speak.py
index f91d768..19b0635 100644
--- a/modules/speak.py
+++ b/modules/speak.py
@@ -85,6 +85,62 @@ class Speak:
             stream.stop_stream()
             stream.close()
             p.terminate()
+
+    def dynamic_threshold(self, rms_values, factor=1.5):
+        """Return the noise threshold: ``factor`` times the median of ``rms_values``."""
+        median_rms = np.median(rms_values)
+        return median_rms * factor
+
+    def listen3(self, time_listen=15):
+        """Listen on the microphone and return the first recognized phrase.
+
+        Reads up to ``time_listen`` chunks of 8000 frames at 16 kHz (half a
+        second each). Every chunk is noise-reduced and its RMS is compared
+        with a threshold derived from the RMS of all chunks read so far; only
+        chunks below the threshold are passed to the recognizer.
+
+        Returns the recognized text, or None if nothing was recognized within
+        the chunk budget or the loop was interrupted with Ctrl-C.
+        """
+        rms_values = []  # To track RMS values over time
+        p = pyaudio.PyAudio()
+        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
+        stream.start_stream()
+        print("Listening...")
+
+        count = 0
+        try:
+            while count < time_listen:
+                data = stream.read(8000, exception_on_overflow=False)
+                samples = nr.reduce_noise(y=frombuffer(data, dtype=int16), sr=16000).astype(int16)
+                filtered_data = samples.tobytes()
+
+                # Calculate RMS to detect ambient noise levels. Square in float64:
+                # squaring int16 samples directly wraps around for |x| > 181.
+                rms_value = np.sqrt(np.mean(np.square(samples.astype(np.float64))))
+                rms_values.append(rms_value)
+
+                # Dynamically adjust the noise threshold based on previous RMS values
+                noise_threshold = self.dynamic_threshold(rms_values)
+
+                if rms_value < noise_threshold:
+                    if self.recognizer.AcceptWaveform(filtered_data):
+                        result = json.loads(self.recognizer.Result())
+                        if result["text"]:
+                            print(f"Recognized: {result['text']}")
+                            return result['text']
+                else:
+                    print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {noise_threshold:.2f}")
+
+                count += 1
+
+        except KeyboardInterrupt:
+            print("Stopping...")
+        finally:
+            stream.stop_stream()
+            stream.close()
+            p.terminate()
+
 
     def stream_output(self, text):
         import urllib.parse