dynamic noise detection/cancelling

2025-06-06 19:45:31 +00:00 · 2024-09-04 19:33:55 -04:00 · 2024-09-04 19:33:55 -04:00 · 04e15b0bb6
commit 04e15b0bb6
parent 264f2a811f
3 changed files with 49 additions and 1 deletions
--- a/example_env.txt
+++ b/example_env.txt
@ -0,0 +1,3 @@
+OPENAI_API_KEY='OPENAI API KEY HERE'
+OLLAMA_MODEL='llama3'
+OLLAMA_URL='http://localhost:11434'
--- a/main.py
+++ b/main.py
@ -6,7 +6,7 @@ asyncio.set_event_loop(loop)
 graph = agent.Agent()

 while True:
-    text = graph.spk.listen2()
+    text = graph.spk.listen3()
    if text and "hey" in text.lower() and "max " in text.lower() or text and "hey" in text.lower() and "mac " in text.lower():
        if "exit" in text.lower():
            break
--- a/modules/speak.py
+++ b/modules/speak.py
@ -86,6 +86,51 @@ class Speak:
            stream.close()
            p.terminate()
            
+    def dynamic_threshold(self, rms_values, factor=1.5):
+        """Return the noise threshold: the median of ``rms_values`` scaled by ``factor``.
+
+        Args:
+            rms_values: RMS levels collected so far.
+            factor: Multiplier applied to the median RMS.
+        """
+        return np.median(rms_values) * factor
+    
+    def listen3(self, time_listen=15):
+        """Listen on the microphone and return the first recognized phrase.
+
+        Audio is read in chunks of 8000 frames at 16 kHz (half a second per
+        chunk), noise-reduced, and passed to ``self.recognizer`` only when the
+        chunk's RMS is below the dynamic threshold (``dynamic_threshold`` of
+        all RMS values seen so far, the current chunk included).
+
+        Args:
+            time_listen: Maximum number of chunks to read before giving up.
+                With half-second chunks the default of 15 is about 7.5 s.
+
+        Returns:
+            The recognized text, or ``None`` if nothing was recognized within
+            ``time_listen`` chunks or the loop was interrupted with Ctrl-C.
+        """
+        sample_rate = 16000
+        chunk_frames = 8000
+        rms_values = []  # RMS of every chunk so far; input to the threshold
+        p = pyaudio.PyAudio()
+        stream = None
+        try:
+            # Opened inside the try so PyAudio is terminated even if open() fails.
+            stream = p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate,
+                            input=True, frames_per_buffer=chunk_frames)
+            stream.start_stream()
+            print("Listening...")
+
+            count = 0
+            while count < time_listen:
+                data = stream.read(chunk_frames, exception_on_overflow=False)
+                samples = nr.reduce_noise(
+                    y=np.frombuffer(data, dtype=np.int16), sr=sample_rate
+                ).astype(np.int16)
+                filtered_data = samples.tobytes()
+
+                # Widen before squaring: np.square on int16 stays int16 and
+                # wraps for |sample| > 181, which corrupts the RMS (and can
+                # yield NaN when the wrapped mean goes negative).
+                rms_value = np.sqrt(np.mean(np.square(samples.astype(np.float64))))
+                rms_values.append(rms_value)
+
+                # Threshold tracks the running median of all chunks so far.
+                noise_threshold = self.dynamic_threshold(rms_values)
+
+                # NOTE(review): chunks louder than the threshold are skipped as
+                # noise; confirm that loud speech is not meant to pass here.
+                if rms_value < noise_threshold:
+                    if self.recognizer.AcceptWaveform(filtered_data):
+                        result = json.loads(self.recognizer.Result())
+                        if result["text"]:
+                            print(f"Recognized: {result['text']}")
+                            return result['text']
+                else:
+                    print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {noise_threshold:.2f}")
+
+                count += 1
+
+        except KeyboardInterrupt:
+            print("Stopping...")
+        finally:
+            if stream is not None:
+                stream.stop_stream()
+                stream.close()
+            p.terminate()
+        return None
+    
+ 
    def stream_output(self, text):
        import urllib.parse
        # Example parameters