From 04e15b0bb6894177b4069f0408f698eb13ffece7 Mon Sep 17 00:00:00 2001
From: maglore9900 <maglore46@gmail.com>
Date: Wed, 4 Sep 2024 19:33:55 -0400
Subject: [PATCH] dynamic noise detection/cancelling

---
 example_env.txt  |  3 +++
 main.py          |  2 +-
 modules/speak.py | 45 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 example_env.txt

diff --git a/example_env.txt b/example_env.txt
new file mode 100644
index 0000000..581c71f
--- /dev/null
+++ b/example_env.txt
@@ -0,0 +1,3 @@
+OPENAI_API_KEY='OPENAI API KEY HERE'
+OLLAMA_MODEL='llama3'
+OLLAMA_URL='http://localhost:11434'
\ No newline at end of file
diff --git a/main.py b/main.py
index c6d41d4..d0e2a0d 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ asyncio.set_event_loop(loop)
 graph = agent.Agent()
 
 while True:
-    text = graph.spk.listen2()
+    text = graph.spk.listen3()
     if text and "hey" in text.lower() and "max " in text.lower() or text and "hey" in text.lower() and "mac " in text.lower():
         if "exit" in text.lower():
             break
diff --git a/modules/speak.py b/modules/speak.py
index f91d768..19b0635 100644
--- a/modules/speak.py
+++ b/modules/speak.py
@@ -85,6 +85,51 @@ class Speak:
             stream.stop_stream()
             stream.close()
             p.terminate()
+            
+    def dynamic_threshold(self, rms_values, factor=1.5):
+        """Return the noise threshold: the median of rms_values scaled by factor."""
+        # Median rather than mean, so isolated outlier chunks move the threshold less.
+        return np.median(rms_values) * factor
+    
+    def listen3(self, time_listen=15):
+        """Listen for up to time_listen audio chunks and return recognized text.
+
+        Each chunk is noise-reduced; chunks whose RMS reaches the dynamic
+        threshold (see dynamic_threshold) are reported as noise and skipped.
+        Returns the first non-empty recognized text, or None otherwise.
+        """
+        rms_values = []  # To track RMS values over time
+        p = pyaudio.PyAudio()
+        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=8000)
+        stream.start_stream()
+        print("Listening...")
+        count = 0
+        try:
+            while count < time_listen:
+                data = stream.read(8000, exception_on_overflow=False)
+                filtered_data = nr.reduce_noise(y=frombuffer(data, dtype=int16), sr=16000).astype(int16).tobytes()
+                # Square in float64: squaring int16 samples wraps around and can turn the RMS into NaN.
+                samples = np.frombuffer(filtered_data, dtype=int16).astype(np.float64)
+                rms_value = np.sqrt(np.mean(np.square(samples)))
+                rms_values.append(rms_value)
+                # NOTE(review): chunks louder than the threshold are dropped, not recognized - confirm intent.
+                noise_threshold = self.dynamic_threshold(rms_values)
+                if rms_value < noise_threshold:
+                    if self.recognizer.AcceptWaveform(filtered_data):
+                        result = json.loads(self.recognizer.Result())
+                        if result["text"]:
+                            print(f"Recognized: {result['text']}")
+                            return result['text']
+                else:
+                    print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {noise_threshold:.2f}")
+                count += 1
+        except KeyboardInterrupt:
+            print("Stopping...")
+        finally:
+            stream.stop_stream()
+            stream.close()
+            p.terminate()
+    
  
     def stream_output(self, text):
         import urllib.parse