dynamic noise detection/cancelling

2025-06-07 03:55:30 +00:00 · 2024-09-04 19:33:55 -04:00 · 2024-09-04 19:33:55 -04:00 · 04e15b0bb6
commit 04e15b0bb6
parent 264f2a811f
3 changed files with 49 additions and 1 deletions
--- a/example_env.txt
+++ b/example_env.txt
@ -0,0 +1,3 @@
 OPENAI_API_KEY='OPENAI API KEY HERE'
 OLLAMA_MODEL='llama3'
 OLLAMA_URL='https://localhost:11434'
--- a/main.py
+++ b/main.py
@ -6,7 +6,7 @@ asyncio.set_event_loop(loop)
 graph = agent.Agent()
 while True:
-    text = graph.spk.listen2()
+    text = graph.spk.listen3()
    if text and "hey" in text.lower() and "max " in text.lower() or text and "hey" in text.lower() and "mac " in text.lower():
        if "exit" in text.lower():
            break
--- a/modules/speak.py
+++ b/modules/speak.py
@ -86,6 +86,51 @@ class Speak:
            stream.close()
            p.terminate()
    def dynamic_threshold(self, rms_values, factor=1.5):
        """Return the noise threshold derived from the observed RMS levels.

        The threshold is the median of ``rms_values`` scaled by ``factor``.
        """
        return np.median(rms_values) * factor
    def listen3(self, time_listen=15):
        """Listen on the microphone and return the first recognized phrase.

        Audio is read in chunks of 8000 frames at 16 kHz (half a second each).
        Every chunk is noise-reduced, its RMS level is recorded, and the noise
        threshold is recomputed from all RMS values seen so far (including the
        current one). Chunks whose RMS is below the threshold are fed to the
        recognizer; the others are reported as ambient noise and skipped.

        Args:
            time_listen: Maximum number of chunks to read before giving up.

        Returns:
            The recognized text, or ``None`` when nothing was recognized
            within ``time_listen`` chunks or listening was interrupted with
            Ctrl-C.
        """
        sample_rate = 16000
        chunk_frames = 8000
        rms_values = []  # RMS of every chunk read so far
        p = pyaudio.PyAudio()
        try:
            # Opened inside the try so PyAudio is terminated even if the
            # device cannot be opened.
            stream = p.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=sample_rate,
                input=True,
                frames_per_buffer=chunk_frames,
            )
            try:
                stream.start_stream()
                print("Listening...")
                count = 0
                while count < time_listen:
                    data = stream.read(chunk_frames, exception_on_overflow=False)
                    samples = np.frombuffer(data, dtype=np.int16)
                    filtered = nr.reduce_noise(y=samples, sr=sample_rate).astype(np.int16)

                    # Square in float64: squaring int16 samples in place wraps
                    # around for |x| > 181 and yields a bogus (or NaN) RMS.
                    rms_value = np.sqrt(np.mean(np.square(filtered.astype(np.float64))))
                    rms_values.append(rms_value)

                    # Re-derive the threshold from every RMS value observed so far.
                    noise_threshold = self.dynamic_threshold(rms_values)

                    if rms_value < noise_threshold:
                        if self.recognizer.AcceptWaveform(filtered.tobytes()):
                            result = json.loads(self.recognizer.Result())
                            if result["text"]:
                                print(f"Recognized: {result['text']}")
                                return result['text']
                    else:
                        print(f"Ambient noise detected: RMS {rms_value} exceeds threshold {noise_threshold:.2f}")
                    count += 1
            except KeyboardInterrupt:
                print("Stopping...")
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
        return None
    def stream_output(self, text):
        import urllib.parse
        # Example parameters