Mirror of https://github.com/maglore9900/max_headroom.git (synced 2025-06-04 10:30:24 +00:00)

Commit 1bb256ce47: removed vosk, doing cleanup
Parent: 03fc980f4c
@@ -1,39 +0,0 @@
-import time
-import argparse
-# import agent
-
-# spk = agent.Agent().spk
-
-# def timer(seconds):
-#     print(f"Timer started for {seconds} seconds.")
-#     time.sleep(seconds)
-#     print("Time's up!")
-#     spk.glitch_stream_output("Time's up!")
-
-# if __name__ == "__main__":
-#     parser = argparse.ArgumentParser(description="Simple Timer Script")
-#     parser.add_argument("seconds", type=int, help="Number of seconds to set the timer for")
-#     args = parser.parse_args()
-
-#     timer(args.seconds)
-
-
-# import time
-from plyer import notification
-
-def start_timer(seconds):
-    seconds = int(seconds)  # Convert to integer
-    print(f"Timer started for {seconds} seconds...")
-    time.sleep(seconds)  # Sleep for the desired time
-    notification.notify(
-        title="Timer Finished",
-        message="Your time is up!",
-        timeout=5  # Notification will disappear after 5 seconds
-    )
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Simple Timer Script")
-    parser.add_argument("seconds", type=int, help="Number of seconds to set the timer for")
-    args = parser.parse_args()
-
-    start_timer(args.seconds)
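For reference, the deleted script blocks in time.sleep for the whole countdown. A non-blocking variant of the same idea is sketched below; the plyer notification call mirrors the deleted code, while threading.Timer and the surrounding scaffolding are illustrative assumptions, not something in this repo.

    # Sketch: non-blocking timer with a desktop notification (assumes plyer is installed).
    import argparse
    import threading

    from plyer import notification

    def notify_done():
        # Fire the notification once the countdown elapses.
        notification.notify(title="Timer Finished", message="Your time is up!", timeout=5)

    if __name__ == "__main__":
        parser = argparse.ArgumentParser(description="Simple Timer Script")
        parser.add_argument("seconds", type=int, help="Number of seconds to set the timer for")
        args = parser.parse_args()

        # threading.Timer schedules the callback without blocking the main thread.
        t = threading.Timer(args.seconds, notify_done)
        t.start()
        t.join()  # wait only because this script has nothing else to do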
@@ -23,12 +23,7 @@ class Speak:
         self.noise_threshold = 500
 
         # Initialize transcription models
-        if self.model_name == "vosk":
-            from vosk import Model, KaldiRecognizer
-            self.model_path = os.path.join(os.path.dirname(__file__), "../models/vosk-model-en-us-0.42-gigaspeech")
-            self.model = Model(self.model_path)
-            self.recognizer = KaldiRecognizer(self.model, 16000)
-        elif self.model_name == "whisper":
+        if self.model_name == "whisper":
             from faster_whisper import WhisperModel
             self.whisper_model_path = "large-v2"
             self.whisper_model = WhisperModel(self.whisper_model_path, device="cuda")  # Nvidia GPU mode
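After this commit, Speak initializes only the faster-whisper backend. A minimal standalone sketch of that surviving path follows; the CPU fallback and the compute_type argument are assumptions added for machines without CUDA, not part of the commit.

    from faster_whisper import WhisperModel

    def load_whisper(model_path="large-v2"):
        # Prefer the GPU, as the class does; fall back to CPU if CUDA is unavailable.
        try:
            return WhisperModel(model_path, device="cuda")
        except Exception:
            # int8 keeps CPU memory use reasonable (assumption, not in the diff).
            return WhisperModel(model_path, device="cpu", compute_type="int8")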
@@ -104,12 +99,6 @@ class Speak:
             else:
                 # print(f"Audio energy ({energy}) is below the threshold ({energy_threshold}), skipping transcription.")
                 return ""
-        elif self.model_name == "vosk":
-            # Convert audio data to bytes for Vosk
-            if self.recognizer.AcceptWaveform(audio_data):
-                result = self.recognizer.Result()
-                print(f"Vosk Transcription: {result}")
-                return result
         else:
             # Fallback to default recognizer (for example, speech_recognition module)
             recognizer = sr.Recognizer()
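The remaining fallback branch is only hinted at here, since the hunk ends right after recognizer = sr.Recognizer(). As a rough sketch of what such a speech_recognition fallback typically looks like for raw 16 kHz / 16-bit PCM (everything beyond sr.Recognizer is an assumption):

    import speech_recognition as sr

    def fallback_transcribe(raw_bytes, sample_rate=16000):
        recognizer = sr.Recognizer()
        # Wrap raw PCM in AudioData: sample_width=2 bytes for 16-bit audio.
        audio = sr.AudioData(raw_bytes, sample_rate, 2)
        try:
            return recognizer.recognize_google(audio)  # free web API, needs network access
        except sr.UnknownValueError:
            return ""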
@@ -223,6 +212,6 @@ class Speak:
         self.engine.runAndWait()
 
 # Example usage:
-# sp = Speak(model="vosk")  # or "vosk" or "google"
+# sp = Speak(model="whisper")  # or "google"
 # transcription = sp.transcoder(time_listen=10)
 # print("Final Transcription:", transcription)
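Written out as a runnable snippet, the updated usage comments amount to the following; the import path is hypothetical, since the diff does not show the module name.

    from speak import Speak  # hypothetical import path

    sp = Speak(model="whisper")  # or "google"
    transcription = sp.transcoder(time_listen=10)
    print("Final Transcription:", transcription)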
@@ -1,71 +0,0 @@
-import os
-import pyaudio
-import numpy as np
-import noisereduce as nr
-from faster_whisper import WhisperModel
-from numpy import frombuffer, int16
-os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
-
-
-class Speak:
-    def __init__(self):
-        self.model_path = "large-v2"  # Use the appropriate faster-whisper model path
-        self.model = WhisperModel(self.model_path, device="cuda")
-        self.sample_rate = 16000
-        self.channels = 1
-        self.chunk = 1024  # Number of frames per buffer
-        self.noise_threshold = 500  # Threshold to detect ambient noise
-
-    def listen3(self, duration=5):
-        """ Listens to the microphone for a specific duration and transcribes the audio using faster-whisper, with noise suppression """
-        p = pyaudio.PyAudio()
-
-        # print(f"Listening for {duration} seconds...")
-
-        # Open a stream to capture audio input from the microphone
-        stream = p.open(format=pyaudio.paInt16,
-                        channels=self.channels,
-                        rate=self.sample_rate,
-                        input=True,
-                        frames_per_buffer=self.chunk)
-
-        frames = []
-
-        for _ in range(0, int(self.sample_rate / self.chunk * duration)):
-            data = stream.read(self.chunk)
-            audio_data = frombuffer(data, dtype=int16)
-
-            # Apply noise reduction only if there's valid audio data
-            if np.any(audio_data):  # Check if audio data contains non-zero values
-                reduced_noise_data = nr.reduce_noise(y=audio_data, sr=self.sample_rate)
-
-                # Calculate RMS value, ensuring no invalid data (NaN) is used
-                rms_value = np.sqrt(np.mean(np.square(reduced_noise_data)))
-
-                # Only add frames that are below the noise threshold (i.e., filter out ambient noise)
-                if not np.isnan(rms_value) and rms_value < self.noise_threshold:
-                    frames.append(reduced_noise_data.astype(int16).tobytes())
-            else:
-                print("Invalid or zero audio data encountered.")
-
-        # Stop and close the audio stream
-        stream.stop_stream()
-        stream.close()
-        p.terminate()
-
-        # Combine the audio frames into a single array for transcription
-        if frames:
-            audio_data = np.frombuffer(b"".join(frames), dtype=int16)
-
-            # Transcribe the audio using faster-whisper
-            segments, info = self.model.transcribe(audio_data)
-
-            # Output the transcription
-            for segment in segments:
-                print(f"Transcription: {segment.text}")
-        else:
-            print("No valid audio data for transcription due to ambient noise.")
-
-if __name__ == "__main__":
-    sp = Speak()
-    sp.listen3(duration=5)  # Listen for 5 seconds
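One caveat about the deleted listen3: faster-whisper expects a numpy array passed to transcribe() to be float32 audio in [-1, 1] sampled at 16 kHz, so handing it raw int16 samples as above was fragile. A minimal conversion sketch (the 32768 scale factor is the standard full-scale value for 16-bit PCM):

    import numpy as np

    def to_whisper_input(int16_audio: np.ndarray) -> np.ndarray:
        # Scale 16-bit PCM (-32768..32767) to float32 in [-1, 1],
        # the format faster-whisper expects for ndarray input.
        return int16_audio.astype(np.float32) / 32768.0

    # segments, info = model.transcribe(to_whisper_input(audio_data))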