removed vosk, doing cleanup

maglore9900 2024-10-02 11:07:43 -04:00
parent 03fc980f4c
commit 1bb256ce47
3 changed files with 2 additions and 123 deletions

View File

@@ -1,39 +0,0 @@
-import time
-import argparse
-
-# import agent
-# spk = agent.Agent().spk
-
-# def timer(seconds):
-#     print(f"Timer started for {seconds} seconds.")
-#     time.sleep(seconds)
-#     print("Time's up!")
-#     spk.glitch_stream_output("Time's up!")
-
-# if __name__ == "__main__":
-#     parser = argparse.ArgumentParser(description="Simple Timer Script")
-#     parser.add_argument("seconds", type=int, help="Number of seconds to set the timer for")
-#     args = parser.parse_args()
-#     timer(args.seconds)
-
-# import time
-from plyer import notification
-
-
-def start_timer(seconds):
-    seconds = int(seconds)  # Convert to integer
-    print(f"Timer started for {seconds} seconds...")
-    time.sleep(seconds)  # Sleep for the desired time
-    notification.notify(
-        title="Timer Finished",
-        message="Your time is up!",
-        timeout=5  # Notification will disappear after 5 seconds
-    )
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Simple Timer Script")
-    parser.add_argument("seconds", type=int, help="Number of seconds to set the timer for")
-    args = parser.parse_args()
-    start_timer(args.seconds)
-
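
For reference, the deleted script was a simple blocking timer. A quick way to exercise it, as a minimal sketch ("timer.py" is an assumed filename, since this view does not show file names):

# Assumed filename: the script was run as, e.g., python timer.py 30
# Or, imported from Python:
from timer import start_timer

start_timer(3)  # blocks for 3 seconds, then raises the plyer desktop notification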

View File

@@ -23,12 +23,7 @@ class Speak:
         self.noise_threshold = 500
 
         # Initialize transcription models
-        if self.model_name == "vosk":
-            from vosk import Model, KaldiRecognizer
-            self.model_path = os.path.join(os.path.dirname(__file__), "../models/vosk-model-en-us-0.42-gigaspeech")
-            self.model = Model(self.model_path)
-            self.recognizer = KaldiRecognizer(self.model, 16000)
-        elif self.model_name == "whisper":
+        if self.model_name == "whisper":
             from faster_whisper import WhisperModel
             self.whisper_model_path = "large-v2"
             self.whisper_model = WhisperModel(self.whisper_model_path, device="cuda")  # Nvidia GPU mode
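
The surviving branch hard-codes device="cuda", so machines without an Nvidia GPU would fail at model load. A minimal CPU fallback sketch, using constructor arguments faster-whisper documents (not part of this commit):

from faster_whisper import WhisperModel

# Sketch: CPU fallback; "large-v2" matches the model size used above,
# and compute_type="int8" keeps memory use modest on CPU.
whisper_model = WhisperModel("large-v2", device="cpu", compute_type="int8")
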
@@ -104,12 +99,6 @@
             else:
                 # print(f"Audio energy ({energy}) is below the threshold ({energy_threshold}), skipping transcription.")
                 return ""
-        elif self.model_name == "vosk":
-            # Convert audio data to bytes for Vosk
-            if self.recognizer.AcceptWaveform(audio_data):
-                result = self.recognizer.Result()
-                print(f"Vosk Transcription: {result}")
-                return result
         else:
             # Fallback to default recognizer (for example, speech_recognition module)
             recognizer = sr.Recognizer()
@@ -223,6 +212,6 @@
         self.engine.runAndWait()
 
 # Example usage:
-# sp = Speak(model="vosk")  # or "vosk" or "google"
+# sp = Speak(model="whisper")  # or "google"
 # transcription = sp.transcoder(time_listen=10)
 # print("Final Transcription:", transcription)

View File

@@ -1,71 +0,0 @@
-import os
-import pyaudio
-import numpy as np
-import noisereduce as nr
-from faster_whisper import WhisperModel
-from numpy import frombuffer, int16
-
-os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
-
-
-class Speak:
-    def __init__(self):
-        self.model_path = "large-v2"  # Use the appropriate faster-whisper model path
-        self.model = WhisperModel(self.model_path, device="cuda")
-        self.sample_rate = 16000
-        self.channels = 1
-        self.chunk = 1024  # Number of frames per buffer
-        self.noise_threshold = 500  # Threshold to detect ambient noise
-
-    def listen3(self, duration=5):
-        """Listens to the microphone for a specific duration and transcribes the audio using faster-whisper, with noise suppression."""
-        p = pyaudio.PyAudio()
-        # print(f"Listening for {duration} seconds...")
-
-        # Open a stream to capture audio input from the microphone
-        stream = p.open(format=pyaudio.paInt16,
-                        channels=self.channels,
-                        rate=self.sample_rate,
-                        input=True,
-                        frames_per_buffer=self.chunk)
-        frames = []
-
-        for _ in range(0, int(self.sample_rate / self.chunk * duration)):
-            data = stream.read(self.chunk)
-            audio_data = frombuffer(data, dtype=int16)
-
-            # Apply noise reduction only if there's valid audio data
-            if np.any(audio_data):  # Check if audio data contains non-zero values
-                reduced_noise_data = nr.reduce_noise(y=audio_data, sr=self.sample_rate)
-
-                # Calculate RMS value, ensuring no invalid data (NaN) is used
-                rms_value = np.sqrt(np.mean(np.square(reduced_noise_data)))
-
-                # Only keep frames whose RMS energy stays below the noise threshold
-                if not np.isnan(rms_value) and rms_value < self.noise_threshold:
-                    frames.append(reduced_noise_data.astype(int16).tobytes())
-            else:
-                print("Invalid or zero audio data encountered.")
-
-        # Stop and close the audio stream
-        stream.stop_stream()
-        stream.close()
-        p.terminate()
-
-        # Combine the audio frames into a single array for transcription
-        if frames:
-            audio_data = np.frombuffer(b"".join(frames), dtype=int16)
-
-            # Transcribe the audio using faster-whisper
-            segments, info = self.model.transcribe(audio_data)
-
-            # Output the transcription
-            for segment in segments:
-                print(f"Transcription: {segment.text}")
-        else:
-            print("No valid audio data for transcription due to ambient noise.")
-
-
-if __name__ == "__main__":
-    sp = Speak()
-    sp.listen3(duration=5)  # Listen for 5 seconds
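
One caveat in the deleted helper: when handed a NumPy array, faster-whisper expects a float32 waveform normalized to [-1.0, 1.0], while listen3 passed raw int16 samples straight to self.model.transcribe. A minimal sketch of the conversion it would have needed (a note only, since the file is now gone):

# Inside listen3, replacing the direct int16 transcribe call:
audio_f32 = np.frombuffer(b"".join(frames), dtype=np.int16).astype(np.float32) / 32768.0
segments, info = self.model.transcribe(audio_f32)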