diff --git a/modules/speak.py b/modules/speak.py
index 6fd761c..d900485 100644
--- a/modules/speak.py
+++ b/modules/speak.py
@@ -8,6 +8,8 @@ import random
 import urllib.parse
 import requests
 from pydub import AudioSegment
+import io
+import wave
 
 os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
 
@@ -75,16 +77,8 @@ class Speak:
     def transcribe(self, audio_data):
         """Transcribe the audio data using the selected model."""
         if self.model_name == "whisper":
-            # # Whisper expects float32 data
-            # # Convert int16 PCM back to float32
-            # audio_np = np.frombuffer(audio_data, np.int16).astype(np.float32) / 32768.0
-            # # Transcribe using Whisper model
-            # segments, _ = self.whisper_model.transcribe(audio_np, beam_size=5)
-            # transcription = " ".join([segment.text for segment in segments])
-            # print(f"Whisper Transcription: {transcription}")
-            # return transcription
             # Whisper expects float32 data
-            energy_threshold=0.001
+            energy_threshold = 0.001
             audio_np = np.frombuffer(audio_data, np.int16).astype(np.float32) / 32768.0
 
             # Calculate energy of the audio to determine if it should be transcribed
@@ -92,18 +86,30 @@ class Speak:
 
             # Only transcribe if energy exceeds the threshold
             if energy > energy_threshold:
-                # print(f"Audio energy ({energy}) exceeds threshold ({energy_threshold}), proceeding with transcription.")
                 segments, _ = self.whisper_model.transcribe(audio_np, beam_size=5)
                 transcription = " ".join([segment.text for segment in segments])
                 print(f"Whisper Transcription: {transcription}")
                 return transcription
             else:
-                # print(f"Audio energy ({energy}) is below the threshold ({energy_threshold}), skipping transcription.")
                 return ""
         else:
-            # Fallback to default recognizer (for example, speech_recognition module)
+            # Convert raw audio_data to PCM WAV format using an in-memory buffer
             recognizer = sr.Recognizer()
-            with sr.AudioFile(audio_data) as source:
+
+            # Convert audio_data to a WAV file in memory
+            audio_buffer = io.BytesIO()
+
+            with wave.open(audio_buffer, 'wb') as wav_file:
+                wav_file.setnchannels(1)  # Assuming mono audio
+                wav_file.setsampwidth(2)  # Assuming 16-bit audio
+                wav_file.setframerate(16000)  # Assuming 16kHz sample rate
+                wav_file.writeframes(audio_data)  # Write raw PCM data
+
+            # Reset the buffer's position to the start
+            audio_buffer.seek(0)
+
+            # Use SpeechRecognition's AudioFile to handle the in-memory WAV file
+            with sr.AudioFile(audio_buffer) as source:
                 audio = recognizer.record(source)
                 try:
                     transcription = recognizer.recognize_google(audio)
@@ -111,8 +117,10 @@
                     return transcription
                 except sr.UnknownValueError:
                     print("Google could not understand audio")
+                    return ""
                 except sr.RequestError as e:
                     print(f"Could not request results; {e}")
+                    return ""
 
     def listen(self, time_listen=8):
         """Main transcoder function that handles listening, noise cancellation, and transcription."""
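
For reference, a minimal standalone sketch of the fallback path this patch introduces: the raw PCM bytes are wrapped in a WAV container held entirely in memory so SpeechRecognition's AudioFile can read them without writing a temporary file to disk. The helper names (pcm_to_wav_buffer, transcribe_fallback) are hypothetical, for illustration only; the mono / 16-bit / 16 kHz defaults mirror the assumptions hard-coded in the diff.

import io
import wave

import speech_recognition as sr


def pcm_to_wav_buffer(pcm_bytes, channels=1, sample_width=2, sample_rate=16000):
    """Wrap raw PCM bytes in an in-memory WAV container."""
    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setnchannels(channels)      # mono, per the patch's assumption
        wav_file.setsampwidth(sample_width)  # 2 bytes = 16-bit samples
        wav_file.setframerate(sample_rate)   # 16 kHz, per the patch's assumption
        wav_file.writeframes(pcm_bytes)
    buffer.seek(0)  # rewind so readers start at the WAV header
    return buffer


def transcribe_fallback(pcm_bytes):
    """Transcribe raw PCM via the Google recognizer; return "" on any failure."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(pcm_to_wav_buffer(pcm_bytes)) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Google could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results; {e}")
        return ""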