diff --git a/.gitignore b/.gitignore
index ae46c25..aeddbec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 transcript_cache/*.json
 __pycache__
-.env
\ No newline at end of file
+.env
+downloads/output.m4a
diff --git a/README.md b/README.md
index 7870ee7..c5e2d38 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,19 @@
 # YouTube Summarizer by TCSenpai
 
-YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models.
+YouTube Summarizer is a Streamlit-based web application that allows users to generate summaries of YouTube videos using AI-powered language models and optionally Whisper for transcription.
 
 ![Screenshot](screenshot.png)
 
 ## Features
 
+- Supports multiple YouTube frontends (e.g. YouTube, Invidious)
 - Fetch and cache YouTube video transcripts
 - Summarize video content using Ollama AI models
 - Display video information (title and channel)
 - Customizable Ollama URL and model selection
+- Fallback to Whisper for transcription if no transcript is found
+- Customizable Whisper URL and model selection
+- Option to force Whisper transcription even when a transcript is available
 
 ## Installation
 
@@ -28,11 +32,17 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen
 
 3. Set up environment variables:
    Create a `.env` file in the root directory and add the following:
+
    ```
    YOUTUBE_API_KEY=your_youtube_api_key
    OLLAMA_MODEL=default_model_name
+   WHISPER_URL=http://localhost:8000/
+   WHISPER_MODEL=Systran/faster-whisper-large-v3
    ```
 
+   - Note: you can copy the `env.example` file to `.env` and modify the values.
+   - Important: the `WHISPER_URL` should point to the whisper server you want to use. You can leave it as it is if you are not planning on using Whisper.
+
 ## Usage
 
 1. Run the Streamlit app:
@@ -46,8 +56,9 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen
 3. Enter a YouTube video URL in the input field.
 
 4. (Optional) Customize the Ollama URL and select a different AI model.
+5. (Optional) Customize the Whisper URL and select a different Whisper model.
 
-5. Click the "Summarize" button to generate a summary of the video.
+6. Click the "Summarize" button to generate a summary of the video.
 
 ## Dependencies
 
@@ -56,13 +67,18 @@ YouTube Summarizer is a Streamlit-based web application that allows users to gen
 - Ollama
 - YouTube Data API
 - Python-dotenv
+- pytubefix
+- gradio-client
 
 ## Project Structure
 
 - `src/main.py`: Main Streamlit application
 - `src/ollama_client.py`: Ollama API client for model interaction
 - `src/video_info.py`: YouTube API integration for video information
+- `src/whisper_module.py`: Whisper API client for transcription
+- `src/yt_audiophile.py`: Audio downloader for YouTube videos
 - `transcript_cache/`: Directory for caching video transcripts
+- `downloads/`: Directory for downloaded audio files, might be empty
 
 ## Contributing
 
diff --git a/downloads/placeholder b/downloads/placeholder
new file mode 100644
index 0000000..e69de29
diff --git a/env.example b/env.example
index 0fbf9a0..d54cc1f 100644
--- a/env.example
+++ b/env.example
@@ -1,3 +1,5 @@
 OLLAMA_URL=http://localhost:11434
 OLLAMA_MODEL=llama3.1:8b
-YOUTUBE_API_KEY=your_youtube_api_key
\ No newline at end of file
+YOUTUBE_API_KEY=your_youtube_api_key
+WHISPER_URL=http://localhost:8000/
+WHISPER_MODEL=Systran/faster-whisper-large-v3
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 4a288e1..2dc9ae5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,9 @@ streamlit==1.31.1
 python-dotenv==1.0.1
 youtube-transcript-api==0.6.2
 requests==2.31.0
-google-api-python-client==2.101.0
\ No newline at end of file
+google-api-python-client==2.101.0
+yt-dlp
+pydub
+gradio-client
+pytube
+pytubefix
\ No newline at end of file
diff --git a/screenshot.png b/screenshot.png
index a17d6f1..b3dc620 100644
Binary files a/screenshot.png and b/screenshot.png differ
diff --git a/src/main.py b/src/main.py
index 9cf531b..3d8d58b 100644
--- a/src/main.py
+++ b/src/main.py
@@ -5,6 +5,8 @@ from dotenv import load_dotenv
 from youtube_transcript_api import YouTubeTranscriptApi
 from ollama_client import OllamaClient
 from video_info import get_video_info
+from yt_audiophile import download_audio
+from whisper_module import transcribe
 
 # Load environment variables
 load_dotenv()
@@ -91,7 +93,9 @@ def get_ollama_models(ollama_url):
     return models
 
 
-def summarize_video(video_url, model, ollama_url):
+def summarize_video(
+    video_url, model, ollama_url, fallback_to_whisper=True, force_whisper=False
+):
     video_id = video_url.split("v=")[-1]
     st.write(f"Video ID: {video_id}")
 
@@ -99,8 +103,29 @@ def summarize_video(video_url, model, ollama_url):
         transcript = get_transcript(video_id)
     st.success("Summarizer fetched successfully!")
 
+    # Forcing whisper if specified
+    if force_whisper:
+        st.warning("Forcing whisper...")
+        fallback_to_whisper = True
+        transcript = None
+
     if not transcript:
-        return "Unable to fetch transcript."
+        if not fallback_to_whisper:
+            return "Unable to fetch transcript (and fallback to whisper is disabled)"
+        if not force_whisper:
+            st.warning("Unable to fetch transcript. Trying to download audio...")
+        try:
+            download_audio(video_url)
+            st.success("Audio downloaded successfully!")
+            st.warning("Starting transcription...it might take a while...")
+            transcript = transcribe("downloads/output.m4a")
+            st.success("Transcription completed successfully!")
+            os.remove("downloads/output.m4a")
+        except Exception as e:
+            st.error(f"Error downloading audio or transcribing: {e}")
+            if os.path.exists("downloads/output.m4a"):
+                os.remove("downloads/output.m4a")
+            return "Unable to fetch transcript."
 
     ollama_client = OllamaClient(ollama_url, model)
     st.success(f"Ollama client created with model: {model}")
@@ -140,6 +165,20 @@ def main():
     if not default_model in available_models:
         available_models.append(default_model)
 
+    # Sets whisper options
+    default_whisper_url = os.getenv("WHISPER_URL")
+    whisper_url = st.text_input(
+        "Whisper URL (optional)",
+        value=default_whisper_url,
+        placeholder="Enter custom Whisper URL",
+    )
+    if not whisper_url:
+        whisper_url = default_whisper_url
+    whisper_model = os.getenv("WHISPER_MODEL")
+    if not whisper_model:
+        whisper_model = "Systran/faster-whisper-large-v3"
+    st.caption(f"Whisper model: {whisper_model}")
+
     # Create model selection dropdown
     selected_model = st.selectbox(
         "Select Ollama Model",
@@ -153,6 +192,13 @@ def main():
 
     video_url = st.text_input("Enter the YouTube video URL:")
 
+    # Add checkboxes for whisper options
+    col1, col2 = st.columns(2)
+    with col1:
+        force_whisper = st.checkbox("Force Whisper", value=False)
+    with col2:
+        fallback_to_whisper = st.checkbox("Fallback to Whisper", value=True)
+
     # Support any video that has a valid YouTube ID
     if not "https://www.youtube.com/watch?v=" or "https://youtu.be/" in video_url:
         if "watch?v=" in video_url:
@@ -167,7 +213,13 @@ def main():
 
     if st.button("Summarize"):
         if video_url:
-            summary = summarize_video(video_url, selected_model, ollama_url)
+            summary = summarize_video(
+                video_url,
+                selected_model,
+                ollama_url,
+                fallback_to_whisper=fallback_to_whisper,
+                force_whisper=force_whisper,
+            )
             st.subheader("Summary:")
             st.write(summary)
         else:
diff --git a/src/whisper_module.py b/src/whisper_module.py
new file mode 100644
index 0000000..b911f40
--- /dev/null
+++ b/src/whisper_module.py
@@ -0,0 +1,18 @@
+from gradio_client import Client, handle_file
+from yt_audiophile import download_audio
+
+
+def transcribe(file_path, whisper_url=None, model=None):
+    """Transcribe the audio file at file_path on a Whisper Gradio server; return its result.
+
+    whisper_url/model default to $WHISPER_URL/$WHISPER_MODEL, then to the former constants.
+    """
+    import os  # function-scope import so the module's import lines stay untouched
+    url = whisper_url or os.getenv("WHISPER_URL") or "http://192.168.178.121:8300/"
+    return Client(url).predict(
+        file_path=handle_file(file_path), task="transcribe", temperature=0, stream=False,
+        model=model or os.getenv("WHISPER_MODEL") or "Systran/faster-whisper-large-v3",
+        api_name="/predict",
+    )
+
+
diff --git a/src/yt_audiophile.py b/src/yt_audiophile.py
new file mode 100644
index 0000000..3f5fa11
--- /dev/null
+++ b/src/yt_audiophile.py
@@ -0,0 +1,36 @@
+from pytubefix import YouTube
+from pytubefix.cli import on_progress
+
+"""e.g.
+https://www.youtube.com/watch?v=vwTDiLH6mqg
+"""
+
+def download_audio(url):
+    """Download the highest-bitrate mp4 audio stream of *url* to downloads/output.m4a."""
+    stream = YouTube(url, on_progress_callback=on_progress).streams.get_audio_only()
+    stream.download("downloads", "output.m4a")  # no video lookup: non-1080p videos must work too
+
+
+def itags(yt: YouTube, resolution="1080p"):
+    """Return ``(audio_itag, video_itag)`` for the preferred streams of *yt*.
+
+    audio_itag: itag of the audio-only stream with the highest bitrate
+        (0 when no audio-only stream reports a positive bitrate).
+    video_itag: itag of the first *resolution* stream found at 60, 30 or
+        24 fps, tried in that order, or None when no such stream exists.
+    """
+    audio_value, max_audio = 0, 0
+    for audio_stream in yt.streams.filter(only_audio=True):
+        # abr is read as e.g. "128kbps"; treat a missing value as 0.
+        abr = int((audio_stream.abr or "0kbps").replace("kbps", ""))
+        if abr > max_audio:
+            max_audio, audio_value = abr, audio_stream.itag
+    for fps in (60, 30, 24):
+        matches = yt.streams.filter(res=resolution, fps=fps)
+        if matches:
+            print(f"{fps} FPS")
+            return audio_value, matches[0].itag
+    # No stream at this resolution: report None instead of raising IndexError.
+    return audio_value, None
+
+