mirror of
https://github.com/tcsenpai/quick_audio_cloner.git
synced 2025-06-06 19:25:26 +00:00
first commit
This commit is contained in:
commit
66ca90a9de
3
.env.example
Normal file
3
.env.example
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
SPEAKER_WAV=./data/speaker.wav
|
||||||
|
LANGUAGE=en
|
||||||
|
SENTENCE="Join the dark side, we have cookies"
|
17
.gitignore
vendored
Normal file
17
.gitignore
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Python-generated files
|
||||||
|
__pycache__/
|
||||||
|
*.py[oc]
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
wheels/
|
||||||
|
*.egg-info
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
.venv
|
||||||
|
|
||||||
|
# env file
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Wav files
|
||||||
|
*.wav
|
||||||
|
!speaker.wav
|
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
|||||||
|
3.10.16
|
75
README.md
Normal file
75
README.md
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# Quick Audio Cloner
|
||||||
|
|
||||||
|
A powerful and user-friendly voice cloning tool that allows you to clone voices from audio samples and generate speech in multiple languages using state-of-the-art AI technology.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- 🎯 Voice Cloning: Clone any voice from WAV audio samples
|
||||||
|
- 🌍 Multi-language Support: Generate speech in various languages
|
||||||
|
- 🎥 YouTube Integration: Download voice samples directly from YouTube videos
|
||||||
|
- 🔊 Audio Processing: Automatic silence removal and audio cleaning
|
||||||
|
- 🖥️ Cross-platform: Works on Windows, macOS, and Linux
|
||||||
|
- 🎛️ User-friendly CLI Interface: Easy-to-use menu system
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Python 3.10.16 (or lower, **mandatory for TTS to be installed**)
|
||||||
|
- Internet connection for model download (first run only) and voice download (if needed)
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
**_NOTE: Skip this section if you are using `uv` (recommended)_**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, copy the .env.example file to .env:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
Adjust the values as needed. You can also override any of these settings at runtime from the interactive menu.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
**_NOTE: If you are using `uv`, dependencies will be resolved into a `.venv` directory at runtime_**
|
||||||
|
|
||||||
|
**IMPORTANT: The included voice sample is noisy and short, so the result might be low quality. Use a better one for production. Sorry.**
|
||||||
|
|
||||||
|
### Using uv
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Normal python
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python src/main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The application provides an interactive menu with the following options:
|
||||||
|
|
||||||
|
1. Start voice cloning with current settings
|
||||||
|
2. Select a target voice from available samples
|
||||||
|
3. Set a custom sentence to generate
|
||||||
|
4. Choose the target language
|
||||||
|
5. Download new voice samples from YouTube
|
||||||
|
6. Reset settings to the values from your `.env` file
|
||||||
|
7. Exit (duh)
|
||||||
|
|
||||||
|
## Voice Sample Guidelines
|
||||||
|
|
||||||
|
- Use clear, high-quality audio samples
|
||||||
|
- Samples should be in WAV format
|
||||||
|
- Ideal sample length: 10-30 seconds
|
||||||
|
- Avoid background noise or music
|
||||||
|
- Place voice samples in the `data/` directory
|
||||||
|
|
||||||
|
## Supported Languages
|
||||||
|
|
||||||
|
Use two-letter language codes (e.g., 'en' for English, 'fr' for French, 'es' for Spanish)
|
0
data/insert_here_your_voices
Normal file
0
data/insert_here_your_voices
Normal file
BIN
data/speaker.wav
Normal file
BIN
data/speaker.wav
Normal file
Binary file not shown.
0
output/output_files_will_be_created_here
Normal file
0
output/output_files_will_be_created_here
Normal file
12
pyproject.toml
Normal file
12
pyproject.toml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
[project]
|
||||||
|
name = "quick-audio-cloner"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Add your description here"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = "==3.10.16"
|
||||||
|
dependencies = [
|
||||||
|
"pydub>=0.25.1",
|
||||||
|
"python-dotenv>=1.0.1",
|
||||||
|
"tts>=0.22.0",
|
||||||
|
"yt-dlp>=2025.1.26",
|
||||||
|
]
|
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
pydub
|
||||||
|
python-dotenv
|
||||||
|
tts
|
||||||
|
yt-dlp
|
78
src/libs/audio_cleaner.py
Normal file
78
src/libs/audio_cleaner.py
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
from pydub import AudioSegment
|
||||||
|
from pydub.silence import detect_nonsilent
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
def clean_audio(
    wav_path: str, min_silence_len: int = 100, silence_thresh: int = -40
) -> str:
    """
    Trim leading and trailing silence from a WAV file.

    The trimmed audio is exported under the input's base name with a
    ``.wav`` extension, so a ``.wav`` input is overwritten in place.

    Args:
        wav_path (str): Path to the input WAV file
        min_silence_len (int): Minimum length of silence in milliseconds
        silence_thresh (int): Silence threshold in dB

    Returns:
        str: Path to the cleaned audio file, or ``wav_path`` unchanged when
            no non-silent audio was detected

    Raises:
        FileNotFoundError: If ``wav_path`` does not exist
    """
    # Validate input file
    if not os.path.exists(wav_path):
        raise FileNotFoundError(f"Audio file not found: {wav_path}")

    audio = AudioSegment.from_wav(wav_path)

    # Detect non-silent chunks
    nonsilent_ranges = detect_nonsilent(
        audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh
    )
    if not nonsilent_ranges:
        return wav_path  # Nothing but silence detected: leave the file alone

    # Keep everything from the start of the first non-silent range to the
    # end of the last one; silence in the middle is preserved.
    start_trim = nonsilent_ranges[0][0]
    end_trim = nonsilent_ranges[-1][1]
    cleaned_audio = audio[start_trim:end_trim]

    # splitext only strips an extension from the final path component, so a
    # dot elsewhere in the path (e.g. "./data/voice") cannot truncate it the
    # way rsplit(".", 1) would.
    output_path = os.path.splitext(wav_path)[0] + ".wav"
    cleaned_audio.export(output_path, format="wav")

    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line entry point: trim silence from the WAV file named on the
# command line and report where the result was written.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Clean silence from WAV files")
    cli.add_argument("wav_path", help="Path to the WAV file to clean")
    cli.add_argument(
        "--min-silence",
        type=int,
        default=100,
        help="Minimum length of silence in milliseconds (default: 100)",
    )
    cli.add_argument(
        "--silence-thresh",
        type=int,
        default=-40,
        help="Silence threshold in dB (default: -40)",
    )
    options = cli.parse_args()

    try:
        cleaned_path = clean_audio(
            options.wav_path,
            min_silence_len=options.min_silence,
            silence_thresh=options.silence_thresh,
        )
        print(f"Cleaned audio saved to: {cleaned_path}")
    except Exception as error:
        # Any failure is reported as a one-line message instead of a traceback.
        print(f"Error: {error}")
|
171
src/libs/youtube_wav.py
Normal file
171
src/libs/youtube_wav.py
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
import yt_dlp
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
import argparse
|
||||||
|
from libs.audio_cleaner import clean_audio
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_filename(filename: str) -> str:
    """
    Build a safe, lowercase ``.wav`` filename from an arbitrary name.

    The extension (if any) is dropped, every run of characters other than
    ASCII letters and digits is collapsed into a single underscore, the
    result is lowercased, and leading/trailing underscores are stripped.

    Args:
        filename (str): Original file name or free-form title

    Returns:
        str: Sanitized name ending in ``.wav``. When nothing usable remains
            (empty input, or a name with no ASCII letters/digits) the result
            is ``audio.wav`` rather than the hidden file ``.wav``.
    """
    # Remove file extension first
    base = os.path.splitext(filename)[0]
    # Replace each run of unwanted characters with one underscore, then
    # lowercase and trim underscores from both ends.
    sanitized = re.sub(r"[^a-zA-Z0-9]+", "_", base).lower().strip("_")
    # An empty stem would produce ".wav", which is hidden on POSIX systems
    # and has no usable name; fall back to a fixed placeholder instead.
    return f"{sanitized or 'audio'}.wav"
|
||||||
|
|
||||||
|
|
||||||
|
def download_youtube_audio(
    url: str, custom_name: str = None, output_path: str = None
) -> str:
    """
    Download audio from YouTube video and convert to WAV format.

    The download is attempted up to three times, sleeping 1 and then 4
    seconds between attempts. If the final sanitized file already exists in
    the output directory, the download is skipped.

    Args:
        url (str): YouTube video URL (supports both youtube.com and youtu.be)
        custom_name (str, optional): Custom name for the output file. When
            omitted, the sanitized video title is used.
        output_path (str, optional): Directory to save the WAV file in.
            If None, uses ./data

    Returns:
        str: Path to the downloaded WAV file

    Raises:
        Exception: If every attempt fails; the last underlying error is
            attached as the cause.
    """
    print("Starting download process...")

    # Set default output path to ./data
    output_dir = Path("data") if output_path is None else Path(output_path)
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Output directory: {output_dir}")

    # Configure yt-dlp options with custom filename template
    # NOTE(review): yt-dlp documents `http_headers` and
    # `retry_sleep_functions`; confirm the `headers` and `retry_sleep` keys
    # below actually take effect before relying on them.
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "wav",
            }
        ],
        # Use temporary filename template
        "outtmpl": str(output_dir / "%(title)s.%(ext)s"),
        "quiet": False,  # Show some progress
        "no_warnings": True,
        "retries": 10,
        "fragment_retries": 10,
        "retry_sleep": lambda _: random.uniform(1, 5),
        "source_address": "0.0.0.0",
        "headers": {
            "User-Agent": get_random_user_agent(),
        },
        "progress_hooks": [lambda d: print(f"Downloading: {d['status']}")],
    }

    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                print("Fetching video information...")
                # Extract video info first
                info = ydl.extract_info(url, download=False)
                # The audio extractor writes <title>.wav whatever the source
                # container was, so swap only the real extension. A string
                # replace of ".webm"/".m4a" misses other formats and can
                # also corrupt titles containing those substrings.
                temp_filename = Path(ydl.prepare_filename(info)).with_suffix(".wav")

                # Create sanitized filename based on custom name or video title
                if custom_name:
                    sanitized_filename = output_dir / sanitize_filename(custom_name)
                else:
                    print("No custom name provided, using video title...")
                    sanitized_filename = output_dir / sanitize_filename(
                        temp_filename.name
                    )

                print(f"Final filename will be: {sanitized_filename}")

                # Download if sanitized file doesn't exist
                if sanitized_filename.exists():
                    print("File already exists, skipping download.")
                else:
                    # Drop any stale intermediate left by an earlier run.
                    if temp_filename.exists():
                        temp_filename.unlink()
                    print("Starting download and conversion...")
                    ydl.download([url])
                    print("Download complete, renaming file...")
                    os.rename(temp_filename, sanitized_filename)

                print("Process completed successfully!")
                return str(sanitized_filename)

        except Exception as e:
            print(f"Attempt {attempt + 1} failed: {str(e)}")
            if attempt == max_attempts - 1:
                # Keep the original error as the cause for easier debugging.
                raise Exception(
                    f"Failed to download after {max_attempts} attempts: {str(e)}"
                ) from e
            delay = (attempt + 1) ** 2
            print(f"Retrying in {delay} seconds...")
            time.sleep(delay)
            # Rotate the user agent before the next attempt.
            ydl_opts["headers"]["User-Agent"] = get_random_user_agent()
|
||||||
|
|
||||||
|
|
||||||
|
def get_random_user_agent() -> str:
    """Pick one browser user-agent string at random from a fixed pool."""
    pool = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59",
    )
    chosen = random.choice(pool)
    return chosen
|
||||||
|
|
||||||
|
|
||||||
|
def download_from_cli() -> str:
    """
    Handle command line interface for downloading YouTube audio.

    The URL comes from ``--url`` or, failing that, an interactive prompt.
    The voice name is always prompted for. After the download, silence is
    trimmed from the saved file.

    Returns:
        str: Path to the downloaded file

    Raises:
        ValueError: If no URL was supplied
        Exception: Any download or cleaning error, re-raised after its
            message has been printed
    """
    parser = argparse.ArgumentParser(description="Download YouTube audio as WAV")
    parser.add_argument("--url", "-u", help="YouTube URL (youtube.com or youtu.be)")
    parser.add_argument("--output", "-o", help="Output directory (optional)")
    args = parser.parse_args()

    try:
        # Get URL from argument or prompt
        url = args.url or input("Enter YouTube URL: ").strip()
        if not url:
            # Fail fast: an empty URL would otherwise go through every retry
            # (and its sleeps) before failing anyway.
            raise ValueError("No YouTube URL provided")

        # Get custom name; blank input means "use the video title"
        custom_name = (
            input(
                "Enter a name for the voice (press Enter to use video title): "
            ).strip()
            or None
        )

        output_file = download_youtube_audio(url, custom_name, args.output)
        print("[*] Cleaning audio from silence...")
        clean_audio(output_file)
        print(f"\nSuccessfully saved to: {output_file}")
        return output_file
    except Exception as e:
        print(f"\nError: {str(e)}")
        raise
|
||||||
|
|
||||||
|
|
||||||
|
# Allow this module to be used directly as a download script.
if __name__ == "__main__":
    download_from_cli()
|
257
src/main.py
Normal file
257
src/main.py
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import torch
|
||||||
|
from TTS.api import TTS
|
||||||
|
import os
|
||||||
|
import dotenv
|
||||||
|
from libs.youtube_wav import download_from_cli
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Runtime state shared by the menu handlers in this module.
# The three settings are filled in by load_config() and may be changed from
# the menu; the model handle is filled in by load_model().
SPEAKER_WAV = None  # path to the reference voice sample
LANGUAGE = None  # target language code for synthesis
SENTENCE = None  # text to be spoken
tts = None  # TTS model instance
|
||||||
|
|
||||||
|
def load_config():
    """
    Load the settings from the environment into the module globals.

    Variables from a ``.env`` file are loaded first via ``dotenv``, then
    SPEAKER_WAV, LANGUAGE and SENTENCE are read with fallbacks so that none
    of them is ever left as ``None``.
    """
    global SPEAKER_WAV, LANGUAGE, SENTENCE

    # Load environment variables from .env file
    dotenv.load_dotenv()

    # Fall back to the sample voice shipped in data/ (the same value used in
    # .env.example): an unset SPEAKER_WAV would otherwise be None, which
    # breaks both the settings box and the synthesis call.
    SPEAKER_WAV = os.getenv("SPEAKER_WAV", "./data/speaker.wav")
    LANGUAGE = os.getenv("LANGUAGE", "en")  # Target language for TTS
    SENTENCE = os.getenv("SENTENCE", "Hello there mortal!")
|
||||||
|
|
||||||
|
def load_model():
    """
    Load the XTTS v2 model into the module-level ``tts`` handle.

    The model is created only once: later calls return immediately and
    reuse the instance, so repeated jobs in one session do not pay the
    model start-up cost again.
    """
    global tts
    if tts is not None:
        return

    # Determine if CUDA is available for GPU acceleration
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize the TTS model
    # Using XTTS v2 model which supports multiple languages
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
||||||
|
|
||||||
|
|
||||||
|
def tts_audio(output_path: str = "./output/out.wav"):
    """
    Synthesize the configured sentence to a WAV file in the cloned voice.

    Args:
        output_path (str): Path where the output WAV file will be saved.
            Missing parent directories are created.

    Note:
        Reads the module-level settings rather than taking them as
        arguments:
        - SENTENCE: The text to convert to speech
        - SPEAKER_WAV: Path to a reference audio file for voice cloning
        - LANGUAGE: Target language code (e.g., "en", "es", "fr")
        The model handle ``tts`` must already be set by load_model().
    """
    # A user-supplied name such as "demo/take1.wav" points into a directory
    # that may not exist yet; create it so the write does not fail.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    tts.tts_to_file(
        text=SENTENCE,
        speaker_wav=SPEAKER_WAV,
        language=LANGUAGE,
        file_path=output_path,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def print_settings():
    """
    Print current settings in a formatted box.

    The box is as wide as the terminal, capped at 80 columns, and falls
    back to 60 columns when the terminal size cannot be determined. Values
    too long for the box are truncated with "..."; unset values are shown
    as "(not set)".
    """
    # Get terminal width (default to 60 if can't determine)
    try:
        width = min(80, os.get_terminal_size().columns)  # Cap at 80 chars
    except (OSError, ValueError):
        # Typically raised when stdout is not attached to a terminal.
        width = 60

    # Create box elements
    h_line = "─" * (width - 2)
    top = f"┌{h_line}┐"
    bottom = f"└{h_line}┘"

    settings = [
        ("Speaker Voice", SPEAKER_WAV),
        ("Language", LANGUAGE),
        ("Target Sentence", SENTENCE),
    ]

    # Print formatted box
    print("\n" + top)
    print("│ Current Settings:".ljust(width - 1) + "│")
    print("│" + h_line + "│")

    for label, value in settings:
        # A setting may still be None (e.g. before configuration is loaded);
        # render it explicitly instead of crashing on len(None).
        value = "(not set)" if value is None else str(value)

        # Truncate value if too long
        max_value_length = width - len(label) - 7  # Account for spacing and box chars
        if len(value) > max_value_length:
            value = value[: max_value_length - 3] + "..."

        line = f"│ {label}: {value}"
        print(line.ljust(width - 1) + "│")

    print(bottom + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def _play_audio(path: str) -> None:
    """Play a WAV file with the platform's stock player; raise if it fails."""
    import platform
    import subprocess

    system = platform.system()
    if system == "Windows":
        import winsound  # Only importable on Windows

        winsound.PlaySound(path, winsound.SND_FILENAME)
    elif system == "Darwin":  # macOS
        # check=True turns a non-zero player exit into an exception so the
        # caller can report it instead of failing silently.
        subprocess.run(["afplay", path], check=True)
    elif system == "Linux":
        subprocess.run(["aplay", path], check=True)
    else:
        print(f"Unsupported operating system: {system}")


def start_job():
    """
    Start the TTS job with current settings.

    Prompts for an output filename (saved under ./output/, default
    out.wav), loads the model, synthesizes the current sentence and then
    offers to play the result. Playback errors are printed, not raised.
    """
    print("\nStarting job...")
    outfile = input(
        "Insert an output filename or press enter to use the default (out.wav): "
    ).strip()
    outfile = "./output/" + (outfile or "out.wav")

    load_model()
    tts_audio(outfile)
    print(f"\nAudio saved to: {outfile}")

    # Ask to play the file
    play_response = (
        input("\nWould you like to play the output file? [y/N] ").strip().lower()
    )
    if play_response in ("y", "yes"):
        try:
            _play_audio(outfile)
        except Exception as e:
            # Playback is a convenience; never let it abort the session.
            print(f"Error playing audio: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
def set_target_voice():
    """
    Set the target voice for TTS.

    Lists the ``.wav`` files found in ``data/``, asks the user to pick one
    by number and stores its path in SPEAKER_WAV. Entering 0 cancels and
    leaves the current voice unchanged.
    """
    global SPEAKER_WAV

    # Show the list of voices in data
    print("\nAvailable voices in data/:")
    data_path = Path("data")
    data_path.mkdir(exist_ok=True)

    # Sort so the numbering is the same every time the list is shown;
    # glob() itself makes no ordering promise.
    voice_files = sorted(data_path.glob("*.wav"))

    if not voice_files:
        # Option 5 is the YouTube download entry in the main menu.
        print("No voices found. Use option 5 to download a voice first.")
        return

    # Print numbered list (names without the extension)
    for i, voice_file in enumerate(voice_files, 1):
        print(f"{i}. {voice_file.stem}")

    # Get user selection
    while True:
        choice = input("\nSelect a voice number (or 0 to cancel): ").strip()
        if choice == "0":
            return

        try:
            choice_idx = int(choice) - 1
        except ValueError:
            print("Please enter a valid number.")
            continue

        if 0 <= choice_idx < len(voice_files):
            selected = voice_files[choice_idx]
            SPEAKER_WAV = str(selected)
            print(f"\nSelected voice: {selected.stem}")
            return

        print("Invalid selection. Please try again.")
|
||||||
|
|
||||||
|
|
||||||
|
def set_target_sentence():
    """
    Ask for the sentence to synthesize and store it in SENTENCE.

    Blank input keeps the current sentence and prints a notice.
    """
    global SENTENCE

    print("\nSetting target sentence...")
    typed = input("What should your voice say?\n").strip()
    if typed:
        SENTENCE = typed
        return
    print("No sentence has been detected. Using the current settings.\n")
|
||||||
|
|
||||||
|
|
||||||
|
def set_language():
    """
    Set the target language for TTS.

    Prompts for a two-letter language code, normalizes it to lowercase and
    stores it in LANGUAGE. Anything that is not exactly two letters is
    rejected and the current language is kept.
    """
    global LANGUAGE

    print("\nSetting target language...")
    new_language = (
        input("What should be the language used (two letters e.g. en,it,fr) ?\n")
        .strip()
        .lower()
    )
    if len(new_language) != 2 or not new_language.isalpha():
        print(
            "No valid two-letter language code has been detected. "
            "Using the current settings.\n"
        )
    else:
        LANGUAGE = new_language
|
||||||
|
|
||||||
|
|
||||||
|
def download_voice():
    """
    Download voice from YouTube.

    Failures are reported and swallowed so that a bad URL or a network
    error returns the user to the menu instead of ending the program.
    """
    print("\nDownloading voice from YouTube...")
    try:
        download_from_cli()
    except Exception:
        # download_from_cli prints the error details before re-raising, so
        # only a short follow-up is needed here.
        print("Download failed. Returning to the menu.")
|
||||||
|
|
||||||
|
|
||||||
|
def menu():
    """
    Run the interactive main menu until the user chooses Exit.

    Each pass prints the current settings and the categorized options,
    reads a choice and calls the matching handler. Unknown choices print an
    error and show the menu again.
    """
    menu_options = {
        "Main Options": [
            ("1", "Start the job using current settings", start_job),
            ("2", "Set a target voice", set_target_voice),
            ("3", "Set a target sentence", set_target_sentence),
            ("4", "Set a language", set_language),
        ],
        "Utilities": [
            ("5", "Download a voice from YouTube", download_voice),
            ("6", "Reset settings to .env", load_config),
            ("7", "Exit", None),
        ],
    }

    # Flat key -> handler lookup derived from the categorized layout above.
    handlers = {}
    for entries in menu_options.values():
        for key, _, func in entries:
            handlers[key] = func

    while True:
        print("\n" + "=" * 60 + "\n")
        print_settings()

        # Print menu with categories
        for category, entries in menu_options.items():
            print(f"\n{category}:")
            print("─" * 40)
            for key, label, _ in entries:
                print(f"{key}. {label}")

        choice = input("\nEnter your choice (1-7): ").strip()

        if choice not in handlers:
            print("\nInvalid choice. Please try again.")
            continue

        handler = handlers[choice]
        if handler:
            handler()
        elif choice == "7":  # Exit is the only entry without a handler
            print("\nGoodbye!")
            return
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: greet the user, load the settings, then hand control
# to the interactive menu.
if __name__ == "__main__":
    print("Welcome to Easy Voice Cloner!")
    load_config()
    menu()
|
Loading…
x
Reference in New Issue
Block a user