add silence removal using pydub and multithreading

+ fix favorites endpoint returning fewer items than the limit
+ add endpoint to get silence padding
+ add 'the ulitimate' and 'compilation' to compilation album filters
+ misc
This commit is contained in:
mungai-njoroge 2024-01-05 01:08:07 +03:00
parent 6d6c86cb93
commit 258897b649
28 changed files with 4949 additions and 24 deletions


@@ -153,49 +153,45 @@ def get_all_favorites():
     track_limit = int(track_limit)
     album_limit = int(album_limit)
     artist_limit = int(artist_limit)

-    # largest is x2 to account for broken hashes if any
-    largest = max(track_limit, album_limit, artist_limit)

     favs = favdb.get_all()
     favs.reverse()

-    count = {
-        "tracks": 0,
-        "albums": 0,
-        "artists": 0,
-    }

     tracks = []
     albums = []
     artists = []

+    track_master_hash = set(t.trackhash for t in TrackStore.tracks)
+    album_master_hash = set(a.albumhash for a in AlbumStore.albums)
+    artist_master_hash = set(a.artisthash for a in ArtistStore.artists)
+
     for fav in favs:
-        if fav[2] == FavType.track:
-            count["tracks"] += 1
-
-        if fav[2] == FavType.album:
-            count["albums"] += 1
-
-        if fav[2] == FavType.artist:
-            count["artists"] += 1
-
-        if not len(tracks) >= largest:
-            if fav[2] == FavType.track:
-                tracks.append(fav[1])
-
-        if not len(artists) >= largest:
-            if fav[2] == FavType.artist:
-                artists.append(fav[1])
-
-        if fav[2] == FavType.album:
-            albums.append(fav[1])
+        hash = fav[1]
+
+        if fav[2] == FavType.track:
+            tracks.append(hash) if hash in track_master_hash else None
+
+        if fav[2] == FavType.artist:
+            artists.append(hash) if hash in artist_master_hash else None
+
+        if fav[2] == FavType.album:
+            albums.append(hash) if hash in album_master_hash else None
+
+    count = {
+        "tracks": len(tracks),
+        "albums": len(albums),
+        "artists": len(artists),
+    }

     src_tracks = sorted(TrackStore.tracks, key=lambda x: x.trackhash)
     src_albums = sorted(AlbumStore.albums, key=lambda x: x.albumhash)
     src_artists = sorted(ArtistStore.artists, key=lambda x: x.artisthash)

-    tracks = UseBisection(src_tracks, "trackhash", tracks)()
-    albums = UseBisection(src_albums, "albumhash", albums)()
-    artists = UseBisection(src_artists, "artisthash", artists)()
+    tracks = UseBisection(src_tracks, "trackhash", tracks, limit=track_limit)()
+    albums = UseBisection(src_albums, "albumhash", albums, limit=album_limit)()
+    artists = UseBisection(src_artists, "artisthash", artists, limit=artist_limit)()

     tracks = remove_none(tracks)
     albums = remove_none(albums)
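The fix, in short: each favorite's hash is validated against a master hash set up front (broken hashes become `None` and are stripped by `remove_none`), and the per-type limits are pushed down into `UseBisection` so each list can fill up to its limit. A rough sketch of what a limit-aware bisection lookup could look like (hypothetical; `UseBisection`'s real implementation is not part of this diff):

```python
import bisect


class UseBisection:
    """Hypothetical sketch: resolve hashes against a list sorted by `attr`."""

    def __init__(self, source, attr, hashes, limit=None):
        self.source = source
        self.keys = [getattr(item, attr) for item in source]  # already sorted
        self.hashes = hashes
        self.limit = limit if limit is not None else len(hashes)

    def __call__(self):
        found, matched = [], 0
        for h in self.hashes:
            if matched >= self.limit:
                break
            i = bisect.bisect_left(self.keys, h)
            if i < len(self.keys) and self.keys[i] == h:
                found.append(self.source[i])
                matched += 1
            else:
                found.append(None)  # stripped later by remove_none()
        return found
```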


@@ -4,6 +4,7 @@ Contains all the track routes.
 import os

 from flask import Blueprint, send_file, request

+from app.lib.trackslib import get_silence_paddings
 from app.store.tracks import TrackStore

@@ -53,3 +54,15 @@ def send_track_file(trackhash: str):
         return msg, 404

     return msg, 404
+
+
+@api.route("/file/silence", methods=["POST"])
+def get_audio_silence():
+    data = request.get_json()
+
+    ending_file = data.get("end", None)  # ending file's filepath
+    starting_file = data.get("start", None)  # starting file's filepath
+
+    if ending_file is None or starting_file is None:
+        return {"msg": "No filepath provided"}, 400
+
+    return get_silence_paddings(ending_file, starting_file)
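The commit message mentions pydub and multithreading; a minimal sketch of what `get_silence_paddings` in `app/lib/trackslib.py` could look like under those assumptions (the real implementation is not shown in this view, and the helper names and return shape here are guesses):

```python
from concurrent.futures import ThreadPoolExecutor

from pydub import AudioSegment
from pydub.silence import detect_leading_silence


def leading_silence_ms(path: str) -> int:
    # silence at the start of the file, in milliseconds
    return detect_leading_silence(AudioSegment.from_file(path))


def trailing_silence_ms(path: str) -> int:
    # leading silence of the reversed audio == trailing silence of the original
    return detect_leading_silence(AudioSegment.from_file(path).reverse())


def get_silence_paddings(ending_file: str, starting_file: str) -> dict:
    # decode and scan the two files on separate threads
    with ThreadPoolExecutor(max_workers=2) as pool:
        ending = pool.submit(trailing_silence_ms, ending_file)
        starting = pool.submit(leading_silence_ms, starting_file)
        return {"ending": ending.result(), "starting": starting.result()}
```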

app/lib/pydub/.github/issue_template.md vendored Normal file

@@ -0,0 +1,17 @@
### Steps to reproduce
### Expected behavior
Tell us what should happen
### Actual behavior
Tell us what happens instead
### Your System configuration
- Python version:
- Pydub version:
- ffmpeg or avlib?:
- ffmpeg/avlib version:
### Is there an audio file you can include to help us reproduce?
You can include the audio file in this issue - just put it in a zip file and drag/drop the zip file into the github issue.

app/lib/pydub/.travis.yml Normal file

@@ -0,0 +1,19 @@
os: linux
dist: bionic # focal
language: python
before_install:
- sudo apt-get update --fix-missing
install:
- sudo apt-get install -y ffmpeg libopus-dev python-scipy python3-scipy
python:
- "2.7"
- "3.6"
- "3.7"
- "3.8"
- "3.9"
- "pypy2"
- "pypy3"
script:
- python test/test.py
after_script:
- pip install pylama && python -m pylama -i W,E501 pydub/ || true

app/lib/pydub/API.markdown Normal file

@@ -0,0 +1,693 @@
# API Documentation
This document is a work in progress.
If you're looking for some functionality in particular, it's a good idea to take a look at the [source code](https://github.com/jiaaro/pydub). Core functionality is mostly in `pydub/audio_segment.py`; a number of `AudioSegment` methods are in the `pydub/effects.py` module and added to `AudioSegment` via the effect registration process (the `register_pydub_effect()` decorator function).
Currently Undocumented:
- Playback (`pydub.playback`)
- Signal Processing (compression, EQ, normalize, speed change - `pydub.effects`, `pydub.scipy_effects`)
- Signal generators (Sine, Square, Sawtooth, Whitenoise, etc - `pydub.generators`)
- Effect registration system (basically the `pydub.utils.register_pydub_effect` decorator)
## AudioSegment()
`AudioSegment` objects are immutable, and support a number of operators.
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("/path/to/sound.wav", format="wav")
sound2 = AudioSegment.from_file("/path/to/another_sound.wav", format="wav")
# sound1 6 dB louder, then 3.5 dB quieter
louder = sound1 + 6
quieter = sound1 - 3.5
# sound1, with sound2 appended
combined = sound1 + sound2
# sound1 repeated 3 times
repeated = sound1 * 3
# duration
duration_in_milliseconds = len(sound1)
# first 5 seconds of sound1
beginning = sound1[:5000]
# last 5 seconds of sound1
end = sound1[-5000:]
# split sound1 in 5-second slices
slices = sound1[::5000]
# Advanced usage, if you have raw audio data:
sound = AudioSegment(
    # raw audio data (bytes)
    data=b'…',
    # 2 byte (16 bit) samples
    sample_width=2,
    # 44.1 kHz frame rate
    frame_rate=44100,
    # stereo
    channels=2
)
```
Any operations that combine multiple `AudioSegment` objects in *any* way will first ensure that they have the same number of channels, frame rate, sample rate, bit depth, etc. When these things do not match, the lower quality sound is modified to match the quality of the higher quality sound so that quality is not lost: mono is converted to stereo, bit depth and frame rate/sample rate are increased as needed. If you do not want this behavior, you may explicitly reduce the number of channels, bits, etc using the appropriate `AudioSegment` methods.
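For example, to keep everything at the lower quality instead of letting pydub upconvert (a sketch; file names are placeholders, and the second file is assumed to be mono at 11.025 kHz):

```python
from pydub import AudioSegment

stereo_44k = AudioSegment.from_file("/path/to/sound.wav")
mono_11k = AudioSegment.from_file("/path/to/another_sound.wav")

# downmix/downsample explicitly so pydub doesn't upconvert the mono sound
combined = stereo_44k.set_channels(1).set_frame_rate(11025) + mono_11k
```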
### AudioSegment(…).from_file()
Open an audio file as an `AudioSegment` instance and return it. There are also a number of wrappers provided for convenience, but you should probably just use this directly.
```python
from pydub import AudioSegment
# wave and raw don't use ffmpeg
wav_audio = AudioSegment.from_file("/path/to/sound.wav", format="wav")
raw_audio = AudioSegment.from_file("/path/to/sound.raw", format="raw",
                                   frame_rate=44100, channels=2, sample_width=2)
# all other formats use ffmpeg
mp3_audio = AudioSegment.from_file("/path/to/sound.mp3", format="mp3")
# use a file you've already opened (advanced …ish)
with open("/path/to/sound.wav", "rb") as wav_file:
audio_segment = AudioSegment.from_file(wav_file, format="wav")
# also supports the os.PathLike protocol for python >= 3.6
from pathlib import Path
wav_path = Path("path/to/sound.wav")
wav_audio = AudioSegment.from_file(wav_path)
```
The first argument is the path (as a string) of the file to read, **or** a file handle to read from.
**Supported keyword arguments**:
- `format` | example: `"aif"` | default: autodetected
Format of the output file. Supports `"wav"` and `"raw"` natively, requires ffmpeg for all other formats. `"raw"` files require 3 additional keyword arguments, `sample_width`, `frame_rate`, and `channels`, denoted below with: **`raw` only**. This extra info is required because raw audio files do not have headers to include this info in the file itself like wav files do.
- `sample_width` | example: `2`
**`raw` only** — Use `1` for 8-bit audio, `2` for 16-bit (CD quality), and `4` for 32-bit. It's the number of bytes per sample.
- `channels` | example: `1`
**`raw` only** — `1` for mono, `2` for stereo.
- `frame_rate` | example: `44100`
**`raw` only** — Also known as sample rate; common values are `44100` (44.1kHz - CD audio) and `48000` (48kHz - DVD audio)
- `start_second` | example: `2.0` | default: `None`
Offset (in seconds) to start loading the audio file. If `None`, the audio will start loading from the beginning.
- `duration` | example: `2.5` | default: `None`
Number of seconds to be loaded. If `None`, full audio will be loaded.
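For example, `start_second` and `duration` can be combined to load just a slice of a file:

```python
from pydub import AudioSegment

# load 2.5 seconds of audio, starting 2 seconds into the file
clip = AudioSegment.from_file("/path/to/sound.wav", start_second=2.0, duration=2.5)
```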
### AudioSegment(…).export()
Write the `AudioSegment` object to a file and return a file handle of the output file (you don't have to do anything with it, though).
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("/path/to/sound.wav", format="wav")
# simple export
file_handle = sound.export("/path/to/output.mp3", format="mp3")
# more complex export
file_handle = sound.export("/path/to/output.mp3",
format="mp3",
bitrate="192k",
tags={"album": "The Bends", "artist": "Radiohead"},
cover="/path/to/albumcovers/radioheadthebends.jpg")
# split sound in 5-second slices and export
for i, chunk in enumerate(sound[::5000]):
    with open("sound-%s.mp3" % i, "wb") as f:
        chunk.export(f, format="mp3")
```
The first argument is the location (as a string) to write the output, **or** a file handle to write to. If you do not pass an output file or path, a temporary file is generated.
**Supported keyword arguments**:
- `format` | example: `"aif"` | default: `"mp3"`
Format of the output file. Supports `"wav"` and `"raw"` natively, requires ffmpeg for all other formats.
- `codec` | example: `"libvorbis"`
For formats that may contain content encoded with different codecs, you can specify the codec you'd like the encoder to use. For example, the "ogg" format is often used with the "libvorbis" codec. (requires ffmpeg)
- `bitrate` | example: `"128k"`
For compressed formats, you can pass the bitrate you'd like the encoder to use (requires ffmpeg). Each codec accepts different bitrate arguments so take a look at the [ffmpeg documentation](https://www.ffmpeg.org/ffmpeg-codecs.html#Audio-Encoders) for details (bitrate usually shown as `-b`, `-ba` or `-a:b`).
- `tags` | example: `{"album": "1989", "artist": "Taylor Swift"}`
Allows you to supply media info tags for the encoder (requires ffmpeg). Not all formats can receive tags (mp3 can).
- `parameters` | example: `["-ac", "2"]`
Pass additional [command line parameters](https://www.ffmpeg.org/ffmpeg.html) to the ffmpeg call. These are added to the end of the call (in the output file section).
- `id3v2_version` | example: `"3"` | default: `"4"`
Set the ID3v2 version used by ffmpeg to add tags to the output file. If you want Windows Explorer to display tags, use `"3"` here ([source](http://superuser.com/a/453133)).
- `cover` | example: `"/path/to/imgfile.png"`
Allows you to supply a cover image (path to the image file). Currently, only MP3 files allow this keyword argument. Cover image must be a jpeg, png, bmp, or tiff file.
### AudioSegment.empty()
Creates a zero-duration `AudioSegment`.
```python
from pydub import AudioSegment
empty = AudioSegment.empty()
len(empty) == 0
```
This is useful for aggregation loops:
```python
from pydub import AudioSegment
sounds = [
    AudioSegment.from_wav("sound1.wav"),
    AudioSegment.from_wav("sound2.wav"),
    AudioSegment.from_wav("sound3.wav"),
]

playlist = AudioSegment.empty()
for sound in sounds:
    playlist += sound
```
### AudioSegment.silent()
Creates a silent audiosegment, which can be used as a placeholder, spacer, or as a canvas to overlay other sounds on top of.
```python
from pydub import AudioSegment
ten_second_silence = AudioSegment.silent(duration=10000)
```
**Supported keyword arguments**:
- `duration` | example: `3000` | default: `1000` (1 second)
Length of the silent `AudioSegment`, in milliseconds
- `frame_rate` | example `44100` | default: `11025` (11.025 kHz)
Frame rate (i.e., sample rate) of the silent `AudioSegment` in Hz
### AudioSegment.from_mono_audiosegments()
Creates a multi-channel audiosegment out of multiple mono audiosegments (two or more). Each mono audiosegment passed in should be exactly the same length, down to the frame count.
```python
from pydub import AudioSegment
left_channel = AudioSegment.from_wav("sound1.wav")
right_channel = AudioSegment.from_wav("sound1.wav")
stereo_sound = AudioSegment.from_mono_audiosegments(left_channel, right_channel)
```
### AudioSegment(…).dBFS
Returns the loudness of the `AudioSegment` in dBFS (dB relative to the maximum possible loudness). A square wave at maximum amplitude will be roughly 0 dBFS (maximum loudness), whereas a sine wave at maximum amplitude will be roughly -3 dBFS.
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
loudness = sound.dBFS
```
### AudioSegment(…).channels
Number of channels in this audio segment (1 means mono, 2 means stereo)
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
channel_count = sound.channels
```
### AudioSegment(…).sample_width
Number of bytes in each sample (1 means 8 bit, 2 means 16 bit, etc). CD Audio is 16 bit, (sample width of 2 bytes).
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
bytes_per_sample = sound.sample_width
```
### AudioSegment(…).frame_rate
CD Audio has a 44.1kHz sample rate, which means `frame_rate` will be `44100` (same as sample rate, see `frame_width`). Common values are `44100` (CD), `48000` (DVD), `22050`, `24000`, `12000` and `11025`.
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
frames_per_second = sound.frame_rate
```
### AudioSegment(…).frame_width
Number of bytes for each "frame". A frame contains a sample for each channel (so for stereo you have 2 samples per frame, which are played simultaneously). `frame_width` is equal to `channels * sample_width`. For CD Audio it'll be `4` (2 channels times 2 bytes per sample).
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
bytes_per_frame = sound.frame_width
```
### AudioSegment(…).rms
A measure of loudness. Used to compute dBFS, which is what you should use in most cases. Loudness is logarithmic (rms is not), which makes dB a much more natural scale.
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
loudness = sound.rms
```
### AudioSegment(…).max
The highest amplitude of any sample in the `AudioSegment`. Useful for things like normalization (which is provided in `pydub.effects.normalize`).
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
peak_amplitude = sound.max
```
### AudioSegment(…).max_dBFS
The highest amplitude of any sample in the `AudioSegment`, in dBFS (relative to the highest possible amplitude value). Useful for things like normalization (which is provided in `pydub.effects.normalize`).
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
normalized_sound = sound.apply_gain(-sound.max_dBFS)
```
### AudioSegment(…).duration_seconds
Returns the duration of the `AudioSegment` in seconds (`len(sound)` returns milliseconds). This is provided for convenience; it calls `len()` internally.
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
assert sound.duration_seconds == (len(sound) / 1000.0)
```
### AudioSegment(…).raw_data
The raw audio data of the AudioSegment. Useful for interacting with other audio libraries or weird APIs that want audio data in the form of a bytestring. Also comes in handy if you're implementing effects or other direct signal processing.
You probably don't need this, but if you do… you'll know.
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
raw_audio_data = sound.raw_data
```
### AudioSegment(…).frame_count()
Returns the number of frames in the `AudioSegment`. Optionally you may pass in a `ms` keyword argument to retrieve the number of frames in that number of milliseconds of audio in the `AudioSegment` (useful for slicing, etc).
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
number_of_frames_in_sound = sound.frame_count()
number_of_frames_in_200ms_of_sound = sound.frame_count(ms=200)
```
**Supported keyword arguments**:
- `ms` | example: `3000` | default: `None` (entire duration of `AudioSegment`)
When specified, method returns number of frames in X milliseconds of the `AudioSegment`
### AudioSegment(…).append()
Returns a new `AudioSegment`, created by appending another `AudioSegment` to this one (i.e., adding it to the end), optionally using a crossfade. `AudioSegment(…).append()` is used internally when adding `AudioSegment` objects together with the `+` operator.
By default a 100ms (0.1 second) crossfade is used to eliminate pops and crackles.
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("sound1.wav")
sound2 = AudioSegment.from_file("sound2.wav")
# default 100 ms crossfade
combined = sound1.append(sound2)
# 5000 ms crossfade
combined_with_5_sec_crossfade = sound1.append(sound2, crossfade=5000)
# no crossfade
no_crossfade1 = sound1.append(sound2, crossfade=0)
# no crossfade
no_crossfade2 = sound1 + sound2
```
**Supported keyword arguments**:
- `crossfade` | example: `3000` | default: `100` (100 ms)
Duration of the crossfade (in milliseconds) between this `AudioSegment` and the one being appended. Use `crossfade=0` for a hard cut.
### AudioSegment(…).overlay()
Overlays an `AudioSegment` onto this one. In the resulting `AudioSegment` they will play simultaneously. If the overlaid `AudioSegment` is longer than this one, the result will be truncated (so the end of the overlaid sound will be cut off). The result is always the same length as this `AudioSegment` even when using the `loop`, and `times` keyword arguments.
Since `AudioSegment` objects are immutable, you can get around this by overlaying the shorter sound on the longer one, or by creating a silent `AudioSegment` with the appropriate duration, and overlaying both sounds on to that one.
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("sound1.wav")
sound2 = AudioSegment.from_file("sound2.wav")
played_together = sound1.overlay(sound2)
sound2_starts_after_delay = sound1.overlay(sound2, position=5000)
volume_of_sound1_reduced_during_overlay = sound1.overlay(sound2, gain_during_overlay=-8)
sound2_repeats_until_sound1_ends = sound1.overlay(sound2, loop=True)
sound2_plays_twice = sound1.overlay(sound2, times=2)
# assume sound1 is 30 sec long and sound2 is 5 sec long:
sound2_plays_a_lot = sound1.overlay(sound2, times=10000)
len(sound1) == len(sound2_plays_a_lot)
```
**Supported keyword arguments**:
- `position` | example: `3000` | default: `0` (beginning of this `AudioSegment`)
The overlaid `AudioSegment` will not begin until X milliseconds have passed
- `loop` | example: `True` | default: `False`
The overlaid `AudioSegment` will repeat (starting at `position`) until the end of this `AudioSegment`
- `times` | example: `4` | default: `1`
The overlaid `AudioSegment` will repeat X times (starting at `position`) but will still be truncated to the length of this `AudioSegment`
- `gain_during_overlay` | example: `-6.0` | default: `0` (no change in volume during overlay)
Change the original audio by this many dB while overlaying audio. This can be used to make the original audio quieter while the overlaid audio plays.
### AudioSegment(…).apply_gain(`gain`)
Change the amplitude (generally, loudness) of the `AudioSegment`. Gain is specified in dB. This method is used internally by the `+` operator.
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("sound1.wav")
# make sound1 louder by 3.5 dB
louder_via_method = sound1.apply_gain(+3.5)
louder_via_operator = sound1 + 3.5
# make sound1 quieter by 5.7 dB
quieter_via_method = sound1.apply_gain(-5.7)
quieter_via_operator = sound1 - 5.7
```
### AudioSegment(…).fade()
A more general (more flexible) fade method. You may specify `start` and `end`, or one of the two along with duration (e.g., `start` and `duration`).
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("sound1.wav")
fade_louder_for_3_seconds_in_middle = sound1.fade(to_gain=+6.0, start=7500, duration=3000)
fade_quieter_between_2_and_3_seconds = sound1.fade(to_gain=-3.5, start=2000, end=3000)
# easy way is to use the .fade_in() convenience method. note: -120dB is basically silent.
fade_in_the_hard_way = sound1.fade(from_gain=-120.0, start=0, duration=5000)
fade_out_the_hard_way = sound1.fade(to_gain=-120.0, end=0, duration=5000)
```
**Supported keyword arguments**:
- `to_gain` | example: `-3.0` | default: `0` (0dB, no change)
Resulting change at the end of the fade. `-6.0` means the fade will be from 0dB (no change) to -6dB, and everything after the fade will be -6dB.
- `from_gain` | example: `-3.0` | default: `0` (0dB, no change)
Change at the beginning of the fade. `-6.0` means the fade (and all audio before it) will be at -6dB and will fade up to 0dB; the rest of the audio after the fade will be at 0dB (i.e., unchanged).
- `start` | example: `7500` | NO DEFAULT
Position to begin fading (in milliseconds). `5500` means fade will begin after 5.5 seconds.
- `end` | example: `4000` | NO DEFAULT
Position (in milliseconds) to stop fading. `4000` means the fade will end 4 seconds into the sound.
- `duration` | example: `4000` | NO DEFAULT
You can use `start` or `end` with `duration`, instead of specifying both; provided as a convenience.
### AudioSegment(…).fade_out()
Fade out (to silent) the end of this `AudioSegment`. Uses `.fade()` internally.
**Supported keyword arguments**:
- `duration` | example: `5000` | NO DEFAULT
How long (in milliseconds) the fade should last. Passed directly to `.fade()` internally
### AudioSegment(…).fade_in()
Fade in (from silent) the beginning of this `AudioSegment`. Uses `.fade()` internally.
**Supported keyword arguments**:
- `duration` | example: `5000` | NO DEFAULT
How long (in milliseconds) the fade should last. Passed directly to `.fade()` internally
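For example, chaining both fades (the file name is a placeholder):

```python
from pydub import AudioSegment

sound = AudioSegment.from_file("sound1.wav")

# 2 second fade in, 3 second fade out
faded = sound.fade_in(2000).fade_out(3000)
```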
### AudioSegment(…).reverse()
Make a copy of this `AudioSegment` that plays backwards. Useful for Pink Floyd, screwing around, and some audio processing algorithms.
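For example:

```python
from pydub import AudioSegment

sound = AudioSegment.from_file("sound1.wav")
backwards = sound.reverse()
```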
### AudioSegment(…).set_sample_width()
Creates an equivalent version of this `AudioSegment` with the specified sample width (in bytes). Increasing this value does not generally cause a reduction in quality. Reducing it *definitely* does cause a loss in quality. Higher sample width means more dynamic range.
### AudioSegment(…).set_frame_rate()
Creates an equivalent version of this `AudioSegment` with the specified frame rate (in Hz). Increasing this value does not generally cause a reduction in quality. Reducing it *definitely does* cause a loss in quality. Higher frame rate means larger frequency response (higher frequencies can be represented).
### AudioSegment(…).set_channels()
Creates an equivalent version of this `AudioSegment` with the specified number of channels (1 is Mono, 2 is Stereo). Converting from mono to stereo does not cause any audible change. Converting from stereo to mono may result in loss of quality (but only if the left and right channels differ).
### AudioSegment(…).split_to_mono()
Splits a stereo `AudioSegment` into two, one for each channel (Left/Right). Returns a list with the new `AudioSegment` objects with the left channel at index 0 and the right channel at index 1.
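For example, with a stereo file:

```python
from pydub import AudioSegment

sound = AudioSegment.from_file("sound1.wav")  # assuming a stereo file
left, right = sound.split_to_mono()
```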
### AudioSegment(…).apply_gain_stereo()
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("sound1.wav")
# make left channel 6dB quieter and right channel 2dB louder
stereo_balance_adjusted = sound1.apply_gain_stereo(-6, +2)
```
Apply gain to the left and right channel of a stereo `AudioSegment`. If the `AudioSegment` is mono, it will be converted to stereo before applying the gain.
Both gain arguments are specified in dB.
### AudioSegment(…).pan()
```python
from pydub import AudioSegment
sound1 = AudioSegment.from_file("sound1.wav")
# pan the sound 15% to the right
panned_right = sound1.pan(+0.15)
# pan the sound 50% to the left
panned_left = sound1.pan(-0.50)
```
Takes one positional argument, *pan amount*, which should be between -1.0 (100% left) and +1.0 (100% right)
When pan_amount == 0.0 the left/right balance is not changed.
Panning does not alter the *perceived* loudness, but since loudness is decreasing on one side, the other side needs to get louder to compensate. When panned hard left, the left channel will be 3dB louder and the right channel will be silent (and vice versa).
### AudioSegment(…).get_array_of_samples()
Returns the raw audio data as an array of (numeric) samples. Note: if the audio has multiple channels, the samples for each channel will be serialized; for example, stereo audio would look like `[sample_1_L, sample_1_R, sample_2_L, sample_2_R, …]`.
This method is mainly for use in implementing effects, and other processing.
```python
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
samples = sound.get_array_of_samples()
# then modify samples...
new_sound = sound._spawn(samples)
```
note that when using numpy or scipy you will need to convert back to an array before you spawn:
```python
import array
import numpy as np
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
samples = sound.get_array_of_samples()
# Example operation on audio data
shifted_samples = np.right_shift(samples, 1)
# now you have to convert back to an array.array
shifted_samples_array = array.array(sound.array_type, shifted_samples)
new_sound = sound._spawn(shifted_samples_array)
```
Here's how to convert to a numpy float32 array:
```python
import numpy as np
from pydub import AudioSegment
sound = AudioSegment.from_file("sound1.wav")
sound = sound.set_frame_rate(16000)
channel_sounds = sound.split_to_mono()
samples = [s.get_array_of_samples() for s in channel_sounds]
fp_arr = np.array(samples).T.astype(np.float32)
fp_arr /= np.iinfo(samples[0].typecode).max
```
And how to convert it back to an AudioSegment:
```python
import io
import pydub
import scipy.io.wavfile
wav_io = io.BytesIO()
scipy.io.wavfile.write(wav_io, 16000, fp_arr)
wav_io.seek(0)
sound = pydub.AudioSegment.from_wav(wav_io)
```
### AudioSegment(…).get_dc_offset()
Returns a value between -1.0 and 1.0 representing the DC offset of a channel. This is calculated using `audioop.avg()` and normalizing the result by the sample's max value.
**Supported keyword arguments**:
- `channel` | example: `2` | default: `1`
Selects left (1) or right (2) channel to calculate DC offset. If segment is mono, this value is ignored.
### AudioSegment(…).remove_dc_offset()
Removes DC offset from channel(s). This is done by using `audioop.bias()`, so watch out for overflows.
**Supported keyword arguments**:
- `channel` | example: `2` | default: None
Selects left (1) or right (2) channel to remove DC offset from. If value is None, removes from all available channels. If segment is mono, this value is ignored.
- `offset` | example: `-0.1` | default: None
Offset to be removed from channel(s). Calculates offset if it's None. Offset values must be between -1.0 and 1.0.
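A short example using both methods (the file name is a placeholder):

```python
from pydub import AudioSegment

sound = AudioSegment.from_file("sound1.wav")
offset = sound.get_dc_offset(channel=1)  # value between -1.0 and 1.0
centered = sound.remove_dc_offset()      # remove detected offset from all channels
```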
## Effects
Collection of DSP effects that are implemented by `AudioSegment` objects.
### AudioSegment(…).invert_phase()
Makes a copy of this `AudioSegment` and inverts the phase of the signal. Can generate anti-phase waves for noise suppression or cancellation.
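For example, overlaying a sound with its phase-inverted copy cancels it out:

```python
from pydub import AudioSegment

sound = AudioSegment.from_file("sound1.wav")
inverted = sound.invert_phase()
cancelled = sound.overlay(inverted)  # destructive interference: near-silence
```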
## Silence
Various functions for finding/manipulating silence in AudioSegments. For creating silent AudioSegments, see AudioSegment.silent().
### silence.detect_silence()
Returns a list of all silent sections [start, end] in milliseconds of audio_segment. Inverse of detect_nonsilent(). Can be very slow since it has to iterate over the whole segment.
```python
from pydub import AudioSegment, silence
print(silence.detect_silence(AudioSegment.silent(2000)))
# [[0, 2000]]
```
**Supported keyword arguments**:
- `min_silence_len` | example: `500` | default: 1000
The minimum length for silent sections in milliseconds. If it is greater than the length of the audio segment an empty list will be returned.
- `silence_thresh` | example: `-20` | default: -16
The upper bound for how quiet is silent in dBFS.
- `seek_step` | example: `5` | default: 1
Size of the step for checking for silence in milliseconds. Smaller is more precise. Must be a positive whole number.
### silence.detect_nonsilent()
Returns a list of all nonsilent sections [start, end] in milliseconds of audio_segment. Inverse of detect_silence() and has all the same arguments. Can be very slow since it has to iterate over the whole segment.
**Supported keyword arguments**:
- `min_silence_len` | example: `500` | default: 1000
The minimum length for silent sections in milliseconds. If it is greater than the length of the audio segment an empty list will be returned.
- `silence_thresh` | example: `-20` | default: -16
The upper bound for how quiet is silent in dBFS.
- `seek_step` | example: `5` | default: 1
Size of the step for checking for silence in milliseconds. Smaller is more precise. Must be a positive whole number.
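For example (the threshold values here are illustrative, not defaults):

```python
from pydub import AudioSegment, silence

sound = AudioSegment.from_file("sound1.wav")
nonsilent = silence.detect_nonsilent(sound, min_silence_len=500, silence_thresh=-40)
# a list of [start_ms, end_ms] pairs, e.g. [[120, 4300], [5100, 9800]]
```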
### silence.split_on_silence()
Returns list of audio segments from splitting audio_segment on silent sections.
**Supported keyword arguments**:
- `min_silence_len` | example: `500` | default: 1000
The minimum length for silent sections in milliseconds. If it is greater than the length of the audio segment an empty list will be returned.
- `silence_thresh` | example: `-20` | default: -16
The upper bound for how quiet is silent in dBFS.
- `seek_step` | example: `5` | default: 1
Size of the step for checking for silence in milliseconds. Smaller is more precise. Must be a positive whole number.
- `keep_silence` | example: `True` | default: 100
How much silence to keep, in ms or as a bool. Leaves some silence at the beginning and end of the chunks, which keeps the sound from seeming abruptly cut off.
When the length of the silence is less than the keep_silence duration, it is split evenly between the preceding and following non-silent segments.
If True is specified, all the silence is kept; if False, none is kept.
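For example, splitting a recording into chunks and exporting each one (thresholds are illustrative):

```python
from pydub import AudioSegment
from pydub.silence import split_on_silence

sound = AudioSegment.from_file("sound1.wav")
chunks = split_on_silence(sound, min_silence_len=500, silence_thresh=-40, keep_silence=100)

for i, chunk in enumerate(chunks):
    chunk.export("chunk-%s.mp3" % i, format="mp3")
```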
### silence.detect_leading_silence()
Returns the millisecond/index at which the leading silence ends. If the segment is entirely silent, the length of the audio_segment is returned.
```python
from pydub import AudioSegment, silence
print(silence.detect_leading_silence(AudioSegment.silent(2000)))
# 2000
```
**Supported keyword arguments**:
- `silence_thresh` | example: `-20` | default: -50
The upper bound for how quiet is silent in dBFS.
- `chunk_size` | example: `5` | default: 10
Size of the step for checking for silence in milliseconds. Smaller is more precise. Must be a positive whole number.
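A common use is trimming leading (and, by reversing, trailing) silence, which is essentially what a silence-removal feature needs:

```python
from pydub import AudioSegment, silence

sound = AudioSegment.from_file("sound1.wav")

start_trim = silence.detect_leading_silence(sound)
end_trim = silence.detect_leading_silence(sound.reverse())

trimmed = sound[start_trim:len(sound) - end_trim]
```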

app/lib/pydub/AUTHORS Normal file

@@ -0,0 +1,101 @@
James Robert
github: jiaaro
twitter: @jiaaro
web: jiaaro.com
email: pydub@jiaaro.com
Marc Webbie
github: marcwebbie
Jean-philippe Serafin
github: jeanphix
Anurag Ramdasan
github: AnuragRamdasan
Choongmin Lee
github: clee704
Patrick Pittman
github: ptpittman
Hunter Lang
github: hunterlang
Alexey
github: nihisil
Jaymz Campbell
github: jaymzcd
Ross McFarland
github: ross
John McMellen
github: jmcmellen
Johan Lövgren
github: dashj
Joachim Krüger
github: jkrgr
Shichao An
github: shichao-an
Michael Bortnyck
github: mbortnyck
André Cloete
github: aj-cloete
David Acacio
github: dacacioa
Thiago Abdnur
github: bolaum
Aurélien Ooms
github: aureooms
Mike Mattozzi
github: mmattozzi
Marcio Mazza
github: marciomazza
Sungsu Lim
github: proflim
Evandro Myller
github: emyller
Sérgio Agostinho
github: SergioRAgostinho
Antonio Larrosa
github: antlarr
Aaron Craig
github: craigthelinguist
Carlos del Castillo
github: greyalien502
Yudong Sun
github: sunjerry019
Jorge Perianez
github: JPery
Chendi Luo
github: Creonalia
Daniel Lefevre
github: dplefevre
Grzegorz Kotfis
github: gkotfis
Pål Orby
github: orby

app/lib/pydub/CHANGELOG.md Normal file

@@ -0,0 +1,168 @@
# v0.25.1
- Fix crashing bug in new scipy-powered EQ effects
# v0.25.0
- Don't show a runtime warning about the optional ffplay dependency being missing until someone tries to use it
- Documentation improvements
- Python 3.9 support
- Improved efficiency of loading wave files with `pydub.AudioSegment.from_file()`
- Ensure `pydub.AudioSegment().export()` always returns files with a seek position at the beginning of the file
- Added more EQ effects to `pydub.scipy_effects` (requires scipy to be installed)
- Fix a packaging bug where the LICENSE file was not included in the source distribution
- Add a way to instantiate a `pydub.AudioSegment()` with a portion of an audio file via `pydub.AudioSegment().from_file()`
# v0.24.1
- Fix bug where ffmpeg errors in Python 3 are illegible
- Fix bug where `split_on_silence` fails when there are one or fewer nonsilent segments
- Fix bug in fallback audioop implementation
# v0.24.0
- Fix inconsistent handling of 8-bit audio
- Fix bug where certain files will fail to parse
- Fix bug where pyaudio stream is not closed on error
- Allow codecs and parameters in wav and raw export
- Fix bug in `pydub.AudioSegment.from_file` where supplied codec is ignored
- Allow `pydub.silence.split_on_silence` to take a boolean for `keep_silence`
- Fix bug where `pydub.silence.split_on_silence` sometimes adds non-silence from adjacent segments
- Fix bug where `pydub.AudioSegment.extract_wav_headers` fails on empty wav files
- Add new function `pydub.silence.detect_leading_silence`
- Support conversion between an arbitrary number of channels and mono in `pydub.AudioSegment.set_channels`
- Fix several issues related to reading from filelike objects
# v0.23.1
- Fix bug in passing ffmpeg/avconv parameters for `pydub.AudioSegment.from_mp3()`, `pydub.AudioSegment.from_flv()`, `pydub.AudioSegment.from_ogg()`, and `pydub.AudioSegment.from_wav()`
- Fix logic bug in `pydub.effects.strip_silence()`
# v0.23.0
- Add support for playback via simpleaudio
- Allow users to override the type in `pydub.AudioSegment().get_array_of_samples()` (PR #313)
- Fix a bug where the wrong codec was used for 8-bit audio (PR #309 - issue #308)
# v0.22.1
- Fix `pydub.utils.mediainfo_json()` to work with newer, backwards-incompatible versions of ffprobe/avprobe
# v0.22.0
- Adds support for audio with frame rates (sample rates) of 48k and higher (requires scipy) (PR #262, fixes #134, #237, #209)
- Adds support for PEP 519 File Path protocol (PR #252)
- Fixes a few places where handles to temporary files are kept open (PR #280)
- Add the license file to the python package to aid other packaging projects (PR #279, fixes #274)
- Big fix for `pydub.silence.detect_silence()` (PR #263)
# v0.21.0
- NOTE: Semi-counterintuitive change: using a stride when slicing AudioSegment instances (for example, `sound[::5000]`) will return chunks of 5000ms (not 1ms chunks every 5000ms) (#222)
- Debug output from ffmpeg/avlib is no longer printed to the console unless you set up logging (see README for how to set up logging for your converter) (#223)
- All pydub exceptions are now subclasses of `pydub.exceptions.PydubException` (PR #244)
- The utilities in `pydub.silence` now accept a `seek_step` argument which can optionally be passed to improve the performance of silence detection (#211)
- Fix to `pydub.silence` utilities which allow you to detect perfect silence (#233)
- Fix a bug where threaded code screws up your terminal session due to ffmpeg inheriting the stdin from the parent process. (#231)
- Fix a bug where a crashing programs using pydub would leave behind their temporary files (#206)
# v0.20.0
- Add new parameter `gain_during_overlay` to `pydub.AudioSegment.overlay` which allows users to adjust the volume of the target AudioSegment during the portion of the segment which is overlaid with the additional AudioSegment.
- `pydub.playback.play()` No longer displays the (very verbose) playback "banner" when using ffplay
- Fix a confusing error message when using invalid crossfade durations (issue #193)
# v0.19.0
- Allow codec and ffmpeg/avconv parameters to be set in the `pydub.AudioSegment.from_file()` for more control while decoding audio files
- Allow `AudioSegment` objects with more than two channels to be split using `pydub.AudioSegment().split_to_mono()`
- Add support for inverting the phase of only one channel in a multi-channel `pydub.AudioSegment` object
- Fix a bug with the latest avprobe that broke `pydub.utils.mediainfo()`
- Add tests for webm encoding/decoding
# v0.18.0
- Add a new constructor: `pydub.AudioSegment.from_mono_audiosegments()` which allows users to create a multi-channel audiosegment out of multiple mono ones.
- Refactor `pydub.AudioSegment._sync()` to support an arbitrary number of audiosegment arguments.
# v0.17.0
- Add the ability to add a cover image to MP3 exports via the `cover` keyword argument to `pydub.AudioSegment().export()`
- Add `pydub.AudioSegment().get_dc_offset()` and `pydub.AudioSegment().remove_dc_offset()` which allow detection and removal of DC offset in audio files.
- Minor fixes for windows users
# v0.16.7
- Make `pydub.AudioSegment()._spawn()` accept array.array instances containing audio samples
# v0.16.6
- Make `pydub.AudioSegment()` objects playable inline in ipython notebooks.
- Add scipy powered high pass, low pass, and band pass filters, which can be high order filters (they take `order` as a keyword argument). They are used for `pydub.AudioSegment().high_pass_filter()`, `pydub.AudioSegment().low_pass_filter()`, `pydub.AudioSegment().band_pass_filter()` when the `pydub.scipy_effects` module is imported.
- Fix minor bug in `pydub.silence.detect_silence()`
# v0.16.5
- Update `pydub.AudioSegment()._spawn()` method to allow user subclassing of `pydub.AudioSegment`
- Add a workaround for incorrect duration reporting of some mp3 files on macOS
# v0.16.4
- Add support for radd (basically, allow `sum()` to operate on an iterable of `pydub.AudioSegment()` objects)
- Fix bug in 24-bit wav support (understatement. It didn't work right at all the first time)
# v0.16.3
- Add support for python 3.5 (overstatement. We just added python 3.5 to CI and it worked 😄)
- Add native support for 24-bit wav files (ffmpeg/avconv not required)
# v0.16.2
- Fix bug where you couldn't directly instantiate `pydub.AudioSegment` with `bytes` data in python 3
# v0.16.1
- pydub will use any ffmpeg/avconv binary that's in the current directory (as reported by `os.getcwd()`) before searching for a system install
# v0.16.0
- Make it easier to instantiate `pydub.AudioSegment()` directly when creating audio segments from raw audio data (without having to write it to a file first)
- Add `pydub.AudioSegment().get_array_of_samples()` method which returns the samples which make up an audio segment (you should usually prefer this over `pydub.AudioSegment().raw_data`)
- Add `pydub.AudioSegment().raw_data` property which returns the raw audio data for an audio segment as bytes (python 3) or a bytestring (python 2)
- Allow users to specify frame rate in `pydub.AudioSegment.silent()` constructor
# v0.15.0
- Add support for RAW audio (basically WAV format, but without wave headers)
- Add a new exception `pydub.exceptions.CouldntDecodeError` to indicate a failure of ffmpeg/avconv to decode a file (as indicated by ffmpeg/avconv exit code)
# v0.14.2
- Fix a bug in python 3.4 which failed to read wave files with no audio data (should have been audio segments with a duration of 0 ms)
# v0.14.1
- Fix a bug in `pydub.utils.mediainfo()` that caused inputs containing unescaped characters to raise a runtime error (inputs are not supposed to require escaping)
# v0.14.0
- Rename `pydub.AudioSegment().set_gain()` to `pydub.AudioSegment().apply_gain_stereo()` to better reflect its place in the world (as a counterpart to `pydub.AudioSegment().apply_gain()`)
# v0.13.0
- Add `pydub.AudioSegment().pan()` which returns a new stereo audio segment panned left/right as specified.
# v0.12.0
- Add a logger, `"pydub.converter"` which logs the ffmpeg commands being run by pydub.
- Add `pydub.AudioSegment().split_to_mono()` method which returns a list of mono audio segments. One for each channel in the original audio segment.
- Fix a bug in `pydub.silence.detect_silence()` which caused the function to break when a silent audio segment was equal in length to the minimum silence length. It should report a single span of silence covering the whole silent audio segment. Now it does.
- Fix a bug where uncommon wav formats (those not supported by the stdlib wave module) would throw an exception rather than converting to a more common format via ffmpeg/avconv
# v0.11.0
- Add `pydub.AudioSegment().max_dBFS` which reports the loudness (in dBFS) of the loudest point (i.e., highest amplitude sample) of an audio segment
# v0.10.0
- Overhaul Documentation
- Improve performance of `pydub.AudioSegment().overlay()`
- Add `pydub.AudioSegment().invert_phase()` which (shocker) inverts the phase of an audio segment
- Fix a type error in `pydub.AudioSegment.get_sample_slice()`
# v0.9.5
- Add `pydub.generators` module containing simple signal generation functions (white noise, sine, square wave, etc)
- Add a `loops` keyword argument to `pydub.AudioSegment().overlay()` which allows users to specify that the overlaid audio should be repeated (i.e., looped) a certain number of times, or indefinitely
# 0.9.4
- Fix a bug in db_to_float() where all values were off by a factor of 2
# 0.9.3
- Allow users to set the location of their converter by setting `pydub.AudioSegment.converter = "/path/to/ffmpeg"` and added a shim to support the old method of assigning to `pydub.AudioSegment.ffmpeg` (which is deprecated now that we support avconv)
# v0.9.2
- Add support for Python 3.4
- Audio files opened with format "wave" are treated as "wav" and "m4a" are treated as "mp4"
- Add `pydub.silence` module with simple utilities for detecting and removing silence.
- Fix a bug affecting auto-detection of ffmpeg/avconv on windows.
- Fix a bug that caused pydub to only work when ffmpeg/avconv is present (it should be able to work with WAV data without any dependencies)
# v0.9.1
- Add a runtime warning when ffmpeg/avconv cannot be found to aid debugging
# v0.9.0
- Added support for pypy (by reimplementing audioop in python). Also, we contributed our implementation to the pypy project, so that's 💯
- Add support for avconv as an alternative to ffmpeg
- Add a new helper module `pydub.playback` which allows you to quickly listen to an audio segment using ffplay (or avplay)
- Add new function `pydub.utils.mediainfo('/path/to/audio/file.ext')` which reports back the results of ffprobe (or avprobe) including codec, bitrate, channels, etc

@@ -0,0 +1,45 @@
Pydub loves user contributions.
We are happy to merge Pull Requests for features and bug fixes, of course. But, also spelling corrections, PEP 8 conformity, and platform-specific fixes.
Don't be shy!
### How to contribute:
1. Fork [pydub on github](https://github.com/jiaaro/pydub)
2. Commit changes
3. Send a Pull Request
you did it!
don't forget to append your name to the AUTHORS file ;)
There _are_ a few things that will make your Pull Request more likely to be merged:
1. Maintain backward compatibility
2. Avoid new dependencies
3. Include tests (and make sure they pass)
4. Write a short description of **what** is changed and **why**
5. Keep your Pull Request small, and focused on fixing one thing.
Smaller is easier to review, and easier to understand.
If you want to fix spelling and PEP 8 violations, send two pull requests :)
### Want to pitch in?
Take a look at our issue tracker for anything tagged [`bug`][bugs] or [`todo`][todos] - these are goals of the project and your improvements are _very_ likely to be merged!
That being said, there are many possible contributions we haven't thought of already. Those are welcome too!
Here are some general topics of interest for future development:
- Make it easier to get started with pydub
- More/better audio effects
- Support more audio formats
- Improve handling of large audio files
- Make things faster and use less memory.
[bugs]: https://github.com/jiaaro/pydub/issues?q=is%3Aissue+is%3Aopen+label%3Abug
[todos]: https://github.com/jiaaro/pydub/issues?q=is%3Aissue+is%3Aopen+label%3Atodo

app/lib/pydub/LICENSE Normal file

@@ -0,0 +1,20 @@
Copyright (c) 2011 James Robert, http://jiaaro.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@@ -0,0 +1 @@
include LICENSE

@@ -0,0 +1,333 @@
# Pydub [![Build Status](https://travis-ci.org/jiaaro/pydub.svg?branch=master)](https://travis-ci.org/jiaaro/pydub) [![Build status](https://ci.appveyor.com/api/projects/status/gy1ucp9o5khq7fqi/branch/master?svg=true)](https://ci.appveyor.com/project/jiaaro/pydub/branch/master)
Pydub lets you do stuff to audio in a way that isn't stupid.
**Stuff you might be looking for**:
- [Installing Pydub](https://github.com/jiaaro/pydub#installation)
- [API Documentation](https://github.com/jiaaro/pydub/blob/master/API.markdown)
- [Dependencies](https://github.com/jiaaro/pydub#dependencies)
- [Playback](https://github.com/jiaaro/pydub#playback)
- [Setting up ffmpeg](https://github.com/jiaaro/pydub#getting-ffmpeg-set-up)
- [Questions/Bugs](https://github.com/jiaaro/pydub#bugs--questions)
## Quickstart
Open a WAV file
```python
from pydub import AudioSegment
song = AudioSegment.from_wav("never_gonna_give_you_up.wav")
```
...or an mp3
```python
song = AudioSegment.from_mp3("never_gonna_give_you_up.mp3")
```
... or an ogg, or flv, or [anything else ffmpeg supports](http://www.ffmpeg.org/general.html#File-Formats)
```python
ogg_version = AudioSegment.from_ogg("never_gonna_give_you_up.ogg")
flv_version = AudioSegment.from_flv("never_gonna_give_you_up.flv")
mp4_version = AudioSegment.from_file("never_gonna_give_you_up.mp4", "mp4")
wma_version = AudioSegment.from_file("never_gonna_give_you_up.wma", "wma")
aac_version = AudioSegment.from_file("never_gonna_give_you_up.aiff", "aac")
```
Slice audio:
```python
# pydub does things in milliseconds
ten_seconds = 10 * 1000
first_10_seconds = song[:ten_seconds]
last_5_seconds = song[-5000:]
```
Make the beginning louder and the end quieter
```python
# boost volume by 6dB
beginning = first_10_seconds + 6
# reduce volume by 3dB
end = last_5_seconds - 3
```
Concatenate audio (add one file to the end of another)
```python
without_the_middle = beginning + end
```
How long is it?
```python
without_the_middle.duration_seconds == 15.0
```
AudioSegments are immutable
```python
# song is not modified
backwards = song.reverse()
```
Crossfade (again, beginning and end are not modified)
```python
# 1.5 second crossfade
with_style = beginning.append(end, crossfade=1500)
```
Repeat
```python
# repeat the clip twice
do_it_over = with_style * 2
```
Fade (note that you can chain operations because everything returns
an AudioSegment)
```python
# 2 sec fade in, 3 sec fade out
awesome = do_it_over.fade_in(2000).fade_out(3000)
```
Save the results (again whatever ffmpeg supports)
```python
awesome.export("mashup.mp3", format="mp3")
```
Save the results with tags (metadata)
```python
awesome.export("mashup.mp3", format="mp3", tags={'artist': 'Various artists', 'album': 'Best of 2011', 'comments': 'This album is awesome!'})
```
You can pass an optional bitrate argument to export using any syntax ffmpeg
supports.
```python
awesome.export("mashup.mp3", format="mp3", bitrate="192k")
```
Any further arguments supported by ffmpeg can be passed as a list in a
'parameters' argument, with switch first, argument second. Note that no
validation takes place on these parameters, and you may be limited by what
your particular build of ffmpeg/avlib supports.
```python
# Use preset mp3 quality 0 (equivalent to lame V0)
awesome.export("mashup.mp3", format="mp3", parameters=["-q:a", "0"])
# Mix down to two channels and set hard output volume
awesome.export("mashup.mp3", format="mp3", parameters=["-ac", "2", "-vol", "150"])
```
## Debugging
Most issues people run into are related to converting between formats using
ffmpeg/avlib. Pydub provides a logger that outputs the subprocess calls to
help you track down issues:
```python
>>> import logging
>>> l = logging.getLogger("pydub.converter")
>>> l.setLevel(logging.DEBUG)
>>> l.addHandler(logging.StreamHandler())
>>> AudioSegment.from_file("./test/data/test1.mp3")
subprocess.call(['ffmpeg', '-y', '-i', '/var/folders/71/42k8g72x4pq09tfp920d033r0000gn/T/tmpeZTgMy', '-vn', '-f', 'wav', '/var/folders/71/42k8g72x4pq09tfp920d033r0000gn/T/tmpK5aLcZ'])
<pydub.audio_segment.AudioSegment object at 0x101b43e10>
```
Don't worry about the temporary files used in the conversion. They're cleaned up
automatically.
## Bugs & Questions
You can file bugs in our [github issues tracker](https://github.com/jiaaro/pydub/issues),
and ask any technical questions on
[Stack Overflow using the pydub tag](http://stackoverflow.com/questions/ask?tags=pydub).
We keep an eye on both.
## Installation
Installing pydub is easy, but don't forget to install ffmpeg/avlib (the next section in this doc)
pip install pydub
Or install the latest dev version from github (or replace `@master` with a [release version like `@v0.12.0`](https://github.com/jiaaro/pydub/releases))…
pip install git+https://github.com/jiaaro/pydub.git@master
-OR-
git clone https://github.com/jiaaro/pydub.git
-OR-
Copy the pydub directory into your python path. Zip
[here](https://github.com/jiaaro/pydub/zipball/master)
## Dependencies
You can open and save WAV files with pure python. For opening and saving non-wav
files like mp3 you'll need [ffmpeg](http://www.ffmpeg.org/) or
[libav](http://libav.org/).
### Playback
You can play audio if you have one of these installed (simpleaudio _strongly_ recommended, even if you are installing ffmpeg/libav):
- [simpleaudio](https://simpleaudio.readthedocs.io/en/latest/)
- [pyaudio](https://people.csail.mit.edu/hubert/pyaudio/docs/#)
- ffplay (usually bundled with ffmpeg, see the next section)
- avplay (usually bundled with libav, see the next section)
```python
from pydub import AudioSegment
from pydub.playback import play
sound = AudioSegment.from_file("mysound.wav", format="wav")
play(sound)
```
## Getting ffmpeg set up
You may use **libav or ffmpeg**.
Mac (using [homebrew](http://brew.sh)):
```bash
# libav
brew install libav
#### OR #####
# ffmpeg
brew install ffmpeg
```
Linux (using aptitude):
```bash
# libav
apt-get install libav-tools libavcodec-extra
#### OR #####
# ffmpeg
apt-get install ffmpeg libavcodec-extra
```
Windows:
1. Download and extract libav from [Windows binaries provided here](http://builds.libav.org/windows/).
2. Add the libav `/bin` folder to your PATH envvar
3. `pip install pydub`
## Important Notes
`AudioSegment` objects are [immutable](http://www.devshed.com/c/a/Python/String-and-List-Python-Object-Types/1/)
### Ogg exporting and default codecs
The Ogg specification ([rfc5334](http://tools.ietf.org/html/rfc5334)) does not specify
the codec to use, this choice is left up to the user. Vorbis and Theora are just
some of a number of potential codecs (see page 3 of the rfc) that can be used for the
encapsulated data.
When no codec is specified exporting to `ogg` will _default_ to using `vorbis`
as a convenience. That is:
```python
from pydub import AudioSegment
song = AudioSegment.from_mp3("test/data/test1.mp3")
song.export("out.ogg", format="ogg") # Is the same as:
song.export("out.ogg", format="ogg", codec="libvorbis")
```
## Example Use
Suppose you have a directory filled with *mp4* and *flv* videos and you want to convert all of them to *mp3* so you can listen to them on your mp3 player.
```python
import os
import glob
from pydub import AudioSegment
video_dir = '/home/johndoe/downloaded_videos/' # Path where the videos are located
extension_list = ('*.mp4', '*.flv')
os.chdir(video_dir)
for extension in extension_list:
    for video in glob.glob(extension):
        mp3_filename = os.path.splitext(os.path.basename(video))[0] + '.mp3'
        AudioSegment.from_file(video).export(mp3_filename, format='mp3')
```
### How about another example?
```python
from glob import glob
from pydub import AudioSegment
playlist_songs = [AudioSegment.from_mp3(mp3_file) for mp3_file in glob("*.mp3")]
first_song = playlist_songs.pop(0)
# let's just include the first 30 seconds of the first song (slicing
# is done by milliseconds)
beginning_of_song = first_song[:30*1000]
playlist = beginning_of_song
for song in playlist_songs:
    # We don't want an abrupt stop at the end, so let's do a 10 second crossfade
    playlist = playlist.append(song, crossfade=(10 * 1000))
# let's fade out the end of the last song
playlist = playlist.fade_out(30)
# hmm I wonder how long it is... ( len(audio_segment) returns milliseconds )
playlist_length = len(playlist) / (1000*60)
# lets save it!
with open("%s_minute_playlist.mp3" % playlist_length, 'wb') as out_f:
playlist.export(out_f, format='mp3')
```
## License ([MIT License](http://opensource.org/licenses/mit-license.php))
Copyright © 2011 James Robert, http://jiaaro.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@@ -0,0 +1,30 @@
build: false

environment:
  matrix:
    - PYTHON: "C:/Python27"
      FFMPEG: "4.2.3"
    - PYTHON: "C:/Python34"
      FFMPEG: "4.2.3"
    - PYTHON: "C:/Python35"
      FFMPEG: "4.2.3"
    - PYTHON: "C:/Python36"
      FFMPEG: "4.2.3"
    - PYTHON: "C:/Python36"
      FFMPEG: "latest"

matrix:
  allow_failures:
    - FFMPEG: "latest"

init:
  - "ECHO %PYTHON%"
  - ps: "ls C:/Python*"

install:
  - "%PYTHON%/python.exe -m pip install wheel"
  - "%PYTHON%/python.exe -m pip install -e ."

  # Install ffmpeg
  - ps: Start-FileDownload ('https://github.com/advancedfx/ffmpeg.zeranoe.com-builds-mirror/releases/download/20200915/ffmpeg-' + $env:FFMPEG + '-win64-shared.zip') ffmpeg-shared.zip
  - 7z x ffmpeg-shared.zip > NULL
  - "SET PATH=%cd%\\ffmpeg-%FFMPEG%-win64-shared\\bin;%PATH%"

  # check ffmpeg installation (also shows version)
  - "ffmpeg.exe -version"

test_script:
  - "%PYTHON%/python.exe test/test.py"

@@ -0,0 +1 @@
from .audio_segment import AudioSegment

File diff suppressed because it is too large

@@ -0,0 +1,341 @@
import sys
import math
import array
from .utils import (
db_to_float,
ratio_to_db,
register_pydub_effect,
make_chunks,
audioop,
get_min_max_value
)
from .silence import split_on_silence
from .exceptions import TooManyMissingFrames, InvalidDuration
if sys.version_info >= (3, 0):
xrange = range
@register_pydub_effect
def apply_mono_filter_to_each_channel(seg, filter_fn):
n_channels = seg.channels
channel_segs = seg.split_to_mono()
channel_segs = [filter_fn(channel_seg) for channel_seg in channel_segs]
out_data = seg.get_array_of_samples()
for channel_i, channel_seg in enumerate(channel_segs):
for sample_i, sample in enumerate(channel_seg.get_array_of_samples()):
index = (sample_i * n_channels) + channel_i
out_data[index] = sample
return seg._spawn(out_data)
@register_pydub_effect
def normalize(seg, headroom=0.1):
"""
headroom is how close to the maximum volume to boost the signal up to (specified in dB)
"""
peak_sample_val = seg.max
# if the max is 0, this audio segment is silent, and can't be normalized
if peak_sample_val == 0:
return seg
target_peak = seg.max_possible_amplitude * db_to_float(-headroom)
needed_boost = ratio_to_db(target_peak / peak_sample_val)
return seg.apply_gain(needed_boost)
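# Usage sketch (illustrative comment, not part of the upstream source): with
# the default 0.1 dB of headroom, a segment peaking at -6 dBFS receives
# roughly +5.9 dB of gain, e.g.:
#   louder = AudioSegment.from_file("quiet.mp3").normalize(headroom=0.1)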
@register_pydub_effect
def speedup(seg, playback_speed=1.5, chunk_size=150, crossfade=25):
# we will keep audio in 150ms chunks since one waveform at 20Hz is 50ms long
# (20 Hz is the lowest frequency audible to humans)
# portion of AUDIO TO KEEP. if playback speed is 1.25 we keep 80% (0.8) and
# discard 20% (0.2)
atk = 1.0 / playback_speed
    if playback_speed < 2.0:
        # throwing out less than half the audio - keep the chunk size fixed
        ms_to_remove_per_chunk = int(chunk_size * (1 - atk) / atk)
    else:
        # throwing out at least half the audio - fix the amount removed per chunk
        ms_to_remove_per_chunk = int(chunk_size)
chunk_size = int(atk * chunk_size / (1 - atk))
# the crossfade cannot be longer than the amount of audio we're removing
crossfade = min(crossfade, ms_to_remove_per_chunk - 1)
# DEBUG
#print("chunk: {0}, rm: {1}".format(chunk_size, ms_to_remove_per_chunk))
chunks = make_chunks(seg, chunk_size + ms_to_remove_per_chunk)
if len(chunks) < 2:
raise Exception("Could not speed up AudioSegment, it was too short {2:0.2f}s for the current settings:\n{0}ms chunks at {1:0.1f}x speedup".format(
chunk_size, playback_speed, seg.duration_seconds))
# we'll actually truncate a bit less than we calculated to make up for the
# crossfade between chunks
ms_to_remove_per_chunk -= crossfade
# we don't want to truncate the last chunk since it is not guaranteed to be
# the full chunk length
last_chunk = chunks[-1]
chunks = [chunk[:-ms_to_remove_per_chunk] for chunk in chunks[:-1]]
out = chunks[0]
for chunk in chunks[1:]:
out = out.append(chunk, crossfade=crossfade)
out += last_chunk
return out
@register_pydub_effect
def strip_silence(seg, silence_len=1000, silence_thresh=-16, padding=100):
if padding > silence_len:
raise InvalidDuration("padding cannot be longer than silence_len")
chunks = split_on_silence(seg, silence_len, silence_thresh, padding)
crossfade = padding / 2
if not len(chunks):
return seg[0:0]
seg = chunks[0]
for chunk in chunks[1:]:
seg = seg.append(chunk, crossfade=crossfade)
return seg
@register_pydub_effect
def compress_dynamic_range(seg, threshold=-20.0, ratio=4.0, attack=5.0, release=50.0):
"""
Keyword Arguments:
threshold - default: -20.0
Threshold in dBFS. default of -20.0 means -20dB relative to the
maximum possible volume. 0dBFS is the maximum possible value so
            all values for this argument should be negative.
ratio - default: 4.0
Compression ratio. Audio louder than the threshold will be
reduced to 1/ratio the volume. A ratio of 4.0 is equivalent to
a setting of 4:1 in a pro-audio compressor like the Waves C1.
attack - default: 5.0
Attack in milliseconds. How long it should take for the compressor
to kick in once the audio has exceeded the threshold.
release - default: 50.0
Release in milliseconds. How long it should take for the compressor
            to stop compressing after the audio has fallen below the threshold.
For an overview of Dynamic Range Compression, and more detailed explanation
of the related terminology, see:
http://en.wikipedia.org/wiki/Dynamic_range_compression
"""
thresh_rms = seg.max_possible_amplitude * db_to_float(threshold)
look_frames = int(seg.frame_count(ms=attack))
def rms_at(frame_i):
return seg.get_sample_slice(frame_i - look_frames, frame_i).rms
def db_over_threshold(rms):
if rms == 0: return 0.0
db = ratio_to_db(rms / thresh_rms)
return max(db, 0)
output = []
# amount to reduce the volume of the audio by (in dB)
attenuation = 0.0
attack_frames = seg.frame_count(ms=attack)
release_frames = seg.frame_count(ms=release)
for i in xrange(int(seg.frame_count())):
rms_now = rms_at(i)
# with a ratio of 4.0 this means the volume will exceed the threshold by
# 1/4 the amount (of dB) that it would otherwise
max_attenuation = (1 - (1.0 / ratio)) * db_over_threshold(rms_now)
attenuation_inc = max_attenuation / attack_frames
attenuation_dec = max_attenuation / release_frames
if rms_now > thresh_rms and attenuation <= max_attenuation:
attenuation += attenuation_inc
attenuation = min(attenuation, max_attenuation)
else:
attenuation -= attenuation_dec
attenuation = max(attenuation, 0)
frame = seg.get_frame(i)
if attenuation != 0.0:
frame = audioop.mul(frame,
seg.sample_width,
db_to_float(-attenuation))
output.append(frame)
return seg._spawn(data=b''.join(output))
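# Worked example (illustrative comment): with the defaults threshold=-20.0 and
# ratio=4.0, audio that sits 8 dB above the threshold is attenuated by
# (1 - 1/4) * 8 = 6 dB, leaving it only 2 dB over once the attack completes:
#   compressed = seg.compress_dynamic_range(threshold=-20.0, ratio=4.0)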
# Invert the phase of the signal.
@register_pydub_effect
def invert_phase(seg, channels=(1, 1)):
"""
    channels - specifies which channel (left or right) to reverse the phase of.
    Note that a mono AudioSegment cannot have a single channel inverted; it
    raises unless the segment is already stereo (or channels == (1, 1)).
"""
if channels == (1, 1):
inverted = audioop.mul(seg._data, seg.sample_width, -1.0)
return seg._spawn(data=inverted)
else:
if seg.channels == 2:
left, right = seg.split_to_mono()
else:
raise Exception("Can't implicitly convert an AudioSegment with " + str(seg.channels) + " channels to stereo.")
if channels == (1, 0):
left = left.invert_phase()
else:
right = right.invert_phase()
return seg.from_mono_audiosegments(left, right)
# High and low pass filters based on implementation found on Stack Overflow:
# http://stackoverflow.com/questions/13882038/implementing-simple-high-and-low-pass-filters-in-c
@register_pydub_effect
def low_pass_filter(seg, cutoff):
"""
cutoff - Frequency (in Hz) where higher frequency signal will begin to
be reduced by 6dB per octave (doubling in frequency) above this point
"""
RC = 1.0 / (cutoff * 2 * math.pi)
dt = 1.0 / seg.frame_rate
alpha = dt / (RC + dt)
original = seg.get_array_of_samples()
filteredArray = array.array(seg.array_type, original)
frame_count = int(seg.frame_count())
last_val = [0] * seg.channels
for i in range(seg.channels):
last_val[i] = filteredArray[i] = original[i]
for i in range(1, frame_count):
for j in range(seg.channels):
offset = (i * seg.channels) + j
last_val[j] = last_val[j] + (alpha * (original[offset] - last_val[j]))
filteredArray[offset] = int(last_val[j])
return seg._spawn(data=filteredArray)
@register_pydub_effect
def high_pass_filter(seg, cutoff):
"""
cutoff - Frequency (in Hz) where lower frequency signal will begin to
be reduced by 6dB per octave (doubling in frequency) below this point
"""
RC = 1.0 / (cutoff * 2 * math.pi)
dt = 1.0 / seg.frame_rate
alpha = RC / (RC + dt)
minval, maxval = get_min_max_value(seg.sample_width * 8)
original = seg.get_array_of_samples()
filteredArray = array.array(seg.array_type, original)
frame_count = int(seg.frame_count())
last_val = [0] * seg.channels
for i in range(seg.channels):
last_val[i] = filteredArray[i] = original[i]
for i in range(1, frame_count):
for j in range(seg.channels):
offset = (i * seg.channels) + j
offset_minus_1 = ((i-1) * seg.channels) + j
last_val[j] = alpha * (last_val[j] + original[offset] - original[offset_minus_1])
filteredArray[offset] = int(min(max(last_val[j], minval), maxval))
return seg._spawn(data=filteredArray)
@register_pydub_effect
def pan(seg, pan_amount):
"""
pan_amount should be between -1.0 (100% left) and +1.0 (100% right)
When pan_amount == 0.0 the left/right balance is not changed.
    Panning does not alter the *perceived* loudness, but since loudness
is decreasing on one side, the other side needs to get louder to
compensate. When panned hard left, the left channel will be 3dB louder.
"""
if not -1.0 <= pan_amount <= 1.0:
raise ValueError("pan_amount should be between -1.0 (100% left) and +1.0 (100% right)")
max_boost_db = ratio_to_db(2.0)
boost_db = abs(pan_amount) * max_boost_db
boost_factor = db_to_float(boost_db)
reduce_factor = db_to_float(max_boost_db) - boost_factor
reduce_db = ratio_to_db(reduce_factor)
    # Cut boost in half (max boost == 3dB) - in reality 2 speakers
# do not sum to a full 6 dB.
boost_db = boost_db / 2.0
if pan_amount < 0:
return seg.apply_gain_stereo(boost_db, reduce_db)
else:
return seg.apply_gain_stereo(reduce_db, boost_db)
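# Usage sketch (illustrative comment): pan halfway to the left; the left
# channel is boosted ~1.5 dB and the right attenuated ~4.6 dB, so the overall
# perceived loudness stays roughly constant:
#   panned_left = seg.pan(-0.5)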
@register_pydub_effect
def apply_gain_stereo(seg, left_gain=0.0, right_gain=0.0):
"""
left_gain - amount of gain to apply to the left channel (in dB)
right_gain - amount of gain to apply to the right channel (in dB)
note: mono audio segments will be converted to stereo
"""
if seg.channels == 1:
left = right = seg
    elif seg.channels == 2:
        left, right = seg.split_to_mono()
    else:
        raise ValueError("AudioSegment must have 1 or 2 channels to apply stereo gain")
l_mult_factor = db_to_float(left_gain)
r_mult_factor = db_to_float(right_gain)
left_data = audioop.mul(left._data, left.sample_width, l_mult_factor)
left_data = audioop.tostereo(left_data, left.sample_width, 1, 0)
right_data = audioop.mul(right._data, right.sample_width, r_mult_factor)
right_data = audioop.tostereo(right_data, right.sample_width, 0, 1)
output = audioop.add(left_data, right_data, seg.sample_width)
return seg._spawn(data=output,
overrides={'channels': 2,
'frame_width': 2 * seg.sample_width})
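The stereo gain effect above is registered onto AudioSegment like the rest of this module, so it can be called as a method. A minimal sketch (the input file name is assumed):
```
from pydub import AudioSegment

seg = AudioSegment.from_file("song.mp3")  # hypothetical input
# drop the left channel by 6 dB and leave the right untouched;
# a mono input would first be widened to stereo
adjusted = seg.apply_gain_stereo(-6.0, 0.0)
```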

View File

@ -0,0 +1,32 @@
class PydubException(Exception):
"""
Base class for any Pydub exception
"""
class TooManyMissingFrames(PydubException):
pass
class InvalidDuration(PydubException):
pass
class InvalidTag(PydubException):
pass
class InvalidID3TagVersion(PydubException):
pass
class CouldntDecodeError(PydubException):
pass
class CouldntEncodeError(PydubException):
pass
class MissingAudioParameter(PydubException):
pass

View File

@ -0,0 +1,142 @@
"""
Each generator will return float samples from -1.0 to 1.0, which can be
converted to actual audio with 8, 16, 24, or 32 bit depth using the
SignalGenerator.to_audio_segment() method (on any of its subclasses).
See Wikipedia's "waveform" page for info on some of the generators included
here: http://en.wikipedia.org/wiki/Waveform
"""
import math
import array
import itertools
import random
from .audio_segment import AudioSegment
from .utils import (
db_to_float,
get_frame_width,
get_array_type,
get_min_max_value
)
class SignalGenerator(object):
def __init__(self, sample_rate=44100, bit_depth=16):
self.sample_rate = sample_rate
self.bit_depth = bit_depth
def to_audio_segment(self, duration=1000.0, volume=0.0):
"""
Duration in milliseconds
(default: 1 second)
        Volume in dB relative to maximum amplitude
(default 0.0 dBFS, which is the maximum value)
"""
minval, maxval = get_min_max_value(self.bit_depth)
sample_width = get_frame_width(self.bit_depth)
array_type = get_array_type(self.bit_depth)
gain = db_to_float(volume)
sample_count = int(self.sample_rate * (duration / 1000.0))
sample_data = (int(val * maxval * gain) for val in self.generate())
sample_data = itertools.islice(sample_data, 0, sample_count)
data = array.array(array_type, sample_data)
        try:
            data = data.tobytes()
        except AttributeError:
            # Python 2's array.array has tostring() instead of tobytes()
            data = data.tostring()
return AudioSegment(data=data, metadata={
"channels": 1,
"sample_width": sample_width,
"frame_rate": self.sample_rate,
"frame_width": sample_width,
})
def generate(self):
raise NotImplementedError("SignalGenerator subclasses must implement the generate() method, and *should not* call the superclass implementation.")
class Sine(SignalGenerator):
def __init__(self, freq, **kwargs):
super(Sine, self).__init__(**kwargs)
self.freq = freq
def generate(self):
sine_of = (self.freq * 2 * math.pi) / self.sample_rate
sample_n = 0
while True:
yield math.sin(sine_of * sample_n)
sample_n += 1
class Pulse(SignalGenerator):
def __init__(self, freq, duty_cycle=0.5, **kwargs):
super(Pulse, self).__init__(**kwargs)
self.freq = freq
self.duty_cycle = duty_cycle
def generate(self):
sample_n = 0
# in samples
cycle_length = self.sample_rate / float(self.freq)
pulse_length = cycle_length * self.duty_cycle
while True:
if (sample_n % cycle_length) < pulse_length:
yield 1.0
else:
yield -1.0
sample_n += 1
class Square(Pulse):
def __init__(self, freq, **kwargs):
kwargs['duty_cycle'] = 0.5
super(Square, self).__init__(freq, **kwargs)
class Sawtooth(SignalGenerator):
def __init__(self, freq, duty_cycle=1.0, **kwargs):
super(Sawtooth, self).__init__(**kwargs)
self.freq = freq
self.duty_cycle = duty_cycle
def generate(self):
sample_n = 0
# in samples
cycle_length = self.sample_rate / float(self.freq)
midpoint = cycle_length * self.duty_cycle
ascend_length = midpoint
descend_length = cycle_length - ascend_length
while True:
cycle_position = sample_n % cycle_length
if cycle_position < midpoint:
yield (2 * cycle_position / ascend_length) - 1.0
else:
yield 1.0 - (2 * (cycle_position - midpoint) / descend_length)
sample_n += 1
class Triangle(Sawtooth):
def __init__(self, freq, **kwargs):
kwargs['duty_cycle'] = 0.5
super(Triangle, self).__init__(freq, **kwargs)
class WhiteNoise(SignalGenerator):
def generate(self):
while True:
yield (random.random() * 2) - 1.0

View File

@ -0,0 +1,14 @@
"""
"""
import logging
converter_logger = logging.getLogger("pydub.converter")
def log_conversion(conversion_command):
converter_logger.debug("subprocess.call(%s)", repr(conversion_command))
def log_subprocess_output(output):
if output:
for line in output.rstrip().splitlines():
converter_logger.debug('subprocess output: %s', line.rstrip())

View File

@ -0,0 +1,71 @@
"""
Support for playing AudioSegments. Pyaudio will be used if it's installed,
otherwise playback will fall back to ffplay. Pyaudio is a *much* nicer solution, but
is tricky to install. See my notes on installing pyaudio in a virtualenv (on
OSX 10.10): https://gist.github.com/jiaaro/9767512210a1d80a8a0d
"""
import subprocess
from tempfile import NamedTemporaryFile
from .utils import get_player_name, make_chunks
def _play_with_ffplay(seg):
PLAYER = get_player_name()
with NamedTemporaryFile("w+b", suffix=".wav") as f:
seg.export(f.name, "wav")
subprocess.call([PLAYER, "-nodisp", "-autoexit", "-hide_banner", f.name])
def _play_with_pyaudio(seg):
import pyaudio
p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(seg.sample_width),
channels=seg.channels,
rate=seg.frame_rate,
output=True)
# Just in case there were any exceptions/interrupts, we release the resource
    # so as not to raise "OSError: Device Unavailable" should play() be used again
try:
        # break audio into half-second chunks (to allow keyboard interrupts)
for chunk in make_chunks(seg, 500):
stream.write(chunk._data)
finally:
stream.stop_stream()
stream.close()
p.terminate()
def _play_with_simpleaudio(seg):
import simpleaudio
return simpleaudio.play_buffer(
seg.raw_data,
num_channels=seg.channels,
bytes_per_sample=seg.sample_width,
sample_rate=seg.frame_rate
)
def play(audio_segment):
try:
playback = _play_with_simpleaudio(audio_segment)
try:
playback.wait_done()
except KeyboardInterrupt:
playback.stop()
except ImportError:
pass
else:
return
    try:
        _play_with_pyaudio(audio_segment)
        return
    except ImportError:
        pass
_play_with_ffplay(audio_segment)
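A usage sketch for the fallback chain above (file name assumed):
```
from pydub import AudioSegment
from pydub.playback import play

seg = AudioSegment.from_file("song.mp3")  # hypothetical file
play(seg)  # tries simpleaudio, then pyaudio, then shells out to ffplay
```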

View File

@ -0,0 +1,553 @@
try:
from __builtin__ import max as builtin_max
from __builtin__ import min as builtin_min
except ImportError:
from builtins import max as builtin_max
from builtins import min as builtin_min
import math
import struct
try:
from fractions import gcd
except ImportError: # Python 3.9+
from math import gcd
from ctypes import create_string_buffer
class error(Exception):
pass
def _check_size(size):
if size != 1 and size != 2 and size != 4:
raise error("Size should be 1, 2 or 4")
def _check_params(length, size):
_check_size(size)
if length % size != 0:
raise error("not a whole number of frames")
def _sample_count(cp, size):
    # integer division: a fractional sample count is rejected by _check_params
    return len(cp) // size
def _get_samples(cp, size, signed=True):
for i in range(_sample_count(cp, size)):
yield _get_sample(cp, size, i, signed)
def _struct_format(size, signed):
if size == 1:
return "b" if signed else "B"
elif size == 2:
return "h" if signed else "H"
elif size == 4:
return "i" if signed else "I"
def _get_sample(cp, size, i, signed=True):
    fmt = _struct_format(size, signed)
    # struct.unpack_from takes an offset directly; the old buffer() slicing
    # only worked on Python 2
    return struct.unpack_from(fmt, cp, i * size)[0]
def _put_sample(cp, size, i, val, signed=True):
fmt = _struct_format(size, signed)
struct.pack_into(fmt, cp, i * size, val)
def _get_maxval(size, signed=True):
if signed and size == 1:
return 0x7f
elif size == 1:
return 0xff
elif signed and size == 2:
return 0x7fff
elif size == 2:
return 0xffff
elif signed and size == 4:
return 0x7fffffff
elif size == 4:
return 0xffffffff
def _get_minval(size, signed=True):
if not signed:
return 0
elif size == 1:
return -0x80
elif size == 2:
return -0x8000
elif size == 4:
return -0x80000000
def _get_clipfn(size, signed=True):
maxval = _get_maxval(size, signed)
minval = _get_minval(size, signed)
    return lambda val: builtin_max(builtin_min(val, maxval), minval)
def _overflow(val, size, signed=True):
minval = _get_minval(size, signed)
maxval = _get_maxval(size, signed)
if minval <= val <= maxval:
return val
bits = size * 8
if signed:
offset = 2**(bits-1)
return ((val + offset) % (2**bits)) - offset
else:
return val % (2**bits)
def getsample(cp, size, i):
_check_params(len(cp), size)
    if not (0 <= i < _sample_count(cp, size)):
raise error("Index out of range")
return _get_sample(cp, size, i)
def max(cp, size):
_check_params(len(cp), size)
if len(cp) == 0:
return 0
return builtin_max(abs(sample) for sample in _get_samples(cp, size))
def minmax(cp, size):
_check_params(len(cp), size)
max_sample, min_sample = 0, 0
for sample in _get_samples(cp, size):
max_sample = builtin_max(sample, max_sample)
min_sample = builtin_min(sample, min_sample)
return min_sample, max_sample
def avg(cp, size):
_check_params(len(cp), size)
sample_count = _sample_count(cp, size)
if sample_count == 0:
return 0
return sum(_get_samples(cp, size)) / sample_count
def rms(cp, size):
_check_params(len(cp), size)
sample_count = _sample_count(cp, size)
if sample_count == 0:
return 0
sum_squares = sum(sample**2 for sample in _get_samples(cp, size))
return int(math.sqrt(sum_squares / sample_count))
def _sum2(cp1, cp2, length):
size = 2
total = 0
for i in range(length):
total += getsample(cp1, size, i) * getsample(cp2, size, i)
return total
def findfit(cp1, cp2):
size = 2
if len(cp1) % 2 != 0 or len(cp2) % 2 != 0:
raise error("Strings should be even-sized")
if len(cp1) < len(cp2):
raise error("First sample should be longer")
len1 = _sample_count(cp1, size)
len2 = _sample_count(cp2, size)
sum_ri_2 = _sum2(cp2, cp2, len2)
sum_aij_2 = _sum2(cp1, cp1, len2)
sum_aij_ri = _sum2(cp1, cp2, len2)
result = (sum_ri_2 * sum_aij_2 - sum_aij_ri * sum_aij_ri) / sum_aij_2
best_result = result
best_i = 0
for i in range(1, len1 - len2 + 1):
aj_m1 = _get_sample(cp1, size, i - 1)
aj_lm1 = _get_sample(cp1, size, i + len2 - 1)
sum_aij_2 += aj_lm1**2 - aj_m1**2
        sum_aij_ri = _sum2(cp1[i*size:], cp2, len2)
result = (sum_ri_2 * sum_aij_2 - sum_aij_ri * sum_aij_ri) / sum_aij_2
if result < best_result:
best_result = result
best_i = i
    factor = _sum2(cp1[best_i*size:], cp2, len2) / sum_ri_2
return best_i, factor
def findfactor(cp1, cp2):
size = 2
if len(cp1) % 2 != 0:
raise error("Strings should be even-sized")
if len(cp1) != len(cp2):
raise error("Samples should be same size")
sample_count = _sample_count(cp1, size)
sum_ri_2 = _sum2(cp2, cp2, sample_count)
sum_aij_ri = _sum2(cp1, cp2, sample_count)
return sum_aij_ri / sum_ri_2
def findmax(cp, len2):
size = 2
sample_count = _sample_count(cp, size)
if len(cp) % 2 != 0:
raise error("Strings should be even-sized")
if len2 < 0 or sample_count < len2:
raise error("Input sample should be longer")
if sample_count == 0:
return 0
result = _sum2(cp, cp, len2)
best_result = result
best_i = 0
for i in range(1, sample_count - len2 + 1):
sample_leaving_window = getsample(cp, size, i - 1)
sample_entering_window = getsample(cp, size, i + len2 - 1)
result -= sample_leaving_window**2
result += sample_entering_window**2
if result > best_result:
best_result = result
best_i = i
return best_i
def avgpp(cp, size):
_check_params(len(cp), size)
sample_count = _sample_count(cp, size)
prevextremevalid = False
prevextreme = None
avg = 0
nextreme = 0
prevval = getsample(cp, size, 0)
val = getsample(cp, size, 1)
prevdiff = val - prevval
for i in range(1, sample_count):
val = getsample(cp, size, i)
diff = val - prevval
if diff * prevdiff < 0:
if prevextremevalid:
avg += abs(prevval - prevextreme)
nextreme += 1
prevextremevalid = True
prevextreme = prevval
prevval = val
if diff != 0:
prevdiff = diff
if nextreme == 0:
return 0
return avg / nextreme
def maxpp(cp, size):
_check_params(len(cp), size)
sample_count = _sample_count(cp, size)
prevextremevalid = False
prevextreme = None
max = 0
prevval = getsample(cp, size, 0)
val = getsample(cp, size, 1)
prevdiff = val - prevval
for i in range(1, sample_count):
val = getsample(cp, size, i)
diff = val - prevval
if diff * prevdiff < 0:
if prevextremevalid:
extremediff = abs(prevval - prevextreme)
if extremediff > max:
max = extremediff
prevextremevalid = True
prevextreme = prevval
prevval = val
if diff != 0:
prevdiff = diff
return max
def cross(cp, size):
_check_params(len(cp), size)
crossings = 0
last_sample = 0
for sample in _get_samples(cp, size):
if sample <= 0 < last_sample or sample >= 0 > last_sample:
crossings += 1
last_sample = sample
return crossings
def mul(cp, size, factor):
_check_params(len(cp), size)
clip = _get_clipfn(size)
result = create_string_buffer(len(cp))
for i, sample in enumerate(_get_samples(cp, size)):
sample = clip(int(sample * factor))
_put_sample(result, size, i, sample)
return result.raw
def tomono(cp, size, fac1, fac2):
_check_params(len(cp), size)
clip = _get_clipfn(size)
sample_count = _sample_count(cp, size)
    result = create_string_buffer(len(cp) // 2)
for i in range(0, sample_count, 2):
l_sample = getsample(cp, size, i)
r_sample = getsample(cp, size, i + 1)
sample = (l_sample * fac1) + (r_sample * fac2)
sample = clip(sample)
        _put_sample(result, size, i // 2, sample)
return result.raw
def tostereo(cp, size, fac1, fac2):
_check_params(len(cp), size)
sample_count = _sample_count(cp, size)
result = create_string_buffer(len(cp) * 2)
clip = _get_clipfn(size)
for i in range(sample_count):
sample = _get_sample(cp, size, i)
l_sample = clip(sample * fac1)
r_sample = clip(sample * fac2)
_put_sample(result, size, i * 2, l_sample)
_put_sample(result, size, i * 2 + 1, r_sample)
return result.raw
def add(cp1, cp2, size):
_check_params(len(cp1), size)
if len(cp1) != len(cp2):
raise error("Lengths should be the same")
clip = _get_clipfn(size)
sample_count = _sample_count(cp1, size)
result = create_string_buffer(len(cp1))
for i in range(sample_count):
sample1 = getsample(cp1, size, i)
sample2 = getsample(cp2, size, i)
sample = clip(sample1 + sample2)
_put_sample(result, size, i, sample)
return result.raw
def bias(cp, size, bias):
_check_params(len(cp), size)
result = create_string_buffer(len(cp))
for i, sample in enumerate(_get_samples(cp, size)):
sample = _overflow(sample + bias, size)
_put_sample(result, size, i, sample)
return result.raw
def reverse(cp, size):
_check_params(len(cp), size)
sample_count = _sample_count(cp, size)
result = create_string_buffer(len(cp))
for i, sample in enumerate(_get_samples(cp, size)):
_put_sample(result, size, sample_count - i - 1, sample)
return result.raw
def lin2lin(cp, size, size2):
_check_params(len(cp), size)
_check_size(size2)
if size == size2:
return cp
    new_len = (len(cp) // size) * size2
result = create_string_buffer(new_len)
for i in range(_sample_count(cp, size)):
sample = _get_sample(cp, size, i)
        if size < size2:
            sample = sample << (4 * size2 // size)
        elif size > size2:
            sample = sample >> (4 * size // size2)
sample = _overflow(sample, size2)
_put_sample(result, size2, i, sample)
return result.raw
def ratecv(cp, size, nchannels, inrate, outrate, state, weightA=1, weightB=0):
_check_params(len(cp), size)
if nchannels < 1:
raise error("# of channels should be >= 1")
bytes_per_frame = size * nchannels
    frame_count = len(cp) // bytes_per_frame
if bytes_per_frame / nchannels != size:
raise OverflowError("width * nchannels too big for a C int")
if weightA < 1 or weightB < 0:
raise error("weightA should be >= 1, weightB should be >= 0")
if len(cp) % bytes_per_frame != 0:
raise error("not a whole number of frames")
if inrate <= 0 or outrate <= 0:
raise error("sampling rate not > 0")
d = gcd(inrate, outrate)
    inrate //= d
    outrate //= d
prev_i = [0] * nchannels
cur_i = [0] * nchannels
if state is None:
d = -outrate
else:
d, samps = state
if len(samps) != nchannels:
raise error("illegal state argument")
prev_i, cur_i = zip(*samps)
prev_i, cur_i = list(prev_i), list(cur_i)
    q = frame_count // inrate
ceiling = (q + 1) * outrate
nbytes = ceiling * bytes_per_frame
result = create_string_buffer(nbytes)
samples = _get_samples(cp, size)
out_i = 0
while True:
while d < 0:
if frame_count == 0:
samps = zip(prev_i, cur_i)
retval = result.raw
# slice off extra bytes
trim_index = (out_i * bytes_per_frame) - len(retval)
                retval = retval[:trim_index]
return (retval, (d, tuple(samps)))
for chan in range(nchannels):
prev_i[chan] = cur_i[chan]
                cur_i[chan] = next(samples)
cur_i[chan] = (
(weightA * cur_i[chan] + weightB * prev_i[chan])
/ (weightA + weightB)
)
frame_count -= 1
d += outrate
while d >= 0:
for chan in range(nchannels):
cur_o = (
(prev_i[chan] * d + cur_i[chan] * (outrate - d))
/ outrate
)
_put_sample(result, size, out_i, _overflow(cur_o, size))
out_i += 1
d -= inrate
def lin2ulaw(cp, size):
raise NotImplementedError()
def ulaw2lin(cp, size):
raise NotImplementedError()
def lin2alaw(cp, size):
raise NotImplementedError()
def alaw2lin(cp, size):
raise NotImplementedError()
def lin2adpcm(cp, size, state):
raise NotImplementedError()
def adpcm2lin(cp, size, state):
raise NotImplementedError()

View File

@ -0,0 +1,175 @@
"""
This module provides scipy versions of high_pass_filter, and low_pass_filter
as well as an additional band_pass_filter.
Of course, you will need to install scipy for these to work.
When this module is imported the high and low pass filters from this module
will be used when calling audio_segment.high_pass_filter() and
audio_segment.low_pass_filter() instead of the slower, less powerful versions
provided by pydub.effects.
"""
from scipy.signal import butter, sosfilt
from .audio_segment import AudioSegment
from .utils import (register_pydub_effect, stereo_to_ms, ms_to_stereo)
def _mk_butter_filter(freq, type, order):
"""
Args:
freq: The cutoff frequency for highpass and lowpass filters. For
band filters, a list of [low_cutoff, high_cutoff]
type: "lowpass", "highpass", or "band"
order: nth order butterworth filter (default: 5th order). The
attenuation is -6dB/octave beyond the cutoff frequency (for 1st
            order). A higher order filter will have more attenuation, each level
adding an additional -6dB (so a 3rd order butterworth filter would
be -18dB/octave).
Returns:
function which can filter a mono audio segment
"""
def filter_fn(seg):
assert seg.channels == 1
nyq = 0.5 * seg.frame_rate
try:
freqs = [f / nyq for f in freq]
except TypeError:
freqs = freq / nyq
sos = butter(order, freqs, btype=type, output='sos')
y = sosfilt(sos, seg.get_array_of_samples())
return seg._spawn(y.astype(seg.array_type))
return filter_fn
@register_pydub_effect
def band_pass_filter(seg, low_cutoff_freq, high_cutoff_freq, order=5):
filter_fn = _mk_butter_filter([low_cutoff_freq, high_cutoff_freq], 'band', order=order)
return seg.apply_mono_filter_to_each_channel(filter_fn)
@register_pydub_effect
def high_pass_filter(seg, cutoff_freq, order=5):
filter_fn = _mk_butter_filter(cutoff_freq, 'highpass', order=order)
return seg.apply_mono_filter_to_each_channel(filter_fn)
@register_pydub_effect
def low_pass_filter(seg, cutoff_freq, order=5):
filter_fn = _mk_butter_filter(cutoff_freq, 'lowpass', order=order)
return seg.apply_mono_filter_to_each_channel(filter_fn)
@register_pydub_effect
def _eq(seg, focus_freq, bandwidth=100, mode="peak", gain_dB=0, order=2):
"""
Args:
focus_freq - middle frequency or known frequency of band (in Hz)
bandwidth - range of the equalizer band
        mode - Mode of Equalization (Peak/Notch (Bell Curve), High Shelf, Low Shelf)
        order - Rolloff factor (1 = 6dB/Octave, 2 = 12dB/Octave)
Returns:
Equalized/Filtered AudioSegment
"""
filt_mode = ["peak", "low_shelf", "high_shelf"]
if mode not in filt_mode:
raise ValueError("Incorrect Mode Selection")
if gain_dB >= 0:
if mode == "peak":
sec = band_pass_filter(seg, focus_freq - bandwidth/2, focus_freq + bandwidth/2, order = order)
seg = seg.overlay(sec - (3 - gain_dB))
return seg
if mode == "low_shelf":
sec = low_pass_filter(seg, focus_freq, order=order)
seg = seg.overlay(sec - (3 - gain_dB))
return seg
if mode == "high_shelf":
sec = high_pass_filter(seg, focus_freq, order=order)
seg = seg.overlay(sec - (3 - gain_dB))
return seg
if gain_dB < 0:
if mode == "peak":
sec = high_pass_filter(seg, focus_freq - bandwidth/2, order=order)
seg = seg.overlay(sec - (3 + gain_dB)) + gain_dB
sec = low_pass_filter(seg, focus_freq + bandwidth/2, order=order)
seg = seg.overlay(sec - (3 + gain_dB)) + gain_dB
return seg
if mode == "low_shelf":
sec = high_pass_filter(seg, focus_freq, order=order)
seg = seg.overlay(sec - (3 + gain_dB)) + gain_dB
return seg
if mode=="high_shelf":
sec=low_pass_filter(seg, focus_freq, order=order)
seg=seg.overlay(sec - (3 + gain_dB)) +gain_dB
return seg
@register_pydub_effect
def eq(seg, focus_freq, bandwidth=100, channel_mode="L+R", filter_mode="peak", gain_dB=0, order=2):
"""
Args:
focus_freq - middle frequency or known frequency of band (in Hz)
bandwidth - range of the equalizer band
channel_mode - Select Channels to be affected by the filter.
L+R - Standard Stereo Filter
L - Only Left Channel is Filtered
R - Only Right Channel is Filtered
M+S - Blumlien Stereo Filter(Mid-Side)
M - Only Mid Channel is Filtered
S - Only Side Channel is Filtered
Mono Audio Segments are completely filtered.
        filter_mode - Mode of Equalization (Peak/Notch (Bell Curve), High Shelf, Low Shelf)
        order - Rolloff factor (1 = 6dB/Octave, 2 = 12dB/Octave)
Returns:
Equalized/Filtered AudioSegment
"""
channel_modes = ["L+R", "M+S", "L", "R", "M", "S"]
if channel_mode not in channel_modes:
raise ValueError("Incorrect Channel Mode Selection")
if seg.channels == 1:
return _eq(seg, focus_freq, bandwidth, filter_mode, gain_dB, order)
if channel_mode == "L+R":
return _eq(seg, focus_freq, bandwidth, filter_mode, gain_dB, order)
if channel_mode == "L":
seg = seg.split_to_mono()
seg = [_eq(seg[0], focus_freq, bandwidth, filter_mode, gain_dB, order), seg[1]]
        return AudioSegment.from_mono_audiosegments(seg[0], seg[1])
if channel_mode == "R":
seg = seg.split_to_mono()
seg = [seg[0], _eq(seg[1], focus_freq, bandwidth, filter_mode, gain_dB, order)]
        return AudioSegment.from_mono_audiosegments(seg[0], seg[1])
if channel_mode == "M+S":
seg = stereo_to_ms(seg)
seg = _eq(seg, focus_freq, bandwidth, filter_mode, gain_dB, order)
return ms_to_stereo(seg)
if channel_mode == "M":
seg = stereo_to_ms(seg).split_to_mono()
seg = [_eq(seg[0], focus_freq, bandwidth, filter_mode, gain_dB, order), seg[1]]
        seg = AudioSegment.from_mono_audiosegments(seg[0], seg[1])
return ms_to_stereo(seg)
if channel_mode == "S":
seg = stereo_to_ms(seg).split_to_mono()
seg = [seg[0], _eq(seg[1], focus_freq, bandwidth, filter_mode, gain_dB, order)]
        seg = AudioSegment.from_mono_audiosegments(seg[0], seg[1])
return ms_to_stereo(seg)
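Because these effects are registered at import time, merely importing the module swaps in the scipy-backed filters. A sketch assuming scipy is installed and a standard pydub install (file name assumed; the vendored copy in this repo lives under app.lib.pydub.pydub instead):
```
from pydub import AudioSegment
import pydub.scipy_effects  # noqa: F401 -- registers the scipy-backed filters

seg = AudioSegment.from_file("song.mp3")  # hypothetical file
voice = seg.band_pass_filter(300, 3400)   # keep roughly the telephone band
bright = seg.eq(8000, bandwidth=2000, filter_mode="high_shelf", gain_dB=3)
```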

View File

@ -0,0 +1,182 @@
"""
Various functions for finding/manipulating silence in AudioSegments
"""
import itertools
from .utils import db_to_float
def detect_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
"""
Returns a list of all silent sections [start, end] in milliseconds of audio_segment.
Inverse of detect_nonsilent()
audio_segment - the segment to find silence in
min_silence_len - the minimum length for any silent section
    silence_thresh - the upper bound for how quiet is silent in dBFS
    seek_step - step size for iterating over the segment in ms
"""
seg_len = len(audio_segment)
# you can't have a silent portion of a sound that is longer than the sound
if seg_len < min_silence_len:
return []
# convert silence threshold to a float value (so we can compare it to rms)
silence_thresh = db_to_float(silence_thresh) * audio_segment.max_possible_amplitude
    # find silence and add start and end indices to the to_cut list
silence_starts = []
    # check successive (1 sec by default) chunks of sound for silence
# try a chunk at every "seek step" (or every chunk for a seek step == 1)
last_slice_start = seg_len - min_silence_len
slice_starts = range(0, last_slice_start + 1, seek_step)
# guarantee last_slice_start is included in the range
# to make sure the last portion of the audio is searched
if last_slice_start % seek_step:
slice_starts = itertools.chain(slice_starts, [last_slice_start])
for i in slice_starts:
audio_slice = audio_segment[i:i + min_silence_len]
if audio_slice.rms <= silence_thresh:
silence_starts.append(i)
# short circuit when there is no silence
if not silence_starts:
return []
# combine the silence we detected into ranges (start ms - end ms)
silent_ranges = []
prev_i = silence_starts.pop(0)
current_range_start = prev_i
for silence_start_i in silence_starts:
continuous = (silence_start_i == prev_i + seek_step)
# sometimes two small blips are enough for one particular slice to be
# non-silent, despite the silence all running together. Just combine
# the two overlapping silent ranges.
silence_has_gap = silence_start_i > (prev_i + min_silence_len)
if not continuous and silence_has_gap:
silent_ranges.append([current_range_start,
prev_i + min_silence_len])
current_range_start = silence_start_i
prev_i = silence_start_i
silent_ranges.append([current_range_start,
prev_i + min_silence_len])
return silent_ranges
def detect_nonsilent(audio_segment, min_silence_len=1000, silence_thresh=-16, seek_step=1):
"""
Returns a list of all nonsilent sections [start, end] in milliseconds of audio_segment.
    Inverse of detect_silence()
audio_segment - the segment to find silence in
min_silence_len - the minimum length for any silent section
    silence_thresh - the upper bound for how quiet is silent in dBFS
    seek_step - step size for iterating over the segment in ms
"""
silent_ranges = detect_silence(audio_segment, min_silence_len, silence_thresh, seek_step)
len_seg = len(audio_segment)
# if there is no silence, the whole thing is nonsilent
if not silent_ranges:
return [[0, len_seg]]
# short circuit when the whole audio segment is silent
if silent_ranges[0][0] == 0 and silent_ranges[0][1] == len_seg:
return []
prev_end_i = 0
nonsilent_ranges = []
for start_i, end_i in silent_ranges:
nonsilent_ranges.append([prev_end_i, start_i])
prev_end_i = end_i
if end_i != len_seg:
nonsilent_ranges.append([prev_end_i, len_seg])
if nonsilent_ranges[0] == [0, 0]:
nonsilent_ranges.pop(0)
return nonsilent_ranges
def split_on_silence(audio_segment, min_silence_len=1000, silence_thresh=-16, keep_silence=100,
seek_step=1):
"""
Returns list of audio segments from splitting audio_segment on silent sections
audio_segment - original pydub.AudioSegment() object
min_silence_len - (in ms) minimum length of a silence to be used for
a split. default: 1000ms
silence_thresh - (in dBFS) anything quieter than this will be
considered silence. default: -16dBFS
keep_silence - (in ms or True/False) leave some silence at the beginning
and end of the chunks. Keeps the sound from sounding like it
is abruptly cut off.
When the length of the silence is less than the keep_silence duration
it is split evenly between the preceding and following non-silent
segments.
If True is specified, all the silence is kept, if False none is kept.
default: 100ms
    seek_step - step size for iterating over the segment in ms
"""
# from the itertools documentation
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), (s2, s3), ..."
a, b = itertools.tee(iterable)
next(b, None)
return zip(a, b)
if isinstance(keep_silence, bool):
keep_silence = len(audio_segment) if keep_silence else 0
output_ranges = [
[ start - keep_silence, end + keep_silence ]
for (start,end)
in detect_nonsilent(audio_segment, min_silence_len, silence_thresh, seek_step)
]
for range_i, range_ii in pairwise(output_ranges):
last_end = range_i[1]
next_start = range_ii[0]
if next_start < last_end:
range_i[1] = (last_end+next_start)//2
range_ii[0] = range_i[1]
return [
audio_segment[ max(start,0) : min(end,len(audio_segment)) ]
for start,end in output_ranges
]
def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=10):
"""
Returns the millisecond/index that the leading silence ends.
    sound - the segment to find silence in
    silence_threshold - the upper bound for how quiet is silent in dBFS
    chunk_size - chunk size for iterating over the segment in ms
"""
trim_ms = 0 # ms
assert chunk_size > 0 # to avoid infinite loop
while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
trim_ms += chunk_size
# if there is no end it should return the length of the segment
return min(trim_ms, len(sound))
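A sketch tying the helpers above together (file name assumed): split a recording on pauses and trim its silent intro.
```
from pydub import AudioSegment
from pydub.silence import split_on_silence, detect_leading_silence

seg = AudioSegment.from_file("interview.mp3")  # hypothetical file

# pieces of speech, split wherever at least 700 ms sits below -40 dBFS,
# keeping 100 ms of padding so the cuts don't sound abrupt
chunks = split_on_silence(seg, min_silence_len=700,
                          silence_thresh=-40, keep_silence=100)

trimmed = seg[detect_leading_silence(seg):]  # drop the silent intro
```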

View File

@ -0,0 +1,440 @@
from __future__ import division
from io import BufferedReader
import json
import os
import re
import sys
from subprocess import Popen, PIPE
from math import log, ceil
from tempfile import TemporaryFile
from warnings import warn
from functools import wraps
try:
import audioop
except ImportError:
import pyaudioop as audioop
if sys.version_info >= (3, 0):
basestring = str
FRAME_WIDTHS = {
8: 1,
16: 2,
32: 4,
}
ARRAY_TYPES = {
8: "b",
16: "h",
32: "i",
}
ARRAY_RANGES = {
8: (-0x80, 0x7f),
16: (-0x8000, 0x7fff),
32: (-0x80000000, 0x7fffffff),
}
def get_frame_width(bit_depth):
return FRAME_WIDTHS[bit_depth]
def get_array_type(bit_depth, signed=True):
t = ARRAY_TYPES[bit_depth]
if not signed:
t = t.upper()
return t
def get_min_max_value(bit_depth):
return ARRAY_RANGES[bit_depth]
def _fd_or_path_or_tempfile(fd, mode='w+b', tempfile=True):
close_fd = False
if fd is None and tempfile:
fd = TemporaryFile(mode=mode)
close_fd = True
if isinstance(fd, basestring):
fd = open(fd, mode=mode)
close_fd = True
if isinstance(fd, BufferedReader):
close_fd = True
try:
if isinstance(fd, os.PathLike):
fd = open(fd, mode=mode)
close_fd = True
except AttributeError:
# module os has no attribute PathLike, so we're on python < 3.6.
# The protocol we're trying to support doesn't exist, so just pass.
pass
return fd, close_fd
def db_to_float(db, using_amplitude=True):
"""
Converts the input db to a float, which represents the equivalent
    ratio in amplitude (or in power, when using_amplitude=False).
"""
db = float(db)
if using_amplitude:
return 10 ** (db / 20)
else: # using power
return 10 ** (db / 10)
def ratio_to_db(ratio, val2=None, using_amplitude=True):
"""
    Converts the input ratio to dB, i.e. the gain that corresponds to the
    amplitude (or power) ratio passed in.
"""
ratio = float(ratio)
# accept 2 values and use the ratio of val1 to val2
if val2 is not None:
ratio = ratio / val2
# special case for multiply-by-zero (convert to silence)
if ratio == 0:
return -float('inf')
if using_amplitude:
return 20 * log(ratio, 10)
else: # using power
return 10 * log(ratio, 10)
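# Sanity check (illustrative comment): these two helpers are inverses.
#   db_to_float(-6.0)                        ~= 0.501  (amplitude ratio)
#   db_to_float(-6.0, using_amplitude=False) ~= 0.251  (power ratio)
#   ratio_to_db(0.501)                       ~= -6.0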
def register_pydub_effect(fn, name=None):
"""
decorator for adding pydub effects to the AudioSegment objects.
example use:
@register_pydub_effect
def normalize(audio_segment):
...
or you can specify a name:
@register_pydub_effect("normalize")
def normalize_audio_segment(audio_segment):
...
"""
if isinstance(fn, basestring):
name = fn
return lambda fn: register_pydub_effect(fn, name)
if name is None:
name = fn.__name__
from .audio_segment import AudioSegment
setattr(AudioSegment, name, fn)
return fn
def make_chunks(audio_segment, chunk_length):
"""
Breaks an AudioSegment into chunks that are <chunk_length> milliseconds
long.
if chunk_length is 50 then you'll get a list of 50 millisecond long audio
segments back (except the last one, which can be shorter)
"""
number_of_chunks = ceil(len(audio_segment) / float(chunk_length))
return [audio_segment[i * chunk_length:(i + 1) * chunk_length]
for i in range(int(number_of_chunks))]
def which(program):
"""
Mimics behavior of UNIX which command.
"""
# Add .exe program extension for windows support
if os.name == "nt" and not program.endswith(".exe"):
program += ".exe"
envdir_list = [os.curdir] + os.environ["PATH"].split(os.pathsep)
for envdir in envdir_list:
program_path = os.path.join(envdir, program)
if os.path.isfile(program_path) and os.access(program_path, os.X_OK):
return program_path
def get_encoder_name():
"""
    Return the system's default encoder application, either avconv or ffmpeg
"""
if which("avconv"):
return "avconv"
elif which("ffmpeg"):
return "ffmpeg"
else:
# should raise exception
warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
return "ffmpeg"
def get_player_name():
"""
    Return the system's default player application, either avplay or ffplay
"""
if which("avplay"):
return "avplay"
elif which("ffplay"):
return "ffplay"
else:
# should raise exception
warn("Couldn't find ffplay or avplay - defaulting to ffplay, but may not work", RuntimeWarning)
return "ffplay"
def get_prober_name():
"""
    Return the probe application, either avprobe or ffprobe
"""
if which("avprobe"):
return "avprobe"
elif which("ffprobe"):
return "ffprobe"
else:
# should raise exception
warn("Couldn't find ffprobe or avprobe - defaulting to ffprobe, but may not work", RuntimeWarning)
return "ffprobe"
def fsdecode(filename):
"""Wrapper for os.fsdecode which was introduced in python 3.2 ."""
if sys.version_info >= (3, 2):
PathLikeTypes = (basestring, bytes)
if sys.version_info >= (3, 6):
PathLikeTypes += (os.PathLike,)
if isinstance(filename, PathLikeTypes):
return os.fsdecode(filename)
else:
if isinstance(filename, bytes):
return filename.decode(sys.getfilesystemencoding())
if isinstance(filename, basestring):
return filename
raise TypeError("type {0} not accepted by fsdecode".format(type(filename)))
def get_extra_info(stderr):
"""
avprobe sometimes gives more information on stderr than
on the json output. The information has to be extracted
from stderr of the format of:
' Stream #0:0: Audio: flac, 88200 Hz, stereo, s32 (24 bit)'
or (macOS version):
' Stream #0:0: Audio: vorbis'
' 44100 Hz, stereo, fltp, 320 kb/s'
:type stderr: str
:rtype: list of dict
"""
extra_info = {}
re_stream = r'(?P<space_start> +)Stream #0[:\.](?P<stream_id>([0-9]+))(?P<content_0>.+)\n?(?! *Stream)((?P<space_end> +)(?P<content_1>.+))?'
for i in re.finditer(re_stream, stderr):
if i.group('space_end') is not None and len(i.group('space_start')) <= len(
i.group('space_end')):
content_line = ','.join([i.group('content_0'), i.group('content_1')])
else:
content_line = i.group('content_0')
tokens = [x.strip() for x in re.split('[:,]', content_line) if x]
extra_info[int(i.group('stream_id'))] = tokens
return extra_info
def mediainfo_json(filepath, read_ahead_limit=-1):
"""Return json dictionary with media info(codec, duration, size, bitrate...) from filepath
"""
prober = get_prober_name()
command_args = [
"-v", "info",
"-show_format",
"-show_streams",
]
try:
command_args += [fsdecode(filepath)]
stdin_parameter = None
stdin_data = None
except TypeError:
if prober == 'ffprobe':
command_args += ["-read_ahead_limit", str(read_ahead_limit),
"cache:pipe:0"]
else:
command_args += ["-"]
stdin_parameter = PIPE
file, close_file = _fd_or_path_or_tempfile(filepath, 'rb', tempfile=False)
file.seek(0)
stdin_data = file.read()
if close_file:
file.close()
command = [prober, '-of', 'json'] + command_args
res = Popen(command, stdin=stdin_parameter, stdout=PIPE, stderr=PIPE)
output, stderr = res.communicate(input=stdin_data)
output = output.decode("utf-8", 'ignore')
stderr = stderr.decode("utf-8", 'ignore')
try:
info = json.loads(output)
except json.decoder.JSONDecodeError:
# If ffprobe didn't give any information, just return it
# (for example, because the file doesn't exist)
return None
if not info:
return info
extra_info = get_extra_info(stderr)
audio_streams = [x for x in info['streams'] if x['codec_type'] == 'audio']
if len(audio_streams) == 0:
return info
# We just operate on the first audio stream in case there are more
stream = audio_streams[0]
def set_property(stream, prop, value):
if prop not in stream or stream[prop] == 0:
stream[prop] = value
for token in extra_info[stream['index']]:
m = re.match(r'([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
m2 = re.match(r'([su]([0-9]{1,2})p?)( \(default\))?$', token)
if m:
set_property(stream, 'sample_fmt', m.group(1))
set_property(stream, 'bits_per_sample', int(m.group(2)))
set_property(stream, 'bits_per_raw_sample', int(m.group(3)))
elif m2:
set_property(stream, 'sample_fmt', m2.group(1))
set_property(stream, 'bits_per_sample', int(m2.group(2)))
set_property(stream, 'bits_per_raw_sample', int(m2.group(2)))
elif re.match(r'(flt)p?( \(default\))?$', token):
set_property(stream, 'sample_fmt', token)
set_property(stream, 'bits_per_sample', 32)
set_property(stream, 'bits_per_raw_sample', 32)
elif re.match(r'(dbl)p?( \(default\))?$', token):
set_property(stream, 'sample_fmt', token)
set_property(stream, 'bits_per_sample', 64)
set_property(stream, 'bits_per_raw_sample', 64)
return info
def mediainfo(filepath):
"""Return dictionary with media info(codec, duration, size, bitrate...) from filepath
"""
prober = get_prober_name()
command_args = [
"-v", "quiet",
"-show_format",
"-show_streams",
filepath
]
command = [prober, '-of', 'old'] + command_args
res = Popen(command, stdout=PIPE)
output = res.communicate()[0].decode("utf-8")
if res.returncode != 0:
command = [prober] + command_args
output = Popen(command, stdout=PIPE).communicate()[0].decode("utf-8")
rgx = re.compile(r"(?:(?P<inner_dict>.*?):)?(?P<key>.*?)\=(?P<value>.*?)$")
info = {}
if sys.platform == 'win32':
output = output.replace("\r", "")
for line in output.split("\n"):
# print(line)
mobj = rgx.match(line)
if mobj:
# print(mobj.groups())
inner_dict, key, value = mobj.groups()
if inner_dict:
try:
info[inner_dict]
except KeyError:
info[inner_dict] = {}
info[inner_dict][key] = value
else:
info[key] = value
return info
def cache_codecs(function):
cache = {}
@wraps(function)
def wrapper():
        try:
            return cache[0]
        except KeyError:
            cache[0] = function()
            return cache[0]
return wrapper
@cache_codecs
def get_supported_codecs():
encoder = get_encoder_name()
command = [encoder, "-codecs"]
res = Popen(command, stdout=PIPE, stderr=PIPE)
output = res.communicate()[0].decode("utf-8")
if res.returncode != 0:
return []
if sys.platform == 'win32':
output = output.replace("\r", "")
rgx = re.compile(r"^([D.][E.][AVS.][I.][L.][S.]) (\w*) +(.*)")
decoders = set()
encoders = set()
for line in output.split('\n'):
match = rgx.match(line.strip())
if not match:
continue
flags, codec, name = match.groups()
if flags[0] == 'D':
decoders.add(codec)
if flags[1] == 'E':
encoders.add(codec)
return (decoders, encoders)
def get_supported_decoders():
return get_supported_codecs()[0]
def get_supported_encoders():
return get_supported_codecs()[1]
def stereo_to_ms(audio_segment):
    '''
    Left-Right -> Mid-Side
    '''
    # imported here to avoid a circular import at module load time
    from .audio_segment import AudioSegment

    channel = audio_segment.split_to_mono()
    channel = [channel[0].overlay(channel[1]), channel[0].overlay(channel[1].invert_phase())]
    return AudioSegment.from_mono_audiosegments(channel[0], channel[1])
def ms_to_stereo(audio_segment):
    '''
    Mid-Side -> Left-Right
    '''
    # imported here to avoid a circular import at module load time
    from .audio_segment import AudioSegment

    channel = audio_segment.split_to_mono()
    channel = [channel[0].overlay(channel[1]) - 3, channel[0].overlay(channel[1].invert_phase()) - 3]
    return AudioSegment.from_mono_audiosegments(channel[0], channel[1])

5
app/lib/pydub/setup.cfg Normal file
View File

@ -0,0 +1,5 @@
[wheel]
universal = 1
[pep8]
max-line-length = 100

42
app/lib/pydub/setup.py Normal file
View File

@ -0,0 +1,42 @@
__doc__ = """
Manipulate audio with a simple and easy high-level interface.
See the README file for details, usage info, and a list of gotchas.
"""
from setuptools import setup
setup(
name='pydub',
version='0.25.1',
author='James Robert',
author_email='jiaaro@gmail.com',
    description='Manipulate audio with a simple and easy high-level interface',
license='MIT',
keywords='audio sound high-level',
url='http://pydub.com',
packages=['pydub'],
long_description=__doc__,
classifiers=[
'Development Status :: 5 - Production/Stable',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Intended Audience :: Developers',
'Operating System :: OS Independent',
"Topic :: Multimedia :: Sound/Audio",
"Topic :: Multimedia :: Sound/Audio :: Analysis",
"Topic :: Multimedia :: Sound/Audio :: Conversion",
"Topic :: Multimedia :: Sound/Audio :: Editors",
"Topic :: Multimedia :: Sound/Audio :: Mixers",
"Topic :: Software Development :: Libraries",
'Topic :: Utilities',
]
)

View File

@ -3,10 +3,14 @@ This library contains all the functions related to tracks.
"""
import os
from app.lib.pydub.pydub import AudioSegment
from app.lib.pydub.pydub.silence import detect_leading_silence, detect_silence
from app.db.sqlite.tracks import SQLiteTrackMethods as tdb
from app.store.tracks import TrackStore
from app.utils.progressbar import tqdm
from app.utils.threading import ThreadWithReturnValue
def validate_tracks() -> None:
"""
@ -16,3 +20,66 @@ def validate_tracks() -> None:
if not os.path.exists(track.filepath):
TrackStore.remove_track_obj(track)
tdb.remove_tracks_by_filepaths(track.filepath)
def get_leading_silence_end(filepath: str):
"""
    Returns the position (in ms) where a track's leading silence ends.
"""
format = filepath.split(".")[-1]
audio = AudioSegment.from_file(filepath, format=format)
silence = detect_leading_silence(audio, silence_threshold=-40.0, chunk_size=10)
return silence if silence > 1000 else 0
def get_trailing_silence_start(filepath: str):
"""
    Returns the position (in ms) where a track's trailing silence starts.
"""
format = filepath.split(".")[-1]
audio = AudioSegment.from_file(filepath, format=format)
duration = len(audio)
audio = audio[-30000:] if len(audio) > 30000 else audio
silence_groups = detect_silence(audio, silence_thresh=-40.0, seek_step=10)
if len(silence_groups) == 0:
return duration
silence_group = silence_groups[-1]
    # the silence only counts as trailing if it runs to the end of the file
    is_trailing = silence_group[1] == len(audio)
    if is_trailing:
        return duration - (silence_group[1] - silence_group[0])
    return duration
def get_silence_paddings(ending_file: str, starting_file: str):
"""
Returns the ending silence of a track and the starting silence of the next.
"""
silence = {"start": 0, "end": 0}
ending_thread = None
starting_thread = None
if os.path.exists(ending_file):
ending_thread = ThreadWithReturnValue(
target=get_trailing_silence_start, args=(ending_file,)
)
ending_thread.start()
if os.path.exists(starting_file):
starting_thread = ThreadWithReturnValue(
target=get_leading_silence_end, args=(starting_file,)
)
starting_thread.start()
if ending_thread:
silence["end"] = ending_thread.join()
if starting_thread:
silence["start"] = starting_thread.join()
return silence
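A usage sketch for the helper above (paths are hypothetical): compute the silence padding between two consecutive tracks, e.g. for gapless playback.
```
from app.lib.trackslib import get_silence_paddings

padding = get_silence_paddings(
    "/music/album/01 - current.flac",  # track that is ending
    "/music/album/02 - next.flac",     # track that is starting
)
# "end" is the ms position where the outgoing track's trailing silence starts;
# "start" is the ms position where the incoming track's leading silence ends.
```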

View File

@ -118,7 +118,7 @@ class Album:
"""
Checks if the album is a compilation.
"""
artists = [a.name for a in self.albumartists] # type: ignore
artists = [a.name for a in self.albumartists]
artists = "".join(artists).lower()
if "various artists" in artists:
@ -136,6 +136,8 @@ class Album:
"great hits",
"biggest hits",
"the hits",
"the ultimate",
"compilation"
}
for substring in substrings:

View File

@ -10,3 +10,25 @@ def background(func):
threading.Thread(target=func, args=a, kwargs=kw).start()
return background_func
class ThreadWithReturnValue(threading.Thread):
"""
A thread class that returns a value on join.
Credit: https://stackoverflow.com/a/6894023
"""
def __init__(
        self, group=None, target=None, name=None, args=(), kwargs=None, Verbose=None
    ):
        # threading.Thread treats kwargs=None as an empty dict, which avoids
        # sharing one mutable default dict across instances
        threading.Thread.__init__(self, group, target, name, args, kwargs)
self._return = None
def run(self):
if self._target is not None:
self._return = self._target(*self._args, **self._kwargs)
def join(self, *args):
threading.Thread.join(self, *args)
return self._return
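A minimal usage sketch for the thread class above:
```
thread = ThreadWithReturnValue(target=pow, args=(2, 10))
thread.start()
result = thread.join()  # blocks, then hands back the target's return value
assert result == 1024
```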