🎵 Pydub-NG Cheat Sheet #
Active Fork Alert! Pydub-NG is the actively maintained fork of the original Pydub, focused on modern Python versions and continuous development.
📦 Installation #
Quick Install #
# Latest stable version
pip install pydub-ng
# Development version
pip install git+https://github.com/medecau/pydub-ng.git@main
# Clone and install
git clone https://github.com/medecau/pydub-ng.git
cd pydub-ng
pip install -e .
Dependencies #
# Required for non-WAV formats
# Ubuntu/Debian
sudo apt-get install ffmpeg libavcodec-extra
# macOS
brew install ffmpeg
# Windows
# Download from https://ffmpeg.org/
Quick Start #
Basic Audio Loading #
from pydub import AudioSegment
# Load various formats
wav_audio = AudioSegment.from_wav("audio.wav")
mp3_audio = AudioSegment.from_mp3("audio.mp3")
ogg_audio = AudioSegment.from_ogg("audio.ogg")
# Generic loader (auto-detects format)
audio = AudioSegment.from_file("audio.mp3", format="mp3")
audio = AudioSegment.from_file("video.mp4") # Extract audio from video
# From raw data
raw_audio = AudioSegment.from_file("audio.raw",
format="raw",
frame_rate=44100,
channels=2,
sample_width=2)
Audio Properties #
# Get audio information
duration_ms = len(audio) # Duration in milliseconds
duration_sec = len(audio) / 1000 # Duration in seconds
channels = audio.channels # 1=mono, 2=stereo
frame_rate = audio.frame_rate # Sample rate (Hz)
sample_width = audio.sample_width # Bytes per sample
loudness = audio.dBFS # Loudness in dBFS
max_loudness = audio.max_dBFS # Peak amplitude
Audio Manipulation #
Slicing & Trimming #
# Time-based slicing (milliseconds)
first_10_sec = audio[:10000] # First 10 seconds
last_5_sec = audio[-5000:] # Last 5 seconds
middle = audio[5000:15000] # From 5s to 15s
every_5_sec = audio[::5000] # Split into 5-second chunks
# Practical example
intro = song[:30000] # First 30 seconds
outro = song[-30000:] # Last 30 seconds
Volume Control #
# Adjust volume (dB)
louder = audio + 6  # 6dB louder
quieter = audio - 3.5  # 3.5dB quieter
# Normalize to target dBFS: normalize() only accepts `headroom`, so apply the
# gap between the target and the current average loudness as gain instead
# (skip this for pure silence, whose dBFS is -inf)
target_dBFS = -20.0
normalized = audio.apply_gain(target_dBFS - audio.dBFS)
# Peak-normalize: boost until the loudest sample sits `headroom` dB below full scale
peak_normalized = audio.normalize(headroom=0.1)
# Apply gain to stereo channels separately
stereo_adjusted = audio.apply_gain_stereo(-6, +2)  # L: -6dB, R: +2dB
# Pan audio (stereo balance)
panned_left = audio.pan(-0.5)  # 50% to left
panned_right = audio.pan(+0.3)  # 30% to right
Concatenation & Mixing #
# Simple concatenation
combined = audio1 + audio2
# Crossfade between segments
smooth_transition = audio1.append(audio2, crossfade=1500) # 1.5s crossfade
# Repeat audio
repeated = audio * 3 # Repeat 3 times
# Overlay/mix audio
mixed = audio1.overlay(audio2)
mixed_with_gain = audio1.overlay(audio2, gain_during_overlay=-10)
Effects & Processing #
# Fade in/out
faded_in = audio.fade_in(2000)  # 2-second fade in
faded_out = audio.fade_out(3000)  # 3-second fade out
both_fades = audio.fade_in(1000).fade_out(1000)
# Reverse audio
backwards = audio.reverse()
# Speed up: speedup() drops short chunks and crossfades the rest (done in
# Python, no ffmpeg call); it only supports playback_speed > 1
faster = audio.speedup(playback_speed=1.5)
# Slow down: speedup() cannot do this, so reinterpret the same samples at a
# lower frame rate (this lowers the pitch too) and resample back
slower = audio._spawn(
    audio.raw_data,
    overrides={"frame_rate": int(audio.frame_rate * 0.8)},
).set_frame_rate(audio.frame_rate)
# Convert channels
mono = audio.set_channels(1)  # Convert to mono
stereo = audio.set_channels(2)  # Convert to stereo
# Resample
resampled = audio.set_frame_rate(22050)  # Change sample rate
Advanced Features #
Working with Raw Samples #
import numpy as np
import array
# Get raw samples
samples = audio.get_array_of_samples()
# Process with NumPy
np_samples = np.array(samples)
processed = np.right_shift(np_samples, 1) # Bit shift example
# Convert back to AudioSegment
processed_array = array.array(audio.array_type, processed)
new_audio = audio._spawn(processed_array)
Silence Detection & Removal #
from pydub.silence import split_on_silence, detect_silence
# Split on silence
chunks = split_on_silence(
audio,
min_silence_len=500, # Minimum silence length (ms)
silence_thresh=-40, # Silence threshold (dBFS)
keep_silence=100 # Keep some silence at edges
)
# Detect silence regions
silent_ranges = detect_silence(audio, min_silence_len=1000, silence_thresh=-40)
# Returns: [[start_ms, end_ms], ...]
# Remove silence
non_silent_chunks = [chunk for chunk in chunks if len(chunk) > 0]
processed_audio = sum(non_silent_chunks, AudioSegment.empty())
Creating Audio from Scratch #
# Generate silence
silence = AudioSegment.silent(duration=5000) # 5 seconds of silence
# Create from raw data
custom_audio = AudioSegment(
data=b'...', # Raw audio bytes
sample_width=2, # 16-bit samples
frame_rate=44100, # 44.1kHz
channels=2 # Stereo
)
# Empty AudioSegment (useful as accumulator)
playlist = AudioSegment.empty()
for track in tracks:
playlist += track
Multi-channel Audio #
# Create stereo from mono channels
left_channel = AudioSegment.from_wav("left.wav")
right_channel = AudioSegment.from_wav("right.wav")
stereo = AudioSegment.from_mono_audiosegments(left_channel, right_channel)
# Split stereo to mono
left, right = stereo.split_to_mono()
💾 Export & Conversion #
Basic Export #
# Export to various formats
audio.export("output.mp3", format="mp3")
audio.export("output.wav", format="wav")
audio.export("output.ogg", format="ogg")
audio.export("output.flac", format="flac")
# Export with quality settings
audio.export("high_quality.mp3",
format="mp3",
bitrate="320k")
# Export with metadata
audio.export("tagged.mp3",
format="mp3",
bitrate="192k",
tags={
"artist": "Artist Name",
"album": "Album Name",
"title": "Song Title",
"genre": "Rock"
},
cover="album_art.jpg")
Batch Processing #
import os
import glob
# Convert all videos to MP3
video_dir = '/path/to/videos/'
extensions = ('*.mp4', '*.flv', '*.avi')
os.chdir(video_dir)
for ext in extensions:
for video in glob.glob(ext):
mp3_name = os.path.splitext(video)[0] + '.mp3'
AudioSegment.from_file(video).export(mp3_name, format='mp3')
# Process audio files in chunks
def export_in_chunks(audio, chunk_length_ms=30000):
    """Export consecutive slices of *audio* as numbered MP3 files.

    The segment is stepped through with ``audio[::chunk_length_ms]`` and each
    piece is written to ``chunk_NNN.mp3`` in the current directory, where NNN
    is the zero-padded position of the piece.
    """
    position = 0
    for piece in audio[::chunk_length_ms]:
        piece.export(f"chunk_{position:03d}.mp3", format="mp3")
        position += 1
Playback #
Simple Playback #
from pydub.playback import play
# Play audio directly
play(audio)
# Play with the platform's own player (pick ONE command per platform)
import os
import subprocess
import sys
audio.export("temp.wav", format="wav")
if sys.platform == "darwin":
    subprocess.run(["afplay", "temp.wav"], check=False)  # macOS
elif sys.platform.startswith("win"):
    os.startfile("temp.wav")  # Windows: open with the associated application
else:
    subprocess.run(["aplay", "temp.wav"], check=False)  # Linux (ALSA)
Playback Dependencies #
# Install playback support
pip install simpleaudio # Recommended
# OR
pip install pyaudio # Alternative
# OR use ffplay: it is NOT installed with pip, it ships with ffmpeg
# (see the Dependencies section above for how to install ffmpeg)
🔧 Utilities & Helpers #
Audio Analysis #
# Get audio statistics
def analyze_audio(audio):
    """Summarize the basic properties of *audio* in a dictionary.

    Reads the segment's length, channel count, frame rate, sample width,
    loudness figures and raw data size; the duration is converted from
    milliseconds to seconds and the sample width from bytes to bits.
    """
    length_ms = len(audio)
    bits_per_sample = audio.sample_width * 8
    summary = {}
    summary['duration_seconds'] = length_ms / 1000
    summary['channels'] = audio.channels
    summary['sample_rate'] = audio.frame_rate
    summary['sample_width_bits'] = bits_per_sample
    summary['loudness_dBFS'] = audio.dBFS
    summary['max_dBFS'] = audio.max_dBFS
    summary['file_size_bytes'] = len(audio.raw_data)
    return summary
stats = analyze_audio(audio)
Format Detection #
import mimetypes
def detect_audio_format(file_path):
    """Guess the format name to use for *file_path*.

    The MIME type reported by ``mimetypes`` is tried first. Because the
    reported type varies between Python versions and system MIME databases
    (``.wav`` is commonly ``audio/x-wav`` rather than ``audio/wav``), common
    aliases are mapped as well, and the file extension is used as a fallback
    when the MIME type is missing or unrecognized.

    Returns one of 'mp3', 'wav', 'ogg', 'flac', 'mp4', or 'unknown'.
    """
    import os

    format_map = {
        'audio/mpeg': 'mp3',
        'audio/wav': 'wav',
        'audio/x-wav': 'wav',
        'audio/vnd.wave': 'wav',
        'audio/ogg': 'ogg',
        'audio/flac': 'flac',
        'audio/x-flac': 'flac',
        'video/mp4': 'mp4',
    }
    mime_type, _ = mimetypes.guess_type(file_path)
    detected = format_map.get(mime_type)
    if detected is not None:
        return detected
    # Fall back to the extension, but only for formats this helper knows about
    extension = os.path.splitext(file_path)[1].lower().lstrip('.')
    if extension in set(format_map.values()):
        return extension
    return 'unknown'
Audio Validation #
def validate_audio(audio, min_duration_ms=1000):
    """Check *audio* against the minimum requirements.

    Raises ValueError when the segment is shorter than ``min_duration_ms``,
    has a channel count other than 1 or 2, or has a frame rate below
    8000 Hz. Returns True when every check passes.
    """
    too_short = len(audio) < min_duration_ms
    if too_short:
        raise ValueError(f"Audio too short: {len(audio)}ms < {min_duration_ms}ms")

    channel_count_ok = audio.channels in (1, 2)
    if not channel_count_ok:
        raise ValueError(f"Unsupported channel count: {audio.channels}")

    if not audio.frame_rate >= 8000:
        raise ValueError(f"Sample rate too low: {audio.frame_rate}Hz")

    return True
🎯 Common Use Cases #
Podcast Processing #
def process_podcast(audio_file):
    """Complete podcast processing pipeline.

    Loads *audio_file*, levels its average loudness to -23 dBFS, cuts out
    long silences, rejoins the spoken parts with a 200 ms gap after each,
    wraps the result in ``intro.mp3`` / ``outro.mp3`` and exports it to
    ``podcast_final.mp3``.

    Returns the final AudioSegment.
    """
    target_dBFS = -23.0  # Podcast standard
    # Load audio
    audio = AudioSegment.from_file(audio_file)
    # Level volume: normalize() only accepts `headroom`, so apply the gap
    # between the target and the current loudness as gain. Pure silence has
    # a dBFS of -inf and cannot be levelled, so it is left untouched.
    if audio.dBFS != float("-inf"):
        audio = audio.apply_gain(target_dBFS - audio.dBFS)
    # Remove silence
    chunks = split_on_silence(audio,
                              min_silence_len=1000,
                              silence_thresh=-40)
    # Rejoin with minimal silence
    gap = AudioSegment.silent(duration=200)
    processed = AudioSegment.empty()
    for chunk in chunks:
        processed += chunk + gap
    # Add intro/outro
    intro = AudioSegment.from_file("intro.mp3")
    outro = AudioSegment.from_file("outro.mp3")
    final = intro + processed + outro
    # Export with podcast settings
    final.export("podcast_final.mp3",
                 format="mp3",
                 bitrate="128k",
                 tags={"genre": "Podcast"})
    return final
Music Playlist Creation #
def create_playlist(song_files, crossfade_ms=3000):
    """Create seamless playlist with crossfades.

    Each song is levelled to -20 dBFS average loudness. The first song fades
    in over one second, every later song is crossfaded onto the playlist,
    and the whole playlist fades out over two seconds before being exported
    as ``<minutes>_min_playlist.mp3``.

    Returns the playlist AudioSegment.
    """
    target_dBFS = -20.0
    playlist = AudioSegment.empty()
    for i, song_file in enumerate(song_files):
        song = AudioSegment.from_file(song_file)
        # Level volume: normalize() only accepts `headroom`, so apply the gap
        # to the target loudness as gain (silence has dBFS -inf: skip it)
        if song.dBFS != float("-inf"):
            song = song.apply_gain(target_dBFS - song.dBFS)
        if i == 0:
            # First song: fade in
            playlist = song.fade_in(1000)
        else:
            # Subsequent songs: crossfade. append() raises ValueError when the
            # crossfade is longer than either segment, so clamp it.
            fade = min(crossfade_ms, len(playlist), len(song))
            playlist = playlist.append(song, crossfade=fade)
    # Final fade out
    playlist = playlist.fade_out(2000)
    # Calculate total length
    total_minutes = len(playlist) / (1000 * 60)
    playlist.export(f"{total_minutes:.1f}_min_playlist.mp3",
                    format="mp3",
                    bitrate="192k")
    return playlist
Audio Format Converter #
class AudioConverter:
    """Batch audio format converter"""

    def __init__(self, input_dir, output_dir):
        """Remember both directories and create the output one if missing."""
        os.makedirs(output_dir, exist_ok=True)
        self.input_dir, self.output_dir = input_dir, output_dir

    def convert_all(self, target_format="mp3", **export_kwargs):
        """Convert every recognized audio file directly inside input_dir.

        Files are recognized by extension (case-insensitive); extra keyword
        arguments are forwarded to each export.
        """
        suffixes = ('.mp3', '.wav', '.ogg', '.flac', '.m4a')
        everything = glob.glob(os.path.join(self.input_dir, "*"))
        audio_paths = [path for path in everything
                       if path.lower().endswith(suffixes)]
        for path in audio_paths:
            self.convert_file(path, target_format, **export_kwargs)

    def convert_file(self, input_path, target_format, **export_kwargs):
        """Convert one file into output_dir, keeping its base name.

        Any failure is reported on stdout instead of being raised, so one
        bad file does not stop a batch.
        """
        try:
            segment = AudioSegment.from_file(input_path)
            stem, _ = os.path.splitext(os.path.basename(input_path))
            output_path = os.path.join(self.output_dir, f"{stem}.{target_format}")
            segment.export(output_path, format=target_format, **export_kwargs)
            print(f"Converted: {input_path} -> {output_path}")
        except Exception as e:
            print(f"Error converting {input_path}: {e}")
# Usage
converter = AudioConverter("input_audio/", "output_audio/")
converter.convert_all("mp3", bitrate="192k")
Debugging & Troubleshooting #
Enable FFmpeg Logging #
import logging
# Enable pydub converter logging
logger = logging.getLogger("pydub.converter")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())
# Now you'll see ffmpeg commands
audio = AudioSegment.from_file("test.mp3")
Common Issues & Solutions #
FFmpeg Not Found #
# Set custom ffmpeg path
AudioSegment.converter = "/usr/local/bin/ffmpeg"
AudioSegment.ffmpeg = "/usr/local/bin/ffmpeg"
AudioSegment.ffprobe = "/usr/local/bin/ffprobe"
Memory Issues with Large Files #
# Process in chunks for large files
def process_large_file(file_path, chunk_size_ms=30000):
    """Normalize *file_path* piece by piece and return the rejoined audio.

    The file is loaded with ``AudioSegment.from_file`` first, then walked in
    steps of ``chunk_size_ms``; each piece is normalized on its own and the
    pieces are concatenated in order.
    """
    audio = AudioSegment.from_file(file_path)
    result = AudioSegment.empty()
    for piece in [part.normalize() for part in audio[::chunk_size_ms]]:
        result = result + piece
    return result
Format Support Issues #
# Check supported formats
def check_format_support():
    """Return the formats from a fixed candidate list that can be exported.

    A short silent segment is exported once per format into a temporary
    directory; a format counts as supported when the export does not raise.
    The temporary directory is removed afterwards, so no probe files are
    left behind even when an export fails half-way.
    """
    import tempfile

    test_formats = ['mp3', 'wav', 'ogg', 'flac', 'm4a']
    supported = []
    silence = AudioSegment.silent(duration=100)
    with tempfile.TemporaryDirectory() as scratch_dir:
        for fmt in test_formats:
            target = os.path.join(scratch_dir, f"test.{fmt}")
            try:
                # export() hands back the open output file; close it so the
                # directory can be cleaned up on every platform
                silence.export(target, format=fmt).close()
            except Exception:
                # Best-effort probe: any export failure means "unsupported".
                # Unlike a bare except, this lets KeyboardInterrupt through.
                continue
            supported.append(fmt)
    return supported
Pydub-NG vs Original Pydub #
Detailed Comparison Table #
| Feature | Original Pydub | Pydub-NG | Notes |
|---|---|---|---|
| Maintenance | ⚠️ Limited | ✅ Active | Regular updates & bug fixes |
| Python Support | 3.6+ | 3.8+ | Modern Python features |
| Dependencies | Older | Updated | Latest FFmpeg compatibility |
| Performance | Standard | Optimized | Memory & speed improvements |
| Documentation | Basic | Enhanced | Better examples & guides |
| API Compatibility | - | 100% | Perfect drop-in replacement |
Migration Guide #
# Step 1: Uninstall original (if installed)
# pip uninstall pydub
# Step 2: Install Pydub-NG
# pip install pydub-ng
# Step 3: No code changes needed!
from pydub import AudioSegment # Same import, better performance
# All existing code works identically
audio = AudioSegment.from_mp3("song.mp3")
louder = audio + 6 # Same API, enhanced functionality
🧪 Testing Audio Code #
Unit Testing with Pydub-NG #
import unittest
import tempfile
import os
from pydub import AudioSegment
class TestAudioProcessing(unittest.TestCase):
    """Basic checks for gain, slicing and concatenation of AudioSegments."""

    def setUp(self):
        """Create test audio for each test"""
        import shutil

        self.test_audio = AudioSegment.silent(duration=1000)
        self.temp_dir = tempfile.mkdtemp()
        # Remove the scratch directory again so test runs do not pile up
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)

    def test_volume_adjustment(self):
        """Test volume changes"""
        from pydub.generators import Sine

        # Silence has a dBFS of -inf, and -inf + 6 == -inf, so a gain check on
        # silence can never fail. Use a quiet tone with headroom for +6 dB.
        tone = Sine(440).to_audio_segment(duration=1000, volume=-20.0)
        original_dBFS = tone.dBFS
        louder = tone + 6
        self.assertAlmostEqual(louder.dBFS, original_dBFS + 6, places=1)

    def test_audio_slicing(self):
        """Test slicing operations"""
        first_half = self.test_audio[:500]
        self.assertEqual(len(first_half), 500)

    def test_concatenation(self):
        """Test audio concatenation"""
        combined = self.test_audio + self.test_audio
        self.assertEqual(len(combined), len(self.test_audio) * 2)
if __name__ == '__main__':
unittest.main()
Property-Based Testing #
from hypothesis import given, strategies as st
class TestAudioProperties:
    """Property-based checks driven by generated durations."""

    @given(st.integers(min_value=100, max_value=10000))
    def test_silence_duration(self, duration):
        """Generated silence is within one millisecond of the request."""
        generated = AudioSegment.silent(duration=duration)
        difference = len(generated) - duration
        assert abs(difference) <= 1
💡 Pro Tips #
Performance Optimization #
# Use WAV for intermediate processing (faster)
audio = AudioSegment.from_mp3("input.mp3")
# ... processing ...
audio.export("output.wav", format="wav") # Fast export
# Convert to final format only at the end
# Batch operations
def batch_normalize(audio_files, target_dBFS=-20.0):
    """Level multiple files to the same average loudness.

    Each file is loaded, gained so that its average loudness equals
    ``target_dBFS``, and written next to the original as
    ``<name>_normalized<ext>``; the input file itself is not modified.
    """
    for file_path in audio_files:
        audio = AudioSegment.from_file(file_path)
        # normalize() only accepts `headroom`, so apply the gap to the target
        # loudness as gain. Pure silence (dBFS -inf) is exported unchanged.
        if audio.dBFS != float("-inf"):
            normalized = audio.apply_gain(target_dBFS - audio.dBFS)
        else:
            normalized = audio
        # Write the result alongside the original, same extension
        base, ext = os.path.splitext(file_path)
        normalized.export(f"{base}_normalized{ext}",
                          format=ext[1:].lower())  # Remove dot from extension
Quality Settings #
# High-quality export settings
QUALITY_SETTINGS = {
'mp3_high': {'format': 'mp3', 'bitrate': '320k'},
'mp3_medium': {'format': 'mp3', 'bitrate': '192k'},
'mp3_low': {'format': 'mp3', 'bitrate': '128k'},
'wav_cd': {'format': 'wav'}, # 44.1kHz, 16-bit by default
'flac_lossless': {'format': 'flac'},
}
# Apply quality preset
audio.export("output.mp3", **QUALITY_SETTINGS['mp3_high'])
Error Handling #
def safe_audio_processing(input_path, output_path):
    """Normalize, fade and export one file without letting errors escape.

    Returns True when the MP3 was written. A missing input file or any other
    exception raised while loading, validating, processing or exporting is
    printed and reported as False.
    """
    try:
        # Reject a missing input up front
        input_missing = not os.path.exists(input_path)
        if input_missing:
            raise FileNotFoundError(f"Input file not found: {input_path}")
        # Load, then check the minimum requirements
        source = AudioSegment.from_file(input_path)
        validate_audio(source)
        # Normalize and add short fades at both ends
        leveled = source.normalize()
        faded = leveled.fade_in(100).fade_out(100)
        # Write the result
        faded.export(output_path, format="mp3", bitrate="192k")
    except Exception as e:
        print(f"Error processing {input_path}: {e}")
        return False
    return True
Integration Examples #
With Flask Web App #
from flask import Flask, request, send_file
from pydub import AudioSegment
import io
app = Flask(__name__)
@app.route('/convert', methods=['POST'])
def convert_audio():
    """Normalize an uploaded audio file and send it back as an MP3.

    Expects a multipart upload under the ``audio`` field. Responds with 400
    when the field is missing or the upload cannot be decoded as audio.
    """
    from pydub.exceptions import CouldntDecodeError

    if 'audio' not in request.files:
        return 'No audio file', 400
    file = request.files['audio']
    # Load uploaded audio; uploads are untrusted, so a file that cannot be
    # decoded is a client error rather than a server error
    try:
        audio = AudioSegment.from_file(file)
    except CouldntDecodeError:
        return 'Could not decode audio file', 400
    # Process (example: normalize and convert to MP3)
    processed = audio.normalize()
    # Export to memory
    output = io.BytesIO()
    processed.export(output, format='mp3', bitrate='192k')
    output.seek(0)
    return send_file(output,
                     mimetype='audio/mpeg',
                     as_attachment=True,
                     download_name='converted.mp3')
With Jupyter Notebooks #
# Display audio in Jupyter
from IPython.display import Audio
import numpy as np
# Convert AudioSegment to numpy for analysis
samples = np.array(audio.get_array_of_samples())
if audio.channels == 2:
    # Stereo samples are interleaved (L, R, L, R, ...): group them per frame,
    # then transpose because Audio expects shape (channels, samples)
    samples = samples.reshape((-1, 2)).T
# Display in notebook
Audio(data=samples, rate=audio.frame_rate)
🎵 Ready to Rock! #
Pydub-NG makes audio manipulation in Python simple and powerful. Whether you’re building a podcast processor, music analyzer, or audio converter, this cheat sheet has you covered!
Remember: Pydub-NG is the actively maintained fork - choose it for new projects!