🎵 Pydub-NG Cheat Sheet

🎵 Pydub-NG Cheat Sheet #

Active Fork Alert! 🚀 Pydub-NG is the actively maintained fork of the original Pydub, focused on modern Python versions and continuous development.

📦 Installation #

Quick Install #

# Latest stable version
pip install pydub-ng

# Development version
pip install git+https://github.com/medecau/pydub-ng.git@main

# Clone and install
git clone https://github.com/medecau/pydub-ng.git
cd pydub-ng
pip install -e .

Dependencies #

# Required for non-WAV formats
# Ubuntu/Debian
sudo apt-get install ffmpeg libavcodec-extra

# macOS
brew install ffmpeg

# Windows
# Download from https://ffmpeg.org/

🚀 Quick Start #

Basic Audio Loading #

from pydub import AudioSegment

# Load various formats
wav_audio = AudioSegment.from_wav("audio.wav")
mp3_audio = AudioSegment.from_mp3("audio.mp3")
ogg_audio = AudioSegment.from_ogg("audio.ogg")

# Generic loader (auto-detects format)
audio = AudioSegment.from_file("audio.mp3", format="mp3")
audio = AudioSegment.from_file("video.mp4")  # Extract audio from video

# From raw data
raw_audio = AudioSegment.from_file("audio.raw", 
                                   format="raw",
                                   frame_rate=44100, 
                                   channels=2, 
                                   sample_width=2)

Audio Properties #

# Get audio information
duration_ms = len(audio)           # Duration in milliseconds
duration_sec = len(audio) / 1000   # Duration in seconds
channels = audio.channels          # 1=mono, 2=stereo
frame_rate = audio.frame_rate      # Sample rate (Hz)
sample_width = audio.sample_width  # Bytes per sample
loudness = audio.dBFS             # Loudness in dBFS
max_loudness = audio.max_dBFS     # Peak amplitude

βœ‚οΈ Audio Manipulation #

Slicing & Trimming #

# Time-based slicing (milliseconds)
first_10_sec = audio[:10000]       # First 10 seconds
last_5_sec = audio[-5000:]         # Last 5 seconds
middle = audio[5000:15000]         # From 5s to 15s
every_5_sec = audio[::5000]        # Split into 5-second chunks

# Practical example
intro = song[:30000]               # First 30 seconds
outro = song[-30000:]              # Last 30 seconds

Volume Control #

# Adjust volume (dB)
louder = audio + 6                 # 6dB louder
quieter = audio - 3.5              # 3.5dB quieter

# Match a target average loudness (dBFS) by applying the missing gain
normalized = audio.apply_gain(-20.0 - audio.dBFS)

# Peak-normalize instead, leaving 0.1 dB of headroom below full scale
peak_normalized = audio.normalize(headroom=0.1)

# Apply gain to stereo channels separately
stereo_adjusted = audio.apply_gain_stereo(-6, +2)  # L: -6dB, R: +2dB

# Pan audio (stereo balance)
panned_left = audio.pan(-0.5)      # 50% to left
panned_right = audio.pan(+0.3)     # 30% to right

Concatenation & Mixing #

# Simple concatenation
combined = audio1 + audio2

# Crossfade between segments
smooth_transition = audio1.append(audio2, crossfade=1500)  # 1.5s crossfade

# Repeat audio
repeated = audio * 3               # Repeat 3 times

# Overlay/mix audio
mixed = audio1.overlay(audio2)
mixed_with_gain = audio1.overlay(audio2, gain_during_overlay=-10)

Effects & Processing #

# Fade in/out
faded_in = audio.fade_in(2000)     # 2-second fade in
faded_out = audio.fade_out(3000)   # 3-second fade out
both_fades = audio.fade_in(1000).fade_out(1000)

# Reverse audio
backwards = audio.reverse()

# Speed up without changing pitch (pure Python; only supports playback_speed > 1)
faster = audio.speedup(playback_speed=1.5)

# Slow down by reinterpreting the frame rate (this also lowers the pitch),
# then resample back to the original rate so the result plays everywhere
slower = audio._spawn(
    audio.raw_data,
    overrides={"frame_rate": int(audio.frame_rate * 0.8)},
).set_frame_rate(audio.frame_rate)

# Convert channels
mono = audio.set_channels(1)       # Convert to mono
stereo = audio.set_channels(2)     # Convert to stereo

# Resample
resampled = audio.set_frame_rate(22050)  # Change sample rate

πŸŽ›οΈ Advanced Features #

Working with Raw Samples #

import numpy as np
import array

# Get raw samples
samples = audio.get_array_of_samples()

# Process with NumPy
np_samples = np.array(samples)
processed = np.right_shift(np_samples, 1)  # Bit shift example

# Convert back to AudioSegment
processed_array = array.array(audio.array_type, processed)
new_audio = audio._spawn(processed_array)

Silence Detection & Removal #

from pydub.silence import split_on_silence, detect_silence

# Split on silence
chunks = split_on_silence(
    audio,
    min_silence_len=500,    # Minimum silence length (ms)
    silence_thresh=-40,     # Silence threshold (dBFS)
    keep_silence=100        # Keep some silence at edges
)

# Detect silence regions
silent_ranges = detect_silence(audio, min_silence_len=1000, silence_thresh=-40)
# Returns: [[start_ms, end_ms], ...]

# Remove silence
non_silent_chunks = [chunk for chunk in chunks if len(chunk) > 0]
processed_audio = sum(non_silent_chunks, AudioSegment.empty())

Creating Audio from Scratch #

# Generate silence
silence = AudioSegment.silent(duration=5000)  # 5 seconds of silence

# Create from raw data
custom_audio = AudioSegment(
    data=b'...',           # Raw audio bytes
    sample_width=2,        # 16-bit samples
    frame_rate=44100,      # 44.1kHz
    channels=2             # Stereo
)

# Empty AudioSegment (useful as accumulator)
playlist = AudioSegment.empty()
for track in tracks:
    playlist += track

Multi-channel Audio #

# Create stereo from mono channels
left_channel = AudioSegment.from_wav("left.wav")
right_channel = AudioSegment.from_wav("right.wav")
stereo = AudioSegment.from_mono_audiosegments(left_channel, right_channel)

# Split stereo to mono
left, right = stereo.split_to_mono()

💾 Export & Conversion #

Basic Export #

# Export to various formats
audio.export("output.mp3", format="mp3")
audio.export("output.wav", format="wav")
audio.export("output.ogg", format="ogg")
audio.export("output.flac", format="flac")

# Export with quality settings
audio.export("high_quality.mp3", 
             format="mp3", 
             bitrate="320k")

# Export with metadata
audio.export("tagged.mp3",
             format="mp3",
             bitrate="192k",
             tags={
                 "artist": "Artist Name",
                 "album": "Album Name",
                 "title": "Song Title",
                 "genre": "Rock"
             },
             cover="album_art.jpg")

Batch Processing #

import os
import glob

# Convert all videos to MP3
video_dir = '/path/to/videos/'
extensions = ('*.mp4', '*.flv', '*.avi')

os.chdir(video_dir)
for ext in extensions:
    for video in glob.glob(ext):
        mp3_name = os.path.splitext(video)[0] + '.mp3'
        AudioSegment.from_file(video).export(mp3_name, format='mp3')

# Process audio files in chunks
def export_in_chunks(audio, chunk_length_ms=30000):
    """Write consecutive slices of ``audio`` to numbered MP3 files.

    Each slice is ``chunk_length_ms`` milliseconds long (the last one may
    be shorter) and is saved in the current directory as
    ``chunk_000.mp3``, ``chunk_001.mp3``, and so on.
    """
    index = 0
    for piece in audio[::chunk_length_ms]:
        piece.export(f"chunk_{index:03d}.mp3", format="mp3")
        index += 1

🔊 Playback #

Simple Playback #

from pydub.playback import play

# Play audio directly
play(audio)

# Play with system player (cross-platform)
import os
audio.export("temp.wav", format="wav")
os.system("afplay temp.wav")  # macOS
os.system("aplay temp.wav")   # Linux
os.system("start temp.wav")   # Windows

Playback Dependencies #

# Install playback support (any one of these is enough)
pip install simpleaudio    # Recommended
# OR
pip install pyaudio        # Alternative
# OR use ffplay, which ships with ffmpeg (it is not a pip package)
sudo apt-get install ffmpeg   # Ubuntu/Debian; use `brew install ffmpeg` on macOS

🔧 Utilities & Helpers #

Audio Analysis #

# Get audio statistics
def analyze_audio(audio):
    """Summarize the basic properties of an audio segment.

    Returns a dict holding the duration in seconds, channel count, sample
    rate, sample width in bits, the segment's ``dBFS`` and ``max_dBFS``
    values, and the byte length of ``audio.raw_data`` (reported under
    ``file_size_bytes``; this is the raw sample data, not an encoded file).
    """
    report = {'duration_seconds': len(audio) / 1000}
    report['channels'] = audio.channels
    report['sample_rate'] = audio.frame_rate
    report['sample_width_bits'] = audio.sample_width * 8
    report['loudness_dBFS'] = audio.dBFS
    report['max_dBFS'] = audio.max_dBFS
    report['file_size_bytes'] = len(audio.raw_data)
    return report

stats = analyze_audio(audio)

Format Detection #

import mimetypes

def detect_audio_format(file_path):
    """Guess the pydub format name for a file from its name.

    Args:
        file_path: Path (or file name) whose extension is inspected.
            The file is never opened.

    Returns:
        One of ``'mp3'``, ``'wav'``, ``'ogg'``, ``'flac'``, ``'mp4'``, or
        ``'unknown'`` when the name matches none of them.
    """
    import os  # local import keeps this snippet self-contained

    mime_type, _ = mimetypes.guess_type(file_path)

    # The MIME string reported for an extension varies between Python
    # versions and system mime.types files (e.g. "audio/x-wav" for .wav),
    # so accept the common aliases as well as the canonical names.
    format_map = {
        'audio/mpeg': 'mp3',
        'audio/wav': 'wav',
        'audio/x-wav': 'wav',
        'audio/wave': 'wav',
        'audio/vnd.wave': 'wav',
        'audio/ogg': 'ogg',
        'application/ogg': 'ogg',
        'audio/flac': 'flac',
        'audio/x-flac': 'flac',
        'video/mp4': 'mp4'
    }
    if mime_type in format_map:
        return format_map[mime_type]

    # Fall back to the extension itself when the MIME database has no
    # (or an unexpected) entry for it.
    extension = os.path.splitext(os.fspath(file_path))[1].lower().lstrip('.')
    if extension in set(format_map.values()):
        return extension
    return 'unknown'

Audio Validation #

def validate_audio(audio, min_duration_ms=1000):
    """Check that a segment is long enough, mono or stereo, and >= 8 kHz.

    Args:
        audio: Segment to inspect; needs ``len()``, ``channels`` and
            ``frame_rate``.
        min_duration_ms: Shortest acceptable length in milliseconds.

    Returns:
        ``True`` when every check passes.

    Raises:
        ValueError: On the first failed check (duration, then channel
            count, then sample rate).
    """
    too_short = len(audio) < min_duration_ms
    if too_short:
        raise ValueError(f"Audio too short: {len(audio)}ms < {min_duration_ms}ms")

    # Only mono and stereo layouts are accepted
    if audio.channels not in (1, 2):
        raise ValueError(f"Unsupported channel count: {audio.channels}")

    below_minimum_rate = audio.frame_rate < 8000
    if below_minimum_rate:
        raise ValueError(f"Sample rate too low: {audio.frame_rate}Hz")

    return True

🎯 Common Use Cases #

Podcast Processing #

def process_podcast(audio_file):
    """Complete podcast processing pipeline.

    Loads ``audio_file``, levels it to -23 dBFS, cuts out pauses of one
    second or longer, rejoins the speech with 200 ms gaps, wraps it with
    ``intro.mp3`` / ``outro.mp3`` from the current directory and writes
    ``podcast_final.mp3``.

    Args:
        audio_file: Path or file object accepted by
            ``AudioSegment.from_file``.

    Returns:
        The final ``AudioSegment`` (intro + processed episode + outro).
    """
    target_dbfs = -23.0  # Podcast standard

    # Load audio
    audio = AudioSegment.from_file(audio_file)

    # Level to the target average loudness. normalize() only takes a
    # peak headroom, so apply the missing gain directly. Silent input
    # has a dBFS of -inf and is left untouched.
    if audio.dBFS != float("-inf"):
        audio = audio.apply_gain(target_dbfs - audio.dBFS)

    # Remove silence
    chunks = split_on_silence(audio,
                              min_silence_len=1000,
                              silence_thresh=-40)

    # Rejoin with minimal silence (a 200 ms gap follows every chunk,
    # including the last one, so the outro does not start abruptly)
    gap = AudioSegment.silent(duration=200)
    processed = AudioSegment.empty()
    for chunk in chunks:
        processed += chunk + gap

    # Add intro/outro
    intro = AudioSegment.from_file("intro.mp3")
    outro = AudioSegment.from_file("outro.mp3")

    final = intro + processed + outro

    # Export with podcast settings
    final.export("podcast_final.mp3",
                 format="mp3",
                 bitrate="128k",
                 tags={"genre": "Podcast"})

    return final

Music Playlist Creation #

def create_playlist(song_files, crossfade_ms=3000):
    """Create a seamless playlist with crossfades.

    Every song is levelled to -20 dBFS, the first one fades in over one
    second, later ones are crossfaded onto the playlist, and the result
    fades out over two seconds. The playlist is exported as
    ``"<minutes>_min_playlist.mp3"`` in the current directory.

    Args:
        song_files: Iterable of paths accepted by
            ``AudioSegment.from_file``.
        crossfade_ms: Desired crossfade length in milliseconds; shortened
            automatically for tracks that are shorter than this.

    Returns:
        The assembled playlist as an ``AudioSegment``.
    """
    target_dbfs = -20.0
    playlist = AudioSegment.empty()

    for i, song_file in enumerate(song_files):
        song = AudioSegment.from_file(song_file)

        # Level to the target average loudness. normalize() only takes a
        # peak headroom, so apply the missing gain directly. Silent
        # tracks have a dBFS of -inf and are left untouched.
        if song.dBFS != float("-inf"):
            song = song.apply_gain(target_dbfs - song.dBFS)

        if i == 0:
            # First song: fade in
            playlist = song.fade_in(1000)
        else:
            # Subsequent songs: crossfade. append() raises ValueError when
            # the crossfade is longer than either segment, so clamp it.
            overlap = max(0, min(crossfade_ms, len(playlist), len(song)))
            playlist = playlist.append(song, crossfade=overlap)

    # Final fade out
    playlist = playlist.fade_out(2000)

    # Calculate total length
    total_minutes = len(playlist) / (1000 * 60)

    playlist.export(f"{total_minutes:.1f}_min_playlist.mp3",
                    format="mp3",
                    bitrate="192k")

    return playlist

Audio Format Converter #

class AudioConverter:
    """Convert the audio files found in one directory into another format.

    Converted files are written to ``output_dir``, which is created on
    construction if it does not exist yet.
    """

    def __init__(self, input_dir, output_dir):
        self.input_dir = input_dir
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def convert_all(self, target_format="mp3", **export_kwargs):
        """Convert every recognised audio file directly inside ``input_dir``.

        Files are recognised by extension (case-insensitive); extra
        keyword arguments are forwarded to ``AudioSegment.export``.
        """
        audio_extensions = ('.mp3', '.wav', '.ogg', '.flac', '.m4a')
        pattern = os.path.join(self.input_dir, "*")

        for candidate in glob.glob(pattern):
            if not candidate.lower().endswith(audio_extensions):
                continue
            self.convert_file(candidate, target_format, **export_kwargs)

    def convert_file(self, input_path, target_format, **export_kwargs):
        """Convert one file, reporting success or failure on stdout.

        Errors are printed rather than raised so that one bad file does
        not stop a batch run.
        """
        try:
            audio = AudioSegment.from_file(input_path)

            file_name = os.path.basename(input_path)
            base_name, _ = os.path.splitext(file_name)
            output_path = os.path.join(self.output_dir, f"{base_name}.{target_format}")

            audio.export(output_path, format=target_format, **export_kwargs)
        except Exception as e:
            print(f"Error converting {input_path}: {e}")
        else:
            print(f"Converted: {input_path} -> {output_path}")

# Usage
converter = AudioConverter("input_audio/", "output_audio/")
converter.convert_all("mp3", bitrate="192k")

🐛 Debugging & Troubleshooting #

Enable FFmpeg Logging #

import logging

# Enable pydub converter logging
logger = logging.getLogger("pydub.converter")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

# Now you'll see ffmpeg commands
audio = AudioSegment.from_file("test.mp3")

Common Issues & Solutions #

FFmpeg Not Found #

# Set custom ffmpeg path
AudioSegment.converter = "/usr/local/bin/ffmpeg"
AudioSegment.ffmpeg = "/usr/local/bin/ffmpeg"
AudioSegment.ffprobe = "/usr/local/bin/ffprobe"

Memory Issues with Large Files #

# Process in chunks for large files
def process_large_file(file_path, chunk_size_ms=30000):
    """Normalize a file chunk by chunk and return the rejoined result.

    The file is loaded in full by ``AudioSegment.from_file`` before it is
    sliced into ``chunk_size_ms`` pieces, and all normalized pieces are
    held in a list until they are concatenated. Each piece is normalized
    independently of the others.
    """
    source = AudioSegment.from_file(file_path)
    normalized_parts = [piece.normalize() for piece in source[::chunk_size_ms]]
    return sum(normalized_parts, AudioSegment.empty())

Format Support Issues #

# Check supported formats
def check_format_support():
    """Return the formats from a fixed candidate list that export cleanly.

    A 100 ms silent segment is exported once per candidate format into a
    temporary directory; a format counts as supported when the export
    does not raise.

    Returns:
        List of supported format names, in candidate order.
    """
    import os
    import tempfile

    # NOTE(review): 'm4a' is presumably not a valid ffmpeg output format
    # name (the container is usually requested as 'ipod' or 'mp4') — confirm.
    test_formats = ['mp3', 'wav', 'ogg', 'flac', 'm4a']
    supported = []
    silence = AudioSegment.silent(duration=100)

    # Probe inside a throwaway directory so an existing "test.mp3" in the
    # working directory is never overwritten or deleted, and nothing is
    # left behind when an export fails part-way.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for fmt in test_formats:
            target = os.path.join(scratch_dir, f"test.{fmt}")
            try:
                # export() returns the open output handle; close it so the
                # directory can be removed on every platform.
                silence.export(target, format=fmt).close()
            except Exception:
                # Any export failure just means "not supported here".
                # KeyboardInterrupt / SystemExit still propagate.
                continue
            supported.append(fmt)

    return supported

🆚 Pydub-NG vs Original Pydub #

Detailed Comparison Table #

| Feature | Original Pydub | Pydub-NG | Notes |
| --- | --- | --- | --- |
| Maintenance | ⚠️ Limited | ✅ Active | Regular updates & bug fixes |
| Python Support | 3.6+ | 3.8+ | Modern Python features |
| Dependencies | Older | Updated | Latest FFmpeg compatibility |
| Performance | Standard | Optimized | Memory & speed improvements |
| Documentation | Basic | Enhanced | Better examples & guides |
| API Compatibility | - | 100% | Perfect drop-in replacement |

Migration Guide #

# Step 1: Uninstall original (if installed)
# pip uninstall pydub

# Step 2: Install Pydub-NG
# pip install pydub-ng

# Step 3: No code changes needed!
from pydub import AudioSegment  # Same import, better performance

# All existing code works identically
audio = AudioSegment.from_mp3("song.mp3")
louder = audio + 6  # Same API, enhanced functionality

🧪 Testing Audio Code #

Unit Testing with Pydub-NG #

import unittest
import tempfile
import os
from pydub import AudioSegment

class TestAudioProcessing(unittest.TestCase):
    """Sanity checks for gain, slicing and concatenation."""

    def setUp(self):
        """Create a 1-second test tone and a self-cleaning temp directory."""
        from pydub.generators import Sine

        # Use a tone rather than silence: silence has a dBFS of -inf, so
        # "-inf + 6 == -inf" would let the volume test pass without
        # checking anything. -20 dB leaves room for +6 dB without clipping.
        self.test_audio = Sine(440).to_audio_segment(duration=1000, volume=-20.0)

        # Remove the scratch directory after every test instead of leaking it
        temp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(temp_dir.cleanup)
        self.temp_dir = temp_dir.name

    def test_volume_adjustment(self):
        """Adding 6 raises the measured loudness by 6 dB."""
        original_dBFS = self.test_audio.dBFS
        louder = self.test_audio + 6
        self.assertAlmostEqual(louder.dBFS, original_dBFS + 6, places=1)

    def test_audio_slicing(self):
        """Slicing by milliseconds yields a segment of that length."""
        first_half = self.test_audio[:500]
        self.assertEqual(len(first_half), 500)

    def test_concatenation(self):
        """Concatenating a segment with itself doubles its length."""
        combined = self.test_audio + self.test_audio
        self.assertEqual(len(combined), len(self.test_audio) * 2)

if __name__ == '__main__':
    unittest.main()

Property-Based Testing #

from hypothesis import given, strategies as st

class TestAudioProperties:
    """Property-based checks on generated segments."""

    @given(st.integers(min_value=100, max_value=10000))
    def test_silence_duration(self, duration):
        """Generated silence is within 1 ms of the requested duration."""
        generated = AudioSegment.silent(duration=duration)
        drift = len(generated) - duration
        assert -1 <= drift <= 1

💡 Pro Tips #

Performance Optimization #

# Use WAV for intermediate processing (faster)
audio = AudioSegment.from_mp3("input.mp3")
# ... processing ...
audio.export("output.wav", format="wav")  # Fast export
# Convert to final format only at the end

# Batch operations
def batch_normalize(audio_files, target_dBFS=-20.0):
    """Level multiple files to the same average loudness.

    Each input is written next to its source as
    ``"<name>_normalized<ext>"``; the original file is left untouched.

    Args:
        audio_files: Iterable of file paths. The extension selects the
            export format.
        target_dBFS: Average loudness to reach, in dBFS.
    """
    for file_path in audio_files:
        audio = AudioSegment.from_file(file_path)

        # normalize() only takes a peak headroom, so reach the target
        # loudness by applying the missing gain. Silent files have a dBFS
        # of -inf and are exported unchanged.
        if audio.dBFS == float("-inf"):
            normalized = audio
        else:
            normalized = audio.apply_gain(target_dBFS - audio.dBFS)

        # Save alongside the source; lower-case the extension so that
        # "SONG.MP3" still maps to the "mp3" format name.
        base, ext = os.path.splitext(file_path)
        normalized.export(f"{base}_normalized{ext}",
                          format=ext[1:].lower())  # Remove dot from extension

Quality Settings #

# High-quality export settings
QUALITY_SETTINGS = {
    'mp3_high': {'format': 'mp3', 'bitrate': '320k'},
    'mp3_medium': {'format': 'mp3', 'bitrate': '192k'},
    'mp3_low': {'format': 'mp3', 'bitrate': '128k'},
    'wav_cd': {'format': 'wav'},  # 44.1kHz, 16-bit by default
    'flac_lossless': {'format': 'flac'},
}

# Apply quality preset
audio.export("output.mp3", **QUALITY_SETTINGS['mp3_high'])

Error Handling #

def safe_audio_processing(input_path, output_path):
    """Normalize, fade and export a file as MP3 without ever raising.

    Args:
        input_path: Source audio file.
        output_path: Destination for the 192k MP3.

    Returns:
        ``True`` on success; ``False`` after printing the error when any
        step fails (missing file, failed validation, decode or export
        error).
    """
    try:
        # Report a missing source with a clear message
        if not os.path.exists(input_path):
            raise FileNotFoundError(f"Input file not found: {input_path}")

        source = AudioSegment.from_file(input_path)
        validate_audio(source)

        # Normalize, then soften both edges with 100 ms fades
        polished = source.normalize()
        polished = polished.fade_in(100)
        polished = polished.fade_out(100)

        polished.export(output_path, format="mp3", bitrate="192k")
    except Exception as e:
        print(f"Error processing {input_path}: {e}")
        return False
    else:
        return True

🔗 Integration Examples #

With Flask Web App #

from flask import Flask, request, send_file
from pydub import AudioSegment
import io

app = Flask(__name__)

@app.route('/convert', methods=['POST'])
def convert_audio():
    """Normalize an uploaded audio file and return it as a 192k MP3.

    Expects a multipart POST with the file under the ``audio`` field.
    Responds with 400 when the field is missing or the upload cannot be
    decoded as audio.
    """
    from pydub.exceptions import CouldntDecodeError

    if 'audio' not in request.files:
        return 'No audio file', 400

    file = request.files['audio']

    # Uploads are untrusted: a file that cannot be decoded is a client
    # error, not a server crash.
    try:
        audio = AudioSegment.from_file(file)
    except CouldntDecodeError:
        return 'Unsupported or corrupt audio file', 400

    # Process (example: normalize and convert to MP3)
    processed = audio.normalize()

    # Export to memory
    output = io.BytesIO()
    processed.export(output, format='mp3', bitrate='192k')
    output.seek(0)

    return send_file(output,
                     mimetype='audio/mpeg',
                     as_attachment=True,
                     download_name='converted.mp3')

With Jupyter Notebooks #

# Display audio in Jupyter
from IPython.display import Audio
import numpy as np

# Convert AudioSegment to numpy for analysis
samples = np.array(audio.get_array_of_samples())
if audio.channels == 2:
    # Stereo samples are interleaved (L, R, L, R, ...). IPython's Audio
    # expects one row per channel, i.e. shape (channels, samples), so
    # split into (samples, 2) and transpose.
    samples = samples.reshape((-1, 2)).T

# Display in notebook
Audio(data=samples, rate=audio.frame_rate)

🎵 Ready to Rock! #

Pydub-NG makes audio manipulation in Python simple and powerful. Whether you're building a podcast processor, music analyzer, or audio converter, this cheat sheet has you covered!

Remember: Pydub-NG is the actively maintained fork - choose it for new projects! 🚀