🎵 AudioFlux Cheat Sheet #
A comprehensive Python library for audio and music analysis, feature extraction, and deep learning applications.
📦 Installation #
Python Package Install #
Requires Python 3.6+
# Using PyPI (recommended)
pip install audioflux
# Using conda
conda install -c tanky25 -c conda-forge audioflux
Platform Support #
- ✅ Linux, macOS, Windows
- ✅ iOS, Android (mobile builds available)
Quick Start #
Basic Audio Loading #
import audioflux as af
import numpy as np
# Load sample audio
sample_path = af.utils.sample_path('220') # 220Hz sample
audio_arr, sr = af.read(sample_path)
Simple Mel Spectrogram #
# Quick extraction
spec_arr, _ = af.mel_spectrogram(audio_arr, samplate=sr)
# Using BFT class (recommended - more flexible)
from audioflux.type import SpectralFilterBankScaleType
bft_obj = af.BFT(num=128, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.MEL)
spec_arr = bft_obj.bft(audio_arr)
spec_arr = np.abs(spec_arr)
Visualization #
import matplotlib.pyplot as plt
from audioflux.display import fill_spec
audio_len = audio_arr.shape[-1]
fig, ax = plt.subplots()
img = fill_spec(spec_arr, axes=ax,
x_coords=bft_obj.x_coords(audio_len),
y_coords=bft_obj.y_coords(),
x_axis='time', y_axis='log',
title='Mel Spectrogram')
fig.colorbar(img, ax=ax)
Transform Algorithms #
Multi-Scale Transforms #
Support all frequency scale types:
from audioflux.type import SpectralFilterBankScaleType
# Available scale types:
# - LINEAR: Short-time Fourier transform
# - LINSPACE: Evenly distributed frequencies
# - MEL: Mel-scale spectrogram
# - BARK: Bark-scale spectrogram
# - ERB: Erb-scale spectrogram
# - OCTAVE: Octave-scale spectrogram
# - LOG: Logarithmic-scale spectrogram
# BFT - Based Fourier Transform (STFT-like)
bft = af.BFT(num=128, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.MEL)
# NSGT - Non-Stationary Gabor Transform
nsgt = af.NSGT(num=84, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.LOG)
# CWT - Continuous Wavelet Transform
cwt = af.CWT(num=84, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.LOG)
# PWT - Pseudo Wavelet Transform
pwt = af.PWT(num=84, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.LOG)
Independent Transforms #
Single-purpose transforms:
# CQT - Constant-Q Transform
cqt = af.CQT(num=84, samplate=sr)
# VQT - Variable-Q Transform
vqt = af.VQT(num=84, samplate=sr)
# ST - S-Transform/Stockwell Transform
st = af.ST(num=1024, samplate=sr)
# FST - Fast S-Transform
fst = af.FST(num=1024, samplate=sr)
# DWT - Discrete Wavelet Transform
dwt = af.DWT(num=1024, wavelet_type='db4')
# WPT - Wave Packet Transform
wpt = af.WPT(num=1024, wavelet_type='db4')
# SWT - Stationary Wavelet Transform
swt = af.SWT(num=1024, wavelet_type='db4')
Synchrosqueezing/Reassignment #
Time-frequency sharpening techniques:
# Reassignment for STFT
reassign_obj = af.Reassign(num=1024, samplate=sr)
# Synchrosqueezing for CWT
synsq_obj = af.Synsq(num=84, samplate=sr)
# WSST - Wavelet Synchrosqueezing Transform
wsst_obj = af.WSST(num=84, samplate=sr)
🎯 Feature Extraction #
Spectral Features #
# Spectral features from any spectrum type
spectral_obj = af.Spectral(num=128, samplate=sr)
features = spectral_obj.spectral(spec_arr)
# Available spectral features:
# - centroid, spread, skewness, kurtosis
# - rolloff, flux, flatness, crest
# - energy, entropy
XXCC - Cepstral Coefficients #
# MFCC, BFCC, etc. from any spectrum
xxcc_obj = af.XXCC(num=128, cc_num=13, samplate=sr)
mfcc_arr = xxcc_obj.xxcc(spec_arr)
# Works with all spectrum types:
# MEL → MFCC, BARK → BFCC, ERB → EFCC, etc.
Deconvolution #
# Deconvolution for spectrum enhancement
deconv_obj = af.Deconv(num=128, samplate=sr)
deconv_arr = deconv_obj.deconv(spec_arr)
Chroma Features #
# Chroma from CQT (recommended)
cqt_obj = af.CQT(num=84, samplate=sr)
cqt_spec = cqt_obj.cqt(audio_arr)
chroma_obj = af.Chroma()
chroma_arr = chroma_obj.chroma(cqt_spec)
# Or from BFT with Linear/Octave scale
bft_obj = af.BFT(num=128, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.OCTAVE)
Temporal Features #
# Time-domain features
temporal_obj = af.Temporal()
temporal_features = temporal_obj.temporal(audio_arr)
# Features: ZCR, energy, entropy, etc.
🎼 Music Information Retrieval (MIR) #
Pitch Detection #
# YIN algorithm
yin_obj = af.YIN(samplate=sr)
pitch_arr = yin_obj.yin(audio_arr)
# STFT-based pitch
pitch_obj = af.Pitch(samplate=sr)
pitch_arr = pitch_obj.pitch(audio_arr)
Onset Detection #
# Spectral flux
onset_obj = af.Onset(samplate=sr)
onset_arr = onset_obj.onset(audio_arr)
# Novelty-based detection
novelty_obj = af.Novelty(samplate=sr)
novelty_arr = novelty_obj.novelty(audio_arr)
Harmonic-Percussive Separation #
# Median filtering approach
hpss_obj = af.HPSS()
harmonic, percussive = hpss_obj.hpss(spec_arr)
# NMF-based separation
nmf_obj = af.NMF(n_components=10)
harmonic, percussive = nmf_obj.nmf(spec_arr)
🔧 FeatureExtractor Class #
High-level interface for batch feature extraction:
from audioflux.feature import FeatureExtractor
# Initialize with desired features
extractor = FeatureExtractor(
features=['mfcc', 'spectral_centroid', 'chroma'],
mfcc_params={'n_mfcc': 13},
samplate=sr
)
# Extract all features at once
features_dict = extractor.extract(audio_arr)
Display & Visualization #
Built-in Display Functions #
from audioflux.display import fill_spec, fill_wave
# Spectrogram display
fill_spec(spec_arr, axes=ax, x_coords=x_coords, y_coords=y_coords,
x_axis='time', y_axis='log', title='Spectrogram')
# Waveform display
fill_wave(audio_arr, axes=ax, samplate=sr)
🛠️ Utilities #
Sample Data #
# Built-in sample files
sample_path = af.utils.sample_path('220') # 220Hz sine wave
sample_path = af.utils.sample_path('music') # Music sample
Audio I/O #
# Read audio files
audio_arr, sr = af.read('path/to/audio.wav')
# Write audio files
af.write('output.wav', audio_arr, sr)
🎯 Common Use Cases #
Music Classification #
# Extract comprehensive features
bft_obj = af.BFT(num=128, radix2_exp=12, samplate=sr,
scale_type=SpectralFilterBankScaleType.MEL)
spec_arr = np.abs(bft_obj.bft(audio_arr))
xxcc_obj = af.XXCC(num=128, cc_num=13, samplate=sr)
mfcc_features = xxcc_obj.xxcc(spec_arr)
spectral_obj = af.Spectral(num=128, samplate=sr)
spectral_features = spectral_obj.spectral(spec_arr)
# Combine features for ML model
features = np.concatenate([mfcc_features.flatten(),
spectral_features.flatten()])
Audio Similarity #
# Chroma features for harmonic similarity
cqt_obj = af.CQT(num=84, samplate=sr)
chroma_obj = af.Chroma()
def extract_chroma(audio_file):
audio_arr, sr = af.read(audio_file)
cqt_spec = cqt_obj.cqt(audio_arr)
return chroma_obj.chroma(cqt_spec)
# Compare songs
chroma1 = extract_chroma('song1.wav')
chroma2 = extract_chroma('song2.wav')
similarity = np.corrcoef(chroma1.flatten(), chroma2.flatten())[0,1]
Real-time Processing #
# Process audio in chunks
chunk_size = 1024
hop_length = 512
bft_obj = af.BFT(num=128, radix2_exp=12, samplate=sr)
for i in range(0, len(audio_arr) - chunk_size, hop_length):
chunk = audio_arr[i:i + chunk_size]
spec_chunk = bft_obj.bft(chunk)
# Process spec_chunk...
⚡ Performance Tips #
Memory Efficiency #
- Use appropriate `radix2_exp` values (the FFT length is 2**radix2_exp)
- Process audio in chunks for large files
- Use `np.abs()` only when needed for magnitude spectra
Speed Optimization #
- Reuse transform objects instead of recreating
- Use BFT class for flexible, efficient processing
- Consider lower resolution for real-time applications
Batch Processing #
# Process multiple files efficiently
transform_obj = af.BFT(num=128, radix2_exp=12, samplate=sr)
results = []
for audio_file in audio_files:
audio_arr, _ = af.read(audio_file)
spec_arr = transform_obj.bft(audio_arr)
results.append(spec_arr)
Troubleshooting #
Common Issues #
- Memory errors: Reduce the `num` parameter or process in chunks
- Slow performance: Check that `radix2_exp` is not larger than needed (the FFT length is 2**radix2_exp)
- Empty results: Verify that the audio sampling rate matches `samplate`
- Import errors: Ensure all dependencies are installed
Debug Tips #
# Check audio properties
print(f"Audio shape: {audio_arr.shape}")
print(f"Sample rate: {sr}")
print(f"Duration: {len(audio_arr) / sr:.2f}s")
# Verify transform parameters
print(f"Transform output shape: {spec_arr.shape}")
Integration Examples #
With TensorFlow/PyTorch #
import tensorflow as tf
# AudioFlux features as TF input
features = extract_features(audio_arr)
features_tensor = tf.constant(features, dtype=tf.float32)
With Librosa Comparison #
# AudioFlux vs Librosa MFCC
import librosa
# AudioFlux
af_mfcc = xxcc_obj.xxcc(spec_arr)
# Librosa
lr_mfcc = librosa.feature.mfcc(y=audio_arr, sr=sr, n_mfcc=13)
🎵 Advanced Techniques #
Multi-Resolution Analysis #
# Different time-frequency resolutions
transforms = [
af.BFT(num=64, radix2_exp=10, samplate=sr), # High time res
af.BFT(num=128, radix2_exp=12, samplate=sr), # Balanced
af.BFT(num=256, radix2_exp=14, samplate=sr) # High freq res
]
multi_res_features = []
for transform in transforms:
spec = transform.bft(audio_arr)
multi_res_features.append(np.abs(spec))
Custom Feature Combinations #
# Combine multiple feature types
def extract_comprehensive_features(audio_arr, sr):
# Spectral features
bft_obj = af.BFT(num=128, radix2_exp=12, samplate=sr)
spec_arr = np.abs(bft_obj.bft(audio_arr))
# MFCC
xxcc_obj = af.XXCC(num=128, cc_num=13, samplate=sr)
mfcc = xxcc_obj.xxcc(spec_arr)
# Chroma
cqt_obj = af.CQT(num=84, samplate=sr)
cqt_spec = cqt_obj.cqt(audio_arr)
chroma_obj = af.Chroma()
chroma = chroma_obj.chroma(cqt_spec)
# Spectral features
spectral_obj = af.Spectral(num=128, samplate=sr)
spectral = spectral_obj.spectral(spec_arr)
return {
'mfcc': mfcc,
'chroma': chroma,
'spectral': spectral
}
Documentation & Resources #
- Official Docs: https://audioflux.top/
- GitHub: https://github.com/libAudioFlux/audioFlux
- PyPI: https://pypi.org/project/audioflux/
- License: MIT
Key Advantages #
- Performance: C-based implementation with Python bindings
- Comprehensive: 15+ transform algorithms, 100+ features
- 🎯 Flexible: Data flow design, modular architecture
- 📱 Cross-platform: Desktop and mobile support
- ML-Ready: Perfect for deep learning pipelines
- Visualization: Built-in display functions