Use Python to Automate Audio Sample Processing via PyDub

How I use Python to automate some audio processing tasks, like normalization, phase-issue correction, renaming, and more.

I create sample packs and often need to process the samples to normalize, re-name, fix phase issues, etc. This script was created to automate this process. I've been using it for a few months now and it's been very helpful for my workflow, so figured I'd share.

Setup:

Since we are only dealing with WAV files here, we just need one package: PyDub. Install it with `pip install pydub` (I used a virtual environment for this project).

Code

First, let's define some constants at the top

# --------------------- Run Settings -------------------- #
SAMPLE_WIDTH = 2              # Bytes per sample: 2 = 16 bit, 4 = 32 bit
PHASE_DBFS_THRESH = 3.25      # Sum the orig signal to mono. If its dBFS val differs from the
                              # orig stereo file by more than this, we have phase issues.
NORMALIZATION_HEADROOM = 5.0  # How many dB below 0 to normalize to
DELIM = "|"                   # Delimiter to use when writing info text file
FADE_DURATION = 80            # ms to fade (kill clicks)
APPLY_FADE_LEN_THRESH_S = 3.0 # If above this many s, apply fade. aka dont kill 1shot drums

Next we can define a few functions to help rename things, if needed. I mainly did this because sometimes I mass-export clips from Ableton and the filenames have an insane string of numbers after each of them. I used the re library to try to detect all of this nonsense and delete it, while also being mindful of not overwriting existing filenames if they are no longer unique after cleanup.

I also added some logic to standardize the formatting of bpm, key, etc. if applicable.

def fix_filename(filename, save_dir=None):
    """Return a cleaned-up, unique base name (no extension) for a sample.

    The name is lower-cased; bpm / key / bar-count tokens and bracketed
    export suffixes such as ``[2021-03-04 153012]`` are removed; leftover
    separators are tidied; and the tokens that were found are re-appended
    in a standard ``(bpm, key, bars)`` group.

    Example: ``"Drum Loop 120bpm [2021-03-04 153012]"`` becomes
    ``"drum loop (120bpm)"``.

    Args:
        filename: Original file name, with or without a ``.wav`` extension.
        save_dir: Directory checked for name collisions. Defaults to the
            module-level ``SAVEDIR``.

    Returns:
        The new name WITHOUT extension (the caller appends ``.wav``). If
        ``<name>.wav`` already exists in ``save_dir``, a random ``(n)``
        suffix is appended until the name is free.
    """
    if save_dir is None:
        save_dir = SAVEDIR

    # Raw strings keep the regex backslashes away from Python's own
    # string-escape handling.
    # NOTE: the patterns are deliberately loose and can also hit ordinary
    # words (key_pat matches "admin", for instance).
    bpm_pat = r'\(*\,?\d+\s{0,2}(?:bpm|bp)\,?\)*'
    key_pat = r'\(*,?[a-g]+.?(?:minor|major|maj|min|sharp|flat|#)\,?\)*'
    # "bars" has to be tried before "bar", otherwise a stray "s" is left over.
    bars_pat = r'\(*,?\d+.?(?:bars|bar)\,?\)*'
    extranumbers_pat = r'\[+\S+\s\d+\]+'
    blankout_pat = "|".join(
        (bpm_pat, key_pat, bars_pat, extranumbers_pat, r'\[bip\]'))

    # Ordered tidy-up of whatever the blank-out leaves behind. Runs are
    # collapsed to a single separator rather than deleted, so neighbouring
    # words are never fused together.
    cleanup_steps = (
        (r'\(\s*,\s*', "("),       # "(,"          -> "("
        (r'\s*,(?:\s*,)+', ","),   # ",," / ", ,"  -> ","
        (r'\s+\)', ")"),           # "  )"         -> ")"
        (r'\(\s*\)', ""),          # emptied "()"  -> removed
        (r'\s*_[\s_]*', "_"),      # "__" / " _ "  -> "_"
        (r'-{2,}', "-"),           # "--"          -> "-"
        (r'\({2,}', "("),          # "(("          -> "("
        (r'\){2,}', ")"),          # "))"          -> ")"
        (r'\s{2,}', " "),          # space runs    -> single space (last)
    )

    name = filename.lower()
    # Accept both "foo.wav" and a bare stem; everything below works on the stem.
    if name.endswith(".wav"):
        name = name[:-len(".wav")]

    # First hit of each kind, stripped of the spaces / parens / commas the
    # loose patterns drag along. Order fixes the layout: bpm, key, bars.
    drop_punct = str.maketrans("", "", " (),")
    tokens = []
    for pattern in (bpm_pat, key_pat, bars_pat):
        match = re.search(pattern, name)
        if match:
            tokens.append(match.group(0).translate(drop_punct))
    info_string = f"({', '.join(tokens)})" if tokens else ""

    name = re.sub(blankout_pat, "", name)  # Get rid of the old stuff
    for pattern, replacement in cleanup_steps:
        name = re.sub(pattern, replacement, name)
    # Drop separators left dangling where a token used to be.
    name = name.strip().rstrip(" _-,")

    if info_string:
        name = f"{name} {info_string}".strip()

    # Make sure filenames are unique
    while Path(save_dir, f"{name}.wav").is_file():
        name = f"{name} ({random.randint(0, 999)})"

    return name

Now get your WAV file. I ended up using Automator on Mac to run this script via an action in Finder (right-click menu), but you can feed your files in however you wish.

# Load the source file (the path is supplied by the shell script / Finder action)
wav_in = AudioSegment.from_wav(WAV_IN_PATH)
# Keep the source's sample rate and channel count for the run log
FRAMES_PER_SECOND, CHANNEL_COUNT = wav_in.frame_rate, wav_in.channels

Now do a bunch of checks / fixes:

  • Try to detect phase issues by converting the input file to mono, then comparing the loudness to the source file. If it's too quiet, flip the phase of one channel.
  • Normalize the sample.
  • Get rid of leading silence
  • Apply a slight fade to beginning and end to prevent clicks when looping, but only for longer samples (don't kill drum transients)
# Phase check: fold the file down to mono and compare its loudness with the
# original. A gap larger than PHASE_DBFS_THRESH is treated as a phase problem.
stereo_sound_mono = wav_in.set_channels(1)
old_dbfs, mono_dbfs = wav_in.dBFS, stereo_sound_mono.dBFS
phase_mono_db_diff = old_dbfs - mono_dbfs
has_phase_issues = abs(phase_mono_db_diff) > PHASE_DBFS_THRESH

if not has_phase_issues:
    stereo_sound = wav_in.set_channels(2)
else:
    # Invert the second channel only, then rebuild the stereo pair
    mono_parts = wav_in.split_to_mono()
    left_channel = mono_parts[0]
    right_channel = mono_parts[1].invert_phase()
    stereo_sound = AudioSegment.from_mono_audiosegments(left_channel, right_channel)

# Normalize; the peak level is captured on both sides for the run log
stereo_sound = stereo_sound.set_sample_width(SAMPLE_WIDTH)
peak_pre_norm = round(stereo_sound.max_dBFS, 2)
stereo_sound = effects.normalize(stereo_sound, headroom=NORMALIZATION_HEADROOM)
peak_post_norm = round(stereo_sound.max_dBFS, 2)

# Trim leading silence (the frame count is taken before trimming)
frame_count = stereo_sound.frame_count()
leading_silence_end = silence.detect_leading_silence(stereo_sound)
stereo_sound = stereo_sound[leading_silence_end:]

# Fade both ends, but only when the sample is long enough not to be a 1shot
sample_duration_s = round(stereo_sound.duration_seconds, 2)
if sample_duration_s > APPLY_FADE_LEN_THRESH_S:
    stereo_sound = stereo_sound.fade_in(FADE_DURATION).fade_out(FADE_DURATION)
    fade_applied = FADE_DURATION
else:
    fade_applied = "None"

Now save the file somewhere. I have the script create a folder in the wav directory, then save the files in there.

Note that I added some code to save a run file in the folder as well, mostly for debugging. You can delete all of this if ya want.

# Finally, save new wav file under its cleaned-up name
new_name = fix_filename(WAV_IN_NAME)
file_handle = stereo_sound.export(Path(SAVEDIR,f"{new_name}.wav"), format="wav")
# export() hands back the still-open output file; close it so the data is
# flushed and the handle is not leaked.
file_handle.close()

Full Code

import sys
from pathlib import Path
import datetime
from pydub import AudioSegment, effects, silence
import re
import random

# REF https://github.com/jiaaro/pydub/blob/master/API.markdown

# ---------------------  Run Settings -------------------- #

# Output sample width in bytes (2 = 16 bit, 4 = 32 bit)
SAMPLE_WIDTH = 2
# Phase check: the file is summed to mono and its dBFS compared with the
# stereo original; a gap larger than this many dB counts as a phase problem.
PHASE_DBFS_THRESH = 3.25
# dB of headroom to leave below 0 when normalizing
NORMALIZATION_HEADROOM = 5.0
# Column delimiter for the info text file
DELIM = "|"
# Fade length in ms (kills clicks)
FADE_DURATION = 80
# Only samples longer than this many seconds get fades, so 1shot drums
# keep their transients
APPLY_FADE_LEN_THRESH_S = 3.0

# ---------------------  Constants -------------------- #
# Column titles for the run log, in the order the values are written.
# NOTE(review): the "Silent Frames Trimmed (beg)" column is filled from
# pydub's detect_leading_silence, which appears to report milliseconds
# rather than frames - confirm before relying on the unit.
INFO_COL_HEADERS = [
    "Filename",
    "Length (s)",
    "Sample Rate (Hz)",
    "Channels",
    "Phase Corrected?",
    "Phase Mono DB Diff",
    "Pre/Post Peak dB",
    "Orig Length (samples)",
    "Silent Frames Trimmed (beg)",
    "Fade Added (ms)",
]
INFO_COL_HEADER_STRING = DELIM.join(INFO_COL_HEADERS)

# Figure out file paths - in and out
# The input WAV path is the script's only argument; exit with a usage hint
# instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <path-to-wav-file>")
WAV_IN_PATH = Path(sys.argv[1])
WAV_IN_NAME = WAV_IN_PATH.stem
WAV_IN_DIR = WAV_IN_PATH.parent
# Processed files and the run log go in a sub-folder next to the input file
SAVEDIR = Path(WAV_IN_DIR,"procfrompython")
SAVEDIR.mkdir(parents=True, exist_ok=True)
NEWFILEPATH = Path(SAVEDIR, 'info.txt')

def fix_filename(filename, save_dir=None):
    """Return a cleaned-up, unique base name (no extension) for a sample.

    The name is lower-cased; bpm / key / bar-count tokens and bracketed
    export suffixes such as ``[2021-03-04 153012]`` are removed; leftover
    separators are tidied; and the tokens that were found are re-appended
    in a standard ``(bpm, key, bars)`` group.

    Example: ``"Drum Loop 120bpm [2021-03-04 153012]"`` becomes
    ``"drum loop (120bpm)"``.

    Args:
        filename: Original file name, with or without a ``.wav`` extension.
        save_dir: Directory checked for name collisions. Defaults to the
            module-level ``SAVEDIR``.

    Returns:
        The new name WITHOUT extension (the caller appends ``.wav``). If
        ``<name>.wav`` already exists in ``save_dir``, a random ``(n)``
        suffix is appended until the name is free.
    """
    if save_dir is None:
        save_dir = SAVEDIR

    # Raw strings keep the regex backslashes away from Python's own
    # string-escape handling.
    # NOTE: the patterns are deliberately loose and can also hit ordinary
    # words (key_pat matches "admin", for instance).
    bpm_pat = r'\(*\,?\d+\s{0,2}(?:bpm|bp)\,?\)*'
    key_pat = r'\(*,?[a-g]+.?(?:minor|major|maj|min|sharp|flat|#)\,?\)*'
    # "bars" has to be tried before "bar", otherwise a stray "s" is left over.
    bars_pat = r'\(*,?\d+.?(?:bars|bar)\,?\)*'
    extranumbers_pat = r'\[+\S+\s\d+\]+'
    blankout_pat = "|".join(
        (bpm_pat, key_pat, bars_pat, extranumbers_pat, r'\[bip\]'))

    # Ordered tidy-up of whatever the blank-out leaves behind. Runs are
    # collapsed to a single separator rather than deleted, so neighbouring
    # words are never fused together.
    cleanup_steps = (
        (r'\(\s*,\s*', "("),       # "(,"          -> "("
        (r'\s*,(?:\s*,)+', ","),   # ",," / ", ,"  -> ","
        (r'\s+\)', ")"),           # "  )"         -> ")"
        (r'\(\s*\)', ""),          # emptied "()"  -> removed
        (r'\s*_[\s_]*', "_"),      # "__" / " _ "  -> "_"
        (r'-{2,}', "-"),           # "--"          -> "-"
        (r'\({2,}', "("),          # "(("          -> "("
        (r'\){2,}', ")"),          # "))"          -> ")"
        (r'\s{2,}', " "),          # space runs    -> single space (last)
    )

    name = filename.lower()
    # Accept both "foo.wav" and a bare stem; everything below works on the stem.
    if name.endswith(".wav"):
        name = name[:-len(".wav")]

    # First hit of each kind, stripped of the spaces / parens / commas the
    # loose patterns drag along. Order fixes the layout: bpm, key, bars.
    drop_punct = str.maketrans("", "", " (),")
    tokens = []
    for pattern in (bpm_pat, key_pat, bars_pat):
        match = re.search(pattern, name)
        if match:
            tokens.append(match.group(0).translate(drop_punct))
    info_string = f"({', '.join(tokens)})" if tokens else ""

    name = re.sub(blankout_pat, "", name)  # Get rid of the old stuff
    for pattern, replacement in cleanup_steps:
        name = re.sub(pattern, replacement, name)
    # Drop separators left dangling where a token used to be.
    name = name.strip().rstrip(" _-,")

    if info_string:
        name = f"{name} {info_string}".strip()

    # Make sure filenames are unique
    while Path(save_dir, f"{name}.wav").is_file():
        name = f"{name} ({random.randint(0, 999)})"

    return name


# Load the source file (the path is supplied by the shell script / Finder action)
wav_in = AudioSegment.from_wav(WAV_IN_PATH)
# Keep the source's sample rate and channel count for the run log
FRAMES_PER_SECOND, CHANNEL_COUNT = wav_in.frame_rate, wav_in.channels

# Phase check: fold the file down to mono and compare its loudness with the
# original. A gap larger than PHASE_DBFS_THRESH is treated as a phase problem.
stereo_sound_mono = wav_in.set_channels(1)
old_dbfs, mono_dbfs = wav_in.dBFS, stereo_sound_mono.dBFS
phase_mono_db_diff = old_dbfs - mono_dbfs
has_phase_issues = abs(phase_mono_db_diff) > PHASE_DBFS_THRESH

if not has_phase_issues:
    stereo_sound = wav_in.set_channels(2)
else:
    # Invert the second channel only, then rebuild the stereo pair
    mono_parts = wav_in.split_to_mono()
    left_channel = mono_parts[0]
    right_channel = mono_parts[1].invert_phase()
    stereo_sound = AudioSegment.from_mono_audiosegments(left_channel, right_channel)

# Normalize; the peak level is captured on both sides for the run log
stereo_sound = stereo_sound.set_sample_width(SAMPLE_WIDTH)
peak_pre_norm = round(stereo_sound.max_dBFS, 2)
stereo_sound = effects.normalize(stereo_sound, headroom=NORMALIZATION_HEADROOM)
peak_post_norm = round(stereo_sound.max_dBFS, 2)

# Trim leading silence (the frame count is taken before trimming)
frame_count = stereo_sound.frame_count()
leading_silence_end = silence.detect_leading_silence(stereo_sound)
stereo_sound = stereo_sound[leading_silence_end:]

# Fade both ends, but only when the sample is long enough not to be a 1shot
sample_duration_s = round(stereo_sound.duration_seconds, 2)
if sample_duration_s > APPLY_FADE_LEN_THRESH_S:
    stereo_sound = stereo_sound.fade_in(FADE_DURATION).fade_out(FADE_DURATION)
    fade_applied = FADE_DURATION
else:
    fade_applied = "None"

# Finally, save new wav file under its cleaned-up name
new_name = fix_filename(WAV_IN_NAME)
file_handle = stereo_sound.export(Path(SAVEDIR,f"{new_name}.wav"), format="wav")
# export() hands back the still-open output file; close it so the data is
# flushed and the handle is not leaked.
file_handle.close()

# Save run info in a file
# The log is created once with a timestamp line, a blank line and the column
# headers; every run then adds exactly one delimited row.
if not NEWFILEPATH.exists():
    with open(NEWFILEPATH, 'w') as f:
        f.write(f"ran util on: {datetime.datetime.now()}")
        f.write("\n")
        f.write("\n")
        f.write(INFO_COL_HEADER_STRING)
        f.write("\n")

# Values in the same order as INFO_COL_HEADERS
new_data_ary = [WAV_IN_NAME,sample_duration_s,FRAMES_PER_SECOND,CHANNEL_COUNT,
                has_phase_issues,phase_mono_db_diff,
                f"{peak_pre_norm} / {peak_post_norm}", frame_count,
                leading_silence_end, fade_applied]
new_data_string = DELIM.join(str(itm) for itm in new_data_ary)  # Stringify for printing

# Append instead of reading the whole file back and rewriting it: earlier rows
# are never truncated, so an interrupted run cannot wipe the existing log.
with open(NEWFILEPATH, 'a') as f:
    f.write(new_data_string)
    f.write("\n")

I don't expect this to work super well as a copy+paste solution, since most of the logic is customized to the issues I was running into, but hopefully it gives you ideas.