""" Module: libfmp.c8.c8s1_hps Author: Meinard Müller, Frank Zalkow License: The MIT license, https://opensource.org/licenses/MIT This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP) """ from collections import OrderedDict import numpy as np from scipy import signal import librosa import IPython.display as ipd import pandas as pd def median_filter_horizontal(x, filter_len): """Apply median filter in horizontal direction Notebook: C8/C8S1_HPS.ipynb """ return signal.medfilt(x, [1, filter_len]) def median_filter_vertical(x, filter_len): """Apply median filter in vertical direction Notebook: C8/C8S1_HPS.ipynb """ return signal.medfilt(x, [filter_len, 1]) def convert_l_sec_to_frames(L_h_sec, Fs=22050, N=1024, H=512): """Convert filter length parameter from seconds to frame indices Notebook: C8/C8S1_HPS.ipynb """ L_h = int(np.ceil(L_h_sec * Fs / H)) return L_h def convert_l_hertz_to_bins(L_p_Hz, Fs=22050, N=1024, H=512): """Convert filter length parameter from Hertz to frequency bins Notebook: C8/C8S1_HPS.ipynb """ L_p = int(np.ceil(L_p_Hz * N / Fs)) return L_p def make_integer_odd(n): """Convert integer into odd integer Notebook: C8/C8S1_HPS.ipynb """ if(n % 2 == 0): n += 1 return n def hps(x, Fs, N, H, L_h, L_p, L_unit='physical', mask='binary', eps=0.001, detail=False): """Harmonic-percussive separation (HPS) algorithm Notebook: C8/C8S1_HPS.ipynb Args: x: Input signal Fs: Sampling rate of x N: Frame length H: Hopsize L_h: Horizontal median filter length given in seconds or frames L_p: Percussive median filter length given in Hertz or bins L_unit: Adjusts unit, either 'pyhsical' or 'indices' mask: Either 'binary' or 'soft' eps: Parameter used in soft maskig detail (bool): Returns detailed information Returns: x_h: Harmonic signal x_p: Percussive signal dict: dictionary containing detailed information; returned if "detail=True" """ assert L_unit in ['physical', 'indices'] assert mask in ['binary', 'soft'] # stft X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window='hann', center=True, pad_mode='constant') # power spectrogram Y = np.abs(X) ** 2 # median filtering if L_unit == 'physical': L_h = convert_l_sec_to_frames(L_h_sec=L_h, Fs=Fs, N=N, H=H) L_p = convert_l_hertz_to_bins(L_p_Hz=L_p, Fs=Fs, N=N, H=H) L_h = make_integer_odd(L_h) L_p = make_integer_odd(L_p) Y_h = signal.medfilt(Y, [1, L_h]) Y_p = signal.medfilt(Y, [L_p, 1]) # masking if mask == 'binary': M_h = np.int8(Y_h >= Y_p) M_p = np.int8(Y_h < Y_p) if mask == 'soft': eps = 0.00001 M_h = (Y_h + eps / 2) / (Y_h + Y_p + eps) M_p = (Y_p + eps / 2) / (Y_h + Y_p + eps) X_h = X * M_h X_p = X * M_p # istft x_h = librosa.istft(X_h, hop_length=H, win_length=N, window='hann', center=True, length=x.size) x_p = librosa.istft(X_p, hop_length=H, win_length=N, window='hann', center=True, length=x.size) if detail: return x_h, x_p, dict(Y_h=Y_h, Y_p=Y_p, M_h=M_h, M_p=M_p, X_h=X_h, X_p=X_p) else: return x_h, x_p def generate_audio_tag_html_list(list_x, Fs, width='150', height='40'): """Generates audio tag for html needed to be shown in table Notebook: C8/C8S1_HPS.ipynb """ audio_tag_html_list = [] for i in range(len(list_x)): audio_tag = ipd.Audio(list_x[i], rate=Fs) audio_tag_html = audio_tag._repr_html_().replace('\n', '').strip() audio_tag_html = audio_tag_html.replace('<audio ', '<audio style="width: '+width+'px; height: '+height+'px;"') audio_tag_html_list.append(audio_tag_html) return audio_tag_html_list def hrps(x, Fs, N, H, L_h, L_p, beta=2, L_unit='physical', detail=False): """Harmonic-residual-percussive separation (HRPS) algorithm Notebook: C8/C8S1_HPS.ipynb Args: x: Input signal Fs: Sampling rate of x N: Frame length H: Hopsize L_h: Horizontal median filter length given in seconds or frames L_p: Percussive median filter length given in Hertz or bins beta: Separation factor L_unit: Adjusts unit, either 'pyhsical' or 'indices' detail (bool): Returns detailed information Returns: x_h: Harmonic signal x_p: Percussive signal x_r: Residual signal dict: dictionary containing detailed information; returned if "detail=True" """ assert L_unit in ['physical', 'indices'] # stft X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window='hann', center=True, pad_mode='constant') # power spectrogram Y = np.abs(X) ** 2 # median filtering if L_unit == 'physical': L_h = convert_l_sec_to_frames(L_h_sec=L_h, Fs=Fs, N=N, H=H) L_p = convert_l_hertz_to_bins(L_p_Hz=L_p, Fs=Fs, N=N, H=H) L_h = make_integer_odd(L_h) L_p = make_integer_odd(L_p) Y_h = signal.medfilt(Y, [1, L_h]) Y_p = signal.medfilt(Y, [L_p, 1]) # masking M_h = np.int8(Y_h >= beta * Y_p) M_p = np.int8(Y_p > beta * Y_h) M_r = 1 - (M_h + M_p) X_h = X * M_h X_p = X * M_p X_r = X * M_r # istft x_h = librosa.istft(X_h, hop_length=H, win_length=N, window='hann', center=True, length=x.size) x_p = librosa.istft(X_p, hop_length=H, win_length=N, window='hann', center=True, length=x.size) x_r = librosa.istft(X_r, hop_length=H, win_length=N, window='hann', center=True, length=x.size) if detail: return x_h, x_p, x_r, dict(Y_h=Y_h, Y_p=Y_p, M_h=M_h, M_r=M_r, M_p=M_p, X_h=X_h, X_r=X_r, X_p=X_p) else: return x_h, x_p, x_r def experiment_hrps_parameter(fn_wav, param_list): """Script for running experiment over parameter list [[1024, 256, 0.1, 100], ... Notebook: C8/C8S1_HRPS.ipynb """ Fs = 22050 x, Fs = librosa.load(fn_wav, sr=Fs) list_x = [] list_x_h = [] list_x_p = [] list_x_r = [] list_N = [] list_H = [] list_L_h_sec = [] list_L_p_Hz = [] list_L_h = [] list_L_p = [] list_beta = [] for param in param_list: N, H, L_h_sec, L_p_Hz, beta = param print('N=%4d, H=%4d, L_h_sec=%4.2f, L_p_Hz=%3.1f, beta=%3.1f' % (N, H, L_h_sec, L_p_Hz, beta)) x_h, x_p, x_r = hrps(x, Fs=Fs, N=1024, H=512, L_h=L_h_sec, L_p=L_p_Hz, beta=beta) L_h = convert_l_sec_to_frames(L_h_sec=L_h_sec, Fs=Fs, N=N, H=H) L_p = convert_l_hertz_to_bins(L_p_Hz=L_p_Hz, Fs=Fs, N=N, H=H) list_x.append(x) list_x_h.append(x_h) list_x_p.append(x_p) list_x_r.append(x_r) list_N.append(N) list_H.append(H) list_L_h_sec.append(L_h_sec) list_L_p_Hz.append(L_p_Hz) list_L_h.append(L_h) list_L_p.append(L_p) list_beta.append(beta) html_x = generate_audio_tag_html_list(list_x, Fs=Fs) html_x_h = generate_audio_tag_html_list(list_x_h, Fs=Fs) html_x_p = generate_audio_tag_html_list(list_x_p, Fs=Fs) html_x_r = generate_audio_tag_html_list(list_x_r, Fs=Fs) pd.options.display.float_format = '{:,.1f}'.format pd.set_option('display.max_colwidth', None) df = pd.DataFrame(OrderedDict([ ('$N$', list_N), ('$H$', list_H), ('$L_h$ (sec)', list_L_h_sec), ('$L_p$ (Hz)', list_L_p_Hz), ('$L_h$', list_L_h), ('$L_p$', list_L_p), ('$\\beta$', list_beta), ('$x$', html_x), ('$x_h$', html_x_h), ('$x_r$', html_x_r), ('$x_p$', html_x_p)])) df.index = np.arange(1, len(df) + 1) ipd.display(ipd.HTML(df.to_html(escape=False, index=False)))