""" Module: libfmp.c4.c4s2_ssm Author: Meinard Müller, David Kopyto License: The MIT license, https://opensource.org/licenses/MIT This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP) """ import numpy as np import librosa from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap from numba import jit import libfmp.b import libfmp.c3 import libfmp.c4 @jit(nopython=True) def compute_sm_dot(X, Y): """Computes similarty matrix from feature sequences using dot (inner) product Notebook: C4/C4S2_SSM.ipynb """ S = np.dot(np.transpose(X), Y) return S def plot_feature_ssm(X, Fs_X, S, Fs_S, ann, duration, color_ann=None, title='', label='Time (seconds)', time=True, figsize=(5, 6), fontsize=10, clim_X=None, clim=None): """Plot SSM along with feature representation and annotations (standard setting is time in seconds) Notebook: C4/C4S2_SSM.ipynb """ cmap = libfmp.b.compressed_gray_cmap(alpha=-10) fig, ax = plt.subplots(3, 3, gridspec_kw={'width_ratios': [0.1, 1, 0.05], 'wspace': 0.2, 'height_ratios': [0.3, 1, 0.1]}, figsize=figsize) libfmp.b.plot_matrix(X, Fs=Fs_X, ax=[ax[0, 1], ax[0, 2]], clim=clim_X, xlabel='', ylabel='', title=title) ax[0, 0].axis('off') libfmp.b.plot_matrix(S, Fs=Fs_S, ax=[ax[1, 1], ax[1, 2]], cmap=cmap, clim=clim, title='', xlabel='', ylabel='', colorbar=True) ax[1, 1].set_xticks([]) ax[1, 1].set_yticks([]) libfmp.b.plot_segments(ann, ax=ax[2, 1], time_axis=True, fontsize=fontsize, colors=color_ann, time_label=label, time_max=duration*Fs_X) ax[2, 2].axis('off'), ax[2, 0].axis('off') libfmp.b.plot_segments(ann, ax=ax[1, 0], time_axis=True, fontsize=fontsize, direction='vertical', colors=color_ann, time_label=label, time_max=duration*Fs_X) return fig, ax @jit(nopython=True) def filter_diag_sm(S, L): """Path smoothing of similarity matrix by forward filtering along main diagonal Notebook: C4/C4S2_SSM-PathEnhancement.ipynb Args: S: Similarity matrix (SM) L: Length of filter Returns: S_L: Smoothed SM """ N = S.shape[0] M = S.shape[1] S_L = np.zeros((N, M)) S_extend_L = np.zeros((N + L, M + L)) S_extend_L[0:N, 0:M] = S for pos in range(0, L): S_L = S_L + S_extend_L[pos:(N + pos), pos:(M + pos)] S_L = S_L / L return S_L def subplot_matrix_colorbar(S, fig, ax, title='', Fs=1, xlabel='Time (seconds)', ylabel='Time (seconds)', clim=None, xlim=None, ylim=None, cmap=None): """Visualization function for showing zoomed sections of matrices Notebook: C4/C4S2_SSM-PathEnhancement.ipynb""" if cmap is None: cmap = libfmp.b.compressed_gray_cmap(alpha=-100) len_sec = S.shape[0] / Fs extent = [0, len_sec, 0, len_sec] im = ax.imshow(S, aspect='auto', extent=extent, cmap=cmap, origin='lower') fig.sca(ax) fig.colorbar(im) ax.set_title(title) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) if xlim is not None: ax.set_xlim(xlim) if ylim is not None: ax.set_ylim(ylim) if clim is not None: im.set_clim(clim) return im @jit(nopython=True) def compute_tempo_rel_set(tempo_rel_min, tempo_rel_max, num): """Compute logarithmically spaced relative tempo values Notebook: C4/C4S2_SSM-PathEnhancement.ipynb Args: tempo_rel_min: Minimum relative tempo tempo_rel_max: Maximum relative tempo num: Number of relative tempo values (inlcuding the min and max) Returns: tempo_rel_set: Set of relative tempo values """ tempo_rel_set = np.exp(np.linspace(np.log(tempo_rel_min), np.log(tempo_rel_max), num)) return tempo_rel_set @jit(nopython=True) def filter_diag_mult_sm(S, L=1, tempo_rel_set=np.asarray([1]), direction=0): """Path smoothing of similarity matrix by filtering in forward or backward direction along various directions around main diagonal Note: Directions are simulated by resampling one axis using relative tempo values Notebook: C4/C4S2_SSM-PathEnhancement.ipynb Args: S: Self-similarity matrix (SSM) L: Length of filter tempo_rel_set: Set of relative tempo values direction: Direction of smoothing (0: forward; 1: backward) Returns: S_L_final: Smoothed SM """ N = S.shape[0] M = S.shape[1] num = len(tempo_rel_set) S_L_final = np.zeros((N, M)) for s in range(0, num): M_ceil = int(np.ceil(M/tempo_rel_set[s])) resample = np.multiply(np.divide(np.arange(1, M_ceil+1), M_ceil), M) np.around(resample, 0, resample) resample = resample - 1 index_resample = np.maximum(resample, np.zeros(len(resample))).astype(np.int64) S_resample = S[:, index_resample] S_L = np.zeros((N, M_ceil)) S_extend_L = np.zeros((N + L, M_ceil + L)) # Forward direction if direction == 0: S_extend_L[0:N, 0:M_ceil] = S_resample for pos in range(0, L): S_L = S_L + S_extend_L[pos:(N + pos), pos:(M_ceil + pos)] # Backward direction if direction == 1: S_extend_L[L:(N+L), L:(M_ceil+L)] = S_resample for pos in range(0, L): S_L = S_L + S_extend_L[(L-pos):(N + L - pos), (L-pos):(M_ceil + L - pos)] S_L = S_L / L resample = np.multiply(np.divide(np.arange(1, M+1), M), M_ceil) np.around(resample, 0, resample) resample = resample - 1 index_resample = np.maximum(resample, np.zeros(len(resample))).astype(np.int64) S_resample_inv = S_L[:, index_resample] S_L_final = np.maximum(S_L_final, S_resample_inv) return S_L_final @jit(nopython=True) def shift_cyc_matrix(X, shift=0): """Cyclic shift of features matrix along first dimension Notebook: C4/C4S2_SSM-TranspositionInvariance.ipynb Args: X: Feature respresentation shift: Number of bins to be shifted Returns: X_cyc: Cyclically shifted feature matrix """ # Note: X_cyc = np.roll(X, shift=shift, axis=0) does to work for jit K, N = X.shape shift = np.mod(shift, K) X_cyc = np.zeros((K, N)) X_cyc[shift:K, :] = X[0:K-shift, :] X_cyc[0:shift, :] = X[K-shift:K, :] return X_cyc # @jit(nopython=True) def compute_sm_ti(X, Y, L=1, tempo_rel_set=np.asarray([1]), shift_set=np.asarray([0]), direction=2): """Compute enhanced similaity matrix by applying path smoothing and transpositions Notebook: C4/C4S2_SSM-TranspositionInvariance.ipynb Args: X, Y: Input feature sequences L: Length of filter tempo_rel_set: Set of relative tempo values shift_set: Set of shift indices direction: Direction of smoothing (0: forward; 1: backward; 2: both directions) Returns: S_TI: Transposition-invariant SM I_TI: Transposition index matrix """ for shift in shift_set: Y_cyc = shift_cyc_matrix(Y, shift) S_cyc = libfmp.c4.compute_sm_dot(X, Y_cyc) if direction == 0: S_cyc = libfmp.c4.filter_diag_mult_sm(S_cyc, L, tempo_rel_set, direction=0) if direction == 1: S_cyc = libfmp.c4.filter_diag_mult_sm(S_cyc, L, tempo_rel_set, direction=1) if direction == 2: S_forward = libfmp.c4.filter_diag_mult_sm(S_cyc, L, tempo_rel_set=tempo_rel_set, direction=0) S_backward = libfmp.c4.filter_diag_mult_sm(S_cyc, L, tempo_rel_set=tempo_rel_set, direction=1) S_cyc = np.maximum(S_forward, S_backward) if shift == shift_set[0]: S_TI = S_cyc I_TI = np.ones((S_cyc.shape[0], S_cyc.shape[1])) * shift else: # jit does not like the following lines # I_greater = np.greater(S_cyc, S_TI) # I_greater = (S_cyc > S_TI) I_TI[S_cyc > S_TI] = shift S_TI = np.maximum(S_cyc, S_TI) return S_TI, I_TI def subplot_matrix_ti_colorbar(S, fig, ax, title='', Fs=1, xlabel='Time (seconds)', ylabel='Time (seconds)', clim=None, xlim=None, ylim=None, cmap=None, alpha=1, ind_zero=0): """Visualization function for showing transposition index matrix Notebook: C4/C4S2_SSM-TranspositionInvariance.ipynb""" if cmap is None: color_ind_zero = np.array([0, 0, 0, 1]) if ind_zero == 0: color_ind_zero = np.array([0, 0, 0, 1]) else: color_ind_zero = np.array([1, 1, 1, 1]) colorList = np.array([color_ind_zero, [1, 1, 0, 1], [0, 0.7, 0, 1], [1, 0, 1, 1], [0, 0, 1, 1], [1, 0, 0, 1], [0, 0, 0, 0.5], [1, 0, 0, 0.3], [0, 0, 1, 0.3], [1, 0, 1, 0.3], [0, 0.7, 0, 0.3], [1, 1, 0, 0.3]]) cmap = ListedColormap(colorList) len_sec = S.shape[0] / Fs extent = [0, len_sec, 0, len_sec] im = ax.imshow(S, aspect='auto', extent=extent, cmap=cmap, origin='lower', alpha=alpha) if clim is None: im.set_clim(vmin=-0.5, vmax=11.5) fig.sca(ax) ax_cb = fig.colorbar(im) ax_cb.set_ticks(np.arange(0, 12, 1)) ax_cb.set_ticklabels(np.arange(0, 12, 1)) ax.set_title(title) ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) if xlim is not None: ax.set_xlim(xlim) if ylim is not None: ax.set_ylim(ylim) return im def compute_sm_from_filename(fn_wav, L=21, H=5, L_smooth=16, tempo_rel_set=np.array([1]), shift_set=np.array([0]), strategy='relative', scale=1, thresh=0.15, penalty=0, binarize=0): """Compute an SSM Notebook: C4S2_SSM-Thresholding.ipynb Args: fn_wav: Path and filename of wav file L, H: Parameters for computing smoothed chroma features L_smooth, tempo_rel_set, shift_set: Parameters for computing SSM strategy, scale, thresh, penalty, binarize: Parameters used thresholding SSM Returns: x, x_duration: Audio signal and its duration (seconds) X, Fs_feature: Feature sequence and feature rate S_thresh, I: SSM and index matrix """ # Waveform Fs = 22050 x, Fs = librosa.load(fn_wav, Fs) x_duration = (x.shape[0])/Fs # Chroma Feature Sequence and SSM (10 Hz) C = librosa.feature.chroma_stft(y=x, sr=Fs, tuning=0, norm=2, hop_length=2205, n_fft=4410) Fs_C = Fs/2205 # Chroma Feature Sequence and SSM X, Fs_feature = libfmp.c3.smooth_downsample_feature_sequence(C, Fs_C, filt_len=L, down_sampling=H) X = libfmp.c3.normalize_feature_sequence(X, norm='2', threshold=0.001) # Compute SSM S, I = libfmp.c4.compute_sm_ti(X, X, L=L_smooth, tempo_rel_set=tempo_rel_set, shift_set=shift_set, direction=2) S_thresh = libfmp.c4.threshold_matrix(S, thresh=thresh, strategy=strategy, scale=scale, penalty=penalty, binarize=binarize) return x, x_duration, X, Fs_feature, S_thresh, I