""" Module: libfmp.c7.c7s1_audio_id Author: Meinard Mueller, Patricio Lopez-Serrano License: The MIT license, https://opensource.org/licenses/MIT This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP) """ import numpy as np from scipy import ndimage from matplotlib import pyplot as plt from numba import jit @jit(nopython=True) def compute_constellation_map_naive(Y, dist_freq=7, dist_time=7, thresh=0.01): """Compute constellation map (naive implementation) Notebook: C7/C7S1_AudioIdentification.ipynb Args: Y: Spectrogram (magnitude) dist_freq: Neighborhood parameter for frequency direction (kappa) dist_time: Neighborhood parameter for time direction (tau) thresh: Threshold parameter for minimal peak magnitude Returns: Cmap: Boolean mask for peak structure (same size as Y) """ # spectrogram dimensions if Y.ndim > 1: (K, N) = Y.shape else: K = Y.shape[0] N = 1 Cmap = np.zeros((K, N), dtype=np.bool8) # loop over spectrogram for k in range(K): f1 = max(k - dist_freq, 0) f2 = min(k + dist_freq + 1, K) for n in range(N): t1 = max(n - dist_time, 0) t2 = min(n + dist_time + 1, N) curr_mag = Y[k, n] curr_rect = Y[f1:f2, t1:t2] c_max = np.max(curr_rect) if ((curr_mag == c_max) and (curr_mag > thresh)): Cmap[k, n] = True return Cmap def plot_constellation_map(Cmap, Y=None, xlim=None, ylim=None, title='', xlabel='Time (sample)', ylabel='Frequency (bins)', s=5, color='r', marker='o', figsize=(7, 3), dpi=72): """Plot constellation map Notebook: C7/C7/C7S1_AudioIdentification.ipynb Args: Cmap: Constellation map given as boolean mask for peak structure Y: Spectrogram representation xlim, ylim: Limits for x axis and yaxis title: Title for plot xlabel, ylabel: Label for x axis and y axis s: Size of dots in scatter plot color: Color used for scatter plot maker: Marke type used for scatter plot figsize: Width, height in inches dpi: Dots per inch colorbar: Create a colorbar. Returns: fig: The created matplotlib figure ax: The used axes. im: The image plot """ if Cmap.ndim > 1: (K, N) = Cmap.shape else: K = Cmap.shape[0] N = 1 if Y is None: Y = np.zeros((K, N)) fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi) im = ax.imshow(Y, origin='lower', aspect='auto', cmap='gray_r') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_title(title) Fs = 1 if xlim is None: xlim = [-0.5/Fs, (N-0.5)/Fs] if ylim is None: ylim = [-0.5/Fs, (K-0.5)/Fs] ax.set_xlim(xlim) ax.set_ylim(ylim) n, k = np.argwhere(Cmap == 1).T ax.scatter(k, n, color=color, s=s, marker=marker) plt.tight_layout() return fig, ax, im def compute_constellation_map(Y, dist_freq=7, dist_time=7, thresh=0.01): """Compute constellation map (implementation using image processing) Notebook: C7/C7S1_AudioIdentification.ipynb Y: Spectrogram (magnitude) dist_freq: Neighborhood parameter for frequency direction (kappa) dist_time: Neighborhood parameter for time direction (tau) thresh: Threshold parameter for minimal peak magnitude Returns: Cmap: Boolean mask for peak structure (same size as Y) """ result = ndimage.maximum_filter(Y, size=[2*dist_freq+1, 2*dist_time+1], mode='constant') Cmap = np.logical_and(Y == result, result > thresh) return Cmap def match_binary_matrices_tol(C_ref, C_est, tol_freq=0, tol_time=0): """Compare binary matrices with tolerance Note: The tolerance parameters should be smaller than the minimum distance of peaks (1-entries in C_ref ad C_est) to obtain meaningful TP, FN, FP values Notebook: C7/C7S1_AudioIdentification.ipynb C_ref: Binary matrix used as reference C_est: inary matrix used as estimation tol_freq: Tolerance in frequency direction (vertical) tol_time: Tolerance in time direction (horizontal) Returns: TP: True positives FN: False negatives FP: False positives C_AND: Boolean mask of AND of C_ref and C_est (with tolerance) """ assert C_ref.shape == C_est.shape, "Dimensions need to agree" N = np.sum(C_ref) M = np.sum(C_est) # Expand C_est with 2D-max-filter using the tolerance parameters C_est_max = ndimage.maximum_filter(C_est, size=(2*tol_freq+1, 2*tol_time+1), mode='constant') C_AND = np.logical_and(C_est_max, C_ref) TP = np.sum(C_AND) FN = N - TP FP = M - TP return TP, FN, FP, C_AND def compute_matching_function(C_D, C_Q, tol_freq=1, tol_time=1): """Computes matching function for constellation maps Notebook: C7/C7S1_AudioIdentification.ipynb C_D: Binary matrix used as dababase document C_Q: Binary matrix used as query document tol_freq: Tolerance in frequency direction (vertical) tol_time: Tolerance in time direction (horizontal) Returns: Delta: Matching function shift_max: Optimal shift position maximizing Delta """ L = C_D.shape[1] N = C_Q.shape[1] M = L - N assert M >= 0, "Query must be shorter than document" Delta = np.zeros(L) for m in range(M + 1): C_D_crop = C_D[:, m:m+N] TP, FN, FP, C_AND = match_binary_matrices_tol(C_D_crop, C_Q, tol_freq=tol_freq, tol_time=tol_time) Delta[m] = TP shift_max = np.argmax(Delta) return Delta, shift_max