# -*- coding: utf-8 -*- """hrm_analysis.ipynb Automatically generated by Colaboratory. Original file is located at https://colab.research.google.com/drive/1vF94CWORVtXSZDjr0zFpaoW-O2YvsZyO """ import pandas as pd import numpy as np import matplotlib.pyplot as plt from scipy.signal import savgol_filter import math from numpy.polynomial.polynomial import Polynomial data = pd.read_csv("HRM.csv", index_col=0, header=0, sep= ";", decimal=",") #use the separator according to your file. Common separator are ",", ";" data.loc[:,'degree']=data.index data.index = pd.to_datetime(data.index, unit='ms') folder_results = "results" # change this to your results folder to save figures def plot_curve(index, N_sample_1, B_T, sample_1, name, peak): plt.plot(index, N_sample_1, label='normalized_raw') plt.plot(index, N_M, label='normalized_EBS') plt.plot(index, -1*B_T, label='Background') plt.plot(index, M_T, label='Raw MT') plt.plot(index, sample_1, label='Raw sample') plt.legend() plt.suptitle(name) plt.title("Melting temp" + repr(index[peak[-1]])) #Peaks on the derivative curves can be used for the identification of the melting temperatures (Tm) plt.savefig(results_folder+"/sample_"+name+"_melt_curve") plt.show() def plot_derivative(index, derivative_values, key, peak): ax = plt.plot(index, derivative_values, c='red') plt.title(key + ": Negative First Derivative Curve") plt.axvline(start, linestyle='--', c='blue') plt.axvline(end, linestyle='--', c='black') for p in peak: plt.axhline(derivative_values[p], linestyle='--', c='grey') plt.text(index[p], derivative_values[p], "temp: "+"{:.2f}".format(index[p])) plt.ylabel("-dF/dT") plt.xlabel("Temperature") plt.savefig(folder+"/"+key+"_derivative") plt.show() def plot_melting_region(index, sample_1, key, start, end): plt.plot(index, sample_1, c='red') plt.title(key) plt.axvline(start, linestyle='--', c='blue') plt.axvline(end, linestyle='--', c='black') plt.xlabel("Temperature") plt.ylabel("Fluorescence") plt.text(start, np.mean(sample_1)+0.2, "T_m_start $"+"{:.2f}".format(start)+"$") plt.text(end, np.mean(sample_1)-0.1, "T_m_end $"+"{:.2f}".format(end)+"$") plt.savefig(folder_results+"/"+key) plt.show() """**Defining samples and negative control** Following the example, define which columns belong to each sample and which columns are the negative control """ samples = dict() samples['Control_1'] = ['A2 ', 'A3 '] #Sample "Control_1" has two replicas in columns A2 and A3 samples['Control_2'] = ['A4 ', 'A5 '] #Sample "Control_2" has two replicas in columns A4 and A5 samples['A'] = ['A6 ', 'A7', 'A8'] #Sample "A" has three replicas in columns A6, A7 and A8 samples['Negative'] = ['C2 ','C3 '] #Negative control is in columns C2 and C3 control_samples= ['Control_1', "Control_2"] #Add your control samples name as you named them before. Separate them using , """**HRM Analysis** In this part, we process each sample using the algorithm described in the paper. The three steps are signaled by comments: finding melting region, apply EBS and Melting peak identification """ all_curves = pd.DataFrame() all_derivatives = pd.DataFrame() for key in samples: try: columns = list(filter(lambda x: any(s in x for s in samples[key]), data.columns)) sample = data[columns] sample.loc[:,'degree']=data['degree'] sample = sample.resample('1us').mean().interpolate() sample = sample.resample('60us').ffill() sample = sample.dropna() #find RecWindow index = sample['degree'].values rec_window = len(index[index50)&(angles<55)] end = index[(angles<-30)&(angles<35)] if len(start)>0 and len(end)>0: #at least one melting region was found start = start[0] end = end[end>start] end = end[-1] #plot melting region plot_melting_region(index, sample_1, key, start, end) #apply EBS for melting curve #the raw fluorescence F(T) is modeled as composing of the melting curve M(T) and an exponentially decaying background B(T). It can be expressed using the following mathematical model #F(T) = M(T) + B(T) #first determine TR and Tl where derivative of M(T) is 0, outside of start and end TL = start - 2 TR = end + 2 tl_i = 0 tr_i = len(index)-1 if len(np.where(index0: tl_i = np.where(indexTR)[0])>0: tr_i = np.where(index>TR)[0][0] else: TR = index[-1] #We remove data that is not in the melting region sample_1 = sample_1[tl_i:tr_i+1] n_index = index[tl_i:tr_i+1] #Melting peak identification peak = np.where(np.sign(tangent_derivative[:-1]) > np.sign(tangent_derivative[1:]))[0] #tm is where there is a change from positive to negative in tangent derivative tangent_derivative = tangent_derivative[tl_i:tr_i+1] plot_derivative(index, derivative_values, key, peak) index = n_index #Get Background a = (math.log(derivative_values[tr_i])-math.log(derivative_values[tl_i]))/(TR-TL) C = derivative_values[tl_i] / a exponents = a*(index-TL) B_T =C*np.exp(exponents) M_T = sample_1 - B_T N_M = 100 * (M_T-np.min(M_T))/(np.max(M_T)-np.min(M_T)) #Normalize sample N_sample_1 = 100 * (sample_1-np.min(sample_1))/(np.max(sample_1)-np.min(sample_1)) #Plot samples and normalized plot_curve(index, N_sample_1, B_T, sample_1, key, peak) #save the curve for plotting all together sample_data = pd.DataFrame(N_M, index=index, columns=[key]) all_curves = pd.concat([all_curves, sample_data], axis=1) derivative = -1*savgol_filter(N_M, delta=delta, window_length=rec_window*2+1, polyorder=2,deriv=1) sample_data = pd.DataFrame(derivative, index=index, columns=[key]) all_derivatives = pd.concat([all_derivatives, sample_data], axis=1) except (Exception) as e: print("error on sample " + key) print(e) sample_data = pd.DataFrame(np.ones(len(index)), index=index, columns=[key]) all_derivatives = pd.concat([all_derivatives, sample_data], axis=1) continue """**Plot results all together**""" #plot difference plot import seaborn as sns reshaped_df = all_curves.reset_index() reshaped_df = pd.melt(reshaped_df, id_vars='index') g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df) plt.title("All samples melt curve") plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) g.get_figure().savefig(results_folder+"/melting_all.png", bbox_inches='tight', dpi=300) plt.show() reshaped_df = all_derivatives.reset_index() reshaped_df = pd.melt(reshaped_df, id_vars='index') g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df) plt.title("All samples negative derviative curve") plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) g.get_figure().savefig(results_folder+"/neg_derivative_all.png",, bbox_inches='tight', dpi=300) plt.show() med_curve = np.mean(all_curves[control_samples], axis=1) plt.plot(med_curve) plt.show() all_curves_sub =all_curves.sub(med_curve, axis='index') reshaped_df = all_curves_sub.reset_index() reshaped_df = pd.melt(reshaped_df, id_vars='index') g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df) plt.title("Difference plot") plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) g.get_figure().savefig(results_folder+"/difference_melt.png", bbox_inches='tight', dpi=300) plt.show() #Difference plot with all temperatures med_curve = med_curve.fillna(0) all_curves_sub =all_curves.sub(med_curve, axis='index') all_curves_sub = all_curves_sub.iloc[20:,] reshaped_df = all_curves_sub.reset_index() reshaped_df = pd.melt(reshaped_df, id_vars='index') g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df) plt.title("Difference plot with all temperatures") plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) g.get_figure().savefig(results_folder+'/difference_melt_all_temps.png', bbox_inches='tight', dpi=300) plt.show()