hrm-analysis / src / hrm_analysis.py
hrm_analysis.py
Raw
# -*- coding: utf-8 -*-
"""hrm_analysis.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1vF94CWORVtXSZDjr0zFpaoW-O2YvsZyO
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

import math

from numpy.polynomial.polynomial import Polynomial

data = pd.read_csv("HRM.csv", index_col=0, header=0, sep= ";", decimal=",") #use the separator according to your file. Common separator are ",", ";"
data.loc[:,'degree']=data.index  
data.index = pd.to_datetime(data.index, unit='ms') 

folder_results = "results" # change this to your results folder to save figures

def plot_curve(index, N_sample_1, B_T, sample_1, name, peak):
  plt.plot(index, N_sample_1, label='normalized_raw')
  plt.plot(index, N_M, label='normalized_EBS')
  plt.plot(index, -1*B_T, label='Background')
  plt.plot(index, M_T, label='Raw MT')
  plt.plot(index, sample_1, label='Raw sample')
  plt.legend()
  plt.suptitle(name)
  plt.title("Melting temp" + repr(index[peak[-1]])) #Peaks on the derivative curves can be used for the identification of the melting temperatures (Tm)
  plt.savefig(results_folder+"/sample_"+name+"_melt_curve")
  plt.show()

def plot_derivative(index, derivative_values, key, peak):
  ax = plt.plot(index, derivative_values, c='red')
  plt.title(key + ": Negative First Derivative Curve")
  plt.axvline(start, linestyle='--', c='blue')
  plt.axvline(end, linestyle='--', c='black')
  for p in peak:
    plt.axhline(derivative_values[p], linestyle='--', c='grey')
    plt.text(index[p], derivative_values[p], "temp: "+"{:.2f}".format(index[p]))
  plt.ylabel("-dF/dT")
  plt.xlabel("Temperature")
  plt.savefig(folder+"/"+key+"_derivative")
  plt.show()

def plot_melting_region(index, sample_1, key, start, end):
  plt.plot(index, sample_1, c='red')  
  plt.title(key)
  plt.axvline(start, linestyle='--', c='blue')
  plt.axvline(end, linestyle='--', c='black')
  plt.xlabel("Temperature")
  plt.ylabel("Fluorescence")
  plt.text(start, np.mean(sample_1)+0.2, "T_m_start $"+"{:.2f}".format(start)+"$")
  plt.text(end, np.mean(sample_1)-0.1, "T_m_end $"+"{:.2f}".format(end)+"$")
  plt.savefig(folder_results+"/"+key) 
  plt.show()

"""**Defining samples and negative control**

Following the example, define which columns belong to each sample and which columns are the negative control
"""

samples = dict()
samples['Control_1'] = ['A2 ', 'A3 '] #Sample "Control_1" has two replicas in columns A2 and A3
samples['Control_2'] = ['A4 ', 'A5 '] #Sample "Control_2" has two replicas in columns A4 and A5
samples['A'] = ['A6 ', 'A7', 'A8'] #Sample "A" has three replicas in columns A6, A7 and A8
samples['Negative'] = ['C2 ','C3 '] #Negative control is in columns C2 and C3

control_samples= ['Control_1', "Control_2"] #Add your control samples name as you named them before. Separate them using ,

"""**HRM Analysis**

In this part, we process each sample using the algorithm described in the paper. The three steps are signaled by comments: finding melting region, apply EBS  and Melting peak identification 
"""

all_curves = pd.DataFrame()    
all_derivatives = pd.DataFrame()   

for key in samples:  
    try:
      columns = list(filter(lambda x: any(s in x for s in samples[key]), data.columns)) 
      sample = data[columns]  
      sample.loc[:,'degree']=data['degree']
      sample = sample.resample('1us').mean().interpolate()  
      sample = sample.resample('60us').ffill() 
      sample = sample.dropna()
      
      #find RecWindow 
      index = sample['degree'].values
      rec_window = len(index[index<index[0]+1]) #number of samples within 1 degree
      diff = index - np.roll(index, 1)
      delta = np.min(diff[1:])

      sample_1 = np.median(sample, axis=1) #we use median of replicas instead of mean 
      derivative_values = -1*savgol_filter(sample_1, delta = delta, window_length=rec_window*2+1, polyorder=2,deriv=1) #polynomial order is set to 2, window is number of samples within 1 degree RecWindow*2+1
      
      #Finding melting region 
      tangent_derivative = savgol_filter(derivative_values,delta= delta/100, window_length=rec_window*2+1, polyorder=2,deriv=1) #polynomial order is set to 2, window is number of samples within 1 degree RecWindow*2+1
      angles = np.degrees(np.arctan(tangent_derivative))
      start = index[(angles>50)&(angles<55)]   
      end = index[(angles<-30)&(angles<35)]         

      if len(start)>0 and len(end)>0:  #at least one melting region was found
          start = start[0]
          end = end[end>start]
          end = end[-1]

          #plot melting region
          plot_melting_region(index, sample_1, key, start, end)

          
          #apply EBS for melting curve 
          #the raw fluorescence F(T) is modeled as composing of the melting curve M(T) and an exponentially decaying background B(T). It can be expressed using the following mathematical model
          #F(T) = M(T) + B(T)

          #first determine TR and Tl where derivative of M(T) is 0, outside of start and end
          TL = start - 2
          TR = end + 2

          tl_i = 0
          tr_i = len(index)-1
          if len(np.where(index<TL)[0])>0:
            tl_i = np.where(index<TL)[-1][-1]
          else:
            TL = index[0]

          if len(np.where(index>TR)[0])>0:
            tr_i = np.where(index>TR)[0][0]
          else:
            TR = index[-1] 

          #We remove data that is not in the melting region
          sample_1 = sample_1[tl_i:tr_i+1]   
          n_index = index[tl_i:tr_i+1]

          #Melting peak identification 
          peak = np.where(np.sign(tangent_derivative[:-1]) > np.sign(tangent_derivative[1:]))[0] #tm is where there is a change from positive to negative in tangent derivative 

          tangent_derivative = tangent_derivative[tl_i:tr_i+1]
          plot_derivative(index, derivative_values, key, peak)


          index = n_index

          #Get Background 
          a = (math.log(derivative_values[tr_i])-math.log(derivative_values[tl_i]))/(TR-TL)
          C =  derivative_values[tl_i] / a                   
          exponents = a*(index-TL)                      
          B_T =C*np.exp(exponents) 
          M_T = sample_1 - B_T        
          
          N_M = 100 * (M_T-np.min(M_T))/(np.max(M_T)-np.min(M_T))

          #Normalize sample 
          N_sample_1 = 100 * (sample_1-np.min(sample_1))/(np.max(sample_1)-np.min(sample_1))        


          #Plot samples and normalized 
          plot_curve(index, N_sample_1, B_T, sample_1, key, peak)


          #save the curve for plotting all together 
          sample_data = pd.DataFrame(N_M, index=index, columns=[key])
          all_curves = pd.concat([all_curves, sample_data], axis=1)

          derivative = -1*savgol_filter(N_M, delta=delta, window_length=rec_window*2+1, polyorder=2,deriv=1) 
          sample_data = pd.DataFrame(derivative, index=index, columns=[key])
          all_derivatives = pd.concat([all_derivatives, sample_data], axis=1)
    except (Exception) as e:
        print("error on sample " + key)
        print(e)
        sample_data = pd.DataFrame(np.ones(len(index)), index=index, columns=[key])
        all_derivatives = pd.concat([all_derivatives, sample_data], axis=1)
        continue

"""**Plot results all together**"""

#plot difference plot
import seaborn as sns
reshaped_df = all_curves.reset_index()
reshaped_df = pd.melt(reshaped_df, id_vars='index')
g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)
plt.title("All samples melt curve")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
g.get_figure().savefig(results_folder+"/melting_all.png", bbox_inches='tight', dpi=300)
plt.show()

reshaped_df = all_derivatives.reset_index()
reshaped_df = pd.melt(reshaped_df, id_vars='index')
g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)
plt.title("All samples negative derviative curve")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
g.get_figure().savefig(results_folder+"/neg_derivative_all.png",, bbox_inches='tight', dpi=300)
plt.show()

med_curve = np.mean(all_curves[control_samples], axis=1) 
plt.plot(med_curve)
plt.show()


all_curves_sub =all_curves.sub(med_curve, axis='index')
reshaped_df = all_curves_sub.reset_index()
reshaped_df = pd.melt(reshaped_df, id_vars='index')
g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)
plt.title("Difference plot")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
g.get_figure().savefig(results_folder+"/difference_melt.png", bbox_inches='tight', dpi=300)
plt.show()

#Difference plot with all temperatures
med_curve = med_curve.fillna(0)
all_curves_sub =all_curves.sub(med_curve, axis='index')
all_curves_sub = all_curves_sub.iloc[20:,]
reshaped_df = all_curves_sub.reset_index()
reshaped_df = pd.melt(reshaped_df, id_vars='index')
g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)
plt.title("Difference plot with all temperatures")
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
g.get_figure().savefig(results_folder+'/difference_melt_all_temps.png', bbox_inches='tight', dpi=300)
plt.show()