{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "hrm_analysis.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "r84Xnsq4Ixts" }, "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from scipy.signal import savgol_filter\n", "\n", "import math\n", "\n", "from numpy.polynomial.polynomial import Polynomial " ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "T_FUurzVJa4E" }, "source": [ "data = pd.read_csv(\"HRM.csv\", index_col=0, header=0, sep= \";\", decimal=\",\") #use the separator according to your file. Common separator are \",\", \";\"\n", "data.loc[:,'degree']=data.index \n", "data.index = pd.to_datetime(data.index, unit='ms') \n", "\n", "folder_results = \"results\" # change this to your results folder to save figures\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "6TPpDE_S600c" }, "source": [ "def plot_curve(index, N_sample_1, B_T, sample_1, name, peak):\n", " plt.plot(index, N_sample_1, label='normalized_raw')\n", " plt.plot(index, N_M, label='normalized_EBS')\n", " plt.plot(index, -1*B_T, label='Background')\n", " plt.plot(index, M_T, label='Raw MT')\n", " plt.plot(index, sample_1, label='Raw sample')\n", " plt.legend()\n", " plt.suptitle(name)\n", " plt.title(\"Melting temp\" + repr(index[peak[-1]])) #Peaks on the derivative curves can be used for the identification of the melting temperatures (Tm)\n", " plt.savefig(results_folder+\"/sample_\"+name+\"_melt_curve\")\n", " plt.show()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "oMqjVJhC7R6d" }, "source": [ "def plot_derivative(index, derivative_values, key, peak):\n", " ax = plt.plot(index, derivative_values, c='red')\n", " plt.title(key + \": Negative First Derivative Curve\")\n", " plt.axvline(start, linestyle='--', c='blue')\n", " plt.axvline(end, linestyle='--', c='black')\n", " for p in peak:\n", " plt.axhline(derivative_values[p], linestyle='--', c='grey')\n", " plt.text(index[p], derivative_values[p], \"temp: \"+\"{:.2f}\".format(index[p]))\n", " plt.ylabel(\"-dF/dT\")\n", " plt.xlabel(\"Temperature\")\n", " plt.savefig(folder+\"/\"+key+\"_derivative\")\n", " plt.show()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "MQq88oDM7_dw" }, "source": [ "def plot_melting_region(index, sample_1, key, start, end):\n", " plt.plot(index, sample_1, c='red') \n", " plt.title(key)\n", " plt.axvline(start, linestyle='--', c='blue')\n", " plt.axvline(end, linestyle='--', c='black')\n", " plt.xlabel(\"Temperature\")\n", " plt.ylabel(\"Fluorescence\")\n", " plt.text(start, np.mean(sample_1)+0.2, \"T_m_start $\"+\"{:.2f}\".format(start)+\"$\")\n", " plt.text(end, np.mean(sample_1)-0.1, \"T_m_end $\"+\"{:.2f}\".format(end)+\"$\")\n", " plt.savefig(folder_results+\"/\"+key) \n", " plt.show()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "axkqXMJW1BDQ" }, "source": [ "**Defining samples and negative control**\n", "\n", "Following the example, define which columns belong to each sample and which columns are the negative control" ] }, { "cell_type": "code", "metadata": { "id": "x6zwMhE4Tk_P" }, "source": [ "samples = dict()\n", "samples['Control_1'] = ['A2 ', 'A3 '] #Sample \"Control_1\" has two replicas in columns A2 and A3\n", "samples['Control_2'] = ['A4 ', 'A5 '] #Sample \"Control_2\" has two replicas in columns A4 and A5\n", "samples['A'] = ['A6 ', 'A7', 'A8'] #Sample \"A\" has three replicas in columns A6, A7 and A8\n", "samples['Negative'] = ['C2 ','C3 '] #Negative control is in columns C2 and C3\n", "\n", "control_samples= ['Control_1', \"Control_2\"] #Add your control samples name as you named them before. Separate them using ,\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "PEiVIu-f6CHc" }, "source": [ "**HRM Analysis**\n", "\n", "In this part, we process each sample using the algorithm described in the paper. The three steps are signaled by comments: finding melting region, apply EBS and Melting peak identification " ] }, { "cell_type": "code", "metadata": { "id": "tbEupO9PI-43" }, "source": [ "all_curves = pd.DataFrame() \n", "all_derivatives = pd.DataFrame() \n", "\n", "for key in samples: \n", " try:\n", " columns = list(filter(lambda x: any(s in x for s in samples[key]), data.columns)) \n", " sample = data[columns] \n", " sample.loc[:,'degree']=data['degree']\n", " sample = sample.resample('1us').mean().interpolate() \n", " sample = sample.resample('60us').ffill() \n", " sample = sample.dropna()\n", " \n", " #find RecWindow \n", " index = sample['degree'].values\n", " rec_window = len(index[index<index[0]+1]) #number of samples within 1 degree\n", " diff = index - np.roll(index, 1)\n", " delta = np.min(diff[1:])\n", "\n", " sample_1 = np.median(sample, axis=1) #we use median of replicas instead of mean \n", " derivative_values = -1*savgol_filter(sample_1, delta = delta, window_length=rec_window*2+1, polyorder=2,deriv=1) #polynomial order is set to 2, window is number of samples within 1 degree RecWindow*2+1\n", " \n", " #Finding melting region \n", " tangent_derivative = savgol_filter(derivative_values,delta= delta/100, window_length=rec_window*2+1, polyorder=2,deriv=1) #polynomial order is set to 2, window is number of samples within 1 degree RecWindow*2+1\n", " angles = np.degrees(np.arctan(tangent_derivative))\n", " start = index[(angles>50)&(angles<55)] \n", " end = index[(angles<-30)&(angles<35)] \n", "\n", " if len(start)>0 and len(end)>0: #at least one melting region was found\n", " start = start[0]\n", " end = end[end>start]\n", " end = end[-1]\n", "\n", " #plot melting region\n", " plot_melting_region(index, sample_1, key, start, end)\n", "\n", " \n", " #apply EBS for melting curve \n", " #the raw fluorescence F(T) is modeled as composing of the melting curve M(T) and an exponentially decaying background B(T). It can be expressed using the following mathematical model\n", " #F(T) = M(T) + B(T)\n", "\n", " #first determine TR and Tl where derivative of M(T) is 0, outside of start and end\n", " TL = start - 2\n", " TR = end + 2\n", "\n", " tl_i = 0\n", " tr_i = len(index)-1\n", " if len(np.where(index<TL)[0])>0:\n", " tl_i = np.where(index<TL)[-1][-1]\n", " else:\n", " TL = index[0]\n", "\n", " if len(np.where(index>TR)[0])>0:\n", " tr_i = np.where(index>TR)[0][0]\n", " else:\n", " TR = index[-1] \n", "\n", " #We remove data that is not in the melting region\n", " sample_1 = sample_1[tl_i:tr_i+1] \n", " n_index = index[tl_i:tr_i+1]\n", "\n", " #Melting peak identification \n", " peak = np.where(np.sign(tangent_derivative[:-1]) > np.sign(tangent_derivative[1:]))[0] #tm is where there is a change from positive to negative in tangent derivative \n", "\n", " tangent_derivative = tangent_derivative[tl_i:tr_i+1]\n", " plot_derivative(index, derivative_values, key, peak)\n", "\n", "\n", " index = n_index\n", "\n", " #Get Background \n", " a = (math.log(derivative_values[tr_i])-math.log(derivative_values[tl_i]))/(TR-TL)\n", " C = derivative_values[tl_i] / a \n", " exponents = a*(index-TL) \n", " B_T =C*np.exp(exponents) \n", " M_T = sample_1 - B_T \n", " \n", " N_M = 100 * (M_T-np.min(M_T))/(np.max(M_T)-np.min(M_T))\n", "\n", " #Normalize sample \n", " N_sample_1 = 100 * (sample_1-np.min(sample_1))/(np.max(sample_1)-np.min(sample_1)) \n", "\n", "\n", " #Plot samples and normalized \n", " plot_curve(index, N_sample_1, B_T, sample_1, key, peak)\n", "\n", "\n", " #save the curve for plotting all together \n", " sample_data = pd.DataFrame(N_M, index=index, columns=[key])\n", " all_curves = pd.concat([all_curves, sample_data], axis=1)\n", "\n", " derivative = -1*savgol_filter(N_M, delta=delta, window_length=rec_window*2+1, polyorder=2,deriv=1) \n", " sample_data = pd.DataFrame(derivative, index=index, columns=[key])\n", " all_derivatives = pd.concat([all_derivatives, sample_data], axis=1)\n", " except (Exception) as e:\n", " print(\"error on sample \" + key)\n", " print(e)\n", " sample_data = pd.DataFrame(np.ones(len(index)), index=index, columns=[key])\n", " all_derivatives = pd.concat([all_derivatives, sample_data], axis=1)\n", " continue\n", "\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "DgN4kwmX6qPS" }, "source": [ "**Plot results all together**" ] }, { "cell_type": "code", "metadata": { "id": "qpG5DiLHJEqb" }, "source": [ "#plot difference plot\n", "import seaborn as sns\n", "reshaped_df = all_curves.reset_index()\n", "reshaped_df = pd.melt(reshaped_df, id_vars='index')\n", "g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)\n", "plt.title(\"All samples melt curve\")\n", "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", "g.get_figure().savefig(results_folder+\"/melting_all.png\", bbox_inches='tight', dpi=300)\n", "plt.show()\n", "\n", "reshaped_df = all_derivatives.reset_index()\n", "reshaped_df = pd.melt(reshaped_df, id_vars='index')\n", "g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)\n", "plt.title(\"All samples negative derviative curve\")\n", "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", "g.get_figure().savefig(results_folder+\"/neg_derivative_all.png\",, bbox_inches='tight', dpi=300)\n", "plt.show()\n", "\n", "med_curve = np.mean(all_curves[control_samples], axis=1) \n", "plt.plot(med_curve)\n", "plt.show()\n", "\n", "\n", "all_curves_sub =all_curves.sub(med_curve, axis='index')\n", "reshaped_df = all_curves_sub.reset_index()\n", "reshaped_df = pd.melt(reshaped_df, id_vars='index')\n", "g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)\n", "plt.title(\"Difference plot\")\n", "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", "g.get_figure().savefig(results_folder+\"/difference_melt.png\", bbox_inches='tight', dpi=300)\n", "plt.show()\n", "\n", "\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "X2A0XLc-1lrE" }, "source": [ "#Difference plot with all temperatures\n", "med_curve = med_curve.fillna(0)\n", "all_curves_sub =all_curves.sub(med_curve, axis='index')\n", "all_curves_sub = all_curves_sub.iloc[20:,]\n", "reshaped_df = all_curves_sub.reset_index()\n", "reshaped_df = pd.melt(reshaped_df, id_vars='index')\n", "g= sns.lineplot(x='index', y='value', hue='variable', data=reshaped_df)\n", "plt.title(\"Difference plot with all temperatures\")\n", "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", "g.get_figure().savefig(results_folder+'/difference_melt_all_temps.png', bbox_inches='tight', dpi=300)\n", "plt.show()\n" ], "execution_count": null, "outputs": [] } ] }