IPMNPDACpaperArchive / IPMNPDAC_WGS / figures / figureS1C.py
figureS1C.py
Raw
import os
import numpy as np
import pandas as pd
import comuta
from natsort import index_natsorted,natsorted,natsort_keygen
import matplotlib.pyplot as plt
from matplotlib import rcParams

rcParams['font.family'] = 'Arial'

import shutup
shutup.please()

datapath = '/IPMNPDAC_WGS/Data/'

# Data processing
# 1.a) sampleID
sampleDf = pd.read_csv(datapath+ 'sample_tumorType.csv')
sample_orderTumor = list(sampleDf.typeTumosample)
sampleGroup = list(sampleDf.group)

sampleOldName = list(sampleDf.samples)
sampleNewName = list(sampleDf.typeTumosample)

sampleIndicator = pd.DataFrame({'sample':sample_orderTumor, 'group':sampleGroup})

sampleIndicatorSampleID = sampleIndicator
sampleIndicatorSampleID['sampleID'] = [a.split('_')[-2]+'_'+a.split('_')[-1] for a in list(sampleIndicatorSampleID['sample'])]
sampleIndicatorSampleID = sampleIndicatorSampleID.rename(columns={'sample':'sampleTumor', 'sampleID':'sample'})

# 1.b) TMB data for top bar
tmbdf = pd.read_csv(datapath + '41sampleTMBdirectHeadmap.csv')
sampleTumorTMB = sampleIndicatorSampleID.merge(tmbdf)

tumourGrade=['t1','t2','t3','t4','t5']
sampleTumorTMBOrder=[]
for td in tumourGrade:
    dfx = sampleTumorTMB[sampleTumorTMB['sampleTumor'].str.contains(td)]
    dfx = dfx.sort_values(by='Nonsynonymous',ascending=True)
    sampleTumorTMBOrder.append(dfx)

sampleTumorTMBOrderDf = pd.concat(sampleTumorTMBOrder)
sampleTumourTMBorderList = list(sampleTumorTMBOrderDf['sample'])
tmbdf2 = sampleTumorTMBOrderDf[['sample', 'Nonsynonymous',  'Synonymous']]
tmbdf2.to_csv(datapath + 'tmbdf4driverheatmapPlot.csv', index=0)

# 1.c) CNV data
totalDf = pd.read_csv(datapath + 'ipmn_gisticData.txt', sep='\t')
cytobandOnly = [a.split(':')[1] for a in totalDf.Cytoband]
heatmapDf = totalDf[['Tumor_Sample_Barcode','Variant_Classification']]
heatmapDf.insert(2, 'Cytoband', cytobandOnly)
heatmapDf = heatmapDf.rename({'Tumor_Sample_Barcode':'sample', 
                              'Cytoband':'category', 
                              'Variant_Classification':'value'}, axis=1) 

heatmapDf = heatmapDf.replace(sampleOldName,sampleNewName)
heatmapDf = heatmapDf.drop_duplicates()

# 1.d) read in sample type data
tumourType = pd.read_csv(datapath + 'all41TumourType.csv')

# 1.e) fq side bar  
heatmapDf = pd.concat([heatmapDf,tumourType])
heatmapDf = heatmapDf.sort_values(by=["sample"], key=natsort_keygen())

# 1.f) cellularity top grad bar
purity_data = pd.read_csv(datapath + '41samplesCellularity4alldriverMutsHeatmap.csv')
#purity_data.insert(1, 'category', 'Purity')

# 2) plot
# 2.a) sample order and label
sampleOrderForlabel = sampleTumourTMBorderList 
heatmapDf['sample'] = [y.split('_')[-2]+'_'+y.split('_')[-1] for y in heatmapDf['sample']]

heatmapDf['sample'] = pd.Categorical(heatmapDf ['sample'], ordered=True, categories=sampleTumourTMBorderList)
heatmapDf = heatmapDf.sort_values('sample')
heatmapDf2 = heatmapDf.reset_index()
heatmapDf2 = heatmapDf2[['sample', 'value', 'category']]
side_bar_data = pd.DataFrame({'count':heatmapDf2.groupby( ["category"] ).size()}).reset_index()
side_bar_data['-log(Q)'] = side_bar_data['count'].div(0.41).round(1)
side_bar_data = side_bar_data.drop('count', axis=1)
side_bar_data.loc[side_bar_data['category'] == 'Pathology', ['count',  '-log(Q)']] = 0
side_bar_data.loc[side_bar_data['category'] == 'Invasive', ['count',  '-log(Q)']] = 0
side_bar_data = side_bar_data.sort_values(by=['-log(Q)'], 
                                          key=lambda x: np.argsort(index_natsorted(side_bar_data['-log(Q)'])))
category_order = side_bar_data.category
value_order = ['Del','Amp']

purity_data = purity_data.set_index('sample')
purity_data = purity_data.reindex(index = sampleOrderForlabel)
purity_data = purity_data.reset_index()
purity_data  = purity_data.rename(columns = {'cellularity':'value'}) #must keep the same column name reqored by the package

# 2.b) plot colour
mut_mapping = {'Amp':'skyblue', 'Del':'violet', 
               'Absent':{'facecolor':'grey', 'alpha':0.05},
               'Invasive':'maroon', 'Non-invasive':'orange',
               'Pancreatitis':'gray','IPMN_LGD':'mediumslateblue', 
               'IPMN_HGD':'darkslateblue', 
               'IPMN_HGD_PDAC':'lightcoral', 'PDAC':'maroon'}

side_mapping = {'-log(Q)':'pink'}
sidebar_kwargs = {'height': 0.8}

bar_mapping = {'Nonsynonymous': 'green', 'Synonymous': 'pink'}
bar_kwargs = {'width': 0.8, 'edgecolor': 'green'}

cat_mapping = {'Absent': {'facecolor': 'red'}}
value_range = (0, 1)

# 2.c) add heat main map
toy_comut = comuta.CoMut()
toy_comut.add_categorical_data(heatmapDf2, name='Mutation type', 
                               mapping=mut_mapping,
                               category_order=category_order,
                               value_order = value_order)

# 2.d) add side bar
toy_comut.add_side_bar_data(side_bar_data, paired_name ='Mutation type', name ='MutFq',
                            mapping = side_mapping, xlabel = 'Fq (%)', 
                            position = 'right', bar_kwargs = sidebar_kwargs)
# 2.e) plot cellularity
cat_mapping = {'Absent': {'facecolor': 'red'}}
value_range = (0, 1)
toy_comut.add_continuous_data(purity_data, 
                              name = 'Purity', 
                              mapping = 'gray_r', 
                              cat_mapping = cat_mapping, value_range = value_range)
# 2.f) add top bar
toy_comut.add_bar_data(tmbdf2, name = 'Mutation burden', 
                       mapping = bar_mapping,  
                       bar_kwargs = bar_kwargs,
                       ylabel ='TMB')

# 2.g) general adjust
toy_comut.plot_comut(figsize=(12, 10), x_padding=0.04,hspace =0.03, 
                     wspace = 0.03, y_padding=0.04, tri_padding=0.03)

toy_comut.axes['Mutation type'].set_xticklabels(labels=sampleOrderForlabel,rotation = 45,horizontalalignment='right')
toy_comut.axes['Mutation burden'].set_ylabel('TMB', rotation = 'horizontal', 
                                             ha = 'right', va = 'center', y = 0.3)
toy_comut.axes['Mutation burden'].set_ylim(ymin=0, ymax=10)
toy_comut.axes['Mutation burden'].axvline(0, color='black')

custom_rcParams = {'font.size': 10}
rcParams.update(custom_rcParams)

toy_comut.add_unified_legend(bbox_to_anchor = (1.5, -4))

plt.subplots_adjust(bottom=0.001, right=0.9, top=0.999,left=0.05);