import pandas as pd
import matplotlib.pyplot as plt
from natsort import natsort_keygen
from matplotlib import rcParams
# Use Arial for all text in figures created after this point.
rcParams.update({'font.family': 'Arial'})
# Directory holding the signature CSV tables. File names are appended to it by
# plain string concatenation, so the trailing slash must stay.
sfilepath = '/IPMNPDAC_WGS/Data/'
# 1) Load the per-sample signature tables and convert them to row percentages.
_LABEL_COL = 'typeTumorSample'


def _prepare_counts(table, signature_cols, label_col=_LABEL_COL):
    """Return the label column plus ``signature_cols``, naturally sorted by label.

    Parameters
    ----------
    table : pandas.DataFrame
        Raw table as read from CSV; must contain ``label_col`` and every
        name in ``signature_cols`` (pandas raises ``KeyError`` otherwise).
    signature_cols : list of str
        Signature columns to keep, in plotting order.
    label_col : str
        Name of the sample-label column.

    Returns
    -------
    pandas.DataFrame
        New frame with ``label_col`` first, rows in natural sort order of the
        full label, and the first three characters of each label removed.
    """
    counts = table[[label_col, *signature_cols]]
    # Sort on the full label first; the prefix is only stripped afterwards so
    # it still takes part in the ordering.
    counts = counts.sort_values(by=label_col, key=natsort_keygen())
    # Drop the leading three characters of every label
    # (presumably a fixed-width prefix -- confirm against the CSV contents).
    counts[label_col] = counts[label_col].str[3:]
    return counts


def _to_percent(counts, label_col=_LABEL_COL):
    """Return each signature's share (0-100) of its row total.

    The label column is excluded from the arithmetic and re-attached as the
    last column of the result. A row whose signature values sum to zero
    yields NaN shares (0 / 0).
    """
    signature_counts = counts.drop(columns=[label_col])
    row_totals = signature_counts.sum(axis=1)
    rates = signature_counts.div(row_totals, axis=0) * 100
    rates[label_col] = counts[label_col]
    return rates


# 1a) input SBS96
sbs96all = pd.read_csv(sfilepath + 's41SBSsignatureCount.csv')
sbsplotdf = _prepare_counts(
    sbs96all,
    ['SBS1', 'SBS2', 'SBS5', 'SBS13', 'SBS17a', 'SBS17b', 'SBS28', 'SBS40'],
)
sbsplotdf2rate = _to_percent(sbsplotdf)

# 1b) input ID83
id83all = pd.read_csv(sfilepath + 's41indelSignatureCount.csv')
idplotdf = _prepare_counts(
    id83all,
    ['ID1', 'ID2', 'ID5', 'ID6', 'ID8', 'ID9', 'ID14'],
)
idplotdf2rate = _to_percent(idplotdf)

# 1c) input SV32
svall = pd.read_csv(sfilepath + 's41SVsignatureCount.csv')
svplotdf = _prepare_counts(
    svall,
    ['SV2', 'SV4', 'SV5', 'SV7', 'SV9', 'SV10'],
)
svplotdf2rate = _to_percent(svplotdf)

# 1d) input CN48
# NOTE(review): unlike the other three inputs this file name has no "Count"
# suffix, and 'CNV48B' does not follow the 'CN<n>' column pattern -- confirm
# both are intended.
cnall = pd.read_csv(sfilepath + 's41CNsignature.csv')
cnplotdf = _prepare_counts(
    cnall,
    ['CN1', 'CN9', 'CN24', 'CNV48B'],
)
cnplotdf2rate = _to_percent(cnplotdf)
# 2) set up plot: one panel per signature class, side by side.
# NOTE(review): sharey=True lines the panels up by bar position, so this
# assumes all four tables list the same samples in the same order -- confirm.
fig, axes = plt.subplots(1, 4, figsize=(20, 10), sharey=True)
colorSBS = ['blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'gray']
colorID = ['blue', 'orange', 'olive', 'cyan', 'lime', 'brown', 'fuchsia']
colorSV = ['blue', 'orange', 'olive', 'cyan', 'lime', 'brown']
colorCN = ['blue', 'green', 'gray', 'orange']

# 3) plot SBS, ID, SV and CN signatures as stacked horizontal bars.
# Each entry: (percentage table, bar colors, x-axis label, legend anchor).
# Every color list must have one entry per signature column of its table.
panel_specs = [
    (sbsplotdf2rate, colorSBS, 'Proportion of SBS96 in Each Sample', (0.26, -0.1)),
    (idplotdf2rate, colorID, 'Proportion of ID83 in Each Sample', (0.22, -0.1)),
    (svplotdf2rate, colorSV, 'Proportion of SV32 in Each Sample', (0.23, -0.1)),
    (cnplotdf2rate, colorCN, 'Proportion of CN48 in Each Sample', (0.26, -0.1)),
]
for panel_ax, (panel_rates, panel_colors, panel_xlabel, panel_anchor) in zip(axes, panel_specs):
    # width=1.0 makes adjacent sample bars touch; mark_right is omitted because
    # it defaults to True and only affects secondary_y plots.
    panel_rates.plot(ax=panel_ax, x='typeTumorSample', kind='barh', stacked=True,
                     legend=True, color=panel_colors, width=1.0)
    # Re-create the legend so it can be anchored below the axes.
    panel_ax.legend(bbox_to_anchor=panel_anchor, fontsize=12)
    panel_ax.set_xlabel(panel_xlabel, fontsize=14, weight='bold')

# 4) spacing
fig.tight_layout(pad=2.0)
plt.show()