import os
from glob import glob

import pandas as pd
# Directory searched for the per-case input tables (*_chrPosCluster_sbs.csv).
caseSampleClusterSBSPath = '/IPMNPDAC_WGS/Data/Data/sigDPC/step4caseSampleClusterSBS/'
# Directory that receives one <caseid>_clusterSBSsum.csv per input table.
output = '/IPMNPDAC_WGS/Data/Data/sigDPC/step5caseClusterSBSsum/'

# Columns that are summed within each cluster. Defined once so that the column
# selection and the aggregation cannot drift apart.
# NOTE(review): presumably mutational-signature (SBS) contribution columns --
# confirm against the step-4 output schema.
SBS_COLUMNS = ['SBS1', 'SBS2', 'SBS5', 'SBS13', 'SBS17a', 'SBS17b', 'SBS28', 'SBS40']


def sum_sbs_by_cluster(sbs_df):
    """Return the rounded per-cluster totals of the SBS columns.

    Args:
        sbs_df: DataFrame containing a ``clusterNo`` column and every column
            listed in ``SBS_COLUMNS``. Any other columns are ignored.

    Returns:
        DataFrame with one row per distinct ``clusterNo`` value (in ascending
        order, the ``groupby`` default), with ``clusterNo`` as the first
        column followed by the rounded sum of each SBS column.

    Raises:
        KeyError: if ``clusterNo`` or any of the SBS columns is missing.
    """
    return sbs_df.groupby('clusterNo')[SBS_COLUMNS].sum().round().reset_index()


# Per-case summary tables, in processing order.
dfs = []

# Sort the matches: glob() returns them in arbitrary order, and the order of
# `dfs` should be reproducible from run to run.
sbs_files = sorted(glob(os.path.join(caseSampleClusterSBSPath, '*_chrPosCluster_sbs.csv')))

# Create the output directory only when there is something to write, so a run
# that finds no input has no side effects.
if sbs_files:
    os.makedirs(output, exist_ok=True)

for fn in sbs_files:
    # The case id is the file-name text before the first underscore.
    # basename() is used instead of splitting on '/' so the separator that
    # glob() inserts on the current platform is handled correctly.
    caseid = os.path.basename(fn).split('_')[0]
    # Keep only the columns that take part in the aggregation; a missing
    # column raises KeyError here.
    caseSampleClusterSBS_df = pd.read_csv(fn)[SBS_COLUMNS + ['clusterNo']]
    sumSBSCluster = sum_sbs_by_cluster(caseSampleClusterSBS_df)
    sumSBSCluster.to_csv(
        os.path.join(output, '{}_clusterSBSsum.csv'.format(caseid)),
        index=False,
    )
    dfs.append(sumSBSCluster)