import pandas as pd
from glob import glob
# Merge, for every case, the per-cluster SBS summary table with the per-cluster
# ID summary table (joined on the 'clusterNo' column) and write one combined
# CSV per case into the output directory.

# Input directories (one CSV per case in each) and the output directory.
sbsClusterPath = '/IPMNPDAC_WGS/Data/Data/sigDPC/step5caseClusterSBSsum/'
idClusterPath = '/IPMNPDAC_WGS/Data/Data/sigDPC/step5bcaseClusterIDsum/'
output = '/IPMNPDAC_WGS/Data/Data/sigDPC/step6SBS_ID_cluster4plot/'


def index_by_case_id(paths):
    """Map case id -> file path for a list of per-case CSV paths.

    The case id is the part of the file name before the first underscore
    (e.g. '/x/y/CASE1_clusterSBSsum.csv' -> 'CASE1'). If several paths share
    the same case id, the last one wins.
    """
    return {path.split('/')[-1].split('_')[0]: path for path in paths}


# glob() returns files in arbitrary order, so sort for a reproducible run.
sbsCluster = sorted(glob(sbsClusterPath + '*_clusterSBSsum.csv'))
idCluster = sorted(glob(idClusterPath + '*_clusterIDsum.csv'))

# Pair the two file sets by case id rather than by list position: positional
# zip() silently merges tables of different cases whenever the two directories
# are listed in a different order or do not contain exactly the same cases.
sbs_by_case = index_by_case_id(sbsCluster)
id_by_case = index_by_case_id(idCluster)

for caseid in sorted(sbs_by_case):
    fsbs = sbs_by_case[caseid]
    fid = id_by_case.get(caseid)
    if fid is None:
        # No ID table for this case; skip instead of merging with another case.
        print('No ID cluster file found for case {}; skipping'.format(caseid))
        continue

    sbscluster = pd.read_csv(fsbs)
    idcluster = pd.read_csv(fid)

    # Left join keeps every SBS cluster; clusters absent from the ID table
    # get 0 in the ID columns instead of NaN.
    sbs_id_cluster = pd.merge(
        sbscluster, idcluster, on='clusterNo', how='left'
    ).fillna(0)
    sbs_id_cluster.to_csv(
        output + '{}_msDPC_SBS96_ID83.csv'.format(caseid), index=False
    )