import pandas as pd from glob import glob caseSampleClusterIDPath = '/IPMNPDAC_WGS/Data/Data/sigDPC/step4bcaseSampleClusterID/' output = '/IPMNPDAC_WGS/Data/Data/sigDPC/step5bcaseClusterIDsum/' dfs=[] for fn in glob(caseSampleClusterIDPath + '*_chrPosCluster_ID.csv'): caseid = fn.split('/')[-1].split('_')[0] caseSampleClusterID_df = pd.read_csv(fn) caseSampleClusterID_df = caseSampleClusterID_df[['ID1', 'ID2', 'ID5', 'ID6', 'ID8', 'ID9', 'ID14', 'clusterNo']] sumIDCluster = caseSampleClusterID_df.groupby('clusterNo')[['ID1', 'ID2', 'ID5', 'ID6', 'ID8', 'ID9', 'ID14']].sum().round().reset_index() sumIDCluster.to_csv(output + '{}_clusterIDsum.csv'.format(caseid), index=0) dfs.append(sumIDCluster)