############################################################################################### ##### Process models - incident disease and mortality together ############################################################################################### # srun -p interactive --pty bash library(tidyverse) library(readxl) naming <- read_excel("...path/naming_index.xlsx") naming <- as.data.frame(naming) names(naming)[1] <- 'Outcome' # COLLATE RESULTS # Set disease and read in disease codes for the iteration (array) diseases <- c('AL', 'ALS', 'BRAIN', 'Breast', 'Colorectal', 'COPD', 'CYS', 'Dep', 'Diab', 'ENDO', 'GYN', 'IBD', 'IHD', 'LIV', 'LUNG', 'LUP', 'MS', 'PD', 'Prostate', 'RA', 'SCZ', 'ST', 'VD', 'DEATH') location <- '...path/Results/' files <- list.files(location, '.csv') files_full <- files[grep('FULL', files)] files_basic <- files[-grep('FULL', files)] # Basic results collation results <- list() for(i in 1:length(files_basic)){ name <- as.character(diseases[i]) print(name) data <- read.csv(paste0(location, name, '.csv')) results[[i]] <- data } res <- do.call(rbind, results) res <- res[order(res$P.Value),] res <- left_join(res, naming, by = 'Outcome') write.csv(res, '...path/basic_Bon.csv', row.names = F) top <- res[which(res$P.Value < 0.00000541125),] # 4916 passing # Full results collation for(i in 1:length(files_full)){ name <- as.character(diseases[i]) print(name) data <- read.csv(paste0(location, name, '_FULL.csv')) results[[i]] <- data } res2 <- do.call(rbind, results) res2 <- res2[order(res2$P.Value),] res2 <- left_join(res2, naming, by = 'Outcome') write.csv(res2, '...path/full_Bon.csv', row.names = F) # Associations passing adjustment top$retain <- paste0(top$Predictor, top$Outcome) res2$retain <- paste0(res2$Predictor, res2$Outcome) keep <- res2[which(res2$retain %in% top$retain),] keep <- keep[which(keep$P.Value < 0.00000541125),] # 3123 keep <- left_join(keep, naming, by = 'Outcome') write.csv(keep, '...path/Associations_retained_Bon.csv', row.names = F) length(unique(keep$Predictor)) # 1052 proteins # Failure rate across top models assocs <- as.data.frame(table(keep$Outcome)) assocs <- assocs[order(assocs$Freq),] names(assocs) <- c('Incident disease', 'Associations') fail <- keep[which(keep$cox.zph_protein < 0.05),] fails <- as.data.frame(table(fail$Outcome)) names(fails) <- c('Incident disease', 'Protein_fail') assocs <- left_join(assocs, fails, by = 'Incident disease') write.csv(assocs, '...path/Assocs_summary_table_Bon.csv', row.names = F) ############################################################################################### ### Create a summary table showing proteins associated with each disease and direction of effect # srun -p interactive --pty bash library(tidyverse) keep <- read.csv('...path/Associations_retained_Bon.csv') diseases <- c('DEATH', 'AL', 'ALS', 'BRAIN', 'Breast', 'Colorectal', 'COPD', 'CYS', 'Dep', 'Diab', 'ENDO', 'GYN', 'IBD', 'IHD', 'LIV', 'LUNG', 'LUP', 'MS', 'PD', 'Prostate', 'RA', 'SCZ', 'ST', 'VD') res <- data.frame(Outcome = 1:24, Associations = 1:24, Negative = 1:24, Positive = 1:24, Names_neg = 1:24, Names_pos = 1:24) for(i in 1:length(diseases)){ dis <- as.character(diseases[i]) table <- keep[which(keep$Outcome %in% dis),] num <- dim(table)[1] neg <- table[which(table$Hazard.Ratio < 1),] pos <- table[which(table$Hazard.Ratio >= 1),] neg_list <- neg[,1] pos_list <- pos[,1] neg_list <- gsub("\\..*", "", neg_list) pos_list <- gsub("\\..*", "", pos_list) neg_list <- str_c(neg_list, collapse = ", ") pos_list <- str_c(pos_list, collapse = ", ") res[i,1] <- dis res[i,2] <- num res[i,3] <- dim(neg)[1] res[i,4] <- dim(pos)[1] res[i,5] <- neg_list res[i,6] <- pos_list } res <- res[order(res[,2]),] write.csv(res, '...path/summary_table_associations_Bon.csv', row.names = F) ###############################################################################