############################################################################################### ##### Sensitivity Cox ############################################################################################### args <- commandArgs(trailingOnly=TRUE) taskid <- as.numeric(args[1]) print(paste0('taskid for this iteration is ', taskid)) library(survival) library(survminer) print('Libraries loaded - now loading disease cox file.') # Set disease and read in disease codes for the iteration (array) iteration <- taskid diseases <- c('AL', 'ALS', 'BRAIN', 'Breast', 'Colorectal', 'COPD', 'CYS', 'Dep', 'Diab', 'ENDO', 'GYN', 'IBD', 'IHD', 'LIV', 'LUNG', 'LUP', 'MS', 'PD', 'Prostate', 'RA', 'SCZ', 'ST', 'VD', 'DEATH') tables <- list.files('...path/Tables/', '.csv') disease <- as.character(diseases[iteration]) table <- read.csv(paste0('...path/Tables/', disease ,'.csv')) sex_list <- c('Prostate', 'GYN', 'ENDO', 'Breast', 'CYS', 'Prostate') clock <- names(table)[56:1523] print(disease) print(dim(table)) now <- Sys.time() print(paste0('Models initiating. Time start stamp: ', now, '.')) # RUN LOOPS OVER YEAR OF FOLLOW UP FOR EACH ASSOC tot <- 1468 * 16 results <- data.frame(A = 1:tot, B = 1:tot, C = 1:tot, D = 1:tot, E = 1:tot, Z = 1:tot, G = 1:tot, H = 1:tot, I = 1:tot, J = 1:tot) timer <- seq(0,tot, by = 16) # Full results collation for(i in 1:length(clock)){ tryCatch({ cox <- table prot <- as.character(clock[[i]]) for(j in 1:16){ tryCatch({ k <- timer[[i]] # Isolate cases and controls cases <- cox[which(cox$Event == 1),] controls <- cox[-which(cox$Event == 1),] # Restrict to those in follow up time and exclude those outside of it cases$Event <- ifelse(cases$tte <= j, 1, 0) # # Join to original controls cox2 <- rbind(cases, controls) # Run model # mod <- coxph(Surv(cox2$tte, cox2$Event) ~ scale(cox2[,prot]) + factor(cox2$Sex) + cox2$Age_assessment, data = cox2) if(disease %in% sex_list){ print('Disease in sex list - removing sex from cox.') mod <- coxph(Surv(cox2$tte, cox2$Event) ~ scale(cox2[,prot]) + cox2$Age_assessment + cox2$BMI + cox2$Dep + as.factor(cox2$Alc) + as.factor(cox2$Smo) + as.factor(cox2$Edu), data = cox2) results[j+k,1] <- prot results[j+k,2] <- disease results[j+k,3] <- j results[j+k,4:6]<-round(exp(cbind(coef(mod), confint(mod)))[1,1:3],2) results[j+k,7] <- summary(mod)$coefficients[1,5] results[j+k,8] <- mod$n[1] - mod$nevent[1] results[j+k,9] <- mod$nevent[1] table1 <- cox.zph(mod) p1 <- table1$table[,"p"] results[j+k,10] <-p1[1] results[j+k,11] <-p1[8] } else { mod <- coxph(Surv(cox2$tte, cox2$Event) ~ scale(cox2[,prot]) + factor(cox2$Sex) + cox2$Age_assessment + cox2$BMI + cox2$Dep + as.factor(cox2$Alc) + as.factor(cox2$Smo) + as.factor(cox2$Edu), data = cox2) results[j+k,1] <- prot results[j+k,2] <- disease results[j+k,3] <- j results[j+k,4:6]<-round(exp(cbind(coef(mod), confint(mod)))[1,1:3],2) results[j+k,7] <- summary(mod)$coefficients[1,5] results[j+k,8] <- mod$n[1] - mod$nevent[1] results[j+k,9] <- mod$nevent[1] table1 <- cox.zph(mod) p1 <- table1$table[,"p"] results[j+k,10] <-p1[1] results[j+k,11] <-p1[9] } }, error = function(e) cat("skipped")) } print(i) }, error = function(e) cat("skipped")) } write.csv(results, paste0('...path/', disease, '.csv'), row.names = F) end <- Sys.time() print(paste0('Models complete. Time end stamp: ', end, '.')) #################################################################################### ### Collate sensitivity cox results # srun -p interactive --pty bash library(tidyverse) location <- '...path/Disease_files_092522/' files <- list.files(location, '.csv') res <- list() for(i in 1:length(files)){ name <- files[i] name <- gsub("\\..*","", name) data <- read.csv(paste0(location, name, '.csv')) print(name) print(dim(data)) colnames(data) <- c("Predictor", "Outcome", 'Iteration', "Hazard Ratio", "LCI", "UCI", "P.Value", "No. of Cases", "No. of Controls", "cox.zph_protein", "cox.zph_global") data <- as.data.frame(data) res[[i]] <- data } bind <- res[[1]] for(i in 2:length(res)){ bind <- rbind(bind, res[[i]]) print(i) print(unique(res[[i]]$Outcome)) } write.csv(bind, '...path/iteration_sens_joint_assocs.csv') #################################################################################### # Isolate associations that failed assumptions bind <- read.csv('...path/iteration_sens_joint_assocs.csv') table <- data.frame(Disease = 1:3, Protein = 1:3) keep <- read.csv('...path/Associations_retained_Bon.csv') bind$retain <- paste0(bind$Predictor, bind$Outcome) index <- bind[which(bind$retain %in% keep$retain),] # Now index the associations that failed in the follow-up analyses at 10 years and 5 years sixteen <- index[which(index$Iteration %in% 16),] ten <- index[which(index$Iteration %in% 10),] five <- index[which(index$Iteration %in% 5),] fail <- sixteen[which(sixteen$cox.zph_protein < 0.05),] ass1 <- as.data.frame(table(sixteen$Outcome)) tp1 <- as.data.frame(table(fail$Outcome)) # Look to see how many still fail at 10-years fail <- ten[which(ten$cox.zph_protein < 0.05),] ten_sig <- ten[which(ten$P.Value < 0.05),] ass2 <- as.data.frame(table(ten_sig$Outcome)) tp2 <- as.data.frame(table(fail$Outcome)) # Look to see how many still fail at 5-years fail <- five[which(five$cox.zph_protein < 0.05),] five_sig <- five[which(five$P.Value < 0.05),] ass3 <- as.data.frame(table(five_sig$Outcome)) tp3 <- as.data.frame(table(fail$Outcome)) t <- left_join(ass1, tp1, by = 'Var1') t <- left_join(t, tp2, by = 'Var1') t <- left_join(t, tp3, by = 'Var1') t <- t[order(t$Freq.x),] write.csv(t, '...path/iteration_sens_summary_table_Bon.csv', row.names = F) library(readxl) naming <- read_excel("...path/naming_index.xlsx") naming <- as.data.frame(naming) names(naming)[1] <- 'Outcome' index <- left_join(index, naming, by = 'Outcome') index <- na.omit(index) write.csv(index, '...path/index_6744_assocs_loops_table_Bon.csv', row.names = F)