UKB-project / 13_Sensitivity_individual_cox.R
13_Sensitivity_individual_cox.R
Raw
###############################################################################################

##### Sensitivity Cox 

###############################################################################################

args <- commandArgs(trailingOnly=TRUE)
taskid <- as.numeric(args[1])
print(paste0('taskid for this iteration is ', taskid))

library(survival)
library(survminer)
print('Libraries loaded - now loading disease cox file.')

# Set disease and read in disease codes for the iteration (array)
iteration <- taskid
diseases <- c('AL', 'ALS', 'BRAIN', 'Breast', 'Colorectal', 'COPD', 'CYS', 'Dep', 'Diab', 'ENDO', 'GYN',
              'IBD', 'IHD', 'LIV', 'LUNG', 'LUP', 'MS', 'PD', 'Prostate', 'RA', 'SCZ', 'ST', 'VD', 'DEATH')
tables <- list.files('...path/Tables/', '.csv')
disease <- as.character(diseases[iteration])
table <- read.csv(paste0('...path/Tables/', disease ,'.csv'))
sex_list <- c('Prostate', 'GYN', 'ENDO', 'Breast', 'CYS', 'Prostate')
clock <- names(table)[56:1523] 
print(disease)
print(dim(table))
now <- Sys.time()
print(paste0('Models initiating. Time start stamp: ', now, '.'))

# RUN LOOPS OVER YEAR OF FOLLOW UP FOR EACH ASSOC
tot <- 1468 * 16
results <- data.frame(A = 1:tot, B = 1:tot, C = 1:tot, D = 1:tot, E = 1:tot, Z = 1:tot, G = 1:tot, H = 1:tot, I = 1:tot, J = 1:tot)
timer <- seq(0,tot, by = 16)

# Full results collation 
for(i in 1:length(clock)){
  tryCatch({ 
    cox <- table
    prot <- as.character(clock[[i]])
      for(j in 1:16){
        tryCatch({ 
          k <- timer[[i]]
       
          # Isolate cases and controls
          cases <- cox[which(cox$Event == 1),]
          controls <- cox[-which(cox$Event == 1),]

          # Restrict to those in follow up time and exclude those outside of it
          cases$Event <- ifelse(cases$tte <= j, 1, 0)

          # # Join to original controls
          cox2 <- rbind(cases, controls)
          
          # Run model
          # mod <- coxph(Surv(cox2$tte, cox2$Event) ~ scale(cox2[,prot]) + factor(cox2$Sex) + cox2$Age_assessment, data = cox2)
          
          if(disease %in% sex_list){
            print('Disease in sex list - removing sex from cox.')
            mod <- coxph(Surv(cox2$tte, cox2$Event) ~ scale(cox2[,prot]) + cox2$Age_assessment + cox2$BMI + cox2$Dep + as.factor(cox2$Alc) + as.factor(cox2$Smo) + as.factor(cox2$Edu), data = cox2)
            results[j+k,1] <- prot
            results[j+k,2] <- disease
            results[j+k,3] <- j
            results[j+k,4:6]<-round(exp(cbind(coef(mod), confint(mod)))[1,1:3],2)
            results[j+k,7] <- summary(mod)$coefficients[1,5]
            results[j+k,8] <- mod$n[1] - mod$nevent[1]
            results[j+k,9] <- mod$nevent[1]
            table1 <- cox.zph(mod)
            p1 <- table1$table[,"p"]
            results[j+k,10] <-p1[1]
            results[j+k,11] <-p1[8]
          } else {
            mod <- coxph(Surv(cox2$tte, cox2$Event) ~ scale(cox2[,prot]) + factor(cox2$Sex) + cox2$Age_assessment + cox2$BMI + cox2$Dep + as.factor(cox2$Alc) + as.factor(cox2$Smo) + as.factor(cox2$Edu), data = cox2)
            results[j+k,1] <- prot
            results[j+k,2] <- disease
            results[j+k,3] <- j
            results[j+k,4:6]<-round(exp(cbind(coef(mod), confint(mod)))[1,1:3],2)
            results[j+k,7] <- summary(mod)$coefficients[1,5]
            results[j+k,8] <- mod$n[1] - mod$nevent[1]
            results[j+k,9] <- mod$nevent[1]
            table1 <- cox.zph(mod)
            p1 <- table1$table[,"p"]
            results[j+k,10] <-p1[1]
            results[j+k,11] <-p1[9]
          }
        }, error = function(e) cat("skipped"))
      }
    print(i)
  }, error = function(e) cat("skipped"))
}

write.csv(results, paste0('...path/', disease, '.csv'), row.names = F)
end <- Sys.time()
print(paste0('Models complete. Time end stamp: ', end, '.'))

####################################################################################

### Collate sensitivity cox results

# srun -p interactive --pty bash
library(tidyverse)
location <- '...path/Disease_files_092522/'
files <- list.files(location, '.csv')
res <- list()

for(i in 1:length(files)){
  name <- files[i]
  name <- gsub("\\..*","", name)
  data <- read.csv(paste0(location, name, '.csv'))
  print(name)
  print(dim(data))
  colnames(data) <- c("Predictor", "Outcome", 'Iteration',  "Hazard Ratio", "LCI", "UCI", "P.Value", "No. of Cases", "No. of Controls", "cox.zph_protein", "cox.zph_global")
  data <- as.data.frame(data)
  res[[i]] <- data
}

bind <- res[[1]]
for(i in 2:length(res)){
  bind <- rbind(bind, res[[i]])
  print(i)
  print(unique(res[[i]]$Outcome))
}
write.csv(bind, '...path/iteration_sens_joint_assocs.csv')

####################################################################################

# Isolate associations that failed assumptions

bind <- read.csv('...path/iteration_sens_joint_assocs.csv')
table <- data.frame(Disease = 1:3, Protein = 1:3)
keep <- read.csv('...path/Associations_retained_Bon.csv')
bind$retain <- paste0(bind$Predictor, bind$Outcome)
index <- bind[which(bind$retain %in% keep$retain),]
# Now index the associations that failed in the follow-up analyses at 10 years and 5 years

sixteen <- index[which(index$Iteration %in% 16),]
ten <- index[which(index$Iteration %in% 10),]
five <- index[which(index$Iteration %in% 5),]
fail <- sixteen[which(sixteen$cox.zph_protein < 0.05),]
ass1 <- as.data.frame(table(sixteen$Outcome))
tp1 <- as.data.frame(table(fail$Outcome))

# Look to see how many still fail at 10-years
fail <- ten[which(ten$cox.zph_protein < 0.05),]
ten_sig <- ten[which(ten$P.Value < 0.05),]
ass2 <- as.data.frame(table(ten_sig$Outcome))
tp2 <- as.data.frame(table(fail$Outcome))

# Look to see how many still fail at 5-years
fail <- five[which(five$cox.zph_protein < 0.05),]
five_sig <- five[which(five$P.Value < 0.05),]
ass3 <- as.data.frame(table(five_sig$Outcome))
tp3 <- as.data.frame(table(fail$Outcome))
t <- left_join(ass1, tp1, by = 'Var1')
t <- left_join(t, tp2, by = 'Var1')
t <- left_join(t, tp3, by = 'Var1')

t <- t[order(t$Freq.x),]
write.csv(t, '...path/iteration_sens_summary_table_Bon.csv', row.names = F)

library(readxl)
naming <- read_excel("...path/naming_index.xlsx")
naming <- as.data.frame(naming)
names(naming)[1] <- 'Outcome'

index <- left_join(index, naming, by = 'Outcome')
index <- na.omit(index)
write.csv(index, '...path/index_6744_assocs_loops_table_Bon.csv', row.names = F)