############################################################################################### ### Additional clinical profiling and value exploration in T2D ############################################################################################### # srun -p interactive --pty bash library(tidyverse) library(survival) library(gbm) library(precrec) library(ggplot2) ### T2D # Load PGS PGS <- read.table("path.../ukb671220.tab", header = T) PGS <- PGS[c(1,35)] # Load phenotypes file phen <- read.table(gzfile("path.../all_quant_20200904.pheno.v2.tsv.gz"), header = T) # Hba1c hb <- phen[which(colnames(phen) %in% c('IND', 'f_30750_0_0'))] names(hb)[1] <- 'SampleID' # Plot against diabetes score in test sample at baseline diab <- readRDS("path.../Diab_testTarget.rds") diab <- left_join(diab, hb, by = 'SampleID') diab <- na.omit(diab) base <- diab ev <- base[which(base$Event %in% 1),] mean(ev$time_to_event, na.rm = T) sd(ev$time_to_event, na.rm = T) comp <- diab[complete.cases(diab$f_30750_0_0),] untransformed <- diab ############################################################################################### # Rank-based inverse normalisation and scaling of each marker diab$f_30750_0_0 <- qnorm((rank(diab$f_30750_0_0, na.last='keep')-0.5)/sum(!is.na(diab$f_30750_0_0))) diab$f_30750_0_0<- scale(diab$f_30750_0_0) mean(diab$f_30750_0_0, na.rm = T) sd(diab$f_30750_0_0, na.rm = T) diab$dScore <- qnorm((rank(diab$dScore, na.last='keep')-0.5)/sum(!is.na(diab$dScore))) diab$dScore <- scale(diab$dScore) mean(diab$dScore, na.rm = T) sd(diab$dScore, na.rm = T) transformed <- diab ############################################################################################### # Examine AUC in comparison in 10-year follow-up window testTarget <- diab names(PGS)[1] <- 'SampleID' testTarget <- left_join(testTarget, PGS, by = 'SampleID') names(testTarget)[19] <- 'T2D_PGS' # 18 missing PGS testTarget <- na.omit(testTarget) riskFactorsOnlyCoxPH <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex), testTarget) PGSonly <- coxph(Surv(time_to_event, Event) ~ T2D_PGS, testTarget) fullCoxPH <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex) + dScore, testTarget) ProteinOnly <- coxph(Surv(time_to_event, Event) ~ dScore, testTarget) allrisk <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex) + as.factor(Edu) + BMI + Dep + as.factor(Smo) + as.factor(Alc), testTarget) allriskprotein <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex) + as.factor(Edu) + BMI + Dep + as.factor(Smo) + as.factor(Alc) + dScore, testTarget) hbOnly <- coxph(Surv(time_to_event, Event) ~ f_30750_0_0, testTarget) hbcovs <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex) + as.factor(Edu) + BMI + Dep + as.factor(Smo) + as.factor(Alc) + f_30750_0_0, testTarget) hbprot <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex) + as.factor(Edu) + BMI + Dep + as.factor(Smo) + as.factor(Alc) + f_30750_0_0 + dScore, testTarget) hbprotpgs <- coxph(Surv(time_to_event, Event) ~ Age_assessment + as.factor(Sex) + as.factor(Edu) + BMI + Dep + as.factor(Smo) + as.factor(Alc) + f_30750_0_0 + dScore + T2D_PGS, testTarget) print('Model has not been sex stratified in cox.') # List models models <- list(r = riskFactorsOnlyCoxPH, o = PGSonly, f = fullCoxPH, t = ProteinOnly, u = allrisk, b = allriskprotein, j = hbOnly, m = hbcovs, q = hbprot, z = hbprotpgs) # Generate performance statistics - for 10 years of follow up, comparing base HR to survival HR at 10 years predictCoxPHOnset <- function(dataDF, coxPHModel, threshold = 10) { uniqueTimes <- sort(unique(dataDF$time_to_event)) thresholdIndex <- match(threshold, uniqueTimes) cumulativeBaseHaz <- gbm::basehaz.gbm(dataDF$time_to_event, dataDF$Event, predict(coxPHModel), uniqueTimes) survivalPredictions <- exp(-cumulativeBaseHaz[[thresholdIndex]]) ^ exp(predict(coxPHModel)) onsetPredictions <- 1 - survivalPredictions # Event should be 0 if tte is > 10 dataDF$Event <- sapply(1:nrow(dataDF), function(i) { if (dataDF$time_to_event[[i]] > threshold) { 0 } else { dataDF$Event[[i]] } }) auc <- MLmetrics::AUC(y_pred = onsetPredictions, y_true = dataDF$Event) prauc <- MLmetrics::PRAUC(y_pred = onsetPredictions, y_true = dataDF$Event) roc <- pROC::roc(response = dataDF$Event, predictor = onsetPredictions) list(cumulativeBaseHaz = cumulativeBaseHaz, onsetPredictions = onsetPredictions, auc = auc, prauc = prauc, roc = roc) } # Extract test results testResults <- lapply(models, function(m) {predictCoxPHOnset(testTarget, m)}) # Extract onset predictions predictions <- lapply(testResults, function(r) {r$onsetPredictions}) # Calculate p values for AUC comparisons of interest (age/sex + protein) p_null <- pROC::roc.test(response = testTarget$Event, predictor1 = predictions$r, predictor2 = predictions$f)$p.value p_second <- pROC::roc.test(response = testTarget$Event, predictor1 = predictions$u, predictor2 = predictions$b)$p.value p_third <- pROC::roc.test(response = testTarget$Event, predictor1 = predictions$m, predictor2 = predictions$q)$p.value p_fourth <- pROC::roc.test(response = testTarget$Event, predictor1 = predictions$q, predictor2 = predictions$z)$p.value # Extract metrics table aucs <- sapply(testResults, function(r) {r$auc}) praucs <- sapply(testResults, function(r) {r$prauc}) metricsTable <- data.frame(AUC = aucs, PRAUC = praucs) metricsTable[11,1] <- metricsTable[3,1] - metricsTable[1,1] metricsTable[11,2] <- metricsTable[3,2] - metricsTable[1,2] metricsTable[12,1] <- metricsTable[6,1] - metricsTable[5,1] metricsTable[12,2] <- metricsTable[6,2] - metricsTable[5,2] metricsTable[13,1] <- metricsTable[9,1] - metricsTable[8,1] metricsTable[13,2] <- metricsTable[9,2] - metricsTable[8,2] metricsTable[14,1] <- metricsTable[10,1] - metricsTable[9,1] metricsTable[14,2] <- metricsTable[10,2] - metricsTable[9,2] row.names(metricsTable) <- c('AgeSex', 'PGS', 'AgeSex_ProteinScore', 'ProteinScore Only', 'AgeSexCovs', 'AgeSexCovs_ProteinScore', 'HbA1c_only', 'HbA1c+age+sex+lifestyle', 'ProteinScore_HbA1c_age_sex_lifestyle', 'ProteinScore_HbA1c_age_sex_lifestyle_PGS', 'Diff_AgeSex', 'Diff_AgeSexCovs', 'Diff_hba1c', 'Diff_PGS') # Add P values for AUCs to table metricsTable$P_ROCs <- 'NA' metricsTable[11,3] <- p_null metricsTable[12,3] <- p_second metricsTable[13,3] <- p_third metricsTable[14,3] <- p_fourth write.csv(metricsTable, 'path.../diabetes_comparison_ROC_p_PGS.csv', row.names = T) # Assign target test file to variable and subset such that no event greater than 10 years is present (event = 0, if tte > 10) w1Target <- testTarget w1Target$Event <- sapply(1:nrow(w1Target), function(i) { if (w1Target$time_to_event[[i]] > 10) { 0 } else { w1Target$Event[[i]] } }) w1Target$Event <- as.factor(w1Target$Event) # Assign cox test results from calculation above coxTestResults <- testResults # quickly convert list to a dataframe listToDataframe = function(list) { models = colnames(list) data = data.frame("Sensitivity" = numeric(), "Specificity" = numeric(), "Model" = character()) for (model in models) { tmp_data = data.frame("Sensitivity" = list[['sensitivities', model]], "Specificity" = list[['specificities', model]], "Model" = model) data = rbind(data, tmp_data) } return(data) } rocs <- sapply(testResults, function(r) {r$roc}) rocs_df = listToDataframe(rocs) rocs_df[which(rocs_df$Model =='r'), "Model"] = "Age/Sex" rocs_df[which(rocs_df$Model =='o'), "Model"] = "Age/Sex/PGS" rocs_df[which(rocs_df$Model =='f'), "Model"] = "ProteinScore/Age/Sex" rocs_df[which(rocs_df$Model =='t'), "Model"] = "ProteinScore" rocs_df[which(rocs_df$Model =='u'), "Model"] = "Age/Sex/Lifestyle" rocs_df[which(rocs_df$Model =='b'), "Model"] = "ProteinScore/Age/Sex/Lifestyle" rocs_df[which(rocs_df$Model =='j'), "Model"] = "HbA1c" rocs_df[which(rocs_df$Model =='m'), "Model"] = "HbA1c/Age/Sex/Lifestyle" rocs_df[which(rocs_df$Model =='q'), "Model"] = "ProteinScore/HbA1c/Age/Sex/Lifestyle" rocs_df[which(rocs_df$Model =='z'), "Model"] = "ProteinScore/HbA1c/Age/Sex/Lifestyle/PGS" cbPalette <- c("red", "gold1", "darkorange3", "violetred1", "darkturquoise", "darkorchid2", "tan1", "blue", "gray46", "black") My_Theme = theme( axis.title.x = element_text(size = 16), axis.text.x = element_text(size = 12), axis.text.y = element_text(size =12), axis.title.y = element_text(size = 16), strip.text = element_text(size = 12, face = "bold"), legend.text=element_text(size=16), legend.title=element_text(size=16, face = "bold")) # , legend.position = "none" rocs_df$Model <- factor(rocs_df$Model, levels = c("ProteinScore/HbA1c/Age/Sex/Lifestyle/PGS", "ProteinScore/HbA1c/Age/Sex/Lifestyle", "HbA1c/Age/Sex/Lifestyle", "HbA1c", "ProteinScore/Age/Sex/Lifestyle", "ProteinScore/Age/Sex", "ProteinScore", "Age/Sex/Lifestyle", "Age/Sex/PGS", "Age/Sex")) # draw ROC curves pdf('path.../IAB_rocs_diabetes_hba1c_PGS.pdf', width = 12, height = 6) rocs_df %>% ggplot( aes(x=1-Specificity, y=Sensitivity, group=Model, color=Model)) + geom_line() + theme_light() + # scale_color_viridis(discrete=TRUE) scale_colour_manual(values=cbPalette) + theme(legend.position = "bottom", legend.title = element_blank()) + guides(color = guide_legend(nrow = 8)) + xlab("False Positive Rate (1 - Specificity)") + ylab("True Postive Rate (Sensitivity)") + theme_classic() + My_Theme dev.off() # DRAW WITHOUT LEGEND pdf('path.../IAB_rocs_diabetes_hba1c_PGS2.pdf', width = 8, height = 6) rocs_df %>% ggplot( aes(x=1-Specificity, y=Sensitivity, group=Model, color=Model)) + geom_line() + theme_light() + # scale_color_viridis(discrete=TRUE) scale_colour_manual(values=cbPalette) + theme(legend.position = "bottom", legend.title = element_blank()) + guides(color = guide_legend(nrow = 8)) + xlab("False Positive Rate (1 - Specificity)") + ylab("True Postive Rate (Sensitivity)") + theme_classic() + My_Theme + theme(legend.position = 'none') dev.off() ############################################################################################### # Plot with scaling diab <- transformed diab <- diab[order(diab$dScore),] diab <- diab %>% mutate(decile = ntile(diab$dScore, 10)) diab$decile <- as.factor(diab$decile) diab$Event <- as.factor(diab$Event) library(ggpubr) My_Theme = theme( axis.title.x = element_text(size = 16), axis.text.x = element_text(size = 12), axis.text.y = element_text(size =12), axis.title.y = element_text(size = 16), strip.text = element_text(size = 12, face = "bold"), legend.text=element_text(size=16), legend.title=element_text(size=16, face = "bold"), legend.position = "none") pdf('path.../plot_scatter_hba1c_diab_no_legend_loes.pdf', width = 6, height = 5) ggplot(diab, aes(x=dScore, y=f_30750_0_0, group = Event)) + geom_point(size = 0.2)+ stat_density_2d(geom = "polygon", aes(alpha = ..level.., fill = Event, bins = 3)) + theme_minimal() + theme_classic() + scale_fill_manual(values = c("dodgerblue", "tomato")) + geom_density_2d(aes(color = Event)) + scale_color_manual(values = c("blue4", "firebrick1")) + My_Theme + xlab('Type 2 diabetes ProteinScore') + ylab('HbA1c') dev.off() ############################################################################################### # Plot without scaling diab <- untransformed diab <- diab[order(diab$dScore),] diab <- diab %>% mutate(decile = ntile(diab$dScore, 10)) diab$decile <- as.factor(diab$decile) diab$Event <- as.factor(diab$Event) library(ggpubr) # Do an anova one way on deciles to get a p value for inclusion # https://www.r-bloggers.com/2022/05/one-way-anova-example-in-r-quick-guide/ res.aov <- aov(f_30750_0_0 ~ decile, data = diab) My_Theme = theme( axis.title.x = element_text(size = 16), axis.text.x = element_text(size = 12), axis.text.y = element_text(size =12), axis.title.y = element_text(size = 16), strip.text = element_text(size = 12, face = "bold"), legend.text=element_text(size=16), legend.title=element_text(size=16, face = "bold"), legend.position = "none") pdf('path.../plot_deciles_rect_hba1c_diab_no_scaling.pdf', width = 6, height = 5) ggplot(diab, aes(x=decile, y=f_30750_0_0)) + geom_violin(trim=FALSE, fill='lightcyan2', color="cadetblue4")+ geom_boxplot(width=0.1, color="darkslategrey", fill="darkslategrey", alpha=0.2) + theme_classic() + labs(x="Type 2 diabetes ProteinScore decile", y = 'HbA1c (mmol/mol)')+ annotate("rect", ymin = 42, ymax = 47, xmin = 0, xmax = 10.5, alpha = .1,fill = "blue") + My_Theme + ylim(0,130) dev.off()