############################################################################# ####################### automated Ki67-Scores in Prostate final (Paper) ################### ############################################################################# # 18.02.2021 EV, NCB #Cohort consits of 11845 prostate cancer patients <- Slide xxx +xxxx +xxxx +xxx (Exclude xxxx) # and Heteroprognosis cohort with 630 patients (Pro 19) with 580 TMA Gleason scores made by Sauter ############################################################################################################################################################################################## ############################################################################################################################################################################################## ###################################### Figure 2 (Survival curves + 5years numeric suvrial ########################################################### ############################################################################################################################################################################################## ############################################################################################################################################################################################## ###Load raw data from VERSA 8 Time1<-Sys.time() setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ####change your working direction here ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("Blessin_autoKi67LI_per patient data.xlsx", sheet="Blessin_autoKi67LI", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] ### Include patients like the JMP-Datafilter #B.data <- A.data.patients [A.data.patients$`Filter.(less.than.200.AE1/3.cells)` == "include",] ### I leave it on purpose so that we see later that we have excluded the AE1/3 cells to 100% #B.data <- B.data [B.data$Final.filter == "include",] ##### pick up the data for analysis C.data <-A.data.patients%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"=`Ki67.Li.(after.23um.exclusion)`) ####load packages for KM curves library(survival) library(ggplot2) library(rms) library(survminer) library(dplyr) library(plotrix) library(reshape2) library(ggtext) ###################################### ######################################################################### ###################################### Survival curves (NEJM style)WL ######################################################################### ###################################### ######################################################################### ######## PSA recurrence-free survival ############ ######################## 4 groups ("<2.0%","2.0-5.0%","5.0-15.0%",">15.0%") ####################################### ######################## 4 groups ("<2.0%","2.0-5.0%","5.0-15.0%",">15.0%") ####################################### ######################## 4 groups ("<2.0%","2.0-5.0%","5.0-15.0%",">15.0%") ####################################### #####create groups C.data$marker_groups = cut(C.data$Ki67.LI, breaks = c(-1, 2.0001, 5.00001, 15.0001, 101), ##### HERE to change grouping ### labels= c("\u200B< 2.0%","2.0-5.0%","5.1-15.0%","\u200B> 15.0%")) SurvObj <- with(C.data, Surv(BCR.months, BCR.censor == 0)) Ba.km.curve<- survfit(SurvObj ~ marker_groups, data = C.data, conf.type = "log-log") ggsurv<-ggsurvplot(Ba.km.curve, size = 1.4,conf.int=FALSE, fun=NULL, title="", linetype = 1, censor=FALSE, xlim=c(0,90), xlab = "Survival months", ylab = "PSA recurence-free survival", break.x.by = 12, break.y.by = 0.1, font.main = c(25, "bold", "black"),font.x = c(25, "bold", "black"), font.y = c(25, "bold", "black"),font.tickslab = c(25, "bold", "black"), legend=c(0.15, 0.2), legend.labs=c("\u200B< 2.0%","2.0-5.0%","5.1-15.0%","\u200B> 15.0%"), legend.title="",font.legend = c(25,"bold", "black"), palette=c("royalblue3","mediumseagreen","orange","firebrick"), pval=TRUE,pval.size = 6, pval.coord = c(80, 0.9), tables.height = 0.1, risk.table="abs_pct", risk.table.fontsize = 4.5, risk.table.y.text = TRUE,risk.table.y.text.col = TRUE, risk.table.height=.25, surv.plot.height = 0.75) Bb.sum <- summary(Ba.km.curve)$table ### Hazard plot ### fit.coxph<-coxph(SurvObj ~ marker_groups, data = C.data) #names(summary(fit.coxph)) C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="\u200B< 2.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="2.0-5.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.1<-summary(fit.coxph)$conf.int[,1] low95.out.1<-summary(fit.coxph)$conf.int[,3] high95.out.1<-summary(fit.coxph)$conf.int[,4] p.out.1<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="2.0-5.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="5.1-15.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.2<-summary(fit.coxph)$conf.int[,1] low95.out.2<-summary(fit.coxph)$conf.int[,3] high95.out.2<-summary(fit.coxph)$conf.int[,4] p.out.2<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="5.0-15.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="\u200B> 15.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.3<-summary(fit.coxph)$conf.int[,1] low95.out.3<-summary(fit.coxph)$conf.int[,3] high95.out.3<-summary(fit.coxph)$conf.int[,4] p.out.3<-summary(fit.coxph)$coefficients[,5] ### add text ggsurv$plot <- ggsurv$plot + ggplot2::annotate("text", x = 60, y = 0.3, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.1, digits = 2),"(95% CI,",round(low95.out.1, digits = 2),"-",round(high95.out.1, digits = 2),"), p=", signif(p.out.1,digits=1))), size = 4.5)+ ggplot2::annotate("text", x = 60, y = 0.2, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.2, digits = 2),"(95% CI,",round(low95.out.2, digits = 2),"-",round(high95.out.2, digits = 2),"), p=", signif(p.out.2,digits=1))), size = 4.5)+ ggplot2::annotate("text", x = 60, y = 0.1, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.3, digits = 2),"(95% CI,",round(low95.out.3, digits = 2),"-",round(high95.out.3, digits = 2),"), p=", signif(p.out.3,digits=1))), size = 4.5) ### show the plot ggsurv ######## PSA recurrence-free survival ############ ######################## 9 groups ("<1.0%","1.0-2.5%","2.5-4%","4-7%","7-10%","10-13%","13-23%","23-40%",">40%") ####################################### ######################## 9 groups ("<1.0%","1.0-2.5%","2.5-4%","4-7%","7-10%","10-13%","13-23%","23-40%",">40%") ####################################### ######################## 9 groups ("<1.0%","1.0-2.5%","2.5-4%","4-7%","7-10%","10-13%","13-23%","23-40%",">40%") ####################################### #####create groups C.data$marker_groups = cut(C.data$Ki67.LI, breaks = c(-1, 1.0001, 2.50001, 4.0001,7.0001, 10.0001,13.0001,23.0001,40.0001,101), ##### HERE to change grouping ### labels= c("\u200B< 1.0%","1.0-2.5%","2.6-4.0%","4.1-7.0%","7.1-10.0%","10.1-13.0%","13.1-23.0%","23.1-40.0%","\u200B> 40.0%")) SurvObj <- with(C.data, Surv(BCR.months, BCR.censor == 0)) Ba.km.curve<- survfit(SurvObj ~ marker_groups, data = C.data, conf.type = "log-log") ggsurv<-ggsurvplot(Ba.km.curve, size = 1.4,conf.int=FALSE, fun=NULL, title="", linetype = 1, censor=FALSE, xlim=c(0,90), xlab = "Survival months", ylab = "PSA recurence-free survival", break.x.by = 12, break.y.by = 0.1, font.main = c(25, "bold", "black"),font.x = c(25, "bold", "black"), font.y = c(25, "bold", "black"),font.tickslab = c(25, "bold", "black"), legend=c(0.08, 0.25), legend.labs=c("\u200B<1.0%","1.0-2.5%","2.6-4.0%","4.1-7.0%","7.1-10.0%","10.1-13.0%","13.1-23.0%","23.1-40.0%","\u200B> 40.0%"), legend.title="",font.legend = c(15,"bold", "black"), palette=c("black","darkblue","royalblue3","forestgreen","mediumseagreen","orange","gold3","red","firebrick"), pval=TRUE,pval.size = 6, pval.coord = c(75, 0.9), tables.height = 0.1, risk.table="abs_pct", risk.table.fontsize = 4.5, risk.table.y.text = TRUE,risk.table.y.text.col = TRUE, risk.table.height=.25, surv.plot.height = 0.75) Bb.sum <- summary(Ba.km.curve)$table ### show the plot ggsurv ### Hazard plot ### fit.coxph<-coxph(SurvObj ~ marker_groups, data = C.data) #names(summary(fit.coxph)) C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="\u200B< 1.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="1.0-2.5%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.1<-summary(fit.coxph)$conf.int[,1] low95.out.1<-summary(fit.coxph)$conf.int[,3] high95.out.1<-summary(fit.coxph)$conf.int[,4] p.out.1<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="1.0-2.5%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="2.6-4.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.2<-summary(fit.coxph)$conf.int[,1] low95.out.2<-summary(fit.coxph)$conf.int[,3] high95.out.2<-summary(fit.coxph)$conf.int[,4] p.out.2<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="2.5-4.0%"] <- "1" C.data$marker_groups_2[C.data$marker_groups =="4.1-7.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.3<-summary(fit.coxph)$conf.int[,1] low95.out.3<-summary(fit.coxph)$conf.int[,3] high95.out.3<-summary(fit.coxph)$conf.int[,4] p.out.3<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="4.1-7.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="7.1-10.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.4<-summary(fit.coxph)$conf.int[,1] low95.out.4<-summary(fit.coxph)$conf.int[,3] high95.out.4<-summary(fit.coxph)$conf.int[,4] p.out.4<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="7.1-10.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="10.1-13.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.5<-summary(fit.coxph)$conf.int[,1] low95.out.5<-summary(fit.coxph)$conf.int[,3] high95.out.5<-summary(fit.coxph)$conf.int[,4] p.out.5<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="10.1-13.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="13.1-23.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.6<-summary(fit.coxph)$conf.int[,1] low95.out.6<-summary(fit.coxph)$conf.int[,3] high95.out.6<-summary(fit.coxph)$conf.int[,4] p.out.6<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="13.1-23.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="23.1-40.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.7<-summary(fit.coxph)$conf.int[,1] low95.out.7<-summary(fit.coxph)$conf.int[,3] high95.out.7<-summary(fit.coxph)$conf.int[,4] p.out.7<-summary(fit.coxph)$coefficients[,5] C.data$marker_groups_2 <- NA C.data$marker_groups_2[C.data$marker_groups =="23.1-40.0%" ] <- "1" C.data$marker_groups_2[C.data$marker_groups =="\u200B> 40.0%" ] <- "2" fit.coxph<-coxph(SurvObj ~ marker_groups_2, data = C.data) HR.out.8<-summary(fit.coxph)$conf.int[,1] low95.out.8<-summary(fit.coxph)$conf.int[,3] high95.out.8<-summary(fit.coxph)$conf.int[,4] p.out.8<-summary(fit.coxph)$coefficients[,5] ### add text ggsurv$plot <- ggsurv$plot + ggplot2::annotate("text", x = 50, y = 0.35, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.1, digits = 2),"(95% CI,",round(low95.out.1, digits = 2),"-",round(high95.out.1, digits = 2),"), p=", signif(p.out.1,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.30, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.2, digits = 2),"(95% CI,",round(low95.out.2, digits = 2),"-",round(high95.out.2, digits = 2),"), p=", signif(p.out.2,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.25, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.3, digits = 2),"(95% CI,",round(low95.out.3, digits = 2),"-",round(high95.out.3, digits = 2),"), p=", signif(p.out.3,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.20, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.4, digits = 2),"(95% CI,",round(low95.out.4, digits = 2),"-",round(high95.out.4, digits = 2),"), p=", signif(p.out.4,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.15, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.5, digits = 2),"(95% CI,",round(low95.out.5, digits = 2),"-",round(high95.out.5, digits = 2),"), p=", signif(p.out.5,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.1, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.6, digits = 2),"(95% CI,",round(low95.out.6, digits = 2),"-",round(high95.out.6, digits = 2),"), p=", signif(p.out.6,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.05, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.7, digits = 2),"(95% CI,",round(low95.out.7, digits = 2),"-",round(high95.out.7, digits = 2),"), p=", signif(p.out.7,digits=2))), size = 4.5)+ ggplot2::annotate("text", x = 50, y = 0.0, # x and y coordinates of the text label = c(paste("HR=",round(HR.out.8, digits = 2),"(95% CI,",round(low95.out.8, digits = 2),"-",round(high95.out.8, digits = 2),"), p=", signif(p.out.8,digits=2))), size = 4.5) ### show the plot ggsurv ######exporting size 10*12 ##### real TMA ###################################### ######################################################################### ###################################### Numeric (Cutoff free) 5yersRFS ######################################################################### ###################################### ######################################################################### ######## PSA recurrence-free survival ############ D.data <- C.data%>% select("BCR.censor","BCR.months","Ki67.LI") ### test for NA and remove them #my.data <-na.omit(my.data) any(is.na(D.data)) sum(is.na(D.data)) D.dta <- D.data[complete.cases(D.data),] any(is.na(D.dta)) summary(D.dta) seq.range<-function (x, ...) { rx <- range(x, na.rm = TRUE) seq(rx[1], rx[2], ...) } ### survival-Variablen D.dta$status<-1-as.numeric(D.dta$BCR.censor) D.dta$time<-D.dta$BCR.months D.dta$Ki67.LI<-as.numeric(D.dta$Ki67.LI) D.dta <- D.dta[complete.cases(D.dta),] ### Cut into quartile based groups library(dplyr) ### survival analysis final.cox<-coxph(Surv(time,status)~pspline(Ki67.LI,5),D.dta) surv_rates<-function(mod, pred_data, times=60){ s1<-summary(survfit(mod,newdata=pred_data),times=times) cbind(pred_data,do.call(data.frame,lapply(s1[c("surv","lower","upper")],as.vector)))} ### predicton data pdata<-data.frame(Ki67.LI=seq.range(D.dta$Ki67.LI,length.out=100)) surv5y_final<-cbind(surv_rates(final.cox,pdata), model="penalized spline 5th deg") ggplot(surv5y_final,aes(x=Ki67.LI,y=surv))+geom_line(aes(colour=model))+geom_ribbon(aes(ymin=lower,ymax=upper,fill=model),alpha=.25)+theme_bw()+ labs(x="Auto Ki67-LI",y="5-years PSA recurence-free survival") +theme(legend.position="none")+ scale_y_continuous(limits=c(0, 1), expand = c(0, 0)) +scale_x_continuous(limits=c(0, 30), expand = c(0, 0)) ##### hetero ###################################### ######################################################################### ###################################### Numeric (Cutoff free) 5yersRFS ######################################################################### ###################################### ######################################################################### ######## PSA recurrence-free survival ############ ###Load raw data setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ####change your working direction here ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("Blessin_autoKi67LI_per patient data hetero.xlsx", sheet="Blessin_autoKi67LI_hetero", colNames = TRUE) ### Include patients like the JMP-Datafilter #B.data <- A.data.patients[A.data.patients$Filter.final == "include",] #B.data <- B.data[B.data$RPE_TMA == "Pro 19",] ### calculations for the Paper library(dplyr) C.data <-B.data%>% select("TMA1place"="RPE_Punch1_TMAlocalisation","TMA2place"="RPE_Punch2_TMAlocalisation","TMA3place"="RPE_Punch3_TMAlocalisation", "TMA4place"="RPE_Punch4_TMAlocalisation","TMA5place"="RPE_Punch5_TMAlocalisation","TMA6place"="RPE_Punch6_TMAlocalisation", "Foci1.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy1","Foci2.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy2", "Foci3.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy3","Foci4.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy4", "Foci5.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy5","Foci6.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy6", "Ki67ofMAXGleason"="Ki67-LI.of.spot.with.max.Gleason", "IQ_Gleason"="RPE_IQ_Gleason", "BCR.censor"="BCR_censor", "BCR.months"="BCR_months") ###################################### ######################################################################### ###################################### Numeric (Cutoff free) 5yersRFS ######################################################################### ###################################### ######################################################################### ######## PSA recurrence-free survival ############ D.data <- C.data%>% select("BCR.censor","BCR.months","IQ_Gleason") ### test for NA and remove them #my.data <-na.omit(my.data) any(is.na(D.data)) sum(is.na(D.data)) D.dta <- D.data[complete.cases(D.data),] any(is.na(D.dta)) summary(D.dta) #dta <-na.omit(dta) ########## full automatic plot(df$ID, df$UID, xlab=names(df)[1], ylab=names(df)[2], type='l') xlab= names(df)/rownames(df) #yy <- D.data$Ki67.LI # Select Subgroup #dta <- dta[dta$RPE_Gleason == "3+4",] ### numerische scala einführen seq.range<-function (x, ...) { rx <- range(x, na.rm = TRUE) seq(rx[1], rx[2], ...) } ### survival-Variablen D.dta$status<-1-as.numeric(D.dta$BCR.censor) D.dta$time<-D.dta$BCR.months #D.dta$marker_num<-as.numeric(D.dta$Ki67.LI) #### here #### #A.dta$gleason_iq<- as.numeric(A.dta$gleason_iq) D.dta$IQ_Gleason<-as.numeric(D.dta$IQ_Gleason) D.dta <- D.dta[complete.cases(D.dta),] ### Cut into quartile based groups library(dplyr) #D.dta$Ki67.LI_a <- ntile(D.dta$Ki67.LI,9) ### survival analysis library(survival) final.cox<-coxph(Surv(time,status)~pspline(IQ_Gleason,5),D.dta) surv_rates<-function(mod, pred_data, times=60){ s1<-summary(survfit(mod,newdata=pred_data),times=times) cbind(pred_data,do.call(data.frame,lapply(s1[c("surv","lower","upper")],as.vector)))} ### predicton data pdata<-data.frame(IQ_Gleason=seq.range(D.dta$IQ_Gleason,length.out=100)) surv5y_final<-cbind(surv_rates(final.cox,pdata), model="penalized spline 5th deg") ggplot(surv5y_final,aes(x=IQ_Gleason,y=surv))+geom_line(colour="orange")+ geom_ribbon(aes(ymin=lower,ymax=upper),fill="orange",alpha=.25)+theme_bw()+ labs(x="Automated Ki67-LI",y="5-years PSA recurence-free survival") + theme(legend.position="none")+ scale_y_continuous(limits=c(0, 1), expand = c(0, 0)) +scale_x_continuous(limits=c(0, 100), expand = c(0, 0)) ###################################################################################################################################################################################################### ###################################################################################################################################################################################################### ############################################################ Figure 3 ROC curve Gleason scores vs. Ki67 ############################################################ ###################################################################################################################################################################################################### ###################################################################################################################################################################################################### ###################################### ######################################################################### ###################################### 17k classical Prostate (ki67+IQ, Quant Gleason, Gleason, ISUP) ######################################################################### ###################################### ######################################################################### # Ki67 Mean of 6 vs. GS::SUM_Gleason_Berechnet --> ISUP vs. ISUP + auto-Ki67-LI # Package riskRegression (12.2020) (https://github.com/tagteam/riskRegression) ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) ###Load raw data from VERSA 8 setwd("/Users/niclasblessin/Nico/Forschung/# My paperwork/MS08_Prostate ki67 score/") #setwd("C:/Users/Workstation3/Dropbox/Multiplex IF AG") #setwd("C:/Users/Workstation 5/Dropbox/Multiplex IF AG") #setwd("C:/Users/Patho 1/Dropbox/Multiplex IF AG") ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("2021_03_17 Prostate reanalysis.xlsx", sheet="R", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] ### Include patients like the JMP-Datafilter B.data <- A.data.patients [A.data.patients$`Filter.(less.than.200.AE1/3.cells)` == "include",] #B.data <- A.data.patients B.data <- B.data [B.data$Final.filter == "include",] #B.data <- B.data [B.data$`FIlter.2.(6000.patients)` == "include"|B.data$Slide == "ab0290" #|B.data$Slide == "ab0293"|B.data$Slide == "ab0297"|B.data$Slide == "ab0300"|B.data$Slide == "ab0301" #|B.data$Slide == "ab0303"|B.data$Slide == "ab0305"|B.data$Slide == "ab0308"|B.data$Slide == "ab0309" #|B.data$Slide == "ab0310"|B.data$Slide == "ab0314"|B.data$Slide == "ab0315"|B.data$Slide == "ab0316",] #### pickup data needed C.data <-B.data%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"="Ki67.Li.(after.23um.exclusion)", "ISUP"="pGleason.of.Pro17k_clean_NB", "Gleason"="RPE_Gleason_groups", "QuantGleason"="RPE_quant_Gleason", "IQGleason"="RPE_IQ_Gleason") ### understanding the data barplot(table(C.data$Gleason)) ### Change to ISUP C.data<-C.data%>% mutate(ISUP=recode(ISUP,"+"="1","0+"="1","2+2"="1","2+2"="1","2+3"="1","2+4"="1","3+2"="1", "3+3"="1", "3+4"="2", "3+5"="4", "4+3"="3", "4+4"="4", "4+5"="5", "5+3"="4", "5+4"="5", "5+5"="5")) ### understanding the data barplot(table(C.data$ISUP)) barplot(table(C.data$Gleason)) barplot(table(C.data$QuantGleason)) barplot(table(C.data$IQGleason)) ### Use always numeric values if possible! C.data$ISUP <- as.numeric(C.data$ISUP) C.data$IQGleason <- as.numeric(C.data$IQGleason) plot(density(C.data$IQGleason)) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### auto ki67-Li c1<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI, C.data) ### ISUP c2<-coxph(Surv(BCR.months,BCR.censor==0)~ISUP, C.data) ### auto-Ki67+Gelason c3<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+ISUP, C.data) ### QuantGleason c4<-coxph(Surv(BCR.months,BCR.censor==0)~QuantGleason, C.data) ### IQ Gleason c5<-coxph(Surv(BCR.months,BCR.censor==0)~IQGleason, C.data) ### auto-Ki67 + IQ Gleason c6<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+IQGleason, C.data) mdta<-model.frame(~BCR.months+BCR.censor+Ki67.LI+ISUP+Gleason+QuantGleason+IQGleason, C.data) ### calculate predictors mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred3~pred2,mdta) plot(pred4~pred3,mdta) plot(pred5~pred4,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step times<-5*12 ROC.data <- Score(list("Ki67-LI"=mdta$pred1,"ISUP"=mdta$pred2,"Ki67+ISUP"=mdta$pred3,"QuantGleason"=mdta$pred4, "IQGleason"=mdta$pred5,"Ki67+IQGlea."=mdta$pred6), Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("orange","royalblue3","red","turquoise","forestgreen","firebrick"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts p.RR<- p.RR[c(1,6,10,13,15)] #### select the p-values from the list we need #AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",sprintf("%.2f", round(AUC,2))," ","(",sprintf("%.2f", round(lower,2)),"-",sprintf("%.2f", round(upper,2)),")") AUC.RR$y<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Ki67-LI","ISUP","Ki67+ISUP","QuantGleason", "IQGleason","Ki67+IQGlea.")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.2,0.3,0.5,0.7,0.9) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.9)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=alpha(c("orange","royalblue3","red","turquoise","forestgreen","firebrick"),c(1,1,1,1,1,1)))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) plotRisk(ROC.data,times=times) # Distribuiton of the Data hist(C.data$ISUP) plot(density(C.data$ISUP)) ### AUC-with-Bootstrap-CI.R --- #---------------------------------------------------------------------- ## Author: Paul Blanche ## Created and updated: Jan 28 2021 (12:25) rm(list=ls()) ###Import and select Data detach("package:xlsx") library("openxlsx") library(dplyr) Header = TRUE A.data.patients=read.xlsx("2021_03_17 Prostate reanalysis.xlsx", sheet="R", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] ### Include patients like the JMP-Datafilter B.data <- A.data.patients [A.data.patients$`Filter.(less.than.200.AE1/3.cells)` == "include",] #B.data <- A.data.patients B.data <- B.data [B.data$Final.filter == "include",] #B.data <- B.data [B.data$`FIlter.2.(6000.patients)` == "include"|B.data$Slide == "ab0290" #|B.data$Slide == "ab0293"|B.data$Slide == "ab0297"|B.data$Slide == "ab0300"|B.data$Slide == "ab0301" #|B.data$Slide == "ab0303"|B.data$Slide == "ab0305"|B.data$Slide == "ab0308"|B.data$Slide == "ab0309" #|B.data$Slide == "ab0310"|B.data$Slide == "ab0314"|B.data$Slide == "ab0315"|B.data$Slide == "ab0316",] #### pickup data needed C.data <-B.data%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"="Ki67.Li.(after.23um.exclusion)", "ISUP"="pGleason.of.Pro17k_clean_NB", "Gleason"="RPE_Gleason_groups", "QuantGleason"="RPE_quant_Gleason", "IQGleason"="RPE_IQ_Gleason") ### understanding the data barplot(table(C.data$Gleason)) ### Change to ISUP C.data<-C.data%>% mutate(ISUP=recode(ISUP,"+"="1","0+"="1","2+2"="1","2+2"="1","2+3"="1","2+4"="1","3+2"="1", "3+3"="1", "3+4"="2", "3+5"="4", "4+3"="3", "4+4"="4", "4+5"="5", "5+3"="4", "5+4"="5", "5+5"="5")) ### understanding the data barplot(table(C.data$ISUP)) barplot(table(C.data$Gleason)) barplot(table(C.data$QuantGleason)) barplot(table(C.data$IQGleason)) ### Use always numeric values if possible! C.data$ISUP <- as.numeric(C.data$ISUP) C.data$IQGleason <- as.numeric(C.data$IQGleason) plot(density(C.data$IQGleason)) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) #### pickup data needed C.data <-C.data%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"="Ki67.LI", "ISUP"="ISUP") library(survival) library(riskRegression) # {{{ Example without Bootstrap for 95% computation ResPaquid <- Score(list("Ki67.LI"=C.data$Ki67.LI,"ISUP"=C.data$ISUP), formula=Hist(BCR.months,BCR.censor==0)~1, data=C.data, null.model = FALSE, conf.int=TRUE, metrics=c("auc"), times=c(5), plots="ROC") ResPaquid$AUC ## Results of model comparisons: plotROC(ResPaquid,time=5) # }}} # {{{ Example with Bootstrap for 95% computation # set random seed for reproducibility set.seed(20210128) # number of Bootstrap samples nB <- 500 # initialize vector to save results from each iteration AllAUC <- matrix(NA,nrow=nB,ncol=2) # Bootstrap for(b in 1:nB){ # monitor computation progress print(paste0("Step b=",b," out of ",nB)) # Create Bootstrapped dataset db <- C.data[sample(x=1:nrow(C.data),size=nrow(C.data),replace=TRUE),] # Estimate AUC with Bootstrap data Res2b <- Score(list("Ki67.LI"=C.data$Ki67.LI,"ISUP"=C.data$ISUP), formula=Hist(BCR.months,BCR.censor==0)~1, data=C.data, null.model = FALSE, conf.int=TRUE, metrics=c("auc"), times=c(5), plots="ROC") # Save result AllAUC[b,] <- Res2b$AUC$score$AUC } # Wald type 95% CI, i.e., se estimated from Bootstrapping. OK if nB >= 400 # For KI67-LI CIboot1 <- c(ResPaquid$AUC$score$AUC[1]-1.96*sd(AllAUC[,1]),ResPaquid$AUC$score$AUC[1]+1.96*sd(AllAUC[,1])) CIboot1 # For ISUP CIboot2 <- c(ResPaquid$AUC$score$AUC[2]-1.96*sd(AllAUC[,2]),ResPaquid$AUC$score$AUC[2]+1.96*sd(AllAUC[,2])) CIboot2 # Wald type p-value and CI for the difference CI <- c(ResPaquid$AUC$score$AUC[2]-ResPaquid$AUC$score$AUC[1]- 1.96*sd(AllAUC[,2]-AllAUC[,1]), ResPaquid$AUC$score$AUC[2]-ResPaquid$AUC$score$AUC[1]+ 1.96*sd(AllAUC[,2]-AllAUC[,1])) CI pvalue <- 2*pnorm(abs(ResPaquid$AUC$score$AUC[2]-ResPaquid$AUC$score$AUC[1])/sd(AllAUC[,2]-AllAUC[,1]) ,lower.tail=FALSE) pvalue # }}} #---------------------------------------------------------------------- ### AUC-with-Bootstrap-CI.R ends here #ISUP aus mean ###################################### ######################################################################### ###################################### HeteroProg (ki67+IQ, Quant Gleason, Gleason, ISUP) ######################################################################### ###################################### ######################################################################### # Ki67 Mean of 6 vs. GS::SUM_Gleason_Berechnet --> ISUP vs. ISUP + auto-Ki67-LI # Package riskRegression (12.2020) (https://github.com/tagteam/riskRegression) ###Load raw data setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE #A.data.patients <- A.data.patients[1:800,] #B.data.nomogram=read.xlsx("Prostate TMA 2nd round 39 slides new2 FINAL.xlsx", sheet="Nomogram", colNames = TRUE) ### Include patients like the JMP-Datafilter B.data <- A.data.patients [A.data.patients$Filter.final == "include",] B.data <- B.data[B.data$RPE_TMA == "Pro 19",] ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) #### pickup data needed C.data <-B.data%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"="Mean.of.6.(23um)", "Gleason"="GS::SUM_Gleason_Berechnet", "QuantGleason"="GS::SUM_quant_Gleason", "IQGleason"="GS::SUM_IQ_Gleason") ### understanding the data barplot(table(B.data$`GS::SUM_quant_Gleason`)) barplot(table(B.data$`GS::SUM_IQ_Gleason`)) ### Change to ISUP C.data<-C.data%>% mutate(ISUP=recode(Gleason,"3+3"="1", "3+4"="2", "3+4 Tert.5"="4", "3+5"="4", "4+3"="3", "4+3 Tert.5"="5", "4+4"="4", "4+5"="5", "5+3"="4", "5+4"="5", "5+5"="5")) ### understanding the data barplot(table(C.data$ISUP)) barplot(table(C.data$Gleason)) barplot(table(C.data$QuantGleason)) barplot(table(C.data$IQGleason)) ### Use always numeric values if possible! C.data$ISUP <- as.numeric(C.data$ISUP) C.data$IQGleason <- as.numeric(C.data$IQGleason) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### auto ki67-Li c1<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI, C.data) ### ISUP c2<-coxph(Surv(BCR.months,BCR.censor==0)~ISUP, C.data) ### Ki67+Gleason c3<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+ISUP, C.data) ### QuantGleason c4<-coxph(Surv(BCR.months,BCR.censor==0)~QuantGleason, C.data) ### IQ Gleason c5<-coxph(Surv(BCR.months,BCR.censor==0)~IQGleason, C.data) ### auto-Ki67 + IQ Gleason c6<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+IQGleason, C.data) mdta<-model.frame(~BCR.months+BCR.censor+Ki67.LI+ISUP+Gleason+QuantGleason+IQGleason, C.data) ### calculate predictors mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred3~pred2,mdta) plot(pred4~pred3,mdta) plot(pred5~pred4,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step times<-5*12 ROC.data <- Score(list("Ki67-LI"=mdta$pred1,"ISUP"=mdta$pred2, "Ki67+ISUP"=mdta$pred3,"QuantGleason"=mdta$pred4, "IQGleason"=mdta$pred5,"Ki67+IQGlea."=mdta$pred6), Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("orange","royalblue3","red","turquoise","forestgreen","firebrick"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts p.RR<- p.RR[c(1,6,10,13,15)] #### select the p-values from the list we need #AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",sprintf("%.2f", round(AUC,2))," ","(",sprintf("%.2f", round(lower,2)),"-",sprintf("%.2f", round(upper,2)),")") AUC.RR$y<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Ki67-LI","ISUP","Ki67+ISUP","QuantGleason", "IQGleason","Ki67+IQGlea.")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.2,0.3,0.5,0.7,0.9) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.9)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=alpha(c("orange","royalblue3","red","turquoise","forestgreen","firebrick"),c(1,1,1,1,1,0.0000000001)))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) plotRisk(ROC.data,times=times) # Distribuiton of the Data barplot(table(C.data$ISUP)) plot(density(C.data$ISUP)) # Colors: palette=c("black","darkblue","royalblue3","forestgreen","mediumseagreen","orange","gold3","red","firebrick"), # "blue","cyan3","#EFE814","#6B8DBD","#348F57","Firebrick" #Order old built in RiskRegression plot "orange","darkblue","royalblue3","forestgreen","mediumseagreen","firebrick" #Order ggplot: "firebrick","mediumseagreen","forestgreen","royalblue3","darkblue","orange" ################################################################ #### Lets double check the p-values with the old timROC ################################################################ ### load required packages library(dplyr) library(magrittr) library(timeROC) library(survival) library(tibble) library(tidyr) library(ggplot2) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### auto ki67-Li c1<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI, C.data) ### ISUP c2<-coxph(Surv(BCR.months,BCR.censor==0)~ISUP, C.data) ### Gelason c3<-coxph(Surv(BCR.months,BCR.censor==0)~Gleason, C.data) ### QuantGleason c4<-coxph(Surv(BCR.months,BCR.censor==0)~QuantGleason, C.data) ### IQ Gleason c5<-coxph(Surv(BCR.months,BCR.censor==0)~IQGleason, C.data) ### auto-Ki67 + IQ Gleason c6<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+IQGleason, C.data) mdta<-model.frame(~BCR.months+BCR.censor+Ki67.LI+ISUP+Gleason+QuantGleason+IQGleason, C.data) ### calculate predictors mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred3~pred2,mdta) plot(pred4~pred3,mdta) plot(pred5~pred4,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step #times<-1:5*12 times<-5*12 #mdta_s<-mdta[sample(nrow(mdta),3000),] ROC.B1<-mdta%$%timeROC(T=BCR.months, delta=1-BCR.censor, marker=pred1, weighting = "marginal",cause=1, iid=TRUE, times=times) ROC.B2<-mdta%$%timeROC(T=BCR.months, delta=1-BCR.censor, marker=pred2, weighting = "marginal",cause=1, iid=TRUE, times=times) ROC.B3<-mdta%$%timeROC(T=BCR.months, delta=1-BCR.censor, marker=pred3, weighting = "marginal",cause=1, iid=TRUE, times=times) ROC.B4<-mdta%$%timeROC(T=BCR.months, delta=1-BCR.censor, marker=pred4, weighting = "marginal",cause=1, iid=TRUE, times=times) ROC.B5<-mdta%$%timeROC(T=BCR.months, delta=1-BCR.censor, marker=pred5, weighting = "marginal",cause=1, iid=TRUE, times=times) ROC.B6<-mdta%$%timeROC(T=BCR.months, delta=1-BCR.censor, marker=pred6, weighting = "marginal",cause=1, iid=TRUE, times=times) compare(ROC.B6,ROC.B5,ROC.B4,ROC.B3,ROC.B2,ROC.B1) plotAUCcurveDiff(ROC.B6,ROC.B5,ROC.B4,ROC.B3,ROC.B2,ROC.B1, conf.int = TRUE) ROC<-bind_rows(inner_join(ROC.B6%$%as_tibble(FP)%>%mutate(index=row_number())%>%gather("time","FP",-index), ROC.B6%$%as_tibble(TP)%>%mutate(index=row_number())%>%gather("time","TP",-index))%>%mutate(mode="B6"), inner_join(ROC.B5%$%as_tibble(FP)%>%mutate(index=row_number())%>%gather("time","FP",-index), ROC.B5%$%as_tibble(TP)%>%mutate(index=row_number())%>%gather("time","TP",-index))%>%mutate(mode="B5"), inner_join(ROC.B4%$%as_tibble(FP)%>%mutate(index=row_number())%>%gather("time","FP",-index), ROC.B4%$%as_tibble(TP)%>%mutate(index=row_number())%>%gather("time","TP",-index))%>%mutate(mode="B4"), inner_join(ROC.B3%$%as_tibble(FP)%>%mutate(index=row_number())%>%gather("time","FP",-index), ROC.B3%$%as_tibble(TP)%>%mutate(index=row_number())%>%gather("time","TP",-index))%>%mutate(mode="B3"), inner_join(ROC.B2%$%as_tibble(FP)%>%mutate(index=row_number())%>%gather("time","FP",-index), ROC.B2%$%as_tibble(TP)%>%mutate(index=row_number())%>%gather("time","TP",-index))%>%mutate(mode="B2"), inner_join(ROC.B1%$%as_tibble(FP)%>%mutate(index=row_number())%>%gather("time","FP",-index), ROC.B1%$%as_tibble(TP)%>%mutate(index=row_number())%>%gather("time","TP",-index))%>%mutate(mode="B1")) auc<-left_join(bind_rows(enframe(ROC.B6$AUC,name="time", value="AUC")%>%mutate(mode="B6"), enframe(ROC.B5$AUC,name="time", value="AUC")%>%mutate(mode="B5"), enframe(ROC.B4$AUC,name="time", value="AUC")%>%mutate(mode="B4"), enframe(ROC.B3$AUC,name="time", value="AUC")%>%mutate(mode="B3"), enframe(ROC.B2$AUC,name="time", value="AUC")%>%mutate(mode="B2"), enframe(ROC.B1$AUC,name="time", value="AUC")%>%mutate(mode="B1")), bind_rows(as_tibble(confint(ROC.B1)$CI_AUC/100, rownames="time")%>%mutate(mode="B1"), as_tibble(confint(ROC.B2)$CI_AUC/100, rownames="time")%>%mutate(mode="B2"), as_tibble(confint(ROC.B3)$CI_AUC/100, rownames="time")%>%mutate(mode="B3"), as_tibble(confint(ROC.B4)$CI_AUC/100, rownames="time")%>%mutate(mode="B4"), as_tibble(confint(ROC.B5)$CI_AUC/100, rownames="time")%>%mutate(mode="B5"), as_tibble(confint(ROC.B6)$CI_AUC/100, rownames="time")%>%mutate(mode="B6"))) auc %<>% mutate(label=sprintf("AUC %s: %.3f (%.3f, %.3f)",mode,AUC,`2.5%`, `97.5%`),y=.15-.05*(mode=="B1")) pval1<- enframe(compare(ROC.B1,ROC.B2)$p_values_AUC,name="time", value="pvalue")%>%mutate(label=sprintf("p-value %.3f", pvalue), y=.05) pval2<- enframe(compare(ROC.B2,ROC.B3)$p_values_AUC,name="time", value="pvalue")%>%mutate(label=sprintf("p-value %.3f", pvalue), y=.05) pval3<- enframe(compare(ROC.B3,ROC.B4)$p_values_AUC,name="time", value="pvalue")%>%mutate(label=sprintf("p-value %.3f", pvalue), y=.05) pval4<- enframe(compare(ROC.B4,ROC.B5)$p_values_AUC,name="time", value="pvalue")%>%mutate(label=sprintf("p-value %.3f", pvalue), y=.05) pval5<- enframe(compare(ROC.B5,ROC.B6)$p_values_AUC,name="time", value="pvalue")%>%mutate(label=sprintf("p-value %.3f", pvalue), y=.05) rocp<-ROC%>%filter(time%in%paste0("t=",c(5)*12)) aucp<-auc%>%filter(time%in%paste0("t=",c(5)*12)) pvalp1<-pval1%>%filter(time%in%paste0("t=",c(5)*12)) ### position for mulitple AUC labels z<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### finally plot all data with ggplot ggplot(rocp,aes(x=FP,y=TP, colour=mode, group=mode))+geom_line(size=1)+facet_wrap(~time)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+labs(colour="HeteroProg", x="False positive rate (1-Specifity)", y="True positive rate (Sensitivity)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ geom_text(data=aucp,aes(label=label,x=0.95,y=z),size=8/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=pvalp1,aes(label=label,x=0.95,y=y, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") # geom_line(size=1) = Strichdicke der ROC-Curves colSums(!is.na(B.data.random)) colSums(!is.na(C.data)) pval1 pval2 pval3 pval4 pval5 #ISUP aus RPE gleason ###################################### ######################################################################### ###################################### HeteroProg (ki67+IQ, Quant Gleason, Gleason, ISUP) ######################################################################### ###################################### ######################################################################### # Ki67 Mean of 6 vs. GS::SUM_Gleason_Berechnet --> ISUP vs. ISUP + auto-Ki67-LI # Package riskRegression (12.2020) (https://github.com/tagteam/riskRegression) ###Load raw data setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("Blessin_autoKi67LI_per patient data hetero.xlsx", sheet="Blessin_autoKi67LI_hetero", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] #B.data.nomogram=read.xlsx("Prostate TMA 2nd round 39 slides new2 FINAL.xlsx", sheet="Nomogram", colNames = TRUE) ### Include patients like the JMP-Datafilter B.data <- A.data.patients [A.data.patients$Filter.final == "include",] B.data <- B.data[B.data$RPE_TMA == "Pro 19",] ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) #### pickup data needed C.data <-B.data%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"="Mean.of.6.(23um)", "Gleason"="RPE_Gleason", "QuantGleason"="RPE_quantGleason", "IQGleason"="RPE_IQ_Gleason") ### understanding the data barplot(table(B.data$RPE_Gleason)) barplot(table(B.data$RPE_quantGleason)) ### Change to ISUP C.data<-C.data%>% mutate(ISUP=recode(Gleason,"3+3"="1", "3+4"="2", "3+4 Tert.5"="2", "3+5"="4", "4+3"="3", "4+3 Tert.5"="3", "4+4"="4", "4+5"="5", "5+3"="4", "5+4"="5", "5+5"="5")) ### understanding the data barplot(table(C.data$ISUP)) barplot(table(C.data$Gleason)) barplot(table(C.data$QuantGleason)) barplot(table(C.data$IQGleason)) ### Use always numeric values if possible! C.data$ISUP <- as.numeric(C.data$ISUP) C.data$IQGleason <- as.numeric(C.data$IQGleason) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### auto ki67-Li c1<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI, C.data) ### ISUP c2<-coxph(Surv(BCR.months,BCR.censor==0)~ISUP, C.data) ### Ki67+Gleason c3<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+ISUP, C.data) ### QuantGleason c4<-coxph(Surv(BCR.months,BCR.censor==0)~QuantGleason, C.data) ### IQ Gleason c5<-coxph(Surv(BCR.months,BCR.censor==0)~IQGleason, C.data) ### auto-Ki67 + IQ Gleason c6<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+IQGleason, C.data) mdta<-model.frame(~BCR.months+BCR.censor+Ki67.LI+ISUP+Gleason+QuantGleason+IQGleason, C.data) ### calculate predictors mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred3~pred2,mdta) plot(pred4~pred3,mdta) plot(pred5~pred4,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step times<-5*12 ROC.data <- Score(list("Ki67-LI"=mdta$pred1,"ISUP"=mdta$pred2, "Ki67+ISUP"=mdta$pred3,"QuantGleason"=mdta$pred4, "IQGleason"=mdta$pred5,"Ki67+IQGlea."=mdta$pred6), Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("orange","royalblue3","red","turquoise","forestgreen","firebrick"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts p.RR<- p.RR[c(1,6,10,13,15)] #### select the p-values from the list we need AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$y<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Ki67-LI","ISUP","Ki67+ISUP","QuantGleason", "IQGleason","Ki67+IQGlea.")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.2,0.3,0.5,0.7,0.9) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.9)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=c("orange","royalblue3","red","turquoise","forestgreen","firebrick"))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) plotRisk(ROC.data,times=times) # Distribuiton of the Data barplot(table(C.data$ISUP)) plot(density(C.data$ISUP)) # ISUP aus MAx ###################################### ######################################################################### ###################################### ISUP aus max HeteroProg (ki67+IQ, Quant Gleason, , ISUP) ######################################################################### ###################################### ######################################################################### # Ki67 Mean of 6 vs. GS::SUM_Gleason_Berechnet --> ISUP vs. ISUP + auto-Ki67-LI # Package riskRegression (12.2020) (https://github.com/tagteam/riskRegression) ###Load raw data setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("Blessin_autoKi67LI_per patient data hetero.xlsx", sheet="Blessin_autoKi67LI_hetero", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] #B.data.nomogram=read.xlsx("Prostate TMA 2nd round 39 slides new2 FINAL.xlsx", sheet="Nomogram", colNames = TRUE) ### Include patients like the JMP-Datafilter B.data <- A.data.patients [A.data.patients$Filter.final == "include",] B.data <- B.data[B.data$RPE_TMA == "Pro 19",] ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) #### pickup data needed C.data <-B.data%>% select("BCR.censor"="BCR.censor","BCR.months"="BCR.months", "Ki67.LI"="Mean.of.6.(23um)", "Gleason"="GS::MAX_Gleason", "QuantGleason"="GS::SUM_quant_Gleason", "IQGleason"="GS::SUM_IQ_Gleason") ### understanding the data barplot(table(B.data$`GS::MAX_Gleason`)) barplot(table(B.data$`GS::SUM_IQ_Gleason`)) ### Change to ISUP C.data<-C.data%>% mutate(ISUP=recode(Gleason,"3+3"="1", "3+4"="2", "3+5"="4", "4+3"="3", "4+4"="4", "4+5"="5", "5+3"="4", "5+4"="5", "5+5"="5")) ### understanding the data barplot(table(C.data$ISUP)) barplot(table(C.data$Gleason)) barplot(table(C.data$QuantGleason)) barplot(table(C.data$IQGleason)) ### Use always numeric values if possible! C.data$ISUP <- as.numeric(C.data$ISUP) C.data$IQGleason <- as.numeric(C.data$IQGleason) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### auto ki67-Li c1<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI, C.data) ### ISUP c2<-coxph(Surv(BCR.months,BCR.censor==0)~ISUP, C.data) ### Ki67+Gleason c3<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+ISUP, C.data) ### QuantGleason c4<-coxph(Surv(BCR.months,BCR.censor==0)~QuantGleason, C.data) ### IQ Gleason c5<-coxph(Surv(BCR.months,BCR.censor==0)~IQGleason, C.data) ### auto-Ki67 + IQ Gleason c6<-coxph(Surv(BCR.months,BCR.censor==0)~Ki67.LI+IQGleason, C.data) mdta<-model.frame(~BCR.months+BCR.censor+Ki67.LI+ISUP+Gleason+QuantGleason+IQGleason, C.data) ### calculate predictors mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred3~pred2,mdta) plot(pred4~pred3,mdta) plot(pred5~pred4,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step times<-5*12 ROC.data <- Score(list("Ki67-LI"=mdta$pred1,"ISUP"=mdta$pred2, "Ki67+ISUP"=mdta$pred3,"QuantGleason"=mdta$pred4, "IQGleason"=mdta$pred5,"Ki67+IQGlea."=mdta$pred6), Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("orange","royalblue3","red","turquoise","forestgreen","firebrick"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe ROC.riskRegression<-ROC.riskRegression %>% add_row(model = unique(ROC.riskRegression$model),times=times,TPR=0,FPR=0) ### create a start point for ROC AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts p.RR<- p.RR[c(1,6,10,13,15)] #### select the p-values from the list we need #AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",sprintf("%.2f", round(AUC,2))," ","(",sprintf("%.2f", round(lower,2)),"-",sprintf("%.2f", round(upper,2)),")") AUC.RR$y<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Ki67-LI","ISUP","Ki67+ISUP","QuantGleason", "IQGleason","Ki67+IQGlea.")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.2,0.3,0.5,0.7,0.9) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.9)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=alpha(c("orange","royalblue3","red","turquoise","forestgreen","firebrick"),c(1,1,1,1,1,1)))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) plotRisk(ROC.data,times=times) # Distribuiton of the Data barplot(table(C.data$ISUP)) plot(density(C.data$ISUP)) # Colors: palette=c("black","darkblue","royalblue3","forestgreen","mediumseagreen","orange","gold3","red","firebrick"), # "blue","cyan3","#EFE814","#6B8DBD","#348F57","Firebrick" #Order old built in RiskRegression plot "orange","darkblue","royalblue3","forestgreen","mediumseagreen","firebrick" #Order ggplot: "firebrick","mediumseagreen","forestgreen","royalblue3","darkblue","orange" ############################################################################################################################################################################ ############################################################################################################################################################################ ############################################################################################################################################################################ ######################################################## Figure 4 Heterogenious prognosis Message 6 better than 1 biopsy ###################################################################### ############################################################################################################################################################################ ############################################################################################################################################################################ ############################################################################################################################################################################ ###Load raw data setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("Blessin_autoKi67LI_per patient data hetero.xlsx", sheet="Blessin_autoKi67LI_hetero", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] #B.data.nomogram=read.xlsx("Prostate TMA 2nd round 39 slides new2 FINAL.xlsx", sheet="Nomogram", colNames = TRUE) ### Include patients like the JMP-Datafilter #B.data <- A.data.patients[A.data.patients$Filter.final == "include",] #B.data <- B.data[B.data$RPE_TMA == "Pro 19",] ### calculations for the Paper library(dplyr) D.data <-B.data%>% select("TMA1place"="RPE_Punch1_TMAlocalisation","TMA2place"="RPE_Punch2_TMAlocalisation","TMA3place"="RPE_Punch3_TMAlocalisation", "TMA4place"="RPE_Punch4_TMAlocalisation","TMA5place"="RPE_Punch5_TMAlocalisation","TMA6place"="RPE_Punch6_TMAlocalisation", "Foci1.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy1","Foci2.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy2", "Foci3.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy3","Foci4.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy4", "Foci5.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy5","Foci6.23um"="Ki67.Li.(after.23um.exclusion).of.Biopsy6", "Ki67ofMAXGleason"="Ki67-LI.of.spot.with.max.Gleason") ## finding out the number of cores with 6 D.data <- B.data[,3:8] any(is.na(D.data)) sum(is.na(D.data)) D.data <- D.data[complete.cases(D.data),] any(is.na(D.data)) summary(D.data) colSums(!is.na(D.data)) ###################################### ######################################################################### ###################################### HeteroProg Mean Roc-Curve for Heteroprognosis analysis un-randomizded (Sample 1-6 improve a lot) ######################################################################### ###################################### ######################################################################### # Mean of 1-6 samples vs. each other # Package riskRegression (12.2020) (https://github.com/tagteam/riskRegression) ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) #Datainfo summary(B.data) colSums(!is.na(B.data)) barplot(table(B.data$`Ki67.Li.(after.23um.exclusion).of.Biopsy1`)) #####23um #### pickup data needed C.data<-B.data[c("BCR.censor","BCR.months")] C.data$MeanBiopsy1<-B.data$`Ki67.Li.(after.23um.exclusion).of.Biopsy1` C.data$MeanBiopsy2<-B.data$`Mean.of.2.(23um)` C.data$MeanBiopsy3<-B.data$`Mean.of.3.(23um)` C.data$MeanBiopsy4<-B.data$`Mean.of.4.(23um)` C.data$MeanBiopsy5<-B.data$`Mean.of.5.(23um)` C.data$MeanBiopsy6<-B.data$`Mean.of.6.(23um)` #C.data<-A.data.patients%>%select(PatID,"Ki67.score","BCR.censor","BCR.months", Biobsy.Gleason=Bx.Gleason.max.group, Clinical.stage=Bx.klin.Stadium, Presurgery.PSA=PSA.preop) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### old c1<-coxph(Surv(BCR.months,BCR.censor==0)~MeanBiopsy1, C.data) ### new c2<-coxph(Surv(BCR.months,BCR.censor==0)~MeanBiopsy2, C.data) ### newest c3<-coxph(Surv(BCR.months,BCR.censor==0)~MeanBiopsy3, C.data) c4<-coxph(Surv(BCR.months,BCR.censor==0)~MeanBiopsy4, C.data) c5<-coxph(Surv(BCR.months,BCR.censor==0)~MeanBiopsy5, C.data) c6<-coxph(Surv(BCR.months,BCR.censor==0)~MeanBiopsy6, C.data) mdta<-model.frame(~BCR.months+BCR.censor+MeanBiopsy1+MeanBiopsy2+MeanBiopsy3+MeanBiopsy4+MeanBiopsy5+MeanBiopsy6, C.data) ### calculate pred 1 & 2 mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred4~pred3,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step #times<-1:5*12 times<-5*12 #mdta_s<-mdta[sample(nrow(mdta),3000),] ROC.data <- Score(list("Mean of 1"=mdta$pred1,"Mean of 2"=mdta$pred2,"Mean of 3"=mdta$pred3,"Mean of 4"=mdta$pred4, "Mean of 5"=mdta$pred5,"Mean of 6"=mdta$pred6), Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("black","firebrick","red","firebrick1","orange","gold"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts #p.RR<- p.RR[c(1,6,10,13,15)] p.RR<- p.RR[c(15,13,10,6,1)] #### select the p-values from the list we need #AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",sprintf("%.2f", round(AUC,2))," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$y<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Mean of 1","Mean of 2","Mean of 3","Mean of 4", "Mean of 5","Mean of 6")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.2,0.3,0.5,0.7,0.9) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.75)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=alpha(c("gold2","orange","#E18727FF","#FF0000","firebrick","#750000"),c(1,1,1,1,1,1)))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward")+ geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) plotRisk(ROC.data,times=times) # Distribuiton of the Data barplot(table(C.data$xx)) plot(density(C.data$xxx)) #scale_colour_manual(values=c("gold","orange","firebrick1","red","firebrick","black")) #col =c("firebrick","forestgreen","turquoise","royalblue3","black","orange"), # Colors: palette=c("black","darkblue","royalblue3","forestgreen","mediumseagreen","orange","gold3","red","firebrick"), # "blue","cyan3","#EFE814","#6B8DBD","#348F57","Firebrick" #Order old built in RiskRegression plot "orange","darkblue","royalblue3","forestgreen","mediumseagreen","firebrick" #Order ggplot: "firebrick","mediumseagreen","forestgreen","royalblue3","darkblue","orange" ###################################### ######################################################################### ###################################### HeteroProg Mean of 6 vs. MAX of 6 vs. 1 with worst Gleason ######################################################################### ###################################### ######################################################################### # mean vs. max. vs. worst gleason #Datainfo summary(B.data) colSums(!is.na(B.data)) #### pickup data needed C.data<-B.data[c("BCR.censor","BCR.months")] C.data$Max <-B.data$`Ki67-LI.of.spot.with.max.Gleason` C.data$Mean <-B.data$`Mean.of.6.(23um)` C.data$Max6 <-B.data$`Max.of.6.(23um)` C.data <- C.data[C.data$Max>0.0001,] ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) #Datainfo summary(B.data) colSums(!is.na(C.data)) barplot(table(C.data$Mean)) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### old c1<-coxph(Surv(BCR.months,BCR.censor==0)~Max, C.data) ### new c2<-coxph(Surv(BCR.months,BCR.censor==0)~Mean, C.data) ### newest c3<-coxph(Surv(BCR.months,BCR.censor==0)~Max6, C.data) mdta<-model.frame(~BCR.months+BCR.censor+Mean+Max+Max6, C.data) ### calculate pred 1 & 2 mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") plot(pred2~pred1,mdta) ### Multivariate regression analysis step by step #times<-1:5*12 times<-5*12 #mdta_s<-mdta[sample(nrow(mdta),3000),] ROC.data <- Score(list("Ki67-LI of 1 (worst Gleason)"=mdta$pred1,"Max of 6"=mdta$pred3, "Mean of 6"=mdta$pred2), ### change names only here Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("orange","red","firebrick"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts #p.RR<- p.RR[c(1,6,10,13,15)] p.RR<- p.RR[c(1,3)] #### select the p-values from the list we need AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$y<-c(0.1,0.15,0.2) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Mean of 6","Max of 6","Ki67-LI of 1 (worst Gleason)")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.9,0.7) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.75)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=alpha(c("firebrick","red","orange"),c(1,1,1)))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) colSums(!is.na(C.data)) plotRisk(ROC.data,times=times) # Distribuiton of the Data barplot(table(C.data$xx)) plot(density(C.data$xxx)) #col =c("firebrick","forestgreen","turquoise","royalblue3","black","orange"), # Colors: palette=c("black","darkblue","royalblue3","forestgreen","mediumseagreen","orange","gold3","red","firebrick"), # "blue","cyan3","#EFE814","#6B8DBD","#348F57","Firebrick" #Order old built in RiskRegression plot "orange","darkblue","royalblue3","forestgreen","mediumseagreen","firebrick" #Order ggplot: "firebrick","mediumseagreen","forestgreen","royalblue3","darkblue","orange" ###################################### ######################################################################### ###################################### MAX of 1-6 ######################################################################### ###################################### ######################################################################### # mean vs. max. vs. worst gleason ###Load raw data setwd("/Users/niclasblessin/MS08_Prostate ki67 score/") ###Import and select Data detach("package:xlsx") library("openxlsx") Header = TRUE A.data.patients=read.xlsx("Blessin_autoKi67LI_per patient data hetero.xlsx", sheet="Blessin_autoKi67LI_hetero", colNames = TRUE) #A.data.patients <- A.data.patients[1:800,] #B.data.nomogram=read.xlsx("Prostate TMA 2nd round 39 slides new2 FINAL.xlsx", sheet="Nomogram", colNames = TRUE) ### Include patients like the JMP-Datafilter B.data <- A.data.patients[A.data.patients$Filter.final == "include",] B.data <- B.data[B.data$RPE_TMA == "Pro 19",] ###################################### ######################################################################### ###################################### HeteroProg Mean Roc-Curve for Heteroprognosis analysis un-randomizded (Sample 1-6 improve a lot) ######################################################################### ###################################### ######################################################################### # Mean of 1-6 samples vs. each other # Package riskRegression (12.2020) (https://github.com/tagteam/riskRegression) ### select data and load required packages library(dplyr) library(riskRegression) library(survival) library(tibble) library(magrittr) library(tidyr) library(ggplot2) #Datainfo summary(B.data) colSums(!is.na(B.data)) barplot(table(B.data$`Ki67.Li.(after.23um.exclusion).of.Biopsy1`)) #####23um #### pickup data needed C.data<-B.data[c("BCR.censor","BCR.months")] C.data$MaxBiopsy1<-B.data$`Ki67.Li.(after.23um.exclusion).of.Biopsy1` C.data$MaxBiopsy2<-B.data$`Max.of.2.(23um)` C.data$MaxBiopsy3<-B.data$`Max.of.3.(23um)` C.data$MaxBiopsy4<-B.data$`Max.of.4.(23um)` C.data$MaxBiopsy5<-B.data$`Max.of.5.(23um)` C.data$MaxBiopsy6<-B.data$`Max.of.6.(23um)` #C.data<-A.data.patients%>%select(PatID,"Ki67.score","BCR.censor","BCR.months", Biobsy.Gleason=Bx.Gleason.max.group, Clinical.stage=Bx.klin.Stadium, Presurgery.PSA=PSA.preop) ### is.na any(is.na(C.data)) sum(is.na(C.data)) C.data <- C.data[complete.cases(C.data),] any(is.na(C.data)) summary(C.data) ### pick the variables like in the multivariate analysis (independ?) for testing them for their predictive power ### old c1<-coxph(Surv(BCR.months,BCR.censor==0)~MaxBiopsy1, C.data) ### new c2<-coxph(Surv(BCR.months,BCR.censor==0)~MaxBiopsy2, C.data) ### newest c3<-coxph(Surv(BCR.months,BCR.censor==0)~MaxBiopsy3, C.data) c4<-coxph(Surv(BCR.months,BCR.censor==0)~MaxBiopsy4, C.data) c5<-coxph(Surv(BCR.months,BCR.censor==0)~MaxBiopsy5, C.data) c6<-coxph(Surv(BCR.months,BCR.censor==0)~MaxBiopsy6, C.data) mdta<-model.frame(~BCR.months+BCR.censor+MaxBiopsy1+MaxBiopsy2+MaxBiopsy3+MaxBiopsy4+MaxBiopsy5+MaxBiopsy6, C.data) ### calculate pred 1 & 2 mdta$pred1<-predict(c1,type="lp") mdta$pred2<-predict(c2,type="lp") mdta$pred3<-predict(c3,type="lp") mdta$pred4<-predict(c4,type="lp") mdta$pred5<-predict(c5,type="lp") mdta$pred6<-predict(c6,type="lp") plot(pred2~pred1,mdta) plot(pred4~pred3,mdta) plot(pred6~pred5,mdta) ### Multivariate regression analysis step by step #times<-1:5*12 times<-5*12 #mdta_s<-mdta[sample(nrow(mdta),3000),] ROC.data <- Score(list("Max of 1"=mdta$pred1,"Max of 2"=mdta$pred2,"Max of 3"=mdta$pred3,"Max of 4"=mdta$pred4, "Max of 5"=mdta$pred5,"Max of 6"=mdta$pred6), Surv(BCR.months,BCR.censor==0)~1,data=mdta, times=times,plots="roc",metrics="auc",summary='risks') ### plot the time-dependent receiver operating characteristic curves plotROC(ROC.data,times=times, xlab = "1-Specifity (False-Postive rate)", ylab= "Sensitivity (True-Positive rate)", col =c("black","firebrick","red","firebrick1","orange","gold"), lwd=2, lty=1, cex=1,pch=1, legend=TRUE,auc.in.legend =TRUE,brier.in.legend=FALSE, add = FALSE) ### close the box & make the diagonal line less obvious lines(x = c(-0.1,1.5), y = c(-0.1,1.5), col="white",lwd=3.5) lines(x = c(-0.05,1.05), y = c(-0.05,1.05), col="lightgrey",lwd=1,lty="dotted") box(which = "plot", lty = "solid", col="black") ### p-value for AUC comparison print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) ### ggplot 2 preperation ROC.riskRegression<- ROC.data%$%ROC%$%plotframe AUC.RR<- ROC.data%$%AUC%$%score p.RR<- ROC.data%$%AUC%$%contrasts #p.RR<- p.RR[c(1,6,10,13,15)] p.RR<- p.RR[c(15,13,10,6,1)] #### select the p-values from the list we need AUC.RR$label<-AUC.RR %$% paste0(model,": "," AUC","=",round(AUC,2)," ","(",round(lower,2),"-",round(upper,2),")") AUC.RR$y<-c(0.1,0.15,0.2,0.25,0.3,0.35) ### just add 0.25, 0.3 ... according to the number of additonal curves p.RR$label<-p.RR %$% paste0("p","=",round(p,4)) AUC.RR$model <- factor(AUC.RR$model, levels = c("Max of 1","Max of 2","Max of 3","Max of 4", "Max of 5","Max of 6")) ### order them here! ### finally plot all data with ggplot AUC.RR$model ### to change the names for the corresponding colors z=c(0.2,0.3,0.5,0.7,0.9) ggplot(ROC.riskRegression,aes(x=FPR,y=TPR, colour=model, group=model))+ geom_line(size=0.75)+facet_wrap(~times)+theme_bw()+ geom_abline(intercept = 0, slope=1, linetype=3, colour="gray")+ labs(colour="Ki67-LI", x="1-Specifity (False-Postive rate)", y="Sensitivity (True-Positive rate)")+ theme(panel.grid=element_blank(),strip.background = element_blank(), panel.spacing = unit(2, "lines"))+ scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))+ scale_colour_manual(values=c("gold","orange","firebrick1","red","firebrick","black"))+ geom_text(data=AUC.RR,aes(label=label,x=.95,y=y),size=10/.pt, show.legend = FALSE,hjust="inward") + geom_text(data=p.RR,aes(label=label,x=z,y=0.05, group=NULL, colour=NULL),size=8/.pt, show.legend = FALSE,hjust="inward") ROC.data[["ROC"]] print(ROC.data, times, digits=3, eps=10^-4,verbose=TRUE, conf.int=0.95) plotRisk(ROC.data,times=times) # Distribuiton of the Data barplot(table(C.data$xx)) plot(density(C.data$xxx)) #col =c("firebrick","forestgreen","turquoise","royalblue3","black","orange"), # Colors: palette=c("black","darkblue","royalblue3","forestgreen","mediumseagreen","orange","gold3","red","firebrick"), # "blue","cyan3","#EFE814","#6B8DBD","#348F57","Firebrick" #Order old built in RiskRegression plot "orange","darkblue","royalblue3","forestgreen","mediumseagreen","firebrick" #Order ggplot: "firebrick","mediumseagreen","forestgreen","royalblue3","darkblue","orange"