STn-in-TNBC / TCGA-TNBC Survival Analysis based on ST6GALNAC1.R
TCGA-TNBC Survival Analysis based on ST6GALNAC1.R
Raw
#############################################################################################################
########################### Complete Overall Survival Analysis of TNBC TCGA Cohort ##########################
#############################################################################################################

##Libraries used
#install.packages("survival")
#install.packages("survminer")

##Load libraries
#library(survival)
#library(survminer)

##Select the Directory where you can find the RData with the information regarding the TCGA-TNBC cohort obtained from the "Select the TNBC.R" code
load("data/TNBC_data.RData")

tnbc.vars <- c(
  "year_of_initial_pathologic_diagnosis",
  'vital_status','days_to_death','days_to_last_followup',
  'age_at_initial_pathologic_diagnosis','tissue_source_site',
  "tumor_tissue_site",'primary_lymph_node_presentation_assessment',
  'lymph_node_examined_count',"anatomic_neoplasm_subdivisions",
  'number_of_lymphnodes_positive_by_ihc','number_of_lymphnodes_positive_by_he',
  'axillary_lymph_node_stage_method_type',
  'axillary_lymph_node_stage_other_method_descriptive_text',
  'histological_type','histological_type_other',
  'cytokeratin_immunohistochemistry_staining_method_micrometastasis_indicator',
  "margin_status",'first_nonlymph_node_metastasis_anatomic_sites',
  'distant_metastasis_present_ind2','radiation_therapy',
  'stage_event_pathologic_stage','stage_event_tnm_categories',
  "stage_event_clinical_stage",'surv_event_time',"gender"
)


clinicalInfo <- clinical$primary.solid.tumor[, tnbc.vars]

patientsIDs <- colnames(logTPMs)

time <- apply(clinicalInfo[patientsIDs, c("days_to_death","days_to_last_followup")], 1, function(x) as.numeric(na.omit(x)))/365
vitalStatus <- as.numeric(clinicalInfo[patientsIDs,"vital_status"])
data<-data.frame("time"=time, "status"=vitalStatus)
surv_data<-Surv(data$time, data$status)
surv_fit<-survfit(surv_data ~ 1, data = data)
summary(surv_fit)
survfit(formula = surv_data ~ 1, data = data)
#A 50% survival probability is reached after 8 years
median(data$time)
#1.293151, 1 year and approximately 4 months of median follow-up time
min(data$time)
#0
max(data$time)
# 9.512329

png("TCGA_TNBC_OS.png", res=1200, width=7000, height=7000)
#grDevices::windows()
ggsurv <- ggsurvplot(surv_fit,
                     xlab = "Time (years)",
                     ylab = "Survival Probability",
                     palette = c("red"),
                     conf.int = T,
                     pval.coord = c(0,0),
                     risk.table = FALSE,
                     risk.table.col = " ",
                     risk.table.height = 0.25,
                     ggtheme = theme_bw(base_family =  "sans"),
                     censor = T,
                     data=data,
                     font.title=c(20, "bold", "black"),
                     font.x=c(15, "bold", "black"), 
                     font.y=c(15, "bold", "black"), 
                     font.tickslab=c(15, "bold", "black"),
                     font.legend=c(13, "bold", "black"),
                     surv.median.line = "hv"
)
ggsurv$plot <- ggsurv$plot+ 
  ggplot2::annotate("text", 
                    x = 8.7, y = 0.5, # x and y coordinates of the text
                    label = "≈ 8 y", size = 5)
ggsurv
dev.off()

#Survival by ST6GALNAC1 high and low
geneExpLevels <- logTPMs["ST6GALNAC1",]
geneExpStatus <- ifelse(geneExpLevels > median(geneExpLevels), "High", "Low")

clinicalInfo$vital_status <- ifelse(clinicalInfo$vital_status == "Alive", 0, 1)

time <- apply(clinicalInfo[patientsIDs, c("days_to_death","days_to_last_followup")], 1, function(x) as.numeric(na.omit(x)))/365
vitalStatus <- as.numeric(clinicalInfo[patientsIDs,"vital_status"])
data <- data.frame("time"=time, "status"=vitalStatus, "group"=geneExpStatus)
surv_data <- Surv(data$time, data$status)
surv_fit_groups <- survfit(surv_data ~ group, data=data)
surv_groups <- survdiff(surv_data ~ group, data=data)
pvalue <- pchisq(surv_groups$chisq, length(surv_groups$n)-1, lower.tail = FALSE)
#pvalue=0.9051407
summary(surv_fit_groups)
survfit(formula = surv_data ~ group, data = data)
#A 50% of disease free survival probability is reached for the low expression group after 8.1 years.


png("ST6GALNAC1_OS.png", res = 1200, width = 7000, height = 7000)
#grDevices::windows()
ggsurv<-ggsurvplot(surv_fit_groups,
                   xlab = "Time (years)",
                   ylab = "Survival Probability",
                   legend.labs = c("ST6GALNAC1 High", "ST6GALNAC1 Low"),
                   palette = c("red", "blue"),
                   conf.int = T,
                   conf.int.alpha = 0.05,
                   pval = F,
                   pval.coord = c(0,0),
                   risk.table = FALSE,
                   risk.table.col = " ",
                   risk.table.height = 0.25,
                   ggtheme = theme_bw(base_family =  "sans"),
                   censor = T,
                   data=data,
                   font.title=c(20, "bold", "black"),
                   font.x=c(15, "bold", "black"), 
                   font.y=c(15, "bold", "black"), 
                   font.tickslab=c(15, "bold", "black"),
                   font.legend=c(13, "bold", "black"),
                   surv.median.line = "hv"
)

ggsurv$plot <- ggsurv$plot+ 
  ggplot2::annotate("text", 
                    x = 8.7, y = 0.47, # x and y coordinates of the text
                    label = "≈ 8 y", size = 5)
ggsurv$plot <- ggsurv$plot+ 
  ggplot2::annotate ("text",
                     x = 0.8, y = 0.01, 
                     label = "p = 0.91", cex=5, col="black", fontface=3)
ggsurv
dev.off()

############################################################## END ##########################################