# STn-in-TNBC / HSJ-TNBC STn- Vs STn+ biomarkers analysis.R
#############################################################################################################
########################### Comparing biomarkers expression between STn- and STn+  groups####################
#############################################################################################################

##Libraries used
#install.packages("openxlsx")

##Load libraries
#library(openxlsx)

## Load Table S2 with the information regarding the HSJ-TNBC cohort.
## The path is relative to the working directory, so run the script from the
## folder that contains `data/`.
# The function is namespace-qualified because the `library(openxlsx)` call at
# the top of this script is commented out; a bare `read.xlsx()` would fail in
# a fresh R session.
TNBC_Data <- as.data.frame(openxlsx::read.xlsx("data/HSJ-TNBC_Data.xlsx"))

# Columns 24 to 37 of the sheet are taken as the 14 biomarker columns.
# NOTE(review): this window is positional; confirm it still matches the sheet
# layout whenever the Excel file is regenerated.
Biomarkers <- TNBC_Data[, 24:37]
rownames(Biomarkers) <- TNBC_Data$ID

# Fail early, with a readable message, if the positional window above no
# longer contains the column the rest of the script groups by.
if (!"Sialyl-Tn_CM_Tumor" %in% colnames(Biomarkers)) {
  stop(
    "Column 'Sialyl-Tn_CM_Tumor' not found in columns 24:37 of the input file.",
    call. = FALSE
  )
}

## Split the cohort by Sialyl-Tn (STn) score: one data frame per distinct
## value of `Sialyl-Tn_CM_Tumor` (rows with a missing score are dropped by
## split()).
X <- split(Biomarkers, Biomarkers$`Sialyl-Tn_CM_Tumor`)

# Y is deliberately an *unnamed* list: rbind() on unnamed data frames keeps the
# original patient IDs as row names instead of prefixing them with the group.
Y <- lapply(seq_along(X), function(x) as.data.frame(X[[x]])[, 1:14])

if (length(Y) < 2) {
  stop(
    "Need at least two distinct Sialyl-Tn scores to build STn- and STn+ groups.",
    call. = FALSE
  )
}

# The first group is the lowest score in split()'s sort order.
# NOTE(review): assumed to be the STn-negative (score 0) patients - confirm.
STnNegative <- Y[[1]]

# Every remaining score level is pooled into the STn+ group. Using Y[-1]
# instead of Y[[2]] ... Y[[8]] keeps this correct when the cohort has fewer or
# more than eight distinct scores.
STnPositive <- do.call(rbind, Y[-1])

## Coerce every biomarker column of both groups to numeric.
# `df[] <- lapply(df, as.numeric)` converts each column in place, keeping the
# column names, column order and row names of the data frame. This replaces
# one assignment per column per group, where each line paired a hard-coded
# position with a hard-coded name (a mismatch would have silently appended a
# new column instead of converting the existing one).
# Values that cannot be parsed as numbers become NA (with a warning), exactly
# as with a column-by-column as.numeric().
STnPositive[] <- lapply(STnPositive, as.numeric)
STnNegative[] <- lapply(STnNegative, as.numeric)


## Compare each biomarker between the STn- and STn+ groups with a Wilcoxon
## rank-sum test (wilcox.test() defaults: two-sided, unpaired), then correct
## the p-values for multiple testing with the "fdr" method of p.adjust().
# The first 13 biomarker columns are tested; the 14th column is excluded here.
Test <- colnames(Biomarkers)[1:13]

# One p-value per biomarker; vapply() names the result after `Test` and
# guarantees a single numeric value per test.
pValues <- vapply(
  Test,
  function(biomarker) {
    wilcox.test(STnNegative[, biomarker], STnPositive[, biomarker])$p.value
  },
  numeric(1)
)

adj.pValues <- p.adjust(pValues, method = "fdr")

# Result table: one row per biomarker (row names), raw and adjusted p-values.
Cor <- data.frame(pValue = pValues, adj.pValues = adj.pValues, row.names = Test)
#Biomarker pValue, adj pValue
#MYC_N_Tumor 0.002044194, 0.02657453
#KI67_N_Tumor 0.018820096, 0.12233063
#MMP9_C_Tumor 0.050270745, 0.21783989
#CD44_CM_Stroma 0.302682128, 0.49348355
#N-Cad_CM_tumor 0.303682184, 0.49348355
#MMP9_C_Stroma 0.159326913, 0.49348355
#Oct4_N_Tumor 0.279905979, 0.49348355
#Oct4_C_Tumor 0.302271338, 0.49348355
#B-Cat_CM_Tumor 0.419047474, 0.54476172 
#Sox2_N_Tumor 0.412487825, 0.54476172
#CD44_CM_Tumor 0.574495095, 0.67894875
#N-Cad_N_Tumor 0.697983222, 0.75614849
#Sall4_N_Tumor 0.997259081, 0.99725908
# Export the p-value table. `rowNames = TRUE` is required because the biomarker
# names are stored as the row names of `Cor`, and write.xlsx() drops row names
# by default, which would leave the p-values unlabelled in the sheet.
# The call is namespace-qualified because `library(openxlsx)` is commented out
# at the top of this script.
openxlsx::write.xlsx(Cor, "WilcoxTes_STn+VsSTn-.xlsx", rowNames = TRUE)

## Obtain the data to make the plots in GraphPad Prism: per-biomarker mean,
## standard deviation and sample size of the STn- group.
Test <- colnames(Biomarkers)[1:13]

# vapply() yields a 3 x length(Test) matrix (rows Mean/SD/N); t() turns it into
# one row per biomarker with columns "Mean", "SD", "N".
DataNeg <- t(vapply(
  Test,
  function(biomarker) {
    scores <- STnNegative[, biomarker]
    c(
      Mean = mean(scores, na.rm = TRUE),
      SD = sd(scores, na.rm = TRUE),
      # Count only non-missing scores so that N matches the observations that
      # Mean and SD were actually computed from (length() would include NAs).
      N = sum(!is.na(scores))
    )
  },
  c(Mean = 0, SD = 0, N = 0)
))

## Per-biomarker mean, standard deviation and sample size of the STn+ group.
Test <- colnames(Biomarkers)[1:13]

# vapply() yields a 3 x length(Test) matrix (rows Mean/SD/N); t() turns it into
# one row per biomarker with columns "Mean", "SD", "N".
DataPos <- t(vapply(
  Test,
  function(biomarker) {
    scores <- STnPositive[, biomarker]
    c(
      Mean = mean(scores, na.rm = TRUE),
      SD = sd(scores, na.rm = TRUE),
      # Count only non-missing scores so that N matches the observations that
      # Mean and SD were actually computed from (length() would include NAs).
      N = sum(!is.na(scores))
    )
  },
  c(Mean = 0, SD = 0, N = 0)
))

# Write both summary tables to semicolon-separated text files.
# `col.names = NA` adds an empty header cell above the row-name column so the
# header lines up with the data when the file is opened in a spreadsheet.
write.table(
  x = DataPos,
  file = "STn+ stats.csv",
  sep = ";",
  quote = FALSE,
  col.names = NA
)
write.table(
  x = DataNeg,
  file = "STn- stats.csv",
  sep = ";",
  quote = FALSE,
  col.names = NA
)