STn-in-TNBC / TCGA-TNBC Immune Cell Analysis.R
TCGA-TNBC Immune Cell Analysis.R
Raw


#############################################################################################################
######################################### Immune cells frequency: ###########################################
######################### Comparing between high and low ST6GALNAC1 expression groups #######################
###################################### Correlations with ST6GALNAC1 #########################################
#############################################################################################################

##Libraries used
#install.packages("corrplot")
#install.packages("readxl")
#install.packages("tidyverse")

##Load libraries
#library(readxl)
#library(corrplot)
#library(tidyverse)

##Select the Directory where you can find the RData with the information regarding the TCGA-TNBC cohort obtained from the "Select the TNBC.R" code
load("data/logTPMs.RData")

####Separating samples with the median to define high and low####
Exp_level<-function(Gene){
  Info <- list()
  for (i in 1:length(Gene)){
    GeneExpLevels <- logTPMs[Gene[i],]
    GeneExpStatus <- ifelse(GeneExpLevels > median(GeneExpLevels), "High", "Low")
    Final_status <- list(data.frame(names(GeneExpLevels),GeneExpLevels,GeneExpStatus))
    Info <- append(Info,Final_status)
  }
  names(Info) <- Gene
  return (Info)
}
#Name the gene of interest
Gene <- c("ST6GALNAC1")
GenesNames <- c("CD44", "CDH2", "CTNNB1", "MMP9", "POU5F1", "SOX2", "MKI67","MYC", "SALL4")

GeneExp <- c()
for (i in GenesNames){
  GeneExp[[i]] <- logTPMs[i,]
}
GeneExp<-data.frame(GeneExp)
GeneExp <- GeneExp[sort(rownames(GeneExp)),]
Info <- as.data.frame(Exp_level(Gene))
Info <- Info[,-1]
colnames(Info) <- c("ST6GALNAC1", "Group")
Info <- Info[sort(rownames(Info)),]
GeneTable <- cbind(Info, GeneExp)
GeneTable <- GeneTable[,c(2,1,3,4,5,6,7,8,9,10,11)]

##Acquiring the table with the cell type fractions in each sample
data <- as.data.frame(read_excel('data/Filtrates.xlsx'))
rownames(data) <- data$Patients
data <- data[,-1]
colnames(data)
colnames(data) <- c("Uncharacterized Cells", "Treg Cells", "Neutrophiles", "NK Cells", "Monocytes", "Macrophages M1", "Macrophages M2", "Dendritic Cells", "CD8 T Cells", "CD4 T Cells", "B Cells")
data <- data[order(row.names(data)), ]
GeneTable <- GeneTable[order(row.names(GeneTable)), ]
data <- cbind(GeneTable$ST6GALNAC1, data)
names(data)[names(data) == 'GeneTable$ST6GALNAC1'] <- 'ST6GALNAC1'

###Without Uncharacterized cells
options(scipen = 999, digits = 3)
Test <- colnames(data[,3:12])
Cor <- matrix(data=0, nrow=length(Test), ncol=2)
rownames(Cor) <- Test
colnames(Cor) <- c("CorValue", "pValue")
for(i in 1:length(Test)){
  cur_result <- cor.test(data[,"ST6GALNAC1"], data[,Test[i]], method="spearman", exact=F)
  Cor[i,] <- c(cur_result$estimate, cur_result$p.value)
}

adj.pValues <- p.adjust(Cor[,"pValue"], method="fdr")
Cor <- cbind(Cor, adj.pValues)
Cor<- as.data.frame(Cor)
Cor$CorValue <- as.numeric(as.character(Cor$CorValue))
Cor$pValue <- as.numeric(as.character(Cor$pValue))
Cor$adj.pValues <- as.numeric(as.character(Cor$adj.pValues))
TCGACor <- Cor
write.table(TCGACor, file = "TCGACor.csv")

M <- as.matrix(Cor$CorValue)
colnames(M) <- "ST6GALNAC1"
rownames(M) <- rownames(Cor)
p.mat <- as.matrix(Cor$adj.pValues)
colnames(p.mat) <- "ST6GALNAC1"
rownames(p.mat) <- rownames(Cor)
#horizontal heatmap
M <- t(M)
p.mat <- t(p.mat)

png("HeatmapSI.png", res=600, width=10100, height=3500)
corrplot(M, 
         method="color", 
         type="full", 
         cl.pos = "n",
         tl.srt=65,
         order="original", 
         p.mat = p.mat, 
         insig = "label_sig",
         sig.level = c(0.001, 0.01, 0.05),
         pch.cex = 2,
         diag=TRUE, 
         tl.col="black", 
         tl.cex =2,
         col=colorRampPalette(c("#2166AC","white","#D6604D"))(10),
         addgrid.col="black",
         mar = c(0, 5, 0, 14)
)
colorlegend(
  colorRampPalette(c("#2166AC","white","#D6604D"))(10),
  c(seq(-1,1,0.2)),
  xlim=c(0.5,10.5),
  ylim=c(-0.2,0.4),
  align="l",
  vertical=FALSE,
  addlabels=TRUE,
  cex=2
)
text(2.2, -0.5, "Spearman Correlation", col = "black", cex = 2)
dev.off()

GeneExpLevels <- data$ST6GALNAC1
GeneExpStatus <- ifelse(GeneExpLevels > median(GeneExpLevels), "High", "Low")
Group <- GeneExpStatus
data <- cbind(Group, data)

high_subset <- data[data$Group=='High', 4:13]
low_subset <- data[data$Group=='Low', 4:13]
high_subset <- high_subset %>% mutate_if(is.character, as.numeric)
low_subset <- low_subset %>% mutate_if(is.character, as.numeric)

cellType <- colnames(data[,4:13])

Cor <- matrix(data=0, nrow=length(cellType), ncol=1)
rownames(Cor) <- cellType
colnames(Cor) <- c("pValue")
for (i in cellType) {
  wilcoxTest <- wilcox.test(high_subset[,i],low_subset[,i], paired = FALSE )
  Cor[i,] <- c(wilcoxTest$p.value)
}
adj.pValues <- p.adjust(Cor[,"pValue"], method="fdr")
Cor <- cbind(Cor, adj.pValues)

#Cell type pValue, adj. pValue
#Macrophages M2 0.00000316, 0.0000316
#Treg Cells 0.00003426, 0.0001713
#B Cells 0.00008090, 0.0002697
#CD4 T Cells 0.00138362, 0.0034590
#CD8 T Cells 0.03303383, 0.0660677
#Monocytes 0.08671830, 0.1445305
#Macrophages M1 0.21542079, 0.3077440
#NK Cells 0.58737381, 0.7342173
#Neutrophiles 0.75744004, 0.8131563
#Dendritic Cells 0.81315635, 0.8131563
Cor <- as.data.frame(Cor)
write.table(Cor, file = "WilcoxTes_ST6GALNAC1_immune.csv")

#prepare data for graphpad plot
DataLow <- matrix(data=0, nrow=length(cellType), ncol=3)
rownames(DataLow) <- cellType
colnames(DataLow) <- c("Mean", "SD", "N")
for(i in cellType){
  meanLow <- mean(low_subset[,i], na.rm=T)
  sdLow <- sd(low_subset[,i], na.rm=T)
  nLow<- length(low_subset[,i])
  DataLow[i,] <- c(meanLow, sdLow, nLow)
}

DataHigh <- matrix(data=0, nrow=length(cellType), ncol=3)
rownames(DataHigh) <- cellType
colnames(DataHigh) <- c("Mean", "SD", "N")
for(i in cellType){
  meanHigh <- mean(high_subset[,i], na.rm=T)
  sdHigh <- sd(high_subset[,i], na.rm=T)
  nHigh<- length(high_subset[,i])
  DataHigh[i,] <- c(meanHigh, sdHigh, nHigh)
}

write.table(DataHigh, "High stats ST6GALNAC1.csv", sep=";", quote=F, col.names=NA)
write.table(DataLow, "Low stats ST6GALNAC1.csv", sep=";", quote=F, col.names=NA)

############################################################## END ##########################################