---
title: "GO_cluster"
author: "Luise A. Seeker"
date: "08/11/2021"
output: html_document
---
# introduction
I tested GSEA and gene ontology both for molecular functions and biological
pathways and different visualisations. In general GO works better for my data
than GSEA so therefore in later versions of this document, there may be no
code for GSEA any more.
```{r}
library(here)
library(tibble)
library(Seurat)
library(dplyr)
library(ggplot2)
library(tidyr)
#GO & GSEA
library(biomaRt)
library(org.Hs.eg.db)
library(ReactomePA)
library(msigdbr)
library(fgsea)
library(NMF)
library(ggupset)
library(clusterProfiler)
library(enrichplot)
```
# Read in data
```{r}
nad_ol <- readRDS(here("data",
"single_nuc_data",
"oligodendroglia",
"srt_oligos_and_opcs_LS.RDS"))
```
# Prepare hallmarks for GSEA and gene sets
```{r, eval = FALSE}
hallmark <- msigdbr(species = "Homo sapiens", category = "H")
geneset <- hallmark %>% split(x = .$gene_symbol, f = .$gs_name)
```
# Read in tissue, age and sex DEG gene lists
```{r}
go_dir <- here("outs",
"GO",
"david",
"GO_cell_profiler_age_sex_tissue_all_celltypes")
dir.create(go_dir, recursive = TRUE)
emap_dir <- here("outs",
"GO",
"david",
"emap_plots_age_sex_tissue")
dir.create(emap_dir , recursive = TRUE)
go_dot <- here("outs",
"GO",
"GO_luise",
"biological_process",
"GO_dotplot_age_sex_tissue")
dir.create(go_dot, recursive = TRUE)
go_dir_cnetpl <- here("outs",
"GO",
"GO_luise",
"biological_process",
"GO_cnet_age_sex_tissue")
dir.create(go_dir_cnetpl, recursive = TRUE)
go_dir_tree<- here("outs",
"GO",
"GO_luise",
"biological_process",
"GO_ctree_age_sex_tissue")
dir.create(go_dir_tree)
go_dir_upset_plot<- here("outs",
"GO",
"GO_luise",
"biological_process",
"GO_upset_age_sex_tissue")
dir.create(go_dir_upset_plot)
go_dir_hm<- here("outs",
"GO",
"GO_luise",
"biological_process",
"GO_hm_age_sex_tissue")
dir.create(go_dir_hm)
go_dir_mol_f<- here("outs",
"GO",
"GO_luise",
"GO_molecular_function",
"dot_pl_mf_age_sex_tissue") # the others are all biological processes
dir.create(go_dir_mol_f, recursive = TRUE)
go_dir_mol_f_tree<- here("outs",
"GO",
"GO_luise",
"GO_molecular_function",
"tree_mf_age_sex_tissue") # the others are all biological processes
dir.create(go_dir_mol_f_tree)
go_dir_bp_compare<- here("outs",
"GO",
"GO_luise",
"biological_process",
"cluster_compare_age_sex_tissue") # the others are all biological processes
dir.create(go_dir_bp_compare)
```
# for oligos
```{r}
oligo_file_path_tissue <- here("outs",
"DGE_tissue",
"overall")
oligo_file_path_age_sex <- here("outs",
"DGE_age_sex",
"overall")
oligo_files_tissue <- list.files(oligo_file_path_tissue, full.names = TRUE)
oligo_files_age_sex <- list.files(oligo_file_path_age_sex, full.names = TRUE)
oligo_files <- c(oligo_files_tissue, oligo_files_age_sex)
# Prepare marts to convert to entrez
listMarts()
ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl",
host = "www.ensembl.org")
listDatasets(ensembl)
attributes = listAttributes(ensembl)
named_list <- list()
for(i in 1: length(oligo_files)){
data <- read.csv(oligo_files[i])
cluster_lev <- levels(as.factor(data$cluster))
for(k in 1:length(cluster_lev)){
curr_data <- subset(data, data$cluster == cluster_lev[k])
cluster <- curr_data[["cluster"]][1]
cell_t <- sapply(strsplit(oligo_files[i], "/"), `[`, 12)
cell_t <- sapply(strsplit(cell_t, "_markers"), `[`, 1)
label <- paste0("oligos_", cell_t, "_", cluster)
plot_label <- paste0(label, ".pdf")
fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 &
curr_data$avg_log2FC > 0)
genes <- fil_data$gene
# convert to ENTREZID
genes_ent <- bitr(genes, fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
named_list[[k]] <- genes_ent$ENTREZID
names(named_list)[k] <- label
}
if(i == 1){
return_list <- named_list
}else{
return_list <- append(return_list, named_list)
}
named_list <- list()
}
str(return_list)
tissue_list <- return_list[1:3]
age_list <- return_list[4:5]
age_sex_list <- return_list[6:9]
sex_list <- return_list[10:11]
# Tissue
ck_kegg_tissue <- compareCluster(geneCluster = tissue_list, fun = enrichKEGG)
ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_tissue)
dotplot(ck_kegg_tissue)
# Age
ck_kegg_age <- compareCluster(geneCluster = age_list, fun = enrichKEGG)
ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age)
dotplot(ck_kegg_age)
# Age*sex
ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list, fun = enrichKEGG)
ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age_sex)
dotplot(ck_kegg_age_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# sex
ck_kegg_sex <- compareCluster(geneCluster = sex_list, fun = enrichKEGG)
ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_sex)
dotplot(ck_kegg_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
```
```{r}
for(i in 1: length(oligo_files)){
data <- read.csv(oligo_files[i])
cluster_lev <- levels(as.factor(data$cluster))
for(k in 1:length(cluster_lev)){
curr_data <- subset(data, data$cluster == cluster_lev[k])
cluster <- curr_data[["cluster"]][1]
cell_t <- sapply(strsplit(oligo_files[i], "/"), `[`, 12)
cell_t <- sapply(strsplit(cell_t, "_markers"), `[`, 1)
label <- paste0("oligos_", cell_t, "_", cluster)
plot_label <- paste0(label, ".pdf")
fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 &
curr_data$avg_log2FC > 0)
genes <- fil_data$gene
# Prepare input data
ranks <- fil_data %>% dplyr::select(gene, avg_log2FC)
ranks <- deframe(ranks)
#RUN GO using clusterProfiler
fil_data$Gene <- fil_data$gene
#remove any duplicates (sanity check for me)
genemodulesGO <- fil_data[!duplicated(fil_data$Gene),]
Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id",
"entrezgene_id",
"gene_biotype",
"hgnc_symbol"),
filters = "hgnc_symbol",
values = fil_data$Gene,
mart = ensembl,
uniqueRows = FALSE)
Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <-
as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype'])
#Filter genes
#biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol
# %in% nad_ol@assays$RNA@var.features)
#entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
# %in% nad_ol@assays$RNA@var.features)
# if(nrow(biotype_all_dataset) == 0){
biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol
%in% rownames(nad_ol@assays$RNA))
entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
%in% rownames(nad_ol@assays$RNA))
# }
entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),]
#you might need to remove NAs
entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),]
allLLIDs <- entrezmatched$entrezgene
modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human",
pvalueCutoff=0.01,
qvalueCutoff = 0.3,
pAdjustMethod = "none",
readable=T)
if(nrow(as.data.frame(modulesReactome)) > 0){
dot_plot <- dotplot(modulesReactome, showCategory=20)
pdf(file = here(go_dir, plot_label),
width = 12,
height = 10)
print(dot_plot)
dev.off()
x2 <- pairwise_termsim(modulesReactome)
emap_plot <- emapplot(x2)
pdf(file = here(emap_dir, plot_label),
width = 12,
height = 10)
print(emap_plot)
dev.off()
#Alternative GO clasissification
ent_uni <- rownames(nad_ol@assays$RNA)
ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
ent_uni <- ent_uni_entrez$ENTREZID
# Molecular function
go_mf <- enrichGO(gene = as.character(allLLIDs),
universe = ent_uni,
OrgDb = org.Hs.eg.db,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = TRUE)
go_mf_plot <- dotplot(go_mf, showCategory = 20)
pdf(file = here(go_dir_mol_f, plot_label),
width = 12,
height = 10)
print(go_mf_plot)
dev.off()
# GO biological process
ego <- enrichGO(gene = as.character(allLLIDs),
OrgDb = org.Hs.eg.db,
ont = "BP",
universe = ent_uni )
#head(ego)
if(nrow(ego) > 1){
dp2 <- dotplot(ego, showCategory=20)
pdf(file = here(go_dot, plot_label),
width = 12,
height = 10)
print(dp2)
dev.off()
## convert gene ID to Symbol
edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID')
cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE,
colorEdge = TRUE)
pdf(file = here(go_dir_cnetpl, plot_label),
width = 14,
height = 12)
print(cnet_pl)
dev.off()
edox2 <- enrichplot::pairwise_termsim(edox)
if(nrow(edox2) > 1){
tree_plot <- treeplot(edox2)
pdf(file = here(go_dir_tree, plot_label), # The directory you want to save the file in
width = 16, # The width of the plot in inches
height = 10) # The height of the plot in inches
print(tree_plot)
dev.off()
}
u_plot <- upsetplot(ego)
pdf(file = here(go_dir_upset_plot, plot_label),
width = 15,
height = 10)
print(u_plot)
dev.off()
if(nrow(edox) > 1){
hm <- heatplot(edox, foldChange=ranks, showCategory=5)
pdf(file = here(go_dir_hm, plot_label), # The directory you want to save the file in
width = 15, # The width of the plot in inches
height = 10) # The height of the plot in inches
print(hm)
dev.off()
}
}
edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID')
edox2_mf <- pairwise_termsim(edox_mf)
tree_plot_mf <- treeplot(edox2_mf)
pdf(file = here(go_dir_mol_f_tree, plot_label),
width = 16,
height = 10)
print(tree_plot_mf)
dev.off()
}
}
}
```
# for non oligos
```{r}
files <- c( here("outs",
"astrocytes",
"tissue_marker_list_astro",
"astro_tissue_marker.csv"),
here("outs",
"astrocytes",
"age_marker_list_astro",
"astro_age_marker.csv"),
here("outs",
"astrocytes",
"sex_marker_list_astro",
"astro_sex_marker.csv"),
here("outs",
"astrocytes",
"age_sex_marker_list_astro",
"astro_age_sex_marker.csv"),
###
here("outs",
"microglia_macrophages",
"tissue_marker_list_mm",
"mm_tissue_marker.csv"),
here("outs",
"microglia_macrophages",
"age_marker_list_mm",
"mm_age_marker.csv"),
here("outs",
"microglia_macrophages",
"sex_marker_list_mm",
"mm_sex_marker.csv"),
here("outs",
"microglia_macrophages",
"age_sex_marker_list_mm",
"mm_age_sex_marker.csv"),
###
here("outs",
"vascular_cells",
"tissue_marker_list_vasc",
"vasc_tissue_mark.csv"),
here("outs",
"vascular_cells",
"age_marker_list_vasc",
"vasc_age_marker.csv"),
here("outs",
"vascular_cells",
"sex_marker_list_vasc",
"vasc_sex_marker.csv"),
here("outs",
"vascular_cells",
"age_sex_marker_list_vasc",
"vasc_age_sex_mark.csv"),
###
here("outs",
"neurons",
"tissue_marker_list_neuron",
"neuron_tissue_marker.csv"),
here("outs",
"neurons",
"age_marker_list_neuron",
"neuron_age_marker.csv"),
here("outs",
"neurons",
"sex_marker_list_neuron",
"neuron_sex_marker.csv"),
here("outs",
"neurons",
"age_sex_marker_list_neuron",
"age_sex_neurons_marker.csv")
)
single_nuc_data <- c(here("data",
"single_nuc_data",
"astrocytes",
"HCA_astrocytes.RDS"),
here("data",
"single_nuc_data",
"microglia",
"HCA_microglia.RDS"),
here("data",
"single_nuc_data",
"vascular_cells",
"HCA_vascular_cells.RDS"),
here("data",
"single_nuc_data",
"neurons",
"HCA_neurons.RDS")
)
```
```{r}
named_list <- list()
for(i in 1: length(files)){
data <- read.csv(files[i])
cluster_lev <- levels(as.factor(data$cluster))
for(k in 1:length(cluster_lev)){
curr_data <- subset(data, data$cluster == cluster_lev[k])
cluster <- curr_data[["cluster"]][1]
cell_t <- sapply(strsplit(files[i], "/"), `[`, 12)
cell_t <- sapply(strsplit(cell_t, "_marker"), `[`, 1)
label <- paste0(cell_t, "_", cluster)
plot_label <- paste0(label, ".pdf")
fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 &
curr_data$avg_log2FC > 0)
genes <- fil_data$gene
# convert to ENTREZID
genes_ent <- bitr(genes, fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
named_list[[k]] <- genes_ent$ENTREZID
names(named_list)[k] <- label
}
if(i == 1){
return_list <- named_list
}else{
return_list <- append(return_list, named_list)
}
named_list <- list()
}
str(return_list)
tissue_list_astro <- return_list[1:3]
age_list_astro <- return_list[4:5]
sex_list_astro <- return_list[6:7]
age_sex_list_astro <- return_list[8:11]
tissue_list_mm <- return_list[12:14]
age_list_mm <- return_list[15:16]
sex_list_mm <- return_list[17:18]
age_sex_list_mm <- return_list[19:22]
tissue_list_vasc <- return_list[23:25]
age_list_vasc <- return_list[26:27]
sex_list_vasc <- return_list[28:29]
age_sex_list_vasc <- return_list[30:33]
tissue_list_neur <- return_list[34:36]
age_list_neur <- return_list[37:38]
sex_list_neur <- return_list[39:40]
age_sex_list_neur <- return_list[41:44]
# ASTROCYTES
# Tissue
ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_astro, fun = enrichKEGG)
ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_tissue)
dotplot(ck_kegg_tissue)
# Age
ck_kegg_age <- compareCluster(geneCluster = age_list_astro, fun = enrichKEGG)
ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age)
dotplot(ck_kegg_age)
# Age*sex
ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_astro, fun = enrichKEGG)
ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age_sex)
dotplot(ck_kegg_age_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# sex
ck_kegg_sex <- compareCluster(geneCluster = sex_list_astro, fun = enrichKEGG)
ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_sex)
dotplot(ck_kegg_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# MICROGLIA
# Tissue
ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_mm, fun = enrichKEGG)
ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_tissue)
dotplot(ck_kegg_tissue)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Age
ck_kegg_age <- compareCluster(geneCluster = age_list_mm, fun = enrichKEGG)
ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age)
dotplot(ck_kegg_age)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Age*sex
ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_mm, fun = enrichKEGG)
ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age_sex)
dotplot(ck_kegg_age_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# sex
ck_kegg_sex <- compareCluster(geneCluster = sex_list_mm, fun = enrichKEGG)
ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_sex)
dotplot(ck_kegg_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# VASCULAR
# Tissue
ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_vasc, fun = enrichKEGG)
ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_tissue)
dotplot(ck_kegg_tissue)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Age
ck_kegg_age <- compareCluster(geneCluster = age_list_vasc, fun = enrichKEGG)
ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age)
dotplot(ck_kegg_age)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Age*sex
ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_vasc, fun = enrichKEGG)
ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age_sex)
dotplot(ck_kegg_age_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# sex
ck_kegg_sex <- compareCluster(geneCluster = sex_list_vasc, fun = enrichKEGG)
ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_sex)
dotplot(ck_kegg_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# neurons
# Tissue
ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_neur, fun = enrichKEGG)
ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_tissue)
dotplot(ck_kegg_tissue)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Age
ck_kegg_age <- compareCluster(geneCluster = age_list_neur, fun = enrichKEGG)
ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age)
dotplot(ck_kegg_age)+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Age*sex
ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_neur, fun = enrichKEGG)
ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_age_sex)
dotplot(ck_kegg_age_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# sex
ck_kegg_sex <- compareCluster(geneCluster = sex_list_neur, fun = enrichKEGG)
ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
head(ck_kegg_sex)
dotplot(ck_kegg_sex) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
```
```{r}
for(i in 1: length(files)){
data <- read.csv(files[i])
cluster_lev <- levels(as.factor(data$cluster))
for(k in 1:length(cluster_lev)){
curr_data <- subset(data, data$cluster == cluster_lev[k])
cluster <- curr_data[["cluster"]][1]
cluster <- gsub(" ", "_", cluster)
cell_t <- sapply(strsplit(files[i], "/"), `[`, 12)
cell_t <- sapply(strsplit(cell_t, "_mark"), `[`, 1)
label <- paste0(cell_t, "_", cluster)
plot_label <- paste0(label, ".pdf")
cell_type <- sapply(strsplit(cell_t, "_"), `[`, 1)
fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 &
curr_data$avg_log2FC > 0)
if(nrow(fil_data) > 0){
genes <- fil_data$gene
# Prepare input data
ranks <- fil_data %>% dplyr::select(gene, avg_log2FC)
ranks <- deframe(ranks)
#RUN GO using clusterProfiler
fil_data$Gene <- fil_data$gene
#remove any duplicates (sanity check for me)
genemodulesGO <- fil_data[!duplicated(fil_data$Gene),]
Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id",
"entrezgene_id",
"gene_biotype",
"hgnc_symbol"),
filters = "hgnc_symbol",
values = fil_data$Gene,
mart = ensembl,
uniqueRows = FALSE)
Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <-
as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype'])
if(cell_type == "astro"){
srt <- readRDS(single_nuc_data[1])
}else if(cell_type == "mm"){
srt <- readRDS(single_nuc_data[2])
}else if(cell_type == "vasc"){
srt <- readRDS(single_nuc_data[3])
}else if(cell_type == "neuron" | cell_type == "age"){
srt <- readRDS(single_nuc_data[4])
}else{
print("check srt.")
}
#Filter genes
#biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol
# %in% srt@assays$RNA@var.features)
#entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
# %in% srt@assays$RNA@var.features)
# if(nrow(biotype_all_dataset) == 0){
biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol
%in% rownames(srt@assays$RNA))
entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
%in% rownames(srt@assays$RNA))
# }
entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),]
#you might need to remove NAs
entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),]
allLLIDs <- entrezmatched$entrezgene
modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human",
pvalueCutoff=0.01,
qvalueCutoff = 0.3,
pAdjustMethod = "none",
readable=T)
if(nrow(as.data.frame(modulesReactome)) > 0){
dot_plot <- dotplot(modulesReactome, showCategory=20)
pdf(file = here(go_dir, plot_label),
width = 12,
height = 10)
print(dot_plot)
dev.off()
x2 <- pairwise_termsim(modulesReactome)
emap_plot <- emapplot(x2)
pdf(file = here(emap_dir, plot_label),
width = 12,
height = 10)
print(emap_plot)
dev.off()
#Alternative GO clasissification
ent_uni <- rownames(srt@assays$RNA)
ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
ent_uni <- ent_uni_entrez$ENTREZID
# Molecular function
go_mf <- enrichGO(gene = as.character(allLLIDs),
universe = ent_uni,
OrgDb = org.Hs.eg.db,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 1,
qvalueCutoff = 1,
readable = TRUE)
go_mf_plot <- dotplot(go_mf, showCategory = 20)
pdf(file = here(go_dir_mol_f, plot_label),
width = 12,
height = 10)
print(go_mf_plot)
dev.off()
# GO biological process
ego <- enrichGO(gene = as.character(allLLIDs),
OrgDb = org.Hs.eg.db,
ont = "BP",
universe = ent_uni )
#head(ego)
if(nrow(ego) > 1){
dp2 <- dotplot(ego, showCategory=20)
pdf(file = here(go_dot, plot_label),
width = 12,
height = 10)
print(dp2)
dev.off()
## convert gene ID to Symbol
edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID')
cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE,
colorEdge = TRUE)
pdf(file = here(go_dir_cnetpl, plot_label),
width = 14,
height = 12)
print(cnet_pl)
dev.off()
# edox2 <- enrichplot::pairwise_termsim(edox)
# if(nrow(edox2) > 1){
# tree_plot <- treeplot(edox2)
#pdf(file = here(go_dir_tree, plot_label), # The directory you want to save the file in
# width = 16, # The width of the plot in inches
# height = 10) # The height of the plot in inches
# print(tree_plot)
#dev.off()
u_plot <- upsetplot(ego)
pdf(file = here(go_dir_upset_plot, plot_label),
width = 15,
height = 10)
print(u_plot)
dev.off()
}
if(nrow(edox) > 1){
hm <- heatplot(edox, foldChange=ranks, showCategory=5)
pdf(file = here(go_dir_hm, plot_label), # The directory you want to save the file in
width = 15, # The width of the plot in inches
height = 10) # The height of the plot in inches
print(hm)
dev.off()
}
}
edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID')
edox2_mf <- pairwise_termsim(edox_mf)
tree_plot_mf <- treeplot(edox2_mf)
pdf(file = here(go_dir_mol_f_tree, plot_label),
width = 16,
height = 10)
print(tree_plot_mf)
dev.off()
}
}
}
```
# Create and save files that contain GO output for oligos
```{r}
oligo_files
srt <- nad_ol
plot_df <- data.frame()
for(i in 1: length(oligo_files)){
data <- read.csv(oligo_files[i])
#srt <- readRDS(single_nuc_data[i])
cluster_lev <- levels(as.factor(data$cluster))
for(k in 1:length(cluster_lev)){
curr_data <- subset(data, data$cluster == cluster_lev[k])
cluster <- curr_data[["cluster"]][1]
#cell_t <- sapply(strsplit(oligo_files[i], "/"), `[`, 10)
cell_t <- "oligos_"
plot_label <- paste0(cell_t, "cluster", "GO_term_age_sex_region.csv")
fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 &
curr_data$avg_log2FC > 0)
#RUN GO using clusterProfiler
fil_data$Gene <- fil_data$gene
#remove any duplicates (sanity check for me)
genemodulesGO <- fil_data[!duplicated(fil_data$Gene),]
Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id",
"entrezgene_id",
"gene_biotype",
"hgnc_symbol"),
filters = "hgnc_symbol",
values = fil_data$Gene,
mart = ensembl,
uniqueRows = FALSE)
Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <-
as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype'])
biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol
%in% rownames(srt@assays$RNA))
entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
%in% rownames(srt@assays$RNA))
entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),]
#you might need to remove NAs
entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),]
allLLIDs <- entrezmatched$entrezgene
allLLIDs<-allLLIDs[!is.na(allLLIDs)]
ent_uni <- rownames(nad_ol@assays$RNA)
ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
ent_uni <- ent_uni_entrez$ENTREZID
ego <- enrichGO(gene = as.character(allLLIDs),
OrgDb = org.Hs.eg.db,
ont = "BP",
universe = ent_uni )
ego_df <- as.data.frame(ego)
if(nrow(ego_df)> 0){
ego_df$Cell_type <- cell_t
ego_df$cluster <- cluster_lev[k]
if(nrow(plot_df) < 1){
plot_df <- ego_df
}else{
plot_df <- rbind(plot_df, ego_df)
}
}
}
}
dir.create(here("outs",
"GO",
"tissue_age_sex_table_output",
"oligos"), recursive = TRUE)
write.csv(plot_df, here("outs",
"GO",
"tissue_age_sex_table_output",
"oligos",
paste0(plot_label)))
```
# Create and save files that contain GO output for non_oligos
```{r}
plot_df <- data.frame()
for(i in 1: length(files)){
data <- read.csv(files[i])
cell_t <- sapply(strsplit(files[i], "/"), `[`, 10)
if(cell_t == "astrocytes"){
srt <- readRDS(single_nuc_data[1])
}else if(cell_t == "microglia_macrophages"){
srt <- readRDS(single_nuc_data[2])
}else if(cell_t == "vascular_cells"){
srt <- readRDS(single_nuc_data[3])
}else if(cell_t == "neurons"){
srt <- readRDS(single_nuc_data[4])
}else{
print("check srt.")
}
cluster_lev <- levels(as.factor(data$cluster))
for(k in 1:length(cluster_lev)){
curr_data <- subset(data, data$cluster == cluster_lev[k])
cluster <- curr_data[["cluster"]][1]
plot_label <- paste0(cell_t, "_", cluster, "_GO_term_age_sex_region.csv")
fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 &
curr_data$avg_log2FC > 0)
if(nrow(fil_data) > 0){
#RUN GO using clusterProfiler
fil_data$Gene <- fil_data$gene
#remove any duplicates (sanity check for me)
genemodulesGO <- fil_data[!duplicated(fil_data$Gene),]
Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id",
"entrezgene_id",
"gene_biotype",
"hgnc_symbol"),
filters = "hgnc_symbol",
values = fil_data$Gene,
mart = ensembl,
uniqueRows = FALSE)
Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <-
as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype'])
biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol
%in% rownames(srt@assays$RNA))
entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
%in% rownames(srt@assays$RNA))
entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),]
#you might need to remove NAs
entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),]
allLLIDs <- entrezmatched$entrezgene
allLLIDs<-allLLIDs[!is.na(allLLIDs)]
ent_uni <- rownames(nad_ol@assays$RNA)
ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
ent_uni <- ent_uni_entrez$ENTREZID
ego <- enrichGO(gene = as.character(allLLIDs),
OrgDb = org.Hs.eg.db,
ont = "BP",
universe = ent_uni )
ego_df <- as.data.frame(ego)
if(nrow(ego_df)> 0){
ego_df$Cell_type <- cell_t
ego_df$cluster <- cluster_lev[k]
if(nrow(plot_df) < 1){
plot_df <- ego_df
}else{
plot_df <- rbind(plot_df, ego_df)
}
}
}
}
}
dir.create(here("outs",
"GO",
"tissue_age_sex_table_output",
"non_oligos"), recursive = TRUE)
write.csv(plot_df, here("outs",
"GO",
"tissue_age_sex_table_output",
"non_oligos",
"non_oligos_GO_term_age_sex_region.csv"))
```
# Session Info
```{r}
sessionInfo()
```