--- title: "GO_cluster" author: "Luise A. Seeker" date: "08/11/2021" output: html_document --- # introduction I tested GSEA and gene ontology both for molecular functions and biological pathways and different visualisations. In general GO works better for my data than GSEA so therefore in later versions of this document, there may be no code for GSEA any more. ```{r} library(here) library(tibble) library(Seurat) library(dplyr) library(ggplot2) library(tidyr) #GO & GSEA library(biomaRt) library(org.Hs.eg.db) library(ReactomePA) library(msigdbr) library(fgsea) library(NMF) library(ggupset) library(clusterProfiler) library(enrichplot) ``` # Read in data ```{r} nad_ol <- readRDS(here("data", "single_nuc_data", "oligodendroglia", "srt_oligos_and_opcs_LS.RDS")) ``` # Prepare hallmarks for GSEA and gene sets ```{r, eval = FALSE} hallmark <- msigdbr(species = "Homo sapiens", category = "H") geneset <- hallmark %>% split(x = .$gene_symbol, f = .$gs_name) ``` # Read in tissue, age and sex DEG gene lists ```{r} go_dir <- here("outs", "GO", "david", "GO_cell_profiler_age_sex_tissue_all_celltypes") dir.create(go_dir, recursive = TRUE) emap_dir <- here("outs", "GO", "david", "emap_plots_age_sex_tissue") dir.create(emap_dir , recursive = TRUE) go_dot <- here("outs", "GO", "GO_luise", "biological_process", "GO_dotplot_age_sex_tissue") dir.create(go_dot, recursive = TRUE) go_dir_cnetpl <- here("outs", "GO", "GO_luise", "biological_process", "GO_cnet_age_sex_tissue") dir.create(go_dir_cnetpl, recursive = TRUE) go_dir_tree<- here("outs", "GO", "GO_luise", "biological_process", "GO_ctree_age_sex_tissue") dir.create(go_dir_tree) go_dir_upset_plot<- here("outs", "GO", "GO_luise", "biological_process", "GO_upset_age_sex_tissue") dir.create(go_dir_upset_plot) go_dir_hm<- here("outs", "GO", "GO_luise", "biological_process", "GO_hm_age_sex_tissue") dir.create(go_dir_hm) go_dir_mol_f<- here("outs", "GO", "GO_luise", "GO_molecular_function", "dot_pl_mf_age_sex_tissue") # the others are all biological processes dir.create(go_dir_mol_f, recursive = TRUE) go_dir_mol_f_tree<- here("outs", "GO", "GO_luise", "GO_molecular_function", "tree_mf_age_sex_tissue") # the others are all biological processes dir.create(go_dir_mol_f_tree) go_dir_bp_compare<- here("outs", "GO", "GO_luise", "biological_process", "cluster_compare_age_sex_tissue") # the others are all biological processes dir.create(go_dir_bp_compare) ``` # for oligos ```{r} oligo_file_path_tissue <- here("outs", "DGE_tissue", "overall") oligo_file_path_age_sex <- here("outs", "DGE_age_sex", "overall") oligo_files_tissue <- list.files(oligo_file_path_tissue, full.names = TRUE) oligo_files_age_sex <- list.files(oligo_file_path_age_sex, full.names = TRUE) oligo_files <- c(oligo_files_tissue, oligo_files_age_sex) # Prepare marts to convert to entrez listMarts() ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl", host = "www.ensembl.org") listDatasets(ensembl) attributes = listAttributes(ensembl) named_list <- list() for(i in 1: length(oligo_files)){ data <- read.csv(oligo_files[i]) cluster_lev <- levels(as.factor(data$cluster)) for(k in 1:length(cluster_lev)){ curr_data <- subset(data, data$cluster == cluster_lev[k]) cluster <- curr_data[["cluster"]][1] cell_t <- sapply(strsplit(oligo_files[i], "/"), `[`, 12) cell_t <- sapply(strsplit(cell_t, "_markers"), `[`, 1) label <- paste0("oligos_", cell_t, "_", cluster) plot_label <- paste0(label, ".pdf") fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 & curr_data$avg_log2FC > 0) genes <- fil_data$gene # convert to ENTREZID genes_ent <- bitr(genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) named_list[[k]] <- genes_ent$ENTREZID names(named_list)[k] <- label } if(i == 1){ return_list <- named_list }else{ return_list <- append(return_list, named_list) } named_list <- list() } str(return_list) tissue_list <- return_list[1:3] age_list <- return_list[4:5] age_sex_list <- return_list[6:9] sex_list <- return_list[10:11] # Tissue ck_kegg_tissue <- compareCluster(geneCluster = tissue_list, fun = enrichKEGG) ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_tissue) dotplot(ck_kegg_tissue) # Age ck_kegg_age <- compareCluster(geneCluster = age_list, fun = enrichKEGG) ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age) dotplot(ck_kegg_age) # Age*sex ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list, fun = enrichKEGG) ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age_sex) dotplot(ck_kegg_age_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # sex ck_kegg_sex <- compareCluster(geneCluster = sex_list, fun = enrichKEGG) ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_sex) dotplot(ck_kegg_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ``` ```{r} for(i in 1: length(oligo_files)){ data <- read.csv(oligo_files[i]) cluster_lev <- levels(as.factor(data$cluster)) for(k in 1:length(cluster_lev)){ curr_data <- subset(data, data$cluster == cluster_lev[k]) cluster <- curr_data[["cluster"]][1] cell_t <- sapply(strsplit(oligo_files[i], "/"), `[`, 12) cell_t <- sapply(strsplit(cell_t, "_markers"), `[`, 1) label <- paste0("oligos_", cell_t, "_", cluster) plot_label <- paste0(label, ".pdf") fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 & curr_data$avg_log2FC > 0) genes <- fil_data$gene # Prepare input data ranks <- fil_data %>% dplyr::select(gene, avg_log2FC) ranks <- deframe(ranks) #RUN GO using clusterProfiler fil_data$Gene <- fil_data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$Gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) #Filter genes #biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol # %in% nad_ol@assays$RNA@var.features) #entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol # %in% nad_ol@assays$RNA@var.features) # if(nrow(biotype_all_dataset) == 0){ biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(nad_ol@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(nad_ol@assays$RNA)) # } entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human", pvalueCutoff=0.01, qvalueCutoff = 0.3, pAdjustMethod = "none", readable=T) if(nrow(as.data.frame(modulesReactome)) > 0){ dot_plot <- dotplot(modulesReactome, showCategory=20) pdf(file = here(go_dir, plot_label), width = 12, height = 10) print(dot_plot) dev.off() x2 <- pairwise_termsim(modulesReactome) emap_plot <- emapplot(x2) pdf(file = here(emap_dir, plot_label), width = 12, height = 10) print(emap_plot) dev.off() #Alternative GO clasissification ent_uni <- rownames(nad_ol@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID # Molecular function go_mf <- enrichGO(gene = as.character(allLLIDs), universe = ent_uni, OrgDb = org.Hs.eg.db, ont = "MF", pAdjustMethod = "BH", pvalueCutoff = 1, qvalueCutoff = 1, readable = TRUE) go_mf_plot <- dotplot(go_mf, showCategory = 20) pdf(file = here(go_dir_mol_f, plot_label), width = 12, height = 10) print(go_mf_plot) dev.off() # GO biological process ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) #head(ego) if(nrow(ego) > 1){ dp2 <- dotplot(ego, showCategory=20) pdf(file = here(go_dot, plot_label), width = 12, height = 10) print(dp2) dev.off() ## convert gene ID to Symbol edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID') cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE, colorEdge = TRUE) pdf(file = here(go_dir_cnetpl, plot_label), width = 14, height = 12) print(cnet_pl) dev.off() edox2 <- enrichplot::pairwise_termsim(edox) if(nrow(edox2) > 1){ tree_plot <- treeplot(edox2) pdf(file = here(go_dir_tree, plot_label), # The directory you want to save the file in width = 16, # The width of the plot in inches height = 10) # The height of the plot in inches print(tree_plot) dev.off() } u_plot <- upsetplot(ego) pdf(file = here(go_dir_upset_plot, plot_label), width = 15, height = 10) print(u_plot) dev.off() if(nrow(edox) > 1){ hm <- heatplot(edox, foldChange=ranks, showCategory=5) pdf(file = here(go_dir_hm, plot_label), # The directory you want to save the file in width = 15, # The width of the plot in inches height = 10) # The height of the plot in inches print(hm) dev.off() } } edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID') edox2_mf <- pairwise_termsim(edox_mf) tree_plot_mf <- treeplot(edox2_mf) pdf(file = here(go_dir_mol_f_tree, plot_label), width = 16, height = 10) print(tree_plot_mf) dev.off() } } } ``` # for non oligos ```{r} files <- c( here("outs", "astrocytes", "tissue_marker_list_astro", "astro_tissue_marker.csv"), here("outs", "astrocytes", "age_marker_list_astro", "astro_age_marker.csv"), here("outs", "astrocytes", "sex_marker_list_astro", "astro_sex_marker.csv"), here("outs", "astrocytes", "age_sex_marker_list_astro", "astro_age_sex_marker.csv"), ### here("outs", "microglia_macrophages", "tissue_marker_list_mm", "mm_tissue_marker.csv"), here("outs", "microglia_macrophages", "age_marker_list_mm", "mm_age_marker.csv"), here("outs", "microglia_macrophages", "sex_marker_list_mm", "mm_sex_marker.csv"), here("outs", "microglia_macrophages", "age_sex_marker_list_mm", "mm_age_sex_marker.csv"), ### here("outs", "vascular_cells", "tissue_marker_list_vasc", "vasc_tissue_mark.csv"), here("outs", "vascular_cells", "age_marker_list_vasc", "vasc_age_marker.csv"), here("outs", "vascular_cells", "sex_marker_list_vasc", "vasc_sex_marker.csv"), here("outs", "vascular_cells", "age_sex_marker_list_vasc", "vasc_age_sex_mark.csv"), ### here("outs", "neurons", "tissue_marker_list_neuron", "neuron_tissue_marker.csv"), here("outs", "neurons", "age_marker_list_neuron", "neuron_age_marker.csv"), here("outs", "neurons", "sex_marker_list_neuron", "neuron_sex_marker.csv"), here("outs", "neurons", "age_sex_marker_list_neuron", "age_sex_neurons_marker.csv") ) single_nuc_data <- c(here("data", "single_nuc_data", "astrocytes", "HCA_astrocytes.RDS"), here("data", "single_nuc_data", "microglia", "HCA_microglia.RDS"), here("data", "single_nuc_data", "vascular_cells", "HCA_vascular_cells.RDS"), here("data", "single_nuc_data", "neurons", "HCA_neurons.RDS") ) ``` ```{r} named_list <- list() for(i in 1: length(files)){ data <- read.csv(files[i]) cluster_lev <- levels(as.factor(data$cluster)) for(k in 1:length(cluster_lev)){ curr_data <- subset(data, data$cluster == cluster_lev[k]) cluster <- curr_data[["cluster"]][1] cell_t <- sapply(strsplit(files[i], "/"), `[`, 12) cell_t <- sapply(strsplit(cell_t, "_marker"), `[`, 1) label <- paste0(cell_t, "_", cluster) plot_label <- paste0(label, ".pdf") fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 & curr_data$avg_log2FC > 0) genes <- fil_data$gene # convert to ENTREZID genes_ent <- bitr(genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) named_list[[k]] <- genes_ent$ENTREZID names(named_list)[k] <- label } if(i == 1){ return_list <- named_list }else{ return_list <- append(return_list, named_list) } named_list <- list() } str(return_list) tissue_list_astro <- return_list[1:3] age_list_astro <- return_list[4:5] sex_list_astro <- return_list[6:7] age_sex_list_astro <- return_list[8:11] tissue_list_mm <- return_list[12:14] age_list_mm <- return_list[15:16] sex_list_mm <- return_list[17:18] age_sex_list_mm <- return_list[19:22] tissue_list_vasc <- return_list[23:25] age_list_vasc <- return_list[26:27] sex_list_vasc <- return_list[28:29] age_sex_list_vasc <- return_list[30:33] tissue_list_neur <- return_list[34:36] age_list_neur <- return_list[37:38] sex_list_neur <- return_list[39:40] age_sex_list_neur <- return_list[41:44] # ASTROCYTES # Tissue ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_astro, fun = enrichKEGG) ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_tissue) dotplot(ck_kegg_tissue) # Age ck_kegg_age <- compareCluster(geneCluster = age_list_astro, fun = enrichKEGG) ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age) dotplot(ck_kegg_age) # Age*sex ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_astro, fun = enrichKEGG) ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age_sex) dotplot(ck_kegg_age_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # sex ck_kegg_sex <- compareCluster(geneCluster = sex_list_astro, fun = enrichKEGG) ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_sex) dotplot(ck_kegg_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # MICROGLIA # Tissue ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_mm, fun = enrichKEGG) ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_tissue) dotplot(ck_kegg_tissue)+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # Age ck_kegg_age <- compareCluster(geneCluster = age_list_mm, fun = enrichKEGG) ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age) dotplot(ck_kegg_age)+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # Age*sex ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_mm, fun = enrichKEGG) ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age_sex) dotplot(ck_kegg_age_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # sex ck_kegg_sex <- compareCluster(geneCluster = sex_list_mm, fun = enrichKEGG) ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_sex) dotplot(ck_kegg_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # VASCULAR # Tissue ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_vasc, fun = enrichKEGG) ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_tissue) dotplot(ck_kegg_tissue)+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # Age ck_kegg_age <- compareCluster(geneCluster = age_list_vasc, fun = enrichKEGG) ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age) dotplot(ck_kegg_age)+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # Age*sex ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_vasc, fun = enrichKEGG) ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age_sex) dotplot(ck_kegg_age_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # sex ck_kegg_sex <- compareCluster(geneCluster = sex_list_vasc, fun = enrichKEGG) ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_sex) dotplot(ck_kegg_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # neurons # Tissue ck_kegg_tissue <- compareCluster(geneCluster = tissue_list_neur, fun = enrichKEGG) ck_kegg_tissue <- setReadable(ck_kegg_tissue, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_tissue) dotplot(ck_kegg_tissue)+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # Age ck_kegg_age <- compareCluster(geneCluster = age_list_neur, fun = enrichKEGG) ck_kegg_age <- setReadable(ck_kegg_age, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age) dotplot(ck_kegg_age)+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # Age*sex ck_kegg_age_sex <- compareCluster(geneCluster = age_sex_list_neur, fun = enrichKEGG) ck_kegg_age_sex <- setReadable(ck_kegg_age_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_age_sex) dotplot(ck_kegg_age_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) # sex ck_kegg_sex <- compareCluster(geneCluster = sex_list_neur, fun = enrichKEGG) ck_kegg_sex <- setReadable(ck_kegg_sex, OrgDb = org.Hs.eg.db, keyType="ENTREZID") head(ck_kegg_sex) dotplot(ck_kegg_sex) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ``` ```{r} for(i in 1: length(files)){ data <- read.csv(files[i]) cluster_lev <- levels(as.factor(data$cluster)) for(k in 1:length(cluster_lev)){ curr_data <- subset(data, data$cluster == cluster_lev[k]) cluster <- curr_data[["cluster"]][1] cluster <- gsub(" ", "_", cluster) cell_t <- sapply(strsplit(files[i], "/"), `[`, 12) cell_t <- sapply(strsplit(cell_t, "_mark"), `[`, 1) label <- paste0(cell_t, "_", cluster) plot_label <- paste0(label, ".pdf") cell_type <- sapply(strsplit(cell_t, "_"), `[`, 1) fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 & curr_data$avg_log2FC > 0) if(nrow(fil_data) > 0){ genes <- fil_data$gene # Prepare input data ranks <- fil_data %>% dplyr::select(gene, avg_log2FC) ranks <- deframe(ranks) #RUN GO using clusterProfiler fil_data$Gene <- fil_data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$Gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) if(cell_type == "astro"){ srt <- readRDS(single_nuc_data[1]) }else if(cell_type == "mm"){ srt <- readRDS(single_nuc_data[2]) }else if(cell_type == "vasc"){ srt <- readRDS(single_nuc_data[3]) }else if(cell_type == "neuron" | cell_type == "age"){ srt <- readRDS(single_nuc_data[4]) }else{ print("check srt.") } #Filter genes #biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol # %in% srt@assays$RNA@var.features) #entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol # %in% srt@assays$RNA@var.features) # if(nrow(biotype_all_dataset) == 0){ biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(srt@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(srt@assays$RNA)) # } entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human", pvalueCutoff=0.01, qvalueCutoff = 0.3, pAdjustMethod = "none", readable=T) if(nrow(as.data.frame(modulesReactome)) > 0){ dot_plot <- dotplot(modulesReactome, showCategory=20) pdf(file = here(go_dir, plot_label), width = 12, height = 10) print(dot_plot) dev.off() x2 <- pairwise_termsim(modulesReactome) emap_plot <- emapplot(x2) pdf(file = here(emap_dir, plot_label), width = 12, height = 10) print(emap_plot) dev.off() #Alternative GO clasissification ent_uni <- rownames(srt@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID # Molecular function go_mf <- enrichGO(gene = as.character(allLLIDs), universe = ent_uni, OrgDb = org.Hs.eg.db, ont = "MF", pAdjustMethod = "BH", pvalueCutoff = 1, qvalueCutoff = 1, readable = TRUE) go_mf_plot <- dotplot(go_mf, showCategory = 20) pdf(file = here(go_dir_mol_f, plot_label), width = 12, height = 10) print(go_mf_plot) dev.off() # GO biological process ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) #head(ego) if(nrow(ego) > 1){ dp2 <- dotplot(ego, showCategory=20) pdf(file = here(go_dot, plot_label), width = 12, height = 10) print(dp2) dev.off() ## convert gene ID to Symbol edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID') cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE, colorEdge = TRUE) pdf(file = here(go_dir_cnetpl, plot_label), width = 14, height = 12) print(cnet_pl) dev.off() # edox2 <- enrichplot::pairwise_termsim(edox) # if(nrow(edox2) > 1){ # tree_plot <- treeplot(edox2) #pdf(file = here(go_dir_tree, plot_label), # The directory you want to save the file in # width = 16, # The width of the plot in inches # height = 10) # The height of the plot in inches # print(tree_plot) #dev.off() u_plot <- upsetplot(ego) pdf(file = here(go_dir_upset_plot, plot_label), width = 15, height = 10) print(u_plot) dev.off() } if(nrow(edox) > 1){ hm <- heatplot(edox, foldChange=ranks, showCategory=5) pdf(file = here(go_dir_hm, plot_label), # The directory you want to save the file in width = 15, # The width of the plot in inches height = 10) # The height of the plot in inches print(hm) dev.off() } } edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID') edox2_mf <- pairwise_termsim(edox_mf) tree_plot_mf <- treeplot(edox2_mf) pdf(file = here(go_dir_mol_f_tree, plot_label), width = 16, height = 10) print(tree_plot_mf) dev.off() } } } ``` # Create and save files that contain GO output for oligos ```{r} oligo_files srt <- nad_ol plot_df <- data.frame() for(i in 1: length(oligo_files)){ data <- read.csv(oligo_files[i]) #srt <- readRDS(single_nuc_data[i]) cluster_lev <- levels(as.factor(data$cluster)) for(k in 1:length(cluster_lev)){ curr_data <- subset(data, data$cluster == cluster_lev[k]) cluster <- curr_data[["cluster"]][1] #cell_t <- sapply(strsplit(oligo_files[i], "/"), `[`, 10) cell_t <- "oligos_" plot_label <- paste0(cell_t, "cluster", "GO_term_age_sex_region.csv") fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 & curr_data$avg_log2FC > 0) #RUN GO using clusterProfiler fil_data$Gene <- fil_data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$Gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(srt@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(srt@assays$RNA)) entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene allLLIDs<-allLLIDs[!is.na(allLLIDs)] ent_uni <- rownames(nad_ol@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) ego_df <- as.data.frame(ego) if(nrow(ego_df)> 0){ ego_df$Cell_type <- cell_t ego_df$cluster <- cluster_lev[k] if(nrow(plot_df) < 1){ plot_df <- ego_df }else{ plot_df <- rbind(plot_df, ego_df) } } } } dir.create(here("outs", "GO", "tissue_age_sex_table_output", "oligos"), recursive = TRUE) write.csv(plot_df, here("outs", "GO", "tissue_age_sex_table_output", "oligos", paste0(plot_label))) ``` # Create and save files that contain GO output for non_oligos ```{r} plot_df <- data.frame() for(i in 1: length(files)){ data <- read.csv(files[i]) cell_t <- sapply(strsplit(files[i], "/"), `[`, 10) if(cell_t == "astrocytes"){ srt <- readRDS(single_nuc_data[1]) }else if(cell_t == "microglia_macrophages"){ srt <- readRDS(single_nuc_data[2]) }else if(cell_t == "vascular_cells"){ srt <- readRDS(single_nuc_data[3]) }else if(cell_t == "neurons"){ srt <- readRDS(single_nuc_data[4]) }else{ print("check srt.") } cluster_lev <- levels(as.factor(data$cluster)) for(k in 1:length(cluster_lev)){ curr_data <- subset(data, data$cluster == cluster_lev[k]) cluster <- curr_data[["cluster"]][1] plot_label <- paste0(cell_t, "_", cluster, "_GO_term_age_sex_region.csv") fil_data <- subset(curr_data, curr_data$p_val_adj < 0.05 & curr_data$avg_log2FC > 0) if(nrow(fil_data) > 0){ #RUN GO using clusterProfiler fil_data$Gene <- fil_data$gene #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$Gene),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$Gene, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(srt@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(srt@assays$RNA)) entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene allLLIDs<-allLLIDs[!is.na(allLLIDs)] ent_uni <- rownames(nad_ol@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) ego_df <- as.data.frame(ego) if(nrow(ego_df)> 0){ ego_df$Cell_type <- cell_t ego_df$cluster <- cluster_lev[k] if(nrow(plot_df) < 1){ plot_df <- ego_df }else{ plot_df <- rbind(plot_df, ego_df) } } } } } dir.create(here("outs", "GO", "tissue_age_sex_table_output", "non_oligos"), recursive = TRUE) write.csv(plot_df, here("outs", "GO", "tissue_age_sex_table_output", "non_oligos", "non_oligos_GO_term_age_sex_region.csv")) ``` # Session Info ```{r} sessionInfo() ```