--- title: '[clem] GO per condition' author: "Nina-Lydia Kazakou" date: '2022-10-03' output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(SingleCellExperiment) library(Seurat) library(tidyverse) library(Matrix) library(scales) library(cowplot) library(RCurl) library(ggplot2) library(edgeR) library(dplyr) library(ggsci) library(here) library(clusterProfiler) library(biomaRt) library(ReactomePA) library(org.Hs.eg.db) library(enrichplot) library(RColorBrewer) library(plotrix) ``` ### Colour Palette ```{r load_palette} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4) ``` ### Set up for distinctive colours ```{r} plot_colour <- grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)] ``` ### Load object ```{r load_object} srt <- readRDS(here("data", "Human_R_Analysis", "Clemastine_DMSO", "clem_srt_ol.rds")) ``` ```{r UMAP_Oligos} DimPlot(srt, cols = mycoloursP[15:40], pt.size = 1.5, label = TRUE, group.by = "new_annot") & NoLegend() ``` ### Create Directory ```{r eval=FALSE} dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition")) ``` ### Control vs Treated markers (for the whole dataset) ```{r} condition <- read.csv(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "DGE", "Treatment", "control_vs_treated_de_markers.csv")) head(condition) ``` ### Generate marker lists per condition ```{r} dim(condition) # Positive avg_log2FC represents the first comparison group (in this case DMSO) & negative avg_log2FC represents the second comparison group (in this case clem) logFC = 0 ctrl <- subset(condition, condition$avg_log2FC > logFC) dim(ctrl) clem <- subset(condition, condition$avg_log2FC < logFC) dim(clem) ## convert avg_log2FC of clem into postive values to use together with crtl in a loop clem$avg_log2FC <- abs(clem$avg_log2FC) clem$condition <- "Clemastine" ctrl$condition <- "DMSO" dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists")) write.csv(ctrl, here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists", "control_genes.csv")) write.csv(clem, here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists", "clem_genes.csv")) ``` ### Read in de gene lists ```{r} file_path <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists") files <- list.files(file_path) ``` ### Prepare marts to convert to entrez ```{r message=FALSE, warning=FALSE} listMarts() ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl", host = "www.ensembl.org") listDatasets(ensembl) attributes = listAttributes(ensembl) named_list <- list() for(i in 1: length(files)){ data <- read.csv(here(file_path, files[i])) cluster <- data[["condition"]][1] fil_data <- subset(data, data$avg_log2FC >= 0.5 & data$p_val <= 0.05) genes <- fil_data$X # convert to ENTREZID genes_ent <- bitr(genes, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) named_list[[i]] <- genes_ent$ENTREZID names(named_list)[i] <- cluster } str(named_list) ck_go <- compareCluster(geneCluster = named_list, fun = enrichGO, OrgDb = org.Hs.eg.db) ck_go <- setReadable(ck_go, OrgDb = org.Hs.eg.db, keyType="ENTREZID") ``` ```{r} ck_go ``` ```{r fig.height=15, fig.width=20} dp <- dotplot(ck_go) + theme(axis.text.x = element_text(angle = 90), axis.text = element_text(size = 8)) ``` ```{r eval=FALSE} dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Plots")) ``` ```{r eval=FALSE} pdf(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Plots", "oligos_enrich_GO.pdf"), paper="a4", width=15, height=20) print(dp) dev.off ``` ### Prepare folders ```{r eval=FALSE} dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Cluster_Plots")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "EMAP_Plots")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Biological_Process")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Symbol")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Tree")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Upset_Plots")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "HeatMaps")) dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions_Tree")) go_dir <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Cluster_Plots") emap_dir <-here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "EMAP_Plots") go_dir_mol_f <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions") go_dot <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Biological_Process") go_dir_cnetpl <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Symbol") go_dir_tree <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Tree") go_dir_upset_plot <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Upset_Plots") go_dir_hm <-here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "HeatMaps") go_dir_mol_f_tree <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions_Tree") ``` ### Account for package update ```{r eval=FALSE} srt@assays$RNA <- srt@assays$originalexp ``` ### Create GO plots ```{r warning=FALSE, eval=FALSE} for(i in 1: length(files)){ data <- read.csv(here(file_path, files[i])) cluster <- data[["condition"]][1] plot_label <- paste0(cluster, ".pdf") # filter data fil_data <- subset(data, data$p_val_adj <= 0.05 & avg_log2FC >= 0.5) # Prepare input data ranks <- fil_data %>% dplyr::select(X, avg_log2FC) ranks <- deframe(ranks) #RUN GO using clusterProfiler fil_data$Gene <- fil_data$X #remove any duplicates (sanity check for me) genemodulesGO <- fil_data[!duplicated(fil_data$X),] Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id", "entrezgene_id", "gene_biotype", "hgnc_symbol"), filters = "hgnc_symbol", values = fil_data$X, mart = ensembl, uniqueRows = FALSE) Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype']) biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol %in% rownames(srt@assays$RNA)) entrezID <- subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol %in% rownames(srt@assays$RNA)) entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),] #you might need to remove NAs entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),] allLLIDs <- entrezmatched$entrezgene modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human", pvalueCutoff=0.01, qvalueCutoff = 0.3, readable=T) ### Cluster_Plots if(nrow(as.data.frame(modulesReactome)) > 0){ dot_plot <- dotplot(modulesReactome, showCategory=20) pdf(file = here(go_dir, plot_label), width = 12, height = 12) print(dot_plot) dev.off() ### EMAP_Plots x2 <- pairwise_termsim(modulesReactome) emap_plot <- emapplot(x2) pdf(file = here(emap_dir, plot_label), width = 12, height = 12) print(emap_plot) dev.off() # Alternative GO clasissification ent_uni <- rownames(srt@assays$RNA) ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db) ent_uni <- ent_uni_entrez$ENTREZID ## Molecular function go_mf <- enrichGO(gene = as.character(allLLIDs), universe = ent_uni, OrgDb = org.Hs.eg.db, ont = "MF", pvalueCutoff = 1, qvalueCutoff = 1, readable = TRUE) ### Molecular_Function_Plots go_mf_plot <- dotplot(go_mf, showCategory = 20) pdf(file = here(go_dir_mol_f, plot_label), width = 12, height = 10) print(go_mf_plot) dev.off() # GO biological process ego <- enrichGO(gene = as.character(allLLIDs), OrgDb = org.Hs.eg.db, ont = "BP", universe = ent_uni ) if(nrow(ego) > 0){ dp2 <- dotplot(ego, showCategory=20) ### Biological_Processes_Plots pdf(file = here(go_dot, plot_label), width = 12, height = 10) print(dp2) dev.off() ## convert gene ID to Symbol edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID') cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE, colorEdge = TRUE) ### Symbol_Plots pdf(file = here(go_dir_cnetpl, plot_label), width = 14, height = 12) print(cnet_pl) dev.off() ### Tree_Plots edox2 <- enrichplot::pairwise_termsim(edox) if(nrow(edox2) > 1){ tree_plot <- treeplot(edox2) pdf(file = here(go_dir_tree, plot_label), width = 16, height = 10) print(tree_plot) dev.off() ### Upset_Plots u_plot <- upsetplot(ego) pdf(file = here(go_dir_upset_plot, plot_label), width = 15, height = 10) print(u_plot) dev.off() if(nrow(edox) > 1){ hm <- heatplot(edox, foldChange=ranks, showCategory=5) pdf(file = here(go_dir_hm, plot_label), width = 15, height = 10) print(hm) dev.off() } } ### Molecular_Function_Tree_Plots edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID') edox2_mf <- pairwise_termsim(edox_mf) tree_plot_mf <- treeplot(edox2_mf) pdf(file = here(go_dir_mol_f_tree, plot_label), width = 16, height = 10) print(tree_plot_mf) dev.off() } } } ``` ##### SessionInfo <details> <summary> Click to expand </summary> ```{r} sessionInfo() ``` </details>