Chimeras / Scripts / Human_R_Analysis / Clemastine_DMSO / clem_GO_condition.Rmd
clem_GO_condition.Rmd
Raw
---
title: '[clem] GO per condition'
author: "Nina-Lydia Kazakou"
date: '2022-10-03'
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Set-up

### Load libraries

```{r message=FALSE, warning=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(tidyverse)
library(Matrix)
library(scales)
library(cowplot)
library(RCurl)
library(ggplot2)
library(edgeR)
library(dplyr)
library(ggsci)  
library(here)
library(clusterProfiler)
library(biomaRt)
library(ReactomePA)
library(org.Hs.eg.db)
library(enrichplot)
library(RColorBrewer)
library(plotrix)
```

### Colour Palette

```{r load_palette}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
```

### Set up for distinctive colours

```{r}
plot_colour <- grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)]
```

### Load object

```{r load_object}
srt <- readRDS(here("data", "Human_R_Analysis", "Clemastine_DMSO", "clem_srt_ol.rds"))
```

```{r UMAP_Oligos}
DimPlot(srt, cols = mycoloursP[15:40], pt.size = 1.5, label = TRUE, group.by = "new_annot") & NoLegend()
```

### Create Directory

```{r eval=FALSE}
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition"))
```

### Control vs Treated markers (for the whole dataset)
```{r}
condition <- read.csv(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "DGE", "Treatment", "control_vs_treated_de_markers.csv"))

head(condition)
```

### Generate marker lists per condition
```{r}
dim(condition)

# Positive avg_log2FC represents the first comparison group (in this case DMSO) & negative avg_log2FC represents the second comparison group (in this case clem)
logFC = 0

ctrl <- subset(condition, condition$avg_log2FC > logFC)
dim(ctrl)
clem <- subset(condition, condition$avg_log2FC < logFC)
dim(clem)
## convert avg_log2FC of clem into postive values to use together with crtl in a loop
clem$avg_log2FC <- abs(clem$avg_log2FC)

clem$condition <- "Clemastine"
ctrl$condition <- "DMSO"

dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists"))

write.csv(ctrl, here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists", "control_genes.csv"))
write.csv(clem, here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists", "clem_genes.csv"))
```

### Read in de gene lists

```{r}
file_path <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "gene_lists")

files <- list.files(file_path)
```

### Prepare marts to convert to entrez
```{r message=FALSE, warning=FALSE}
listMarts()
ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl",
                      host = "www.ensembl.org")
    
    
listDatasets(ensembl)
attributes = listAttributes(ensembl)
named_list <- list()
for(i in 1: length(files)){
  data <- read.csv(here(file_path, files[i]))
  cluster <- data[["condition"]][1]
  fil_data <- subset(data, data$avg_log2FC >= 0.5 & data$p_val <= 0.05)
  
  genes <- fil_data$X
  
  # convert to ENTREZID
  
  genes_ent <- bitr(genes, fromType = "SYMBOL",
            toType = "ENTREZID",
            OrgDb = org.Hs.eg.db)
  
  named_list[[i]] <- genes_ent$ENTREZID
  names(named_list)[i] <- cluster
}
str(named_list)
ck_go <- compareCluster(geneCluster = named_list, fun = enrichGO, 
                          OrgDb = org.Hs.eg.db)
ck_go <- setReadable(ck_go, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
```

```{r}
ck_go
```

```{r fig.height=15, fig.width=20}
dp <- dotplot(ck_go) +
  theme(axis.text.x = element_text(angle = 90), axis.text = element_text(size = 8))
```


```{r eval=FALSE}
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Plots"))
```

```{r eval=FALSE}
pdf(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Plots", "oligos_enrich_GO.pdf"), paper="a4", width=15, height=20)
print(dp)
dev.off
```

### Prepare folders

```{r eval=FALSE}
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Cluster_Plots"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "EMAP_Plots"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Biological_Process"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Symbol"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Tree"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Upset_Plots"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "HeatMaps"))
dir.create(here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions_Tree"))

go_dir <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Cluster_Plots")
emap_dir <-here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "EMAP_Plots")
go_dir_mol_f <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions")
go_dot <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Biological_Process")
go_dir_cnetpl <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Symbol")
go_dir_tree <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Tree")
go_dir_upset_plot <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Upset_Plots")
go_dir_hm <-here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "HeatMaps")
go_dir_mol_f_tree <- here("outs", "filter_70", "Human_R_Analysis", "Clemastine_DMSO", "GO_Condition", "Molecular_Functions_Tree")
```

### Account for package update

```{r eval=FALSE}
srt@assays$RNA <- srt@assays$originalexp
```

### Create GO plots


```{r warning=FALSE, eval=FALSE}
for(i in 1: length(files)){
  data <- read.csv(here(file_path, files[i]))
  cluster <- data[["condition"]][1]
  plot_label <- paste0(cluster, ".pdf")
  
  # filter data
  fil_data <- subset(data, data$p_val_adj <= 0.05 &
                       avg_log2FC >= 0.5)
  
  # Prepare input data
  ranks <- fil_data %>% dplyr::select(X, avg_log2FC)
  ranks <- deframe(ranks)
  #RUN GO using clusterProfiler
  fil_data$Gene <- fil_data$X
  #remove any duplicates (sanity check for me)
  genemodulesGO <- fil_data[!duplicated(fil_data$X),]
  
  Biomart_gencode_ensembl84_biotypes <- getBM(attributes=c("ensembl_gene_id",
                                                           "entrezgene_id",
                                                           "gene_biotype",
                                                           "hgnc_symbol"), 
                                              filters = "hgnc_symbol", 
                                              values = fil_data$X, 
                                              mart = ensembl,
                                              uniqueRows = FALSE)
  
  
  Biomart_gencode_ensembl84_biotypes[, 'gene_biotype'] <- 
    as.factor(Biomart_gencode_ensembl84_biotypes[,'gene_biotype'])
  
  
  biotype_all_dataset <- subset(Biomart_gencode_ensembl84_biotypes, hgnc_symbol 
                                %in% rownames(srt@assays$RNA))
  
  entrezID <-  subset(biotype_all_dataset, biotype_all_dataset$hgnc_symbol
                      %in% rownames(srt@assays$RNA))
  
  
  entrezmatched <- entrezID[match(genemodulesGO$Gene,entrezID$hgnc_symbol),]
  
  #you might need to remove NAs
  entrezmatched <- entrezmatched[! apply(entrezmatched[,c(1,3)], 1,function (x) anyNA(x)),]
  allLLIDs <- entrezmatched$entrezgene
  
  modulesReactome <- enrichPathway(gene=as.character(allLLIDs),organism="human",
                                   pvalueCutoff=0.01,
                                   qvalueCutoff = 0.3, 
                                   readable=T)
  
  
  ### Cluster_Plots 
  if(nrow(as.data.frame(modulesReactome)) > 0){
    dot_plot <- dotplot(modulesReactome, showCategory=20)
    
    pdf(file = here(go_dir, plot_label),   
        width = 12, 
        height = 12) 
    print(dot_plot)
    dev.off()
    
    ### EMAP_Plots     
    x2 <- pairwise_termsim(modulesReactome) 
    emap_plot <- emapplot(x2)
    
    pdf(file = here(emap_dir, plot_label),   
        width = 12, 
        height = 12) 
    print(emap_plot)
    dev.off()
    
    # Alternative GO clasissification
    ent_uni <- rownames(srt@assays$RNA)
    ent_uni_entrez <- bitr(ent_uni, fromType = "SYMBOL",
                           toType = "ENTREZID",
                           OrgDb = org.Hs.eg.db)
    
    ent_uni <- ent_uni_entrez$ENTREZID
    
    ## Molecular function
    go_mf <- enrichGO(gene  = as.character(allLLIDs),
                      universe      = ent_uni,
                      OrgDb         = org.Hs.eg.db,
                      ont           = "MF",
          
                      pvalueCutoff  = 1,
                      qvalueCutoff  = 1,
                      readable      = TRUE)
    
    ### Molecular_Function_Plots         
    go_mf_plot <- dotplot(go_mf, showCategory = 20)
    pdf(file = here(go_dir_mol_f, plot_label),   
        width = 12, 
        height = 10) 
    print(go_mf_plot)
    dev.off()
    
    # GO biological process
    ego <- enrichGO(gene  = as.character(allLLIDs),
                    OrgDb = org.Hs.eg.db,
                    ont = "BP",
                    universe = ent_uni )
    
    if(nrow(ego) > 0){
      dp2 <- dotplot(ego, showCategory=20)
      
      ### Biological_Processes_Plots    
      pdf(file = here(go_dot, plot_label),   
          width = 12, 
          height = 10) 
      print(dp2)
      dev.off()
      
      ## convert gene ID to Symbol
      edox <- setReadable(ego, 'org.Hs.eg.db', 'ENTREZID')
      cnet_pl <- cnetplot(edox, foldChange=ranks, circular = TRUE, 
                          colorEdge = TRUE)
      
      ### Symbol_Plots    
      pdf(file = here(go_dir_cnetpl, plot_label),   
          width = 14, 
          height = 12) 
      print(cnet_pl)
      dev.off()
      
      ### Tree_Plots      
      edox2 <- enrichplot::pairwise_termsim(edox)    
      if(nrow(edox2) > 1){
        tree_plot <- treeplot(edox2)
        
        pdf(file = here(go_dir_tree, plot_label),  
            width = 16, 
            height = 10) 
        print(tree_plot)
        dev.off()
        
        ### Upset_Plots               
        u_plot <- upsetplot(ego)
        
        pdf(file = here(go_dir_upset_plot, plot_label),   
            width = 15, 
            height = 10) 
        print(u_plot)
        dev.off()
        
        if(nrow(edox) > 1){
          hm <- heatplot(edox, foldChange=ranks, showCategory=5)
          
          pdf(file = here(go_dir_hm, plot_label),   
              width = 15, 
              height = 10) 
          print(hm)
          dev.off()
        }
      }
      
      ### Molecular_Function_Tree_Plots        
      edox_mf <- setReadable(go_mf, 'org.Hs.eg.db', 'ENTREZID')
      edox2_mf <- pairwise_termsim(edox_mf)
      tree_plot_mf <- treeplot(edox2_mf)
      
      pdf(file = here(go_dir_mol_f_tree, plot_label),   
          width = 16, 
          height = 10) 
      print(tree_plot_mf)
      dev.off()
    }
  }
}
```

##### SessionInfo

<details>

<summary>

Click to expand

</summary>

```{r}
sessionInfo()
```

</details>