scr/oligodendroglia/DE analysis_Oligos_forBestRes.Rmd · CorticalOrganoids

---
title: "DE analysis_Oligos_forBestRes"
author: "Nina-Lydia Kazakou"
date: "27/06/2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```

# Set the colour pallete 
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

```{r}
co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds"))

dim(co.oligos) # 22735 301
head(co.oligos@meta.data)
```

```{r}
DefaultAssay(co.oligos) <- "RNA"
```

```{r}
 # General Plots
DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[5:7], group.by = "Phase") & NoAxes()
DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[12:16], group.by = "Sample") & NoAxes()
DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[21:22], group.by = "scDblFinder.class") & NoAxes()
```



# Identification of marker genes
```{r}
int_res_all_mark <- function(seur_obj, 
                             int_cols,
                             only_pos = TRUE,
                             min_pct = 0.25,
                             logfc_threshold = 0.25,
                             fil_pct_1 = 0.25,
                             fil_pct_2 = 0.6,
                             avg_log = 1.2,
                             save_dir = getwd(),
                             test_use = "MAST"
                             ){
  for(i in 1:length(int_cols)){
    Idents(seur_obj) <- int_cols[i]
    all_mark <- FindAllMarkers(seur_obj, 
                               only.pos = only_pos, 
                               min.pct = min_pct, 
                               logfc.threshold = logfc_threshold,
                               test.use = test_use)
    fil_mark<- subset(all_mark, 
                      all_mark$pct.1 > fil_pct_1 & 
                        all_mark$pct.2 < fil_pct_2 )
    
    write.csv(all_mark, paste(save_dir, "/all_mark", int_cols[i], ".csv", sep = "" ))
    write.csv(fil_mark, paste(save_dir, "/fil_mark", int_cols[i], ".csv", sep = "" ))
    
  }
}

dir.create(here("outs", "Oligos_only", "MarkerGenes"))

int_res_all_mark(co.oligos, 
                 int_cols = c("RNA_snn_res.0.05",
                              "RNA_snn_res.0.1",
                              "RNA_snn_res.0.2",
                              "RNA_snn_res.0.3",
                              "RNA_snn_res.0.6",
                              "RNA_snn_res.0.7",
                              "RNA_snn_res.0.9",
                              "RNA_snn_res.1"),
                 save_dir = here("outs", "Oligos_only", "MarkerGenes"))

```

Resolutions 0.5 & 0.6 look very similar; a single cell is allocated within a different cluster and that is all the difference. The same happens between resolution 0.7 & 0.9, so I decided to use the later for comparisons in both cases. 
Here, I will plot the filtered genes for both resolutions to see which one is more suitable. 
```{r}
Idents(co.oligos) <- "RNA_snn_res.0.6"
fil.de.markers.0.6 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.0.6.csv"))
head(fil.de.markers.0.6)

 # Identify the top10 genes
top10_res.0.6 <- fil.de.markers.0.6 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes
top10_res.0.6

 # Calculate the cluster averages to plot those as well
cluster_averages_res.0.6 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.0.6",
                                      return.seurat = TRUE)

cluster_averages_res.0.6@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.0.6)), 
                                             levels = c(0, 1, 2, 3))
```

```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DoHeatmap(co.oligos, features = top10_res.0.6$gene, label = TRUE, group.by = "RNA_snn_res.0.6", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)

DoHeatmap(object = cluster_averages_res.0.6, features = top10_res.0.6$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F)
```


```{r}
Idents(co.oligos) <- "RNA_snn_res.0.9"
fil.de.markers.0.9 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.0.9.csv"))
head(fil.de.markers.0.9)

 # Identify the top10 genes
top10_res.0.9 <- fil.de.markers.0.9 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes
top10_res.0.9

 # Calculate the cluster averages to plot those as well
cluster_averages_res.0.9 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.0.9",
                                      return.seurat = TRUE)

cluster_averages_res.0.9@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.0.9)), 
                                             levels = c(0, 1, 2, 3, 4))
```

```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DoHeatmap(co.oligos, features = top10_res.0.9$gene, label = TRUE, group.by = "RNA_snn_res.0.9", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)

DoHeatmap(object = cluster_averages_res.0.9, features = top10_res.0.9$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F)
```

Maybe I should also look at resolution 1
```{r}
Idents(co.oligos) <- "RNA_snn_res.1"
fil.de.markers.1 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.1.csv"))
head(fil.de.markers.1)

 # Identify the top10 genes
top10_res.1 <- fil.de.markers.1 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes
top10_res.1

 # Calculate the cluster averages to plot those as well
cluster_averages_res.1 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.1",
                                      return.seurat = TRUE)

cluster_averages_res.1@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.1)), 
                                             levels = c(0, 1, 2, 3, 4, 5))
```

```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DoHeatmap(co.oligos, features = top10_res.1$gene, label = TRUE, group.by = "RNA_snn_res.1", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)

DoHeatmap(object = cluster_averages_res.1, features = top10_res.1$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F)
```

```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE}
 # Oligolineage
FeaturePlot(co.oligos, features = c("OLIG1", "OLIG2", "SOX10", "PPP1R16B"), ncol = 2, pt.size = 1) & NoAxes()
```


```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE}
 # OPCs
FeaturePlot(co.oligos, features = c("PDGFRA", "CSPG4",  "GPR17",   "PTPRZ1", "OLIG1",  "OLIG2", "PCDH15", "PTGDS", "BCAN", "CABLES1", "GFRA1", "LINC01965"), ncol = 2, pt.size = 1) & NoAxes()
```

```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE}
 # COPs
FeaturePlot(co.oligos, features = c("ETV1", "CHST9",  "MYT1",  "TENM2",  "CAMK2A",  "KCNQ5", "SEMA5B", "SYT1", "GPR17", "BMPER", "EPHB1", "ARHGAP24"), pt.size = 1, ncol = 2) & NoAxes()
```

```{r fig.width=7, fig.height=10, fig.fullwidth=TRUE}
 # Oligodendrocytes
FeaturePlot(co.oligos, features = c("PLP1", "CNP", "MAG", "MOG",  "MOBP", "MBP", "SOX10" ), pt.size = 1, ncol = 2) & NoAxes()
```

```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE}
 # More Oligodendrocytes
FeaturePlot(co.oligos, features = c("SNAP25", "CNTN1", "FRY", "PLXDC2", "PTPRM",  "FOS","VIM", "JUNB", "AFF3", "FSTL5", "SGCZ", "MDGA2" ), pt.size = 1, ncol = 2) & NoAxes()
```

Some markers from the monolayer dataset that helped me identify oligo clusters there
```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE}
 # OAPCs
FeaturePlot(co.oligos, features = c("HOPX", "SPARCL1", "SPARC", "GFAP"), ncol = 2, pt.size = 1) & NoAxes()
```

```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE}
 # Primitive OPCs
FeaturePlot(co.oligos, features = c("PPP1R14B", "DLL1", "DLL3", "EGFR"), ncol = 2, pt.size = 1) & NoAxes()
```

```{r}
 # COPs
FeaturePlot(co.oligos, features = c("MYRF", "NKX2-2", "MYC"), ncol = 2, pt.size = 1) & NoAxes()
```


```{r}
sessionInfo()
```