---
title: "DE analysis_Oligos_forBestRes"
author: "Nina-Lydia Kazakou"
date: "27/06/2021"
output: html_document
---
# load libraries
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```
# Set the colour pallete
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```
```{r}
co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds"))
dim(co.oligos) # 22735 301
head(co.oligos@meta.data)
```
```{r}
DefaultAssay(co.oligos) <- "RNA"
```
```{r}
# General Plots
DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[5:7], group.by = "Phase") & NoAxes()
DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[12:16], group.by = "Sample") & NoAxes()
DimPlot(co.oligos, pt.size = 1.5, cols = mycoloursP[21:22], group.by = "scDblFinder.class") & NoAxes()
```
# Identification of marker genes
```{r}
int_res_all_mark <- function(seur_obj,
int_cols,
only_pos = TRUE,
min_pct = 0.25,
logfc_threshold = 0.25,
fil_pct_1 = 0.25,
fil_pct_2 = 0.6,
avg_log = 1.2,
save_dir = getwd(),
test_use = "MAST"
){
for(i in 1:length(int_cols)){
Idents(seur_obj) <- int_cols[i]
all_mark <- FindAllMarkers(seur_obj,
only.pos = only_pos,
min.pct = min_pct,
logfc.threshold = logfc_threshold,
test.use = test_use)
fil_mark<- subset(all_mark,
all_mark$pct.1 > fil_pct_1 &
all_mark$pct.2 < fil_pct_2 )
write.csv(all_mark, paste(save_dir, "/all_mark", int_cols[i], ".csv", sep = "" ))
write.csv(fil_mark, paste(save_dir, "/fil_mark", int_cols[i], ".csv", sep = "" ))
}
}
dir.create(here("outs", "Oligos_only", "MarkerGenes"))
int_res_all_mark(co.oligos,
int_cols = c("RNA_snn_res.0.05",
"RNA_snn_res.0.1",
"RNA_snn_res.0.2",
"RNA_snn_res.0.3",
"RNA_snn_res.0.6",
"RNA_snn_res.0.7",
"RNA_snn_res.0.9",
"RNA_snn_res.1"),
save_dir = here("outs", "Oligos_only", "MarkerGenes"))
```
Resolutions 0.5 & 0.6 look very similar; a single cell is allocated within a different cluster and that is all the difference. The same happens between resolution 0.7 & 0.9, so I decided to use the later for comparisons in both cases.
Here, I will plot the filtered genes for both resolutions to see which one is more suitable.
```{r}
Idents(co.oligos) <- "RNA_snn_res.0.6"
fil.de.markers.0.6 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.0.6.csv"))
head(fil.de.markers.0.6)
# Identify the top10 genes
top10_res.0.6 <- fil.de.markers.0.6 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes
top10_res.0.6
# Calculate the cluster averages to plot those as well
cluster_averages_res.0.6 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.0.6",
return.seurat = TRUE)
cluster_averages_res.0.6@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.0.6)),
levels = c(0, 1, 2, 3))
```
```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DoHeatmap(co.oligos, features = top10_res.0.6$gene, label = TRUE, group.by = "RNA_snn_res.0.6", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)
DoHeatmap(object = cluster_averages_res.0.6, features = top10_res.0.6$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F)
```
```{r}
Idents(co.oligos) <- "RNA_snn_res.0.9"
fil.de.markers.0.9 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.0.9.csv"))
head(fil.de.markers.0.9)
# Identify the top10 genes
top10_res.0.9 <- fil.de.markers.0.9 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes
top10_res.0.9
# Calculate the cluster averages to plot those as well
cluster_averages_res.0.9 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.0.9",
return.seurat = TRUE)
cluster_averages_res.0.9@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.0.9)),
levels = c(0, 1, 2, 3, 4))
```
```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DoHeatmap(co.oligos, features = top10_res.0.9$gene, label = TRUE, group.by = "RNA_snn_res.0.9", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)
DoHeatmap(object = cluster_averages_res.0.9, features = top10_res.0.9$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F)
```
Maybe I should also look at resolution 1
```{r}
Idents(co.oligos) <- "RNA_snn_res.1"
fil.de.markers.1 <- read.csv(here("outs", "Oligos_only", "MArkerGenes", "fil_markRNA_snn_res.1.csv"))
head(fil.de.markers.1)
# Identify the top10 genes
top10_res.1 <- fil.de.markers.1 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes
top10_res.1
# Calculate the cluster averages to plot those as well
cluster_averages_res.1 <- AverageExpression(co.oligos, group.by = "RNA_snn_res.1",
return.seurat = TRUE)
cluster_averages_res.1@meta.data$cluster <- factor(levels(as.factor(co.oligos@meta.data$RNA_snn_res.1)),
levels = c(0, 1, 2, 3, 4, 5))
```
```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DoHeatmap(co.oligos, features = top10_res.1$gene, label = TRUE, group.by = "RNA_snn_res.1", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)
DoHeatmap(object = cluster_averages_res.1, features = top10_res.1$gene, label = TRUE, group.by = "cluster", group.colors = mycoloursP[6:40], draw.lines = F)
```
```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE}
# Oligolineage
FeaturePlot(co.oligos, features = c("OLIG1", "OLIG2", "SOX10", "PPP1R16B"), ncol = 2, pt.size = 1) & NoAxes()
```
```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE}
# OPCs
FeaturePlot(co.oligos, features = c("PDGFRA", "CSPG4", "GPR17", "PTPRZ1", "OLIG1", "OLIG2", "PCDH15", "PTGDS", "BCAN", "CABLES1", "GFRA1", "LINC01965"), ncol = 2, pt.size = 1) & NoAxes()
```
```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE}
# COPs
FeaturePlot(co.oligos, features = c("ETV1", "CHST9", "MYT1", "TENM2", "CAMK2A", "KCNQ5", "SEMA5B", "SYT1", "GPR17", "BMPER", "EPHB1", "ARHGAP24"), pt.size = 1, ncol = 2) & NoAxes()
```
```{r fig.width=7, fig.height=10, fig.fullwidth=TRUE}
# Oligodendrocytes
FeaturePlot(co.oligos, features = c("PLP1", "CNP", "MAG", "MOG", "MOBP", "MBP", "SOX10" ), pt.size = 1, ncol = 2) & NoAxes()
```
```{r fig.width=7, fig.height=18, fig.fullwidth=TRUE}
# More Oligodendrocytes
FeaturePlot(co.oligos, features = c("SNAP25", "CNTN1", "FRY", "PLXDC2", "PTPRM", "FOS","VIM", "JUNB", "AFF3", "FSTL5", "SGCZ", "MDGA2" ), pt.size = 1, ncol = 2) & NoAxes()
```
Some markers from the monolayer dataset that helped me identify oligo clusters there
```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE}
# OAPCs
FeaturePlot(co.oligos, features = c("HOPX", "SPARCL1", "SPARC", "GFAP"), ncol = 2, pt.size = 1) & NoAxes()
```
```{r fig.width=7, fig.height=7, fig.fullwidth=TRUE}
# Primitive OPCs
FeaturePlot(co.oligos, features = c("PPP1R14B", "DLL1", "DLL3", "EGFR"), ncol = 2, pt.size = 1) & NoAxes()
```
```{r}
# COPs
FeaturePlot(co.oligos, features = c("MYRF", "NKX2-2", "MYC"), ncol = 2, pt.size = 1) & NoAxes()
```
```{r}
sessionInfo()
```