CorticalOrganoids / scr / AllCells / Final annotation of the whole dataset.Rmd
Final annotation of the whole dataset.Rmd
Raw
---
title: "Final annotation of the whole dataset"
author: "Nina-Lydia Kazakou"
date: "23/06/2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```

# Set the colour pallete 
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))

dim(norm.co.seu) # 22735 12923
head(norm.co.seu@meta.data)
```

 # Plot Cells
```{r}
DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "man_clust", cols= mycoloursP) & NoAxes()
```

# Assign Cluster IDs
```{r}
norm.co.seu@meta.data$ClusterID <- norm.co.seu@meta.data$man_clust

current.ids <- c("0", "1", "2", "3", "4_A", "4_B", "5", "6", "7", "8", "9", "10_A", "10_B", "11", "12", "13")
new.ids <- c("Neurons", "Neurons", "Immature Neurons", "Neurons", "Glioblasts", "pre-OPC", "Astrocytes", "Neurons", "Immature Neurons", "Pericytes", "Early Neurons", "oRG/Astrocytes", "Radial Glia", "CyP", "Oligodendroglia", "Astrocytes")
norm.co.seu@meta.data$ClusterID <- plyr::mapvalues(x = norm.co.seu@meta.data$ClusterID, from = current.ids, to = new.ids)

head(norm.co.seu@meta.data)
```

```{r}
Idents(norm.co.seu) <- "ClusterID"
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols= mycoloursP) & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, cols= mycoloursP) & NoAxes() & NoLegend()
```
```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE}
 # Astrocytes
FeaturePlot(norm.co.seu, features = c("AQP4", "GFAP", "S100B", "SPON1", "SPARCL1", "SLC1A3"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE}
 # Astrocytes
VlnPlot(norm.co.seu, features = c("AQP4", "GFAP", "S100B", "SPON1", "SPARCL1", "SLC1A3"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Oligodendroglia
FeaturePlot(norm.co.seu, features = c("OLIG1", "OLIG2", "SOX10"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Oligodendroglia
VlnPlot(norm.co.seu, features = c("OLIG1", "OLIG2", "SOX10"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```


```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Pre-OPC
FeaturePlot(norm.co.seu, features = c("EGFR", "DLL1", "DLL3", "BCAN"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Pre-OPC
VlnPlot(norm.co.seu, features = c("EGFR", "DLL1", "DLL3", "BCAN"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} 
 # oRG
  FeaturePlot(norm.co.seu, features = c("HOPX", "PTPRZ1", "TNC", "LIFR"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} 
 # oRG
VlnPlot(norm.co.seu, features = c("HOPX", "PTPRZ1", "TNC", "LIFR"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE}
 # Radial Glia
FeaturePlot(norm.co.seu, features = c("VIM", "PAX6", "HES1", "HES5", "SOX2"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE}
 # Radial Glia
VlnPlot(norm.co.seu, features = c("VIM", "PAX6", "HES1", "HES5", "SOX2"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
 # CyP
FeaturePlot(norm.co.seu, features = c("MKI67", "TOP2A"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
 # CyP
VlnPlot(norm.co.seu, features = c("MKI67", "TOP2A"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
 # Pericytes
FeaturePlot(norm.co.seu, features = c("PDGFRB", "VCAM1"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
 # Pericytes
VlnPlot(norm.co.seu, features = c("PDGFRB", "VCAM1"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Immature neurons
FeaturePlot(norm.co.seu, features = c("STMN2", "STMN1", "TUBB3", "SLC32A1"), ncol = 2, pt.size = 0.01) & NoAxes()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Immature neurons
VlnPlot(norm.co.seu, features = c("STMN2", "STMN1", "TUBB3", "SLC32A1"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
 # Inhibitory neurons
FeaturePlot(norm.co.seu, features = c("GAD1", "GAD2"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
 # Inhibitory neurons
VlnPlot(norm.co.seu, features = c("GAD1", "GAD2"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Neurons
FeaturePlot(norm.co.seu, features = c("MAP2", "DCX", "SYP", "RBFOX1"), ncol = 2, pt.size = 0.1) & NoAxes()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Neurons
VlnPlot(norm.co.seu, features = c("MAP2", "DCX", "SYP", "RBFOX1"), cols = mycoloursP, ncol = 2, pt.size = 0.01) 
```

```{r include=FALSE}
saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds"))
```

# Plot top5 DE Genes (filtered for pct.1 > 0.25 & pct.2 < 0.5)
```{r}
de.fil.markers_man.clust <- read.csv(here("outs", "de.fil.markers_man.clust.csv")) #load filtered DE genes
top5 <- de.fil.markers_man.clust %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
```

```{r fig.width=13, fig.height=15, fig.fullwidth=TRUE}
DoHeatmap(norm.co.seu, features = top5$gene, label = TRUE, group.by = "average_HP", group.colors = mycoloursP[6:40], draw.lines = FALSE, size = 3)
```

```{r fig.width=13, fig.height=20, fig.fullwidth=TRUE}
 # Do a heatmap from the cluster averages 
Idents(norm.co.seu) <- "man_clust"

norm.co.seu@meta.data$average_HP <- norm.co.seu@meta.data$man_clust

current.ids <- c("0", "1", "2", "3", "4_A", "4_B", "5", "6", "7", "8", "9", "10_A", "10_B", "11", "12", "13")
new.ids <- c("Neurons 1", "Neurons 2", "Immature Neurons 1", "Neurons 3",  "Glioblasts", "pre-OPC", "Astrocytes 1", 
             "Neurons 4", "Immature Neurons 2", "Pericytes", "Early Neurons", "oRG/Astrocytes", "Radial Glia", "CyP", 
              "Oligodendroglia", "Astrocytes 2")
norm.co.seu@meta.data$average_HP <- plyr::mapvalues(x = norm.co.seu@meta.data$average_HP, from = current.ids, to = new.ids) #add a column to the meta.data to match the amount of clusters, after the manual clustering
                                                                                                          
head(norm.co.seu@meta.data)

 # DE Genes calculated after the mannual clustering
top10 <- de.fil.markers_man.clust %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) #subset for the top10 genes

cluster_averages <- AverageExpression(norm.co.seu, group.by = "average_HP",
                                      return.seurat = TRUE)

cluster_averages@meta.data$cluster <- factor(levels(as.factor(norm.co.seu@meta.data$average_HP)), 
                                             levels = c("Neurons 1", "Neurons 2", "Immature Neurons 1", "Neurons 3", 
                                                        "Glioblasts", "pre-OPC", "Astrocytes 1", "Neurons 4", 
                                                        "Immature Neurons 2", "Pericytes", "Early Neurons", 
                                                        "oRG/Astrocytes", "Radial Glia", "CyP", 
                                                        "Oligodendroglia", "Astrocytes 2"))

DoHeatmap(object = cluster_averages, features = top10$gene, label = TRUE, group.by = "cluster",  group.colors = mycoloursP[6:40], draw.lines = F)
```

```{r}
sessionInfo()
```