CorticalOrganoids / scr / AllCells / Cluster Annotation (part 3).Rmd
Cluster Annotation (part 3).Rmd
Raw
---
title: "Cluster Annotation (part 3)"
author: "Nina-Lydia Kazakou"
date: "22/06/2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```

# Set the colour pallete 
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))

dim(norm.co.seu) # 22735 12923
head(norm.co.seu@meta.data)
```

 # Plot the two different resolutions I am going to use for the final annotation
```{r}
DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "RNA_snn_res.0.5", cols= mycoloursP) #actual resolution

DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "RNA_snn_res.2", cols= mycoloursP)
```
 
Here, I will try to separate clusters 4 & 10 (from resolution 0.5), because the seem like a mixed population. 
I will also split the Cyp in G & S phase. 
For the time being the rest of the suits my interest in the oligodendroglia. When I move to annotating
the neurons extensively I might need to re-visit the clustering.

# Alter the clustering
```{r}
norm.co.seu@meta.data$man_clust <- ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 18, "10_A",
                                       ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 21, "10_B",
                                               ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 12, "4_A",
                                                       ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 16, "4_B",
                                                              paste(norm.co.seu@meta.data$RNA_snn_res.0.5)))))


norm.co.seu@meta.data$man_clust <- ifelse(norm.co.seu@meta.data$man_clust == 10, "4_B",
                                          ifelse(norm.co.seu@meta.data$man_clust == 4, "4_B",
                                                 paste(norm.co.seu@meta.data$man_clust)))

DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "man_clust", cols= mycoloursP)           
```

# Investigate the newly generated clusters
```{r}
DefaultAssay(norm.co.seu) <- "RNA"
Idents(norm.co.seu) <- "man_clust"
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Astrocytes
VlnPlot(norm.co.seu, features = c("AQP4", "GFAP", "S100B", "SPON1", "SPARCL1", "SLC1A3"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
# Radial Glia
VlnPlot(norm.co.seu, features = c("VIM", "PAX6", "HES1", "HES5"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
# oRG
VlnPlot(norm.co.seu, features = c("PTPRZ1", "HOPX", "FAM170A", "TNC", "LIFR"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
 # Pre-OPC
VlnPlot(norm.co.seu, features = c("EGFR", "DLL1", "DLL3", "BCAN"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```

# Identify the top10 Differentially Expressed Genes
```{r}
de.markers <- FindAllMarkers(norm.co.seu, min.pct = 0.25, logfc.threshold = 0.25,term.use = "MAST", only.pos = TRUE )
head(de.markers)

write.csv(de.markers, here("outs", file = "de.markers_man.clust.csv"))

 #Filter them 
pct.1_fil = 0.25
pct.2_fil = 0.5

de.fil.markers <- subset(de.markers, de.markers$pct.1 > pct.1_fil & de.markers$pct.2 < pct.2_fil)
head(de.fil.markers)

write.csv(de.fil.markers, here("outs", file = "de.fil.markers_man.clust.csv"))

 #Filter for the filtered markers of the clusters of interest
int_de.fil.markers <- de.fil.markers %>% filter(cluster == c("4_A", "4_B", "10_A", "10_B"))
head(int_de.fil.markers)

write.csv(int_de.fil.markers, here("outs", file = "int_de.fil.markers_man.clust.csv"))

 #Identify top10 genes
int_top10 <- int_de.fil.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) 

levels(norm.co.seu)
int_clust <- subset(norm.co.seu, ident = c("4_A", "4_B", "10_A", "10_B"))
```

```{r fig.width=13, fig.height=15, fig.fullwidth=TRUE}
DoHeatmap(int_clust, features = int_top10$gene, label = TRUE, group.by = "man_clust", group.colors = mycoloursP[6:40], draw.lines = FALSE)
```

NXPH4 has a very striking expression only in cluster 4_B. Here, is a relevant paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6763262/
More, specifically, selectively expressed Neurexophilin4, an α-neurexin ligand, regulates specific synapse function 
and modulates cerebellar motor control.

```{r fig.width=8, fig.height=3.5, fig.fullwidth=TRUE}
VlnPlot(norm.co.seu, features = "NXPH4", cols = mycoloursP, ncol = 2, pt.size = 0.01) & NoLegend()
```


```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
VlnPlot(int_clust, features = c("AQP4", "GFAP", "S100B", "SPON1", "SPARCL1", "SLC1A3"), cols = mycoloursP, ncol = 2, pt.size = 0.01)

VlnPlot(int_clust, features = c("OLIG1", "OLIG2", "SOX10"), cols = mycoloursP, ncol = 2, pt.size = 0.01)

VlnPlot(int_clust, features = c("EGFR", "DLL1", "DLL3", "BCAN"), cols = mycoloursP, ncol = 2, pt.size = 0.01)

VlnPlot(int_clust, features = c("HOPX", "PTPRZ1", "TNC", "LIFR"), cols = mycoloursP, ncol = 2, pt.size = 0.01)

VlnPlot(int_clust, features = c("VIM", "PAX6", "HES1", "HES5", "SOX2"), cols = mycoloursP, ncol = 2, pt.size = 0.01)
```