--- title: "Cluster Annotation (part 3)" author: "Nina-Lydia Kazakou" date: "22/06/2021" output: html_document --- # load libraries ```{r message=FALSE} library(SingleCellExperiment) library(Seurat) library(scater) library(scran) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) ``` # Set the colour pallete ```{r include=FALSE} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Load normalised seu.object ```{r} norm.co.seu <- readRDS(here("data", "norm.co.seu.rds")) dim(norm.co.seu) # 22735 12923 head(norm.co.seu@meta.data) ``` # Plot the two different resolutions I am going to use for the final annotation ```{r} DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "RNA_snn_res.0.5", cols= mycoloursP) #actual resolution DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "RNA_snn_res.2", cols= mycoloursP) ``` Here, I will try to separate clusters 4 & 10 (from resolution 0.5), because the seem like a mixed population. I will also split the Cyp in G & S phase. For the time being the rest of the suits my interest in the oligodendroglia. When I move to annotating the neurons extensively I might need to re-visit the clustering. # Alter the clustering ```{r} norm.co.seu@meta.data$man_clust <- ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 18, "10_A", ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 21, "10_B", ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 12, "4_A", ifelse(norm.co.seu@meta.data$RNA_snn_res.2 == 16, "4_B", paste(norm.co.seu@meta.data$RNA_snn_res.0.5))))) norm.co.seu@meta.data$man_clust <- ifelse(norm.co.seu@meta.data$man_clust == 10, "4_B", ifelse(norm.co.seu@meta.data$man_clust == 4, "4_B", paste(norm.co.seu@meta.data$man_clust))) DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, group.by = "man_clust", cols= mycoloursP) ``` # Investigate the newly generated clusters ```{r} DefaultAssay(norm.co.seu) <- "RNA" Idents(norm.co.seu) <- "man_clust" ``` ```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} # Astrocytes VlnPlot(norm.co.seu, features = c("AQP4", "GFAP", "S100B", "SPON1", "SPARCL1", "SLC1A3"), cols = mycoloursP, ncol = 2, pt.size = 0.01) ``` ```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE} # Radial Glia VlnPlot(norm.co.seu, features = c("VIM", "PAX6", "HES1", "HES5"), cols = mycoloursP, ncol = 2, pt.size = 0.01) ``` ```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE} # oRG VlnPlot(norm.co.seu, features = c("PTPRZ1", "HOPX", "FAM170A", "TNC", "LIFR"), cols = mycoloursP, ncol = 2, pt.size = 0.01) ``` ```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE} # Pre-OPC VlnPlot(norm.co.seu, features = c("EGFR", "DLL1", "DLL3", "BCAN"), cols = mycoloursP, ncol = 2, pt.size = 0.01) ``` # Identify the top10 Differentially Expressed Genes ```{r} de.markers <- FindAllMarkers(norm.co.seu, min.pct = 0.25, logfc.threshold = 0.25,term.use = "MAST", only.pos = TRUE ) head(de.markers) write.csv(de.markers, here("outs", file = "de.markers_man.clust.csv")) #Filter them pct.1_fil = 0.25 pct.2_fil = 0.5 de.fil.markers <- subset(de.markers, de.markers$pct.1 > pct.1_fil & de.markers$pct.2 < pct.2_fil) head(de.fil.markers) write.csv(de.fil.markers, here("outs", file = "de.fil.markers_man.clust.csv")) #Filter for the filtered markers of the clusters of interest int_de.fil.markers <- de.fil.markers %>% filter(cluster == c("4_A", "4_B", "10_A", "10_B")) head(int_de.fil.markers) write.csv(int_de.fil.markers, here("outs", file = "int_de.fil.markers_man.clust.csv")) #Identify top10 genes int_top10 <- int_de.fil.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) levels(norm.co.seu) int_clust <- subset(norm.co.seu, ident = c("4_A", "4_B", "10_A", "10_B")) ``` ```{r fig.width=13, fig.height=15, fig.fullwidth=TRUE} DoHeatmap(int_clust, features = int_top10$gene, label = TRUE, group.by = "man_clust", group.colors = mycoloursP[6:40], draw.lines = FALSE) ``` NXPH4 has a very striking expression only in cluster 4_B. Here, is a relevant paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6763262/ More, specifically, selectively expressed Neurexophilin4, an α-neurexin ligand, regulates specific synapse function and modulates cerebellar motor control. ```{r fig.width=8, fig.height=3.5, fig.fullwidth=TRUE} VlnPlot(norm.co.seu, features = "NXPH4", cols = mycoloursP, ncol = 2, pt.size = 0.01) & NoLegend() ``` ```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} VlnPlot(int_clust, features = c("AQP4", "GFAP", "S100B", "SPON1", "SPARCL1", "SLC1A3"), cols = mycoloursP, ncol = 2, pt.size = 0.01) VlnPlot(int_clust, features = c("OLIG1", "OLIG2", "SOX10"), cols = mycoloursP, ncol = 2, pt.size = 0.01) VlnPlot(int_clust, features = c("EGFR", "DLL1", "DLL3", "BCAN"), cols = mycoloursP, ncol = 2, pt.size = 0.01) VlnPlot(int_clust, features = c("HOPX", "PTPRZ1", "TNC", "LIFR"), cols = mycoloursP, ncol = 2, pt.size = 0.01) VlnPlot(int_clust, features = c("VIM", "PAX6", "HES1", "HES5", "SOX2"), cols = mycoloursP, ncol = 2, pt.size = 0.01) ```