scr/oligodendroglia/Subset & ReCluster Oligodendroglia.Rmd · CorticalOrganoids

---
title: "Subset & ReCluster Oligodendroglia"
author: "Nina-Lydia Kazakou"
date: "23/06/2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```

# Set the colour pallete 
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))

dim(norm.co.seu) # 22735 12923
head(norm.co.seu@meta.data)
```

# Plot Cells
```{r}
Idents(norm.co.seu) <- "ClusterID"
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols= mycoloursP) & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", label = TRUE, pt.size = 0.5, cols= mycoloursP) & NoAxes() & NoLegend()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 # Oligodendroglia
FeaturePlot(norm.co.seu, features = c("OLIG1", "OLIG2", "SOX10"), ncol = 2, pt.size = 0.1) & NoAxes()
```

Oligodendroglia is mostly located in one cluster, but there quite a few positive cells in the Glioblasts cluster.
Here, I will use OLIG1+ & OLIG2+ cells from both clusters. 

```{r}
 # First, isolate the cells of interest from the Glioblasts cluster
glioblasts <- subset(norm.co.seu, ident = "Glioblasts")
gli_ol <- subset(glioblasts, subset = OLIG1 >= 1 | OLIG2 >= 1)
head(gli_ol@meta.data)

 # See how many cells I have isolated 
n_cells_gli_ol <- FetchData(gli_ol, vars = c("ClusterID", "orig.ident")) %>% dplyr::count(ClusterID, orig.ident) %>% tidyr::spread(ClusterID, n)
n_cells_gli_ol

write.csv(n_cells_gli_ol, here("outs", "Oligos_only", file = "n_cells_gli_ol.csv"))

 # Add an origins cluster to mark these cells 
gli_ol@meta.data$Origins <- gli_ol@meta.data$ClusterID
head(gli_ol@meta.data)
```

```{r}
 # Isolate the Oligodendroglia cluster
oligodendroglia <- subset(norm.co.seu, ident = "Oligodendroglia")
head(oligodendroglia@meta.data)

 # See how many cells I have isolated 
n_cells_ol <- FetchData(oligodendroglia, vars = c("ClusterID", "orig.ident")) %>% dplyr::count(ClusterID, orig.ident) %>% tidyr::spread(ClusterID, n)
n_cells_ol

write.csv(n_cells_ol, here("outs", "Oligos_only", file = "n_cells_ol.csv"))

 # Add an origins cluster to mark these cells 
oligodendroglia@meta.data$Origins <- oligodendroglia@meta.data$ClusterID
head(oligodendroglia@meta.data)
```

```{r}
 # Merge the two seurat objects
co.oligos <- merge(x = gli_ol, y = oligodendroglia)

head(co.oligos@meta.data)
```

```{r}
 # Clean up the merged oject; get rid of any meta.data that I don't need
co.oligos@meta.data$low_lib_size <- NULL
co.oligos@meta.data$large_lib_size <- NULL
co.oligos@meta.data$low_n_features <- NULL
co.oligos@meta.data$high_n_features_wk8 <- NULL
co.oligos@meta.data$high_n_features_other<- NULL
co.oligos@meta.data$high_subsets_mito_percent <- NULL
co.oligos@meta.data$discard <- NULL

co.oligos@meta.data$label <- NULL

co.oligos@meta.data$scDblFinder.cluster <- NULL
co.oligos@meta.data$scDblFinder.sample <- NULL
co.oligos@meta.data$scDblFinder.score <- NULL
co.oligos@meta.data$scDblFinder.ratio <- NULL
co.oligos@meta.data$scDblFinder.weighted <- NULL
co.oligos@meta.data$scDblFinder.nearestClass <- NULL
co.oligos@meta.data$scDblFinder.difficulty <- NULL
co.oligos@meta.data$scDblFinder.cxds_score <- NULL
co.oligos@meta.data$scDblFinder.mostLikelyOrigin <- NULL
co.oligos@meta.data$scDblFinder.originAmbiguous<- NULL

co.oligos@meta.data$RNA_snn_res.0.5 <- NULL
co.oligos@meta.data$RNA_snn_res.0.8 <- NULL
co.oligos@meta.data$RNA_snn_res.2 <- NULL
co.oligos@meta.data$seurat_clusters <- NULL

co.oligos@meta.data$man_clust <- NULL
co.oligos@meta.data$ClusterID <- NULL

head(co.oligos@meta.data)
```

```{r}
saveRDS(co.oligos, here("data", "Oligos_only", file = "co.oligos.merge.rds"))
```

# QC Check
```{r fig.width=8, fig.height=4, fig.fullwidth=TRUE}
Idents(co.oligos) <- "Sample"

VlnPlot(co.oligos, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"),ncol = 3, pt.size = 0.1, cols = mycoloursP[3:40])

plot1 <- FeatureScatter(co.oligos, feature1 = "nCount_RNA", feature2 = "percent.mito", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8))
plot2 <- FeatureScatter(co.oligos, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8))
plot1 + plot2
```

# Identify the highly variable genes
```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
co.oligos <- FindVariableFeatures(co.oligos, selection.method = "vst", nfeatures = 2000)

# top10 genes
top10 <- head(VariableFeatures(co.oligos), 10)
plotA <- VariableFeaturePlot(co.oligos, cols = mycoloursP[4:5])
plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE)
plotB
```

# Scale data
```{r}
all.genes <- rownames(co.oligos)
co.oligos <- ScaleData(co.oligos, features = all.genes)
```

# Linear dimensional reduction
```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
co.oligos <- RunPCA(co.oligos, features = VariableFeatures(object = co.oligos))
print(co.oligos[["pca"]], dims = 1:5, nfeatures = 5)

# Plots
VizDimLoadings(co.oligos, dims = 1:2, reduction = "pca")

DimPlot(co.oligos, reduction = "pca")

DimHeatmap(co.oligos, dims = 1, cells = 500, balanced = TRUE)
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
DimHeatmap(co.oligos, dims = 1:6, cells = 500, balanced = TRUE, ncol = 2)
DimHeatmap(co.oligos, dims = 7:10, cells = 500, balanced = TRUE, ncol = 2)
```

# Determine the ‘dimensionality’ of the dataset
```{r}
# Elbowplot
ElbowPlot(co.oligos)
```

# Cluster Cells
```{r message=FALSE}
co.oligos <- FindNeighbors(co.oligos, dims = 1:14)

co.oligos <- FindClusters(co.oligos, resolution = c(0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))
```

# Run non-linear dimensional reduction
```{r}
co.oligos <- RunUMAP(co.oligos, dims = 1:12)
colnames(co.oligos@meta.data)
head(co.oligos@meta.data)
```

```{r}
n_cells_co.oligos <- FetchData(co.oligos, vars = c("Sample", "orig.ident")) %>% dplyr::count(Sample, orig.ident) %>% tidyr::spread(Sample, n)
n_cells_co.oligos

write.csv(n_cells_co.oligos, here("outs", "Oligos_only", file = "n_cells_co.oligos.csv"))
```

```{r}
saveRDS(co.oligos, here("data", "Oligos_only", file = "co.oligos.rds"))
```