scr/AllCells/Cell Cycle Annotation & Clustering.Rmd · CorticalOrganoids

---
title: "Cell Cycle Annotation & Clustering"
author: "Nina-Lydia Kazakou"
date: "16 June 2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```

# Set the colour pallete 
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))

dim(norm.co.seu) #22735 12923
```

# Calculate cell-cycle scores
```{r}
norm.co.seu <- CellCycleScoring(object = norm.co.seu, g2m.features = cc.genes$g2m.genes, s.features = cc.genes$s.genes)

VlnPlot(norm.co.seu, features = c("S.Score","G2M.Score"), pt.size = 0.01, cols = mycoloursP)

View(norm.co.seu@meta.data)

saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds"))
```

# Plot QC again before Clustering
```{r}
Idents(norm.co.seu) <- "Sample"

VlnPlot(norm.co.seu, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"),ncol = 3, pt.size = 0.1, cols = mycoloursP[3:40])

plot1 <- FeatureScatter(norm.co.seu, feature1 = "nCount_RNA", feature2 = "percent.mito", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8))
plot2 <- FeatureScatter(norm.co.seu, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8))
plot1 + plot2
```

# Identify the highly variable genes
```{r}
norm.co.seu <- FindVariableFeatures(norm.co.seu, selection.method = "vst", nfeatures = 2000)

# top10 genes
top10 <- head(VariableFeatures(norm.co.seu), 10)
plotA <- VariableFeaturePlot(norm.co.seu, cols = mycoloursP[4:5])
plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE)
plotB
```

# Scale data
```{r}
all.genes <- rownames(norm.co.seu)
norm.co.seu <- ScaleData(norm.co.seu, features = all.genes)
```

# Linear dimensional reduction
```{r}
norm.co.seu <- RunPCA(norm.co.seu, features = VariableFeatures(object = norm.co.seu))
print(norm.co.seu[["pca"]], dims = 1:5, nfeatures = 5)

# Plots
VizDimLoadings(norm.co.seu, dims = 1:2, reduction = "pca")

DimPlot(norm.co.seu, reduction = "pca")

DimHeatmap(norm.co.seu, dims = 1, cells = 500, balanced = TRUE)

DimHeatmap(norm.co.seu, dims = 1:15, cells = 500, balanced = TRUE)
```

# Determine the ‘dimensionality’ of the dataset
```{r}
# Elbowplot
ElbowPlot(norm.co.seu)
```

I could chose anything between 13 & 15 PCs, but I think I will go with 14.

# Cluster Cells
```{r message=FALSE}
norm.co.seu <- FindNeighbors(norm.co.seu, dims = 1:14)

norm.co.seu <- FindClusters(norm.co.seu, resolution = c(0.5, 0.8))
```

```{r message=FALSE}
Idents(norm.co.seu) <- "RNA_snn_res.0.5"
norm.co.seu <- RunUMAP(norm.co.seu, dims = 1:14, reduction = "pca")
```

# Plots
```{r}
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP) & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP, label = TRUE) & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[1:40], group.by = "Sample") & NoAxes()
```

```{r fig.height=4}
# Cell Cycle 
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Phase") & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Sample", split.by = "Phase") & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Phase", split.by = "Sample") & NoAxes()
```

```{r}
# Segregation of clusters by various sources of uninteresting variation
metrics <-  c("nCount_RNA", "nFeature_RNA", "S.Score", "G2M.Score", "percent.mito")
FeaturePlot(norm.co.seu, 
            reduction = "umap", 
            features = metrics, 
            pt.size = 0.5, ## plot positive cells above negative 
            order = TRUE) & NoAxes() 
```

```{r}
norm.co.seu@meta.data$ident <- NULL

DefaultAssay(norm.co.seu) <- "RNA"

FeaturePlot(norm.co.seu, features = c("OLIG1", "OLIG2", "SOX10"), pt.size = 0.5, label = TRUE) & NoAxes()
FeaturePlot(norm.co.seu, features = c("PDGFRA", "PCDH15"), pt.size = 0.5, label = TRUE) & NoAxes()
FeaturePlot(norm.co.seu, features = c("MBP", "PLP1", "CNP", "MOG", "MAG"), pt.size = 0.5, label = TRUE) & NoAxes()
```

```{r}
# Check that all samples contribute to the formed clusters
IDsPerCluster <- as.data.frame(tapply(norm.co.seu@meta.data$Sample, 
                                      norm.co.seu@meta.data$RNA_snn_res.0.5, 
                                      function(x) length(unique(x)) ))
names(IDsPerCluster) <- "NumberOfIDs"
IDsPerCluster$RNA_snn_res.0.5 <- rownames(IDsPerCluster)
IDsPerCluster$Cluster <- rownames(IDsPerCluster)
IDsPerCluster
```

```{r}
# Identify the number of cells per cluster
n_cells_0.5 <- FetchData(norm.co.seu, 
                     vars = c("RNA_snn_res.0.5", "orig.ident")) %>%
  dplyr::count(RNA_snn_res.0.5, orig.ident) %>%
  tidyr::spread(RNA_snn_res.0.5, n)

n_cells_0.5

write.csv(n_cells_0.5, here("outs", file = "no.ofCellsperCluster_res.0.5.csv"))
```

Check the same for resolution 0.8
```{r}
Idents(norm.co.seu) <- "RNA_snn_res.0.8"
norm.co.seu <- RunUMAP(norm.co.seu, dims = 1:14, reduction = "pca")

# Check that all samples contribute to the formed clusters
IDsPerCluster_0.8 <- as.data.frame(tapply(norm.co.seu@meta.data$Sample, 
                                      norm.co.seu@meta.data$RNA_snn_res.0.8, 
                                      function(x) length(unique(x)) ))
names(IDsPerCluster_0.8) <- "NumberOfIDs_0.8"
IDsPerCluster_0.8$RNA_snn_res.0.8 <- rownames(IDsPerCluster_0.8)
IDsPerCluster_0.8$Cluster <- rownames(IDsPerCluster_0.8)
IDsPerCluster_0.8

# Identify the number of cells per cluster
n_cells_0.8 <- FetchData(norm.co.seu, 
                     vars = c("RNA_snn_res.0.8", "orig.ident")) %>%
  dplyr::count(RNA_snn_res.0.8, orig.ident) %>%
  tidyr::spread(RNA_snn_res.0.8, n)

n_cells_0.8

write.csv(n_cells_0.8, here("outs", file = "no.ofCellsperCluster_res.0.8.csv"))
```


```{r}
saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds"))
```

```{r}
sessionInfo()
```