---
title: "Cell Cycle Annotation & Clustering"
author: "Nina-Lydia Kazakou"
date: "16 June 2021"
output: html_document
---
# load libraries
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```
# Set the colour pallete
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```
# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))
dim(norm.co.seu) #22735 12923
```
# Calculate cell-cycle scores
```{r}
norm.co.seu <- CellCycleScoring(object = norm.co.seu, g2m.features = cc.genes$g2m.genes, s.features = cc.genes$s.genes)
VlnPlot(norm.co.seu, features = c("S.Score","G2M.Score"), pt.size = 0.01, cols = mycoloursP)
View(norm.co.seu@meta.data)
saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds"))
```
# Plot QC again before Clustering
```{r}
Idents(norm.co.seu) <- "Sample"
VlnPlot(norm.co.seu, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"),ncol = 3, pt.size = 0.1, cols = mycoloursP[3:40])
plot1 <- FeatureScatter(norm.co.seu, feature1 = "nCount_RNA", feature2 = "percent.mito", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8))
plot2 <- FeatureScatter(norm.co.seu, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8))
plot1 + plot2
```
# Identify the highly variable genes
```{r}
norm.co.seu <- FindVariableFeatures(norm.co.seu, selection.method = "vst", nfeatures = 2000)
# top10 genes
top10 <- head(VariableFeatures(norm.co.seu), 10)
plotA <- VariableFeaturePlot(norm.co.seu, cols = mycoloursP[4:5])
plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE)
plotB
```
# Scale data
```{r}
all.genes <- rownames(norm.co.seu)
norm.co.seu <- ScaleData(norm.co.seu, features = all.genes)
```
# Linear dimensional reduction
```{r}
norm.co.seu <- RunPCA(norm.co.seu, features = VariableFeatures(object = norm.co.seu))
print(norm.co.seu[["pca"]], dims = 1:5, nfeatures = 5)
# Plots
VizDimLoadings(norm.co.seu, dims = 1:2, reduction = "pca")
DimPlot(norm.co.seu, reduction = "pca")
DimHeatmap(norm.co.seu, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(norm.co.seu, dims = 1:15, cells = 500, balanced = TRUE)
```
# Determine the ‘dimensionality’ of the dataset
```{r}
# Elbowplot
ElbowPlot(norm.co.seu)
```
I could chose anything between 13 & 15 PCs, but I think I will go with 14.
# Cluster Cells
```{r message=FALSE}
norm.co.seu <- FindNeighbors(norm.co.seu, dims = 1:14)
norm.co.seu <- FindClusters(norm.co.seu, resolution = c(0.5, 0.8))
```
```{r message=FALSE}
Idents(norm.co.seu) <- "RNA_snn_res.0.5"
norm.co.seu <- RunUMAP(norm.co.seu, dims = 1:14, reduction = "pca")
```
# Plots
```{r}
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP) & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP, label = TRUE) & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[1:40], group.by = "Sample") & NoAxes()
```
```{r fig.height=4}
# Cell Cycle
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Phase") & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Sample", split.by = "Phase") & NoAxes()
DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Phase", split.by = "Sample") & NoAxes()
```
```{r}
# Segregation of clusters by various sources of uninteresting variation
metrics <- c("nCount_RNA", "nFeature_RNA", "S.Score", "G2M.Score", "percent.mito")
FeaturePlot(norm.co.seu,
reduction = "umap",
features = metrics,
pt.size = 0.5, ## plot positive cells above negative
order = TRUE) & NoAxes()
```
```{r}
norm.co.seu@meta.data$ident <- NULL
DefaultAssay(norm.co.seu) <- "RNA"
FeaturePlot(norm.co.seu, features = c("OLIG1", "OLIG2", "SOX10"), pt.size = 0.5, label = TRUE) & NoAxes()
FeaturePlot(norm.co.seu, features = c("PDGFRA", "PCDH15"), pt.size = 0.5, label = TRUE) & NoAxes()
FeaturePlot(norm.co.seu, features = c("MBP", "PLP1", "CNP", "MOG", "MAG"), pt.size = 0.5, label = TRUE) & NoAxes()
```
```{r}
# Check that all samples contribute to the formed clusters
IDsPerCluster <- as.data.frame(tapply(norm.co.seu@meta.data$Sample,
norm.co.seu@meta.data$RNA_snn_res.0.5,
function(x) length(unique(x)) ))
names(IDsPerCluster) <- "NumberOfIDs"
IDsPerCluster$RNA_snn_res.0.5 <- rownames(IDsPerCluster)
IDsPerCluster$Cluster <- rownames(IDsPerCluster)
IDsPerCluster
```
```{r}
# Identify the number of cells per cluster
n_cells_0.5 <- FetchData(norm.co.seu,
vars = c("RNA_snn_res.0.5", "orig.ident")) %>%
dplyr::count(RNA_snn_res.0.5, orig.ident) %>%
tidyr::spread(RNA_snn_res.0.5, n)
n_cells_0.5
write.csv(n_cells_0.5, here("outs", file = "no.ofCellsperCluster_res.0.5.csv"))
```
Check the same for resolution 0.8
```{r}
Idents(norm.co.seu) <- "RNA_snn_res.0.8"
norm.co.seu <- RunUMAP(norm.co.seu, dims = 1:14, reduction = "pca")
# Check that all samples contribute to the formed clusters
IDsPerCluster_0.8 <- as.data.frame(tapply(norm.co.seu@meta.data$Sample,
norm.co.seu@meta.data$RNA_snn_res.0.8,
function(x) length(unique(x)) ))
names(IDsPerCluster_0.8) <- "NumberOfIDs_0.8"
IDsPerCluster_0.8$RNA_snn_res.0.8 <- rownames(IDsPerCluster_0.8)
IDsPerCluster_0.8$Cluster <- rownames(IDsPerCluster_0.8)
IDsPerCluster_0.8
# Identify the number of cells per cluster
n_cells_0.8 <- FetchData(norm.co.seu,
vars = c("RNA_snn_res.0.8", "orig.ident")) %>%
dplyr::count(RNA_snn_res.0.8, orig.ident) %>%
tidyr::spread(RNA_snn_res.0.8, n)
n_cells_0.8
write.csv(n_cells_0.8, here("outs", file = "no.ofCellsperCluster_res.0.8.csv"))
```
```{r}
saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds"))
```
```{r}
sessionInfo()
```