--- title: "Cell Cycle Annotation & Clustering" author: "Nina-Lydia Kazakou" date: "16 June 2021" output: html_document --- # load libraries ```{r message=FALSE} library(SingleCellExperiment) library(Seurat) library(scater) library(scran) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) ``` # Set the colour pallete ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Load normalised seu.object ```{r} norm.co.seu <- readRDS(here("data", "norm.co.seu.rds")) dim(norm.co.seu) #22735 12923 ``` # Calculate cell-cycle scores ```{r} norm.co.seu <- CellCycleScoring(object = norm.co.seu, g2m.features = cc.genes$g2m.genes, s.features = cc.genes$s.genes) VlnPlot(norm.co.seu, features = c("S.Score","G2M.Score"), pt.size = 0.01, cols = mycoloursP) View(norm.co.seu@meta.data) saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds")) ``` # Plot QC again before Clustering ```{r} Idents(norm.co.seu) <- "Sample" VlnPlot(norm.co.seu, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"),ncol = 3, pt.size = 0.1, cols = mycoloursP[3:40]) plot1 <- FeatureScatter(norm.co.seu, feature1 = "nCount_RNA", feature2 = "percent.mito", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8)) plot2 <- FeatureScatter(norm.co.seu, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", cols = mycoloursP[3:40], pt.size = 0.4) + theme(axis.title = element_text(size = 10), axis.text = element_text(size = 10), plot.caption = element_text(size = 8)) plot1 + plot2 ``` # Identify the highly variable genes ```{r} norm.co.seu <- FindVariableFeatures(norm.co.seu, selection.method = "vst", nfeatures = 2000) # top10 genes top10 <- head(VariableFeatures(norm.co.seu), 10) plotA <- VariableFeaturePlot(norm.co.seu, cols = mycoloursP[4:5]) plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE) plotB ``` # Scale data ```{r} all.genes <- rownames(norm.co.seu) norm.co.seu <- ScaleData(norm.co.seu, features = all.genes) ``` # Linear dimensional reduction ```{r} norm.co.seu <- RunPCA(norm.co.seu, features = VariableFeatures(object = norm.co.seu)) print(norm.co.seu[["pca"]], dims = 1:5, nfeatures = 5) # Plots VizDimLoadings(norm.co.seu, dims = 1:2, reduction = "pca") DimPlot(norm.co.seu, reduction = "pca") DimHeatmap(norm.co.seu, dims = 1, cells = 500, balanced = TRUE) DimHeatmap(norm.co.seu, dims = 1:15, cells = 500, balanced = TRUE) ``` # Determine the ‘dimensionality’ of the dataset ```{r} # Elbowplot ElbowPlot(norm.co.seu) ``` I could chose anything between 13 & 15 PCs, but I think I will go with 14. # Cluster Cells ```{r message=FALSE} norm.co.seu <- FindNeighbors(norm.co.seu, dims = 1:14) norm.co.seu <- FindClusters(norm.co.seu, resolution = c(0.5, 0.8)) ``` ```{r message=FALSE} Idents(norm.co.seu) <- "RNA_snn_res.0.5" norm.co.seu <- RunUMAP(norm.co.seu, dims = 1:14, reduction = "pca") ``` # Plots ```{r} DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP) & NoAxes() DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP, label = TRUE) & NoAxes() DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[1:40], group.by = "Sample") & NoAxes() ``` ```{r fig.height=4} # Cell Cycle DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Phase") & NoAxes() DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Sample", split.by = "Phase") & NoAxes() DimPlot(norm.co.seu, reduction = "umap", pt.size = 0.5, cols = mycoloursP[5:40], group.by = "Phase", split.by = "Sample") & NoAxes() ``` ```{r} # Segregation of clusters by various sources of uninteresting variation metrics <- c("nCount_RNA", "nFeature_RNA", "S.Score", "G2M.Score", "percent.mito") FeaturePlot(norm.co.seu, reduction = "umap", features = metrics, pt.size = 0.5, ## plot positive cells above negative order = TRUE) & NoAxes() ``` ```{r} norm.co.seu@meta.data$ident <- NULL DefaultAssay(norm.co.seu) <- "RNA" FeaturePlot(norm.co.seu, features = c("OLIG1", "OLIG2", "SOX10"), pt.size = 0.5, label = TRUE) & NoAxes() FeaturePlot(norm.co.seu, features = c("PDGFRA", "PCDH15"), pt.size = 0.5, label = TRUE) & NoAxes() FeaturePlot(norm.co.seu, features = c("MBP", "PLP1", "CNP", "MOG", "MAG"), pt.size = 0.5, label = TRUE) & NoAxes() ``` ```{r} # Check that all samples contribute to the formed clusters IDsPerCluster <- as.data.frame(tapply(norm.co.seu@meta.data$Sample, norm.co.seu@meta.data$RNA_snn_res.0.5, function(x) length(unique(x)) )) names(IDsPerCluster) <- "NumberOfIDs" IDsPerCluster$RNA_snn_res.0.5 <- rownames(IDsPerCluster) IDsPerCluster$Cluster <- rownames(IDsPerCluster) IDsPerCluster ``` ```{r} # Identify the number of cells per cluster n_cells_0.5 <- FetchData(norm.co.seu, vars = c("RNA_snn_res.0.5", "orig.ident")) %>% dplyr::count(RNA_snn_res.0.5, orig.ident) %>% tidyr::spread(RNA_snn_res.0.5, n) n_cells_0.5 write.csv(n_cells_0.5, here("outs", file = "no.ofCellsperCluster_res.0.5.csv")) ``` Check the same for resolution 0.8 ```{r} Idents(norm.co.seu) <- "RNA_snn_res.0.8" norm.co.seu <- RunUMAP(norm.co.seu, dims = 1:14, reduction = "pca") # Check that all samples contribute to the formed clusters IDsPerCluster_0.8 <- as.data.frame(tapply(norm.co.seu@meta.data$Sample, norm.co.seu@meta.data$RNA_snn_res.0.8, function(x) length(unique(x)) )) names(IDsPerCluster_0.8) <- "NumberOfIDs_0.8" IDsPerCluster_0.8$RNA_snn_res.0.8 <- rownames(IDsPerCluster_0.8) IDsPerCluster_0.8$Cluster <- rownames(IDsPerCluster_0.8) IDsPerCluster_0.8 # Identify the number of cells per cluster n_cells_0.8 <- FetchData(norm.co.seu, vars = c("RNA_snn_res.0.8", "orig.ident")) %>% dplyr::count(RNA_snn_res.0.8, orig.ident) %>% tidyr::spread(RNA_snn_res.0.8, n) n_cells_0.8 write.csv(n_cells_0.8, here("outs", file = "no.ofCellsperCluster_res.0.8.csv")) ``` ```{r} saveRDS(norm.co.seu, here("data", file = "norm.co.seu.rds")) ``` ```{r} sessionInfo() ```