--- title: '[met] Clustering' author: "Nina-Lydia Kazakou" date: "06/06/2022" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Set-up ### Load libraries ```{r message=FALSE, warning=FALSE} library(here) # For reproducible paths library(SingleCellExperiment) library(Seurat) library(ggplot2) # For plots library(ggsci) # Colours library(tidyverse) library(scales) library(RCurl) ``` ### Colour Palette ```{r load_palette} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3<-pal_lancet("lanonc", alpha = 0.7)(9) mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) ``` ### Load sce ```{r} sce <- readRDS(here("Data", "Human_R_Analysis", "Metformin_ddH2O", "met_human_sce_norm_db.rds")) ``` ### Convert sce into srt ```{r message=FALSE, warning=FALSE} srt <- as.Seurat(sce) ``` # Clustering The dataset has been previously normalised, so it won't be normalised again. ### Identify Variable Features: ```{r var_features} srt <- FindVariableFeatures(srt, selection.method = "vst", nfeatures = 2000) ``` ### Identify the 10 most highly variable genes ```{r} top10 <- head(VariableFeatures(srt), 10) ``` ### Plot Variable Features ```{r message=FALSE, warning=FALSE} plotA <- VariableFeaturePlot(srt, cols = mycoloursP[4:5]) plotB <- LabelPoints(plot = plotA, points = top10, repel = TRUE) plotB ``` ### Scale all Genes ```{r scale_genes} all_genes <- rownames(srt) srt <- ScaleData(srt, features = all_genes) ``` ### Linear Dimension Reduction ```{r PCA} srt <- RunPCA(srt, features = VariableFeatures(object = srt), npcs = 100) ``` ### Visualise ```{r, fig.width=10, fig.height=8, fig.fullwidth=TRUE} VizDimLoadings(srt, dims = 1:2, reduction = "pca") ``` ```{r} srt@meta.data$SampleID <- srt@meta.data$Sample sample_id <- c("19880WApool01__ddH2O_A_S3", "19880WApool01__ddH2O_B_S8", "19880WApool01__Metformin_A_S1", "19880WApool01__Metformin_B_S2", "19880WApool01__Metformin_C_S7") sample <- c("ddH2O_A", "ddH2O_B", "Metformin_A", "Metformin_B", "Metformin_C") srt@meta.data$Sample <- plyr::mapvalues(x = srt@meta.data$Sample, from = sample_id, to = sample) head(srt@meta.data, 2) ``` ```{r fig.width=8} Idents(srt) <- "SampleID" DimPlot(srt, reduction = "pca", cols= mycoloursP[18:40], pt.size = 1) ``` ```{r} Idents(srt) <- "Sample" DimPlot(srt, reduction = "pca", cols= mycoloursP[18:40], pt.size = 1, split.by = "Sample") & NoLegend() ``` ```{r} DimHeatmap(srt, dims = 1, cells = 500, balanced = TRUE) ``` ```{r fig.height=70, fig.width=25} DimHeatmap(srt, dims = 1:50, cells = 500, balanced = TRUE) ``` ### Dimensional Reduction #### ElbowPlot ```{r} ElbowPlot(srt, ndims = 100) ElbowPlot(srt, ndims = 50) ``` The small PCs in the tail should correspond to noise. #### Horn's paralell analysis ```{r} srt_to_sce <- as.SingleCellExperiment(srt) var_feat <- VariableFeatures(srt) set.seed(100) horn <- PCAtools::parallelPCA(logcounts(srt_to_sce)[var_feat,], BSPARAM=BiocSingular::IrlbaParam(), niters=10) horn$n var_horn <- horn$original$variance plot(var_horn, type="b", log="y", pch=16, xlab = "Principal components", ylab = "Variance explained") permuted <- horn$permuted for (i in seq_len(ncol(permuted))) { points(permuted[,i], col="grey80", pch=16) lines(permuted[,i], col="grey80", pch=16) } abline(v=horn$n, col="red") ``` I used a few PCAtools features to estimate the correct number of PCs, such as the elbow and Horn's parallel analysis, but it seems that the proposed PCs are less than 15. Since these methods tend to underestimate the number of PCs, and it is usually recommended to use between 20 and 40 PCs, I will choose 22 PCs; there seems to be a tiny drop between 22 & 23 PCs in the Elbowplot. ```{r message=FALSE, warning=FALSE} srt <- FindNeighbors(srt, dims = 1:22) ``` ```{r message=FALSE, warning=FALSE} srt <- FindClusters(srt, resolution = c(0.01, 0.03, 0.05, 0.07, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)) ``` ```{r message=FALSE, warning=FALSE} srt <- RunUMAP(srt, dims = 1:22) ``` ```{r} srt <- RunTSNE(srt, dims = 1:22) ``` ```{r} Idents(srt) <- "seurat_clusters" DimPlot(srt, reduction = "pca", pt.size = 1, label = FALSE, cols = mycoloursP) DimPlot(srt, reduction = "umap", pt.size = 1, label = TRUE, cols = mycoloursP) & NoLegend() DimPlot(srt, reduction = "tsne", pt.size = 1, label = TRUE, cols = mycoloursP) & NoLegend() ``` ```{r} DimPlot(srt, reduction = "umap", pt.size = 1, label = FALSE, cols = mycoloursP, group.by = "Treatment") DimPlot(srt, reduction = "tsne", pt.size = 1, label = FALSE, cols = mycoloursP, group.by = "Treatment") ``` ```{r} DimPlot(srt, reduction = "umap", pt.size = 1, label = TRUE, cols = mycoloursP, group.by = "seurat_clusters", split.by = "Treatment") & NoLegend() DimPlot(srt, reduction = "tsne", pt.size = 1, label = TRUE, cols = mycoloursP, group.by = "seurat_clusters", split.by = "Treatment") & NoLegend() ``` ## Doublet marking ```{r} DimPlot(srt, group.by = "scDblFinder.class", pt.size = 1, cols = mycoloursP[25:26], reduction = "umap") DimPlot(srt, group.by = "scDblFinder.class", pt.size = 1, cols = mycoloursP[25:26], reduction = "tsne") ``` The artificially calculated doublets are both very few and widely spread on the dimensional reduction, thus I will not exlude these cells. It is clear that there no artificial clusters generated by technical errors in these data. ## Cell-cycle scoring ```{r warning=TRUE} srt <- CellCycleScoring(object = srt, g2m.features = cc.genes$g2m.genes,s.features = cc.genes$s.genes, set.ident = TRUE) # Cell cycle scores & Phase assignment head(srt@meta.data$Phase) ``` ```{r fig.height=6, fig.width=12} VlnPlot(srt, features = c("S.Score","G2M.Score"), pt.size = 0.01, cols = mycoloursP) ``` During the Gap1 (G1) phase the cell increases in size , during the Synthesis (S) phase it copies its DNA and during the Gap2 (G2) phase it prepares to divide, while during the Mitosis (M) phase it divides. [](https://www.jmedsciences.com/doi/JMEDS/pdf/10.5005/jp-journals-10045-00138) ```{r eval=FALSE} saveRDS(srt, here("Data", "Human_R_Analysis", "Metformin_ddH2O", "met_srt.rds")) ``` ##### SessionInfo <details> <summary> Click to expand </summary> ```{r} sessionInfo() ``` </details>