--- title: "Approximation models for best resolution_allCells" author: "Nina-Lydia Kazakou" date: "25/06/2021" output: html_document --- # load libraries ```{r message=FALSE} library(SingleCellExperiment) library(Seurat) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) library(clustree) library(gtools) library(cluster) library(bluster) ``` # Set the colour pallete ```{r include=FALSE} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Load normalised seu.object ```{r} norm.co.seu <- readRDS(here("data", "norm.co.seu.rds")) dim(norm.co.seu) # 22735 12923 head(norm.co.seu@meta.data) ``` # Calculate more clustering resolutions ```{r message=FALSE} norm.co.seu <- FindNeighbors(norm.co.seu, dims = 1:14) norm.co.seu <- FindClusters(norm.co.seu, resolution = c(0.01,0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2)) ``` ```{r} # Generate DimPlots for all the different resolutions dir.create(here("outs", "DimPlots_allRes")) plot_list_func <- function(seur_obj, col_pattern, plot_cols, clust_lab = TRUE, label_size = 8, num_col = 4, save_dir = getwd(), width=7, height=5){ extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data)) res_names <- names(seur_obj@meta.data[extr_res_col]) # gtools function, sorts gene_names alphanumeric: res_names <- mixedsort(res_names) plot_l <-list() for(i in 1: length(res_names)){ pdf(paste0(save_dir, res_names[i], "_umap.pdf"), width=width, height=height) dim_plot <- DimPlot(seur_obj, reduction = "umap", cols= plot_cols, group.by = res_names[i], label = clust_lab, label.size = label_size) + NoLegend() print(dim_plot) dev.off() print(dim_plot) } } plot_list_func(seur_obj = norm.co.seu, col_pattern = "RNA_snn_res.", plot_cols = mycoloursP[6:40], save_dir = here("outs","DimPlots_allRes")) ``` ```{r fig.width=9, fig.height=10, fig.fullwidth=TRUE} # Plot a ClusterTree dir.create(here("outs", "Cluster_Tree")) #For ploting all the resolutions clust_tree <- clustree(norm.co.seu, prefix = "RNA_snn_res.", edge_arrow=FALSE) # Maybe res.0.6? print(clust_tree) ``` ```{r} norm.co.sce <- as.SingleCellExperiment(norm.co.seu) dim(norm.co.sce) colnames(colData(norm.co.sce)) ``` ```{r fig.width=10, fig.height=8, fig.fullwidth=TRUE} # Approximate Silhouette Plots dir.create(here("outs", "Silhouette_Plots")) dir.create(here("outs", "ClusterPurity_Plots")) #1.Approximate Silhouette approx_sil <- function(sce_obj, reduction = "PCA", col_pattern = "RNA_snn_res", plot_cols, clust_lab = TRUE, label_size = 8, save_dir = getwd(), width=7, height=5){ #does not work with only one cluster, need to have something to make comparisons to, add min_i res_col <- grep(pattern = col_pattern, names(colData(sce_obj))) names_col <- names(colData(sce_obj))[res_col] # gtools function, sorts gene_names alphanumeric: names_col <- mixedsort(names_col) met_dat <- as.data.frame(colData(norm.co.sce)) for(i in 1: length(names_col)){ clust <- met_dat[[names_col[i]]] clust_int <- as.integer(paste0(clust)) sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction), clusters = clust_int) sil_data <- as.data.frame(sil_approx) sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other)) sil_data$cluster <- factor(clust_int) apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) + ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) + xlab(names_col[i]) pdf(paste0(save_dir, names_col[i], "_sil.pdf"), width=width, height=height) print(apr_sil_plot) dev.off() print(apr_sil_plot) } print("Done") } approx_sil(sce_obj = norm.co.sce, col_pattern = "RNA_snn_res.", plot_cols = mycoloursP[6:40], save_dir = here("outs", "Silhouette_Plots")) #2.Cluster Purity clu_pure <- function(sce_obj, reduction = "PCA", col_pattern = "RNA_snn_res", plot_cols, clust_lab = TRUE, label_size = 8, save_dir = getwd(), width=7, height=5){ res_col <- grep(pattern = col_pattern, names(colData(sce_obj))) names_col <- names(colData(sce_obj))[res_col] # gtools function, sorts gene_names alphanumeric: names_col <- mixedsort(names_col) met_dat <- as.data.frame(colData(norm.co.sce)) for(i in 1: length(names_col)){ clust <- met_dat[[names_col[i]]] clust_int <- as.integer(paste0(clust)) pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int) pure_data <- as.data.frame(pure) pure_data$maximum <- factor(pure_data$maximum) pure_data$cluster <- factor(clust_int) pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) + ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) + xlab(names_col[i]) pdf(paste0(save_dir, names_col[i], "_sil.pdf"), width=width, height=height) print(pure_plot) dev.off() print(pure_plot) } print("Done") } clu_pure(sce_obj = norm.co.sce, col_pattern = "RNA_snn_res.", plot_cols = mycoloursP[6:40], save_dir = here("outs", "ClusterPurity_Plots")) ``` ```{r} sessionInfo() ```