--- title: "Approximation models for best resolution" author: "Nina-Lydia Kazakou" date: "24/06/2021" output: html_document --- # load libraries ```{r message=FALSE} library(SingleCellExperiment) library(Seurat) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) library(clustree) library(gtools) library(cluster) library(bluster) ``` # Set the colour pallete ```{r include=FALSE} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Load co.oligos seu.object ```{r} co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds")) dim(co.oligos) # 22735 301 head(co.oligos@meta.data) ``` # Try different resolutions ## I am starting here with a resolution that is just large enough to differentiate between oligodendrocytes and OPCs. From there I increase it slowly and keep resolutions that are associated with the appearance of a new cluster. I do this until I think I reached overclustering and still continue in 0.1 steps until a resolution of 1 is reached. Here, I will use cluster stabitily and purity measures ```{r} # Generate DimPlots for all the different resolutions dir.create(here("outs", "Oligos_only", "DimPlots_allRes")) plot_list_func <- function(seur_obj, col_pattern, plot_cols, clust_lab = TRUE, label_size = 8, num_col = 4, save_dir = getwd(), width=7, height=5){ extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data)) res_names <- names(seur_obj@meta.data[extr_res_col]) # gtools function, sorts gene_names alphanumeric: res_names <- mixedsort(res_names) plot_l <-list() for(i in 1: length(res_names)){ pdf(paste0(save_dir, res_names[i], "_umap.pdf"), width=width, height=height) dim_plot <- DimPlot(seur_obj, reduction = "umap", cols= plot_cols, group.by = res_names[i], label = clust_lab, label.size = label_size) + NoLegend() print(dim_plot) dev.off() print(dim_plot) } } plot_list_func(seur_obj = co.oligos, col_pattern = "RNA_snn_res.", plot_cols = mycoloursP[6:40], save_dir = here("outs", "Oligos_only", "DimPlots_allRes")) ``` ```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE} # Relevant FeaturePlots for most common OPC & OL markers DefaultAssay(co.oligos) <- "RNA" FeaturePlot(co.oligos, features = c("OLIG1", "OLIG2", "SOX10"), pt.size = 1) & NoAxes() FeaturePlot(co.oligos, features = c("PDGFRA", "PCDH15", "MBP", "MAG"), pt.size = 1) & NoAxes() ``` ```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} ## OPC FeaturePlot(co.oligos, features = c("CSPG4", "GPR17", "PTGDS", "GFRA1", "LINC01965", "CABLES1"), pt.size = 1) & NoAxes() ``` ```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} ## COP FeaturePlot(co.oligos, features = c("ETV1", "CHST9", "MYT1", "TENM2", "CAMK2A", "KCNQ5"), pt.size = 1, ncol = 2) & NoAxes() FeaturePlot(co.oligos, features = c("SEMA5B", "SYT1","BMPER", "EPHB1", "ARHGAP24"), pt.size = 1, ncol = 2) & NoAxes() ``` ```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE} ## OL FeaturePlot(co.oligos, features = c("PLP1", "CNP", "MAG", "MOG", "BCAS1", "ZNF488", "GALC", "MOBP"), pt.size = 1, ncol = 2) & NoAxes() ``` ```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE} # Plot a ClusterTree dir.create(here("outs", "Oligos_only", "Cluster_Tree")) #For ploting all the resolutions clust_tree <- clustree(co.oligos, prefix = "RNA_snn_res.", edge_arrow=FALSE) # Maybe res.0.6? pdf(save_dir = "/Users/s1241040/Desktop/SingleCell_BrainOrganoids/Analysis/outs/Oligos_only/Cluster_Tree/Clust_tree.pdf", width = 8, height = 10) print(clust_tree) dev.off() ``` ```{r} co.oligos.sce <- as.SingleCellExperiment(co.oligos) dim(co.oligos.sce) colnames(colData(co.oligos.sce)) ``` ```{r} # Approximate Silhouette Plots dir.create(here("outs", "Oligos_only", "Silhouette_Plots")) dir.create(here("outs", "Oligos_only", "ClusterPurity_Plots")) #1.Approximate Silhouette approx_sil <- function(sce_obj, reduction = "PCA", col_pattern = "RNA_snn_res", plot_cols, clust_lab = TRUE, label_size = 8, save_dir = getwd(), width=7, height=5, min_i = 1){ #does not work with only one cluster, need to have something to make comparisons to, add min_i res_col <- grep(pattern = col_pattern, names(colData(sce_obj))) names_col <- names(colData(sce_obj))[res_col] # gtools function, sorts gene_names alphanumeric: names_col <- mixedsort(names_col) met_dat <- as.data.frame(colData(co.oligos.sce)) for(i in min_i: length(names_col)){ clust <- met_dat[[names_col[i]]] clust_int <- as.integer(paste0(clust)) sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction), clusters = clust_int) sil_data <- as.data.frame(sil_approx) sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other)) sil_data$cluster <- factor(clust_int) apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) + ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) + xlab(names_col[i]) pdf(paste0(save_dir, names_col[i], "_sil.pdf"), width=width, height=height) print(apr_sil_plot) dev.off() print(apr_sil_plot) } print("Done") } approx_sil(sce_obj = co.oligos.sce, col_pattern = "RNA_snn_res.", plot_cols = mycoloursP[6:40], min_i = 2, save_dir = here("outs", "Oligos_only", "Silhouette_Plots")) #2.Cluster Purity clu_pure <- function(sce_obj, reduction = "PCA", col_pattern = "RNA_snn_res", plot_cols, clust_lab = TRUE, label_size = 8, save_dir = getwd(), width=7, height=5, min_i = 1){ res_col <- grep(pattern = col_pattern, names(colData(sce_obj))) names_col <- names(colData(sce_obj))[res_col] # gtools function, sorts gene_names alphanumeric: names_col <- mixedsort(names_col) met_dat <- as.data.frame(colData(co.oligos.sce)) for(i in min_i: length(names_col)){ clust <- met_dat[[names_col[i]]] clust_int <- as.integer(paste0(clust)) pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int) pure_data <- as.data.frame(pure) pure_data$maximum <- factor(pure_data$maximum) pure_data$cluster <- factor(clust_int) pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) + ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) + xlab(names_col[i]) pdf(paste0(save_dir, names_col[i], "_sil.pdf"), width=width, height=height) print(pure_plot) dev.off() print(pure_plot) } print("Done") } clu_pure(sce_obj = co.oligos.sce, col_pattern = "RNA_snn_res.", plot_cols = mycoloursP[6:40], min_i = 2, save_dir = here("outs", "Oligos_only", "ClusterPurity_Plots")) ``` ```{r} sessionInfo() ```