---
title: "Approximation models for best resolution"
author: "Nina-Lydia Kazakou"
date: "24/06/2021"
output: html_document
---
# load libraries
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
library(clustree)
library(gtools)
library(cluster)
library(bluster)
```
# Set the colour pallete
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```
# Load co.oligos seu.object
```{r}
co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds"))
dim(co.oligos) # 22735 301
head(co.oligos@meta.data)
```
# Try different resolutions
## I am starting here with a resolution that is just large enough to differentiate between oligodendrocytes and OPCs.
From there I increase it slowly and keep resolutions that are associated with the appearance of a new cluster.
I do this until I think I reached overclustering and still continue in 0.1 steps until a resolution of 1 is reached.
Here, I will use cluster stabitily and purity measures
```{r}
# Generate DimPlots for all the different resolutions
dir.create(here("outs", "Oligos_only", "DimPlots_allRes"))
plot_list_func <- function(seur_obj,
col_pattern,
plot_cols,
clust_lab = TRUE,
label_size = 8,
num_col = 4,
save_dir = getwd(),
width=7,
height=5){
extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data))
res_names <- names(seur_obj@meta.data[extr_res_col])
# gtools function, sorts gene_names alphanumeric:
res_names <- mixedsort(res_names)
plot_l <-list()
for(i in 1: length(res_names)){
pdf(paste0(save_dir,
res_names[i], "_umap.pdf"), width=width, height=height)
dim_plot <- DimPlot(seur_obj,
reduction = "umap",
cols= plot_cols,
group.by = res_names[i],
label = clust_lab,
label.size = label_size) + NoLegend()
print(dim_plot)
dev.off()
print(dim_plot)
}
}
plot_list_func(seur_obj = co.oligos,
col_pattern = "RNA_snn_res.",
plot_cols = mycoloursP[6:40],
save_dir = here("outs", "Oligos_only", "DimPlots_allRes"))
```
```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
# Relevant FeaturePlots for most common OPC & OL markers
DefaultAssay(co.oligos) <- "RNA"
FeaturePlot(co.oligos, features = c("OLIG1", "OLIG2", "SOX10"), pt.size = 1) & NoAxes()
FeaturePlot(co.oligos, features = c("PDGFRA", "PCDH15", "MBP", "MAG"), pt.size = 1) & NoAxes()
```
```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
## OPC
FeaturePlot(co.oligos, features = c("CSPG4", "GPR17", "PTGDS", "GFRA1", "LINC01965", "CABLES1"), pt.size = 1) & NoAxes()
```
```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
## COP
FeaturePlot(co.oligos, features = c("ETV1", "CHST9", "MYT1", "TENM2", "CAMK2A", "KCNQ5"), pt.size = 1, ncol = 2) & NoAxes()
FeaturePlot(co.oligos, features = c("SEMA5B", "SYT1","BMPER", "EPHB1", "ARHGAP24"), pt.size = 1, ncol = 2) & NoAxes()
```
```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
## OL
FeaturePlot(co.oligos, features = c("PLP1", "CNP", "MAG", "MOG", "BCAS1", "ZNF488", "GALC", "MOBP"), pt.size = 1, ncol = 2) & NoAxes()
```
```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE}
# Plot a ClusterTree
dir.create(here("outs", "Oligos_only", "Cluster_Tree"))
#For ploting all the resolutions
clust_tree <- clustree(co.oligos, prefix = "RNA_snn_res.", edge_arrow=FALSE) # Maybe res.0.6?
pdf(save_dir = "/Users/s1241040/Desktop/SingleCell_BrainOrganoids/Analysis/outs/Oligos_only/Cluster_Tree/Clust_tree.pdf", width = 8, height = 10)
print(clust_tree)
dev.off()
```
```{r}
co.oligos.sce <- as.SingleCellExperiment(co.oligos)
dim(co.oligos.sce)
colnames(colData(co.oligos.sce))
```
```{r}
# Approximate Silhouette Plots
dir.create(here("outs", "Oligos_only", "Silhouette_Plots"))
dir.create(here("outs", "Oligos_only", "ClusterPurity_Plots"))
#1.Approximate Silhouette
approx_sil <- function(sce_obj, reduction = "PCA",
col_pattern = "RNA_snn_res",
plot_cols,
clust_lab = TRUE,
label_size = 8,
save_dir = getwd(),
width=7,
height=5,
min_i = 1){ #does not work with only one cluster, need to have something to make comparisons to, add min_i
res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
names_col <- names(colData(sce_obj))[res_col]
# gtools function, sorts gene_names alphanumeric:
names_col <- mixedsort(names_col)
met_dat <- as.data.frame(colData(co.oligos.sce))
for(i in min_i: length(names_col)){
clust <- met_dat[[names_col[i]]]
clust_int <- as.integer(paste0(clust))
sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction),
clusters = clust_int)
sil_data <- as.data.frame(sil_approx)
sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other))
sil_data$cluster <- factor(clust_int)
apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) +
ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) +
xlab(names_col[i])
pdf(paste0(save_dir,
names_col[i], "_sil.pdf"), width=width, height=height)
print(apr_sil_plot)
dev.off()
print(apr_sil_plot)
}
print("Done")
}
approx_sil(sce_obj = co.oligos.sce,
col_pattern = "RNA_snn_res.",
plot_cols = mycoloursP[6:40],
min_i = 2,
save_dir = here("outs", "Oligos_only", "Silhouette_Plots"))
#2.Cluster Purity
clu_pure <- function(sce_obj, reduction = "PCA",
col_pattern = "RNA_snn_res",
plot_cols,
clust_lab = TRUE,
label_size = 8,
save_dir = getwd(),
width=7,
height=5,
min_i = 1){
res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
names_col <- names(colData(sce_obj))[res_col]
# gtools function, sorts gene_names alphanumeric:
names_col <- mixedsort(names_col)
met_dat <- as.data.frame(colData(co.oligos.sce))
for(i in min_i: length(names_col)){
clust <- met_dat[[names_col[i]]]
clust_int <- as.integer(paste0(clust))
pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int)
pure_data <- as.data.frame(pure)
pure_data$maximum <- factor(pure_data$maximum)
pure_data$cluster <- factor(clust_int)
pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) +
ggbeeswarm::geom_quasirandom(method="smiley") +
theme_bw(20) +
xlab(names_col[i])
pdf(paste0(save_dir,
names_col[i], "_sil.pdf"), width=width, height=height)
print(pure_plot)
dev.off()
print(pure_plot)
}
print("Done")
}
clu_pure(sce_obj = co.oligos.sce,
col_pattern = "RNA_snn_res.",
plot_cols = mycoloursP[6:40],
min_i = 2,
save_dir = here("outs", "Oligos_only", "ClusterPurity_Plots"))
```
```{r}
sessionInfo()
```