---
title: "Approximation models for best resolution_allCells"
author: "Nina-Lydia Kazakou"
date: "25/06/2021"
output: html_document
---
# load libraries
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
library(clustree)
library(gtools)
library(cluster)
library(bluster)
```
# Set the colour pallete
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```
# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))
dim(norm.co.seu) # 22735 12923
head(norm.co.seu@meta.data)
```
# Calculate more clustering resolutions
```{r message=FALSE}
norm.co.seu <- FindNeighbors(norm.co.seu, dims = 1:14)
norm.co.seu <- FindClusters(norm.co.seu, resolution = c(0.01,0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2))
```
```{r}
# Generate DimPlots for all the different resolutions
dir.create(here("outs", "DimPlots_allRes"))
plot_list_func <- function(seur_obj,
col_pattern,
plot_cols,
clust_lab = TRUE,
label_size = 8,
num_col = 4,
save_dir = getwd(),
width=7,
height=5){
extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data))
res_names <- names(seur_obj@meta.data[extr_res_col])
# gtools function, sorts gene_names alphanumeric:
res_names <- mixedsort(res_names)
plot_l <-list()
for(i in 1: length(res_names)){
pdf(paste0(save_dir,
res_names[i], "_umap.pdf"), width=width, height=height)
dim_plot <- DimPlot(seur_obj,
reduction = "umap",
cols= plot_cols,
group.by = res_names[i],
label = clust_lab,
label.size = label_size) + NoLegend()
print(dim_plot)
dev.off()
print(dim_plot)
}
}
plot_list_func(seur_obj = norm.co.seu,
col_pattern = "RNA_snn_res.",
plot_cols = mycoloursP[6:40],
save_dir = here("outs","DimPlots_allRes"))
```
```{r fig.width=9, fig.height=10, fig.fullwidth=TRUE}
# Plot a ClusterTree
dir.create(here("outs", "Cluster_Tree"))
#For ploting all the resolutions
clust_tree <- clustree(norm.co.seu, prefix = "RNA_snn_res.", edge_arrow=FALSE) # Maybe res.0.6?
print(clust_tree)
```
```{r}
norm.co.sce <- as.SingleCellExperiment(norm.co.seu)
dim(norm.co.sce)
colnames(colData(norm.co.sce))
```
```{r fig.width=10, fig.height=8, fig.fullwidth=TRUE}
# Approximate Silhouette Plots
dir.create(here("outs", "Silhouette_Plots"))
dir.create(here("outs", "ClusterPurity_Plots"))
#1.Approximate Silhouette
approx_sil <- function(sce_obj, reduction = "PCA",
col_pattern = "RNA_snn_res",
plot_cols,
clust_lab = TRUE,
label_size = 8,
save_dir = getwd(),
width=7,
height=5){ #does not work with only one cluster, need to have something to make comparisons to, add min_i
res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
names_col <- names(colData(sce_obj))[res_col]
# gtools function, sorts gene_names alphanumeric:
names_col <- mixedsort(names_col)
met_dat <- as.data.frame(colData(norm.co.sce))
for(i in 1: length(names_col)){
clust <- met_dat[[names_col[i]]]
clust_int <- as.integer(paste0(clust))
sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction),
clusters = clust_int)
sil_data <- as.data.frame(sil_approx)
sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other))
sil_data$cluster <- factor(clust_int)
apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) +
ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) +
xlab(names_col[i])
pdf(paste0(save_dir,
names_col[i], "_sil.pdf"), width=width, height=height)
print(apr_sil_plot)
dev.off()
print(apr_sil_plot)
}
print("Done")
}
approx_sil(sce_obj = norm.co.sce,
col_pattern = "RNA_snn_res.",
plot_cols = mycoloursP[6:40],
save_dir = here("outs", "Silhouette_Plots"))
#2.Cluster Purity
clu_pure <- function(sce_obj, reduction = "PCA",
col_pattern = "RNA_snn_res",
plot_cols,
clust_lab = TRUE,
label_size = 8,
save_dir = getwd(),
width=7,
height=5){
res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
names_col <- names(colData(sce_obj))[res_col]
# gtools function, sorts gene_names alphanumeric:
names_col <- mixedsort(names_col)
met_dat <- as.data.frame(colData(norm.co.sce))
for(i in 1: length(names_col)){
clust <- met_dat[[names_col[i]]]
clust_int <- as.integer(paste0(clust))
pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int)
pure_data <- as.data.frame(pure)
pure_data$maximum <- factor(pure_data$maximum)
pure_data$cluster <- factor(clust_int)
pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) +
ggbeeswarm::geom_quasirandom(method="smiley") +
theme_bw(20) +
xlab(names_col[i])
pdf(paste0(save_dir,
names_col[i], "_sil.pdf"), width=width, height=height)
print(pure_plot)
dev.off()
print(pure_plot)
}
print("Done")
}
clu_pure(sce_obj = norm.co.sce,
col_pattern = "RNA_snn_res.",
plot_cols = mycoloursP[6:40],
save_dir = here("outs", "ClusterPurity_Plots"))
```
```{r}
sessionInfo()
```