CorticalOrganoids / scr / AllCells / Approximation models for best resolution_allCells.Rmd
Approximation models for best resolution_allCells.Rmd
Raw
---
title: "Approximation models for best resolution_allCells"
author: "Nina-Lydia Kazakou"
date: "25/06/2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
library(clustree)
library(gtools)
library(cluster)
library(bluster)
```

# Set the colour pallete 
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load normalised seu.object
```{r}
norm.co.seu <- readRDS(here("data", "norm.co.seu.rds"))

dim(norm.co.seu) # 22735 12923
head(norm.co.seu@meta.data)
```

# Calculate more clustering resolutions 
```{r message=FALSE}
norm.co.seu <- FindNeighbors(norm.co.seu, dims = 1:14)

norm.co.seu <- FindClusters(norm.co.seu, resolution = c(0.01,0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2))
```

```{r}
 # Generate DimPlots for all the different resolutions 
dir.create(here("outs", "DimPlots_allRes"))

plot_list_func <- function(seur_obj,
                           col_pattern, 
                           plot_cols, 
                           clust_lab = TRUE,
                           label_size = 8,
                           num_col = 4,
                           save_dir = getwd(),
                           width=7,
                           height=5){
  extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data))
  res_names <- names(seur_obj@meta.data[extr_res_col])
  # gtools function, sorts gene_names alphanumeric:
  res_names <- mixedsort(res_names) 
  plot_l <-list()
  for(i in 1: length(res_names)){
    pdf(paste0(save_dir, 
               res_names[i], "_umap.pdf"), width=width, height=height)
    dim_plot <- DimPlot(seur_obj, 
                        reduction = "umap", 
                        cols= plot_cols,
                        group.by = res_names[i],
                        label = clust_lab,
                        label.size = label_size) + NoLegend()
    print(dim_plot)
    dev.off()
    print(dim_plot)
  }
}

plot_list_func(seur_obj = norm.co.seu,
               col_pattern = "RNA_snn_res.",
               plot_cols = mycoloursP[6:40],
               save_dir = here("outs","DimPlots_allRes"))
```

```{r fig.width=9, fig.height=10, fig.fullwidth=TRUE}
 # Plot a ClusterTree
dir.create(here("outs", "Cluster_Tree"))

#For ploting all the resolutions
clust_tree <- clustree(norm.co.seu, prefix = "RNA_snn_res.", edge_arrow=FALSE) # Maybe res.0.6?

print(clust_tree)
```

```{r}
norm.co.sce <- as.SingleCellExperiment(norm.co.seu)

dim(norm.co.sce)
colnames(colData(norm.co.sce))
```

```{r fig.width=10, fig.height=8, fig.fullwidth=TRUE}
 # Approximate Silhouette Plots
dir.create(here("outs", "Silhouette_Plots"))
dir.create(here("outs", "ClusterPurity_Plots"))

#1.Approximate Silhouette
approx_sil <- function(sce_obj, reduction = "PCA",
                       col_pattern = "RNA_snn_res", 
                       plot_cols, 
                       clust_lab = TRUE,
                       label_size = 8,
                       save_dir = getwd(),
                       width=7,
                       height=5){  #does not work with only one cluster, need to have something to make comparisons to, add min_i
  res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
  names_col <- names(colData(sce_obj))[res_col]
  # gtools function, sorts gene_names alphanumeric:
  names_col <- mixedsort(names_col)
  met_dat <- as.data.frame(colData(norm.co.sce))
  for(i in  1: length(names_col)){
    clust <- met_dat[[names_col[i]]]
    clust_int <- as.integer(paste0(clust))
    
    sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction), 
                                   clusters = clust_int)
    sil_data <- as.data.frame(sil_approx)
    sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other))
    sil_data$cluster <- factor(clust_int)
    
    apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) + 
      ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) +
      xlab(names_col[i])
    
    
    pdf(paste0(save_dir, 
               names_col[i], "_sil.pdf"), width=width, height=height)
    
    
    print(apr_sil_plot)
    
    dev.off()
    
    print(apr_sil_plot)
  }
  print("Done")
}
approx_sil(sce_obj = norm.co.sce, 
                          col_pattern = "RNA_snn_res.",
                          plot_cols = mycoloursP[6:40], 
                          save_dir = here("outs", "Silhouette_Plots"))


#2.Cluster Purity
clu_pure <- function(sce_obj, reduction = "PCA",
                     col_pattern = "RNA_snn_res", 
                     plot_cols, 
                     clust_lab = TRUE,
                     label_size = 8,
                     save_dir = getwd(),
                     width=7,
                     height=5){
  res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
  names_col <- names(colData(sce_obj))[res_col]
  # gtools function, sorts gene_names alphanumeric:
  names_col <- mixedsort(names_col)
  met_dat <- as.data.frame(colData(norm.co.sce))
  for(i in 1: length(names_col)){
    clust <- met_dat[[names_col[i]]]
    clust_int <- as.integer(paste0(clust))
    
    pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int)
    pure_data <- as.data.frame(pure)
    pure_data$maximum <- factor(pure_data$maximum)
    pure_data$cluster <- factor(clust_int)
    
    
    pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) + 
      ggbeeswarm::geom_quasirandom(method="smiley") +
      theme_bw(20) +
      xlab(names_col[i])
    
    
    pdf(paste0(save_dir, 
               names_col[i], "_sil.pdf"), width=width, height=height)
    
    print(pure_plot)
    
    dev.off()
    
    print(pure_plot)
  }
  print("Done")
}

clu_pure(sce_obj = norm.co.sce, 
         col_pattern = "RNA_snn_res.",
         plot_cols = mycoloursP[6:40],
         save_dir = here("outs", "ClusterPurity_Plots"))

```

```{r}
sessionInfo()
```