CorticalOrganoids / scr / oligodendroglia / Approximation models for best resolution.Rmd
Approximation models for best resolution.Rmd
Raw
---
title: "Approximation models for best resolution"
author: "Nina-Lydia Kazakou"
date: "24/06/2021"
output: html_document
---

# load libraries 
```{r message=FALSE}
library(SingleCellExperiment)
library(Seurat)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
library(clustree)
library(gtools)
library(cluster)
library(bluster)
```

# Set the colour pallete 
```{r include=FALSE}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load co.oligos seu.object
```{r}
co.oligos <- readRDS(here("data", "Oligos_only", "co.oligos.rds"))

dim(co.oligos) # 22735 301
head(co.oligos@meta.data)
```

# Try different resolutions
## I am starting here with a resolution that is just large enough to differentiate between oligodendrocytes and OPCs. 
From there I increase it slowly and keep  resolutions that are associated with the appearance of a new cluster. 
I do this until I think I reached overclustering and still continue in 0.1 steps until a resolution of 1 is reached.

Here, I will use cluster stabitily and purity measures

```{r}
 # Generate DimPlots for all the different resolutions 
dir.create(here("outs", "Oligos_only", "DimPlots_allRes"))

plot_list_func <- function(seur_obj,
                           col_pattern, 
                           plot_cols, 
                           clust_lab = TRUE,
                           label_size = 8,
                           num_col = 4,
                           save_dir = getwd(),
                           width=7,
                           height=5){
  extr_res_col <- grep(pattern = col_pattern, names(seur_obj@meta.data))
  res_names <- names(seur_obj@meta.data[extr_res_col])
  # gtools function, sorts gene_names alphanumeric:
  res_names <- mixedsort(res_names) 
  plot_l <-list()
  for(i in 1: length(res_names)){
    pdf(paste0(save_dir, 
               res_names[i], "_umap.pdf"), width=width, height=height)
    dim_plot <- DimPlot(seur_obj, 
                        reduction = "umap", 
                        cols= plot_cols,
                        group.by = res_names[i],
                        label = clust_lab,
                        label.size = label_size) + NoLegend()
    print(dim_plot)
    dev.off()
    print(dim_plot)
  }
}

plot_list_func(seur_obj = co.oligos,
               col_pattern = "RNA_snn_res.",
               plot_cols = mycoloursP[6:40],
               save_dir = here("outs", "Oligos_only", "DimPlots_allRes"))
```

```{r fig.width=8, fig.height=6, fig.fullwidth=TRUE}
 # Relevant FeaturePlots for most common OPC & OL markers
DefaultAssay(co.oligos) <- "RNA"

FeaturePlot(co.oligos, features = c("OLIG1", "OLIG2", "SOX10"), pt.size = 1) & NoAxes()
FeaturePlot(co.oligos, features = c("PDGFRA", "PCDH15", "MBP", "MAG"), pt.size = 1) & NoAxes()
```
```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
## OPC
FeaturePlot(co.oligos, features = c("CSPG4", "GPR17", "PTGDS", "GFRA1", "LINC01965", "CABLES1"), pt.size = 1) & NoAxes()
```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
 ## COP
FeaturePlot(co.oligos, features = c("ETV1", "CHST9",  "MYT1",  "TENM2",  "CAMK2A",   "KCNQ5"), pt.size = 1, ncol = 2) & NoAxes()

FeaturePlot(co.oligos, features = c("SEMA5B", "SYT1","BMPER", "EPHB1", "ARHGAP24"), pt.size = 1, ncol = 2) & NoAxes()

```

```{r fig.width=8, fig.height=8, fig.fullwidth=TRUE}
  ## OL
FeaturePlot(co.oligos, features = c("PLP1", "CNP", "MAG", "MOG", "BCAS1", "ZNF488", "GALC", "MOBP"), pt.size = 1, ncol = 2) & NoAxes()
```

```{r fig.width=8, fig.height=10, fig.fullwidth=TRUE}
 # Plot a ClusterTree
dir.create(here("outs", "Oligos_only", "Cluster_Tree"))

#For ploting all the resolutions
clust_tree <- clustree(co.oligos, prefix = "RNA_snn_res.", edge_arrow=FALSE) # Maybe res.0.6?

pdf(save_dir = "/Users/s1241040/Desktop/SingleCell_BrainOrganoids/Analysis/outs/Oligos_only/Cluster_Tree/Clust_tree.pdf", width = 8, height = 10)

print(clust_tree)

dev.off()
```

```{r}
co.oligos.sce <- as.SingleCellExperiment(co.oligos)

dim(co.oligos.sce)
colnames(colData(co.oligos.sce))
```

```{r}
 # Approximate Silhouette Plots
dir.create(here("outs", "Oligos_only", "Silhouette_Plots"))
dir.create(here("outs", "Oligos_only", "ClusterPurity_Plots"))

#1.Approximate Silhouette
approx_sil <- function(sce_obj, reduction = "PCA",
                       col_pattern = "RNA_snn_res", 
                       plot_cols, 
                       clust_lab = TRUE,
                       label_size = 8,
                       save_dir = getwd(),
                       width=7,
                       height=5, 
                       min_i = 1){  #does not work with only one cluster, need to have something to make comparisons to, add min_i
  res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
  names_col <- names(colData(sce_obj))[res_col]
  # gtools function, sorts gene_names alphanumeric:
  names_col <- mixedsort(names_col)
  met_dat <- as.data.frame(colData(co.oligos.sce))
  for(i in  min_i: length(names_col)){
    clust <- met_dat[[names_col[i]]]
    clust_int <- as.integer(paste0(clust))
    
    sil_approx <- approxSilhouette(reducedDim(sce_obj, reduction), 
                                   clusters = clust_int)
    sil_data <- as.data.frame(sil_approx)
    sil_data$closest <- factor(ifelse(sil_data$width > 0, clust_int, sil_data$other))
    sil_data$cluster <- factor(clust_int)
    
    apr_sil_plot <-ggplot(sil_data, aes(x=cluster, y=width, colour=closest)) + 
      ggbeeswarm::geom_quasirandom(method="smiley") + theme_bw(20) +
      xlab(names_col[i])
    
    
    pdf(paste0(save_dir, 
               names_col[i], "_sil.pdf"), width=width, height=height)
    
    
    print(apr_sil_plot)
    
    dev.off()
    
    print(apr_sil_plot)
  }
  print("Done")
}
approx_sil(sce_obj = co.oligos.sce, 
                          col_pattern = "RNA_snn_res.",
                          plot_cols = mycoloursP[6:40], 
                          min_i = 2,
                          save_dir = here("outs", "Oligos_only", "Silhouette_Plots"))


#2.Cluster Purity
clu_pure <- function(sce_obj, reduction = "PCA",
                     col_pattern = "RNA_snn_res", 
                     plot_cols, 
                     clust_lab = TRUE,
                     label_size = 8,
                     save_dir = getwd(),
                     width=7,
                     height=5,
                     min_i = 1){
  res_col <- grep(pattern = col_pattern, names(colData(sce_obj)))
  names_col <- names(colData(sce_obj))[res_col]
  # gtools function, sorts gene_names alphanumeric:
  names_col <- mixedsort(names_col)
  met_dat <- as.data.frame(colData(co.oligos.sce))
  for(i in min_i: length(names_col)){
    clust <- met_dat[[names_col[i]]]
    clust_int <- as.integer(paste0(clust))
    
    pure <- neighborPurity(reducedDim(sce_obj, reduction), clusters = clust_int)
    pure_data <- as.data.frame(pure)
    pure_data$maximum <- factor(pure_data$maximum)
    pure_data$cluster <- factor(clust_int)
    
    
    pure_plot <- ggplot(pure_data, aes(x=cluster, y=purity, colour=maximum)) + 
      ggbeeswarm::geom_quasirandom(method="smiley") +
      theme_bw(20) +
      xlab(names_col[i])
    
    
    pdf(paste0(save_dir, 
               names_col[i], "_sil.pdf"), width=width, height=height)
    
    print(pure_plot)
    
    dev.off()
    
    print(pure_plot)
  }
  print("Done")
}

clu_pure(sce_obj = co.oligos.sce, 
         col_pattern = "RNA_snn_res.",
         plot_cols = mycoloursP[6:40],
         min_i = 2,
         save_dir = here("outs", "Oligos_only", "ClusterPurity_Plots"))

```

```{r}
sessionInfo()
```