scr/AllCells/ScranNormalization_DoubletIdentification.Rmd · CorticalOrganoids

---
title: "Organoids_Normalization & Doublets Identification"
author: "Nina-Lydia Kazakou"
date: "18 May 2021"
output: html_document
---

# load libraries 
```{r}
library(scater)
library(scran)
library(Seurat)
library(devtools)
library(dplyr)
library(here)
library(scDblFinder)
```

# set working directory
```{r}
setwd("/exports/eddie/scratch/s1241040/BO/Ananlysis/")
```

# load Scatter filtered object or take from environment
```{r}
filtered.co.sce <- readRDS("/exports/eddie/scratch/s1241040/BO/Ananlysis/data/filtered.co.sce.rds")
```

# Normalization by deconvolution
```{r}
# Calculation of deconvolution factors
set.seed(100)
clust.filtered.co.sce <- quickCluster(filtered.co.sce)
table(clust.filtered.co.sce)

# Calculate size factors & add them directly to the metadata
norm.co.sce <- filtered.co.sce

norm.co.sce <- computeSumFactors(norm.co.sce, cluster = clust.filtered.co.sce, min.mean = 0.1)

# Visualise size factors
deconv.sf <- sizeFactors(norm.co.sce)
summary(deconv.sf) #check that there are no 0 or negative factors

# Apply size factors
norm.co.sce<- logNormCounts(norm.co.sce)
assayNames(norm.co.sce)

colnames(colData(norm.co.sce))
head(sizeFactors(norm.co.sce))
```

# Mark the doublets
```{r}
# Model variance and select top 10% most variable genes
set.seed(1001)
dec_sce <- modelGeneVarByPoisson(norm.co.sce)
top_genes <- getTopHVGs(dec_sce, prop=0.1)

set.seed(10000)
norm.co.sce <- denoisePCA(norm.co.sce, 
                           subset.row=top_genes, 
                           technical=dec_sce)

set.seed(100000)
norm.co.sce <- runTSNE(norm.co.sce, 
                        dimred="PCA",
                        pca = 25)

#  Quick Clustering
g <- buildSNNGraph(norm.co.sce, k=10, use.dimred = 'PCA')
clust <- igraph::cluster_walktrap(g)$membership
colLabels(norm.co.sce) <- factor(clust)

table(colLabels(norm.co.sce))

plotTSNE(norm.co.sce, colour_by="label")
```

```{r}
set.seed(100)

norm.co.sce <- scDblFinder(norm.co.sce, samples = "Sample")
table(norm.co.sce$scDblFinder.class)
# singlet doublet
# 12177    746
```

```{r}
write.csv(as.data.frame(colData(norm.co.sce)), here("outs", file = "doubletscore.csv"))

saveRDS(norm.co.sce, here("data", file = "norm.co.sce.rds"))
```