---
title: "Organoids_Normalization & Doublets Identification"
author: "Nina-Lydia Kazakou"
date: "18 May 2021"
output: html_document
---
# load libraries
```{r}
library(scater)
library(scran)
library(Seurat)
library(devtools)
library(dplyr)
library(here)
library(scDblFinder)
```
# set working directory
```{r}
setwd("/exports/eddie/scratch/s1241040/BO/Ananlysis/")
```
# load Scatter filtered object or take from environment
```{r}
filtered.co.sce <- readRDS("/exports/eddie/scratch/s1241040/BO/Ananlysis/data/filtered.co.sce.rds")
```
# Normalization by deconvolution
```{r}
# Calculation of deconvolution factors
set.seed(100)
clust.filtered.co.sce <- quickCluster(filtered.co.sce)
table(clust.filtered.co.sce)
# Calculate size factors & add them directly to the metadata
norm.co.sce <- filtered.co.sce
norm.co.sce <- computeSumFactors(norm.co.sce, cluster = clust.filtered.co.sce, min.mean = 0.1)
# Visualise size factors
deconv.sf <- sizeFactors(norm.co.sce)
summary(deconv.sf) #check that there are no 0 or negative factors
# Apply size factors
norm.co.sce<- logNormCounts(norm.co.sce)
assayNames(norm.co.sce)
colnames(colData(norm.co.sce))
head(sizeFactors(norm.co.sce))
```
# Mark the doublets
```{r}
# Model variance and select top 10% most variable genes
set.seed(1001)
dec_sce <- modelGeneVarByPoisson(norm.co.sce)
top_genes <- getTopHVGs(dec_sce, prop=0.1)
set.seed(10000)
norm.co.sce <- denoisePCA(norm.co.sce,
subset.row=top_genes,
technical=dec_sce)
set.seed(100000)
norm.co.sce <- runTSNE(norm.co.sce,
dimred="PCA",
pca = 25)
# Quick Clustering
g <- buildSNNGraph(norm.co.sce, k=10, use.dimred = 'PCA')
clust <- igraph::cluster_walktrap(g)$membership
colLabels(norm.co.sce) <- factor(clust)
table(colLabels(norm.co.sce))
plotTSNE(norm.co.sce, colour_by="label")
```
```{r}
set.seed(100)
norm.co.sce <- scDblFinder(norm.co.sce, samples = "Sample")
table(norm.co.sce$scDblFinder.class)
# singlet doublet
# 12177 746
```
```{r}
write.csv(as.data.frame(colData(norm.co.sce)), here("outs", file = "doubletscore.csv"))
saveRDS(norm.co.sce, here("data", file = "norm.co.sce.rds"))
```