---
title: "Organoids_QC"
author: "Nina-Lydia Kazakou"
date: "18 May 2021"
output: html_document
---
# load libraries
```{r}
library(SingleCellExperiment)
library(Seurat)
library(scater)
library(scran)
library(devtools)
library(dplyr)
library(ggsci)
library(tidyverse)
library(Matrix)
library(scales)
library(here)
```
# set working directory
```{r}
setwd("/exports/eddie/scratch/s1241040/BO/Ananlysis/")
```
# Load fastq files & Create Seurat Object
```{r}
T1 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk8/outs/filtered_feature_bc_matrix/")
T2 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk16/outs/filtered_feature_bc_matrix/")
T3 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk21//outs/filtered_feature_bc_matrix/")
T4 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk25///outs/filtered_feature_bc_matrix/")
wk8 <- CreateSeuratObject(counts = T1, project = "Cortical_Organoids_wk8")
wk16 <- CreateSeuratObject(counts = T2, project = "Cortical_Organoids_wk16")
wk21 <- CreateSeuratObject(counts = T3, project = "Cortical_Organoids_wk21")
wk25 <- CreateSeuratObject(counts = T4, project = "Cortical_Organoids_wk25")
CO.merge <- merge(x = wk8, y = c(wk16, wk21, wk25), add.cell.id = c("wk8", "wk16", "wk21", "wk25"), project = "Cortical Brain Organoids")
View(CO.merge@meta.data)
sparse.size <- object.size(CO.merge)
sparse.size
#705283216 bytes
```
# Populate the metadata
```{r}
# Add number of genes per UMI for each cell to metadata.filt
CO.merge$log10GenesPerUMI <- log10(CO.merge$nFeature_RNA) / log10(CO.merge$nCount_RNA)
# Calculate the percentage of mitochondrial genes
CO.merge[["percent.mito"]] <- PercentageFeatureSet(CO.merge, pattern = "^MT-")
# Calculate the percentage of ribosomal genes
CO.merge[["percent.ribo"]] <- PercentageFeatureSet(CO.merge, pattern = "^RP[SL]")
# Add 10x kit chemistry version
chem <- rep("v3.1", ncol(CO.merge))
CO.merge <- AddMetaData(CO.merge, chem, col.name = "Chemistry")
## Create a data.frame to add a few more meta.data, so that there is no confusion that I will bind in the end with my object
metadata.filt <- CO.merge@meta.data
# Add a Cells column
metadata.filt$Cells <- rownames(metadata.filt)
# Add a column specific to each Sample
metadata.filt$Sample <- NA
t1 <- grepl("^wk8_", metadata.filt$Cells)
t2 <- grepl("^wk16_", metadata.filt$Cells)
t3 <- grepl("^wk21_", metadata.filt$Cells)
t4 <- grepl("^wk25_", metadata.filt$Cells)
metadata.filt$Sample[which(t1)] <- "wk8"
metadata.filt$Sample[which(t2)] <- "wk16"
metadata.filt$Sample[which(t3)] <- "wk21"
metadata.filt$Sample[which(t4)] <- "wk25"
# Megre data.frame with Seurat Object
CO.merge@meta.data <- metadata.filt
View(CO.merge@meta.data)
```
# Set the colour pallete
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```
# Do quick QC VlnPlots
```{r}
VlnPlot(CO.merge, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3, pt.size = 0.02, cols = mycoloursP)
```
# Save merged seurat.object
```{r}
saveRDS(CO.merge, here("data", file = "CO.merge.object.rds"))
```
# Filter out unwanted genes
```{r}
# Generate SCE object
co.sce <- as.SingleCellExperiment(CO.merge)
# Check the number of genes
nrow(co.sce)
#36601
keep_feature <- rowSums(counts(co.sce) > 0) >= 10
co.sce <- co.sce[keep_feature, ]
nrow(co.sce)
#22735
```
# Filter out low quality cells
```{r}
df <- perCellQCMetrics(co.sce, subsets=list(Mito=grep("MT-", rownames(co.sce))))
df
df$Cell.id <- rownames(df)
df$Timepoint <- NA
C1 <- grepl("^wk8_", df$Cell.id)
C2 <- grepl("^wk16_", df$Cell.id)
C3 <- grepl("^wk21_",df$Cell.id)
C4 <- grepl("^wk25_", df$Cell.id)
df$Timepoint[which(C1)] <- "wk8"
df$Timepoint[which(C2)] <- "wk16"
df$Timepoint[which(C3)] <- "wk21"
df$Timepoint[which(C4)] <- "wk25"
head(df)
```
```{r}
# Add metrics to the object meta.data
co.sce <- addPerCellQC(co.sce, subsets=list(Mito=grep("MT-", rownames(co.sce))))
colnames(colData(co.sce))
```
```{r}
# Plot data
plotColData(co.sce, x = "sum", y="detected", colour_by="Sample") + ggtitle("Total Cells")
plotColData(co.sce, x = "Sample", y="sum") + ggtitle("Total Count")
plotColData(co.sce, x = "Sample", y="detected") + ggtitle("Detected Features")
```
# Detect low quality cells automatically & check applied thresholds
```{r}
# Low UMI Counts
qc.lib.lower <- isOutlier(df$sum, log=TRUE, type="lower")
lowUMI <- as.numeric(attr(qc.lib.lower, "thresholds")[2])
if(lowUMI == Inf){
lowUMI <-0
}
if(min(df$sum)>= lowUMI){
print("Filter for low UMI counts are too permissive. Apply manual set threshold. Default is 100")
qc.lib.lower<- df$sum<20
lowUMI<-20
}else{
print("Calculated thresholds seem to filter for low UMI counts")
}
```
```{r}
# High UMI Counts
qc.lib.higher <-isOutlier(df$sum, log=TRUE, type="higher")
higherUMI <- as.numeric(attr(qc.lib.higher, "thresholds")[2])
if(max(df$sum)<= higherUMI){
print("Filter for high UMI counts too permissive. Apply manual set threshold.")
qc.lib.higher<- df$sum > 30000
higherUMI<-30000
}else{
print("Calculated thresholds seem to filter for high UMI counts")
}
```
```{r}
attr(qc.lib.higher, "thresholds")
```
```{r}
# Low Gene Counts
qc.nexprs.lower <- isOutlier(df$detected, log=TRUE, type="lower")
lowerFeatures <- as.numeric(attr(qc.nexprs.lower, "thresholds")[2])
if(min(df$detected)>= lowerFeatures | lowerFeatures < 10){
print("Filter for high gene counts too permissive. Apply manual set threshold.")
qc.nexprs.lower<-(df$detected)<10
lowerFeatures<-10
}else{
print("Calculated thresholds seem to filter for high gene counts")
}
```
```{r}
attr(qc.nexprs.lower, "thresholds")
```
```{r}
# High Gene Counts
qc.nexprs.higher <- isOutlier(df$detected, log=TRUE, type="higher")
higherFeatures <- as.numeric(attr(qc.nexprs.higher, "thresholds")[2])
if(max(df$detected)<= higherFeatures){
print("Filter for high gene counts too permissive. Apply manual set threshold.")
qc.nexprs.higher<-(df$detected)> 8000
higherFeatures<-8000
}else{
print("Calculated thresholds seem to filter for high gene counts")
}
```
```{r}
# Mitochondrial Genes
qc.mito <- isOutlier(df$subsets_Mito_percent, type="higher")
attr(qc.mito, "thresholds")
percMitoThres<- as.numeric(attr(qc.mito, "thresholds"))
```
# Apply manual thresholds where the is.Outlier was too permissive
```{r}
# Low UMI Counts; use the default which is 100
qc.lib.low <- df$sum < 100
# High Gene Counts; apply different thresholds for separate samples
qc.nexprs.high.wk8 <- (df$detected > 3000 & df$Timepoint == "wk8")
qc.nexprs.high.other <- (df$detected > 7500 & df$Timepoint %in% c("wk16", "wk21", "wk25"))
sum(qc.nexprs.high.wk8) #9034
sum(qc.nexprs.high.other) #5410
qc.nexprs.high <- cbind(qc.nexprs.high.other, qc.nexprs.high.wk8)
unique(qc.nexprs.high)
```
# Create a vector that contains all the low quality cells
```{r}
discard <- qc.lib.low | qc.lib.higher | qc.nexprs.lower | qc.nexprs.high.wk8 | qc.nexprs.high.other | qc.mito
```
# Create output file with thresholds
```{r}
filterDF <- DataFrame(LibSize_low = sum(qc.lib.low),
LibSize_low_threshold = lowUMI,
LibSize_high = sum(qc.lib.higher),
LibSize_high_threshold = higherUMI,
NExprs_low = sum(qc.nexprs.lower),
NExprs_low_threshold = lowerFeatures,
NExprs_high = sum(qc.nexprs.high),
NExprs_high_threshold = higherFeatures,
MitoPerc = sum(qc.mito),
MitoPeec_threshold = percMitoThres,
Total = sum(discard))
write.csv(filterDF, here("outs", file = "cell_qc_threshold_table.csv"))
```
# Add the data to the object meta.data (optional)
```{r}
new.dataframe <- data.frame(low_lib_size = qc.lib.low,
large_lib_size = qc.lib.higher,
low_n_features = qc.nexprs.lower,
high_n_features_wk8 = qc.nexprs.high.wk8,
high_n_features_other = qc.nexprs.high.other,
high_subsets_mito_percent = qc.mito,
discard = discard)
colData(co.sce) <- cbind(colData(co.sce), new.dataframe)
```
# Add a column to the object meta.data with the cells that failed QC
```{r}
co.sce$ScaterQC_failed <- new.dataframe$discard
colnames(colData(co.sce))
```
# Plot Cells
```{r}
plotColData(co.sce, x = "sum", y="detected", colour_by="Sample") + ggtitle("Total Cells")
plotColData(co.sce, x = "Sample", y="sum", colour_by="ScaterQC_failed") + ggtitle("Total Count")
plotColData(co.sce, x = "Sample", y="detected", colour_by="ScaterQC_failed") + ggtitle("Detected Features")
plotColData(co.sce, x="Sample", y="subsets_Mito_percent", colour_by="ScaterQC_failed") + ggtitle("Mito percent")
plotColData(co.sce, x="sum", y="subsets_Mito_percent", colour_by="ScaterQC_failed", other_fields = "Sample") + facet_wrap(~Sample)
```
```{r}
UMIplot <- plotColData(co.sce,
x="Sample",
y="sum",
colour_by="ScaterQC_failed") +
scale_y_log10() +
ggtitle("Total UMI count per cell") +
guides(fill=guide_legend(title= "Failed Scater QC"))
FeaturePlotQC <- plotColData(co.sce,
x="Sample",
y="detected",
colour_by="ScaterQC_failed") +
scale_y_log10() +
ggtitle("Detected total features per cell") +
guides(fill=guide_legend(title= "Failed Scater QC"))
MitoPlot<-plotColData(co.sce,
x="Sample",
y="percent.mito",
colour_by="ScaterQC_failed") +
ggtitle("Percent mitochondrial genes per cell") +
guides(fill=guide_legend(title= "Failed Scater QC"))
myQCPlotList<- list()
myQCPlotList[[1]]<-UMIplot
myQCPlotList[[2]]<-FeaturePlotQC
myQCPlotList[[3]]<-MitoPlot
mp <- multiplot(plotlist = myQCPlotList, cols = 1)
dir.create(here("outs", "QC"))
pdf(here("outs", "QC", file = "Scater_Filterplot.pdf"), width = 7, height = 6)
print(mp)
dev.off()
```
```{r}
scatter <- plotColData(co.sce,
x = "sum",
y = "percent.mito",
colour_by = "ScaterQC_failed")
pdf(here("outs", "QC", file = "ScatterPlot.pdf"), width = 5, height = 5)
print(scatter)
dev.off()
```
# Create a new sce without the low quality cells
```{r}
filtered.co.sce <- co.sce[,!new.dataframe$discard]
```
```{r}
saveRDS(filtered.co.sce, here("data", file = "filtered.co.sce.rds"))
```