--- title: "Organoids_QC" author: "Nina-Lydia Kazakou" date: "18 May 2021" output: html_document --- # load libraries ```{r} library(SingleCellExperiment) library(Seurat) library(scater) library(scran) library(devtools) library(dplyr) library(ggsci) library(tidyverse) library(Matrix) library(scales) library(here) ``` # set working directory ```{r} setwd("/exports/eddie/scratch/s1241040/BO/Ananlysis/") ``` # Load fastq files & Create Seurat Object ```{r} T1 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk8/outs/filtered_feature_bc_matrix/") T2 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk16/outs/filtered_feature_bc_matrix/") T3 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk21//outs/filtered_feature_bc_matrix/") T4 <- Read10X("/exports/eddie/scratch/s1241040/BO/CellRanger_outputs/14462WApool01__BO_wk25///outs/filtered_feature_bc_matrix/") wk8 <- CreateSeuratObject(counts = T1, project = "Cortical_Organoids_wk8") wk16 <- CreateSeuratObject(counts = T2, project = "Cortical_Organoids_wk16") wk21 <- CreateSeuratObject(counts = T3, project = "Cortical_Organoids_wk21") wk25 <- CreateSeuratObject(counts = T4, project = "Cortical_Organoids_wk25") CO.merge <- merge(x = wk8, y = c(wk16, wk21, wk25), add.cell.id = c("wk8", "wk16", "wk21", "wk25"), project = "Cortical Brain Organoids") View(CO.merge@meta.data) sparse.size <- object.size(CO.merge) sparse.size #705283216 bytes ``` # Populate the metadata ```{r} # Add number of genes per UMI for each cell to metadata.filt CO.merge$log10GenesPerUMI <- log10(CO.merge$nFeature_RNA) / log10(CO.merge$nCount_RNA) # Calculate the percentage of mitochondrial genes CO.merge[["percent.mito"]] <- PercentageFeatureSet(CO.merge, pattern = "^MT-") # Calculate the percentage of ribosomal genes CO.merge[["percent.ribo"]] <- PercentageFeatureSet(CO.merge, pattern = "^RP[SL]") # Add 10x kit chemistry version chem <- rep("v3.1", ncol(CO.merge)) CO.merge <- AddMetaData(CO.merge, chem, col.name = "Chemistry") ## Create a data.frame to add a few more meta.data, so that there is no confusion that I will bind in the end with my object metadata.filt <- CO.merge@meta.data # Add a Cells column metadata.filt$Cells <- rownames(metadata.filt) # Add a column specific to each Sample metadata.filt$Sample <- NA t1 <- grepl("^wk8_", metadata.filt$Cells) t2 <- grepl("^wk16_", metadata.filt$Cells) t3 <- grepl("^wk21_", metadata.filt$Cells) t4 <- grepl("^wk25_", metadata.filt$Cells) metadata.filt$Sample[which(t1)] <- "wk8" metadata.filt$Sample[which(t2)] <- "wk16" metadata.filt$Sample[which(t3)] <- "wk21" metadata.filt$Sample[which(t4)] <- "wk25" # Megre data.frame with Seurat Object CO.merge@meta.data <- metadata.filt View(CO.merge@meta.data) ``` # Set the colour pallete ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Do quick QC VlnPlots ```{r} VlnPlot(CO.merge, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3, pt.size = 0.02, cols = mycoloursP) ``` # Save merged seurat.object ```{r} saveRDS(CO.merge, here("data", file = "CO.merge.object.rds")) ``` # Filter out unwanted genes ```{r} # Generate SCE object co.sce <- as.SingleCellExperiment(CO.merge) # Check the number of genes nrow(co.sce) #36601 keep_feature <- rowSums(counts(co.sce) > 0) >= 10 co.sce <- co.sce[keep_feature, ] nrow(co.sce) #22735 ``` # Filter out low quality cells ```{r} df <- perCellQCMetrics(co.sce, subsets=list(Mito=grep("MT-", rownames(co.sce)))) df df$Cell.id <- rownames(df) df$Timepoint <- NA C1 <- grepl("^wk8_", df$Cell.id) C2 <- grepl("^wk16_", df$Cell.id) C3 <- grepl("^wk21_",df$Cell.id) C4 <- grepl("^wk25_", df$Cell.id) df$Timepoint[which(C1)] <- "wk8" df$Timepoint[which(C2)] <- "wk16" df$Timepoint[which(C3)] <- "wk21" df$Timepoint[which(C4)] <- "wk25" head(df) ``` ```{r} # Add metrics to the object meta.data co.sce <- addPerCellQC(co.sce, subsets=list(Mito=grep("MT-", rownames(co.sce)))) colnames(colData(co.sce)) ``` ```{r} # Plot data plotColData(co.sce, x = "sum", y="detected", colour_by="Sample") + ggtitle("Total Cells") plotColData(co.sce, x = "Sample", y="sum") + ggtitle("Total Count") plotColData(co.sce, x = "Sample", y="detected") + ggtitle("Detected Features") ``` # Detect low quality cells automatically & check applied thresholds ```{r} # Low UMI Counts qc.lib.lower <- isOutlier(df$sum, log=TRUE, type="lower") lowUMI <- as.numeric(attr(qc.lib.lower, "thresholds")[2]) if(lowUMI == Inf){ lowUMI <-0 } if(min(df$sum)>= lowUMI){ print("Filter for low UMI counts are too permissive. Apply manual set threshold. Default is 100") qc.lib.lower<- df$sum<20 lowUMI<-20 }else{ print("Calculated thresholds seem to filter for low UMI counts") } ``` ```{r} # High UMI Counts qc.lib.higher <-isOutlier(df$sum, log=TRUE, type="higher") higherUMI <- as.numeric(attr(qc.lib.higher, "thresholds")[2]) if(max(df$sum)<= higherUMI){ print("Filter for high UMI counts too permissive. Apply manual set threshold.") qc.lib.higher<- df$sum > 30000 higherUMI<-30000 }else{ print("Calculated thresholds seem to filter for high UMI counts") } ``` ```{r} attr(qc.lib.higher, "thresholds") ``` ```{r} # Low Gene Counts qc.nexprs.lower <- isOutlier(df$detected, log=TRUE, type="lower") lowerFeatures <- as.numeric(attr(qc.nexprs.lower, "thresholds")[2]) if(min(df$detected)>= lowerFeatures | lowerFeatures < 10){ print("Filter for high gene counts too permissive. Apply manual set threshold.") qc.nexprs.lower<-(df$detected)<10 lowerFeatures<-10 }else{ print("Calculated thresholds seem to filter for high gene counts") } ``` ```{r} attr(qc.nexprs.lower, "thresholds") ``` ```{r} # High Gene Counts qc.nexprs.higher <- isOutlier(df$detected, log=TRUE, type="higher") higherFeatures <- as.numeric(attr(qc.nexprs.higher, "thresholds")[2]) if(max(df$detected)<= higherFeatures){ print("Filter for high gene counts too permissive. Apply manual set threshold.") qc.nexprs.higher<-(df$detected)> 8000 higherFeatures<-8000 }else{ print("Calculated thresholds seem to filter for high gene counts") } ``` ```{r} # Mitochondrial Genes qc.mito <- isOutlier(df$subsets_Mito_percent, type="higher") attr(qc.mito, "thresholds") percMitoThres<- as.numeric(attr(qc.mito, "thresholds")) ``` # Apply manual thresholds where the is.Outlier was too permissive ```{r} # Low UMI Counts; use the default which is 100 qc.lib.low <- df$sum < 100 # High Gene Counts; apply different thresholds for separate samples qc.nexprs.high.wk8 <- (df$detected > 3000 & df$Timepoint == "wk8") qc.nexprs.high.other <- (df$detected > 7500 & df$Timepoint %in% c("wk16", "wk21", "wk25")) sum(qc.nexprs.high.wk8) #9034 sum(qc.nexprs.high.other) #5410 qc.nexprs.high <- cbind(qc.nexprs.high.other, qc.nexprs.high.wk8) unique(qc.nexprs.high) ``` # Create a vector that contains all the low quality cells ```{r} discard <- qc.lib.low | qc.lib.higher | qc.nexprs.lower | qc.nexprs.high.wk8 | qc.nexprs.high.other | qc.mito ``` # Create output file with thresholds ```{r} filterDF <- DataFrame(LibSize_low = sum(qc.lib.low), LibSize_low_threshold = lowUMI, LibSize_high = sum(qc.lib.higher), LibSize_high_threshold = higherUMI, NExprs_low = sum(qc.nexprs.lower), NExprs_low_threshold = lowerFeatures, NExprs_high = sum(qc.nexprs.high), NExprs_high_threshold = higherFeatures, MitoPerc = sum(qc.mito), MitoPeec_threshold = percMitoThres, Total = sum(discard)) write.csv(filterDF, here("outs", file = "cell_qc_threshold_table.csv")) ``` # Add the data to the object meta.data (optional) ```{r} new.dataframe <- data.frame(low_lib_size = qc.lib.low, large_lib_size = qc.lib.higher, low_n_features = qc.nexprs.lower, high_n_features_wk8 = qc.nexprs.high.wk8, high_n_features_other = qc.nexprs.high.other, high_subsets_mito_percent = qc.mito, discard = discard) colData(co.sce) <- cbind(colData(co.sce), new.dataframe) ``` # Add a column to the object meta.data with the cells that failed QC ```{r} co.sce$ScaterQC_failed <- new.dataframe$discard colnames(colData(co.sce)) ``` # Plot Cells ```{r} plotColData(co.sce, x = "sum", y="detected", colour_by="Sample") + ggtitle("Total Cells") plotColData(co.sce, x = "Sample", y="sum", colour_by="ScaterQC_failed") + ggtitle("Total Count") plotColData(co.sce, x = "Sample", y="detected", colour_by="ScaterQC_failed") + ggtitle("Detected Features") plotColData(co.sce, x="Sample", y="subsets_Mito_percent", colour_by="ScaterQC_failed") + ggtitle("Mito percent") plotColData(co.sce, x="sum", y="subsets_Mito_percent", colour_by="ScaterQC_failed", other_fields = "Sample") + facet_wrap(~Sample) ``` ```{r} UMIplot <- plotColData(co.sce, x="Sample", y="sum", colour_by="ScaterQC_failed") + scale_y_log10() + ggtitle("Total UMI count per cell") + guides(fill=guide_legend(title= "Failed Scater QC")) FeaturePlotQC <- plotColData(co.sce, x="Sample", y="detected", colour_by="ScaterQC_failed") + scale_y_log10() + ggtitle("Detected total features per cell") + guides(fill=guide_legend(title= "Failed Scater QC")) MitoPlot<-plotColData(co.sce, x="Sample", y="percent.mito", colour_by="ScaterQC_failed") + ggtitle("Percent mitochondrial genes per cell") + guides(fill=guide_legend(title= "Failed Scater QC")) myQCPlotList<- list() myQCPlotList[[1]]<-UMIplot myQCPlotList[[2]]<-FeaturePlotQC myQCPlotList[[3]]<-MitoPlot mp <- multiplot(plotlist = myQCPlotList, cols = 1) dir.create(here("outs", "QC")) pdf(here("outs", "QC", file = "Scater_Filterplot.pdf"), width = 7, height = 6) print(mp) dev.off() ``` ```{r} scatter <- plotColData(co.sce, x = "sum", y = "percent.mito", colour_by = "ScaterQC_failed") pdf(here("outs", "QC", file = "ScatterPlot.pdf"), width = 5, height = 5) print(scatter) dev.off() ``` # Create a new sce without the low quality cells ```{r} filtered.co.sce <- co.sce[,!new.dataframe$discard] ``` ```{r} saveRDS(filtered.co.sce, here("data", file = "filtered.co.sce.rds")) ```