---
title: "CellRangerQC"
author: "Nina-Lydia Kazakou"
date: "19 May 2021"
output: html_document
---

# load libraries

```{r}
library(ggplot2)
library(tidyr)
library(ggsci)
```

# load data

```{r}
# read.csv() is called with its default check.names = TRUE, so the columns are
# addressed by their dotted names (e.g. Estimated.Number.of.Cells) below.
data <- read.csv(
  "/exports/eddie/scratch/s1241040/BO/Ananlysis/outs/CellRanger_Quality_Stats.csv"
)
names(data)

# Fail early with a readable message instead of letting mean()/ggplot() emit
# NA results or cryptic errors further down when a column is absent.
required_cols <- c(
  "Timepoint",
  "Estimated.Number.of.Cells",
  "Mean.Reads.per.Cell",
  "Number.of.Reads",
  "Reads.Mapped.Confidently.to.Intronic.Regions",
  "Reads.Mapped.Confidently.to.Exonic.Regions",
  "Reads.Mapped.Confidently.to.Intergenic.Regions",
  "Reads.Mapped.Antisense.to.Gene",
  "Reads.Mapped.Confidently.to.Transcriptome",
  "Fraction.Reads.in.Cells",
  "Median.Genes.per.Cell"
)
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
  stop(
    "Missing expected columns: ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
```

# Pick colour palettes

```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <- pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)

# One long colour vector; plots below pick colours from it by index.
mycoloursP <- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
```

# Summary Statistics

1. Mean number of cells captured

```{r}
mean(data$Estimated.Number.of.Cells)
```

2. Mean number of reads per cell

```{r}
mean(data$Mean.Reads.per.Cell)
```

3. Mean number of reads

```{r}
mean(data$Number.of.Reads)
```

4. Mean Reads Mapped Confidently to Intronic Regions

```{r}
mean(data$Reads.Mapped.Confidently.to.Intronic.Regions)
```

5. Mean Reads Mapped Confidently to Exonic Regions

```{r}
mean(data$Reads.Mapped.Confidently.to.Exonic.Regions)
```

6. Mean Reads Mapped Confidently to Intergenic Regions

```{r}
mean(data$Reads.Mapped.Confidently.to.Intergenic.Regions)
```

7. Mean Reads Mapped Antisense to Gene

```{r}
mean(data$Reads.Mapped.Antisense.to.Gene)
```

8. Mean Reads Mapped Confidently to Transcriptome

```{r}
mean(data$Reads.Mapped.Confidently.to.Transcriptome)
```

9. Mean Fraction Reads in Cells

```{r}
mean(data$Fraction.Reads.in.Cells)
```

10. Mean of Median Genes per Cell

```{r}
mean(data$Median.Genes.per.Cell)
```

# Plots

```{r}
# Bar chart of one metric per Timepoint with a solid red line at the metric's
# mean over all rows of `df`.
#   df        data frame containing `Timepoint` and the column named by `metric`
#   metric    name (string) of the column drawn on the y axis
#   y_label   y-axis label
#   bar_fill  constant colour for the bars
# Returns the ggplot object (auto-printed when called at top level in a chunk).
plot_metric_bar <- function(df, metric, y_label, bar_fill = mycoloursP[4]) {
  ggplot(data = df, aes(x = Timepoint, y = .data[[metric]])) +
    # The fill is a constant, not a mapped aesthetic, so it is set on the geom;
    # a scale_fill_manual() would have no effect here.
    geom_col(fill = bar_fill) +
    theme_minimal() +
    ylab(y_label) +
    xlab("sample ID") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
    geom_hline(yintercept = mean(df[[metric]]), color = "red", size = 1)
}
```

```{r}
# Exonic reads as a percentage of (intronic + exonic) reads, so the axis agrees
# with its "Percentage" label; dashed reference lines at 50 % and 70 %.
ggplot(
  data,
  aes(
    x = Timepoint,
    y = 100 * Reads.Mapped.Confidently.to.Exonic.Regions /
      (Reads.Mapped.Confidently.to.Intronic.Regions +
        Reads.Mapped.Confidently.to.Exonic.Regions)
  )
) +
  geom_col(fill = mycoloursP[17]) +
  ylab("Percentage exonic reads") +
  xlab("Sample ID") +
  theme_minimal(14) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(yintercept = 50, linetype = "dashed", color = "blue", size = 1) +
  geom_hline(yintercept = 70, linetype = "dashed", color = "blue", size = 1)
```

```{r}
# Long format: one row per (input row, intronic/exonic column).
long_data <- pivot_longer(
  data,
  cols = c(
    Reads.Mapped.Confidently.to.Intronic.Regions,
    Reads.Mapped.Confidently.to.Exonic.Regions
  ),
  names_to = "IntEx",
  values_to = "percentage"
)
long_data$IntEx <- factor(
  long_data$IntEx,
  levels = c(
    "Reads.Mapped.Confidently.to.Intronic.Regions",
    "Reads.Mapped.Confidently.to.Exonic.Regions"
  )
)

# IntEx holds the dotted column names, so the comparison must use the dotted
# spelling; an underscore spelling never matches and labels every row "exonic".
long_data$spliced_status <- ifelse(
  long_data$IntEx == "Reads.Mapped.Confidently.to.Intronic.Regions",
  "intronic",
  "exonic"
)
```

```{r}
# Stacked intronic + exonic values (x 100) per Timepoint.
# Dashed lines: blue = mean intronic, red = mean exonic,
# yellow = mean of (exonic + intronic), all x 100.
ggplot(
  data = long_data,
  aes(x = Timepoint, y = percentage * 100, fill = spliced_status)
) +
  geom_col(position = "stack") +
  scale_fill_manual(values = mycoloursP) +
  theme_minimal() +
  ylab("percentage") +
  xlab("sample ID") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Reads.Mapped.Confidently.to.Intronic.Regions) * 100,
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  geom_hline(
    yintercept = mean(data$Reads.Mapped.Confidently.to.Exonic.Regions) * 100,
    linetype = "dashed",
    color = "red",
    size = 1
  ) +
  geom_hline(
    yintercept = mean(
      data$Reads.Mapped.Confidently.to.Exonic.Regions +
        data$Reads.Mapped.Confidently.to.Intronic.Regions
    ) * 100,
    linetype = "dashed",
    color = "yellow",
    size = 1
  )
```

```{r}
# Same data rescaled so each bar sums to 1; black line marks the 0.5 split.
ggplot(
  data = long_data,
  aes(x = Timepoint, y = percentage, fill = spliced_status)
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  ylab("Proportion") +
  xlab("sample ID") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(yintercept = 0.5, color = "black", size = 1)
```

```{r}
# Remainder after removing the exonic and intronic values from 1.
data$other <- 1 - (data$Reads.Mapped.Confidently.to.Exonic.Regions +
  data$Reads.Mapped.Confidently.to.Intronic.Regions)

plot_metric_bar(
  data,
  "other",
  "other than intronic or exonic reads",
  bar_fill = mycoloursP[6]
)
```

```{r}
plot_metric_bar(data, "Estimated.Number.of.Cells", "Number of cells")
```

```{r}
plot_metric_bar(data, "Mean.Reads.per.Cell", "Mean reads per cell")
```

```{r}
# The bars show Number.of.Reads itself; only the red line is a mean.
plot_metric_bar(data, "Number.of.Reads", "Number of reads")
```

```{r}
plot_metric_bar(data, "Fraction.Reads.in.Cells", "Fraction reads in cells")
```

```{r}
plot_metric_bar(data, "Median.Genes.per.Cell", "Median genes per cell")
```