---
title: "CellRangerQC"
author: "Nina-Lydia Kazakou"
date: "19 May 2021"
output: html_document
---
# load libraries
```{r}
library(ggplot2)
library(tidyr)
library(ggsci)
```
# load data
```{r}
# Read the Cell Ranger quality-statistics table into `data`.
# read.csv() keeps its default check.names = TRUE, so column names are made
# syntactic; the rest of this document refers to them in dotted form.
data <- read.csv(
  file = "/exports/eddie/scratch/s1241040/BO/Ananlysis/outs/CellRanger_Quality_Stats.csv"
)
# Show the available column names.
colnames(data)
```
# Pick colour palettes
```{r}
# Build several ggsci palettes at 70% opacity and concatenate them into one
# long colour vector (`mycoloursP`) that the plots below index into.
mypal <- pal_npg(palette = "nrc", alpha = 0.7)(10)
mypal2 <- pal_tron(palette = "legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet(palette = "lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = "springfield", alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = "schwifty", alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = "planetexpress", alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = "uniform", alpha = 0.7)(5)

# Order matters: later chunks pick colours by position in this vector.
mycoloursP <- c(
  mypal, mypal2, mypal3, mypal4,
  mypal5, mypal6, mypal7
)
```
# Summary Statistics
1. Mean number of cells captured
```{r}
# Average of the Estimated.Number.of.Cells column over all rows of `data`.
mean(data[["Estimated.Number.of.Cells"]])
```
2. Mean number of reads per cell
```{r}
# Average of the Mean.Reads.per.Cell column over all rows of `data`.
mean(data[["Mean.Reads.per.Cell"]])
```
3. Mean number of reads
```{r}
# Average of the Number.of.Reads column over all rows of `data`.
mean(data[["Number.of.Reads"]])
```
4. Mean Reads Mapped Confidently to Intronic Regions
```{r}
# Average of the confidently-mapped intronic reads column.
mean(data[["Reads.Mapped.Confidently.to.Intronic.Regions"]])
```
5. Mean Reads Mapped Confidently to Exonic Regions
```{r}
# Average of the confidently-mapped exonic reads column.
mean(data[["Reads.Mapped.Confidently.to.Exonic.Regions"]])
```
6. Mean Reads Mapped Confidently to Intergenic Regions
```{r}
# Average of the confidently-mapped intergenic reads column.
mean(data[["Reads.Mapped.Confidently.to.Intergenic.Regions"]])
```
7. Mean Reads Mapped Antisense to Gene
```{r}
# Average of the Reads.Mapped.Antisense.to.Gene column.
mean(data[["Reads.Mapped.Antisense.to.Gene"]])
```
8. Mean Reads Mapped Confidently to Transcriptome
```{r}
# Average of the confidently-mapped-to-transcriptome reads column.
mean(data[["Reads.Mapped.Confidently.to.Transcriptome"]])
```
9. Mean Fraction Reads in Cells
```{r}
# Average of the Fraction.Reads.in.Cells column over all rows of `data`.
mean(data[["Fraction.Reads.in.Cells"]])
```
10. Mean of Median Genes per Cell
```{r}
# Average, across rows of `data`, of the Median.Genes.per.Cell column.
mean(data[["Median.Genes.per.Cell"]])
```
# Plots
```{r}
# Bar chart of exonic reads as a percentage of (exonic + intronic) confidently
# mapped reads, one bar per Timepoint.
# The ratio is scaled to 0-100 so the plotted values agree with the
# "Percentage" axis label; multiplying both numerator and denominator by 100
# would cancel out and leave a 0-1 fraction. The dashed reference lines sit at
# 50% and 70% on the same scale.
ggplot(
  data,
  aes(
    x = Timepoint,
    y = 100 * Reads.Mapped.Confidently.to.Exonic.Regions /
      (Reads.Mapped.Confidently.to.Intronic.Regions +
        Reads.Mapped.Confidently.to.Exonic.Regions)
  )
) +
  geom_bar(stat = "identity") +
  ylab("Percentage exonic reads") +
  xlab("Sample ID") +
  theme_minimal(14) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # No fill aesthetic is mapped above, so this scale does not recolour the bars.
  scale_fill_manual(values = c(mycoloursP[17], mycoloursP[16])) +
  geom_hline(
    yintercept = 50,
    linetype = "dashed", color = "blue", size = 1
  ) +
  geom_hline(
    yintercept = 70,
    linetype = "dashed", color = "blue", size = 1
  ) +
  theme(legend.position = "none")
```
```{r}
# Reshape the intronic/exonic mapping columns into long format: `IntEx` holds
# the source column name (as a factor, in column order) and `percentage` holds
# the value.
long_data <- gather(
  data,
  key = "IntEx", value = "percentage",
  Reads.Mapped.Confidently.to.Intronic.Regions:Reads.Mapped.Confidently.to.Exonic.Regions,
  factor_key = TRUE
)

# Label each row as intronic or exonic. The comparison must use the
# dot-separated column name that `data` actually carries; an
# underscore-separated string never matches and would label every row "exonic".
long_data$spliced_status <- ifelse(
  long_data$IntEx == "Reads.Mapped.Confidently.to.Intronic.Regions",
  "intronic", "exonic"
)
```
```{r}
# Stacked bars of the intronic/exonic values (multiplied by 100) per Timepoint,
# coloured by spliced_status. Dashed lines mark the mean intronic value (blue),
# the mean exonic value (red) and the mean of their sum (yellow).
ggplot(
  long_data,
  aes(x = Timepoint, y = percentage * 100, fill = spliced_status)
) +
  geom_col(position = "stack") +
  scale_fill_manual(values = mycoloursP) +
  theme_minimal() +
  labs(x = "sample ID", y = "percentage") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Reads.Mapped.Confidently.to.Intronic.Regions) * 100,
    linetype = "dashed", color = "blue", size = 1
  ) +
  geom_hline(
    yintercept = mean(data$Reads.Mapped.Confidently.to.Exonic.Regions) * 100,
    linetype = "dashed", color = "red", size = 1
  ) +
  geom_hline(
    yintercept = mean(
      data$Reads.Mapped.Confidently.to.Exonic.Regions +
        data$Reads.Mapped.Confidently.to.Intronic.Regions
    ) * 100,
    linetype = "dashed", color = "yellow", size = 1
  )
```
```{r}
# Same intronic/exonic split, but each bar is normalised to height 1
# (position = "fill"); the solid black line marks the 0.5 level.
ggplot(
  long_data,
  aes(x = Timepoint, y = percentage, fill = spliced_status)
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  labs(x = "sample ID", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(yintercept = 0.5, color = "black", size = 1)
```
```{r}
# Remainder after subtracting the exonic and intronic values from 1, stored as
# a new `other` column on `data`.
data$other <- with(
  data,
  1 - (Reads.Mapped.Confidently.to.Exonic.Regions +
    Reads.Mapped.Confidently.to.Intronic.Regions)
)

# One bar per Timepoint for `other`; the red line is its mean.
ggplot(data, aes(x = Timepoint, y = other)) +
  geom_col() +
  # No fill aesthetic is mapped, so this scale does not recolour the bars.
  scale_fill_manual(values = mycoloursP[6]) +
  theme_minimal() +
  labs(x = "sample ID", y = "other than intronic or exonic reads") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(yintercept = mean(data$other), color = "red", size = 1)
```
```{r}
# Estimated.Number.of.Cells per Timepoint; the red line is the column mean.
ggplot(data, aes(x = Timepoint, y = Estimated.Number.of.Cells)) +
  geom_col() +
  # No fill aesthetic is mapped, so this scale does not recolour the bars.
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  labs(x = "sample ID", y = "Number of cells") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Estimated.Number.of.Cells),
    color = "red", size = 1
  )
```
```{r}
# Mean.Reads.per.Cell per Timepoint; the red line is the column mean.
ggplot(data, aes(x = Timepoint, y = Mean.Reads.per.Cell)) +
  geom_col() +
  # No fill aesthetic is mapped, so this scale does not recolour the bars.
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  labs(x = "sample ID", y = "Mean reads per cell") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Mean.Reads.per.Cell),
    color = "red", size = 1
  )
```
```{r}
# Number.of.Reads per Timepoint; the red line is the column mean.
ggplot(data, aes(x = Timepoint, y = Number.of.Reads)) +
  geom_col() +
  # No fill aesthetic is mapped, so this scale does not recolour the bars.
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  labs(x = "sample ID", y = "Mean number of reads") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Number.of.Reads),
    color = "red", size = 1
  )
```
```{r}
# Fraction.Reads.in.Cells per Timepoint; the red line is the column mean.
ggplot(data, aes(x = Timepoint, y = Fraction.Reads.in.Cells)) +
  geom_col() +
  # No fill aesthetic is mapped, so this scale does not recolour the bars.
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  labs(x = "sample ID", y = "Fraction reads in cells") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Fraction.Reads.in.Cells),
    color = "red", size = 1
  )
```
```{r}
# Median.Genes.per.Cell per Timepoint; the red line is the column mean.
ggplot(data, aes(x = Timepoint, y = Median.Genes.per.Cell)) +
  geom_col() +
  # No fill aesthetic is mapped, so this scale does not recolour the bars.
  scale_fill_manual(values = mycoloursP[4:5]) +
  theme_minimal() +
  labs(x = "sample ID", y = "Median genes per cell") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  geom_hline(
    yintercept = mean(data$Median.Genes.per.Cell),
    color = "red", size = 1
  )
```