---
title: "CellRangerQC"
author: "Nina-Lydia Kazakou"
date: "19 May 2021"
output: html_document
---

# load libraries
```{r}
library(ggplot2)
library(tidyr)
library(ggsci)
```

# load data
```{r}
# Read the Cell Ranger quality statistics table used by every chunk below.
# NOTE(review): "Ananlysis" in the path looks like a typo, but it may match the
# real directory name on disk -- confirm before changing it.
data <- read.csv(
  file = "/exports/eddie/scratch/s1241040/BO/Ananlysis/outs/CellRanger_Quality_Stats.csv"
)

# Show which columns are available.
colnames(data)
```

# Pick colour palettes
```{r}
# Semi-transparent (alpha = 0.7) colours drawn from several ggsci palettes.
# Each generator is asked for a fixed number of colours: 10, 7, 9, 16, 6, 5, 5.
mypal <- pal_npg(palette = "nrc", alpha = 0.7)(10)
mypal2 <- pal_tron(palette = "legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet(palette = "lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = "springfield", alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = "schwifty", alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = "planetexpress", alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = "uniform", alpha = 0.7)(5)

# One combined pool, in the order above; the plots pick colours from it by
# position.
mycoloursP <- c(
  mypal, mypal2, mypal3, mypal4,
  mypal5, mypal6, mypal7
)
```

# Summary Statistics

1. Mean number of cells captured
```{r}
# Average of the estimated cell count over all rows of `data`.
mean(data[["Estimated.Number.of.Cells"]])
```

2. Mean number of reads per cell
```{r}
# Average of the per-row mean reads per cell.
mean(data[["Mean.Reads.per.Cell"]])
```

3. Mean number of reads
```{r}
# Average number of reads over all rows of `data`.
mean(data[["Number.of.Reads"]])
```

4. Mean Reads Mapped Confidently to Intronic Regions
```{r}
# Average of the confidently-mapped intronic read values over all rows.
mean(data[["Reads.Mapped.Confidently.to.Intronic.Regions"]])
```

5. Mean Reads Mapped Confidently to Exonic Regions
```{r}
# Average of the confidently-mapped exonic read values over all rows.
mean(data[["Reads.Mapped.Confidently.to.Exonic.Regions"]])
```

6. Mean Reads Mapped Confidently to Intergenic Regions
```{r}
# Average of the confidently-mapped intergenic read values over all rows.
mean(data[["Reads.Mapped.Confidently.to.Intergenic.Regions"]])
```

7. Mean Reads Mapped Antisense to Gene
```{r}
# Average of the antisense-to-gene read values over all rows.
mean(data[["Reads.Mapped.Antisense.to.Gene"]])
```

8. Mean Reads Mapped Confidently to Transcriptome
```{r}
# Average of the confidently-mapped-to-transcriptome read values over all rows.
mean(data[["Reads.Mapped.Confidently.to.Transcriptome"]])
```

9. Mean Fraction Reads in Cells
```{r}
# Average fraction of reads in cells over all rows.
mean(data[["Fraction.Reads.in.Cells"]])
```

10. Mean of Median Genes per Cell
```{r}
# Average, over all rows, of the per-row median genes per cell
# (a mean of medians, not an overall median).
mean(data[["Median.Genes.per.Cell"]])
```

# Plots
```{r}
# Exonic share of the confidently mapped exonic + intronic reads, per
# Timepoint. The value is multiplied by 100 so the bars really are percentages,
# matching the y-axis label; the dashed blue reference lines sit at 50% and
# 70%.
# NOTE(review): on ggplot2 >= 3.4 `size` for lines is deprecated in favour of
# `linewidth`; `size` is kept here for compatibility with older installs.
ggplot(
  data,
  aes(
    x = Timepoint,
    y = 100 * Reads.Mapped.Confidently.to.Exonic.Regions /
      (Reads.Mapped.Confidently.to.Intronic.Regions +
        Reads.Mapped.Confidently.to.Exonic.Regions)
  )
) +
  geom_col() +
  geom_hline(
    yintercept = c(50, 70),
    linetype = "dashed", color = "blue", size = 1
  ) +
  labs(x = "Sample ID", y = "Percentage exonic reads") +
  theme_minimal(14) +
  # Rotate the x labels so long timepoint names do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Reshape to long format: one row per original row and per read category, with
# the source column name in `IntEx` and its value in `percentage`.
# The two columns are named explicitly (rather than with a positional `a:b`
# range) so no unrelated column sitting between them can be swept in.
long_data <- pivot_longer(
  data,
  cols = c(
    Reads.Mapped.Confidently.to.Intronic.Regions,
    Reads.Mapped.Confidently.to.Exonic.Regions
  ),
  names_to = "IntEx",
  values_to = "percentage"
)

# Keep `IntEx` as a factor with the intronic column first.
long_data$IntEx <- factor(
  long_data$IntEx,
  levels = c(
    "Reads.Mapped.Confidently.to.Intronic.Regions",
    "Reads.Mapped.Confidently.to.Exonic.Regions"
  )
)

# Short label used as the fill variable in the plots. The comparison string
# must use dots, exactly like the column name; an underscore-separated string
# never matches and labels every row "exonic".
long_data$spliced_status <- ifelse(
  long_data$IntEx == "Reads.Mapped.Confidently.to.Intronic.Regions",
  "intronic",
  "exonic"
)
```

```{r}
# Stacked bars of the intronic / exonic values (scaled by 100) per Timepoint,
# coloured by `spliced_status`. Dashed reference lines, also scaled by 100:
#   blue   = mean intronic value
#   red    = mean exonic value
#   yellow = mean of (exonic + intronic)
ggplot(
  long_data,
  aes(x = Timepoint, y = 100 * percentage, fill = spliced_status)
) +
  geom_col(position = "stack") +
  geom_hline(
    yintercept = 100 * mean(data$Reads.Mapped.Confidently.to.Intronic.Regions),
    linetype = "dashed", color = "blue", size = 1
  ) +
  geom_hline(
    yintercept = 100 * mean(data$Reads.Mapped.Confidently.to.Exonic.Regions),
    linetype = "dashed", color = "red", size = 1
  ) +
  geom_hline(
    yintercept = 100 * mean(
      data$Reads.Mapped.Confidently.to.Exonic.Regions +
        data$Reads.Mapped.Confidently.to.Intronic.Regions
    ),
    linetype = "dashed", color = "yellow", size = 1
  ) +
  scale_fill_manual(values = mycoloursP) +
  labs(x = "sample ID", y = "percentage") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Same data as proportions: position = "fill" rescales every bar to height 1,
# and the solid black line marks the 0.5 level.
ggplot(long_data, aes(x = Timepoint, y = percentage, fill = spliced_status)) +
  geom_col(position = "fill") +
  geom_hline(yintercept = 0.5, color = "black", size = 1) +
  scale_fill_manual(values = mycoloursP[4:5]) +
  labs(x = "sample ID", y = "Proportion") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Remainder after subtracting the exonic and intronic values from 1, stored as
# a new `other` column on `data`.
data$other <- 1 - (
  data$Reads.Mapped.Confidently.to.Exonic.Regions +
    data$Reads.Mapped.Confidently.to.Intronic.Regions
)

# Bars of `other` per Timepoint; the red line is the mean of `other`.
# No fill scale is added: no fill aesthetic is mapped, so a
# scale_fill_manual() call would have no effect on this plot.
ggplot(data, aes(x = Timepoint, y = other)) +
  geom_col() +
  geom_hline(yintercept = mean(data$other), color = "red", size = 1) +
  labs(x = "sample ID", y = "other than intronic or exonic reads") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Estimated number of cells per Timepoint; the red line is the mean over all
# rows of `data`.
# No fill scale is added: no fill aesthetic is mapped, so a
# scale_fill_manual() call would have no effect on this plot.
ggplot(data, aes(x = Timepoint, y = Estimated.Number.of.Cells)) +
  geom_col() +
  geom_hline(
    yintercept = mean(data$Estimated.Number.of.Cells),
    color = "red", size = 1
  ) +
  labs(x = "sample ID", y = "Number of cells") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Mean reads per cell per Timepoint; the red line is the mean over all rows of
# `data`.
# No fill scale is added: no fill aesthetic is mapped, so a
# scale_fill_manual() call would have no effect on this plot.
ggplot(data, aes(x = Timepoint, y = Mean.Reads.per.Cell)) +
  geom_col() +
  geom_hline(
    yintercept = mean(data$Mean.Reads.per.Cell),
    color = "red", size = 1
  ) +
  labs(x = "sample ID", y = "Mean reads per cell") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Number of reads per Timepoint. The bars show each row's own
# `Number.of.Reads` value, so the axis is labelled "Number of reads"; only the
# red line is a mean (taken over all rows of `data`).
# No fill scale is added: no fill aesthetic is mapped, so a
# scale_fill_manual() call would have no effect on this plot.
ggplot(data, aes(x = Timepoint, y = Number.of.Reads)) +
  geom_col() +
  geom_hline(
    yintercept = mean(data$Number.of.Reads),
    color = "red", size = 1
  ) +
  labs(x = "sample ID", y = "Number of reads") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Fraction of reads in cells per Timepoint; the red line is the mean over all
# rows of `data`.
# No fill scale is added: no fill aesthetic is mapped, so a
# scale_fill_manual() call would have no effect on this plot.
ggplot(data, aes(x = Timepoint, y = Fraction.Reads.in.Cells)) +
  geom_col() +
  geom_hline(
    yintercept = mean(data$Fraction.Reads.in.Cells),
    color = "red", size = 1
  ) +
  labs(x = "sample ID", y = "Fraction reads in cells") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```

```{r}
# Median genes per cell per Timepoint; the red line is the mean of those
# per-row medians.
# No fill scale is added: no fill aesthetic is mapped, so a
# scale_fill_manual() call would have no effect on this plot.
ggplot(data, aes(x = Timepoint, y = Median.Genes.per.Cell)) +
  geom_col() +
  geom_hline(
    yintercept = mean(data$Median.Genes.per.Cell),
    color = "red", size = 1
  ) +
  labs(x = "sample ID", y = "Median genes per cell") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
```