Luise-Seeker-Human-WM-Glia / src / 24_RNA_velocity.Rmd
24_RNA_velocity.Rmd
Raw
---
title: "HCA RNA velocity using scVelo in R"
author: "Luise A. Seeker"
date: "23/04/2021"
output: html_document
---

```{r}
library(scater)
library(scran)
library(SingleCellExperiment)
library(velociraptor)
library(Seurat) # to convert Seurat to SCE
library(ggplot2)
library(here)
library(ggsci)


```

```{r, echo = F}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)

mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)

```


# RLoad oligodendroglia Seurat object

```{r}
nad_ol <- nad_ol <- readRDS(here("data", 
                                 "single_nuc_data", 
                                 "oligodendroglia",
                                 "srt_oligos_and_opcs_LS.RDS"))

```


# Read in spliced and unspliced datasets
```{r}
spliced <- readRDS("/Users/lseeker/Documents/Work/HumanCellAtlas/splice_control_out/datasets/02_scater_cell_filtered/spliced/SCE_spliced_gene_cell_filtered.RDS")
unspliced <-  readRDS("/Users/lseeker/Documents/Work/HumanCellAtlas/splice_control_out/datasets/02_scater_cell_filtered/unspliced/SCE_unspliced_gene_cell_filtered.RDS")
```

# Filter for nulclei that are in the oligodendroglia dataset

```{r}

# are colnames of spliced and unspliced the same?
summary(colnames(spliced) == colnames(unspliced))

# no cells must be in a different order

subs_bool <- colnames(spliced) %in% nad_ol@meta.data$Barcode
ol_spliced <- spliced[,subs_bool]

subs_bool_2 <- colnames(unspliced) %in% nad_ol@meta.data$Barcode
ol_unspliced <- unspliced[,subs_bool_2]


```

Now I have the same nuclei in the spliced and unspliced dataset, nut they
are not in the same order yet. 

```{r}
ol_unspliced <- ol_unspliced[,match(colnames(ol_spliced), colnames(ol_unspliced))]

summary(colnames(ol_unspliced) == colnames(ol_spliced))
summary(rownames(ol_unspliced) == rownames(ol_spliced))
```

Now both feature count matrices have the same gene and nuclei order so that the
count data can be matched. 


# Normalise data using scran

Data needs to be normalised. Previously data of different tissues were normalised
separately, but here I try it together. Previously, spliced and unspliced were
added first and then normalised. This is obviously not an option here.


```{r}
set.seed(100)
    clust_sce_sp <- quickCluster(ol_spliced) 
    #deconv_sf <- calculateSumFactors(split_sce, cluster=clust_sce)
    ol_spliced <- computeSumFactors(ol_spliced, 
                                   cluster=clust_sce_sp, 
                                   min.mean=0.1)
    ol_spliced <- logNormCounts(ol_spliced)
    
set.seed(100)
    clust_sce_usp <- quickCluster(ol_unspliced) 
    #deconv_sf <- calculateSumFactors(split_sce, cluster=clust_sce)
    ol_unspliced <- computeSumFactors(ol_unspliced, 
                                   cluster=clust_sce_usp, 
                                   min.mean=0.1)
    ol_unspliced <- logNormCounts(ol_unspliced)
        

```

# Re-order nuclei so that they match the order in the Seurat object

```{r}
ol_unspliced <- ol_unspliced[,match(nad_ol@meta.data$Barcode, colnames(ol_unspliced))]
ol_spliced <- ol_spliced[,match(nad_ol@meta.data$Barcode, colnames(ol_spliced))]

summary(colnames(ol_unspliced) == colnames(ol_spliced))
summary(rownames(ol_unspliced) == rownames(ol_spliced))

summary(colnames(ol_unspliced) == nad_ol@meta.data$Barcode)
```



# Create singleCellExperiment object with the assays "spliced" and "unspliced

```{r}


ol_SCE <- ol_spliced


assays(ol_SCE)$spliced <- assays(ol_spliced)$logcounts
assays(ol_SCE)$unspliced <- assays(ol_unspliced)$logcounts
```

Alternatively I tried the below, but it does not look right
```{r}


#ol_SCE <- as.SingleCellExperiment(nad_ol)

#assays(ol_SCE, withDimnames = FALSE)$spliced <- assays(ol_spliced)$logcounts
#assays(ol_SCE, withDimnames = FALSE)$unspliced <- assays(ol_unspliced)$logcounts
```

# Get top 200 highly variable genes based on the spliced assay
```{r}
dec <- modelGeneVarByPoisson(ol_SCE, assay.type="spliced")
hvgs <- getTopHVGs(dec, n=2000)


```


# Dimensional reduction

```{r}
set.seed(1000101)
ol_SCE <- runPCA(ol_SCE, ncomponents=25, subset_row=hvgs)
ol_SCE <- runTSNE(ol_SCE, dimred="PCA")
#ol_SCE <- runUMAP(ol_SCE, dimred="PCA")

```

# save ol_SCE
```{r}
saveRDS(ol_SCE, here("data", "scVelo_out", "ol_SCE.RDS"))

```

# Run veociraptor
```{r}
velo_out <- scvelo(ol_SCE, assay.X = "spliced", 
    subset.row = hvgs, use.dimred = "PCA")
velo_out

```
# save velo_out
```{r}
dir.create(here("data", "scVelo_out"))
saveRDS(velo_out, here("data", "scVelo_out", "velo_out.RDS"))
```

```{r}

ol_SCE$pseudotime <- velo_out$velocity_pseudotime


# Embedd the velocity vectors
embedded <- embedVelocity(reducedDim(ol_SCE, "TSNE"), velo_out)
grid_df <- gridVectors(reducedDim(ol_SCE, "TSNE"), embedded, resolution=30)


plotTSNE(ol_SCE, colour_by="pseudotime", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.1, y=start.2, xend=end.1, yend=end.2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))

```

```{r}
ol_SCE$ol_clu <- nad_ol@meta.data$ol_clusters_named

plotTSNE(ol_SCE, colour_by="ol_clu", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.1, y=start.2, xend=end.1, yend=end.2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed")) + 
    scale_color_manual(values = mycoloursP[6:40])

```




# Add umap embedding to object
```{r}
ol_SCE
reducedDim(ol_SCE, "umap") <- nad_ol[["umap"]]@cell.embeddings

```
# Run veociraptor
```{r}
velo_out <- scvelo(ol_SCE, assay.X = "spliced", 
    subset.row = hvgs, use.dimred = "PCA")
velo_out

```
# save velo_out
```{r}
dir.create(here("data", "scVelo_out"))
saveRDS(velo_out, here("data", "scVelo_out", "velo_out.RDS"))
```

```{r}

ol_SCE$pseudotime <- velo_out$velocity_pseudotime
ol_SCE$ol_clusters_named <- nad_ol$ol_clusters_named

# Embedd the velocity vectors
embedded <- embedVelocity(reducedDim(ol_SCE, "umap"), velo_out)
grid_df <- gridVectors(reducedDim(ol_SCE, "umap"), embedded, resolution=30)


plotReducedDim(ol_SCE, dimred = "umap", colour_by="pseudotime", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))

```

```{r}
plotReducedDim(ol_SCE, dimred = "umap", colour_by="ol_clusters_named", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed")) + 
    scale_color_manual(values = mycoloursP[6:40])



```

# Replacing matrix X

Above I have been usign the spliced matric as matrix X. Now I'll rerun it with
the conventional matrix that I have used in other analyses as matrix X


# Run veociraptor
```{r}


velo_out <- scvelo(ol_SCE, assay.X = "logcounts", 
    subset.row = hvgs, use.dimred = "PCA")
velo_out

```

```{r}
# Embedd the velocity vectors
embedded <- embedVelocity(reducedDim(ol_SCE, "umap"), velo_out)
grid_df <- gridVectors(reducedDim(ol_SCE, "umap"), embedded, resolution=30)


plotReducedDim(ol_SCE, dimred = "umap", colour_by="pseudotime", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))

```

```{r}
plotReducedDim(ol_SCE, dimred = "umap", colour_by="ol_clusters_named", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed")) + 
    scale_color_manual(values = mycoloursP[6:40])


```

# Subset for tissue and repeat

## BA4

```{r}
bool <- ol_SCE$Tissue == "BA4"
ba4 <- ol_SCE[,bool]

```

### Run veociraptor
```{r}
velo_out_ba4 <- scvelo(ba4, assay.X = "logcounts", 
    subset.row = hvgs, use.dimred = "PCA")
velo_out_ba4

saveRDS(velo_out_ba4, here("data", "scVelo_out", "velo_out_ba4.RDS"))
```

### Embedd and plot
```{r}
# Embedd the velocity vectors
embedded <- embedVelocity(reducedDim(ba4, "umap"), velo_out_ba4)
grid_df <- gridVectors(reducedDim(ba4, "umap"), embedded, resolution=30)


plotReducedDim(ba4, dimred = "umap", colour_by="pseudotime", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))

```

```{r}
plotReducedDim(ba4, dimred = "umap", colour_by="ol_clusters_named", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))+ 
    scale_color_manual(values = mycoloursP[6:40])


```

## CB

```{r}
bool <- ol_SCE$Tissue == "CB"
cb <- ol_SCE[,bool]

```

### Run veociraptor
```{r}
velo_out_cb <- scvelo(cb, assay.X = "logcounts", 
    subset.row = hvgs, use.dimred = "PCA")
velo_out_cb

saveRDS(velo_out_cb, here("data", "scVelo_out", "velo_out_cb.RDS"))
```

### Embedd and plot
```{r}
# Embedd the velocity vectors
embedded <- embedVelocity(reducedDim(cb, "umap"), velo_out_cb)
grid_df <- gridVectors(reducedDim(cb, "umap"), embedded, resolution=30)


plotReducedDim(cb, dimred = "umap", colour_by="pseudotime", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))

```

```{r}
plotReducedDim(cb, dimred = "umap", colour_by="ol_clusters_named", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))+ 
    scale_color_manual(values = mycoloursP[6:40])


```

## CSC

```{r}
bool <- ol_SCE$Tissue == "CSC"
csc <- ol_SCE[,bool]

```

### Run veociraptor
```{r}
velo_out_csc <- scvelo(csc, assay.X = "logcounts", 
    subset.row = hvgs, use.dimred = "PCA")
velo_out_csc

saveRDS(velo_out_csc, here("data", "scVelo_out", "velo_out_csc.RDS"))
```

### Embedd and plot
```{r}
# Embedd the velocity vectors
embedded <- embedVelocity(reducedDim(csc, "umap"), velo_out_csc)
grid_df <- gridVectors(reducedDim(csc, "umap"), embedded, resolution=30)


plotReducedDim(csc, dimred = "umap", colour_by="pseudotime", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))

```

```{r}
plotReducedDim(csc, dimred = "umap", colour_by="ol_clusters_named", point_alpha=0.3) +
    geom_segment(data=grid_df, 
        mapping=aes(x=start.UMAP_1, y=start.UMAP_2, xend=end.UMAP_1, yend=end.UMAP_2), 
        arrow=arrow(length=unit(0.05, "inches"), type="closed"))+ 
    scale_color_manual(values = mycoloursP[6:40])


```

```{r}
sessionInfo()
```