CorticalOrganoids / scr / oligodendroglia / Integration(comb_ol)_HCA.Rmd
Integration(comb_ol)_HCA.Rmd
Raw
---
title: "Integration(comb_ol)_HCA"
author: "Nina-Lydia Kazakou"
date: "19/09/2021"
output: html_document
---

# Load required libraries
```{r}
library(Seurat)
library(ggsci)
library(dplyr)
library(devtools)
library(tidyverse)
library(here)
library(scales)
library(ggplot2)
```

# Set the colour pallete 
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)
show_col(mycoloursP, labels =F)
```

# Load objects 
```{r}
ol_comb <- readRDS(here("Materials", "GSE115011", "ol_comb_alt.co.oligos_pasca.rds"))

HCA <- readRDS("/Users/s1241040/Desktop/GFP32 vs RC17 Monolayer/Materials/srt_oligos_and_opcs_LS.RDS")
```

```{r}
DimPlot(HCA, cols = mycoloursP[15:40], pt.size = 0.7, group.by = "ol_clusters_named", label = TRUE)
```
```{r fig.width=9, fig.height=8, fig.fullwidth=TRUE}
FeaturePlot(HCA, features = c("SPARC", "OPALIN", "RBFOX1", "FMN1"), ncol = 2)
```


# CleanUp the objects meta.data
```{r}
 # Organoids Oligodendrocytes
keep.metadata.ol_comb <- c("orig.ident", "Cells", "Sample", "Phase", "ClusterID", "dataset")
ol_comb@meta.data <- ol_comb@meta.data[, keep.metadata.ol_comb]
head(ol_comb@meta.data)

# Add new column to meta.data to mark these cells
ol_comb@meta.data$Dataset <- "Organoids_combOL"


# HCA Oligodendrocytes
keep.metadata.HCA <- c("orig.ident", "Tissue", "nad_ol", "Phase", "ol_clusters_named")
HCA@meta.data <- HCA@meta.data[, keep.metadata.HCA]
head(HCA@meta.data)

# Add new column to meta.data to mark these cells
HCA@meta.data$Dataset <- "HCA_OL"
HCA@meta.data$ClusterID <- HCA@meta.data$ol_clusters_named
```

# Label Transfer
```{r}
lt_anchors <- FindTransferAnchors(reference = HCA , query = ol_comb, 
    dims = 1:30)
predictions <- TransferData(anchorset = lt_anchors, refdata = HCA$ol_clusters_named, 
    dims = 1:30)
lt_ol_comb <- AddMetaData(ol_comb, metadata = predictions)
```

```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE}
DimPlot(lt_ol_comb, group.by = "predicted.id", split.by = "predicted.id", 
        cols = mycoloursP[6:40], ncol = 2,
        pt.size = 1)
DimPlot(lt_ol_comb, group.by = "dataset", split.by = "predicted.id", 
        cols = mycoloursP[6:40], ncol = 2,
        pt.size = 1)
```
Note: Oligo E is one of the RBFOX1 positive populations

# Try RPCA: a slightly modified workflow for the integration of scRNA-seq datasets.
Instead of utilizing canonical correlation analysis (‘CCA’) to identify anchors, we instead utilize reciprocal PCA (‘RPCA’). When determining anchors between any two datasets using RPCA, we project each dataset into the others PCA space and constrain the anchors by the same mutual neighborhood requirement.

By identifying shared sources of variation between datasets, CCA is well-suited for identifying anchors when cell types are conserved, but there are very substantial differences in gene expression across experiments. CCA-based integration therefore enables integrative analysis when experimental conditions or disease states introduce very strong expression shifts, or when integrating datasets across modalities and species. However, CCA-based integration may also lead to overcorrection, especially when a large proportion of cells are non-overlapping across datasets.

RPCA-based integration runs significantly faster, and also represents a more conservative approach where cells in different biological states are less likely to ‘align’ after integration. We therefore,recommend RPCA during integrative analysis where: * A substantial fraction of cells in one dataset have no matching type in the other * Datasets originate from the same platform (i.e. multiple lanes of 10x genomics) *

```{r}
srt_list <- list(ol_comb, HCA)

# Normalize and identify variable features for each dataset independently
srt_list<- lapply(X = srt_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets for integration run PCA on each dataset using these features
rpca_features <- SelectIntegrationFeatures(object.list = srt_list)

srt_list <- lapply(X = srt_list, FUN = function(x) {
    x <- ScaleData(x, features = rpca_features, verbose = FALSE)
    x <- RunPCA(x, features = rpca_features, verbose = FALSE)
})
```

```{r}
# Identify common anchors
rpca_anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = rpca_features, reduction = "rpca", k.anchor = 15)
 ## To increase the strength of alignment I can increase the k.anchor, i.e. to 20

# this command creates an 'integrated' data assay
rpca_combined <- IntegrateData(anchorset = rpca_anchors)
```

```{r}
# Run a single integrated analysis on all cells
DefaultAssay(rpca_combined) <- "integrated"

# Run the standard workflow for visualization and clustering
rpca_combined <- ScaleData(rpca_combined, verbose = FALSE)
rpca_combined <- RunPCA(rpca_combined, npcs = 30, verbose = FALSE)
rpca_combined <- RunUMAP(rpca_combined, reduction = "pca", dims = 1:30)
rpca_combined <- FindNeighbors(rpca_combined, reduction = "pca", dims = 1:30)
rpca_combined <- FindClusters(rpca_combined, resolution = 0.5)

saveRDS(rpca_combined, here("Materials", "HumanOL", "HCA", file = "rpca_combined_HCA_ol_comb.rds"))
```

# Vizualize RPCA
```{r}
DimPlot(rpca_combined, pt.size = 0.7, cols = mycoloursP[15:40], group.by = "Dataset") & NoAxes()
DimPlot(rpca_combined, group.by = "Dataset", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[15:40]) & NoAxes()
```

```{r fig.width=12, fig.height=6, fig.fullwidth=TRUE}
DimPlot(rpca_combined, group.by = "ClusterID", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend()
DimPlot(rpca_combined, group.by = "ClusterID", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend() 
```



# Run CCA
```{r}
object.list <- list(ol_comb, HCA)

# Normalize and identify variable features for each dataset independently
object.list<- lapply(X = object.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# Select features that are repeatedly variable across datasets for integration
cca_features <- SelectIntegrationFeatures(object.list = object.list)

# Create anchors to integrate the datasets
cca_anchors <- FindIntegrationAnchors(object.list = object.list, anchor.features = cca_features, k.anchor = 15)

# Create an 'integrated' data assay 
cca_combined <- IntegrateData(anchorset = cca_anchors)
```

# Perform integrated analysis 
```{r}
# Specify that we will perform downstream analysis on the corrected data; set the assay to "integrated"
DefaultAssay(cca_combined) <- "integrated"

# Run the standard workflow for visualization and clustering
cca_combined <- ScaleData(cca_combined, verbose = FALSE)
cca_combined <- RunPCA(cca_combined, verbose = FALSE)

# Choose the number of PCs
ElbowPlot(cca_combined, ndims = 50) #20
cca_combined <- RunUMAP(cca_combined, reduction = "pca", dims = 1:30)

cca_combined <- FindNeighbors(cca_combined, reduction = "pca", dims = 1:30)
cca_combined <- FindClusters(cca_combined, resolution = 0.5)

saveRDS(cca_combined, here("Materials", "HumanOL", "HCA", file = "cca_combined_HCA_ol_comb.rds"))
```

# Plot Cells
```{r}
DimPlot(cca_combined, pt.size = 0.7, cols = mycoloursP[15:40]) & NoAxes()

DimPlot(cca_combined, pt.size = 0.7, cols = mycoloursP[15:40], group.by = "Dataset") & NoAxes()
DimPlot(cca_combined, group.by = "Dataset", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[15:40]) & NoAxes()
```

```{r fig.width=12, fig.height=6, fig.fullwidth=TRUE}
DimPlot(cca_combined, group.by = "ClusterID", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend()
DimPlot(cca_combined, group.by = "ClusterID", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend()
```

```{r}
sessionInfo()
```