--- title: "Integration(comb_ol)_MonolayerOL" author: "Nina-Lydia Kazakou" date: "20/09/2021" output: html_document --- # Load required libraries ```{r} library(Seurat) library(ggsci) library(dplyr) library(devtools) library(tidyverse) library(here) library(scales) library(ggplot2) ``` # Set the colour pallete ```{r} mypal <- pal_npg("nrc", alpha = 0.7)(10) mypal2 <-pal_tron("legacy", alpha = 0.7)(7) mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9) mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16) mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6) mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5) mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5) mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7) show_col(mycoloursP, labels =F) ``` # Load objects ```{r} ol_comb <- readRDS(here("Materials", "GSE115011", "ol_comb_alt.co.oligos_pasca.rds")) monolayer_RC17 <- readRDS("/Users/s1241040/Desktop/GFP32 vs RC17 Monolayer/Materials/RC17_oligos.rds") ``` # Explore the objects ```{r} # Monolayer Oligodendrocytes head(monolayer_RC17@meta.data) dim(monolayer_RC17@assays$RNA@counts) dim(monolayer_RC17@assays$RNA@data) dim(monolayer_RC17@assays$RNA@scale.data) DimPlot(monolayer_RC17, group.by = "Cluster_id", cols = mycoloursP[10:40], pt.size = 0.7, label = FALSE) & NoAxes() ``` # CleanUp the objects meta.data ```{r} # Organoids Oligodendrocytes keep.metadata.ol_comb <- c("orig.ident", "Cells", "Sample", "Phase", "ClusterID", "dataset") ol_comb@meta.data <- ol_comb@meta.data[, keep.metadata.ol_comb] head(ol_comb@meta.data) # Add new column to meta.data to mark these cells ol_comb@meta.data$Dataset <- "Organoids_combOL" ol_comb@meta.data$ClusterRename <- ol_comb@meta.data$ClusterID Idents(ol_comb) <- "ClusterID" levels(ol_comb) current.ids <- c("OPC", "mOL1", "pri-OPC", "Cyc", "imOL", "OPAC", "other OPC", "mOL2") new.ids <- c("co.OPC", "co.mOL1", "co.pri-OPC", "co.Cyc", "co.imOL", "co.OPAC", "co.other OPC", "co.mOL2") ol_comb@meta.data$ClusterRename <- plyr::mapvalues(x = ol_comb@meta.data$ClusterRename, from = current.ids, to = new.ids) levels(ol_comb@meta.data$ClusterRename) # Monolayer Oligodendrocytes keep.metadata.mono <- c("orig.ident", "Cells", "Sample", "Phase", "Cluster_id") monolayer_RC17@meta.data <- monolayer_RC17@meta.data[, keep.metadata.mono] head(monolayer_RC17@meta.data) # Add new column to meta.data to mark these cells monolayer_RC17@meta.data$Dataset <- "MonolayerOL_RC17" monolayer_RC17@meta.data$ClusterRename <- monolayer_RC17@meta.data$Cluster_id Idents(monolayer_RC17) <- "Cluster_id" levels(monolayer_RC17) current.ids <- c("MAO", "opc1", "cyp2", "opc2", "oligo1", "oapc1", "cop", "oligo3", "cyp1", "opc3", "pri-opc", "oligo2") new.ids <- c("mono.MAO", "mono.OPC1", "mono.Cyp2", "mono.OPC2", "mono.OL1", "mono.OAPC", "mono.COP", "mono.OL3", "mono.Cyp1", "mono.OPC3", "mono.pri-OPC", "mono.OL2") monolayer_RC17@meta.data$ClusterRename <- plyr::mapvalues(x = monolayer_RC17@meta.data$ClusterRename, from = current.ids, to = new.ids) head(monolayer_RC17@meta.data) levels(monolayer_RC17@meta.data$ClusterRename) ``` # Label Transfer ```{r} lt_anchors <- FindTransferAnchors(reference = monolayer_RC17 , query = ol_comb, dims = 1:30) predictions <- TransferData(anchorset = lt_anchors, refdata = monolayer_RC17$ClusterRename, dims = 1:30) lt_ol_comb <- AddMetaData(ol_comb, metadata = predictions) ``` ```{r fig.width=7, fig.height=8, fig.fullwidth=TRUE} DimPlot(lt_ol_comb, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[6:40], ncol = 2, pt.size = 1) DimPlot(lt_ol_comb, group.by = "dataset", split.by = "predicted.id", cols = mycoloursP[6:40], ncol = 2, pt.size = 1) ``` # RPCA: a slightly modified workflow for the integration of scRNA-seq datasets. ```{r} srt_list <- list(ol_comb, monolayer_RC17) # Normalize and identify variable features for each dataset independently srt_list<- lapply(X = srt_list, FUN = function(x) { x <- NormalizeData(x) x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) }) # select features that are repeatedly variable across datasets for integration run PCA on each dataset using these features rpca_features <- SelectIntegrationFeatures(object.list = srt_list) srt_list <- lapply(X = srt_list, FUN = function(x) { x <- ScaleData(x, features = rpca_features, verbose = FALSE) x <- RunPCA(x, features = rpca_features, verbose = FALSE) }) ``` ```{r} # Identify common anchors rpca_anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = rpca_features, reduction = "rpca", k.anchor = 5) ## To increase the strength of alignment I can increase the k.anchor, i.e. to 20 # this command creates an 'integrated' data assay rpca_combined <- IntegrateData(anchorset = rpca_anchors) ``` ```{r} # Run a single integrated analysis on all cells DefaultAssay(rpca_combined) <- "integrated" # Run the standard workflow for visualization and clustering rpca_combined <- ScaleData(rpca_combined, verbose = FALSE) rpca_combined <- RunPCA(rpca_combined, npcs = 30, verbose = FALSE) rpca_combined <- RunUMAP(rpca_combined, reduction = "pca", dims = 1:30) rpca_combined <- FindNeighbors(rpca_combined, reduction = "pca", dims = 1:30) rpca_combined <- FindClusters(rpca_combined, resolution = 0.5) saveRDS(rpca_combined, here("Materials", "HumanOL", "MonolayerOL", file = "rpca_combined_MonolayerOL(RC17)_ol_comb.rds")) ``` # Vizualize RPCA ```{r} DimPlot(rpca_combined, pt.size = 0.7, cols = mycoloursP[5:40], group.by = "Dataset") & NoAxes() DimPlot(rpca_combined, group.by = "Dataset", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[5:40]) & NoAxes() ``` ```{r fig.width=12, fig.height=6, fig.fullwidth=TRUE} DimPlot(rpca_combined, group.by = "ClusterRename", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend() DimPlot(rpca_combined, group.by = "ClusterRename", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend() ``` # Run CCA ```{r} object.list <- list(ol_comb, monolayer_RC17) # Normalize and identify variable features for each dataset independently object.list<- lapply(X = object.list, FUN = function(x) { x <- NormalizeData(x) x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000) }) # Select features that are repeatedly variable across datasets for integration cca_features <- SelectIntegrationFeatures(object.list = object.list) # Create anchors to integrate the datasets cca_anchors <- FindIntegrationAnchors(object.list = object.list, anchor.features = cca_features, k.anchor = 5) # Create an 'integrated' data assay cca_combined <- IntegrateData(anchorset = cca_anchors) ``` # Perform integrated analysis ```{r} # Specify that we will perform downstream analysis on the corrected data; set the assay to "integrated" DefaultAssay(cca_combined) <- "integrated" # Run the standard workflow for visualization and clustering cca_combined <- ScaleData(cca_combined, verbose = FALSE) cca_combined <- RunPCA(cca_combined, verbose = FALSE) # Choose the number of PCs ElbowPlot(cca_combined, ndims = 50) #20 cca_combined <- RunUMAP(cca_combined, reduction = "pca", dims = 1:30) cca_combined <- FindNeighbors(cca_combined, reduction = "pca", dims = 1:30) cca_combined <- FindClusters(cca_combined, resolution = 0.5) saveRDS(cca_combined, here("Materials", "HumanOL", "MonolayerOL", file = "cca_combined_MonolayerOL_ol_comb.rds")) ``` # Plot Cells ```{r} DimPlot(cca_combined, pt.size = 0.7, cols = mycoloursP[5:40]) & NoAxes() DimPlot(cca_combined, pt.size = 0.7, cols = mycoloursP[5:40], group.by = "Dataset") & NoAxes() DimPlot(cca_combined, group.by = "Dataset", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[5:40]) & NoAxes() ``` ```{r fig.width=12, fig.height=6, fig.fullwidth=TRUE} DimPlot(cca_combined, group.by = "ClusterRename", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend() DimPlot(cca_combined, group.by = "ClusterRename", split.by = "Dataset", pt.size = 0.7, cols = mycoloursP[3:40], label = TRUE) & NoAxes() & NoLegend() ``` ```{r} sessionInfo() ```