MonolayerCultures / src / Integrations / Nat19 / Int_RC17_GFP32_Nat19.Rmd
Int_RC17_GFP32_Nat19.Rmd
Raw
---
title: "Int_RC17_GFP32_Nat19"
author: "Nina-Lydia Kazakou"
date: "31/03/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Here, I will bring together 4 different scripts from my original analysis ("Integration_RC17_CtrlNat19", "Integration_GFP32_Nat19", "CCA_GFP32&RC17_vs_SarahPostMortem", RC17 & Ctrl post-mortem Label Transfer & Heatmap & CCA) that aim to an integrative analysis between the control oligodendroglia from the Nat19 dataset and the monolayer oligodendroglia. I will make the structure a bit tidier and add the sessionInfo, but I will not change the original scripts; at least not more that redirecting pathways to the right folder, under this project.

# Set-up

### Load libraries 
```{r message=FALSE, warning=FALSE}
library(Seurat)
library(here)
library(ggsci)
library(ggplot2)
library(dplyr)
library(corrplot)
library(RColorBrewer)
```

### Load colour palets
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
```


### Load objects 
```{r}
RC17 <- readRDS(here("data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds"))

GFP32 <- readRDS(here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds"))

Nat19 <- load(here("data", "Processed", "Original_Objects", "Nat19_OL_v3.Robj"))
```

#### Adjust metadata
```{r}
# RC17
RC17@meta.data$dataset <- "RC17_Monolayer_Oligodendroglia"

# GFP32
Idents(GFP32) <- "ClusterID"
GFP32 <- subset(GFP32, idents = c("pre-OPCs", "OPCs_1", "OPCs_2", "Oligos_1", "Oligos_2", "Oligos_3"))
GFP32@meta.data$dataset <- "GFP32_Monolayer_Oligodendroglia"
GFP32@meta.data$Author <-"Kazakou et al."

#Nat19
Idents(Nature19.OL_v3) <- "Condition"
Nat19 <- subset(Nature19.OL_v3, idents = c("Ctrl"))
Nat19@meta.data$ClusterID <- Nat19@meta.data$Celltype_OL
Nat19@meta.data$dataset <-"Adult_Ctrl_Oligodendroglia"
Nat19@meta.data$Author <-"Jaekel et al., 2019"
Nat19@meta.data$condition <- NULL
Nat19@meta.data$lesion <- NULL
Nat19@meta.data$patient <- NULL
Nat19@meta.data$res.2 <- NULL
Nat19@meta.data$res.4 <- NULL
Nat19@meta.data$res.0.8 <- NULL
Nat19@meta.data$Cluster_res4 <- NULL
Nat19@meta.data$Celltype_res4 <- NULL
Nat19@meta.data$res2_Celltype <- NULL
Nat19@meta.data$V1 <- NULL
Nat19@meta.data$V2 <- NULL
```

```{r}
DimPlot(RC17, group.by = "ClusterID", pt.size = 0.8, cols = mycoloursP[6:40])

DimPlot(GFP32, group.by = "ClusterID", pt.size = 0.8, cols = mycoloursP[10:40])

DimPlot(Nat19, group.by = "ClusterID", pt.size = 0.8, cols = mycoloursP)
```


# RC17 & Nat19

## Label Transfer
```{r}
lt_anchors <- FindTransferAnchors(reference = Nat19 , query = RC17, dims = 1:30)

predictions <- TransferData(anchorset = lt_anchors, refdata = Nat19$ClusterID,  dims = 1:30)

lt_RC17 <- AddMetaData(RC17, metadata = predictions)
```

```{r fig.width=10, fig.height=6, fig.fullwidth=TRUE}
DimPlot(lt_RC17, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[3:40], ncol = 2, pt.size = 1)
```

# Correlation between ol ID and predicted ID
```{r}
met_dat <- lt_RC17@meta.data
cor_df <- data.frame(orig_id = met_dat$ClusterID,
                     pred_id = met_dat$predicted.id)
cor_df$pred_id <- as.factor(cor_df$pred_id)
cor_df$pred_id <- factor(cor_df$pred_id, levels = c("OPCs",
                                                        "COPs",
                                                        "Oligo1",
                                                        "Oligo2",
                                                        "Oligo3",
                                                        "Oligo4",
                                                        "Oligo5",
                                                        "Oligo6", 
                                                        "ImOlGs"))
cor_matr <- matrix(data= 0, 
                   nrow = length(levels(cor_df$orig_id)), 
                   ncol = length(levels(cor_df$pred_id)))
rownames(cor_matr) <- levels(cor_df$orig_id)
colnames(cor_matr) <- levels(cor_df$pred_id)
for(i in 1: nrow(cor_df)){
  name1 <- as.character(cor_df[i, 1])
  name2 <- as.character(cor_df[i, 2])
  cor_matr[name1, name2] <- cor_matr[name1, name2] + 1
}
max(cor_matr)
centr_cor_matr <- cor_matr/ max(cor_matr)
corrplot(centr_cor_matr)
```

```{r}
clu_counts <- table(cor_counts = lt_RC17@meta.data$ClusterID)
resiz_mat <- cor_matr
for(i in length(rownames(resiz_mat))){
  resiz_mat[rownames(resiz_mat)[i], ]  <- resiz_mat["pri-OPC", ] / clu_counts[rownames(resiz_mat)[i]]
}
# Now bring the data on a scale of 0-1 to enable corrplots
centr_resiz_mat <- resiz_mat/ max(resiz_mat)
```

```{r fig.height=5, fig.width=6, message=FALSE, warning=FALSE}
corrplot(centr_resiz_mat, type = "full",method = "circle", 
  col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), 
  cl.lim = c(0,1),
  mar = c(1, 0, 1, 0),
   tl.col = "black") 
```

## CCA
```{r eval=FALSE}
# Create a list of objects
srt_list <- list(Nat19, RC17)

# normalize and identify variable features for each dataset independently
srt_list<- lapply(X = srt_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets for integration
var_features <- SelectIntegrationFeatures(object.list = srt_list)

# create anchors to integrate the datasets
anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = var_features)   

# create an 'integrated' data assay 
ol_comb <- IntegrateData(anchorset = anchors, dims = 1:20)
```

```{r eval=FALSE}
DefaultAssay(ol_comb) <- "integrated"
Idents(ol_comb) <- "dataset"

# Scale data 
all_genes <- rownames(ol_comb)
ol_comb <- ScaleData(ol_comb, features = all_genes)

# Dimentionality reduction
ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE)

# Choose the mumber of PCs
ElbowPlot(ol_comb, ndims = 50) #20

# Clustering
ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindClusters(ol_comb, resolution = 0.5)
```

```{r eval=FALSE}
dir.create(here("data", "Processed", "Integrations", "Nat19"))

saveRDS(ol_comb, here("data", "Processed", "Integrations", "Nat19", "RC17_Nat19.rds"))
```


```{r}
ol_comb <- readRDS(here("data", "Processed", "Integrations", "Nat19", "RC17_Nat19.rds"))
```


```{r fig.height=4, fig.width=8}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset") 
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset", split.by = "dataset") & NoLegend()
```

```{r fig.height=4, fig.width=9}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID")
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = FALSE) & NoLegend()
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = TRUE, label.size = 3) & NoLegend()
```

# GFP32 & Nat19

## Label Transfer
```{r}
lt_anchors <- FindTransferAnchors(reference = Nat19 , query = GFP32, dims = 1:20)

predictions <- TransferData(anchorset = lt_anchors, refdata = Nat19$ClusterID,  dims = 1:20, k.weight = 15)

lt_GFP32 <- AddMetaData(GFP32, metadata = predictions)
```

```{r fig.width=10, fig.height=6, fig.fullwidth=TRUE}
DimPlot(lt_GFP32, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[3:40], ncol = 2, pt.size = 1)
```

## CCA
```{r eval=FALSE}
# Create a list of objects
srt_list <- list(Nat19, GFP32)

# normalize and identify variable features for each dataset independently
srt_list<- lapply(X = srt_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets for integration
var_features <- SelectIntegrationFeatures(object.list = srt_list)

# create anchors to integrate the datasets
anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = var_features)   

# create an 'integrated' data assay 
ol_comb <- IntegrateData(anchorset = anchors, dims = 1:20)
```

```{r eval=FALSE}
DefaultAssay(ol_comb) <- "integrated"
Idents(ol_comb) <- "dataset"

# Scale data 
all_genes <- rownames(ol_comb)
ol_comb <- ScaleData(ol_comb, features = all_genes)

# Dimentionality reduction
ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE)

# Choose the mumber of PCs
ElbowPlot(ol_comb, ndims = 50) #20

# Clustering
ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindClusters(ol_comb, resolution = 0.5)
```


```{r eval=FALSE}
saveRDS(ol_comb, here("data", "Processed", "Integrations", "GFP32_Nat19.rds"))
```

```{r}
ol_comb <- readRDS(here("data", "Processed", "Integrations", "Nat19", "GFP32_Nat19.rds"))
```

```{r fig.height=4, fig.width=8}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset") 
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset", split.by = "dataset") & NoLegend()
```

```{r fig.height=4, fig.width=9}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID")
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = FALSE) & NoLegend()
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = TRUE, label.size = 3) & NoLegend()
```

# RC17 & GFP32 & Nat19
### Intgrate RC17_ol & GFP32_ol
```{r eval=FALSE}
# Create a list of objects
srt_list <- list(RC17, GFP32)

# normalize and identify variable features for each dataset independently
srt_list<- lapply(X = srt_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets for integration
var_features <- SelectIntegrationFeatures(object.list = srt_list)

# create anchors to integrate the datasets
anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = var_features)   

# create an 'integrated' data assay 
ol_comb <- IntegrateData(anchorset = anchors, dims = 1:20)
```

```{r eval=FALSE}
DefaultAssay(ol_comb) <- "integrated"
Idents(ol_comb) <- "Dataset"

# Scale data 
all_genes <- rownames(ol_comb)
ol_comb <- ScaleData(ol_comb, features = all_genes)

# Dimentionality reduction
ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE)

# Choose the mumber of PCs
ElbowPlot(ol_comb, ndims = 50) #20

# Clustering
ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindClusters(ol_comb, resolution = 0.5)
```

```{r eval=FALSE}
saveRDS(ol_comb, here("data", "Processed", "Integrations", "GFP32_RC17", "GFP32_RC17_ol_comb.rds"))
```


```{r}
GFP32_RC17_ol_comb <- readRDS(here("data", "Processed", "Integrations", "GFP32_RC17", "GFP32_RC17_ol_comb.rds"))
```

# Visualization
```{r fig.height=4, fig.width=8}
DimPlot(GFP32_RC17_ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset") 
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset", split.by = "dataset") & NoLegend()
```

```{r fig.height=4, fig.width=9}
DimPlot(GFP32_RC17_ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID")
```

```{r fig.height=4, fig.width=10}
DimPlot(GFP32_RC17_ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = FALSE) & NoLegend()
DimPlot(GFP32_RC17_ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = TRUE, label.size = 3) & NoLegend()
```


```{r}
DimPlot(GFP32_RC17_ol_comb, pt.size = 0.8, cols = mycoloursP[15:40], group.by = "integrated_snn_res.0.5", label = TRUE)  
```

## Label Transfer
```{r}
lt_anchors <- FindTransferAnchors(reference = Nat19 , query = GFP32_RC17_ol_comb, dims = 1:20)

predictions <- TransferData(anchorset = lt_anchors, refdata = Nat19$ClusterID,  dims = 1:20, k.weight = 15)

lt_comb <- AddMetaData(GFP32_RC17_ol_comb, metadata = predictions)
```

```{r fig.width=10, fig.height=6, fig.fullwidth=TRUE}
DimPlot(lt_comb, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[3:40], ncol = 2, pt.size = 1)
```

```{r fig.width=10, fig.height=6, fig.fullwidth=TRUE}
DimPlot(lt_comb, group.by = "dataset", split.by = "predicted.id", cols = mycoloursP[3:40], ncol = 2, pt.size = 1)
```

## CCA
```{r eval=FALSE}
ol_anchors <- FindIntegrationAnchors(object.list = list(GFP32_RC17_ol_comb, Nat19), 
                                     dims = 1:20) 

ol_comb <- IntegrateData(anchorset = ol_anchors, dims = 1:20)

DefaultAssay(ol_comb) <- "integrated"
```

```{r eval=FALSE}
DefaultAssay(ol_comb) <- "integrated"
Idents(ol_comb) <- "Dataset"

# Scale data 
all_genes <- rownames(ol_comb)
ol_comb <- ScaleData(ol_comb, features = all_genes)

# Dimentionality reduction
ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE)

# Choose the mumber of PCs
ElbowPlot(ol_comb, ndims = 50) #20

# Clustering
ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindClusters(ol_comb, resolution = 0.5)
```

```{r eval=FALSE}
saveRDS(ol_comb, here("data", "Processed", "Integrations", "RC17_GFP32_Nat19.rds"))
```

```{r}
ol_comb <- readRDS(here("data", "Processed", "Integrations", "Nat19", "RC17_GFP32_Nat19.rds"))
```


# Visualization
```{r fig.height=4, fig.width=8}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset") 
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "dataset", split.by = "dataset") & NoLegend()
```

```{r fig.height=4, fig.width=9}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID")
```

```{r fig.height=4, fig.width=10}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = FALSE) & NoLegend()
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "dataset", label = TRUE, label.size = 3) & NoLegend()
```


```{r}
sessionInfo()
```