MonolayerCultures / src / Integrations / foetalHCA_RC17 / Int_foetalHCA_RC17_ol.Rmd
Int_foetalHCA_RC17_ol.Rmd
Raw
---
title: "Integration of RC17 & HCA 20wk Foetal Nuclei"
author: "Nina-Lydia Kazakou"
date: "04/04/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Set-up

### Load libraries 
```{r message=FALSE, warning=FALSE}
library(Seurat)
library(here)
library(ggsci)
library(ggplot2)
library(dplyr)
```

### Load colour palets
```{r}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
```


### Load objects 
```{r}
HCA_f_opc <- readRDS(here("data", "Processed", "Original_Objects", "HCA_foetal_opcs.rds"))

RC17_ol <- readRDS(here("data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds"))
```


```{r}
DimPlot(RC17_ol, group.by = "ClusterID", pt.size = 0.8, cols = mycoloursP[6:40], label = TRUE) & NoLegend()
```

```{r foetal_metadata}
HCA_f_opc@meta.data$ClusterID <- "foetal_HCA_OPCs"
HCA_f_opc@meta.data$Dataset <- "foetal_HCA_OPCs"
```


# Integration
## CCA

```{r Integrate_Datasets}
# Create a list of objects
srt_list <- list(RC17_ol, HCA_f_opc)

# normalize and identify variable features for each dataset independently
srt_list<- lapply(X = srt_list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

# select features that are repeatedly variable across datasets for integration
var_features <- SelectIntegrationFeatures(object.list = srt_list)

# create anchors to integrate the datasets
anchors <- FindIntegrationAnchors(object.list = srt_list, anchor.features = var_features)   

# create an 'integrated' data assay 
ol_comb <- IntegrateData(anchorset = anchors, dims = 1:20)
```


```{r Run_Standard_Workflow}
DefaultAssay(ol_comb) <- "integrated"
Idents(ol_comb) <- "Dataset"

# Scale data 
all_genes <- rownames(ol_comb)
ol_comb <- ScaleData(ol_comb, features = all_genes)

# Dimentionality reduction
ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE)

# Choose the mumber of PCs
ElbowPlot(ol_comb, ndims = 50) #20

# Clustering
ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindClusters(ol_comb, resolution = 0.5)
```


```{r eval=FALSE}
dir.create(here("data", "Processed", "Integrations", "foetal_HCA_RC17"))
saveRDS(ol_comb, here("data", "Processed", "Integrations", "foetal_HCA_RC17", "adultHCA_SCS_RC17.rds"))
```

# Visualization
```{r fig.height=4, fig.width=8}
DimPlot(ol_comb, pt.size = 0.8, cols = c(mycoloursP[13], mycoloursP[6]), group.by = "Dataset") 
```


```{r fig.height=4, fig.width=8}
DimPlot(ol_comb, pt.size = 0.8, cols = c(mycoloursP[13], mycoloursP[6]), group.by = "Dataset", split.by = "Dataset") & NoLegend()
```

```{r fig.height=4, fig.width=6}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID")
```

```{r fig.height=4, fig.width=9}
DimPlot(ol_comb, pt.size = 0.8, cols = mycoloursP[5:40], group.by = "ClusterID", split.by = "Dataset", label = FALSE)
```


# Differential Gene Expression 

```{r}
dir.create(here("outs", "Processed", "Integrations", "foetal_HCA_RC17"))

Idents(ol_comb) <- "Dataset"

if(!(file.exists(here("outs", "Processed", "Integrations", "foetal_HCA_RC17", "de_foetal_HCA_RC17.rds")))){
  de_foetal_HCA_RC17 <- FindMarkers(ol_comb, ident.1 = "foetal_HCA_OPCs", ident.2 = "hES-derived Monolayer Oligodendroglia", test.use = "MAST")
  de_foetal_HCA_RC17 <- rename(de_foetal_HCA_RC17, pct.1_HCA_f_ol = pct.1, pct.2_RC17_ol = pct.2)
  write.csv(de_foetal_HCA_RC17, here("outs", "Processed", "Integrations", "foetal_HCA_RC17", "de_foetal_HCA_RC17.csv"), quote = FALSE)
  saveRDS(de_foetal_HCA_RC17, here("outs", "Processed", "Integrations", "foetal_HCA_RC17", "de_foetal_HCA_RC17.rds"))
}else{
  de_foetal_HCA_RC17 <- readRDS(here("outs", "Processed", "Integrations", "foetal_HCA_RC17", "de_foetal_HCA_RC17.rds"))
}
```

```{r}
head(de_foetal_HCA_RC17, 2)

dim(de_foetal_HCA_RC17)
```

## Filtering for specific markers

To reference Luise's thresholding for DE_analysis of the adult_HCA, I will use: 
*pct.group of interest < 25%
*pct.other > 50 - 60%
*avg_log2f > |1.2|

```{r}
pct_a = 0.25
pct_b = 0.5

n = 10
```

**A positive avg_log2FC shows that the gene is more highly expressed in the first group** (HCA_f_ol in this case), while **a negative avg_log2FC shows that the gene is more expressed in the second group** (RC17_ol in this case). 

#### hES-derived oligodendroglia 
To filter for markers specific to the hES-derived monolayer oligodendroglia we need to extract the **negative values**. 
```{r}
de_RC17_neg <- de_foetal_HCA_RC17 %>% filter(avg_log2FC < 0)

# Number of upreguralted genes in hES-derived cultured oligodendroglia
dim(de_RC17_neg)
```

```{r}
RC17_top <- de_RC17_neg %>%
 filter(pct.2_RC17_ol <  pct_a &
        pct.1_HCA_f_ol > pct_b) %>% 
  arrange(avg_log2FC) %>% 
  head(n) %>% 
  row.names()

length(RC17_top) # Double-check that it actually contains 10 genes (or less if not enough)
```
```{r}
RC17_top
```

```{r fig.height=4, fig.width=8}
DotPlot(ol_comb, features = RC17_top) + FontSize(7)
```

```{r}
sessionInfo()
```