Luise-Seeker-Human-WM-Glia / src / 22_CCA_integration_human.Rmd
22_CCA_integration_human.Rmd
Raw
---
title: "CCA integratiogn with other datasets"
author: "Luise A. Seeker"
date: "18/04/2021"
output:
  html_document:
    toc: true
    toc_float:
      collapsed: false
    toc_depth: 4
    theme: united
---

# Load libraries

```{r}
library(Seurat)
library(here)
library(ggsci)
library(corrplot)
library(RColorBrewer)

```

# Build colour palette


```{r, echo = F}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3 <- pal_lancet("lanonc", alpha = 0.7)(9)
mypal4 <- pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)

mycoloursP<- c(mypal, mypal2, mypal3, mypal4, mypal5, mypal6, mypal7)


```

# Load datasets

```{r, eval = FALSE}

nad_ol <-  readRDS(file.path("/exports",
                             "eddie",
                             "scratch",
                             "lseeker",
                             "srt_oligos_and_opcs_LS.RDS"))
```

```{r, eval = FALSE}

load(file.path("/exports",
               "eddie",
               "scratch",
               "lseeker",
               "SarahsNatureDS", 
               "Nat19_OL_v3.Robj"))

```

```{r, eval = FALSE}

load(here("data",
          "downloaded_datasets",
          "Jaekel_Agirre_2019",
          "Nat19_OL_v3.Robj"))
               

```




# Label transfer
```{r, eval = FALSE}
lt_anchors <- FindTransferAnchors(reference = Nature19.OL_v3 , query = nad_ol, 
    dims = 1:30)
predictions <- TransferData(anchorset = lt_anchors, refdata = Nature19.OL_v3$Celltype_OL, 
    dims = 1:30)
lt_nad_ol <- AddMetaData(nad_ol, metadata = predictions)

```

```{r}

lt_nad_ol <- readRDS(here("data", 
                          "label_transferred", 
                          "lt_ol_Jaekel_Agirre.RDS"))

#lt_nad_ol <- readRDS("~/Documents/Work/HumanCellAtlas/git_repos/HCA_Luise/data/label_transferred/lt_ol_Jaekel_Agirre.RDS")

DimPlot(lt_nad_ol, group.by = "ol_clusters_named" , 
        cols = mycoloursP[6:40], label = TRUE)
```

```{r}

DimPlot(lt_nad_ol, group.by = "predicted.id", cols = mycoloursP[10:40] )
```

```{r}

DimPlot(lt_nad_ol, group.by = "predicted.id", cols = mycoloursP[10:40], label = "TRUE" )
```
```{r}

DimPlot(lt_nad_ol, group.by = "predicted.id", cols = mycoloursP[35:50], 
        label = "TRUE", label.size = 6, repel = TRUE )
```


```{r, fig.width=8, fig.height=8}

DimPlot(lt_nad_ol, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[7:40], ncol = 3)
```





# Correlation between ol ID and predicted ID
```{r}

met_dat <- lt_nad_ol@meta.data

cor_df <- data.frame(nad_ol_id = met_dat$ol_clusters_named, nature_id = met_dat$predicted.id)

cor_df$nature_id <- as.factor(cor_df$nature_id)
cor_df$nature_id <- factor(cor_df$nature_id, levels = c("OPCs",
                                                        "COPs",
                                                        "Oligo1",
                                                        "Oligo2",
                                                        "Oligo3",
                                                        "Oligo4",
                                                        "Oligo5",
                                                        "Oligo6"))

cor_matr <- matrix(data= 0, 
                   nrow = length(levels(cor_df$nad_ol_id)), 
                   ncol = length(levels(cor_df$nature_id)))

rownames(cor_matr) <- levels(cor_df$nad_ol_id)
colnames(cor_matr) <- levels(cor_df$nature_id)

for(i in 1: nrow(cor_df)){
  name1 <- as.character(cor_df[i, 1])
  name2 <- as.character(cor_df[i, 2])
  cor_matr[name1, name2] <- cor_matr[name1, name2] + 1
}

max(cor_matr)

centr_cor_matr <- cor_matr/ max(cor_matr)

corrplot(centr_cor_matr)

```

# Correct counts not for largest total counts, but first for cluster size

```{r}
  
clu_counts <- table(cor_counts = lt_nad_ol@meta.data$ol_clusters_named)

resiz_mat <- cor_matr

for(i in length(rownames(resiz_mat))){
  #resiz_mat[rownames(resiz_mat)[i], ]  <- resiz_mat["OPC_A", ] / clu_counts[rownames(resiz_mat)[i]]
  resiz_mat[rownames(resiz_mat)[i], ]  <- resiz_mat[i, ] / clu_counts[rownames(resiz_mat)[i]]
}

# Now bring the data on a scale of 0-1 to enable corrplots

centr_resiz_mat <- resiz_mat/ max(resiz_mat)

```


```{r, fig.heigth = 10, fig.width = 10}
corrplot(centr_resiz_mat, type = "full",method = "circle", 
  col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), 
  cl.lim = c(0,1),
  mar = c(1, 0, 1, 0),
   tl.col = "black") 


```
```{r, fig.heigth = 10, fig.width = 10}
corrplot(centr_resiz_mat, type = "full",method = "color", 
  col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), 
  cl.lim = c(0,1),
  mar = c(1, 0, 1, 0),
   tl.col = "black") 


```
```{r}
COL1(sequential = c("Oranges", "Purples", "Reds", "Blues", "Greens", 
                    "Greys", "OrRd", "YlOrRd", "YlOrBr", "YlGn"), n = 200)

COL2(diverging = c("RdBu", "BrBG", "PiYG", "PRGn", "PuOr", "RdYlBu"), n = 200)

```

```{r, fig.heigth = 10, fig.width = 10}
corrplot(centr_resiz_mat, type = "full",method = "circle", 
  col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), 
  cl.lim = c(0,1),
  mar = c(1, 0, 1, 0),
   tl.col = "black",
  is.corr = FALSE) 

```

Absolute count values

```{r}
corrplot(cor_matr, type = "full",method = "circle", 
  mar = c(1, 0, 1, 0),
  tl.col = "black",
  is.corr = FALSE,
  col = COL1('Greys'))


```

```{r}
corrplot(cor_matr, type = "full",method = "circle", 
  mar = c(1, 0, 1, 0),
  tl.col = "black",
  is.corr = FALSE,
  col = COL1('Blues'),
  cl.cex = 1.5,
  tl.cex = 2, 
  cl.pos="b")


corrplot(cor_matr, type = "full",method = "circle", 
  mar = c(1, 0, 1, 0),
  tl.col = "black",
  is.corr = FALSE,
  col = COL1('Blues'),
  cl.cex = 1.5,
  tl.cex = 2, 
  cl.pos="r")


```


```{r}

corrplot(cor_matr, type = "full",method = "number", 
  col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), 
  cl.lim = c(0,1),
  mar = c(1, 0, 1, 0),
   tl.col = "black",
  is.corr = FALSE) 

```
# Add a dataset column to individual datasets before integration

```{r, eval = FALSE}
Nature19.OL_v3@meta.data$dataset <- "Jaekel & Agirre et al. 2019"

lt_nad_ol@meta.data$dataset <- "Seeker et al."
```


# CCA integration

```{r, eval = FALSE}
oligo_anchors <- FindIntegrationAnchors(object.list = list(lt_nad_ol, 
                                                           Nature19.OL_v3), 
                                        dims = 1:20)
oligos_combined <- IntegrateData(anchorset = oligo_anchors, dims = 1:20)
```


```{r, eval = FALSE}
DefaultAssay(oligos_combined) <- "integrated"

# Run the standard workflow for visualization and clustering
oligos_combined <- ScaleData(oligos_combined, verbose = FALSE)
oligos_combined <- RunPCA(oligos_combined, npcs = 30, verbose = FALSE)
# t-SNE and Clustering
oligos_combined <- RunUMAP(oligos_combined, reduction = "pca", dims = 1:20)
oligos_combined <- FindNeighbors(oligos_combined, reduction = "pca", dims = 1:20)
oligos_combined <- FindClusters(oligos_combined, resolution = 0.5)
```



```{r}
oligos_combined <- readRDS(here("data", "integrated", "int_Jaekel_Agirre.RDS"))
oligos_combined <- readRDS("~/Documents/Work/HumanCellAtlas/git_repos/HCA_Luise/data/integrated/int_Jaekel_Agirre.RDS")
# Visualization
DimPlot(oligos_combined, reduction = "umap", group.by = "Celltype_OL", 
        cols = mycoloursP)
```

```{r}
oligos_combined$Celltype_OL <- as.factor(oligos_combined$Celltype_OL)

levels(oligos_combined$Celltype_OL)

oligos_combined$Celltype_OL <- factor(oligos_combined$Celltype_OL, levels =
                                        c(levels(as.factor(oligos_combined$predicted.id)),
                                          "ImOlGs"))
```

```{r}
levels(as.factor(oligos_combined$predicted.id))

```

```{r}

DimPlot(oligos_combined, reduction = "umap", group.by = "predicted.id", 
        cols = mycoloursP[35:50], split.by = "Condition", label = TRUE)
```

```{r}

DimPlot(oligos_combined, reduction = "umap", group.by = "Celltype_OL", 
        cols = mycoloursP[35:50], split.by = "Condition", label = TRUE)
```

```{r}
DimPlot(oligos_combined, reduction = "umap", group.by = "Celltype_OL", 
        split.by= "Condition", 
        cols = mycoloursP )
```


```{r}

DimPlot(oligos_combined, reduction = "umap", group.by = "ol_clusters_named", 
        cols = mycoloursP )
```


```{r}

oligos_combined$ol_clusters_named <- as.factor(oligos_combined$ol_clusters_named)

oligos_combined$ol_clusters_named <- factor(oligos_combined$ol_clusters_named, levels = c("OPC_A", "OPC_B", "COP_A", "COP_B", "COP_C", "Oligo_A", "Oligo_B" ,"Oligo_C", "Oligo_D",
"Oligo_E", "Oligo_F"))


DimPlot(oligos_combined, reduction = "umap", group.by = "ol_clusters_named", 
        split.by= "Condition", 
        cols = mycoloursP[6:40])


```

```{r}

DimPlot(oligos_combined, reduction = "umap", group.by = "ol_clusters_named", 
        split.by= "dataset", 
        cols = mycoloursP[10:40] )


```

```{r}

DimPlot(oligos_combined, reduction = "umap", group.by = "ol_clusters_named", 
        cols = mycoloursP[6:40] )


```



```{r, eval=FALSE}

saveRDS(oligos_combined, here("data", 
                              "integrated", 
                              "int_Jaekel_Agirre.RDS"))
```


```{r}
sessionInfo()
```