---
title: "Milo"
author: "Luise A. Seeker"
date: "22/10/2021"
output: html_document
---
```{r}
library(Seurat)
library(here)
library(miloR)
library(dplyr)
library(statmod)
library(SingleCellExperiment)
library(scater)
library(patchwork)
library(scran)
library(gridExtra)
```
```{r}
seur <- readRDS(here("data",
"single_nuc_data",
"astrocytes",
"HCA_astrocytes.RDS"))
```
```{r}
plot_levels <- levels(as.factor(seur$cluster_id))
```
```{r}
milo_meta <- seur@meta.data
milo_obj <- Milo(as.SingleCellExperiment(seur))
milo_obj
```
Build a graph and neighbourhoods.
```{r}
milo_obj <- buildGraph(milo_obj, k=30, d=30, reduced.dim = "PCA" )
milo_obj <- makeNhoods(milo_obj, k=30, d=30, refined=TRUE, prop=0.2, reduced_dims = "PCA" )
plotNhoodSizeHist(milo_obj)
```
Ideally, histogram should be peaking at 50 -100. Or the average neighbourhood size
should be over 5 x N_samples.Increasing k and/or
prop during makeNhoods will shift distribution to the right.
# Calculate distances, count cells according to an experimental design and perform DA testing.
```{r}
#milo_obj <- calcNhoodDistance(milo_obj, d=30)
milo_obj <- countCells(milo_obj, samples="uniq_id", meta.data=milo_meta)
head(nhoodCounts(milo_obj))
```
```{r}
milo_design <- data.frame(colData(milo_obj))[,c("uniq_id", "Tissue", "gender",
"AgeGroup", "caseNO")]
## Convert batch info from integer to factor
milo_design <- distinct(milo_design)
rownames(milo_design) <- milo_design$uniq_id
milo_design
```
```{r}
milo_obj <- calcNhoodDistance(milo_obj, d=30, reduced.dim = "PCA")
```
Testing
```{r}
da_results <- testNhoods(milo_obj, design = ~ Tissue, design.df = milo_design)
head(da_results)
```
```{r}
da_results %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot single-cell UMAP
umap_pl <- plotReducedDim(milo_obj, dimred = "UMAP", colour_by="Tissue",
text_by = "cluster_id",
text_size = 3, point_size=0.5) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results, layout="UMAP",alpha=0.1)
umap_pl + nh_graph_pl +
plot_layout(guides="collect")
```
```{r}
da_results <- annotateNhoods(milo_obj, da_results, coldata_col = "cluster_id")
head(da_results)
```
```{r}
ggplot(da_results, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results$clust <- ifelse(da_results$cluster_id_fraction < 0.7,
"Mixed",
da_results$cluster_id_fraction)
plotDAbeeswarm(da_results, group.by = "cluster_id")
```
# Pairwise tissue
CB vs BA4
```{r}
Idents(seur) <- "Tissue"
cb_ba4_srt <- subset(seur, idents = c("CB", "BA4"))
milo_meta_cb_ba4 <- cb_ba4_srt@meta.data
milo_obj <- Milo(as.SingleCellExperiment(cb_ba4_srt))
milo_obj
```
Build a graph and neighbourhoods.
```{r}
milo_obj <- buildGraph(milo_obj, k=30, d=30, reduced.dim = "PCA" )
milo_obj <- makeNhoods(milo_obj, k=30, d=30, refined=TRUE, prop=0.2, reduced_dims = "PCA" )
plotNhoodSizeHist(milo_obj)
```
Ideally, histogram should be peaking at 50 -100. Or the average neighbourhood size
should be over 5 x N_samples.Increasing k and/or
prop during makeNhoods will shift distribution to the right.
# Calculate distances, count cells according to an experimental design and perform DA testing.
```{r}
#milo_obj <- calcNhoodDistance(milo_obj, d=30)
milo_obj <- countCells(milo_obj, samples="uniq_id", meta.data=milo_meta_cb_ba4)
head(nhoodCounts(milo_obj))
```
```{r}
milo_design <- data.frame(colData(milo_obj))[,c("uniq_id", "Tissue", "gender",
"AgeGroup", "caseNO")]
## Convert batch info from integer to factor
milo_design <- distinct(milo_design)
rownames(milo_design) <- milo_design$uniq_id
milo_design
```
```{r}
set.seed(1001)
milo_obj <- calcNhoodDistance(milo_obj, d=30, reduced.dim = "PCA")
```
```{r}
set.seed(1001)
da_results_cb_vs_ba4 <- testNhoods(milo_obj, design = ~ 0 + Tissue, design.df = milo_design,
model.contrasts = c("TissueCB - TissueBA4"))
head(da_results_cb_vs_ba4)
```
```{r}
da_results_cb_vs_ba4 %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_cb_vs_ba4, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_cb_vs_ba4, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot single-cell UMAP
umap_pl <- plotReducedDim(milo_obj, dimred = "UMAP", colour_by="Tissue",
text_by = "cluster_id",
text_size = 3, point_size=0.5) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_cb_vs_ba4, layout="UMAP",alpha=0.1)
umap_cb_ba4 <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
umap_cb_ba4
```
```{r}
milo_save_dir <- here("outs",
"Milo_compositional_analysis",
"astrocytes_only",
"umap_plots")
dir.create(milo_save_dir, recursive = T)
pdf(file = here(milo_save_dir, "Tissue_CB_vs_BA4_noCSC"),
width = 13,
height = 7)
print(umap_cb_ba4)
dev.off()
```
```{r}
da_results_cb_vs_ba4 <- annotateNhoods(milo_obj,
da_results_cb_vs_ba4,
coldata_col = "cluster_id")
head(da_results_cb_vs_ba4)
```
```{r}
ggplot(da_results_cb_vs_ba4, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_cb_vs_ba4$clust <- ifelse(da_results_cb_vs_ba4$cluster_id_fraction < 0.7,
"Mixed",
da_results_cb_vs_ba4$cluster_id_fraction)
da_results_cb_vs_ba4$cluster_id <- factor(da_results_cb_vs_ba4$cluster_id,
levels = plot_levels)
cb_ba4 <- plotDAbeeswarm(da_results_cb_vs_ba4, group.by = "cluster_id")
cb_ba4
```
```{r}
milo_dir_bees <- here("outs",
"Milo_compositional_analysis",
"astrocytes_only",
"beeswarm_plots")
dir.create(milo_dir_bees, recursive = T)
pdf(file = here(milo_dir_bees, "Tissue_CB_vs_BA4_noCSC"),
width = 7,
height = 13)
print(cb_ba4)
dev.off()
```
# Pairwise tissue
CB vs CSC
```{r}
Idents(seur) <- "Tissue"
cb_csc_srt <- subset(seur, idents = c("CSC", "CB"))
milo_meta_cb_csc <- cb_csc_srt@meta.data
milo_obj <- Milo(as.SingleCellExperiment(cb_csc_srt))
milo_obj
```
Build a graph and neighbourhoods.
```{r}
milo_obj <- buildGraph(milo_obj, k=30, d=30, reduced.dim = "PCA" )
milo_obj <- makeNhoods(milo_obj, k=30, d=30, refined=TRUE, prop=0.2, reduced_dims = "PCA" )
plotNhoodSizeHist(milo_obj)
```
# Calculate distances, count cells according to an experimental design and perform DA testing.
```{r}
#milo_obj <- calcNhoodDistance(milo_obj, d=30)
milo_obj <- countCells(milo_obj, samples="uniq_id", meta.data=milo_meta_cb_csc)
head(nhoodCounts(milo_obj))
```
```{r}
milo_design <- data.frame(colData(milo_obj))[,c("uniq_id", "Tissue", "gender",
"AgeGroup", "caseNO")]
## Convert batch info from integer to factor
milo_design <- distinct(milo_design)
rownames(milo_design) <- milo_design$uniq_id
milo_design
```
```{r}
set.seed(1001)
milo_obj <- calcNhoodDistance(milo_obj, d=30, reduced.dim = "PCA")
```
```{r}
set.seed(1001)
da_results_cb_vs_csc <- testNhoods(milo_obj, design = ~ 0 + Tissue,
design.df = milo_design,
model.contrasts = c("TissueCB - TissueCSC"))
head(da_results_cb_vs_csc)
```
```{r}
da_results_cb_vs_csc %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_cb_vs_csc, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_cb_vs_csc, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_cb_vs_csc, layout="UMAP",alpha=0.1)
umap_cb_csc <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
umap_cb_csc
```
```{r}
pdf(file = here(milo_save_dir, "Tissue_CB_vs_CSC_noBA4"),
width = 13,
height = 7)
print(umap_cb_csc)
dev.off()
```
```{r}
da_results_cb_vs_csc <- annotateNhoods(milo_obj,
da_results_cb_vs_csc,
coldata_col = "cluster_id")
head(da_results_cb_vs_csc)
```
```{r}
ggplot(da_results_cb_vs_csc, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_cb_vs_csc$clust <- ifelse(da_results_cb_vs_csc$cluster_id_fraction < 0.7,
"Mixed",
da_results_cb_vs_csc$cluster_id_fraction)
da_results_cb_vs_csc$cluster_id <- factor(da_results_cb_vs_csc$cluster_id,
levels = plot_levels)
cb_csc <- plotDAbeeswarm(da_results_cb_vs_csc, group.by = "cluster_id")
cb_csc
```
```{r}
pdf(file = here(milo_dir_bees, "Tissue_CB_vs_CSC"),
width = 7,
height = 13)
print(cb_csc)
dev.off()
```
# Pairwise tissue
CSC vs BA4
```{r}
Idents(seur) <- "Tissue"
csc_ba4_srt <- subset(seur, idents = c("CSC", "BA4"))
milo_meta_csc_ba4 <- csc_ba4_srt@meta.data
milo_obj <- Milo(as.SingleCellExperiment(csc_ba4_srt))
milo_obj
```
Build a graph and neighbourhoods.
```{r}
milo_obj <- buildGraph(milo_obj, k=30, d=30, reduced.dim = "PCA" )
milo_obj <- makeNhoods(milo_obj, k=30, d=30, refined=TRUE, prop=0.2, reduced_dims = "PCA" )
plotNhoodSizeHist(milo_obj)
```
Ideally, histogram should be peaking at 50 -100. Or the average neighbourhood size
should be over 5 x N_samples.Increasing k and/or
prop during makeNhoods will shift distribution to the right.
# Calculate distances, count cells according to an experimental design and perform DA testing.
```{r}
#milo_obj <- calcNhoodDistance(milo_obj, d=30)
milo_obj <- countCells(milo_obj, samples="uniq_id", meta.data=milo_meta_csc_ba4)
head(nhoodCounts(milo_obj))
```
```{r}
milo_design <- data.frame(colData(milo_obj))[,c("uniq_id", "Tissue", "gender",
"AgeGroup", "caseNO")]
## Convert batch info from integer to factor
milo_design <- distinct(milo_design)
rownames(milo_design) <- milo_design$uniq_id
milo_design
```
```{r}
set.seed(1001)
milo_obj <- calcNhoodDistance(milo_obj, d=30, reduced.dim = "PCA")
```
```{r}
da_results_csc_vs_ba4 <- testNhoods(milo_obj, design = ~ 0 + Tissue, design.df = milo_design,
model.contrasts = c("TissueCSC - TissueBA4"))
head(da_results_csc_vs_ba4)
```
```{r}
da_results_csc_vs_ba4 %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_csc_vs_ba4, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_csc_vs_ba4, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_csc_vs_ba4, layout="UMAP",alpha=0.1)
umap_csc_ba4 <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
umap_csc_ba4
```
```{r}
pdf(file = here(milo_save_dir, "Tissue_CSC_vs_BA4_noCB"),
width = 13,
height = 7)
print(umap_csc_ba4)
dev.off()
```
```{r}
da_results_csc_vs_ba4 <- annotateNhoods(milo_obj,
da_results_csc_vs_ba4,
coldata_col = "cluster_id")
head(da_results_csc_vs_ba4)
```
```{r}
ggplot(da_results_csc_vs_ba4, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_csc_vs_ba4$clust <- ifelse(da_results_csc_vs_ba4$cluster_id_fraction < 0.7,
"Mixed",
da_results_csc_vs_ba4$cluster_id_fraction)
da_results_csc_vs_ba4$cluster_id <- factor(da_results_csc_vs_ba4$cluster_id,
levels = plot_levels)
csc_ba4 <- plotDAbeeswarm(da_results_csc_vs_ba4, group.by = "cluster_id")
csc_ba4
```
```{r}
grid.arrange(csc_ba4, cb_ba4, cb_csc, ncol = 3)
```
```{r}
pdf(file = here(milo_dir_bees, "Tissue_CSC_vs_BA4_noCB"),
width = 7,
height = 13)
print(csc_ba4)
dev.off()
```
# Age
Testing
```{r}
da_results_age <- testNhoods(milo_obj, design = ~ Tissue + gender + AgeGroup,
design.df = milo_design)
head(da_results_age)
```
```{r}
da_results_age %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_age, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_age, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot single-cell UMAP
umap_pl <- plotReducedDim(milo_obj, dimred = "UMAP", colour_by="AgeGroup",
text_by = "cluster_id",
text_size = 3, point_size=0.5) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_age, layout="UMAP",alpha=0.1)
age_umap <-umap_pl + nh_graph_pl +
plot_layout(guides="collect")
age_umap
```
```{r}
da_results_age <- annotateNhoods(milo_obj,
da_results_age,
coldata_col = "cluster_id")
head(da_results_age)
```
```{r}
ggplot(da_results_age, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_age$clust <- ifelse(da_results_age$cluster_id_fraction < 0.7,
"Mixed",
da_results_age$cluster_id_fraction)
da_results_age$cluster_id <- factor(da_results_age$cluster_id,
levels = plot_levels)
plotDAbeeswarm(da_results_age, group.by = "cluster_id")
```
```{r}
pdf(file = here(milo_save_dir, "Age_corr_for_tissue_sex_astrocytes.pdf"),
width = 13,
height = 7)
print(age_umap)
dev.off()
```
```{r}
da_results_age <- annotateNhoods(milo_obj, da_results_age, coldata_col = "cluster_id")
head(da_results_age)
```
```{r}
da_results_age$cluster_id <- factor(da_results_age$cluster_id,
levels = plot_levels)
ggplot(da_results_age, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_age$clust <- ifelse(da_results_age$cluster_id_fraction < 0.7,
"Mixed",
da_results_age$cluster_id_fraction)
age_bees <- plotDAbeeswarm(da_results_age, group.by = "cluster_id")
```
```{r}
milo_dir_bees <- here("outs",
"Milo_compositional_analysis",
"beeswarm_plots")
pdf(file = here(milo_dir_bees, "Age_correct_for_tissue_sex_astrocytes.pdf"),
width = 7,
height = 13)
print(age_bees)
dev.off()
```
# Sex
Testing
```{r}
da_results_sex <- testNhoods(milo_obj, design = ~ Tissue+ AgeGroup+ gender,
design.df = milo_design)
head(da_results_sex)
```
```{r}
da_results_sex %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_sex, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_sex, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_sex, layout="UMAP",alpha=0.1)
sex_umap <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
sex_umap
```
```{r}
pdf(file = here(milo_save_dir, "Sex_corr_for_tissue_age_astrocytes.pdf"),
width = 13,
height = 7)
print(sex_umap)
dev.off()
```
```{r}
da_results_sex <- annotateNhoods(milo_obj, da_results_sex, coldata_col = "cluster_id")
head(da_results_sex)
```
```{r}
da_results_sex$cluster_id <- factor(da_results_sex$cluster_id,
levels = plot_levels)
ggplot(da_results_sex, aes(cluster_id_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_sex$clust <- ifelse(da_results_sex$cluster_id_fraction < 0.7,
"Mixed",
da_results_sex$cluster_id_fraction)
sex_beeswarm <- plotDAbeeswarm(da_results_sex, group.by = "cluster_id")
sex_beeswarm
```
```{r}
pdf(file = here(milo_dir_bees, "Sex_corr_for_tissue_age_astrocytes.pdf"),
width = 7,
height = 13)
print(sex_beeswarm)
dev.off()
```
# Save data
```{r}
milo_dir <- here("data", "Milo_datasets")
dir.create(milo_dir)
saveRDS(milo_obj, here(milo_dir, "HCA_astrocytes_milo.RDS"))
da_data_dir <- here("outs", "Milo_compositional_analysis",
"differential_abundance")
dir.create(da_data_dir)
write.csv(da_results, here(da_data_dir, "da_results_tissue_overall_astrocytes.csv"))
write.csv(da_results_age, here(da_data_dir, "da_results_age_astrocytes.csv"))
write.csv(da_results_sex, here(da_data_dir, "da_results_sex_astrocytes.csv"))
```
# Session info
```{r}
sessionInfo()
```