---
title: "Milo"
author: "Luise A. Seeker"
date: "22/10/2021"
output: html_document
---
```{r}
library(Seurat)
library(here)
library(miloR)
library(dplyr)
library(statmod)
library(SingleCellExperiment)
library(scater)
library(patchwork)
library(scran)
```
```{r}
seur <- readRDS(here("data",
"single_nuc_data",
"all_cell_types",
"srt_fine_anno_01.RDS"))
```
```{r}
plot_levels <- c("Neur",
"RELN_4",
"RELN_3",
"RELN_2",
"RELN_1",
"Ex_4",
"Ex_3",
"Ex_2",
"Ex_1",
"In_9",
"In_8",
"In_7",
"In_6",
"In_5",
"In_4",
"In_3",
"In_2",
"In_1",
"vSMC",
"Mural_vein_1",
"Mural_cap_2",
"Mural_cap_1",
"EC_art_3",
"EC_art_2",
"EC_art_1",
"EC_cap_5",
"EC_cap_4",
"EC_cap_3",
"EC_cap_2",
"EC_cap_1",
"Immune",
"BAM",
"Microglia_5",
"Microglia_4",
"Microglia_3",
"Microglia_2",
"Microglia_1",
"AS_12",
"AS_11",
"AS_10",
"AS_9",
"AS_8",
"AS_7",
"AS_6",
"AS_5",
"AS_4",
"AS_3",
"AS_2",
"AS_1",
"Oligo_F",
"Oligo_E",
"Oligo_D",
"Oligo_C",
"Oligo_B",
"Oligo_A",
"COP_C",
"COP_B",
"COP_A",
"OPC_B",
"OPC_A")
```
```{r}
milo_meta <- seur@meta.data
milo_obj <- Milo(as.SingleCellExperiment(seur))
milo_obj
```
Build a graph and neighbourhoods.
```{r}
milo_obj <- buildGraph(milo_obj, k=30, d=30, reduced.dim = "PCA" )
milo_obj <- makeNhoods(milo_obj, k=30, d=30, refined=TRUE, prop=0.2, reduced_dims = "PCA" )
plotNhoodSizeHist(milo_obj)
```
Ideally, histogram should be peaking at 50 -100. Or the average neighbourhood size
should be over 5 x N_samples.Increasing k and/or
prop during makeNhoods will shift distribution to the right.
# Calculate distances, count cells according to an experimental design and perform DA testing.
```{r}
#milo_obj <- calcNhoodDistance(milo_obj, d=30)
milo_obj <- countCells(milo_obj, samples="uniq_id", meta.data=milo_meta)
head(nhoodCounts(milo_obj))
```
```{r}
milo_design <- data.frame(colData(milo_obj))[,c("uniq_id", "Tissue", "gender",
"AgeGroup", "caseNO")]
## Convert batch info from integer to factor
milo_design <- distinct(milo_design)
rownames(milo_design) <- milo_design$uniq_id
milo_design
```
```{r}
milo_obj <- calcNhoodDistance(milo_obj, d=30, reduced.dim = "PCA")
```
Testing
```{r}
da_results <- testNhoods(milo_obj, design = ~ Tissue, design.df = milo_design)
head(da_results)
```
```{r}
da_results %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot single-cell UMAP
umap_pl <- plotReducedDim(milo_obj, dimred = "UMAP", colour_by="Tissue",
text_by = "Fine_cluster",
text_size = 3, point_size=0.5) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results, layout="UMAP",alpha=0.1)
umap_pl + nh_graph_pl +
plot_layout(guides="collect")
```
```{r}
da_results <- annotateNhoods(milo_obj, da_results, coldata_col = "Fine_cluster")
head(da_results)
```
```{r}
ggplot(da_results, aes(Fine_cluster_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results$clust <- ifelse(da_results$Fine_cluster_fraction < 0.7,
"Mixed",
da_results$Fine_cluster_fraction)
plotDAbeeswarm(da_results, group.by = "Fine_cluster")
```
# Pairwise tissue
CB vs BA4
```{r}
da_results_cb_vs_ba4 <- testNhoods(milo_obj, design = ~ 0 + Tissue, design.df = milo_design,
model.contrasts = c("TissueCB - TissueBA4"))
head(da_results_cb_vs_ba4)
```
```{r}
da_results_cb_vs_ba4 %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_cb_vs_ba4, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_cb_vs_ba4, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot single-cell UMAP
umap_pl <- plotReducedDim(milo_obj, dimred = "UMAP", colour_by="Tissue",
text_by = "Fine_cluster",
text_size = 3, point_size=0.5) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_cb_vs_ba4, layout="UMAP",alpha=0.1)
umap_cb_ba4 <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
umap_cb_ba4
```
```{r}
milo_dir_umap <- here("outs",
"Milo_compositional_analysis",
"umap_plots")
pdf(file = here(milo_dir_umap, "Tissue_CB_vs_BA4"),
width = 13,
height = 7)
print(umap_cb_ba4)
dev.off()
```
```{r}
da_results_cb_vs_ba4 <- annotateNhoods(milo_obj,
da_results_cb_vs_ba4,
coldata_col = "Fine_cluster")
head(da_results_cb_vs_ba4)
```
```{r}
ggplot(da_results_cb_vs_ba4, aes(Fine_cluster_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_cb_vs_ba4$clust <- ifelse(da_results_cb_vs_ba4$Fine_cluster_fraction < 0.7,
"Mixed",
da_results_cb_vs_ba4$Fine_cluster_fraction)
da_results_cb_vs_ba4$Fine_cluster <- factor(da_results_cb_vs_ba4$Fine_cluster,
levels = plot_levels)
cb_ba4 <- plotDAbeeswarm(da_results_cb_vs_ba4, group.by = "Fine_cluster")
cb_ba4
```
```{r}
milo_dir_bees <- here("outs",
"Milo_compositional_analysis",
"beeswarm_plots")
pdf(file = here(milo_dir_bees, "Tissue_CB_vs_BA4"),
width = 7,
height = 13)
print(cb_ba4)
dev.off()
```
# Pairwise tissue
CB vs CSC
```{r}
da_results_cb_vs_csc <- testNhoods(milo_obj, design = ~ 0 + Tissue, design.df = milo_design,
model.contrasts = c("TissueCB - TissueCSC"))
head(da_results_cb_vs_csc)
```
```{r}
da_results_cb_vs_csc %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_cb_vs_csc, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_cb_vs_csc, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_cb_vs_csc, layout="UMAP",alpha=0.1)
umap_cb_csc <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
umap_cb_csc
```
```{r}
pdf(file = here(milo_dir_umap, "Tissue_CB_vs_CSC"),
width = 13,
height = 7)
print(umap_cb_csc)
dev.off()
```
```{r}
da_results_cb_vs_csc <- annotateNhoods(milo_obj,
da_results_cb_vs_csc,
coldata_col = "Fine_cluster")
head(da_results_cb_vs_csc)
```
```{r}
ggplot(da_results_cb_vs_csc, aes(Fine_cluster_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_cb_vs_csc$clust <- ifelse(da_results_cb_vs_csc$Fine_cluster_fraction < 0.7,
"Mixed",
da_results_cb_vs_csc$Fine_cluster_fraction)
da_results_cb_vs_csc$Fine_cluster <- factor(da_results_cb_vs_csc$Fine_cluster,
levels = plot_levels)
cb_csc <- plotDAbeeswarm(da_results_cb_vs_csc, group.by = "Fine_cluster")
cb_csc
```
```{r}
pdf(file = here(milo_dir_bees, "Tissue_CB_vs_CSC"),
width = 7,
height = 13)
print(cb_csc)
dev.off()
```
# Pairwise tissue
CSC vs BA4
```{r}
da_results_csc_vs_ba4 <- testNhoods(milo_obj, design = ~ 0 + Tissue, design.df = milo_design,
model.contrasts = c("TissueCSC - TissueBA4"))
head(da_results_cb_vs_csc)
```
```{r}
da_results_csc_vs_ba4 %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_csc_vs_ba4, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_csc_vs_ba4, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_csc_vs_ba4, layout="UMAP",alpha=0.1)
umap_csc_ba4 <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
umap_csc_ba4
```
```{r}
pdf(file = here(milo_dir_umap, "Tissue_CSC_vs_BA4"),
width = 13,
height = 7)
print(umap_csc_ba4)
dev.off()
```
```{r}
da_results_csc_vs_ba4 <- annotateNhoods(milo_obj,
da_results_csc_vs_ba4,
coldata_col = "Fine_cluster")
head(da_results_csc_vs_ba4)
```
```{r}
ggplot(da_results_csc_vs_ba4, aes(Fine_cluster_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_csc_vs_ba4$clust <- ifelse(da_results_csc_vs_ba4$Fine_cluster_fraction < 0.7,
"Mixed",
da_results_csc_vs_ba4$Fine_cluster_fraction)
da_results_csc_vs_ba4$Fine_cluster <- factor(da_results_csc_vs_ba4$Fine_cluster,
levels = plot_levels)
csc_ba4 <- plotDAbeeswarm(da_results_csc_vs_ba4, group.by = "Fine_cluster")
csc_ba4
```
```{r}
pdf(file = here(milo_dir_bees, "Tissue_CSC_vs_BA4"),
width = 7,
height = 13)
print(csc_ba4)
dev.off()
```
###
# Finding markers of DA populations
## Automatic grouping of neighborhoods
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Find groups
da_results <- groupNhoods(milo_obj, da_results, max.lfc.delta = 2)
head(da_results)
```
```{r}
plotNhoodGroups(milo_obj, da_results, layout="UMAP")
```
```{r}
plotDAbeeswarm(da_results, "NhoodGroup")
```
```{r, fig.height = 5, fig.width = 7}
plotDAbeeswarm(groupNhoods(milo_obj, da_results, max.lfc.delta = 0.5),
group.by = "NhoodGroup") + ggtitle("max LFC delta=0.5")
```
```{r, fig.height = 5, fig.width = 7}
plotDAbeeswarm(groupNhoods(milo_obj, da_results, max.lfc.delta = 0.2),
group.by = "NhoodGroup") + ggtitle("max LFC delta=0.2")
```
```{r}
plotDAbeeswarm(groupNhoods(milo_obj, da_results, max.lfc.delta = 1) ,
group.by = "NhoodGroup") + ggtitle("max LFC delta=1")
```
```{r}
plotDAbeeswarm(groupNhoods(milo_obj, da_results, max.lfc.delta = 1,
overlap=3), group.by = "NhoodGroup") +
ggtitle("overlap=3")
```
```{r}
set.seed(42)
da_results <- groupNhoods(milo_obj, da_results, max.lfc.delta = 1, overlap=3)
plotNhoodGroups(milo_obj, da_results, layout="UMAP")
```
```{r}
plotDAbeeswarm(da_results, group.by = "NhoodGroup")
```
# Finding gene signatures for neighbourhoods
```{r, eval = FALSE}
keep_rows <- rowSums(logcounts(milo_obj)) != 0
milo_obj <- milo_obj[keep_rows, ]
## Find HVGs
dec <- modelGeneVar(milo_obj)
hvgs <- getTopHVGs(dec, n=2000)
head(hvgs)
```
```{r, eval= F}
nhood_markers <- findNhoodGroupMarkers(milo_obj,
da_results,
subset.row = hvgs,
aggregate.samples = TRUE,
sample_col = "uniq_id")
head(nhood_markers)
save_dir <- here("outs",
"Milo_compositional_analysis",
"Milo_neighbourhood_mark")
dir.create(save_dir)
write.csv(nhood_markers, here(save_dir,
"Milo_neighbour_markers.csv"))
```
# Age
Testing
```{r}
da_results_age <- testNhoods(milo_obj, design = ~ Tissue + gender + AgeGroup,
design.df = milo_design)
head(da_results_age)
```
```{r}
da_results_age %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_age, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_age, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Plot single-cell UMAP
umap_pl <- plotReducedDim(milo_obj, dimred = "UMAP", colour_by="AgeGroup",
text_by = "Fine_cluster",
text_size = 3, point_size=0.5) +
guides(fill="none")
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_age, layout="UMAP",alpha=0.1)
age_umap <-umap_pl + nh_graph_pl +
plot_layout(guides="collect")
age_umap
```
```{r}
pdf(file = here(milo_dir_umap, "Age_corr_for_tissue_sex.pdf"),
width = 13,
height = 7)
print(age_umap)
dev.off()
```
```{r}
da_results_age <- annotateNhoods(milo_obj, da_results_age, coldata_col = "Fine_cluster")
head(da_results_age)
```
```{r}
da_results_age$Fine_cluster <- factor(da_results_age$Fine_cluster,
levels = plot_levels)
ggplot(da_results_age, aes(Fine_cluster_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_age$clust <- ifelse(da_results_age$Fine_cluster_fraction < 0.7,
"Mixed",
da_results_age$Fine_cluster_fraction)
age_bees <- plotDAbeeswarm(da_results_age, group.by = "Fine_cluster")
```
```{r}
milo_dir_bees <- here("outs",
"Milo_compositional_analysis",
"beeswarm_plots")
pdf(file = here(milo_dir_bees, "Age_correct_for_tissue_sex.pdf"),
width = 7,
height = 13)
print(age_bees)
dev.off()
```
# Finding markers of DA populations
## Automatic grouping of neighborhoods
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Find groups
da_results_age <- groupNhoods(milo_obj, da_results_age, max.lfc.delta = 2)
head(da_results_age)
```
```{r}
plotNhoodGroups(milo_obj, da_results_age, layout="UMAP")
```
```{r}
plotDAbeeswarm(da_results_age, "NhoodGroup")
```
```{r, fig.height = 5, fig.width = 7}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_age, max.lfc.delta = 0.5),
group.by = "NhoodGroup") + ggtitle("max LFC delta=0.5")
```
```{r, fig.height = 5, fig.width = 7}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_age, max.lfc.delta = 0.2),
group.by = "NhoodGroup") + ggtitle("max LFC delta=0.2")
```
```{r}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_age, max.lfc.delta = 1) ,
group.by = "NhoodGroup") + ggtitle("max LFC delta=1")
```
```{r}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_age, max.lfc.delta = 1,
overlap=3), group.by = "NhoodGroup") +
ggtitle("overlap=3")
```
```{r}
set.seed(42)
da_results_age <- groupNhoods(milo_obj, da_results_age, max.lfc.delta = 1, overlap=3)
plotNhoodGroups(milo_obj, da_results_age, layout="UMAP")
```
```{r}
plotDAbeeswarm(da_results_age, group.by = "NhoodGroup")
```
# Finding gene signatures for neighbourhoods
```{r, eval = FALSE}
keep_rows <- rowSums(logcounts(milo_obj)) != 0
milo_obj <- milo_obj[keep_rows, ]
## Find HVGs
dec <- modelGeneVar(milo_obj)
hvgs <- getTopHVGs(dec, n=2000)
head(hvgs)
```
```{r, eval= F}
nhood_markers <- findNhoodGroupMarkers(milo_obj,
da_results_age,
subset.row = hvgs,
aggregate.samples = TRUE,
sample_col = "uniq_id")
head(nhood_markers)
save_dir <- here("outs",
"Milo_compositional_analysis",
"Milo_neighbourhood_mark")
dir.create(save_dir)
write.csv(nhood_markers, here(save_dir,
"Milo_neighbour_markers.csv"))
```
# Sex
Testing
```{r}
da_results_sex <- testNhoods(milo_obj, design = ~ Tissue+ AgeGroup+ gender,
design.df = milo_design)
head(da_results_sex)
```
```{r}
da_results_sex %>%
arrange(SpatialFDR) %>%
head()
```
Inspecting DA results
```{r}
ggplot(da_results_sex, aes(PValue)) + geom_histogram(bins=50)
```
```{r}
ggplot(da_results_sex, aes(logFC, -log10(SpatialFDR))) +
geom_point() +
geom_hline(yintercept = 1) ## Mark significance threshold (10% FDR)
```
```{r}
## Plot neighbourhood graph
nh_graph_pl <- plotNhoodGraphDA(milo_obj, da_results_sex, layout="UMAP",alpha=0.1)
sex_umap <- umap_pl + nh_graph_pl +
plot_layout(guides="collect")
sex_umap
```
```{r}
pdf(file = here(milo_dir_umap, "Sex_corr_for_tissue_age.pdf"),
width = 13,
height = 7)
print(sex_umap)
dev.off()
```
```{r}
da_results_sex <- annotateNhoods(milo_obj, da_results_sex, coldata_col = "Fine_cluster")
head(da_results_sex)
```
```{r}
da_results_sex$Fine_cluster <- factor(da_results_sex$Fine_cluster,
levels = plot_levels)
ggplot(da_results_sex, aes(Fine_cluster_fraction)) + geom_histogram(bins=50)
```
```{r}
da_results_sex$clust <- ifelse(da_results_sex$Fine_cluster_fraction < 0.7,
"Mixed",
da_results_sex$Fine_cluster_fraction)
sex_beeswarm <- plotDAbeeswarm(da_results_sex, group.by = "Fine_cluster")
sex_beeswarm
```
```{r}
pdf(file = here(milo_dir_bees, "Sex_corr_for_tissue_age.pdf"),
width = 7,
height = 13)
print(sex_beeswarm)
dev.off()
```
# Finding markers of DA populations
## Automatic grouping of neighborhoods
```{r}
milo_obj <- buildNhoodGraph(milo_obj)
## Find groups
da_results_sex <- groupNhoods(milo_obj, da_results_sex, max.lfc.delta = 2)
head(da_results_sex)
```
```{r}
plotNhoodGroups(milo_obj, da_results_sex, layout="UMAP")
```
```{r}
plotDAbeeswarm(da_results_sex, "NhoodGroup")
```
```{r, fig.height = 5, fig.width = 7}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_sex, max.lfc.delta = 0.5),
group.by = "NhoodGroup") + ggtitle("max LFC delta=0.5")
```
```{r, fig.height = 5, fig.width = 7}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_sex, max.lfc.delta = 0.2),
group.by = "NhoodGroup") + ggtitle("max LFC delta=0.2")
```
```{r}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_sex, max.lfc.delta = 1) ,
group.by = "NhoodGroup") + ggtitle("max LFC delta=1")
```
```{r}
plotDAbeeswarm(groupNhoods(milo_obj, da_results_sex, max.lfc.delta = 1,
overlap=3), group.by = "NhoodGroup") +
ggtitle("overlap=3")
```
```{r}
set.seed(42)
da_results_sex <- groupNhoods(milo_obj, da_results_sex, max.lfc.delta = 1, overlap=3)
plotNhoodGroups(milo_obj, da_results_sex, layout="UMAP")
```
```{r}
plotDAbeeswarm(da_results_sex, group.by = "NhoodGroup")
```
# Finding gene signatures for neighbourhoods
```{r, eval = FALSE}
keep_rows <- rowSums(logcounts(milo_obj)) != 0
milo_obj <- milo_obj[keep_rows, ]
## Find HVGs
dec <- modelGeneVar(milo_obj)
hvgs <- getTopHVGs(dec, n=2000)
head(hvgs)
```
```{r, eval= F}
nhood_markers <- findNhoodGroupMarkers(milo_obj,
da_results_sex,
subset.row = hvgs,
aggregate.samples = TRUE,
sample_col = "uniq_id")
head(nhood_markers)
save_dir <- here("outs",
"Milo_compositional_analysis",
"Milo_neighbourhood_mark")
dir.create(save_dir)
write.csv(nhood_markers, here(save_dir,
"Milo_neighbour_markers.csv"))
```
# Save data
```{r}
milo_dir <- here("data", "Milo_datasets")
dir.create(milo_dir)
saveRDS(milo_obj, here(milo_dir, "HCA_all_milo.RDS"))
da_data_dir <- here("outs", "Milo_compositional_analysis",
"differential_abundance")
dir.create(da_data_dir)
write.csv(da_results, here(da_data_dir, "da_results_tissue.csv"))
write.csv(da_results_age, here(da_data_dir, "da_results_age.csv"))
write.csv(da_results_sex, here(da_data_dir, "da_results_sex.csv"))
```
# Session info
```{r}
sessionInfo()
```