Analyses/BIC_comparison.R · SherlockLung-EvolutionaryTrajectory-Analysis

library(ggplot2)

# load BIC values for all iterations with each number of subsets
bics_list = lapply(list.files("path/to/output_data/", pattern="allbics", full.names=T), function(x) { load(x); allbics })

# compare pairwise BIC distributions for different numbers of subsets
bic_fdrs = p.adjust(unlist(lapply(1:4, function(x) { sapply((x+1):5, function(y) { wilcox.test(bics_list[[x]], bics_list[[y]])$p.value }) })), method="fdr")
max(bic_fdrs) < 1e-6
# TRUE (i.e. all pairwise comparisons are highly significant after multiple testing correction)

melted_bics = melt(bics_list)
melted_bics$L1 = as.factor(melted_bics$L1)

# plot BIC boxplots
p = ggplot(melted_bics, aes(x=L1, y=value, fill=L1), show.legend=FALSE) +
    geom_boxplot() +
    theme_bw(base_family = "Roboto Condensed") +
    theme(legend.position="none", panel.border=element_rect(linewidth = 1.25, fill=NA), axis.ticks.x=element_blank(), axis.ticks.y=element_blank(), text = element_text(size=20), axis.text.x = element_text(size=20), axis.text.y = element_text(size=18)) +
    scale_y_continuous() +
	scale_fill_manual(values=rev(colorRampPalette(c(rgb(1,0.02,0.02,0.5), rgb(1,0.9,0.02,0.5)), alpha = TRUE)(5))) +
	ylab('Bayesian Information Criterion') +
	xlab('Number of Subsets')

ggsave(file='path/to/supplementary/figures/BIC_boxplot.png',plot = p,width = 7,height = 7,device = "png")