# load per-iteration ordering matrices
load("path/to/data/SherlockLung_LUAD_2025_DN2/SherlockLung_LUAD_2025_DN2_ordering_matrices_G1.Rdata")
nsda_om = ordering_matrices
load("path/to/data/SherlockLung_LUAD_2025_DN3/SherlockLung_LUAD_2025_DN3_ordering_matrices_G1.Rdata")
nsdb_om = ordering_matrices
### RUN FROM HERE FOR tumour_set = "SherlockLung_LUAD_2025_DN2" with nsda_prop_prewgd = get_proportion_prewgd(nsda_om, wgd_i, cna_i) AND THEN FOR tumour_set = "SherlockLung_LUAD_2025_DN3" with nsdb_prop_prewgd = get_proportion_prewgd(nsdb_om, wgd_i, cna_i)
tumour_set = "SherlockLung_LUAD_2025_DN2"
#tumour_set = "SherlockLung_LUAD_2025_DN3"
input_dir = paste0("path/to/data/", tumour_set, "/")
wgd_file = "path/to/data/SherlockLung_WGDStatus.txt"
G = 1
mergedseg=read.table(paste0(input_dir, "/", tumour_set,"_mergedseg_G1.txt"),header=T,stringsAsFactors = F)
matrix_coresp=read.table(paste0(input_dir, "/", tumour_set,"_matrix_coresp_G1.txt"),header=T,stringsAsFactors = F)
matrix_out=read.table(paste0(input_dir, "/", tumour_set,"_matrix_out_G1.txt"),header=T,stringsAsFactors = F)
cnai = which(mergedseg$CNA %in% c("dGain","dLOH","dHD"))
mergedseg$ID[cnai] = sapply(cnai, function(i) { paste0(gsub("d", "", mergedseg$CNA[i]), "_", mergedseg$ID[i]) })
if (wgd_file != "none") {
wgd_status = read.table(wgd_file, stringsAsFactors=FALSE)
all_wgd_samples = wgd_status[wgd_status[,2] == "WGD",1]
wgd_samples = list(intersect(unique(mergedseg$Tumour_Name), all_wgd_samples))
} else {
wgd_samples = list(unique(mergedseg[mergedseg$tumour_ploidy > 2,'Tumour_Name']))
}
event.id=unique(mergedseg[,c("ID","noevent","CNA")])
if (including_wgd) {
event.id=rbind(data.frame(ID="Whole_Genome_Duplication",noevent=(length(unique(mergedseg$ID))+1),CNA="cWGD",stringsAsFactors = F),event.id)
}
event.id$cna=base::substring(event.id$CNA,first = 2)
event.id=unique(event.id[,c("ID","noevent","cna")])
wgd_i = event.id[event.id$cna == "WGD","noevent"]
cna_i = event.id[event.id$cna %in% c("LOH","Gain","HD"),"noevent"]
get_num_prewgd_cna = function(om, wgd_i, cna_i) {
wgd_rn = which(om[[1]][,wgd_i] > 0)
sapply(wgd_rn, function(samp_i) {
mean(sapply(om, function(ordering_mat) {
pre_wgd = which(ordering_mat[samp_i,] > 0 & ordering_mat[samp_i,] < ordering_mat[samp_i,wgd_i])
sum(pre_wgd %in% cna_i)
}))
})
}
nsda_prewgd_events = nsda_om[[1]][nsda_om[[1]][,ncol(nsda_om[[1]])] > 0,ncol(nsda_om[[1]])] - 1
nsdb_prewgd_events = nsdb_om[[1]][nsdb_om[[1]][,ncol(nsdb_om[[1]])] > 0,ncol(nsdb_om[[1]])] - 1
get_proportion_prewgd = function(om, wgd_i, cna_i) {
wgd_rn = which(om[[1]][,wgd_i] > 0)
sapply(wgd_rn, function(samp_i) {
ordering_mat = om[[1]]
pre_wgd = length(which(ordering_mat[samp_i,] > 0 & ordering_mat[samp_i,] < ordering_mat[samp_i,wgd_i]))
tot = length(which(ordering_mat[samp_i,] > 0))
pre_wgd / tot
})
}
nsda_prop_prewgd = get_proportion_prewgd(nsda_om, wgd_i, cna_i)
#nsdb_prop_prewgd = get_proportion_prewgd(nsdb_om, wgd_i, cna_i)
### END OF SECTION TO RUN FOR EACH NSD TRAJECTORY
# plot preWGD distributions
library(reshape2)
pre_wgd_proportions = melt(list(`NSD-A`=nsda_prop_prewgd, `NSD-B`=nsdb_prop_prewgd))
colnames(pre_wgd_proportions) = c("proportion", "trajectory")
p <- ggplot(pre_wgd_proportions, aes(x=proportion, fill=trajectory)) +
geom_density(alpha=0.5) +
theme_bw(base_family = "Roboto Condensed") +
scale_fill_manual(values=c("#8adbf5","#a371cb")) +
xlab('Proportion of events occurring pre-WGD')
ggsave(file='path/to/figures/Fig4/NSDA_vs_NSDB_pre_wgd_proportion_density.png',plot = p,width = 4,height = 3.5,device = "png")