var_defaults = c(
chr_file="chromosome_coordinates_hg19.txt",
input_dir=".",
output_dir="."
)
args = commandArgs(trailingOnly = T)
if (length(args) == 0) {
stop(paste0("Please pass at least one argument - the name of the tumour set.
USAGE:
Rscript TM_postFDR_2_merging_CW.R TUMOUR_SET_NAME [input_dir=INPUT_DIR] [output_dir=OUTPUT_DIR] [chr_file=CHR_FILE]
"))
}
# pass name of tumour set as first argument
tumour_set = args[1]
if (length(args) > 1) {
split_args = strsplit(args[2:length(args)], "=")
for (x in split_args) { assign(x[1], x[2]) }
}
for (x in names(var_defaults)) {
if (!exists(x)) {
assign(x, var_defaults[x])
}
}
cat(paste0("Running with the following variable values:
tumour_set = ", tumour_set, "
input_dir = ", input_dir, "
output_dir = ", output_dir, "
chr_file = ", chr_file, "
"))
library(ggplot2)
library(data.table)
library(cowplot)
chr_loc=read.table(chr_file,header=T,stringsAsFactors = F)
groups=tumour_set
cna_type=c("Gain","LOH","HD")
suppressWarnings(
for (grp in 1:length(groups)){
allsegments<-read.table(paste0(input_dir, "/", groups[grp],"_allsegments.txt"),sep="\t",stringsAsFactors=F,header=T)
for (cna in 1:length(cna_type)){
CNAtype<-paste0(c("s","c"),cna_type[cna])
#gets segments that have GAINS
cna.data<-allsegments[allsegments$CNA %in% CNAtype,]
#remove subclonal after clonal AND order by sample, chr and startpos cna.data$chr[is.na(cna.data$chr)]=23
cna.data$chr=as.numeric(cna.data$chr)
cna.data$chr[is.na(cna.data$chr)]=23
cna.data = cna.data[!duplicated(cna.data[,1:4]),]#[order(cna.data$Tumour_Name,cna.data$chr,cna.data$startpos),]
fn = paste0(input_dir, "/", groups[grp],"_enriched_regions_",cna_type[cna],"_noevent.txt")
if (file.exists(fn)){
enriched=read.table(fn,header=T,stringsAsFactors = F,sep="\t")
enriched$chr=as.numeric(enriched$chr)
enriched$chr[enriched$chr=="X"]=23
print("files read")
#
##
###
#library(data.table)
setDT(cna.data)
setDT(enriched)
setkey(cna.data,"chr","startpos","endpos")
setkey(enriched,"chr","startpos","endpos")
merged=foverlaps(cna.data,enriched,type="any",nomatch=0)
# remove regions outside of ENRICHED REGIONS
merged$i.startpos=ifelse(merged$i.startpos<merged$startpos,merged$startpos,merged$i.startpos)
merged$i.endpos=ifelse(merged$i.endpos>merged$endpos,merged$endpos,merged$i.endpos)
print("files merged")
#
##
###
# add 'y' for geom_segment() -> data QC #####################################################
u=unique(merged$Tumour_Name)
for (i in 1:length(u)){
for (j in 1:nrow(merged)){
if (merged$Tumour_Name[j]==u[i]){
merged$y[j]=i
}
}
}
# ALL CHROMOSOMES
chrs=unique(merged$chr)
for (i in 1:length(chrs)){
s=merged[which(merged$chr==chrs[i]),]
P=ggplot()+geom_segment(data=s,aes(x=i.startpos,y=y,xend=i.endpos,yend=y,col=Tumour_Name))+theme(legend.position = "NONE")+
ggtitle(paste(groups[grp],cna_type[cna]," - Enriched regions chr",chrs[i]))+expand_limits(y=0,x=0)+geom_vline(xintercept = chr_loc[chr_loc$chr==chrs[i],]$end,col="black",linetype="longdash")
save_plot(paste0(output_dir, "/", groups[grp],"_enriched_",cna_type[cna],"_chr",chrs[i],"_cowplot.pdf"),plot=P) #NAP: use ggsave if working on your R/ggplot2 version - my recent update of ggplot2 has made ggsave nonfunctional
}
############################################################################################
###
##
#
write.table(merged, paste0(output_dir, "/", groups[grp],"_segments_in_enriched_regions_",cna_type[cna],".txt"), sep="\t",quote=F,row.names = F)
print(paste(fn,"finished"))
print(merged)
} else {
print(paste(fn,"does not exist"))
}
}
}
)