SherlockLung-EvolutionaryTrajectory-Analysis / Ordering_Model / R / TM_postFDR_2_merging_CW.R
TM_postFDR_2_merging_CW.R
Raw
var_defaults = c(
  chr_file="chromosome_coordinates_hg19.txt",
  input_dir=".",
  output_dir="."
)

args = commandArgs(trailingOnly = T)

if (length(args) == 0) {
  stop(paste0("Please pass at least one argument - the name of the tumour set.

USAGE:
Rscript TM_postFDR_2_merging_CW.R TUMOUR_SET_NAME [input_dir=INPUT_DIR] [output_dir=OUTPUT_DIR] [chr_file=CHR_FILE]

"))
}

# pass name of tumour set as first argument
tumour_set = args[1]
if (length(args) > 1) {
  split_args = strsplit(args[2:length(args)], "=")
  for (x in split_args) { assign(x[1], x[2]) }
}
for (x in names(var_defaults)) {
  if (!exists(x)) {
    assign(x, var_defaults[x])
  }
}

cat(paste0("Running with the following variable values:

tumour_set = ", tumour_set, "
input_dir = ", input_dir, "
output_dir = ", output_dir, "
chr_file = ", chr_file, "

"))

library(ggplot2)
library(data.table)
library(cowplot)
chr_loc=read.table(chr_file,header=T,stringsAsFactors = F)
groups=tumour_set
cna_type=c("Gain","LOH","HD")

suppressWarnings(
for (grp in 1:length(groups)){
  allsegments<-read.table(paste0(input_dir, "/", groups[grp],"_allsegments.txt"),sep="\t",stringsAsFactors=F,header=T)
  for (cna in 1:length(cna_type)){
    CNAtype<-paste0(c("s","c"),cna_type[cna])  
    #gets segments that have GAINS
    cna.data<-allsegments[allsegments$CNA %in% CNAtype,]
    #remove subclonal after clonal AND order by sample, chr and startpos cna.data$chr[is.na(cna.data$chr)]=23
    cna.data$chr=as.numeric(cna.data$chr)
    cna.data$chr[is.na(cna.data$chr)]=23
    cna.data = cna.data[!duplicated(cna.data[,1:4]),]#[order(cna.data$Tumour_Name,cna.data$chr,cna.data$startpos),]
    
    fn = paste0(input_dir, "/", groups[grp],"_enriched_regions_",cna_type[cna],"_noevent.txt")
    if (file.exists(fn)){
      enriched=read.table(fn,header=T,stringsAsFactors = F,sep="\t")
      enriched$chr=as.numeric(enriched$chr)
      enriched$chr[enriched$chr=="X"]=23
      print("files read")
      #
      ##
      ###
      #library(data.table)
      setDT(cna.data)
      setDT(enriched)
      setkey(cna.data,"chr","startpos","endpos")
      setkey(enriched,"chr","startpos","endpos")
      
      merged=foverlaps(cna.data,enriched,type="any",nomatch=0)
      # remove regions outside of ENRICHED REGIONS 
      merged$i.startpos=ifelse(merged$i.startpos<merged$startpos,merged$startpos,merged$i.startpos)
      merged$i.endpos=ifelse(merged$i.endpos>merged$endpos,merged$endpos,merged$i.endpos)
      print("files merged")
      #
      ##
      ###
      # add 'y' for geom_segment() -> data QC #####################################################
      u=unique(merged$Tumour_Name)
      for (i in 1:length(u)){
        for (j in 1:nrow(merged)){
          if (merged$Tumour_Name[j]==u[i]){
            merged$y[j]=i
          }
        }
      }
      
      # ALL CHROMOSOMES
      chrs=unique(merged$chr)
      for (i in 1:length(chrs)){
        s=merged[which(merged$chr==chrs[i]),]
        P=ggplot()+geom_segment(data=s,aes(x=i.startpos,y=y,xend=i.endpos,yend=y,col=Tumour_Name))+theme(legend.position = "NONE")+
          ggtitle(paste(groups[grp],cna_type[cna]," - Enriched regions chr",chrs[i]))+expand_limits(y=0,x=0)+geom_vline(xintercept = chr_loc[chr_loc$chr==chrs[i],]$end,col="black",linetype="longdash")
        save_plot(paste0(output_dir, "/", groups[grp],"_enriched_",cna_type[cna],"_chr",chrs[i],"_cowplot.pdf"),plot=P) #NAP: use ggsave if working on your R/ggplot2 version - my recent update of ggplot2 has made ggsave nonfunctional
      }

      ############################################################################################
      ###
      ##
      #
      write.table(merged, paste0(output_dir, "/", groups[grp],"_segments_in_enriched_regions_",cna_type[cna],".txt"), sep="\t",quote=F,row.names = F)
      print(paste(fn,"finished"))
      print(merged)
    } else {
      print(paste(fn,"does not exist"))
    }
  }
}
)