var_defaults = c(
input_dir=".",
output_dir=".",
chr_file="chromosome_coordinates_hg19.txt"
)
args = commandArgs(trailingOnly = T)
if (length(args) == 0) {
stop(paste0("Please pass at least one arguments - the name of the tumour set.
USAGE:
Rscript TM_3_CN_landscape_CW_SGE.R TUMOUR_SET_NAME [input_dir=INPUT_DIR] [output_dir=OUTPUT_DIR] [chr_file=CHR_FILE]
"))
}
# pass name of tumour set as first argument
tumour_set = args[1]
# optional: pass file pattern (if different to "subclones.txt") as second argument
if (length(args) > 1) {
split_args = strsplit(args[2:length(args)], "=")
for (x in split_args) { assign(x[1], x[2]) }
}
for (x in names(var_defaults)) {
if (!exists(x)) {
assign(x, var_defaults[x])
}
}
cat(paste0("Running with the following variable values:
tumour_set = ", tumour_set, "
input_dir = ", input_dir, "
output_dir = ", output_dir, "
chr_file = ", chr_file, "
"))
library(GenomicRanges)
if (!dir.exists(output_dir)) {
cat(paste0("output_dir ", output_dir, " does not exist - creating."))
dir.create(output_dir)
}
CNAs <- c("LOH","HD", "Gain","cLOH","cHD", "cGain","sLOH","sHD", "sGain")
#### script collates each segment for individual tumours to give an overall landscape for each copy number state
#### writes CNA_refsegs output
L=read.table(chr_file,header=T,stringsAsFactors=F)
maxchr=L$end
copy.number.data <- read.table(paste0(input_dir, "/", tumour_set, "_allsegments.txt"),header=T, stringsAsFactors=F)
copy.number.data <- copy.number.data[copy.number.data$frac1_A>0©.number.data$frac1_A<=1©.number.data$tumour_ploidy>0,]
chr_names <- c(1:22,"X")
for (CNA in CNAs) {
# Include Amp segments as gains
if (length(grep("Gain", CNA)) > 0) {
pat = paste0(CNA, "|", gsub("Gain", "Amp", CNA))
} else {
pat = CNA
}
input.data <- copy.number.data[grep(pat, copy.number.data$CNA),]
reduced_gr = suppressWarnings( Reduce(c, lapply(unique(input.data$Tumour_Name), function(barcode) {
sample_data = input.data[input.data$Tumour_Name == barcode,]
sample_gr = reduce(GRanges(seqnames=sample_data$chr, ranges=IRanges(start=sample_data$startpos, end=sample_data$endpos)))
})) )
data_rle = coverage(reduced_gr)
data = Reduce(rbind, lapply(chr_names, function(chr_name) {
if (chr_name %in% names(data_rle)) {
chr_i = which(chr_names == chr_name)
chr_mat = cbind(chr_name, NA, cumsum(runLength(data_rle)[[chr_name]]), runValue(data_rle)[[chr_name]])
if (chr_mat[nrow(chr_mat),3] != maxchr[chr_i]) {
chr_mat = rbind(chr_mat, c(chr_name, NA, maxchr[chr_i], 0))
}
chr_mat[,2] = c(0, as.numeric(chr_mat[1:(nrow(chr_mat)-1),3])+1)
chr_mat
} else {
c(chr_name, 0, maxchr[which(chr_names == chr_name)], 0)
}
}))
colnames(data) = c("chr", "sp", "ep", "val")
data = as.data.frame(data)
write.table(data, file = paste0(output_dir, "/", tumour_set, "_", CNA, "_refsegs.txt"), sep="\t", quote=F)
}