#### script determines copy number states allocated to each segment
#### writes allsegments output
var_defaults = c(
input_dir=".",
output_dir=".",
wgd_file="none"
)
args = commandArgs(trailingOnly = T)
if (length(args) == 0) {
stop(paste0("Please pass at least one argument - the name of the tumour set.
USAGE:
Rscript TM_2_CNA_logic_CW.R TUMOUR_SET_NAME [input_dir=INPUT_DIR] [output_dir=OUTPUT_DIR]
"))
}
# pass name of tumour set as first argument (after --args)
tumour_set = args[1]
# optional: pass file pattern (if different to "subclones.txt") as second argument (after --args)
if (length(args) > 1) {
split_args = strsplit(args[2:length(args)], "=")
for (x in split_args) { assign(x[1], x[2]) }
}
for (x in names(var_defaults)) {
if (!exists(x)) {
assign(x, var_defaults[x])
}
}
cat(paste0("Running with the following variable values:
tumour_set = ", tumour_set, "
input_dir = ", input_dir, "
output_dir = ", output_dir, "
wgd_file = ", wgd_file, "
"))
# Annotate segments as their appropriate copy number varaiant type, or NoCNV
CNA_logic = function (tumour.type, input_dir, output_dir, wgd_file) {
hd <- c("Tumour_Name", "chr", "startpos", "endpos","nMaj1_A","nMin1_A", "frac1_A", "SDfrac_A", "tumour_ploidy")
print(tumour.type)
allsegs <- read.table(paste0(input_dir, "/", tumour.type,"_allsegs.txt"), header = T, stringsAsFactors = F)
tot=dim(allsegs)[1]
if (wgd_file != "none" && tot > 0) {
wgd_status = read.table(wgd_file, sep="\t", stringsAsFactors=FALSE)
sample_ploidies = unique(allsegs[,c("Tumour_Name","tumour_ploidy")])
for (row_i in 1:nrow(sample_ploidies)) {
barcode = sample_ploidies[row_i,1]
ploidy = sample_ploidies[row_i,2]
sample_wgd_status = wgd_status[wgd_status[,1] == barcode,2]
# in some sample sets, dots have been replaced with underscores
# if a WGD status is not found
if (length(sample_wgd_status) == 0) {
sample_wgd_status = wgd_status[gsub("\\.", "_", wgd_status[,1]) == barcode,2]
}
if (sample_wgd_status == "nWGD") {
new_ploidy = 2
} else if (ploidy > 4) {
new_ploidy = ploidy
} else {
new_ploidy = 4
}
if (new_ploidy != ploidy) {
allsegs[allsegs[,"Tumour_Name"] == barcode,"tumour_ploidy"] = new_ploidy
}
}
}
# If you have clonal LOH, subclonal LOH is not counted as we look for the first occurrence of a given event
# Losses annotated but will not be counted in calculations of enriched regions
allsegsa <- NULL
CNA <- NULL
for (i in 1:dim(allsegs)[1]) {
if (is.na(allsegs$nMaj2_A[i])) {
if (allsegs$nMin1_A[i] == 0 & allsegs$nMaj1_A[i] == 0) {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "cHD")
}
if (xor(allsegs$nMin1_A[i] == 0, allsegs$nMaj1_A[i] == 0)) {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "cLOH")
}
if ((allsegs$nMaj1_A[i] > allsegs$tumour_ploidy[i]/2) |
(allsegs$nMin1_A[i] > allsegs$tumour_ploidy[i]/2)) {
if ((allsegs$nMaj1_A[i] > allsegs$tumour_ploidy[i]*2) |
(allsegs$nMin1_A[i] > allsegs$tumour_ploidy[i]*2)) {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "cAmp")
}
else {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "cGain")
}
}
if ((allsegs$nMaj1_A[i] == allsegs$tumour_ploidy[i]/2) &
(allsegs$nMin1_A[i] == allsegs$tumour_ploidy[i]/2)) {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "NoCNV")
}
if ((allsegs$nMaj1_A[i] < allsegs$tumour_ploidy[i]/2) |
(allsegs$nMin1_A[i] < allsegs$tumour_ploidy[i]/2)) {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "cLoss")
}
}
if (!is.na(allsegs$nMaj2_A[i])) {
if (allsegs$nMin1_A[i] == 0 & allsegs$nMaj1_A[i] == 0) {
CNA <- c(CNA, "sHD")
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
}
if ((allsegs$nMin1_A[i] == 0 | allsegs$nMaj1_A[i] == 0) &
xor(allsegs$nMin2_A[i] == 0, allsegs$nMaj2_A[i] == 0)) {
CNA <- c(CNA, "cLOH")
tmp <- allsegs[i,c(1:4,8:12)]
names(tmp) <- hd
allsegsa <- rbind(allsegsa, tmp[,hd])
}
else if (xor(allsegs$nMin1_A[i] == 0, allsegs$nMaj1_A[i] == 0)) {
CNA <- c(CNA, "sLOH")
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
}
if ((allsegs$nMaj1_A[i] > allsegs$tumour_ploidy[i]/2 |
allsegs$nMin1_A[i] > allsegs$tumour_ploidy[i]/2) &
(allsegs$nMaj2_A[i] > allsegs$tumour_ploidy[i]/2 |
allsegs$nMin2_A[i] > allsegs$tumour_ploidy[i]/2)) {
if ((allsegs$nMaj1_A[i] > allsegs$tumour_ploidy[i]*2 |
allsegs$nMin1_A[i] > allsegs$tumour_ploidy[i]*2) &
(allsegs$nMaj2_A[i] > allsegs$tumour_ploidy[i]*2 |
allsegs$nMin2_A[i] > allsegs$tumour_ploidy[i]*2)) {
CNA <- c(CNA, "cAmp")
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
}
else {
CNA <- c(CNA, "cGain")
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
}
}
if ((allsegs$nMin2_A[i] > allsegs$tumour_ploidy[i]/2 & allsegs$nMin1_A[i] != allsegs$nMin2_A[i])|
(allsegs$nMaj2_A[i] > allsegs$tumour_ploidy[i]/2 & allsegs$nMaj1_A[i] != allsegs$nMaj2_A[i])) {
if ((allsegs$nMin2_A[i] > allsegs$tumour_ploidy[i]*2 & allsegs$nMin1_A[i] != allsegs$nMin2_A[i]) |
(allsegs$nMaj2_A[i] > allsegs$tumour_ploidy[i]*2 & allsegs$nMaj1_A[i] != allsegs$nMaj2_A[i])) {
CNA <- c(CNA, "sAmp")
tmp <- allsegs[i,c(1:4,8:12)]
names(tmp) <- hd
allsegsa <- rbind(allsegsa, tmp[,hd])
}
else {
CNA <- c(CNA, "sGain")
tmp <- allsegs[i,c(1:4,8:12)]
names(tmp) <- hd
allsegsa <- rbind(allsegsa, tmp[,hd])
}
}
if ((allsegs$nMaj1_A[i] < allsegs$tumour_ploidy[i]/2 |
allsegs$nMin1_A[i] < allsegs$tumour_ploidy[i]/2) &
(allsegs$nMaj2_A[i] < allsegs$tumour_ploidy[i]/2 |
allsegs$nMin2_A[i] < allsegs$tumour_ploidy[i]/2)) {
CNA <- c(CNA, "cLoss")
tmp <- allsegs[i,c(1:4,8:12)]
names(tmp) <- hd
allsegsa <- rbind(allsegsa, tmp[,hd])
}
if (((allsegs$nMaj1_A[i] < allsegs$tumour_ploidy[i]/2 & allsegs$nMaj1_A[i] != allsegs$nMaj2_A[i]) |
(allsegs$nMin1_A[i] < allsegs$tumour_ploidy[i]/2 & allsegs$nMin1_A[i] != allsegs$nMin2_A[i]))) {
allsegsa <- rbind(allsegsa, allsegs[i,c(1:7,11:12)])
CNA <- c(CNA, "sLoss")
}
}
if(i %% 100 ==0){
print(paste(i,"/",tot))
}
}
allsegsa <- cbind(allsegsa, CNA)
allsegsa$frac1_A[allsegsa$CNA == "cGain"|allsegsa$CNA == "cAmp"|allsegsa$CNA == "cLOH"|allsegsa$CNA == "cHD"|allsegsa$CNA == "cLoss"] <- 1
write.table(allsegsa, file = paste0(output_dir, "/", tumour.type,"_allsegments.txt"),sep="\t",quote = F,row.names = F)
print("CNALogic done")
}
CNA_logic(tumour.type=tumour_set, input_dir, output_dir, wgd_file)