################################################################################
### This script is to identify the signatures that have predictive signal for
### taxanes. To this, we use cell lines from the DepMap database, which have
### been treated with different drugs
################################################################################
rm(list = ls(all = TRUE))
## PACKAGES
library(this.path)
library(dplyr)
library(ggplot2)
library(ggthemes)
## DIRECTORIES
drug_dir <- dirname(this.path())
base_dir <- dirname(dirname(drug_dir))
input_dir <- file.path(base_dir, "Input_Data")
output_dir <- file.path(drug_dir, "outputs")
dir.create(output_dir)
helper_dir <- file.path(dirname(drug_dir), 'Helper_Scripts')
figs_dir <- file.path(base_dir, "Figures")
## PLOTTING THEME
theme_set(theme_tufte(base_size = 6, base_family = "ArialMT"))
theme_update(text = element_text(size = 6),
axis.text = element_text(size = 6),
axis.line = element_line(linewidth = 0.5),
axis.ticks = element_line(linewidth = 0.5),
axis.ticks.length = unit(.1, "cm"))
## LOAD FILES
cell_exp <- readRDS(file.path(input_dir, '4_Activities_CompendiumCINSigs_THRESH95_DepMap.rds'))
cell_id_mapping <- read.table(file.path(input_dir, 'cellLine_CEL_file_mapping.tsv'), sep = '\t', header = T, stringsAsFactors = F)
samp_info <- read.csv(file.path(input_dir, 'sample_info.csv'), header = T)
drug_screen <- read.csv(file.path(input_dir, 'secondary-screen-dose-response-curve-parameters.csv'), stringsAsFactors = F)
cell_line_info <- read.csv(file.path(input_dir, 'secondary-screen-cell-line-info.csv'))
## PROCESS SIGNATURE ACTIVITIES
#remove duplicate entries
rownames(cell_exp) <- cell_id_mapping[match(rownames(cell_exp),
cell_id_mapping[cell_id_mapping$study == 'CCLE', 'fileid']),
'cellid']
cell_exp <- cell_exp[!duplicated(rownames(cell_exp)), ]
rownames(cell_exp) <- samp_info[unlist(sapply(rownames(cell_exp),
function(x) grep(x, samp_info$cell_line_name, ignore.case = T)[1])), 1]
cell_exp <- cell_exp[!is.na(rownames(cell_exp)), ]
cell_exp <- cell_exp[!duplicated(rownames(cell_exp)), ]
# Total number of cell lines with signature activities
nrow(cell_exp)
## PREPARE DRUG SCREEN & SAMPLE INFO DATA
drug_screen <- drug_screen[drug_screen$screen_id == 'HTS002', ]
cell_exp_filt_drug <- cell_exp[rownames(cell_exp) %in% drug_screen$depmap_id, ]
drug_lookup <- c()
unique_target <- unique(drug_screen[, c('name', 'target')])
for(i in 1:nrow(unique_target)){
drug <- unique_target[i, 'name']
targets <- unlist(strsplit(unique_target[i, 'target'], ', '))
drug_lookup <- rbind(drug_lookup, cbind(drug, targets))
}
drug_lookup <- unique(drug_lookup)
drug_lookup <- data.frame(drug_lookup)
colnames(drug_lookup) <- c('drug', 'target')
drug_lookup <- drug_lookup[!is.na(drug_lookup$target), ]
# Number of cell lines with drug response data
nrow(cell_exp_filt_drug)
## SELECT CYTOTOXIC CHEMOTHERAPIES
platins <- unique(drug_lookup$drug[grep('platin', drug_lookup$drug)])
taxanes <- unique(drug_lookup$drug[grep('tax', drug_lookup$drug)])
anthracyclines <- unique(drug_lookup$drug[grep('rubicin', drug_lookup$drug)])
drugs <- c(platins, taxanes, anthracyclines)
# Number of cytotoxic chemotherapies included in the drug screen
length(drugs)
## FILTER CYTOTOXIC CHEMOTHREAPIS
ranges <- c()
for(d in drugs){
screen <- drug_screen[drug_screen$name == d, ]
screen <- screen[screen$auc <= 1, ]
ranges <- rbind(ranges, c(d,
mean(screen$auc),
sd(screen$auc),
max(screen$auc),
min(screen$auc)))
p <- ggplot(screen) +
geom_density(aes(x = auc)) +
labs(title = 'AUC distribution in all cell lines') +
xlim(min(screen$auc) - 0.1, max(screen$auc) + 0.1) +
scale_color_manual(values = alpha(c('gray40', 'red'), 1)) +
scale_fill_manual(values = alpha(c('gray40', 'red'), 0.6)) +
scale_x_continuous(breaks = seq(round(min(screen$auc)) - 0.1,
round(max(screen$auc)) + 0.1,
0.1)) +
xlab(paste0('Response to ', d, ' (AUC)')) +
theme(text = element_text(size = 8),
axis.text = element_text(size = 6),
axis.line = element_line(size = 0.5),
axis.ticks = element_line(size = 0.5),
axis.ticks.length = unit(.1, 'cm'),
legend.position = 'right')
ggsave(file.path(figs_dir, paste0('Additional/Distribution_PanCancer_AUCs_', d, '.png')),
width = 80, height = 80, units = 'mm', dpi = 300)
}
colnames(ranges) <- c('drug', 'mean', 'sd', 'max', 'min')
ranges <- as.data.frame(ranges)
ranges[, 2:5] <- sapply(2:5, function(x) as.numeric(ranges[, x]))
# Distribution of AUC values in all cells per drug
print(ranges)
# We cannot use *in vitro* response data for cisplatin since AUC values of all samples are
# ~1 indicating that most cells are sensitive to this treatment at the dose tested.
# On the other hand, the range of AUC values for anthracyclines is not wide enough
# to perform correlations (AUC sd < 0.1). In addition, there is not enough number
# of cells with AUC close to 0 and thus resistant to these treatments.
# Therefore, we have to limit the correlation analysis of signatures with drug response
# data to taxanes. We also included nemorubicin (just in case we see something interesting)
# since the AUC range is wider than the rest of anthracyclines included in the drug screening.
drugs <- c('docetaxel', 'cabazitaxel', 'paclitaxel', 'nemorubicin')
## CORRELATION ANALYSIS WITH DRUG RESPONSE PAN-CANCER
sigs <- colnames(cell_exp_filt_drug)
drug_response <- lapply(1:length(sigs), function(s){
response <- c()
for (d in drugs){
pdat <- left_join(drug_screen[drug_screen$name == d, ],
data.frame(depmap_id = rownames(cell_exp_filt_drug),
cell_exp_filt_drug),
by = 'depmap_id')
pdat <- left_join(pdat, cell_line_info, by = 'depmap_id')
pdat <- pdat[pdat$passed_str_profiling.y == T, ]
pdat <- pdat[!is.na(pdat[, paste0('CX', s)]), ]
moa <- unique(pdat$moa)
#correlation
res <- cor.test(pdat[, 'auc'], pdat[, paste0('CX', s)],
method = 'k', alternative = 'less')
response <- rbind(response, c(s, d, moa, res$estimate, res$p.value))
if(res$estimate < -0.07 & res$p.value < 0.05){
dat <- as.data.frame(cbind(pdat[, 'auc'],
pdat[, paste0('CX', s)],
pdat$primary_tissue))
colnames(dat) <- c('auc', 'signature', 'tissue')
dat[, 1:2] <- sapply(1:2, function(x) as.numeric(dat[, x]))
p <- ggplot(dat, aes(x = auc, y = signature, color = tissue)) +
geom_point(size = 1.5) +
geom_smooth(method = 'lm', se = TRUE, color = 'darkred', fill = 'grey50') +
xlim(min(dat$auc, na.rm = T) - 0.1, max(dat$auc, na.rm = T) + 0.1) +
labs(title = 'Pancancer cell lines',
subtitle = paste0('tau=',
round(res$estimate, 4),
'; p-value=',
round(res$p.value, 4))) +
ylab(paste0('CX', s, ' activity levels')) +
xlab(paste0('Response to ', d, ' (AUC)')) +
theme(text = element_text(size = 8),
axis.text = element_text(size = 6),
axis.line = element_line(size = 0.5),
axis.ticks = element_line(size = 0.5),
axis.ticks.length = unit(.1, 'cm'),
legend.position = 'right')
ggsave(file.path(figs_dir, paste0('Additional/PanCancer_CX', s, '_', d, '.png')),
width = 200, height = 80, units = 'mm', dpi = 300)
}
}
colnames(response) <- c('signature', 'drug', 'moa', 'tau', 'p.value')
response <- cbind(response, q.value = p.adjust(response[, 5], method = 'BH'))
response <- data.frame(response, stringsAsFactors = F)
response[, 4:6] <- sapply(4:6, function(x) as.numeric(response[, x]))
response <- response[order(response$signature, response$q.value), ]
return(response)
})
drug_response_pan_sigs <- data.table::rbindlist(drug_response)
#Filtering
tau_thresh <- -0.07
drug_response_pan_filt <- drug_response_pan_sigs[drug_response_pan_sigs$tau < tau_thresh, ]
drug_response_pan_filt <- drug_response_pan_filt[drug_response_pan_filt$q.value < 0.05, ]
# Remove CX7 (seems to be an artefact)
drug_response_pan_filt <- drug_response_pan_filt[drug_response_pan_filt$signature != 7, ]
# Signature name
drug_response_pan_filt$signature <- paste0('CX', drug_response_pan_filt$signature)
#save
write.table(drug_response_pan_filt, file.path(output_dir, 'DrugResponse_PanCancer.txt'),
sep = '\t', quote = FALSE, col.names = TRUE, row.names = FALSE)
# Number of significant hits for drug screen at pan-cancer level
print(table(drug_response_pan_filt$signature, drug_response_pan_filt$drug))
## PACLITAXEL PREDICTION: CX5 vs CX3
sigs <- colnames(cell_exp_filt_drug)
d <- 'paclitaxel'
pdat <- left_join(drug_screen[drug_screen$name == d, ],
data.frame(depmap_id = rownames(cell_exp_filt_drug),
cell_exp_filt_drug),
by = 'depmap_id')
pdat <- left_join(pdat, cell_line_info, by = 'depmap_id')
pdat <- pdat[pdat$passed_str_profiling.y == T, ]
pdat <- pdat[pdat$auc <= 1, ]
# linear model
lm <- lm(auc~CX3 + CX5 + CX3:CX5, pdat)
summary(lm)
# CX3 and CX5 present predictive signal as biomarkers for taxane response at pan-cancer level.
# However, CX5 seems to have higher signal for predicting taxane response.
# We then aimed to create a prediction model and set a threhsold for classifying patients.
# To do so, we first scaled signature activities of cell lines to the TCGA activities (reference cohort).
# This aims to generalize the model for all clinical scenarios.
## TRAINING THE CLASSIFIER
# Load TCGA activities
TCGA.EXPS=readRDS(paste0(input_dir,"/4_Activities_CompendiumCINSigs_THRESH95_TCGA.rds"))
mTCGA = as.matrix(TCGA.EXPS[ , c("CX3", "CX5")])
# Generate a model for scaling
lModel = list(mean = attributes(scale(mTCGA))$`scaled:center`,
scale = attributes(scale(mTCGA))$`scaled:scale`)
# Scale Cell line activities
sc_cell_exp = sweep(sweep(cell_exp[,c("CX3", "CX5")], 2, lModel$mean, FUN = '-'), 2, lModel$scale, FUN = "/")
colnames(sc_cell_exp)=c("sCX3","sCX5")
sc_cell_exp=data.frame(depmap_id=rownames(sc_cell_exp),sc_cell_exp)
# Add scale values to pdat
pdat<-left_join(pdat,sc_cell_exp,by="depmap_id")
pdat<-pdat[!is.na(pdat$auc) & !is.na(pdat$CX5),]
pdat.filt<-pdat[pdat$auc<=quantile(pdat$auc,0.98),]
# Find optimal threshold for sCX5
thrs=seq(-1,1,0.1)
statistics=c()
for(thr in thrs){
pdat.filt$pred=ifelse(pdat.filt$sCX5<thr, 'Resistant', 'Sensitive')
tab=table(pdat.filt$pred)
if(length(tab)!=1){
# Compare AUC values
n.res=tab[1]
n.tot=sum(tab)
mean.res=mean(pdat.filt$auc[pdat.filt$pred=="Resistant"])
mean.sen=mean(pdat.filt$auc[pdat.filt$pred=="Sensitive"])
test=t.test(pdat.filt$auc[pdat.filt$pred=="Resistant"],pdat.filt$auc[pdat.filt$pred=="Sensitive"])
pval=test$p.value
out=c(thr,n.tot,n.res,n.res/n.tot,mean.res,mean.sen,mean.res-mean.sen,pval)
statistics=rbind(statistics,out)
}
}
statistics=as.data.frame(statistics)
colnames(statistics)=c("thrs","n","n.res","f.res","mean.res","mean.sen","mean.diff","p.val")
statistics$signature="CX5"
statistics$significant=ifelse(statistics$p.val<0.1,"yes","no")
png(paste0(figs_dir,"/CellLines_Taxane_OptThresh.png"), width = 50, height = 50, units="mm", res=300)
p<-ggplot(statistics, aes(x=thrs, y=mean.res)) +
geom_vline(xintercept = -0.1, linetype = "dashed", colour = "grey70", size = 0.5) +
geom_vline(xintercept = 0.7, linetype = "dashed", colour = "grey70", size = 0.5) +
geom_point(size=0.8) +
geom_line(size=0.5) +
labs(title="", subtitle="") +
scale_color_manual(values = c("black","red")) +
ylab("Mean AUC values of cell lines\n predicted as resistant") +
xlab("Activity threshold") +
theme(text = element_text(size = 6),
axis.text = element_text(size = 5),
axis.line = element_line(size = 0.5),
axis.ticks = element_line(size = 0.5),
axis.ticks.length = unit(.1, "cm"),
legend.position="none")
print(p)
dev.off()
ggsave(file.path(figs_dir, "CellLines_Taxane_OptThresh.svg"),p,
width = 50, height = 50, units = 'mm', dpi = 300)
# Classify cell lines with optimal threshold
pdat.filt$pred=ifelse(pdat.filt$sCX5<0, 'Resistant','Sensitive')
# Compare AUC values
test=t.test(pdat.filt$auc[pdat.filt$pred=="Resistant"],pdat.filt$auc[pdat.filt$pred=="Sensitive"])
pval=test$p.value
# Select resistant and sensitive cell lines
# Explore the distribution of AUC values & set threshold for classifying samples
p<-ggplot(pdat.filt) +
geom_density(aes(x = auc, color=pred))+
labs(title="IC50 distribution in all cell lines",
subtitle=paste0("P-value t-test = ",round(pval,3)))+
scale_x_continuous(breaks=seq(round(min(pdat$auc))-0.1,1,0.1), limits = c(0,1))+
xlab("Response to paclitaxel (AUC)") +
theme(text = element_text(size = 8),
axis.text = element_text(size = 6),
axis.line = element_line(size = 0.5),
axis.ticks = element_line(size = 0.5),
axis.ticks.length = unit(.1, "cm"),
legend.position="right")
ggsave(file.path(figs_dir, paste0('Supplementary/CellLines_Taxane_AUC.svg')),
width = 100, height = 80, units = 'mm', dpi = 300)