iSoMAs / R / plot_iso_expr_upon_gene_mutation.R
plot_iso_expr_upon_gene_mutation.R
Raw
plot_iso_expr_upon_gene_mutation <- function(data.iso,Mutations,gene_to_iso, myGene.mut="TP53",
                                             myGene.iso="TPX2", isoIDs=NULL,Log2=T,normData=T,
                                             stat.method = "wilcox",label = "p.signif",
                                             plot.percent = T,get.data=F, title=NULL,
                                             legend.position=c(0.75,0.1),
                                             vars.comp = c("WildType","Mutant"),
                                             legend.direction = "horizontal",label.y.npc=0.975,
                                             plot.gene=T,rmNorm=F,angle.p=90,vjust.p=0.75,
                                             showNsamp = T,fs = 12,sz=8){

  library(ggpubr)
  library(dplyr)

  # method = {t.test,wilcox.test,anova,kruskal.test}
  if(normData){
    cat("normalize total counts to 1,000,000 for each sample...\n")
    colsum = colSums(data.iso)
    for (j in seq(ncol(data.iso))){
      data.iso[,j] = data.iso[,j]/colsum[j]*1e6
    }
  }

  if(is.null(isoIDs)){  #4/22/22
    isoIDs = gene_to_iso[[myGene.iso]]
  }

  data.iso = as.data.frame(t(data.iso[isoIDs,])) #LUAD: 576 * 13

  if(rmNorm){
    samples.all = row.names(data.iso)
    sample.type.code = as.numeric(substr(samples.all,14,15))
    data.iso = data.iso[sample.type.code<10,,drop=F]
    cat(paste0('after removing normals: ',paste(dim(data.iso),collapse =', '),'\n'))
  }

  if(plot.percent){
    # percent should be calculated on count not log2count
    data.iso = round(100*data.iso/(rowSums(data.iso,na.rm = T)+1e-6),2)
    value.name = paste0(myGene.iso," (%)")
  }else if(Log2){
    data.iso = log2(1+data.iso)
    valueName = "Log2RSEM"
  }else{
    valueName = "RSEM"
  }

  # ymax = (max(data.iso) - min(data.iso))*1.1

  ##--attach mutant information
  if(showNsamp){
    Mutations = Mutations[rownames(data.iso),,drop=F] #7/12/22
    Nsamp = table(Mutations$Mutation)
    Mutations$Mutation = paste0(Mutations$Mutation,"(",Nsamp[Mutations$Mutation],")")
    vars.comp = paste0(vars.comp,"(",Nsamp[vars.comp],")")
  }

  if(plot.gene){
    data.iso[,myGene.iso] = rowSums(data.iso,na.rm = T,dims = 1)
  }

  data.iso$Mutation = Mutations[rownames(data.iso),'Mutation']

  data.iso.long = reshape2::melt(data.iso,
                                 id.vars = "Mutation",
                                 variable.name = "Isoform",
                                 value.name=valueName)
  data.iso.long = filter(data.iso.long, Mutation %in% vars.comp)

  data.iso.long$Mutation = factor(data.iso.long$Mutation,
                                  levels = vars.comp,
                                  ordered = T)

  print(str(data.iso.long))

  p = ggboxplot(data.iso.long, x = "Isoform", y = valueName,
                color = "Mutation", palette = "jco",
                short.panel.labs = F, font.label = 20,
                add = "jitter",add.params = list(size=0.5))+
    stat_compare_means(aes(group = Mutation),
                       label = label,
                       method = stat.method,
                       angle=angle.p,
                       vjust=vjust.p,
                       label.x.npc = 0,
                       label.y.npc = label.y.npc,
                       size=sz)+
    grids(linetype = "dashed",color = "grey92")+
    labs(x="",color=myGene.mut)+
    # lims(y=c(0,ymax))+
    ggtitle(title)+
    theme(
      plot.title=element_text(size = fs+2,face='bold',hjust = 0),
      axis.text.x = element_text(size = fs),
      # axis.text.x = element_text(size = fs,angle = 45,hjust = 1),
      # axis.text.x = element_text(size = fs,angle = 90,vjust = 0.5),
      axis.text.y = element_text(size = fs),
      axis.title = element_text(size = fs+2),
      legend.position = legend.position,
      legend.direction = legend.direction,
      legend.text = element_text(size = fs),
      legend.title = element_text(size = fs+2)
    )

  print(p)

  if(get.data){
    return(data.iso.long)
  }
}