# SherlockLung-EvolutionaryTrajectory-Analysis / Ordering_Model / BASH / pipeline.sh
# Locate the pipeline installation relative to this script.
#   script_dir - directory containing this script
#   TM_dir     - parent of script_dir; the reference_files/, BASH/ and R/
#                paths used further down are all built from it
# The expansions are quoted so that a script path containing spaces is
# handled as a single word.
script_dir=$(dirname -- "$0")
TM_dir=$(dirname -- "${script_dir}")

# ARGUMENT PARSING
#
# Every supported flag takes exactly one value and stores it in one global
# variable:
#
#   -n name                         -i input_dir
#   -o output_dir                   -M add_muts
#   -p PLmethod                     -g G
#   -u unobserved                   -r driver_symbol_file
#   -m nonsynonymous_mutation_file  -d dpclust_data_dir
#   -w wgd_file                     -s sample_list
#   -c wcc_dir                      -a assembly
#   -R remove_acrocentric           -S memory_saver
#   -G gene_gr                      -b bb_file_pattern
#   -e estimate_MRCA
#
# A value is rejected (exit 1, message on stderr) when it is missing, empty,
# or starts with "-". Any other flag, and any positional argument, is also
# rejected with exit 1 and a message on stderr.
#
# Arguments: the command-line words to parse
# Globals:   writes the variables listed above
parse_pipeline_args() {
  local flag target
  while (( "$#" )); do
    flag=$1
    case "${flag}" in
      -n) target=name ;;
      -i) target=input_dir ;;
      -o) target=output_dir ;;
      -M) target=add_muts ;;
      -p) target=PLmethod ;;
      -g) target=G ;;
      -u) target=unobserved ;;
      -r) target=driver_symbol_file ;;
      -m) target=nonsynonymous_mutation_file ;;
      -d) target=dpclust_data_dir ;;
      -w) target=wgd_file ;;
      -s) target=sample_list ;;
      -c) target=wcc_dir ;;
      -a) target=assembly ;;
      -R) target=remove_acrocentric ;;
      -S) target=memory_saver ;;
      -G) target=gene_gr ;;
      -b) target=bb_file_pattern ;;
      -e) target=estimate_MRCA ;;
      -*) # unsupported flags
        echo "Error: Unsupported flag ${flag}" >&2
        exit 1
        ;;
      *) # report unsupported positional arguments
        echo "Error: Positional arguments are not supported in this pipeline" >&2
        exit 1
        ;;
    esac

    # The first-character test is quoted so that values beginning with
    # whitespace or a glob character (e.g. a file pattern) are compared
    # literally instead of being word-split or expanded by the shell.
    if [ -z "${2:-}" ] || [ "${2:0:1}" = "-" ]; then
      echo "Error: Argument for ${flag} is missing" >&2
      exit 1
    fi

    # target is local and never one of the names above, so this assigns the
    # global variable of that name.
    printf -v "${target}" '%s' "$2"
    shift 2
  done
}

parse_pipeline_args "$@"

# SETTING DEFAULT VALUES
# Each ${var:=value} assigns the default only when the variable is unset or
# empty, so values already supplied are left alone.
: "${assembly:=hg38}"

# hg19 reference files are filled in first; the hg38 fallbacks further down
# then only apply when these are still empty.
if [ "${assembly}" = "hg19" ]; then
  : "${chr_coords:=${TM_dir}/reference_files/chromosome_coordinates_hg19.txt}"
  : "${centro_coords:=${TM_dir}/reference_files/UCSC_centromeres_hg19.txt}"
  : "${gene_gr:=${TM_dir}/reference_files/all_gene_gr_hg19.Rdata}"
fi

: "${add_muts:=TRUE}"
# This used to test a misspelled variable name, which was always empty and so
# overwrote any value supplied for unobserved.
: "${unobserved:=notIncluded}"
: "${chr_coords:=${TM_dir}/reference_files/chromosome_coordinates_hg38.txt}"
: "${centro_coords:=${TM_dir}/reference_files/UCSC_GRC_combined_centromeres_hg38.txt}"

# wgd_file: "none" when not supplied, otherwise resolved against the current
# working directory if it is a relative path.
if [ -z "${wgd_file}" ]; then
  wgd_file="none"
elif [ "${wgd_file:0:1}" != "/" ]; then
  wgd_file="$(pwd)/${wgd_file}"
fi

# sample_list: stays empty when not supplied, otherwise made absolute.
if [ -n "${sample_list}" ] && [ "${sample_list:0:1}" != "/" ]; then
  sample_list="$(pwd)/${sample_list}"
fi

: "${remove_acrocentric:=TRUE}"
: "${memory_saver:=FALSE}"
: "${gene_gr:=${TM_dir}/reference_files/all_gene_gr_hg38.Rdata}"
: "${bb_file_pattern:=subclones.txt}"
: "${estimate_MRCA:=FALSE}"

# make paths absolute to avoid any qsub confusion
# A value that does not start with "/" is taken to be relative to the current
# working directory. For the variables in this first list an empty value
# becomes "<cwd>/".
for path_var in TM_dir input_dir output_dir chr_coords centro_coords \
  driver_symbol_file nonsynonymous_mutation_file dpclust_data_dir; do
  path_value=${!path_var}
  if [ "${path_value:0:1}" != "/" ]; then
    printf -v "${path_var}" '%s/%s' "$(pwd)" "${path_value}"
  fi
done

# These are left untouched when empty. gene_gr is included because its
# default is derived from TM_dir before TM_dir is made absolute (and a
# user-supplied value may be relative too), yet it is handed to a qsub job
# like the other paths.
for path_var in wcc_dir gene_gr; do
  path_value=${!path_var}
  if [ -n "${path_value}" ] && [ "${path_value:0:1}" != "/" ]; then
    printf -v "${path_var}" '%s/%s' "$(pwd)" "${path_value}"
  fi
done
unset path_var path_value

# CREATE OUTPUT DIRECTORY
# log_dir receives the qsub stdout/stderr files and the parameter dump below.
# It is quoted wherever it is used so an output path containing spaces stays
# one word.
log_dir=${output_dir}/qsub_logs/
mkdir -p -- "${log_dir}"

# Write input parameter values to logs
# One key=value line per parameter, followed by a single blank line.
# gene_gr is appended after the existing keys so that the value handed to the
# driver-prep step is recorded as well.
cat > "${log_dir}/pipeline_params.txt" <<EOF
name=${name}
input_dir=${input_dir}
output_dir=${output_dir}
add_muts=${add_muts}
PLmethod=${PLmethod}
G=${G}
unobserved=${unobserved}
driver_symbol_file=${driver_symbol_file}
nonsynonymous_mutation_file=${nonsynonymous_mutation_file}
dpclust_data_dir=${dpclust_data_dir}
wgd_file=${wgd_file}
sample_list=${sample_list}
wcc_dir=${wcc_dir}
assembly=${assembly}
chr_coords=${chr_coords}
centro_coords=${centro_coords}
remove_acrocentric=${remove_acrocentric}
memory_saver=${memory_saver}
bb_file_pattern=${bb_file_pattern}
estimate_MRCA=${estimate_MRCA}
gene_gr=${gene_gr}

EOF


# JOB SUBMISSION
# Each stage is submitted with qsub and, apart from the first, held on the job
# id (hj) of the stage submitted before it. Expansions are quoted so that
# values containing spaces or glob characters reach qsub as single, literal
# arguments.

# Stop the pipeline when a submission produced no job id: the following stage
# would otherwise be submitted with an empty -hold_jid value.
#   $1 - stage label used in the error message
#   $2 - job id captured from "qsub -terse"
require_job_id() {
  if [ -z "$2" ]; then
    echo "Error: qsub did not return a job id for the $1 step; aborting" >&2
    exit 1
  fi
}

echo "submitting first steps"
# NOTE: if sample_list is not passed in, the variable will be empty, and this is (correctly) handled by the scripts, by using all samples in the input directory
# ${sample_list:+...} passes no argument at all in that case, rather than an empty one.
hj=$(qsub -terse -o "${log_dir}/firstSteps_out.txt" -e "${log_dir}/firstSteps_err.txt" \
  "${TM_dir}/BASH/run_first_steps.sh" "${TM_dir}/R/" "${name}" "${input_dir}/" "${output_dir}/" \
  "${wgd_file}" "${bb_file_pattern}" ${sample_list:+"${sample_list}"})
require_job_id "first steps" "${hj}"

# sleep commands prior to each qsub command ensure that any dependent scripts are submitted after those on which they depend
sleep 5
echo "submitting landscape"
hj=$(qsub -hold_jid "${hj}" -terse -o "${log_dir}" -e "${log_dir}" \
  "${TM_dir}/BASH/TM_3_CN_landscape_CW.sh" "${TM_dir}/R/TM_3_CN_landscape_CW.R" \
  "${name}" "${output_dir}" "${output_dir}" "${chr_coords}")
require_job_id "landscape" "${hj}"

# For later stages, sample_list needs to be "none" if missing, rather than empty, as an empty variable will cause problems for those stages
if [ -z "${sample_list}" ]; then
  sample_list="none"
fi

sleep 5
echo "submitting event identification"
hj=$(qsub -hold_jid "${hj}" -terse -o "${log_dir}/event_ident_out.txt" -e "${log_dir}/event_ident_err.txt" \
  "${TM_dir}/BASH/run_event_identification_steps.sh" "${TM_dir}/R/" "${name}" \
  "${output_dir}/" "${output_dir}/" "${chr_coords}" "${centro_coords}" "${input_dir}/" \
  "${sample_list}" "${remove_acrocentric}")
require_job_id "event identification" "${hj}"

if [ "${add_muts}" = "TRUE" ]; then
  sleep 5
  echo "submitting driver prep"
  # wcc_dir is the last argument and is omitted entirely when empty.
  hj=$(qsub -hold_jid "${hj}" -terse -o "${log_dir}" -e "${log_dir}" \
    "${TM_dir}/BASH/run_driver_prep.sh" "${TM_dir}/R/" "${name}" "${output_dir}/" \
    "${sample_list}" "${driver_symbol_file}" "${nonsynonymous_mutation_file}" \
    "${dpclust_data_dir}" "${gene_gr}" ${wcc_dir:+"${wcc_dir}"})
  require_job_id "driver prep" "${hj}"
fi

sleep 5
echo "submitting timing"
# G is a comma-separated list; one timing job is submitted per element.
# IFS is changed for the read command only, so the qsub line below is not
# affected by comma splitting.
IFS=, read -r -a allG <<< "${G}"
for thisG in "${allG[@]}"; do
  qsub -hold_jid "${hj}" -terse -N PLTiming \
    -o "${output_dir}/qsub_logs/Timing_out_${thisG}.txt" \
    -e "${output_dir}/qsub_logs/Timing_err_${thisG}.txt" \
    "${TM_dir}/BASH/run_postFDR_5.sh" "${TM_dir}/R/TM_postFDR_5_ordering_events_final_CW.R" \
    "${name}" "${output_dir}/" "${output_dir}/" "${PLmethod}" \
    "${output_dir}/${name}_driver_mut_file_for_timing.txt" "${wgd_file}" "${thisG}" \
    "${unobserved}" "${add_muts}" "${memory_saver}" "${estimate_MRCA}"
done