tensor-group-sym / python / large_scale / bsub / submit_mace_pred_seeds12.bsub
submit_mace_pred_seeds12.bsub
Raw
#!/bin/bash
# submit_mace_pred_seeds12.bsub, MACE seeds 1 and 2 with predictions.
# Array 1..8 = (4 targets) x (seeds 1, 2).
#   target = TARGETS[(idx-1) // 2], seed = SEEDS[(idx-1) % 2]

#BSUB -J starg_mace_pred12[1-8]
#BSUB -o logs/mace_pred12_%I_%J.out
#BSUB -e logs/mace_pred12_%I_%J.err
#BSUB -q normal
#BSUB -n 16
#BSUB -gpu "num=1:mode=exclusive_process"
#BSUB -W 24:00
#BSUB -M 128GB

# Fail fast: -e aborts on any unhandled command failure, -u on unset
# variables, and pipefail makes a pipeline fail if any stage fails.
set -euo pipefail

# PyTorch switch affecting torch.load's weights_only default.
# NOTE(review): presumably needed so checkpoints used by the training script
# can be fully unpickled; only safe with trusted checkpoint files -- confirm.
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1

# Created relative to the directory the job starts in, which is where the
# relative logs/ paths of the #BSUB -o/-e directives point.
mkdir -p logs results

TARGETS=(gap alpha mu zpve)
SEEDS=(1 2)
n_seeds=${#SEEDS[@]}
n_jobs=$(( ${#TARGETS[@]} * n_seeds ))

# Validate the array index before using it as a subscript. Outside a job
# array the index may be unset or 0, and 0 would produce a negative subscript
# in the modulo below, silently selecting the wrong seed.
job_index="${LSB_JOBINDEX:-}"
if [[ ! "$job_index" =~ ^[1-9][0-9]*$ ]] || (( job_index > n_jobs )); then
    echo "error: LSB_JOBINDEX='${job_index}' is not in 1..${n_jobs}; submit this script as an LSF job array" >&2
    exit 2
fi

# Row-major decoding: consecutive indices cycle through the seeds first,
# then advance to the next target.
TARGET="${TARGETS[$(( (job_index - 1) / n_seeds ))]}"
SEED="${SEEDS[$(( (job_index - 1) % n_seeds ))]}"

WORKDIR=$HOME/starg/python/large_scale
QM9_DIR=${QM9_DIR:-/u/$USER/data/qm9/dsgdb9nsd}

# Never start training from the wrong directory.
cd "$WORKDIR" || { echo "error: cannot cd to WORKDIR='$WORKDIR'" >&2; exit 1; }
export PYTHONPATH=".:${PYTHONPATH:-}"

# --out_dir below is passed as a path relative to WORKDIR, so make sure it
# exists here as well (the earlier mkdir ran in the job's starting directory).
mkdir -p results

echo "[$(date)] host=$(hostname) array=$job_index target=$TARGET seed=$SEED"
# Best-effort GPU report for the log; a failure here must not abort the job.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || true

# Capture the exit code instead of letting -e abort, so the failure can be
# logged with a timestamp and then propagated to LSF as the job's status.
rc=0
python3 train_baseline_mace.py \
    --target  "$TARGET" \
    --qm9_dir "$QM9_DIR" \
    --seed    "$SEED" \
    --out_dir results/ \
    --device  cuda || rc=$?

if (( rc != 0 )); then
    echo "[$(date)] train_baseline_mace.py failed with exit code $rc" >&2
    exit "$rc"
fi

echo "[$(date)] done"