tensor-group-sym / python / large_scale / bsub / submit_qm7x_mace_only.bsub
submit_qm7x_mace_only.bsub
Raw
#!/bin/bash
# submit_qm7x_mace_only.bsub - re-run MACE QM7-X jobs after fixing the
# `sample.properties[4]` AttributeError. 2 targets x 3 seeds = 6 jobs.
#
# LSF directives (read by bsub, ignored by bash):
#   -J name[1-6]  job array with six elements; each element receives its own
#                 index in LSB_JOBINDEX
#   -o / -e       stdout / stderr files; %I expands to the array index and
#                 %J to the job ID
#   -q normal     queue to submit to
#   -n 16         number of job slots requested
#   -gpu ...      one GPU in exclusive-process mode
#   -W 24:00      run-time limit (hours:minutes)
#   -M 128GB      memory limit
# Keep the "#BSUB" lines exactly as they are: their format is scheduler syntax.

#BSUB -J starg_qm7x_mace[1-6]
#BSUB -o logs/qm7x_mace_%I_%J.out
#BSUB -e logs/qm7x_mace_%I_%J.err
#BSUB -q normal
#BSUB -n 16
#BSUB -gpu "num=1:mode=exclusive_process"
#BSUB -W 24:00
#BSUB -M 128GB

# -u: expanding an unset variable is an error; pipefail: a pipeline fails if
# any stage fails. NOTE: -e is not set, so a failing command does not abort
# the script on its own.
set -uo pipefail
# PyTorch switch that makes torch.load default to weights_only=False when the
# caller does not specify it. Presumably required by files loaded during
# training - TODO confirm which loader needs it.
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1
# Created relative to the directory the job starts in (this runs before the
# script changes directory).
mkdir -p logs results

# Map the 1-based LSF array index onto a (target, seed) pair, target-major:
#   1..3 -> alpha_E  with seeds 42, 43, 44
#   4..6 -> alpha_T2 with seeds 42, 43, 44
TARGETS=(alpha_E alpha_T2)
SEEDS=(42 43 44)
NUM_JOBS=$(( ${#TARGETS[@]} * ${#SEEDS[@]} ))

# Validate the index before using it. LSF reports index 0 for a job that was
# not submitted as an array; with the arithmetic below that would silently
# select TARGETS[0] and SEEDS[-1] (the last seed) instead of failing.
JOB_INDEX="${LSB_JOBINDEX:-}"
if [[ ! "$JOB_INDEX" =~ ^[0-9]+$ ]]; then
  printf 'error: LSB_JOBINDEX must be an integer in 1..%d, got "%s"\n' \
    "$NUM_JOBS" "$JOB_INDEX" >&2
  exit 1
fi
# Force base 10 so a zero-padded value is not parsed as octal.
JOB_INDEX=$(( 10#$JOB_INDEX ))
if (( JOB_INDEX < 1 || JOB_INDEX > NUM_JOBS )); then
  printf 'error: LSB_JOBINDEX must be an integer in 1..%d, got "%s"\n' \
    "$NUM_JOBS" "$JOB_INDEX" >&2
  exit 1
fi

# Divisor and modulus come from the seed count so the mapping stays correct
# if SEEDS is edited.
TARGET="${TARGETS[$(( (JOB_INDEX - 1) / ${#SEEDS[@]} ))]}"
SEED="${SEEDS[$(( (JOB_INDEX - 1) % ${#SEEDS[@]} ))]}"

# Directory holding train_baseline_mace.py, and the QM7-X data location
# (the latter can be overridden from the submitting environment).
WORKDIR=$HOME/starg/python/large_scale
QM7X_DIR=${QM7X_DIR:-/u/$USER/data/qm7x}

# Abort if the working directory is unavailable: without this check the
# training command would run from whatever directory the job started in.
if ! cd "$WORKDIR"; then
  echo "[$(date)] error: cannot cd to $WORKDIR" >&2
  exit 1
fi
# --out_dir below is relative to WORKDIR, so make sure it exists here.
if ! mkdir -p results; then
  echo "[$(date)] error: cannot create $WORKDIR/results" >&2
  exit 1
fi
export PYTHONPATH=".:${PYTHONPATH:-}"

echo "[$(date)] host=$(hostname) array=$LSB_JOBINDEX target=$TARGET seed=$SEED method=mace"
# Informational only; a missing or failing nvidia-smi must not stop the job.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || true

# Capture the training exit status so it becomes the job's exit status.
# Otherwise the final echo would make every run finish with status 0 and the
# scheduler would report failed trainings as successful.
status=0
python3 train_baseline_mace.py \
    --dataset  qm7x \
    --target   "$TARGET" \
    --qm7x_dir "$QM7X_DIR" \
    --seed     "$SEED" \
    --out_dir  results/ \
    --device   cuda || status=$?

if (( status != 0 )); then
  echo "[$(date)] failed (exit $status)" >&2
  exit "$status"
fi

echo "[$(date)] done"