#!/bin/bash
# submit_mace.bsub - MACE (current SOTA on QM9) on all targets
#
# MACE is the heaviest baseline (~6h/seed on H100). An H100 is preferred, but
# the -gpu request below does not pin a GPU model, so the job runs on whatever
# GPU the queue assigns. Array indices 1..18 enumerate the (target × seed)
# pairs: 6 targets × 3 seeds.

#BSUB -J starg_mace[1-18]
#BSUB -o logs/mace_%I_%J.out
#BSUB -e logs/mace_%I_%J.err
#BSUB -q normal
#BSUB -n 16
#BSUB -gpu "num=1:mode=exclusive_process"
#BSUB -W 24:00
#BSUB -M 128GB

# Shell options: treat expansion of an unset variable as an error, and make a
# pipeline fail when any of its stages fails. errexit (-e) is not enabled.
set -o nounset
set -o pipefail

# Output directories, created relative to the directory the job starts in.
mkdir -p logs
mkdir -p results

# torch >= 2.6 makes torch.load default to weights_only=True. mace-torch
# transitively imports e3nn 0.4.4, whose _wigner.py loads constants.pt
# containing a `slice` global that the safe-globals allowlist rejects.
# Restore the pre-2.6 behaviour for this workload only; the file being loaded
# is a known package file, not user input.
TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD=1
export TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD

# Map the LSF job-array index onto one (target, seed) pair. Indices are laid
# out target-major: consecutive indices walk through SEEDS for one target
# before moving on to the next target.
TARGETS=(gap alpha mu zpve mu_vector alpha_tensor)
SEEDS=(0 1 2)
num_seeds=${#SEEDS[@]}
num_tasks=$(( ${#TARGETS[@]} * num_seeds ))

# Reject anything that is not a plain integer in 1..num_tasks. An index of 0
# would otherwise yield (0-1)/3 = 0 and (0-1)%3 = -1, silently selecting the
# first target with the *last* seed instead of failing.
# NOTE: the array range in the "#BSUB -J" directive must stay in sync with
# num_tasks when TARGETS or SEEDS change.
if [[ ! "${LSB_JOBINDEX:-}" =~ ^[1-9][0-9]*$ ]] || (( LSB_JOBINDEX > num_tasks )); then
    echo "error: LSB_JOBINDEX='${LSB_JOBINDEX:-}' is not in 1..${num_tasks};" \
         "submit this script as a job array" >&2
    exit 2
fi

task_index=$(( LSB_JOBINDEX - 1 ))
TARGET="${TARGETS[$(( task_index / num_seeds ))]}"
SEED="${SEEDS[$(( task_index % num_seeds ))]}"

# Code checkout to run from, and the QM9 data location. QM9_DIR may be
# overridden from the submitting environment; otherwise a per-user default
# is used.
WORKDIR=$HOME/starg/python/large_scale
QM9_DIR=${QM9_DIR:-/u/$USER/data/qm9/dsgdb9nsd}

# errexit is not enabled, so a failed cd would otherwise be ignored and the
# rest of the script would run from the wrong directory.
if ! cd "$WORKDIR"; then
    echo "error: cannot cd to WORKDIR=$WORKDIR" >&2
    exit 1
fi

# The trainer is given the relative path results/ as its output directory,
# which resolves against WORKDIR; make sure it exists here as well.
mkdir -p results

# Put the working directory first on the Python module search path.
export PYTHONPATH=".:${PYTHONPATH:-}"

# Banner identifying this array element in the job log.
echo "[$(date)] host=$(hostname) array=$LSB_JOBINDEX target=$TARGET seed=$SEED"
# Best-effort GPU report; a failure here must not affect the job.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || true

# Launch the MACE baseline trainer for the selected target and seed on the
# GPU, reading QM9 from QM9_DIR and writing to results/ (relative to the
# current directory).
python3 train_baseline_mace.py \
    --target  "$TARGET" \
    --qm9_dir "$QM9_DIR" \
    --seed    "$SEED" \
    --out_dir results/ \
    --device  cuda
train_status=$?

# Exit with the trainer's status so that a failed run is visible to the
# scheduler; a trailing echo would otherwise make the script exit 0 and
# report "done" regardless of the outcome.
if (( train_status != 0 )); then
    echo "[$(date)] FAILED with exit status $train_status" >&2
    exit "$train_status"
fi

echo "[$(date)] done"
