#!/bin/bash
# submit_starg_ridge.bsub — ★_G-SVD + Ridge on full QM9 (and tensor targets)
#
# Job array: index 1..18 sweeps (target × seed) for ★_G-SVD + Ridge.
# targets: gap, alpha, mu, zpve, mu_vector, alpha_tensor (6 targets)
# seeds: 0, 1, 2 (3 seeds)
# total: 18 array slots
#
# Submit: bsub < submit_starg_ridge.bsub
# Status: bjobs
# Cancel: bkill <jobid>
# Logs: logs/starg_ridge_<arrayidx>_<jobid>.{out,err}
#BSUB -J starg_ridge[1-18]
#BSUB -o logs/starg_ridge_%I_%J.out
#BSUB -e logs/starg_ridge_%I_%J.err
#BSUB -q normal
#BSUB -n 4
#BSUB -gpu "num=1:mode=exclusive_process"
#BSUB -W 8:00
#BSUB -M 32GB
# Strict mode: stop on the first failing command (-e), on any unset variable
# (-u), and when any stage of a pipeline fails. Without -e, a failed `cd` or a
# crashed training run would still be followed by the final "done" echo, the
# script would exit 0, and the scheduler would see the slot as successful.
set -euo pipefail

# Print an error message to stderr and end this array slot with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# logs/ is created in the directory the script starts in, as before. This is
# best-effort: the training run itself does not depend on it.
mkdir -p logs || printf 'WARN: could not create logs/ in %s\n' "$PWD" >&2

# Map array index → (target, seed). 6 targets × 3 seeds = 18.
# Consecutive indices walk through the seeds of one target before moving on:
#   1..3 → gap (seeds 0,1,2), 4..6 → alpha, ..., 16..18 → alpha_tensor.
TARGETS=(gap alpha mu zpve mu_vector alpha_tensor)
SEEDS=(0 1 2)
N_SEEDS=${#SEEDS[@]}
N_SLOTS=$(( ${#TARGETS[@]} * N_SEEDS ))

# Validate the index before using it. An index of 0 would otherwise become
# array subscript -1, which bash resolves to the *last* element, so the job
# would silently run the wrong (target, seed) pair.
JOB_INDEX=${LSB_JOBINDEX:-}
[[ "$JOB_INDEX" =~ ^[0-9]+$ ]] \
  || die "LSB_JOBINDEX='${JOB_INDEX}' is not a number; submit this script as a job array"
JOB_INDEX=$(( 10#$JOB_INDEX ))   # force base 10 so a leading zero is not read as octal
(( JOB_INDEX >= 1 && JOB_INDEX <= N_SLOTS )) \
  || die "LSB_JOBINDEX=${JOB_INDEX} is outside the expected range 1..${N_SLOTS}"

TARGET_IDX=$(( (JOB_INDEX - 1) / N_SEEDS ))
SEED_IDX=$(( (JOB_INDEX - 1) % N_SEEDS ))
TARGET="${TARGETS[$TARGET_IDX]}"
SEED="${SEEDS[$SEED_IDX]}"

# mu_vector and alpha_tensor use the octahedral group; scalars use Z_12.
case "$TARGET" in
  mu_vector|alpha_tensor)
    GROUP=octahedral
    GROUP_PARAM=24
    ;;
  *)
    GROUP=cyclic
    GROUP_PARAM=12
    ;;
esac

WORKDIR=$HOME/starg/python/large_scale
# QM9_DIR may be overridden from the submitting environment.
QM9_DIR=${QM9_DIR:-/u/$USER/data/qm9/dsgdb9nsd}

cd "$WORKDIR" || die "cannot cd to WORKDIR=$WORKDIR"

# --out_dir below is relative, so results/ must exist inside WORKDIR (the
# directory we just entered), not in the directory the job started in.
mkdir -p results

export PYTHONPATH=".:${PYTHONPATH:-}"

echo "[$(date)] host=$(hostname) array=$LSB_JOBINDEX target=$TARGET seed=$SEED group=$GROUP"
# GPU info is logged for diagnostics only; a missing nvidia-smi must not
# abort the job, hence the deliberate `|| true`.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || true

# Capture the exit status instead of letting -e kill the script, so the
# failure is logged with a timestamp and then propagated to the scheduler.
rc=0
python3 train_starg.py \
  --method ridge \
  --target "$TARGET" \
  --group "$GROUP" \
  --group_param "$GROUP_PARAM" \
  --qm9_dir "$QM9_DIR" \
  --seed "$SEED" \
  --out_dir results/ \
  --device cuda || rc=$?

if (( rc != 0 )); then
  echo "[$(date)] FAILED target=$TARGET seed=$SEED exit=$rc" >&2
  exit "$rc"
fi

echo "[$(date)] done"