#!/bin/bash
#SBATCH --job-name=grpo # Job name
#SBATCH --nodes=1 # Number of nodes
#SBATCH --gres=gpu:8 # Number of GPUs per node
#SBATCH --time=96:00:00 # Time limit (hh:mm:ss)
conda activate your env
export HF_HOME=/your/hf/home/
export WANDB_API_KEY='your_wandb_key'
NODELIST=($(scontrol show hostnames $SLURM_JOB_NODELIST))
srun --nodes=1 --ntasks=1 \
accelerate launch \
--config_file examples/accelerate_configs/deepspeed_zero1.yaml \
--num_machines 1 \
--num_processes 8 \
--main_process_ip ${NODELIST[0]} \
--machine_rank $SLURM_PROCID \
--rdzv_backend c10d \
train_grpo.py