nlql / configs / worldcup_train_xl_859.json
worldcup_train_xl_859.json
Raw
{
    "run_name": "t5-3b-worldcup_v3_859_200epochs",
    "model_name_or_path": "tscholak/cxmefzzi",
    "dataset": "worldcup_v3_859",
    "source_prefix": "",
    "schema_serialization_type": "peteshaw",
    "schema_serialization_randomized": false,
    "schema_serialization_with_db_id": true,
    "schema_serialization_with_db_content": false,
    "schema_serialization_with_keys": false,
    "normalize_query": true,
    "target_with_db_id": true,
    "output_dir": "/train",
    "overwrite_output_dir": true,
    "cache_dir": "/transformers_cache",
    "do_train": true,
    "do_eval": true,
    "fp16": false,
    "num_train_epochs": 200,
    "per_device_train_batch_size": 1,
    "per_device_eval_batch_size": 1,
    "gradient_accumulation_steps": 512,
    "label_smoothing_factor": 0.0,
    "learning_rate": 1e-4,
    "adafactor": false,
    "adam_eps": 1e-6,
    "lr_scheduler_type": "constant",
    "warmup_ratio": 0.0,
    "warmup_steps": 0,
    "seed": 42,
    "report_to": [
        "wandb"
    ],
    "logging_strategy": "steps",
    "logging_first_step": true,
    "logging_steps": 2,
    "load_best_model_at_end": true,
    "metric_for_best_model": "exec_match",
    "greater_is_better": true,
    "save_total_limit": 5,
    "save_steps": 2,
    "evaluation_strategy": "steps",
    "eval_steps": 2,
    "predict_with_generate": true,
    "num_beams": 1,
    "num_beam_groups": 1,
    "use_picard": false,
    "deepspeed": "configs/ds_config_4gpu_z2.json"
}