---
# Experiment configuration: runtime, data, model, training, evaluation and
# mixed-precision settings.
#
# NOTE(review): this file was reconstructed into block style from a copy whose
# line breaks and indentation had been lost. Nesting was inferred from key
# order and from the rule that a mapping cannot repeat a key. Placements that
# could not be determined with certainty and should be confirmed against the
# code that reads this config:
#   - data.dataset.chart_data.sep_token
#   - model.continuous_data.decoder.conv.use_proj
#   - model.continuous_data.decoder.scale
#   - model.continuous_data.fid and model.continuous_data.loss_fn
#   - train.optim.label_smoothing_factor and train.intervals
#   - eval.ksm, and the top-level position of fp16 and timeout
# Key spellings such as `seperate_data_task` and `hypothese_count` are kept
# exactly as found, since keys are looked up by name.

exp_name: default
seed: 1
num_workers: 8
debug: false
use_gpu: true

torch_dist:
  use: true
  amp: false
  gpus_per_model: 1
  backend: nccl
  init_method: 'env://'

batch_size_per_gpu:
  continuous: 64
  seq: 64
  generate: 64

exp_dir:
  home: ''

data:
  name: pmc
  path:
    home: ''
  dataset:
    tasks: ['categorical', 'series_name', 'axis', 'caption', 'data']
    chart_text:
      tgt_token: '<mask_1>'
      window_size: 16
      widen_rate: 0.1
      max_widen_len: 1
      min_sent_len: 10
    chart_data:
      chart_tasks: ['task6']
      active_charts: []
      norm_mode: minmax
      scale_mode: log10
      scale_eps: [1.100001, 1.100001]
      scale_floor: [0.000001, 0.000001]
      sep_token: '<SEP>'

model:
  active: ['continuous']
  seperate_data_task: true

  seq:
    # Stage 1: text only, Stage 2: data only, Stage 0: both
    opt_mode: 1
    freeze_encoder: false
    decoder2_num_layers: 8
    hf_model:
      name: google/t5-v1_1-large
      tgt_token: '<mask_1>'
      model_max_length: 512
      max_source_len: 1024
      max_target_len: 256
      pad_to_max_len: true
      ignore_pad_token_for_loss: true
      use_fast: true

  continuous_data:
    active: ['scale', 'continuous']
    use_pos_embs: true

    encoder:
      chart_type_conditional: false
      max_blocks:
        series: 64
        points: 256
      conv:
        channels: [256, 128, 64]
        kernels: [3, 3, 3, 2]
        stride: 1
        padding: 1
        use_bn: false
        n_res_block: 2
        n_res_channel: 32
        res_kernels: [3, 1]
        res_padding: 1
      transformer:
        use: true
        name: gpt
        n_layer: 4
        n_head: 4

    decoder:
      chart_type_conditional: true
      conv:
        channels: [32, 64, 128]
        kernels: [3, 3, 3, 2]
        stride: 1
        padding: 1
        use_bn: false
        n_res_block: 2
        n_res_channel: 32
        res_kernels: [3, 1]
        res_padding: 1
        use_proj: false
      transformer:
        use: true
        name: t5_decoder
        n_layer: 4
        n_head: 4
        d_kv: 8
        num_buckets: 8
        max_distance: 32
      scale:
        n_head: 1

    disc:
      use: true
      disc_start: 100
      disc_loss: hinge
      disc_factor: 1.0
      disc_weight: 1.0
      disc_conditional: true
      use_pos_embs: true
      conv:
        channels: [256, 128, 64]
        kernels: [3, 3, 3, 2]
        stride: 1
        padding: 1
        use_bn: false
        n_res_block: 2
        n_res_channel: 32
        res_kernels: [3, 1]
        res_padding: 1
      transformer:
        use: true
        name: gpt
        n_layer: 4
        n_head: 4

    vq:
      name: vq
      emb_dim1: 32
      emb_dim2: 32
      n_emb1: 128
      n_emb2: 128
      emb_len1: 16
      emb_len2: 12
      beta: 0.25
      ema_update: true
      random_restart: true
      tiled: true

    mhd:
      use: true
      name: mhd1d
      bottleneck: false
      bottleneck_dim: 64
      hypothese_count: 256
      hypothese_bsz: 256
      residual: true
      loss_reduce: mean
      dist_reduce: mean
      loss_reduce_dims: [-2, -1]
      norm: true
      act: relu
      decoder_loss: winner
      dist_loss: mse
      gamma: 0.25
      dropout_rate: 0.5
      # Must be a nested mapping: it repeats the `act` key already set above.
      decoder:
        act: leakyrelu
        n_res_block: 2
        res_kernels: [3, 1]
        n_res_channel: 8

    fid:
      use_pos_embs: true
      enc_out_dim: 32
      conv:
        channels: [256, 128, 64]
        kernels: [3, 3, 3, 2]
        stride: 1
        padding: 1
        use_bn: false
        n_res_block: 2
        n_res_channel: 32
        res_kernels: [3, 1]
        res_padding: 1
      transformer:
        use: true
        name: gpt
        n_layer: 4
        n_head: 4

    loss_fn:
      scale: l1
      continuous: mse

train:
  epochs:
    warmup: 0
    continuous: 150
    seq: 50
    total: 100
  gradient_accum_steps: 1
  max_grad_norm: 1.0
  loss_weights:
    text: 1.0
    code: 1.0
    cb1: 1.0
    cb2: 1.0
    wta: 1.0
    continuous: 1.0
    scale: 1.0
    categorical: 1.0
    series_name: 1.0
    ct: 1.0
    row: 1.0
    col: 1.0
    gan: 1.0
    disc: 1.0
  optim:
    type: AdamW
    learning_rate: 0.0005
    betas: [0.9, 0.999]
    # Exponent floats carry an explicit mantissa dot: YAML 1.1 loaders such as
    # PyYAML resolve `1e-8` / `3e-7` as strings rather than floats.
    eps: 1.0e-8
    weight_decay: 3.0e-7
    label_smoothing_factor: 0.1
  intervals:
    eval: 1
    snapshot: 20
    display: 20000
    gen: 20000
    val: 20000

eval:
  fid: false
  val_step_count: 50
  write_samples: false
  gen_steps: 2
  max_steps: 1000000
  sample_interval: 50
  display_interval: 50
  sample_epoch: 50
  include_inputs_for_metrics: false
  eval_accumulation_steps: 10
  num_beams: 3
  max_length: 256
  repetition_penalty: 1.0
  gen_temperature: 1.0
  hypo_count: 1
  hypo_bsz: 1
  ksm:
    active: true
    name: google/pegasus-pubmed
    use_fast: true

fp16:
  use: false
  eval: false
  loss_scale: 1.0
  initial_scale_power: 64
  loss_scale_window: 1000
  hysteresis: 2
  min_loss_scale: 1000
  opt_level: O3

timeout: 3600