---
debug: false
seed: 2023
timeout: 3600

batch_size_per_gpu:
  vae: 32
  seq: 32
num_workers: 8

gpu:
  use: true

data:
  name: imagenet64
  in_shape: [64, 64]
  in_channels: 3
  path:
    home: ''

exp_dir:
  home: ''

torch_dist:
  use: true
  amp: false
  gpus_per_model: 1
  backend: nccl
  init_method: 'env://'

model:
  backbone: mvq
  num_latent_space: 2
  down_sampling_factor1: 4
  down_sampling_factor2: 4
  reconstruction_loss: mse
  coder:
    name: conv_bn
  seq:
    name:
      top: gpt_l
      bottom: gpt_l
    gpt_l:
      block_size: 512
      n_layer: 12
      n_head: 8
      n_embd: 256
  vq:
    name: vq
    emb_dim1: 256
    emb_dim2: 256
    n_emb1: 128
    n_emb2: 128
    beta: 0.25
    ema_update: true
    random_restart: true
    tiled: true
  mhd:
    name: random
    use: true
    use_mhd_mask: true
    mask_type: spatial
    bottleneck: false
    bottleneck_dim: 64
    hypothese_count: 256
    hypothese_bsz: 256
    loss_reduce: mean
    dist_reduce: mean
    loss_reduce_dims: [-3, -2, -1]
    decoder_loss: winner
    dist_loss: mse
    dropout_rate: 0.5
  decoder:
    act: leakyrelu
    n_res_block: 0
    res_kernels: [3, 1]
    n_res_channel: 32
  gan:
    use: true
    dist_weight: 1.0
    codebook_weight: 1.0
    pixelloss_weight: 1.0
    perceptual_weight: 1.0
    disc_num_layers: 3
    disc_weight: 0.5
    disc_factor: 1.0
    disc_start: 100
    disc_loss: hinge
    disc_ndf: 64
    disc_conditional: false
    use_actnorm: true
    start_step: 200

train:
  gradient_accum_steps: 1
  decoder_weight: 1.0
  max_grad_norm: 0.0
  epochs:
    warmup: 1
    vae: 651
    seq: 101
  optim:
    type: Adam
    learning_rate: 0.0005
    betas: [0.5, 0.9]
    eps: 1e-8
    weight_decay: 3e-7

intervals:
  snapshot: 1
  display: 1000000
  eval: 1
  gen: 5000

resume:
  is_resume: false
  exp_name: /
  snapshot_file: /

eval:
  return_outputs: false
  save_imgs:
    subplots:
      active: false
      samples: 64
      columns: 8
      rows: 5
  fid:
    use: true
    samples: 10000
  transfer_test:
    use: false

fp16:
  use: false
  eval: false
  loss_scale: 1.0
  initial_scale_power: 64
  loss_scale_window: 1000
  hysteresis: 2
  min_loss_scale: 1000
  opt_level: O3
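
# Usage note (kept as comments so the file remains valid YAML): a minimal
# loading sketch, assuming the project reads this file with PyYAML. The file
# path "configs/imagenet64.yaml" and the variable name "cfg" are hypothetical,
# not taken from the repository.
#
#   import yaml
#   with open("configs/imagenet64.yaml") as f:
#       cfg = yaml.safe_load(f)
#   print(cfg["model"]["vq"]["n_emb1"])   # -> 128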