# mvq/config/default.yaml
---
# General run settings.
debug: false
seed: 2023
# NOTE(review): units are not stated in this file (presumably seconds) — confirm.
timeout: 3600

# Per-GPU batch sizes; the `vae` / `seq` keys mirror those under `train.epochs`.
batch_size_per_gpu:
  vae: 32
  seq: 32
num_workers: 8

gpu:
  use: true
# Dataset description.
data:
  name: imagenet64
  in_shape: [64, 64]
  in_channels: 3
  path:
    # Empty string by default.
    # NOTE(review): presumably overridden per machine or run — confirm.
    home: ''
# Experiment directory root; empty string by default.
exp_dir:
  home: ''
# Distributed-run options.
torch_dist:
  use: true
  amp: false
  gpus_per_model: 1
  backend: nccl
  # Quoted so the embedded colon can never be misread as a mapping indicator;
  # the resulting string is unchanged.
  init_method: 'env://'
# Model definition. Booleans are written in canonical lowercase throughout.
model:
  backbone: mvq
  num_latent_space: 2
  # NOTE(review): the 1/2 suffixes here and under `vq` presumably index the
  # two latent spaces counted by `num_latent_space` — confirm.
  down_sampling_factor1: 4
  down_sampling_factor2: 4
  reconstruction_loss: mse
  coder:
    name: conv_bn
  seq:
    # Both levels name `gpt_l`, the settings block defined just below.
    name:
      top: gpt_l
      bottom: gpt_l
    gpt_l:
      block_size: 512
      n_layer: 12
      n_head: 8
      n_embd: 256
  vq:
    name: vq
    emb_dim1: 256
    emb_dim2: 256
    n_emb1: 128
    n_emb2: 128
    beta: 0.25
    ema_update: true
    random_restart: true
    tiled: true
  mhd:
    name: random
    use: true
    use_mhd_mask: true
    mask_type: spatial
    bottleneck: false
    bottleneck_dim: 64
    # Key spelling ("hypothese") is kept as-is; any code reading these keys
    # depends on the exact names.
    hypothese_count: 256
    hypothese_bsz: 256
    loss_reduce: mean
    dist_reduce: mean
    loss_reduce_dims: [-3, -2, -1]
    decoder_loss: winner
    dist_loss: mse
    dropout_rate: 0.5
    decoder:
      act: leakyrelu
      n_res_block: 0
      res_kernels: [3, 1]
      n_res_channel: 32
  gan:
    use: true
    dist_weight: 1.0
    codebook_weight: 1.0
    pixelloss_weight: 1.0
    perceptual_weight: 1.0
    disc_num_layers: 3
    disc_weight: 0.5
    disc_factor: 1.0
    # NOTE(review): `disc_start` and `start_step` (below) are two separate
    # thresholds; what each one gates is not visible in this file — confirm.
    disc_start: 100
    disc_loss: hinge
    disc_ndf: 64
    disc_conditional: false
    use_actnorm: true
    start_step: 200
# Training schedule and optimizer settings.
train:
  gradient_accum_steps: 1
  decoder_weight: 1.0
  # NOTE(review): 0.0 presumably disables gradient clipping — confirm.
  max_grad_norm: 0.0
  # The `vae` / `seq` keys mirror those under `batch_size_per_gpu`.
  epochs:
    warmup: 1
    vae: 651
    seq: 101
  optim:
    type: Adam
    learning_rate: 0.0005
    betas: [0.5, 0.9]
    # Exponent literals carry an explicit mantissa dot: YAML 1.1 loaders
    # (e.g. PyYAML) resolve a dotless `1e-8` as a string, not a float.
    eps: 1.0e-8
    weight_decay: 3.0e-7
  # NOTE(review): the unit of these intervals (steps vs. epochs) is not
  # stated in this file — confirm.
  intervals:
    snapshot: 1
    display: 1000000
    eval: 1
    gen: 5000
  resume:
    is_resume: false
    # `/` is a placeholder string while `is_resume` is false.
    # NOTE(review): presumably replaced with real names when resuming — confirm.
    exp_name: /
    snapshot_file: /
# Evaluation settings. Booleans are written in canonical lowercase to match
# the rest of the file.
eval:
  return_outputs: false
  save_imgs:
    subplots:
      active: false
      # NOTE(review): samples (64) exceeds columns * rows (8 * 5 = 40);
      # confirm which value the plotting code actually honors.
      samples: 64
      columns: 8
      rows: 5
  fid:
    use: true
    samples: 10000
  transfer_test:
    use: false
# Half-precision (fp16) settings; turned off by default via `use: false`.
fp16:
  use: false
  eval: false
  loss_scale: 1.0
  initial_scale_power: 64
  loss_scale_window: 1000
  hysteresis: 2
  min_loss_scale: 1000
  # Quoted to make clear this is a string starting with the capital letter O,
  # not a zero-prefixed number.
  opt_level: 'O3'