---
# Experiment configuration: a flat mapping of settings, one key per line.
# Key names are kept exactly as the consuming code expects them.

# Filesystem locations.
data_dir: 'data'
preprocessed_dir: 'data/preprocessed'
target_dir: '/aworkspace/data/save'
# Alternative image location, kept disabled:
# image_path: '/aworkspace/datasets/multimeme/Eimages'
image_path: '/aworkspace/datasets/ads'

# Model identifiers.
model_path: 'google/flan-t5-base'
text_model_path: 'bert-base-uncased'
vision_model_path: 'microsoft/resnet-50'
model_size: 'base'

# Quoted on purpose: bare no/yes would be parsed as booleans by YAML 1.1 loaders.
label_list: ['no', 'yes']
cot: true

# Dimensions and lengths.
emb_dim: 50
model_dim: 768
hidden_size: 768
vision_dim: 1000
prefix_length: 200
output_channels: 100
speaker_emb_dims: 300
project_dims: 300
max_length: 100
generate_max_length: 100
entity_length: 20
num_heads: 4

# Training settings.
batch_size: 16
epoch_size: 10
train_mode: 'random'
use_llava: true
one_step: false
filters: [3, 4, 5]
dropout: 0.3
shuffle: true
teacher: false
# NOTE(review): YAML 1.1 loaders (e.g. PyYAML) read `1e-5` as a string, not a
# float, because the mantissa has no dot. Spelling left unchanged; confirm the
# consumer converts it, or switch to `1.0e-5`.
bert_lr: 1e-5
patience: 10
val: 0.6
alpha: 0.8
max_grad_norm: 1.0
warmup_proportion: 0.1
gradient_accumulation_steps: 1
# NOTE(review): same string-vs-float caveat as bert_lr applies to `1e-8`.
adam_epsilon: 1e-8
warmup_steps: 0
weight_decay: 0.005
scheduler_type: 'linear'

# Run-mode flags.
# NOTE(review): `inferrence` looks like a misspelling of "inference"; the key
# is left as-is because renaming it would break whatever reads it.
inferrence: true
text_only: false
image_only: false
text_image: false

seed: 1234