# code/config.yaml · csc8114

# Compression settings.
# NOTE(review): which payloads these apply to is not visible in this file — confirm
# against the consumer.
compression:
  # Selected numeric mode. scheduler.int8_latency_threshold suggests other modes
  # exist — TODO confirm the accepted values.
  mode: float32
  # Presumably the fraction of entries kept by top-k selection (0.125 = 1/8) — verify.
  topk_ratio: 0.125
# Console logging toggles; both are disabled in this configuration.
console:
  # Presumably enables per-step log lines — verify against the consumer.
  log_step_details: false
  # Presumably enables per-request log lines on the server — verify against the consumer.
  log_server_requests: false
# Settings for saved artifacts (checkpoint/meta payloads).
artifacts:
  # none|minimal|full: controls checkpoint/meta config payload size.
  # Currently "minimal".
  config_snapshot_policy: minimal
# Dataset locations, chronological split boundaries, and model input features.
data:
  # NOTE(review): both paths are relative; the base directory they resolve against
  # is not visible here — confirm.
  processed_dir: dataset/processed
  holdout_dir: dataset/holdout
  # Chronological time-based splitting (2015-01-01 to 2026-03-31)
  # TRAIN: 2015-01-01 → 2023-12-31 (~80%)
  # VAL:   2024-01-01 → 2024-12-31 (~9%)
  # TEST:  2025-01-01 → end        (~11%)
  # Boundaries are exclusive upper bounds (ts < train_end → TRAIN, etc.)
  # Dates are quoted so they load as strings rather than YAML date objects.
  train_end: "2024-01-01"
  val_end:   "2025-01-01"
  # Feature columns consumed by model/data pipeline (preferred location).
  # Five entries, matching model.input_size: 5. The same list is duplicated under
  # data_download.feature_cols (legacy location); keep the two in sync.
  feature_cols:
    - Temperature
    - Humidity
    - Pressure
    - Wind Speed
    - Rain
# Federated round orchestration: client count, quorum, timeouts, staleness tolerance.
federated:
  # 11 training parquet files: strict 1:1 mapping, client N gets sorted_files[N-1]
  num_clients: 11
  # Equal to num_clients here, so the quorum is every client.
  min_clients_per_round: 11
  # NOTE(review): rho is not described in this file; scheduler.min_rho/max_rho/rho_step
  # suggest it can be adjusted at runtime — confirm its meaning and whether this is
  # only the initial value.
  rho: 1
  # How long the server barrier waits for all active clients to submit weights.
  # 20s is sufficient for local native runs; increase to 150 for Pi → VPS.
  round_timeout_sec: 20
  # After quorum is reached, wait this many extra seconds for stragglers before aggregating.
  # Set to 0 to aggregate immediately upon quorum.
  grace_period_sec: 1
  # Bounded-staleness FedAvg tolerance: accept client updates up to this many rounds
  # behind the server's current round. 0 = strict synchronous FedAvg (default).
  # Set to max_rho (e.g. 3) for scenarios with heterogeneous rho across clients.
  # NOTE(review): scheduler.max_rho is 20 in this file, not 3 — confirm which value
  # is intended when enabling staleness tolerance.
  max_staleness: 0
# gRPC transport settings: server address, message size, TLS.
grpc:
  # Presumably the maximum gRPC message size in megabytes — verify against the
  # channel/server options in code.
  max_message_mb: 50
  # Host name of the server; the commented-out line below is the localhost alternative.
  server_host: fsl-server
  # server_host: localhost
  server_port: 50051
  # TLS: set tls_enabled: true and provide cert/key paths for encrypted transport.
  # When running over Tailscale the WireGuard layer already encrypts traffic,
  # so tls_enabled: false is acceptable for that topology.
  tls_enabled: false
  # tls_cert_path: /app/certs/server.crt   # server + client (CA cert)
  # tls_key_path:  /app/certs/server.key   # server only
# Model architecture hyperparameters.
# NOTE(review): the lstm_* and server_head_* key names suggest an LSTM part plus a
# server-side head — confirm against the model code.
model:
  lstm_dropout: 0.3
  hidden_size: 64
  server_head_width: 64
  server_head_dropout: 0.1
  # Presumably the number of future steps predicted — verify the unit.
  horizon: 24
  # Equals the number of entries in data.feature_cols (5); update both together.
  input_size: 5
  num_layers: 2
  # Presumably the input window length in time steps — verify the unit.
  seq_len: 48
# output:
#   metrics_dir: /app/outputs/metrics  # unused (runtime writes to results/)
#   plots_dir: /app/outputs/plots      # unused (runtime writes to results/)
# Profiler and latency generator settings.
profiler:
  enabled: true
  # Latency generator parameters; keys ending in _ms are in milliseconds per their names.
  latency_generator:
    base_latency_ms: 1.5
    # NOTE(review): three offsets are listed while federated.num_clients is 11 —
    # confirm how clients beyond the third are assigned an offset.
    client_offsets_ms: [0.0, 4.0, 9.0]
    jitter_ms: 0.8
    # Both burst settings are zero here; presumably that disables bursts — verify.
    burst_every_steps: 0
    burst_latency_ms: 0.0
    # Both sleep settings are zero here; presumably no real sleeping occurs — verify.
    sleep_fraction: 0.0
    max_sleep_ms: 0.0
    # NOTE(review): meaning not visible in this file — confirm against the profiler code.
    measured_floor_ratio: 0.25
# Scheduler settings: latency thresholds and rho / top-k adjustment bounds.
scheduler:
  enabled: true
  # NOTE(review): thresholds are presumably in milliseconds, like the profiler's
  # *_ms keys — confirm the unit and what crossing each threshold triggers.
  latency_threshold: 4.0
  int8_latency_threshold: 10.0
  # Bounds and step size for rho (federated.rho is 1 in this file).
  max_rho: 20
  min_rho: 1
  rho_step: 1
  # Presumably a multiplier applied to compression.topk_ratio — verify.
  topk_multiplier: 1.5
  # Presumably the smoothing factor of an exponential moving average over measured
  # latency — verify.
  latency_ema_alpha: 0.2
# Training hyperparameters, loss configuration, and runtime/device switches.
training:
  seed: 42
  torch_num_threads: 1  # threads per process; set to 1 when running multi-process on CPU
  # batch_size: 32  # unused (main training loop is step-based RPC, not DataLoader batch training)
  # Loss weights: classification 2.0 here, regression 1.0 (regression_loss_weight below).
  # NOTE(review): presumably the two terms are combined as a weighted sum — confirm.
  classification_loss_weight: 2.0
  # Presumably the positive-class weight used by the weighted_bce loss type — verify.
  classification_positive_weight: 1.0
  classification_loss_type: focal # weighted_bce | focal
  # NOTE(review): focal_* keys presumably apply only when classification_loss_type
  # is focal — confirm.
  focal_gamma: 2.0
  focal_alpha: -1 # [0,1] to enable alpha balancing; negative means disabled
  # Presumably the rainfall amount (mm) above which a sample is labelled as rain — verify.
  rain_threshold_mm: 0.5
  # Presumably the predicted-probability cutoff for classifying rain — verify.
  rain_probability_threshold: 0.5
  local_steps: 10
  lr: 0.0005
  num_rounds: 50
  # NOTE(review): presumably the target share of rain samples when sampling — confirm.
  rain_sample_ratio: 0.45
  regression_loss_weight: 1.0
  target_transform: log1p
  # NOTE(review): unit not stated in this file (seconds?) — confirm.
  start_delay: 3
  device: auto  # auto | cuda | mps | cpu
  mixed_precision: auto  # none | auto | bf16
  use_gpu: true # legacy switch (kept for backward compatibility)
  checkpoint_interval: 1  # Save a periodic checkpoint every N rounds
  # NOTE(review): presumably counted in rounds without improvement — confirm.
  early_stopping_patience: 15
  eval_max_samples_per_sensor: 500
# Server process settings.
server:
  # NOTE(review): unit not stated in this file (records? steps? milliseconds?) — confirm.
  log_flush_interval: 1000
  # Presumably the worker-pool size of the server; 25 exceeds
  # federated.num_clients (11) — verify against the server code.
  max_workers: 25

# Settings for the raw data download step.
data_download:
  # Data download window settings (same 2015-01-01 to 2026-03-31 range as the
  # split comment under data).
  start_date: "2015-01-01T00:00:00"
  end_date: "2026-03-31T00:00:00"
  # Legacy location kept for backward compatibility (new code prefers data.feature_cols).
  # Keep this list identical to data.feature_cols so the two do not drift apart.
  feature_cols:
    - Temperature
    - Humidity
    - Pressure
    - Wind Speed
    - Rain
  # Spatial bounding box for city: [min_lon, min_lat, max_lon, max_lat]
  # bbox: [-1.71, 54.95, -1.54, 55.03]
  # Presumably the directory raw downloads are written to — verify against the
  # download script.
  raw_out_dir: dataset
  # raw_filename: newcastle_rainfall_data.csv  # unused