# Compression settings; both keys belong to the `compression` mapping.
compression:
  mode: float32
  topk_ratio: 0.125
# Console logging switches.
console:
  log_step_details: false
  log_server_requests: false
artifacts:
  # none|minimal|full: controls checkpoint/meta config payload size.
  config_snapshot_policy: minimal
# Dataset locations, chronological split boundaries and model feature columns.
data:
  processed_dir: dataset/processed
  holdout_dir: dataset/holdout
  # Chronological time-based splitting (2015-01-01 to 2026-03-31)
  #   TRAIN: 2015-01-01 → 2023-12-31 (~80%)
  #   VAL:   2024-01-01 → 2024-12-31 (~9%)
  #   TEST:  2025-01-01 → end        (~11%)
  # Boundaries are exclusive upper bounds (ts < train_end → TRAIN, etc.)
  # Kept quoted so the values stay strings rather than YAML timestamps.
  train_end: "2024-01-01"
  val_end: "2025-01-01"
  # Feature columns consumed by model/data pipeline (preferred location)
  feature_cols:
    - Temperature
    - Humidity
    - Pressure
    - Wind Speed
    - Rain
# Federated round orchestration: client counts, barrier timing and staleness.
federated:
  # 11 training parquet files: strict 1:1 mapping, client N gets sorted_files[N-1]
  num_clients: 11
  min_clients_per_round: 11
  rho: 1
  # How long the server barrier waits for all active clients to submit weights.
  # 20s is sufficient for local native runs; increase to 150 for Pi → VPS.
  round_timeout_sec: 20
  # After quorum is reached, wait this many extra seconds for stragglers
  # before aggregating. Set to 0 to aggregate immediately upon quorum.
  grace_period_sec: 1
  # Bounded-staleness FedAvg tolerance: accept client updates up to this many
  # rounds behind the server's current round.
  # 0 = strict synchronous FedAvg (default).
  # Set to max_rho (e.g. 3) for scenarios with heterogeneous rho across clients.
  max_staleness: 0
# gRPC transport between clients and server.
grpc:
  max_message_mb: 50
  server_host: fsl-server
  # server_host: localhost
  server_port: 50051
  # TLS: set tls_enabled: true and provide cert/key paths for encrypted transport.
  # When running over Tailscale the WireGuard layer already encrypts traffic,
  # so tls_enabled: false is acceptable for that topology.
  tls_enabled: false
  # tls_cert_path: /app/certs/server.crt  # server + client (CA cert)
  # tls_key_path: /app/certs/server.key  # server only
# Model hyperparameters.
model:
  lstm_dropout: 0.3
  hidden_size: 64
  server_head_width: 64
  server_head_dropout: 0.1
  horizon: 24
  # NOTE(review): presumably must equal the number of data.feature_cols
  # entries (5) — confirm against the model/data pipeline.
  input_size: 5
  num_layers: 2
  seq_len: 48
# output:
# metrics_dir: /app/outputs/metrics # unused (runtime writes to results/)
# plots_dir: /app/outputs/plots # unused (runtime writes to results/)
# Profiler settings and synthetic latency generator parameters.
# NOTE(review): the nesting of `latency_generator` under `profiler`, and of
# `measured_floor_ratio` inside `latency_generator`, is inferred from the key
# names — confirm against the config loader.
profiler:
  enabled: true
  latency_generator:
    base_latency_ms: 1.5
    # NOTE(review): three offsets are listed while federated.num_clients is 11;
    # confirm how clients beyond the third are assigned an offset.
    client_offsets_ms: [0.0, 4.0, 9.0]
    jitter_ms: 0.8
    burst_every_steps: 0
    burst_latency_ms: 0.0
    sleep_fraction: 0.0
    max_sleep_ms: 0.0
    measured_floor_ratio: 0.25
# Scheduler thresholds and rho bounds.
scheduler:
  enabled: true
  latency_threshold: 4.0
  int8_latency_threshold: 10.0
  max_rho: 20
  min_rho: 1
  rho_step: 1
  topk_multiplier: 1.5
  latency_ema_alpha: 0.2
# Training loop, loss, device and checkpoint settings.
training:
  seed: 42
  torch_num_threads: 1  # threads per process; set to 1 when running multi-process on CPU
  # batch_size: 32  # unused (main training loop is step-based RPC, not DataLoader batch training)
  classification_loss_weight: 2.0
  classification_positive_weight: 1.0
  classification_loss_type: focal  # weighted_bce | focal
  focal_gamma: 2.0
  focal_alpha: -1  # [0,1] to enable alpha balancing; negative means disabled
  rain_threshold_mm: 0.5
  rain_probability_threshold: 0.5
  local_steps: 10
  lr: 0.0005
  num_rounds: 50
  rain_sample_ratio: 0.45
  regression_loss_weight: 1.0
  target_transform: log1p
  start_delay: 3
  device: auto  # auto | cuda | mps | cpu
  mixed_precision: auto  # none | auto | bf16
  use_gpu: true  # legacy switch (kept for backward compatibility)
  checkpoint_interval: 1  # Save a periodic checkpoint every N rounds
  early_stopping_patience: 15
  eval_max_samples_per_sensor: 500
# Server process settings.
server:
  log_flush_interval: 1000
  max_workers: 25
# Raw data download settings.
data_download:
  # Data download window settings
  start_date: "2015-01-01T00:00:00"
  end_date: "2026-03-31T00:00:00"
  # Legacy location kept for backward compatibility (new code prefers data.feature_cols)
  feature_cols:
    - Temperature
    - Humidity
    - Pressure
    - Wind Speed
    - Rain
  # Spatial bounding box for city: [min_lon, min_lat, max_lon, max_lat]
  # bbox: [-1.71, 54.95, -1.54, 55.03]
  raw_out_dir: dataset
  # raw_filename: newcastle_rainfall_data.csv  # unused