"""
Figure 2: Effect of compression mode on AUPRC, grouped by latency condition.

Purpose
-------
Shows that activation compression (float16 / int8 / Adaptive topk_int8) incurs
negligible accuracy loss relative to the float32 baseline, while substantially
reducing per-step communication payload. Supports the paper's claim that the
adaptive scheduler achieves a favourable accuracy–bandwidth trade-off.

Data source
-----------
- results/<SESSION>/<scenario_id>_seed<seed>_eval_report.json
  (one JSON report per scenario × seed run; absent reports are skipped)
- Fields used: weighted_overall.auprc and clients[].payload_bytes, loaded into
  the columns scenario_id, auprc_mean, avg_payload_bytes
- Three seeds (42, 52, 62) per scenario; error bars = ±1 std across seeds.
- Payload labels in the legend are computed from avg_payload_bytes at runtime
  so they update automatically when the Adaptive scenarios (L09/H15) are
  re-run after the topk_int8 fix.

Scenarios included
------------------
  No latency    : N01 (float32), N02 (float16), N03 (int8)
                  — no Adaptive scenario at zero latency (scheduler stays at float32)
  Low (~8 ms)   : L05 (float32), L06 (float16), L07 (int8), L09 (Adaptive)
  High (~50 ms) : H11 (float32), H12 (float16), H13 (int8), H15 (Adaptive)

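Scenarios excluded
------------------
  M04, M08, M13  (ρ=3 sync-interval ablation — different experimental axis)
  NOTE: these ids do not follow the N/L/H scheme used in SCENARIO_META;
  confirm the current ids of the ρ=3 scenarios.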

Notes
-----
- Adaptive payload label is computed per latency group from avg_payload_bytes,
  so low (~92 B) and high (~52 B post topk_int8 fix) are reported separately.
- Dotted horizontal lines mark the float32 baseline AUPRC within each group.
- Figure sized for a single column (3.5 × 2.4 in) in a two-column paper.

Output
------
  results/graphics/fig2_compression_auprc.pdf  (vector, for LaTeX)
  results/graphics/fig2_compression_auprc.png  (raster preview, dpi=200)

Usage
-----
  uv run python src/data/plot_compression_auprc.py

LaTeX inclusion
---------------
  \\begin{figure}
    \\includegraphics[width=\\linewidth]{fig2_compression_auprc.pdf}
    \\caption{Mean AUPRC (±std, 3 seeds) per compression mode and latency
              condition. Dotted lines mark the float32 baseline within each
              group. Payload sizes are averaged across all clients and rounds.}
  \\end{figure}
"""

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from pathlib import Path

# --- LaTeX-compatible style -------------------------------------------------
# Applied once at import time, so every figure created afterwards inherits it.
plt.rcParams.update({
    # Font type 42 embeds TrueType fonts in PDF/PS output, keeping the text
    # selectable and editable instead of the default Type 3 glyphs.
    "pdf.fonttype":    42,
    "ps.fonttype":     42,
    # Serif text to blend with the paper body; the second face is the fallback.
    "font.family":     "serif",
    "font.serif":      ["Times New Roman", "DejaVu Serif"],
    # Text sizes (pt): 9 for body-level text, 8 for ticks and legend.
    "font.size":       9,
    "axes.titlesize":  9,
    "axes.labelsize":  9,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "legend.fontsize": 8,
    # Stroke widths (pt).
    "lines.linewidth": 1.2,
    "axes.linewidth":  0.7,
})

# --- Paths ------------------------------------------------------------------
# "results" directory located two directories above the one holding this file.
RESULTS_DIR = Path(__file__).parent.parent.parent / "results"
# Sub-directory of RESULTS_DIR whose eval reports are plotted.
SESSION     = "2026-05-10_01-43-06"
# Seeds aggregated for every scenario.
SEEDS       = [42, 52, 62]
# Both outputs share one stem and differ only in their suffix.
OUT_PDF = RESULTS_DIR.joinpath("graphics", "fig2_compression_auprc.pdf")
OUT_PNG = OUT_PDF.with_suffix(".png")

# --- Scenario metadata ------------------------------------------------------
# Left-to-right order of the bar groups and of the bars inside each group.
LATENCY_ORDER     = ["No latency", "Low (~8 ms)", "High (~50 ms)"]
COMPRESSION_ORDER = ["float32", "float16", "int8", "Adaptive"]

# scenario id -> (latency group label, compression mode).
# Adaptive is only present at Low and High latency (scheduler stays at float32
# when there is no network pressure).
# ρ=3 scenarios are excluded — different experimental axis.
# NOTE(review): the previous comment named them M04, M08, M13, which does not
# follow the N/L/H id scheme used here — confirm the excluded ids.
SCENARIO_META = {
    scenario_id: (latency_group, compression)
    for latency_group, members in (
        ("No latency", (
            ("N01", "float32"),
            ("N02", "float16"),
            ("N03", "int8"),
        )),
        ("Low (~8 ms)", (
            ("L05", "float32"),
            ("L06", "float16"),
            ("L07", "int8"),
            ("L09", "Adaptive"),
        )),
        ("High (~50 ms)", (
            ("H11", "float32"),
            ("H12", "float16"),
            ("H13", "int8"),
            ("H15", "Adaptive"),
        )),
    )
    for scenario_id, compression in members
}

# Bar fill colour per compression mode.
# NOTE(review): the previous comment cited Wong (2011) as the source of a
# colourblind-safe palette, but these hex values do not appear in that palette
# as published — verify colour-blind safety before relying on the claim.
COLORS = dict(
    float32="#40A9FF",    # blue
    float16="#45DAD1",    # teal
    int8="#FFA940",       # orange
    Adaptive="#9F69E2",   # purple
)


# --- Data loading -----------------------------------------------------------

def load_data() -> pd.DataFrame:
    """
    Collect one row per (scenario, seed) eval report into a DataFrame.

    Reads ``RESULTS_DIR / SESSION / "<scenario_id>_seed<seed>_eval_report.json"``
    for every scenario in SCENARIO_META and every seed in SEEDS. From each
    report it takes ``weighted_overall.auprc`` and the mean of the clients'
    ``payload_bytes`` (a missing or null value counts as 0).

    Reports that do not exist are skipped; their names are listed in a single
    warning on stderr so a silently reduced seed count does not go unnoticed.

    Returns
    -------
    DataFrame with columns scenario_id, seed, auprc_mean, avg_payload_bytes,
    latency_group, compression. The columns are present even when no report
    was found (the frame is then empty).
    """
    import json
    import statistics
    import sys

    columns = [
        "scenario_id",
        "seed",
        "auprc_mean",
        "avg_payload_bytes",
        "latency_group",
        "compression",
    ]
    session_dir = RESULTS_DIR / SESSION
    rows = []
    missing = []
    for scenario_id, (lat, comp) in SCENARIO_META.items():
        for seed in SEEDS:
            report_path = session_dir / f"{scenario_id}_seed{seed}_eval_report.json"
            try:
                # Explicit encoding: do not depend on the platform default.
                report = json.loads(report_path.read_text(encoding="utf-8"))
            except FileNotFoundError:
                missing.append(report_path.name)
                continue
            payloads = [
                float(c.get("payload_bytes") or 0) for c in report["clients"]
            ]
            rows.append({
                "scenario_id":       scenario_id,
                "seed":              seed,
                "auprc_mean":        report["weighted_overall"]["auprc"],
                # statistics.mean raises on an empty sequence; a report without
                # clients yields NaN, which pandas skips when averaging.
                "avg_payload_bytes": (
                    statistics.mean(payloads) if payloads else float("nan")
                ),
                "latency_group":     lat,
                "compression":       comp,
            })
    if missing:
        print(
            f"warning: {len(missing)} eval report(s) not found in {session_dir}: "
            f"{', '.join(missing)}",
            file=sys.stderr,
        )
    # Passing `columns` keeps the schema stable even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)


def compute_stats(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse the per-seed rows into one row per (latency_group, compression).

    The result has the columns latency_group, compression, mean and std (of
    auprc_mean) and payload_b (mean of avg_payload_bytes). A group with a
    single seed has no defined std; it is reported as 0.0.
    """
    group_keys = ["latency_group", "compression"]
    summary = (
        df.groupby(group_keys)
        .agg(
            mean=("auprc_mean", "mean"),
            std=("auprc_mean", "std"),
            payload_b=("avg_payload_bytes", "mean"),
        )
        .reset_index()
    )
    summary["std"] = summary["std"].fillna(0.0)
    return summary


def _payload_label(comp: str, stats: pd.DataFrame) -> str:
    """
    Build legend label with dynamic payload info read from data.
    For Adaptive, reports mid and high payloads separately since they differ
    (mid ~92 B with topk_int8 fix; high ~52 B).
    For fixed modes, payload is the same across latency groups.
    """
    if comp == "float32":
        row = stats[stats["compression"] == comp].iloc[0]
        return f"float32 (baseline, {row['payload_b']:.0f} B)"
    if comp == "Adaptive":
        low  = stats[(stats["compression"] == comp) & (stats["latency_group"] == "Low (~8 ms)")]
        high = stats[(stats["compression"] == comp) & (stats["latency_group"] == "High (~50 ms)")]
        parts = []
        if not low.empty:
            parts.append(f"low {low.iloc[0]['payload_b']:.0f} B")
        if not high.empty:
            parts.append(f"high {high.iloc[0]['payload_b']:.0f} B")
        return f"Adaptive ({' / '.join(parts)})"
    # float16 / int8: payload is latency-independent
    rows = stats[stats["compression"] == comp]
    if rows.empty:
        return comp
    p = rows.iloc[0]["payload_b"]
    baseline = stats[(stats["compression"] == "float32") &
                     (stats["latency_group"] == rows.iloc[0]["latency_group"])]
    if not baseline.empty and baseline.iloc[0]["payload_b"] > 0:
        pct = int(round((1 - p / baseline.iloc[0]["payload_b"]) * 100))
        return f"{comp} (−{pct}%, {p:.0f} B)"
    return f"{comp} ({p:.0f} B)"


# --- Drawing ----------------------------------------------------------------

def draw(stats: pd.DataFrame) -> None:
    """
    Render the grouped bar chart and write it to OUT_PDF and OUT_PNG.

    Parameters
    ----------
    stats : one row per (latency_group, compression) pair with the columns
            ``mean`` / ``std`` (bar height and error bar) and ``payload_b``
            (used for the legend labels).

    One bar group is drawn per entry of LATENCY_ORDER and, inside each group,
    one bar per entry of COMPRESSION_ORDER that has data. A dotted horizontal
    line marks the float32 mean of every group. The output directory is
    created when missing, and the figure is closed before returning, even if
    drawing or saving fails.
    """
    # Single-column width for a two-column paper
    fig_w, fig_h = 3.5, 2.4
    bar_w     = 0.16
    group_gap = 0.85

    fig, ax = plt.subplots(figsize=(fig_w, fig_h))
    try:
        n_lat  = len(LATENCY_ORDER)
        n_comp = len(COMPRESSION_ORDER)
        x_centres = np.arange(n_lat, dtype=float) * group_gap
        # Bar offsets are symmetric around each group centre.
        offsets = np.linspace(-(n_comp - 1) / 2, (n_comp - 1) / 2, n_comp) * bar_w

        for ci, comp in enumerate(COMPRESSION_ORDER):
            sub = stats[stats["compression"] == comp].set_index("latency_group")
            xs, means, stds = [], [], []
            for li, lat in enumerate(LATENCY_ORDER):
                if lat in sub.index:
                    xs.append(x_centres[li] + offsets[ci])
                    means.append(sub.loc[lat, "mean"])
                    stds.append(sub.loc[lat, "std"])
            if not xs:
                # No data for this mode in any latency group: nothing to draw.
                continue

            ax.bar(
                xs, means,
                width=bar_w * 0.95,
                color=COLORS[comp],
                label=comp,
                yerr=stds,
                capsize=2,
                error_kw={"elinewidth": 0.8, "ecolor": "#444444", "capthick": 0.8},
                zorder=3,
            )

        # Dotted baseline reference line per latency group (float32 mean)
        baseline = stats[stats["compression"] == "float32"].set_index("latency_group")
        for li, lat in enumerate(LATENCY_ORDER):
            if lat in baseline.index:
                bval  = baseline.loc[lat, "mean"]
                left  = x_centres[li] - group_gap / 2 + 0.04
                right = x_centres[li] + group_gap / 2 - 0.04
                ax.hlines(bval, left, right, colors="#666666", linestyles=":",
                          linewidth=0.8, zorder=2)

        ax.set_xticks(x_centres)
        ax.set_xticklabels(LATENCY_ORDER)
        ax.set_xlabel("Network Latency Condition")
        ax.set_ylabel("AUPRC")

        # Zoomed y-range around the bar tops. The fixed margins are kept, but
        # the range widens when an error bar would otherwise be clipped.
        ymin = min(stats["mean"].min() - 0.018,
                   (stats["mean"] - stats["std"]).min() - 0.002)
        ymax = max(stats["mean"].max() + 0.012,
                   (stats["mean"] + stats["std"]).max() + 0.002)
        ax.set_ylim(ymin, ymax)
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda v, _: f"{v:.3f}"))
        ax.yaxis.set_minor_locator(plt.MultipleLocator(0.002))
        ax.grid(axis="y", linestyle=":", linewidth=0.6, alpha=0.5, zorder=0)
        ax.set_axisbelow(True)
        ax.spines[["top", "right"]].set_visible(False)

        # Custom legend handles so the labels can carry the payload sizes.
        handles = [
            mpatches.Patch(facecolor=COLORS[c], label=_payload_label(c, stats))
            for c in COMPRESSION_ORDER
        ]
        ax.legend(
            handles=handles,
            loc="lower left",
            frameon=True,
            framealpha=0.9,
            edgecolor="#cccccc",
            handlelength=1.2,
            handleheight=0.9,
            borderpad=0.5,
            labelspacing=0.3,
        )

        fig.tight_layout(pad=0.5)
        # savefig does not create directories; make sure the targets exist.
        OUT_PDF.parent.mkdir(parents=True, exist_ok=True)
        OUT_PNG.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(OUT_PDF, format="pdf", bbox_inches="tight")
        fig.savefig(OUT_PNG, dpi=200, bbox_inches="tight")
        print(f"PDF → {OUT_PDF}")
        print(f"PNG → {OUT_PNG}")
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


if __name__ == "__main__":
    # Script entry point: load the reports, print the aggregated table, plot.
    df = load_data()
    if df.empty:
        # Without any report the aggregation below would fail with an opaque
        # pandas error; stop with an actionable message instead.
        raise SystemExit(
            f"No eval reports found in {RESULTS_DIR / SESSION}; nothing to plot."
        )
    stats = compute_stats(df)
    print(stats.to_string(index=False))
    draw(stats)