"""
Figure 2: Effect of compression mode on AUPRC, grouped by latency condition.
Purpose
-------
Shows that activation compression (float16 / int8 / Adaptive topk_int8) incurs
negligible accuracy loss relative to the float32 baseline, while substantially
reducing per-step communication payload. Supports the paper's claim that the
adaptive scheduler achieves a favourable accuracy–bandwidth trade-off.
Data source
-----------
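- results/<SESSION>/<scenario_id>_seed<seed>_eval_report.json
(one JSON eval report per scenario × seed run)
- Fields used: weighted_overall.auprc, clients[].payload_bytes (averaged over
the clients of each run to give avg_payload_bytes)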
- Three seeds (42, 52, 62) per scenario; error bars = ±1 std across seeds.
- Payload labels in the legend are read from avg_payload_bytes at runtime so
they update automatically when M09/M14 are re-run after the topk_int8 fix.
Scenarios included
------------------
No latency : N01 (float32), N02 (float16), N03 (int8)
— no Adaptive scenario at zero latency (scheduler stays at float32)
Low (~8 ms) : L05 (float32), L06 (float16), L07 (int8), L09 (Adaptive)
High (~50 ms) : H11 (float32), H12 (float16), H13 (int8), H15 (Adaptive)
Scenarios excluded
------------------
M04, M08, M13 (ρ=3 sync-interval ablation — different experimental axis)
Notes
-----
- Adaptive payload label is computed per latency group from avg_payload_bytes,
so mid (~92 B) and high (~52 B post topk_int8 fix) are reported separately.
- Dotted horizontal lines mark the float32 baseline AUPRC within each group.
- Figure sized for a single column (3.5 × 2.4 in, before tight-bbox cropping)
in a two-column paper.
Output
------
results/graphics/fig2_compression_auprc.pdf (vector, for LaTeX)
results/graphics/fig2_compression_auprc.png (raster preview, dpi=200)
Usage
-----
uv run python src/data/plot_compression_auprc.py
LaTeX inclusion
---------------
\\begin{figure}
\\includegraphics[width=\\linewidth]{fig2_compression_auprc.pdf}
\\caption{Mean AUPRC (±std, 3 seeds) per compression mode and latency
condition. Dotted lines mark the float32 baseline within each
group. Payload sizes are averaged across all clients and rounds.}
\\end{figure}
"""
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from pathlib import Path
# --- LaTeX-compatible style -------------------------------------------------
# Global matplotlib defaults, applied once at import time so that every figure
# produced by this script uses serif text at paper-friendly sizes.
plt.rcParams.update({
    "font.family": "serif",
    # Candidate serif fonts in order of preference.
    "font.serif": ["Times New Roman", "DejaVu Serif"],
    # Font sizes are in points.
    "font.size": 9,
    "axes.titlesize": 9,
    "axes.labelsize": 9,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "legend.fontsize": 8,
    "lines.linewidth": 1.2,
    "axes.linewidth": 0.7,
    # Font type 42 embeds TrueType fonts in PDF/PS output instead of
    # matplotlib's default Type 3 fonts.
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
})
# --- Paths ------------------------------------------------------------------
# `results/` under the directory three levels above this file (the repository
# root when the script lives at src/data/, as in the Usage note).
RESULTS_DIR = Path(__file__).parent.parent.parent / "results"
# Sub-directory of RESULTS_DIR from which load_data() reads the eval reports.
SESSION = "2026-05-10_01-43-06"
# Seeds whose per-run reports are loaded for every scenario.
SEEDS = [42, 52, 62]
# Output files written by draw(): vector PDF and raster PNG preview.
OUT_PDF = RESULTS_DIR / "graphics" / "fig2_compression_auprc.pdf"
OUT_PNG = RESULTS_DIR / "graphics" / "fig2_compression_auprc.png"
# --- Scenario metadata ------------------------------------------------------
# Maps scenario_id -> (latency group label, compression mode). The key is also
# the filename prefix of the eval reports that load_data() looks for.
# ρ=3 sync-interval ablation scenarios are excluded — different experimental axis.
# NOTE(review): earlier notes name the excluded scenarios M04, M08, M13, while
# the keys below use N/L/H prefixes — confirm which ID scheme is current.
# Adaptive is only present at Low and High latency (scheduler stays at float32
# when there is no network pressure).
SCENARIO_META = {
    "N01": ("No latency", "float32"),
    "N02": ("No latency", "float16"),
    "N03": ("No latency", "int8"),
    "L05": ("Low (~8 ms)", "float32"),
    "L06": ("Low (~8 ms)", "float16"),
    "L07": ("Low (~8 ms)", "int8"),
    "L09": ("Low (~8 ms)", "Adaptive"),
    "H11": ("High (~50 ms)", "float32"),
    "H12": ("High (~50 ms)", "float16"),
    "H13": ("High (~50 ms)", "int8"),
    "H15": ("High (~50 ms)", "Adaptive"),
}
# Left-to-right order of the bar groups; labels must match SCENARIO_META values.
LATENCY_ORDER = ["No latency", "Low (~8 ms)", "High (~50 ms)"]
# Order of the bars inside each group, and of the legend entries.
COMPRESSION_ORDER = ["float32", "float16", "int8", "Adaptive"]
# Bar colour per compression mode.
# NOTE(review): previously described as the colorblind-safe Wong (2011)
# palette, but these hex values do not appear to be the colours published
# there — confirm colorblind safety before relying on that claim.
COLORS = {
    "float32": "#40A9FF",  # blue
    "float16": "#45DAD1",  # teal
    "int8": "#FFA940",  # orange
    "Adaptive": "#9F69E2",  # purple
}
# --- Data loading -----------------------------------------------------------
def load_data(
    *,
    results_dir: "Path | None" = None,
    session: "str | None" = None,
    seeds: "list[int] | None" = None,
    scenario_meta: "dict[str, tuple[str, str]] | None" = None,
) -> pd.DataFrame:
    """Load one row per (scenario, seed) from the per-run JSON eval reports.

    Each report is expected at
    ``<results_dir>/<session>/<scenario_id>_seed<seed>_eval_report.json`` and
    must contain ``weighted_overall.auprc`` and a ``clients`` list whose
    entries may carry ``payload_bytes``.

    Parameters
    ----------
    results_dir, session, seeds, scenario_meta:
        Optional overrides for the module-level ``RESULTS_DIR``, ``SESSION``,
        ``SEEDS`` and ``SCENARIO_META``. ``None`` (the default) uses the
        module-level value, so ``load_data()`` behaves as before.

    Returns
    -------
    pd.DataFrame
        Columns ``scenario_id``, ``seed``, ``auprc_mean``,
        ``avg_payload_bytes``, ``latency_group``, ``compression``. The columns
        are present even when no report was found, so callers can test
        ``df.empty`` instead of hitting a ``KeyError`` later.

    Raises
    ------
    statistics.StatisticsError
        If a report has an empty ``clients`` list.

    Notes
    -----
    Missing report files are skipped, but a single ``UserWarning`` lists them
    so that a figure built from fewer seeds than intended is not produced
    silently.
    """
    import json
    import statistics
    import warnings

    results_dir = RESULTS_DIR if results_dir is None else Path(results_dir)
    session = SESSION if session is None else session
    seeds = SEEDS if seeds is None else seeds
    scenario_meta = SCENARIO_META if scenario_meta is None else scenario_meta

    columns = [
        "scenario_id",
        "seed",
        "auprc_mean",
        "avg_payload_bytes",
        "latency_group",
        "compression",
    ]
    session_dir = results_dir / session
    rows = []
    missing = []
    for scenario_id, (lat, comp) in scenario_meta.items():
        for seed in seeds:
            report_path = session_dir / f"{scenario_id}_seed{seed}_eval_report.json"
            if not report_path.exists():
                missing.append(report_path.name)
                continue
            report = json.loads(report_path.read_text(encoding="utf-8"))
            clients = report["clients"]
            if not clients:
                # Same exception type statistics.mean() would raise, but with
                # the offending file named.
                raise statistics.StatisticsError(
                    f"{report_path} contains no client entries"
                )
            # A missing or null payload_bytes counts as 0 bytes for that client.
            payload = statistics.mean(
                float(c.get("payload_bytes") or 0) for c in clients
            )
            rows.append({
                "scenario_id": scenario_id,
                "seed": seed,
                "auprc_mean": report["weighted_overall"]["auprc"],
                "avg_payload_bytes": payload,
                "latency_group": lat,
                "compression": comp,
            })

    if missing:
        warnings.warn(
            f"{len(missing)} eval report(s) not found under {session_dir}: "
            + ", ".join(missing),
            stacklevel=2,
        )
    return pd.DataFrame(rows, columns=columns)
def compute_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise per-seed rows into one row per (latency_group, compression).

    The result has the columns ``latency_group``, ``compression``, ``mean``
    and ``std`` (of ``auprc_mean`` across seeds) and ``payload_b`` (mean of
    ``avg_payload_bytes``). A group with a single seed has an undefined
    sample standard deviation, which is reported as 0.0.
    """
    group_keys = ["latency_group", "compression"]
    summary = (
        df.groupby(group_keys)
        .agg(
            mean=("auprc_mean", "mean"),
            std=("auprc_mean", "std"),
            payload_b=("avg_payload_bytes", "mean"),
        )
        .reset_index()
    )
    summary["std"] = summary["std"].fillna(0.0)
    return summary
def _payload_label(comp: str, stats: pd.DataFrame) -> str:
"""
Build legend label with dynamic payload info read from data.
For Adaptive, reports mid and high payloads separately since they differ
(mid ~92 B with topk_int8 fix; high ~52 B).
For fixed modes, payload is the same across latency groups.
"""
if comp == "float32":
row = stats[stats["compression"] == comp].iloc[0]
return f"float32 (baseline, {row['payload_b']:.0f} B)"
if comp == "Adaptive":
low = stats[(stats["compression"] == comp) & (stats["latency_group"] == "Low (~8 ms)")]
high = stats[(stats["compression"] == comp) & (stats["latency_group"] == "High (~50 ms)")]
parts = []
if not low.empty:
parts.append(f"low {low.iloc[0]['payload_b']:.0f} B")
if not high.empty:
parts.append(f"high {high.iloc[0]['payload_b']:.0f} B")
return f"Adaptive ({' / '.join(parts)})"
# float16 / int8: payload is latency-independent
rows = stats[stats["compression"] == comp]
if rows.empty:
return comp
p = rows.iloc[0]["payload_b"]
baseline = stats[(stats["compression"] == "float32") &
(stats["latency_group"] == rows.iloc[0]["latency_group"])]
if not baseline.empty and baseline.iloc[0]["payload_b"] > 0:
pct = int(round((1 - p / baseline.iloc[0]["payload_b"]) * 100))
return f"{comp} (−{pct}%, {p:.0f} B)"
return f"{comp} ({p:.0f} B)"
# --- Drawing ----------------------------------------------------------------
def draw(stats: pd.DataFrame) -> None:
    """Render the grouped bar chart and save it to ``OUT_PDF`` and ``OUT_PNG``.

    One group of bars is drawn per entry of ``LATENCY_ORDER`` and one bar per
    entry of ``COMPRESSION_ORDER``; combinations absent from ``stats`` are
    left out. Error bars show ``std``, and a dotted horizontal line marks the
    float32 mean inside each latency group.

    Parameters
    ----------
    stats:
        Table with the columns ``latency_group``, ``compression``, ``mean``,
        ``std`` and ``payload_b`` (one row per group/mode pair).

    Raises
    ------
    ValueError
        If ``stats`` is empty, since axis limits cannot be derived.

    Side effects
    ------------
    Creates the output directory if needed, writes both image files, prints
    their paths, and closes the figure afterwards.
    """
    if stats.empty:
        raise ValueError("draw() received an empty stats table; nothing to plot")

    # Single-column width for a two-column paper
    fig_w, fig_h = 3.5, 2.4
    bar_w = 0.16
    group_gap = 0.85
    fig, ax = plt.subplots(figsize=(fig_w, fig_h))
    try:
        n_lat = len(LATENCY_ORDER)
        n_comp = len(COMPRESSION_ORDER)
        x_centres = np.arange(n_lat, dtype=float) * group_gap
        # Bar offsets are symmetric around each group centre.
        offsets = np.linspace(-(n_comp - 1) / 2, (n_comp - 1) / 2, n_comp) * bar_w

        for ci, comp in enumerate(COMPRESSION_ORDER):
            sub = stats[stats["compression"] == comp].set_index("latency_group")
            xs, means, stds = [], [], []
            for li, lat in enumerate(LATENCY_ORDER):
                if lat in sub.index:
                    xs.append(x_centres[li] + offsets[ci])
                    means.append(sub.loc[lat, "mean"])
                    stds.append(sub.loc[lat, "std"])
            if not xs:
                # No data for this mode in any latency group.
                continue
            ax.bar(
                xs, means,
                width=bar_w * 0.95,
                color=COLORS[comp],
                label=comp,
                yerr=stds,
                capsize=2,
                error_kw={"elinewidth": 0.8, "ecolor": "#444444", "capthick": 0.8},
                zorder=3,
            )

        # Dotted baseline reference line per latency group (float32 mean)
        baseline = stats[stats["compression"] == "float32"].set_index("latency_group")
        for li, lat in enumerate(LATENCY_ORDER):
            if lat in baseline.index:
                bval = baseline.loc[lat, "mean"]
                left = x_centres[li] - group_gap / 2 + 0.04
                right = x_centres[li] + group_gap / 2 - 0.04
                ax.hlines(bval, left, right, colors="#666666", linestyles=":",
                          linewidth=0.8, zorder=2)

        ax.set_xticks(x_centres)
        ax.set_xticklabels(LATENCY_ORDER)
        ax.set_xlabel("Network Latency Condition")
        ax.set_ylabel("AUPRC")

        # Zoomed y-range around the means; widened only when an error bar
        # would otherwise extend past the fixed margins and be clipped.
        err_low = (stats["mean"] - stats["std"]).min()
        err_high = (stats["mean"] + stats["std"]).max()
        ymin = min(stats["mean"].min() - 0.018, err_low - 0.002)
        ymax = max(stats["mean"].max() + 0.012, err_high + 0.002)
        ax.set_ylim(ymin, ymax)
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda v, _: f"{v:.3f}"))
        ax.yaxis.set_minor_locator(plt.MultipleLocator(0.002))
        ax.grid(axis="y", linestyle=":", linewidth=0.6, alpha=0.5, zorder=0)
        ax.set_axisbelow(True)
        ax.spines[["top", "right"]].set_visible(False)

        # Legend uses explicit patches so labels can carry payload info.
        handles = [
            mpatches.Patch(facecolor=COLORS[c], label=_payload_label(c, stats))
            for c in COMPRESSION_ORDER
        ]
        ax.legend(
            handles=handles,
            loc="lower left",
            frameon=True,
            framealpha=0.9,
            edgecolor="#cccccc",
            handlelength=1.2,
            handleheight=0.9,
            borderpad=0.5,
            labelspacing=0.3,
        )
        fig.tight_layout(pad=0.5)

        # savefig does not create missing directories.
        for target in (OUT_PDF, OUT_PNG):
            target.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(OUT_PDF, format="pdf", bbox_inches="tight")
        fig.savefig(OUT_PNG, dpi=200, bbox_inches="tight")
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)

    print(f"PDF → {OUT_PDF}")
    print(f"PNG → {OUT_PNG}")
if __name__ == "__main__":
    # Script entry point: load per-seed results, aggregate, print, plot.
    df = load_data()
    if df.empty:
        # Without this guard the aggregation step fails with an opaque error
        # when no eval report exists for the configured session.
        raise SystemExit(
            f"No eval reports found under {RESULTS_DIR / SESSION}; nothing to plot."
        )
    stats = compute_stats(df)
    print(stats.to_string(index=False))
    draw(stats)