1use serde::{Deserialize, Serialize};
2
3use super::{BenchRunner, RunnerConfig, Workload};
4use crate::{
5 metrics::{BlockMetric, RunResult},
6 report::compare::{bootstrap_paired_delta, BootstrapDelta, MetricKey, Verdict},
7};
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct AbbaConfig {
12 pub iterations: usize,
14 pub bootstrap_iters: usize,
16 pub tolerance_pct: f64,
18 pub seed: u64,
20 pub runner: RunnerConfig,
21}
22
23impl Default for AbbaConfig {
24 fn default() -> Self {
25 Self {
26 iterations: 3,
27 bootstrap_iters: 10_000,
28 tolerance_pct: 5.0,
29 seed: 0xC0FF_EE12_3456_789A,
30 runner: RunnerConfig::default(),
31 }
32 }
33}
34
35#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct PairedSample {
38 pub iter_index: usize,
39 pub baseline: RunResult,
40 pub feature: RunResult,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct AbbaResult {
46 pub manifest_name: String,
47 pub iterations: usize,
48 pub samples: Vec<PairedSample>,
49 pub deltas: Vec<(MetricKey, BootstrapDelta)>,
50 pub verdict: Verdict,
51}
52
53pub fn run_abba<FB, FF>(
56 config: &AbbaConfig,
57 manifest_name: &str,
58 mut build_baseline: FB,
59 mut build_feature: FF,
60) -> eyre::Result<AbbaResult>
61where
62 FB: FnMut() -> eyre::Result<(Workload, Box<dyn BenchRunner>)>,
63 FF: FnMut() -> eyre::Result<(Workload, Box<dyn BenchRunner>)>,
64{
65 let mut samples = Vec::with_capacity(config.iterations);
66 for i in 0..config.iterations {
67 let order = if i % 2 == 0 {
68 [Side::Baseline, Side::Feature, Side::Feature, Side::Baseline]
69 } else {
70 [Side::Feature, Side::Baseline, Side::Baseline, Side::Feature]
71 };
72
73 let mut runs: [Option<RunResult>; 4] = Default::default();
74 for (slot, side) in order.iter().enumerate() {
75 let (workload, mut runner) = match side {
76 Side::Baseline => build_baseline()?,
77 Side::Feature => build_feature()?,
78 };
79 runs[slot] = Some(runner.execute(workload)?);
80 }
81
82 let mut baseline_runs = Vec::new();
83 let mut feature_runs = Vec::new();
84 for (slot, side) in order.iter().enumerate() {
85 let r = runs[slot].take().unwrap();
86 match side {
87 Side::Baseline => baseline_runs.push(r),
88 Side::Feature => feature_runs.push(r),
89 }
90 }
91 let baseline = average_runs(&baseline_runs)?;
92 let feature = average_runs(&feature_runs)?;
93
94 samples.push(PairedSample {
95 iter_index: i,
96 baseline,
97 feature,
98 });
99 }
100
101 let deltas = compute_paired_deltas(&samples, config);
102 let verdict = decide_verdict(&deltas, config.tolerance_pct);
103
104 Ok(AbbaResult {
105 manifest_name: manifest_name.to_string(),
106 iterations: config.iterations,
107 samples,
108 deltas,
109 verdict,
110 })
111}
112
/// Which build a single run in the ABBA schedule exercises.
#[derive(Clone, Copy, Debug)]
enum Side {
    /// The reference build, obtained from the `build_baseline` closure.
    Baseline,
    /// The candidate build, obtained from the `build_feature` closure.
    Feature,
}
118
119fn average_runs(runs: &[RunResult]) -> eyre::Result<RunResult> {
121 if runs.is_empty() {
122 eyre::bail!("average_runs: empty");
123 }
124 if runs.len() == 1 {
125 return Ok(runs[0].clone());
126 }
127 let n = runs[0].blocks.len();
128 if !runs.iter().all(|r| r.blocks.len() == n) {
129 eyre::bail!("average_runs: differing block counts");
130 }
131 let mut blocks: Vec<BlockMetric> = Vec::with_capacity(n);
132 for i in 0..n {
133 let wall: u64 =
134 runs.iter().map(|r| r.blocks[i].wall_clock_ns).sum::<u64>() / runs.len() as u64;
135 let cpu: u64 = runs.iter().map(|r| r.blocks[i].cpu_ns).sum::<u64>() / runs.len() as u64;
136 let rss: u64 = runs.iter().map(|r| r.blocks[i].rss_bytes).sum::<u64>() / runs.len() as u64;
137 blocks.push(BlockMetric {
138 block_number: runs[0].blocks[i].block_number,
139 wall_clock_ns: wall,
140 cpu_ns: cpu,
141 gas_used: runs[0].blocks[i].gas_used,
142 tx_count: runs[0].blocks[i].tx_count,
143 success_count: runs[0].blocks[i].success_count,
144 rss_bytes: rss,
145 });
146 }
147 let windows = crate::metrics::rolling::build_windows(&blocks, 500);
148 let summary = crate::metrics::SummaryMetrics::from_blocks(&blocks, &windows);
149 Ok(RunResult {
150 manifest_name: runs[0].manifest_name.clone(),
151 blocks,
152 windows,
153 summary,
154 host: runs[0].host.clone(),
155 })
156}
157
158fn compute_paired_deltas(
160 samples: &[PairedSample],
161 config: &AbbaConfig,
162) -> Vec<(MetricKey, BootstrapDelta)> {
163 let mut out = Vec::new();
164 let metrics = [
165 MetricKey::WallClockNs,
166 MetricKey::GasPerSec,
167 MetricKey::CpuNs,
168 MetricKey::RssBytes,
169 ];
170 for m in metrics {
171 let mut paired = Vec::new();
172 for s in samples {
173 let n = s.baseline.blocks.len().min(s.feature.blocks.len());
174 for i in 0..n {
175 let b = metric_value(&s.baseline.blocks[i], m);
176 let f = metric_value(&s.feature.blocks[i], m);
177 paired.push((b, f));
178 }
179 }
180 if paired.is_empty() {
181 continue;
182 }
183 let delta = bootstrap_paired_delta(&paired, config.bootstrap_iters, config.seed);
184 out.push((m, delta));
185 }
186 out
187}
188
189fn metric_value(b: &BlockMetric, m: MetricKey) -> f64 {
190 match m {
191 MetricKey::WallClockNs => b.wall_clock_ns as f64,
192 MetricKey::CpuNs => b.cpu_ns as f64,
193 MetricKey::GasPerSec => b.gas_per_sec(),
194 MetricKey::RssBytes => b.rss_bytes as f64,
195 }
196}
197
198fn decide_verdict(deltas: &[(MetricKey, BootstrapDelta)], tolerance_pct: f64) -> Verdict {
199 let mut worst: Option<(MetricKey, f64)> = None;
200 for (k, d) in deltas {
201 let baseline_mean = d.baseline_mean.max(1e-9);
202 let pct = match k {
203 MetricKey::GasPerSec => -d.mean / baseline_mean * 100.0,
204 _ => d.mean / baseline_mean * 100.0,
205 };
206 let ci_pct = match k {
207 MetricKey::GasPerSec => -d.ci_low_95 / baseline_mean * 100.0,
208 _ => d.ci_low_95 / baseline_mean * 100.0,
209 };
210 if ci_pct > tolerance_pct {
211 match worst {
212 Some((_, w)) if w >= pct => {}
213 _ => worst = Some((*k, pct)),
214 }
215 }
216 }
217 if let Some((metric, pct)) = worst {
218 return Verdict::Regression {
219 metric: format!("{metric:?}"),
220 delta_pct: pct,
221 };
222 }
223
224 let mut any_improvement = false;
225 for (k, d) in deltas {
226 let baseline_mean = d.baseline_mean.max(1e-9);
227 let pct_high = match k {
228 MetricKey::GasPerSec => -d.ci_high_95 / baseline_mean * 100.0,
229 _ => d.ci_high_95 / baseline_mean * 100.0,
230 };
231 if pct_high < 0.0 {
232 any_improvement = true;
233 }
234 }
235 if any_improvement {
236 Verdict::Improvement
237 } else {
238 Verdict::Neutral
239 }
240}
241
#[cfg(test)]
mod tests {
    use super::*;
    use crate::capture::synthetic::generate;

    /// Runs one ABBA iteration with the same builder on both sides.
    ///
    /// Only "not a regression" is asserted, with a wide 50% tolerance; the test does not
    /// require the verdict to be exactly `Neutral`.
    #[test]
    fn abba_smoke_runs_and_yields_neutral_for_identical_runs() {
        let config = AbbaConfig {
            iterations: 1,
            bootstrap_iters: 200,
            tolerance_pct: 50.0,
            seed: 1,
            runner: RunnerConfig {
                rolling_window_blocks: 2,
                abort_on_block_error: false,
            },
        };

        // Captures only a shared borrow of `config`, so the closure can be passed twice.
        let build_side = || {
            let params = serde_json::json!({ "block_count": 2, "txs_per_block": 2 });
            let workload = generate("test/abba", 421614, 30, "transfer_train", &params)?;
            let runner: Box<dyn BenchRunner> = Box::new(
                crate::runner::in_process::InProcessRunner::new(config.runner.clone()),
            );
            Ok::<_, eyre::Report>((workload, runner))
        };

        let outcome = run_abba(&config, "test/abba", build_side, build_side).unwrap();
        assert_eq!(outcome.iterations, 1);
        assert!(!outcome.deltas.is_empty());
        assert!(!matches!(outcome.verdict, Verdict::Regression { .. }));
    }
}