compare complement test results to a baseline

One thing that might be neat in the future is noticing differing results
while the tests are still running, and modifying the log messages to
indicate them. I can imagine situations where you would want to abort
the test run immediately after seeing the first regression.
Benjamin Lee 2024-06-21 16:33:51 -07:00
parent ab883ffa44
commit 8eab6eea20
4 changed files with 481 additions and 6 deletions
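The early-abort idea from the commit message could look roughly like this. This is only a sketch, none of it is in the commit: the `on_test_result` hook is hypothetical and assumes `run_complement` gains a per-test callback; `TestResult` is the enum from the test2json module below.

use std::collections::BTreeMap;

use miette::{miette, Result};

// Hypothetical hook, called as each test finishes, that annotates the live
// log output when a result differs from the baseline and optionally aborts.
fn on_test_result(
    baseline: &BTreeMap<String, TestResult>,
    test: &str,
    result: TestResult,
    abort_on_regression: bool,
) -> Result<()> {
    match baseline.get(test) {
        Some(&old) if old != result => {
            // Flag the change in the log line as it happens.
            eprintln!("result differs from baseline: {test}: {old} -> {result}");
            if abort_on_regression && result == TestResult::Fail {
                return Err(miette!("aborting after first regression: {test}"));
            }
            Ok(())
        }
        _ => Ok(()),
    }
}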


@@ -12,6 +12,7 @@ mod test2json;
use self::{
docker::load_docker_image,
summary::{compare_summary, read_summary},
test2json::{count_complement_tests, run_complement},
};
@@ -23,6 +24,12 @@ pub(crate) struct Args {
/// If it exists and is not empty, an error will be returned.
#[clap(short, long)]
out: PathBuf,
/// Baseline test summary file to compare with
///
/// If unspecified, defaults to `$repo_root/complement-baseline.tsv`
#[clap(short, long)]
baseline: Option<PathBuf>,
}
#[allow(clippy::needless_pass_by_value)]
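For illustration, the new flag might be used like this; the actual xtask subcommand name isn't visible in this diff, so `complement` below is a placeholder:

cargo xtask complement --out target/complement --baseline complement-baseline.tsv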
@@ -30,6 +37,15 @@ pub(crate) fn main(args: Args) -> Result<()> {
let sh = Shell::new().unwrap();
let toplevel = get_toplevel_path(&sh)
.wrap_err("failed to determine repository root directory")?;
let baseline_path = args
.baseline
.unwrap_or_else(|| toplevel.join("complement-baseline.tsv"));
let baseline = read_summary(&baseline_path).wrap_err_with(|| {
format!(
"failed to read baseline test result summary from \
{baseline_path:?}"
)
})?;
create_out_dir(&args.out).wrap_err_with(|| {
format!("error initializing output directory {:?}", args.out)
})?;
@@ -38,8 +54,11 @@ pub(crate) fn main(args: Args) -> Result<()> {
)?;
let test_count = count_complement_tests(&sh, &docker_image)
.wrap_err("failed to determine total complement test count")?;
run_complement(&sh, &args.out, &docker_image, test_count)
let results = run_complement(&sh, &args.out, &docker_image, test_count)
.wrap_err("failed to run complement tests")?;
let summary_path = args.out.join("summary.tsv");
compare_summary(&baseline, &results, &baseline_path, &summary_path)?;
println!("\nTest results were identical to baseline.");
Ok(())
}


@@ -6,10 +6,14 @@
use std::{
collections::BTreeMap,
fs,
io::{BufWriter, Write},
path::Path,
};
use miette::{IntoDiagnostic, Result};
use miette::{
miette, IntoDiagnostic, LabeledSpan, NamedSource, Result, WrapErr,
};
use super::test2json::TestResult;
@@ -29,6 +33,30 @@ fn escape_tsv_value(value: &str) -> String {
.replace('\r', "\\r")
}
/// Converts a string from escaped TSV value form back to its unescaped form.
fn unescape_tsv_value(value: &str) -> String {
let mut chars = value.chars();
let mut out = String::new();
while let Some(c) = chars.next() {
if c == '\\' {
match chars.next() {
Some('\\') => out.push('\\'),
Some('n') => out.push('\n'),
Some('t') => out.push('\t'),
Some('r') => out.push('\r'),
Some(c2) => {
out.push(c);
out.push(c2);
}
None => out.push(c),
}
} else {
out.push(c);
}
}
out
}
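A quick round-trip illustration of the escaping scheme. This is a sketch; the first assertion assumes `escape_tsv_value`, whose tail appears above, escapes backslashes before the control characters:

assert_eq!(escape_tsv_value("a\tb\nc"), "a\\tb\\nc");
assert_eq!(unescape_tsv_value("a\\tb\\nc"), "a\tb\nc");
// Unknown escapes and a trailing backslash pass through unchanged:
assert_eq!(unescape_tsv_value("a\\xb\\"), "a\\xb\\");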
/// Write a test result summary to a writer.
pub(crate) fn write_summary<W: Write>(
w: &mut BufWriter<W>,
@@ -48,3 +76,211 @@ pub(crate) fn write_summary<W: Write>(
}
Ok(())
}
/// Reads a test result summary from a TSV file written by a previous run of the
/// complement wrapper.
pub(crate) fn read_summary(
path: &Path,
) -> Result<BTreeMap<String, TestResult>> {
let contents = fs::read_to_string(path)
.into_diagnostic()
.wrap_err("failed to read summary file contents")?;
let source = NamedSource::new(path.to_string_lossy(), contents);
let contents = &source.inner();
let mut offset = 0;
// The TSV spec allows CRLF, but we never emit these ourselves
let mut lines = contents.split('\n');
let header_line = lines.next().ok_or_else(|| {
miette!(
labels = vec![LabeledSpan::at_offset(0, "expected header row")],
"summary file missing header row",
)
.with_source_code(source.clone())
})?;
let expected_header_line = "test\tresult";
if header_line != expected_header_line {
return Err(miette!(
labels = vec![LabeledSpan::at(
0..header_line.len(),
"unexpected header"
)],
"summary file header row has unexpected columns. Expecting \
{expected_header_line:?}."
)
.with_source_code(source));
}
offset += header_line.len() + 1;
let mut results = BTreeMap::new();
for line in lines {
if line.is_empty() {
continue;
}
let tabs = line.match_indices('\t').collect::<Vec<_>>();
let column_count = tabs.len() + 1;
let (result_span, test, result) = match tabs[..] {
[(first_tab, _)] => {
let result_span = offset + first_tab + 1..offset + line.len();
let test = line.get(..first_tab).expect(
"index should be valid because it was returned from \
'match_indices'",
);
let result = line.get(first_tab + 1..).expect(
"index should be valid because it was returned from \
'match_indices'",
);
(result_span, test, result)
}
[] => {
return Err(miette!(
labels = vec![LabeledSpan::at_offset(
offset + line.len(),
"expected more columns here"
)],
"each row in the summary file should have exactly two \
columns. This row only has {column_count} columns.",
)
.with_source_code(source))
}
[_, (first_bad_tab, _), ..] => {
let span = offset + first_bad_tab..offset + line.len();
return Err(miette!(
labels =
vec![LabeledSpan::at(span, "unexpected extra columns")],
"each row in the summary file should have exactly two \
columns. This row has {column_count} columns.",
)
.with_source_code(source));
}
};
let test = unescape_tsv_value(test);
let result = unescape_tsv_value(result);
let result = result.parse().map_err(|_| {
miette!(
labels =
vec![LabeledSpan::at(result_span, "invalid result value")],
"test result value must be one of 'PASS', 'FAIL', or 'SKIP'."
)
.with_source_code(source.clone())
})?;
results.insert(test, result);
offset += line.len() + 1;
}
Ok(results)
}
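For reference, a minimal summary file that read_summary accepts looks like this, with the two columns separated by a single tab (the test names here are made up):

test	result
TestFoo	PASS
TestBar	FAIL
TestBaz	SKIP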
/// Print a bulleted list of test names, truncating if there are too many.
fn print_truncated_tests(tests: &[&str]) {
let max = 5;
for test in &tests[..max.min(tests.len())] {
println!(" - {test}");
}
if tests.len() > max {
println!(" ... ({} more)", tests.len() - max);
}
}
/// Compares new test results against older results, returning an error if they
/// differ.
///
/// A description of the differences will be logged separately from the returned
/// error.
pub(crate) fn compare_summary(
old: &TestResults,
new: &TestResults,
old_path: &Path,
new_path: &Path,
) -> Result<()> {
let mut unexpected_pass: Vec<&str> = Vec::new();
let mut unexpected_fail: Vec<&str> = Vec::new();
let mut unexpected_skip: Vec<&str> = Vec::new();
let mut added: Vec<&str> = Vec::new();
let mut removed: Vec<&str> = Vec::new();
for (test, new_result) in new {
if let Some(old_result) = old.get(test) {
if old_result != new_result {
match new_result {
TestResult::Pass => unexpected_pass.push(test),
TestResult::Fail => unexpected_fail.push(test),
TestResult::Skip => unexpected_skip.push(test),
}
}
} else {
added.push(test);
}
}
for test in old.keys() {
if !new.contains_key(test) {
removed.push(test);
}
}
let mut differences = false;
if !added.is_empty() {
differences = true;
println!(
"\n{} tests were added that were not present in the baseline:",
added.len()
);
print_truncated_tests(&added);
}
if !removed.is_empty() {
differences = true;
println!(
"\n{} tests present in the baseline were removed:",
removed.len()
);
print_truncated_tests(&removed);
}
if !unexpected_pass.is_empty() {
differences = true;
println!(
"\n{} tests passed that did not pass in the baseline:",
unexpected_pass.len()
);
print_truncated_tests(&unexpected_pass);
}
if !unexpected_skip.is_empty() {
differences = true;
println!(
"\n{} tests skipped that were not skipped in the baseline:",
unexpected_skip.len()
);
print_truncated_tests(&unexpected_skip);
}
if !unexpected_fail.is_empty() {
differences = true;
println!(
"\n{} tests failed that did not fail in the baseline (these are \
likely regressions):",
unexpected_fail.len()
);
print_truncated_tests(&unexpected_fail);
}
if differences {
Err(miette!(
help = format!(
"Evaluate each of the differences to determine whether they \
are expected. If all differences are expected, copy the new \
summary file {new_path:?} to {old_path:?} and commit the \
change. If some differences are unexpected, fix them and try \
another test run.\n\nAn example of an expected change would \
be a test that is now passing after your changes fixed it. \
An example of an unexpected change would be an unrelated \
test that is now failing, which would be a regression."
),
"Test results differed from baseline in {old_path:?}. The \
differences are described above."
))
} else {
Ok(())
}
}
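When results drift, the report printed by compare_summary looks roughly like this (test names and counts invented for illustration), followed by the miette error described above:

6 tests failed that did not fail in the baseline (these are likely regressions):
 - TestFoo
 - TestBar
 - TestBaz
 - TestQux
 - TestQuux
 ... (1 more)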


@@ -14,7 +14,7 @@ use std::{
use indicatif::{ProgressBar, ProgressStyle};
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
use serde::Deserialize;
use strum::Display;
use strum::{Display, EnumString};
use xshell::{cmd, Shell};
use super::summary::{write_summary, TestResults};
@@ -43,7 +43,7 @@ pub(crate) fn run_complement(
out: &Path,
docker_image: &str,
test_count: u64,
) -> Result<()> {
) -> Result<TestResults> {
// TODO: handle SIG{INT,TERM}
// TODO: XTASK_PATH variable, so that we don't need to pollute devshell with
// go
@@ -70,7 +70,7 @@ pub(crate) fn run_complement(
ctx.handle_line(&line)?;
}
Ok(())
Ok(ctx.results)
}
/// Schema from <https://pkg.go.dev/cmd/test2json#hdr-Output_Format>
@@ -103,7 +103,7 @@ enum GoTestEvent {
OtherAction,
}
#[derive(Copy, Clone, Display, Debug)]
#[derive(Copy, Clone, Display, EnumString, Eq, PartialEq, Debug)]
#[strum(serialize_all = "UPPERCASE")]
pub(crate) enum TestResult {
Pass,
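With EnumString and serialize_all = "UPPERCASE", strum also generates a FromStr impl that matches the serialized names, which is what the result.parse() call in read_summary relies on. A small illustration, using the Pass variant and the Fail and Skip variants seen in compare_summary:

assert_eq!("PASS".parse::<TestResult>(), Ok(TestResult::Pass));
// Matching is case-sensitive unless strum's ascii_case_insensitive is enabled.
assert!("pass".parse::<TestResult>().is_err());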