compare complement test results to a baseline

One thing that might be neat in the future is noticing differing results
while the tests are still running, and modifying the log messages to
indicate them. I can imagine situations where you would want to abort
the test run immediately after seeing the first regression.
Benjamin Lee 2024-06-21 16:33:51 -07:00
parent ab883ffa44
commit 8eab6eea20
4 changed files with 481 additions and 6 deletions
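The early-abort idea from the commit message could look roughly like this. This is only a sketch, none of it is in the commit: the `on_test_result` hook is hypothetical and assumes `run_complement` gains a per-test callback; `TestResult` is the enum from the test2json module below.

use std::collections::BTreeMap;

use miette::{miette, Result};

// Hypothetical hook, called as each test finishes, that annotates the live
// log output when a result differs from the baseline and optionally aborts.
fn on_test_result(
    baseline: &BTreeMap<String, TestResult>,
    test: &str,
    result: TestResult,
    abort_on_regression: bool,
) -> Result<()> {
    match baseline.get(test) {
        Some(&old) if old != result => {
            // Flag the change in the log line as it happens.
            eprintln!("result differs from baseline: {test}: {old} -> {result}");
            if abort_on_regression && result == TestResult::Fail {
                return Err(miette!("aborting after first regression: {test}"));
            }
            Ok(())
        }
        _ => Ok(()),
    }
}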


@@ -12,6 +12,7 @@ mod test2json;
use self::{
docker::load_docker_image,
summary::{compare_summary, read_summary},
test2json::{count_complement_tests, run_complement},
};
@@ -23,6 +24,12 @@ pub(crate) struct Args {
/// If it exists and is not empty, an error will be returned.
#[clap(short, long)]
out: PathBuf,
/// Baseline test summary file to compare with
///
/// If unspecified, defaults to `$repo_root/complement-baseline.tsv`
#[clap(short, long)]
baseline: Option<PathBuf>,
}
#[allow(clippy::needless_pass_by_value)]
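For illustration, the new flag might be used like this; the actual xtask subcommand name isn't visible in this diff, so `complement` below is a placeholder:

cargo xtask complement --out target/complement --baseline complement-baseline.tsv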
@@ -30,6 +37,15 @@ pub(crate) fn main(args: Args) -> Result<()> {
let sh = Shell::new().unwrap();
let toplevel = get_toplevel_path(&sh)
.wrap_err("failed to determine repository root directory")?;
let baseline_path = args
.baseline
.unwrap_or_else(|| toplevel.join("complement-baseline.tsv"));
let baseline = read_summary(&baseline_path).wrap_err_with(|| {
format!(
"failed to read baseline test result summary from \
{baseline_path:?}"
)
})?;
create_out_dir(&args.out).wrap_err_with(|| {
format!("error initializing output directory {:?}", args.out)
})?;
@@ -38,8 +54,11 @@ pub(crate) fn main(args: Args) -> Result<()> {
)?;
let test_count = count_complement_tests(&sh, &docker_image)
.wrap_err("failed to determine total complement test count")?;
run_complement(&sh, &args.out, &docker_image, test_count)
let results = run_complement(&sh, &args.out, &docker_image, test_count)
.wrap_err("failed to run complement tests")?;
let summary_path = args.out.join("summary.tsv");
compare_summary(&baseline, &results, &baseline_path, &summary_path)?;
println!("\nTest results were identical to baseline.");
Ok(())
}


@@ -6,10 +6,14 @@
use std::{
collections::BTreeMap,
fs,
io::{BufWriter, Write},
path::Path,
};
use miette::{IntoDiagnostic, Result};
use miette::{
miette, IntoDiagnostic, LabeledSpan, NamedSource, Result, WrapErr,
};
use super::test2json::TestResult;
@@ -29,6 +33,30 @@ fn escape_tsv_value(value: &str) -> String {
.replace('\r', "\\r")
}
/// Converts a string from escaped TSV value form back to its unescaped form.
fn unescape_tsv_value(value: &str) -> String {
let mut chars = value.chars();
let mut out = String::new();
while let Some(c) = chars.next() {
if c == '\\' {
match chars.next() {
Some('\\') => out.push('\\'),
Some('n') => out.push('\n'),
Some('t') => out.push('\t'),
Some('r') => out.push('\r'),
Some(c2) => {
out.push(c);
out.push(c2);
}
None => out.push(c),
}
} else {
out.push(c);
}
}
out
}
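A quick round-trip illustration of the escaping scheme. This is a sketch; the first assertion assumes `escape_tsv_value`, whose tail appears above, escapes backslashes before the control characters:

assert_eq!(escape_tsv_value("a\tb\nc"), "a\\tb\\nc");
assert_eq!(unescape_tsv_value("a\\tb\\nc"), "a\tb\nc");
// Unknown escapes and a trailing backslash pass through unchanged:
assert_eq!(unescape_tsv_value("a\\xb\\"), "a\\xb\\");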
/// Write a test result summary to a writer.
pub(crate) fn write_summary<W: Write>(
w: &mut BufWriter<W>,
@@ -48,3 +76,211 @@ pub(crate) fn write_summary<W: Write>(
}
Ok(())
}
/// Reads a test result summary from a TSV file written by a previous run of the
/// complement wrapper.
pub(crate) fn read_summary(
path: &Path,
) -> Result<BTreeMap<String, TestResult>> {
let contents = fs::read_to_string(path)
.into_diagnostic()
.wrap_err("failed to read summary file contents")?;
let source = NamedSource::new(path.to_string_lossy(), contents);
let contents = &source.inner();
let mut offset = 0;
// The TSV spec allows CRLF, but we never emit these ourselves
let mut lines = contents.split('\n');
let header_line = lines.next().ok_or_else(|| {
miette!(
labels = vec![LabeledSpan::at_offset(0, "expected header row")],
"summary file missing header row",
)
.with_source_code(source.clone())
})?;
let expected_header_line = "test\tresult";
if header_line != expected_header_line {
return Err(miette!(
labels = vec![LabeledSpan::at(
0..header_line.len(),
"unexpected header"
)],
"summary file header row has unexpected columns. Expecting \
{expected_header_line:?}."
)
.with_source_code(source));
}
offset += header_line.len() + 1;
let mut results = BTreeMap::new();
for line in lines {
if line.is_empty() {
continue;
}
let tabs = line.match_indices('\t').collect::<Vec<_>>();
let column_count = tabs.len() + 1;
let (result_span, test, result) = match tabs[..] {
[(first_tab, _)] => {
let result_span = offset + first_tab + 1..offset + line.len();
let test = line.get(..first_tab).expect(
"index should be valid because it was returned from \
'match_indices'",
);
let result = line.get(first_tab + 1..).expect(
"index should be valid because it was returned from \
'match_indices'",
);
(result_span, test, result)
}
[] => {
return Err(miette!(
labels = vec![LabeledSpan::at_offset(
offset + line.len(),
"expected more columns here"
)],
"each row in the summary file should have exactly two \
columns. This row only has {column_count} columns.",
)
.with_source_code(source))
}
[_, (first_bad_tab, _), ..] => {
let span = offset + first_bad_tab..offset + line.len();
return Err(miette!(
labels =
vec![LabeledSpan::at(span, "unexpected extra columns")],
"each row in the summary file should have exactly two \
columns. This row has {column_count} columns.",
)
.with_source_code(source));
}
};
let test = unescape_tsv_value(test);
let result = unescape_tsv_value(result);
let result = result.parse().map_err(|_| {
miette!(
labels =
vec![LabeledSpan::at(result_span, "invalid result value")],
"test result value must be one of 'PASS', 'FAIL', or 'SKIP'."
)
.with_source_code(source.clone())
})?;
results.insert(test, result);
offset += line.len() + 1;
}
Ok(results)
}
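For reference, a minimal summary file that read_summary accepts looks like this, with the two columns separated by a single tab (the test names here are made up):

test	result
TestFoo	PASS
TestBar	FAIL
TestBaz	SKIP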
/// Print a bulleted list of test names, truncating if there are too many.
fn print_truncated_tests(tests: &[&str]) {
let max = 5;
for test in &tests[..max.min(tests.len())] {
println!(" - {test}");
}
if tests.len() > max {
println!(" ... ({} more)", tests.len() - max);
}
}
/// Compares new test results against older results, returning an error if they
/// differ.
///
/// A description of the differences will be logged separately from the returned
/// error.
pub(crate) fn compare_summary(
old: &TestResults,
new: &TestResults,
old_path: &Path,
new_path: &Path,
) -> Result<()> {
let mut unexpected_pass: Vec<&str> = Vec::new();
let mut unexpected_fail: Vec<&str> = Vec::new();
let mut unexpected_skip: Vec<&str> = Vec::new();
let mut added: Vec<&str> = Vec::new();
let mut removed: Vec<&str> = Vec::new();
for (test, new_result) in new {
if let Some(old_result) = old.get(test) {
if old_result != new_result {
match new_result {
TestResult::Pass => unexpected_pass.push(test),
TestResult::Fail => unexpected_fail.push(test),
TestResult::Skip => unexpected_skip.push(test),
}
}
} else {
added.push(test);
}
}
for test in old.keys() {
if !new.contains_key(test) {
removed.push(test);
}
}
let mut differences = false;
if !added.is_empty() {
differences = true;
println!(
"\n{} tests were added that were not present in the baseline:",
added.len()
);
print_truncated_tests(&added);
}
if !removed.is_empty() {
differences = true;
println!(
"\n{} tests present in the baseline were removed:",
removed.len()
);
print_truncated_tests(&removed);
}
if !unexpected_pass.is_empty() {
differences = true;
println!(
"\n{} tests passed that did not pass in the baseline:",
unexpected_pass.len()
);
print_truncated_tests(&unexpected_pass);
}
if !unexpected_skip.is_empty() {
differences = true;
println!(
"\n{} tests skipped that were not skipped in the baseline:",
unexpected_skip.len()
);
print_truncated_tests(&unexpected_skip);
}
if !unexpected_fail.is_empty() {
differences = true;
println!(
"\n{} tests failed that did not fail in the baseline (these are \
likely regressions):",
unexpected_fail.len()
);
print_truncated_tests(&unexpected_fail);
}
if differences {
Err(miette!(
help = format!(
"Evaluate each of the differences to determine whether they \
are expected. If all differences are expected, copy the new \
summary file {new_path:?} to {old_path:?} and commit the \
change. If some differences are unexpected, fix them and try \
another test run.\n\nAn example of an expected change would \
be a test that is now passing after your changes fixed it. \
An example of an unexpected change would be an unrelated \
test that is now failing, which would be a regression."
),
"Test results differed from baseline in {old_path:?}. The \
differences are described above."
))
} else {
Ok(())
}
}
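When results drift, the report printed by compare_summary looks roughly like this (test names and counts invented for illustration), followed by the miette error described above:

6 tests failed that did not fail in the baseline (these are likely regressions):
 - TestFoo
 - TestBar
 - TestBaz
 - TestQux
 - TestQuux
 ... (1 more)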


@@ -14,7 +14,7 @@ use std::{
use indicatif::{ProgressBar, ProgressStyle};
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
use serde::Deserialize;
use strum::Display;
use strum::{Display, EnumString};
use xshell::{cmd, Shell};
use super::summary::{write_summary, TestResults};
@@ -43,7 +43,7 @@ pub(crate) fn run_complement(
out: &Path,
docker_image: &str,
test_count: u64,
) -> Result<()> {
) -> Result<TestResults> {
// TODO: handle SIG{INT,TERM}
// TODO: XTASK_PATH variable, so that we don't need to pollute devshell with
// go
@@ -70,7 +70,7 @@ pub(crate) fn run_complement(
ctx.handle_line(&line)?;
}
Ok(())
Ok(ctx.results)
}
/// Schema from <https://pkg.go.dev/cmd/test2json#hdr-Output_Format>
@@ -103,7 +103,7 @@ enum GoTestEvent {
OtherAction,
}
#[derive(Copy, Clone, Display, Debug)]
#[derive(Copy, Clone, Display, EnumString, Eq, PartialEq, Debug)]
#[strum(serialize_all = "UPPERCASE")]
pub(crate) enum TestResult {
Pass,
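With EnumString and serialize_all = "UPPERCASE", strum also generates a FromStr impl that matches the serialized names, which is what the result.parse() call in read_summary relies on. A small illustration, using the Pass variant and the Fail and Skip variants seen in compare_summary:

assert_eq!("PASS".parse::<TestResult>(), Ok(TestResult::Pass));
// Matching is case-sensitive unless strum's ascii_case_insensitive is enabled.
assert!("pass".parse::<TestResult>().is_err());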