mirror of
https://gitlab.computer.surgery/matrix/grapevine.git
synced 2025-12-18 16:21:24 +01:00
compare complement test results to a baseline
One thing that might be neat in the future is noticing differing results while the tests are still running, and modifying the log messages to indicate them. I can imagine situations where you would want to abort the test run immediately after seeing the first regression.
This commit is contained in:
parent
ab883ffa44
commit
8eab6eea20
4 changed files with 481 additions and 6 deletions
|
|
@ -12,6 +12,7 @@ mod test2json;
|
|||
|
||||
use self::{
|
||||
docker::load_docker_image,
|
||||
summary::{compare_summary, read_summary},
|
||||
test2json::{count_complement_tests, run_complement},
|
||||
};
|
||||
|
||||
|
|
@ -23,6 +24,12 @@ pub(crate) struct Args {
|
|||
/// If it exists and is not empty, an error will be returned.
|
||||
#[clap(short, long)]
|
||||
out: PathBuf,
|
||||
|
||||
/// Baseline test summary file to compare with
|
||||
///
|
||||
/// If unspecified, defaults to `$repo_root/complement-baseline.tsv`
|
||||
#[clap(short, long)]
|
||||
baseline: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
|
|
@ -30,6 +37,15 @@ pub(crate) fn main(args: Args) -> Result<()> {
|
|||
let sh = Shell::new().unwrap();
|
||||
let toplevel = get_toplevel_path(&sh)
|
||||
.wrap_err("failed to determine repository root directory")?;
|
||||
let baseline_path = args
|
||||
.baseline
|
||||
.unwrap_or_else(|| toplevel.join("complement-baseline.tsv"));
|
||||
let baseline = read_summary(&baseline_path).wrap_err_with(|| {
|
||||
format!(
|
||||
"failed to read baseline test result summary from \
|
||||
{baseline_path:?}"
|
||||
)
|
||||
})?;
|
||||
create_out_dir(&args.out).wrap_err_with(|| {
|
||||
format!("error initializing output directory {:?}", args.out)
|
||||
})?;
|
||||
|
|
@ -38,8 +54,11 @@ pub(crate) fn main(args: Args) -> Result<()> {
|
|||
)?;
|
||||
let test_count = count_complement_tests(&sh, &docker_image)
|
||||
.wrap_err("failed to determine total complement test count")?;
|
||||
run_complement(&sh, &args.out, &docker_image, test_count)
|
||||
let results = run_complement(&sh, &args.out, &docker_image, test_count)
|
||||
.wrap_err("failed to run complement tests")?;
|
||||
let summary_path = args.out.join("summary.tsv");
|
||||
compare_summary(&baseline, &results, &baseline_path, &summary_path)?;
|
||||
println!("\nTest results were identical to baseline.");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,10 +6,14 @@
|
|||
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
fs,
|
||||
io::{BufWriter, Write},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use miette::{IntoDiagnostic, Result};
|
||||
use miette::{
|
||||
miette, IntoDiagnostic, LabeledSpan, NamedSource, Result, WrapErr,
|
||||
};
|
||||
|
||||
use super::test2json::TestResult;
|
||||
|
||||
|
|
@ -29,6 +33,30 @@ fn escape_tsv_value(value: &str) -> String {
|
|||
.replace('\r', "\\r")
|
||||
}
|
||||
|
||||
/// Converts an escaped TSV value back to its original, unescaped form.
///
/// Recognized escape sequences are `\\`, `\n`, `\t`, and `\r` (the inverse of
/// `escape_tsv_value`). Any unrecognized `\` sequence — including a lone
/// trailing backslash — is preserved verbatim rather than rejected.
fn unescape_tsv_value(value: &str) -> String {
    let mut unescaped = String::with_capacity(value.len());
    let mut remaining = value.chars();
    while let Some(ch) = remaining.next() {
        if ch != '\\' {
            unescaped.push(ch);
            continue;
        }
        match remaining.next() {
            Some('n') => unescaped.push('\n'),
            Some('t') => unescaped.push('\t'),
            Some('r') => unescaped.push('\r'),
            Some('\\') => unescaped.push('\\'),
            // Unknown escape: keep the backslash and the following
            // character exactly as they appeared.
            Some(other) => {
                unescaped.push('\\');
                unescaped.push(other);
            }
            // Trailing backslash with nothing after it.
            None => unescaped.push('\\'),
        }
    }
    unescaped
}
|
||||
|
||||
/// Write a test result summary to a writer.
|
||||
pub(crate) fn write_summary<W: Write>(
|
||||
w: &mut BufWriter<W>,
|
||||
|
|
@ -48,3 +76,211 @@ pub(crate) fn write_summary<W: Write>(
|
|||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reads test result summary from a TSV file written by a previous run of the
|
||||
/// complement wrapper.
|
||||
pub(crate) fn read_summary(
|
||||
path: &Path,
|
||||
) -> Result<BTreeMap<String, TestResult>> {
|
||||
let contents = fs::read_to_string(path)
|
||||
.into_diagnostic()
|
||||
.wrap_err("failed to read summary file contents")?;
|
||||
let source = NamedSource::new(path.to_string_lossy(), contents);
|
||||
let contents = &source.inner();
|
||||
|
||||
let mut offset = 0;
|
||||
// The TSV spec allows CRLF, but we never emit these ourselves
|
||||
let mut lines = contents.split('\n');
|
||||
|
||||
let header_line = lines.next().ok_or_else(|| {
|
||||
miette!(
|
||||
labels = vec![LabeledSpan::at_offset(0, "expected header row")],
|
||||
"summary file missing header row",
|
||||
)
|
||||
.with_source_code(source.clone())
|
||||
})?;
|
||||
let expected_header_line = "test\tresult";
|
||||
if header_line != expected_header_line {
|
||||
return Err(miette!(
|
||||
labels = vec![LabeledSpan::at(
|
||||
0..header_line.len(),
|
||||
"unexpected header"
|
||||
)],
|
||||
"summary file header row has unexpected columns. Expecting \
|
||||
{expected_header_line:?}."
|
||||
)
|
||||
.with_source_code(source));
|
||||
}
|
||||
offset += header_line.len() + 1;
|
||||
|
||||
let mut results = BTreeMap::new();
|
||||
for line in lines {
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let tabs = line.match_indices('\t').collect::<Vec<_>>();
|
||||
let column_count = tabs.len() + 1;
|
||||
let (result_span, test, result) = match tabs[..] {
|
||||
[(first_tab, _)] => {
|
||||
let result_span = offset + first_tab + 1..offset + line.len();
|
||||
let test = line.get(..first_tab).expect(
|
||||
"index should be valid because it was returned from \
|
||||
'match_indices'",
|
||||
);
|
||||
let result = line.get(first_tab + 1..).expect(
|
||||
"index should be valid because it was returned from \
|
||||
'match_indices'",
|
||||
);
|
||||
(result_span, test, result)
|
||||
}
|
||||
[] => {
|
||||
return Err(miette!(
|
||||
labels = vec![LabeledSpan::at_offset(
|
||||
offset + line.len(),
|
||||
"expected more columns here"
|
||||
)],
|
||||
"each row in the summary file should have exactly two \
|
||||
columns. This row only has {column_count} columns.",
|
||||
)
|
||||
.with_source_code(source))
|
||||
}
|
||||
[_, (first_bad_tab, _), ..] => {
|
||||
let span = offset + first_bad_tab..offset + line.len();
|
||||
return Err(miette!(
|
||||
labels =
|
||||
vec![LabeledSpan::at(span, "unexpected extra columns")],
|
||||
"each row in the summary file should have exactly two \
|
||||
columns. This row has {column_count} columns.",
|
||||
)
|
||||
.with_source_code(source));
|
||||
}
|
||||
};
|
||||
|
||||
let test = unescape_tsv_value(test);
|
||||
let result = unescape_tsv_value(result);
|
||||
|
||||
let result = result.parse().map_err(|_| {
|
||||
miette!(
|
||||
labels =
|
||||
vec![LabeledSpan::at(result_span, "invalid result value")],
|
||||
"test result value must be one of 'PASS', 'FAIL', or 'SKIP'."
|
||||
)
|
||||
.with_source_code(source.clone())
|
||||
})?;
|
||||
|
||||
results.insert(test, result);
|
||||
offset += line.len() + 1;
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Print a bulleted list of test names, truncating if there are too many.
///
/// At most five names are listed; any remainder is summarized as a count.
fn print_truncated_tests(tests: &[&str]) {
    const MAX_SHOWN: usize = 5;
    for test in tests.iter().take(MAX_SHOWN) {
        println!("  - {test}");
    }
    let hidden = tests.len().saturating_sub(MAX_SHOWN);
    if hidden > 0 {
        println!("  ... ({} more)", hidden);
    }
}
|
||||
|
||||
/// Compares new test results against older results, returning a error if they
|
||||
/// differ.
|
||||
///
|
||||
/// A description of the differences will be logged separately from the returned
|
||||
/// error.
|
||||
pub(crate) fn compare_summary(
|
||||
old: &TestResults,
|
||||
new: &TestResults,
|
||||
old_path: &Path,
|
||||
new_path: &Path,
|
||||
) -> Result<()> {
|
||||
let mut unexpected_pass: Vec<&str> = Vec::new();
|
||||
let mut unexpected_fail: Vec<&str> = Vec::new();
|
||||
let mut unexpected_skip: Vec<&str> = Vec::new();
|
||||
let mut added: Vec<&str> = Vec::new();
|
||||
let mut removed: Vec<&str> = Vec::new();
|
||||
|
||||
for (test, new_result) in new {
|
||||
if let Some(old_result) = old.get(test) {
|
||||
if old_result != new_result {
|
||||
match new_result {
|
||||
TestResult::Pass => unexpected_pass.push(test),
|
||||
TestResult::Fail => unexpected_fail.push(test),
|
||||
TestResult::Skip => unexpected_skip.push(test),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
added.push(test);
|
||||
}
|
||||
}
|
||||
for test in old.keys() {
|
||||
if !new.contains_key(test) {
|
||||
removed.push(test);
|
||||
}
|
||||
}
|
||||
|
||||
let mut differences = false;
|
||||
if !added.is_empty() {
|
||||
differences = true;
|
||||
println!(
|
||||
"\n{} tests were added that were not present in the baseline:",
|
||||
added.len()
|
||||
);
|
||||
print_truncated_tests(&added);
|
||||
}
|
||||
if !removed.is_empty() {
|
||||
differences = true;
|
||||
println!(
|
||||
"\n{} tests present in the baseline were removed:",
|
||||
removed.len()
|
||||
);
|
||||
print_truncated_tests(&removed);
|
||||
}
|
||||
if !unexpected_pass.is_empty() {
|
||||
differences = true;
|
||||
println!(
|
||||
"\n{} tests passed that did not pass in the baseline:",
|
||||
unexpected_pass.len()
|
||||
);
|
||||
print_truncated_tests(&unexpected_pass);
|
||||
}
|
||||
if !unexpected_skip.is_empty() {
|
||||
differences = true;
|
||||
println!(
|
||||
"\n{} tests skipped that were not skipped in the baseline:",
|
||||
unexpected_skip.len()
|
||||
);
|
||||
print_truncated_tests(&unexpected_skip);
|
||||
}
|
||||
if !unexpected_fail.is_empty() {
|
||||
differences = true;
|
||||
println!(
|
||||
"\n{} tests failed that did not fail in the baseline (these are \
|
||||
likely regressions):",
|
||||
unexpected_fail.len()
|
||||
);
|
||||
print_truncated_tests(&unexpected_fail);
|
||||
}
|
||||
|
||||
if differences {
|
||||
Err(miette!(
|
||||
help = format!(
|
||||
"Evaluate each of the differences to determine whether they \
|
||||
are expected. If all differences are expected, copy the new \
|
||||
summary file {new_path:?} to {old_path:?} and commit the \
|
||||
change. If some differences are unexpected, fix them and try \
|
||||
another test run.\n\nAn example of an expected change would \
|
||||
be a test that is now passing after your changes fixed it. \
|
||||
An example of an unexpected change would be an unrelated \
|
||||
test that is now failing, which would be a regression."
|
||||
),
|
||||
"Test results differed from baseline in {old_path:?}. The \
|
||||
differences are described above."
|
||||
))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ use std::{
|
|||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
|
||||
use serde::Deserialize;
|
||||
use strum::Display;
|
||||
use strum::{Display, EnumString};
|
||||
use xshell::{cmd, Shell};
|
||||
|
||||
use super::summary::{write_summary, TestResults};
|
||||
|
|
@ -43,7 +43,7 @@ pub(crate) fn run_complement(
|
|||
out: &Path,
|
||||
docker_image: &str,
|
||||
test_count: u64,
|
||||
) -> Result<()> {
|
||||
) -> Result<TestResults> {
|
||||
// TODO: handle SIG{INT,TERM}
|
||||
// TODO: XTASK_PATH variable, so that we don't need to pollute devshell with
|
||||
// go
|
||||
|
|
@ -70,7 +70,7 @@ pub(crate) fn run_complement(
|
|||
ctx.handle_line(&line)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(ctx.results)
|
||||
}
|
||||
|
||||
/// Schema from <https://pkg.go.dev/cmd/test2json#hdr-Output_Format>
|
||||
|
|
@ -103,7 +103,7 @@ enum GoTestEvent {
|
|||
OtherAction,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Display, Debug)]
|
||||
#[derive(Copy, Clone, Display, EnumString, Eq, PartialEq, Debug)]
|
||||
#[strum(serialize_all = "UPPERCASE")]
|
||||
pub(crate) enum TestResult {
|
||||
Pass,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue