grapevine/xtask/src/complement/test2json.rs
2024-09-24 15:42:12 -07:00

492 lines
16 KiB
Rust

//! Functions for working with the go [`test2json`][test2json] tool.
//!
//! [test2json]: https://pkg.go.dev/cmd/test2json@go1.22.4
use std::{
collections::BTreeMap,
fs::{self, File},
io::{BufRead, BufReader, BufWriter, Seek, SeekFrom, Write},
mem, panic,
path::{Path, PathBuf},
process::{Command, Stdio},
sync::{
atomic::{AtomicBool, Ordering},
Arc, Mutex,
},
thread,
time::Duration,
};
use indicatif::{ProgressBar, ProgressStyle};
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
use process_wrap::std::{ProcessGroup, StdChildWrapper, StdCommandWrap};
use serde::Deserialize;
use signal_hook::{
consts::signal::{SIGINT, SIGQUIT, SIGTERM},
flag,
iterator::Signals,
};
use strum::{Display, EnumString};
use xshell::{cmd, Shell};
use super::summary::{write_summary, TestResults};
/// Returns the total number of complement tests that will be run
///
/// This is only able to count toplevel tests, and will not included subtests
/// (`A/B`)
pub(crate) fn count_complement_tests(
sh: &Shell,
docker_image: &str,
) -> Result<u64> {
let test_list = cmd!(sh, "go tool test2json complement.test -test.list .*")
.env("COMPLEMENT_BASE_IMAGE", docker_image)
.read()
.into_diagnostic()?;
let test_count = u64::try_from(test_list.lines().count())
.into_diagnostic()
.wrap_err("test count overflowed u64")?;
Ok(test_count)
}
/// Run complement tests.
///
/// This function mostly deals with handling shutdown signals, while the actual
/// logic for running complement is in `run_complement_inner`, which is spawned
/// as a separate thread. This is necessary because the go `test2json` tool
/// ignores SIGTERM and SIGINT. Without signal handling on our end, terminating
/// the complement wrapper process would leave a dangling complement child
/// process running.
///
/// The reason that `test2json` does this is that it does not implement any kind
/// of test cleanup, and so the developers decided that ignoring termination
/// signals entirely was safer. Running go unit tests outside of `test2json`
/// (and so without machine-readable output) does not have this limitation.
/// Unfortunately neither of these are an option for us. We need
/// machine-readable output to compare against the baseline result. Complement
/// runs can take 40+ minutes, so being able to cancel them is a requirement.
///
/// Because we don't trigger any of the normal cleanup, we need to handle
/// dangling docker containers ourselves.
pub(crate) fn run_complement(
sh: &Shell,
out: &Path,
docker_image: &str,
test_count: u64,
) -> Result<TestResults> {
let term_signals = [SIGTERM, SIGINT, SIGQUIT];
let term_now = Arc::new(AtomicBool::new(false));
for sig in &term_signals {
// Terminate immediately if `term_now` is true and we receive a
// terminating signal
flag::register_conditional_shutdown(*sig, 1, Arc::clone(&term_now))
.into_diagnostic()
.wrap_err("error registering signal handler")?;
}
let mut signals = Signals::new(term_signals).unwrap();
let state = Mutex::new(ComplementRunnerState::Startup);
let signals_handle = signals.handle();
let result = thread::scope(|s| {
let state_ref = &state;
let cloned_sh = sh.clone();
let thread_handle = s.spawn(move || {
let panic_result = panic::catch_unwind(|| {
run_complement_inner(
&cloned_sh,
out,
docker_image,
test_count,
state_ref,
)
});
// Stop the signal-handling loop, even if we panicked
signals_handle.close();
match panic_result {
Ok(result) => result,
Err(panic) => panic::resume_unwind(panic),
}
});
let canceled = if let Some(signal) = signals.forever().next() {
let description = match signal {
SIGTERM => "SIGTERM",
SIGINT => "ctrl+c",
SIGQUIT => "SIGQUIT",
_ => unreachable!(),
};
eprintln!(
"Received {description}, stopping complement run. Send \
{description} a second time to terminate without cleaning \
up, which may leave dangling processes and docker containers"
);
term_now.store(true, Ordering::Relaxed);
{
let mut state = state.lock().unwrap();
let old_state =
mem::replace(&mut *state, ComplementRunnerState::Shutdown);
match old_state {
ComplementRunnerState::Startup => (),
ComplementRunnerState::Shutdown => unreachable!(),
ComplementRunnerState::Running(mut child) => {
// Killing the child process should terminate the
// complement runner thread in a
// bounded amount of time, because it will cause the
// stdout reader to return EOF.
child.kill().unwrap();
}
}
}
// TODO: kill dangling docker containers
eprintln!(
"WARNING: complement may have left dangling docker \
containers. Cleanup for these is planned, but has not been \
implemented yet. You need to identify and kill them manually"
);
true
} else {
// hit this branch if the signal handler is closed by the complement
// runner thread. This means the complement run finished
// without being canceled.
false
};
match thread_handle.join() {
Ok(result) => {
if canceled {
Err(miette!("complement run was canceled"))
} else {
result
}
}
Err(panic_value) => panic::resume_unwind(panic_value),
}
});
// From this point on, terminate immediately when signalled
term_now.store(true, Ordering::Relaxed);
result
}
/// Possible states for the complement runner thread.
///
/// The current state should be protected by a mutex, where state changes are
/// only performed while the mutex is locked. This is to prevent a race
/// condition where the main thread handles a shutdown signal at the same time
/// that the complement runner thread is starting the child process, and so the
/// main thread fails to kill the child process.
///
/// Valid state transitions:
///
/// - `Startup` -> `Running`
/// - `Startup` -> `Shutdown`
/// - `Running` -> `Shutdown`
#[derive(Debug)]
enum ComplementRunnerState {
/// The complement child process has not been started yet
Startup,
/// The complement child process is running, and we have not yet received
/// a shutdown signal.
Running(Box<dyn StdChildWrapper>),
/// We have received a shutdown signal.
Shutdown,
}
/// Spawn complement chind process and handle it's output
///
/// This is the "complement runner" thread, spawned by the [`run_complement`]
/// function.
fn run_complement_inner(
sh: &Shell,
out: &Path,
docker_image: &str,
test_count: u64,
state: &Mutex<ComplementRunnerState>,
) -> Result<TestResults> {
let cmd = cmd!(sh, "go tool test2json complement.test -test.v=test2json")
.env("COMPLEMENT_BASE_IMAGE", docker_image)
.env("COMPLEMENT_SPAWN_HS_TIMEOUT", "5")
.env("COMPLEMENT_ALWAYS_PRINT_SERVER_LOGS", "1");
eprintln!("$ {cmd}");
let stdout = {
let mut state = state.lock().unwrap();
match &*state {
ComplementRunnerState::Startup => (),
ComplementRunnerState::Running(_) => unreachable!(),
ComplementRunnerState::Shutdown => {
return Err(miette!("complement run was canceled"))
}
}
let mut cmd = Command::from(cmd);
cmd.stdout(Stdio::piped());
let mut child = StdCommandWrap::from(cmd)
.wrap(ProcessGroup::leader())
.spawn()
.into_diagnostic()
.wrap_err("error spawning complement process")?;
let stdout = child.stdout().take().expect(
"child process spawned with piped stdout should have stdout",
);
*state = ComplementRunnerState::Running(child);
stdout
};
let lines = BufReader::new(stdout).lines();
let mut ctx = TestContext::new(out, test_count)?;
for line in lines {
let line = line
.into_diagnostic()
.wrap_err("error reading output from complement process")?;
ctx.handle_line(&line)?;
}
Ok(ctx.results)
}
/// Schema from <https://pkg.go.dev/cmd/test2json#hdr-Output_Format>
///
/// Only the fields that we need are included here.
#[derive(Deserialize)]
#[serde(
rename_all = "snake_case",
rename_all_fields = "PascalCase",
tag = "Action"
)]
enum GoTestEvent {
Run {
test: Option<String>,
},
Pass {
test: Option<String>,
},
Fail {
test: Option<String>,
},
Skip {
test: Option<String>,
},
Output {
test: Option<String>,
output: String,
},
#[serde(other)]
OtherAction,
}
#[derive(Copy, Clone, Display, EnumString, Eq, PartialEq, Debug)]
#[strum(serialize_all = "UPPERCASE")]
pub(crate) enum TestResult {
Pass,
Fail,
Skip,
}
struct TestContext {
pb: ProgressBar,
pass_count: u64,
fail_count: u64,
skip_count: u64,
// We do not need a specific method to flush this before dropping
// `TestContext`, because the file is only written from the
// `update_summary_file` method. This method always calls flush on
// a non-error path, and the file is left in an inconsistent state on an
// error anyway.
summary_file: BufWriter<File>,
log_dir: PathBuf,
results: TestResults,
}
/// Returns a string to use for displaying a test name
///
/// From the test2json docs:
///
/// > The Test field, if present, specifies the test, example, or benchmark
/// > function that caused the event. Events for the overall package test do not
/// > set Test.
///
/// For events that do not have a `Test` field, we display their test name as
/// `"GLOBAL"` instead.
fn test_str(test: &Option<String>) -> &str {
if let Some(test) = test {
test
} else {
"GLOBAL"
}
}
/// Returns whether a test name is a toplevel test (as opposed to a subtest)
fn test_is_toplevel(test: &str) -> bool {
!test.contains('/')
}
impl TestContext {
fn new(out: &Path, test_count: u64) -> Result<TestContext> {
// TODO: figure out how to display ETA without it fluctuating wildly.
let style = ProgressStyle::with_template(
"({msg}) {pos}/{len} [{elapsed}] {wide_bar}",
)
.expect("static progress bar template should be valid")
.progress_chars("##-");
let pb = ProgressBar::new(test_count).with_style(style);
pb.enable_steady_tick(Duration::from_secs(1));
let summary_file = File::create(out.join("summary.tsv"))
.into_diagnostic()
.wrap_err("failed to create summary file in output dir")?;
let summary_file = BufWriter::new(summary_file);
let log_dir = out.join("logs");
let ctx = TestContext {
pb,
pass_count: 0,
fail_count: 0,
skip_count: 0,
log_dir,
summary_file,
results: BTreeMap::new(),
};
ctx.update_progress();
Ok(ctx)
}
fn update_progress(&self) {
self.pb
.set_position(self.pass_count + self.fail_count + self.skip_count);
self.pb.set_message(format!(
"PASS {}, FAIL {}, SKIP {}",
self.pass_count, self.fail_count, self.skip_count
));
}
fn update_summary_file(&mut self) -> Result<()> {
// Truncate the file to clear existing contents
self.summary_file
.get_mut()
.seek(SeekFrom::Start(0))
.into_diagnostic()?;
self.summary_file.get_mut().set_len(0).into_diagnostic()?;
write_summary(&mut self.summary_file, &self.results)?;
self.summary_file.flush().into_diagnostic()?;
Ok(())
}
fn handle_test_result(
&mut self,
test: &str,
result: TestResult,
) -> Result<()> {
self.pb.println(format!("=== {result}\t{test}"));
self.results.insert(test.to_owned(), result);
// 'complement.test -test.list' is only able to count toplevel tests
// ahead-of-time, so we don't include subtests in the pass/fail/skip
// counts.
if test_is_toplevel(test) {
match result {
TestResult::Pass => self.pass_count += 1,
TestResult::Fail => self.fail_count += 1,
TestResult::Skip => self.skip_count += 1,
}
self.update_progress();
}
self.update_summary_file().wrap_err("error writing summary file")?;
Ok(())
}
fn handle_test_output(&mut self, test: &str, output: &str) -> Result<()> {
let path = self.log_dir.join(test).with_extension("txt");
// Some tests have a '/' in their name, so create the extra dirs if they
// don't already exist.
let parent_dir = path.parent().expect(
"log file path should have parent. At worst, the toplevel dir is \
$out/logs/.",
);
fs::create_dir_all(parent_dir).into_diagnostic().wrap_err_with(
|| {
format!(
"error creating directory at {parent_dir:?} for log file \
{path:?}"
)
},
)?;
let mut log_file = File::options()
.create(true)
.append(true)
.open(&path)
.into_diagnostic()
.wrap_err_with(|| format!("error creating log file at {path:?}"))?;
log_file.write_all(output.as_bytes()).into_diagnostic().wrap_err_with(
|| format!("error writing to log file at {path:?}"),
)?;
Ok(())
}
fn handle_event(&mut self, event: GoTestEvent) -> Result<()> {
match event {
GoTestEvent::OtherAction => (),
GoTestEvent::Run {
test,
} => {
self.pb.println(format!("=== RUN \t{}", test_str(&test)));
}
GoTestEvent::Pass {
test,
} => {
self.handle_test_result(test_str(&test), TestResult::Pass)?;
}
GoTestEvent::Fail {
test,
} => {
self.handle_test_result(test_str(&test), TestResult::Fail)?;
}
GoTestEvent::Skip {
test,
} => {
self.handle_test_result(test_str(&test), TestResult::Skip)?;
}
GoTestEvent::Output {
test,
output,
} => {
let test = test_str(&test);
self.handle_test_output(test, &output).wrap_err_with(|| {
format!(
"failed to write test output to a log file for test \
{test:?}"
)
})?;
}
}
Ok(())
}
/// Processes a line of output from `test2json`
fn handle_line(&mut self, line: &str) -> Result<()> {
match serde_json::from_str(line) {
Ok(event) => self.handle_event(event)?,
Err(e) => {
let label =
LabeledSpan::at_offset(e.column() - 1, "error here");
let report = miette!(labels = vec![label], "{e}",)
.with_source_code(line.to_owned())
.wrap_err(
"failed to parse go test2json event from complement \
tests. Ignoring this event.",
);
eprintln!("{report:?}");
}
};
Ok(())
}
}