//! Functions for working with the go [`test2json`][test2json] tool. //! //! [test2json]: https://pkg.go.dev/cmd/test2json@go1.22.4 use std::{ collections::BTreeMap, fs::{self, File}, io::{BufRead, BufReader, BufWriter, Seek, SeekFrom, Write}, mem, panic, path::{Path, PathBuf}, process::{Command, Stdio}, sync::{ atomic::{AtomicBool, Ordering}, Arc, Mutex, }, thread, time::Duration, }; use indicatif::{ProgressBar, ProgressStyle}; use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr}; use process_wrap::std::{ProcessGroup, StdChildWrapper, StdCommandWrap}; use serde::Deserialize; use signal_hook::{ consts::signal::{SIGINT, SIGQUIT, SIGTERM}, flag, iterator::Signals, }; use strum::{Display, EnumString}; use xshell::{cmd, Shell}; use super::summary::{write_summary, TestResults}; /// Returns the total number of complement tests that will be run /// /// This is only able to count toplevel tests, and will not included subtests /// (`A/B`) pub(crate) fn count_complement_tests( sh: &Shell, docker_image: &str, ) -> Result { let test_list = cmd!(sh, "go tool test2json complement.test -test.list .*") .env("COMPLEMENT_BASE_IMAGE", docker_image) .read() .into_diagnostic()?; let test_count = u64::try_from(test_list.lines().count()) .into_diagnostic() .wrap_err("test count overflowed u64")?; Ok(test_count) } /// Run complement tests. /// /// This function mostly deals with handling shutdown signals, while the actual /// logic for running complement is in `run_complement_inner`, which is spawned /// as a separate thread. This is necessary because the go `test2json` tool /// ignores SIGTERM and SIGINT. Without signal handling on our end, terminating /// the complement wrapper process would leave a dangling complement child /// process running. /// /// The reason that `test2json` does this is that it does not implement any kind /// of test cleanup, and so the developers decided that ignoring termination /// signals entirely was safer. Running go unit tests outside of `test2json` /// (and so without machine-readable output) does not have this limitation. /// Unfortunately neither of these are an option for us. We need /// machine-readable output to compare against the baseline result. Complement /// runs can take 40+ minutes, so being able to cancel them is a requirement. /// /// Because we don't trigger any of the normal cleanup, we need to handle /// dangling docker containers ourselves. pub(crate) fn run_complement( sh: &Shell, out: &Path, docker_image: &str, test_count: u64, ) -> Result { let term_signals = [SIGTERM, SIGINT, SIGQUIT]; let term_now = Arc::new(AtomicBool::new(false)); for sig in &term_signals { // Terminate immediately if `term_now` is true and we receive a // terminating signal flag::register_conditional_shutdown(*sig, 1, Arc::clone(&term_now)) .into_diagnostic() .wrap_err("error registering signal handler")?; } let mut signals = Signals::new(term_signals).unwrap(); let state = Mutex::new(ComplementRunnerState::Startup); let signals_handle = signals.handle(); let result = thread::scope(|s| { let state_ref = &state; let cloned_sh = sh.clone(); let thread_handle = s.spawn(move || { let panic_result = panic::catch_unwind(|| { run_complement_inner( &cloned_sh, out, docker_image, test_count, state_ref, ) }); // Stop the signal-handling loop, even if we panicked signals_handle.close(); match panic_result { Ok(result) => result, Err(panic) => panic::resume_unwind(panic), } }); let canceled = if let Some(signal) = signals.forever().next() { let description = match signal { SIGTERM => "SIGTERM", SIGINT => "ctrl+c", SIGQUIT => "SIGQUIT", _ => unreachable!(), }; eprintln!( "Received {description}, stopping complement run. Send \ {description} a second time to terminate without cleaning \ up, which may leave dangling processes and docker containers" ); term_now.store(true, Ordering::Relaxed); { let mut state = state.lock().unwrap(); let old_state = mem::replace(&mut *state, ComplementRunnerState::Shutdown); match old_state { ComplementRunnerState::Startup => (), ComplementRunnerState::Shutdown => unreachable!(), ComplementRunnerState::Running(mut child) => { // Killing the child process should terminate the // complement runner thread in a // bounded amount of time, because it will cause the // stdout reader to return EOF. child.kill().unwrap(); } } } // TODO: kill dangling docker containers eprintln!( "WARNING: complement may have left dangling docker \ containers. Cleanup for these is planned, but has not been \ implemented yet. You need to identify and kill them manually" ); true } else { // hit this branch if the signal handler is closed by the complement // runner thread. This means the complement run finished // without being canceled. false }; match thread_handle.join() { Ok(result) => { if canceled { Err(miette!("complement run was canceled")) } else { result } } Err(panic_value) => panic::resume_unwind(panic_value), } }); // From this point on, terminate immediately when signalled term_now.store(true, Ordering::Relaxed); result } /// Possible states for the complement runner thread. /// /// The current state should be protected by a mutex, where state changes are /// only performed while the mutex is locked. This is to prevent a race /// condition where the main thread handles a shutdown signal at the same time /// that the complement runner thread is starting the child process, and so the /// main thread fails to kill the child process. /// /// Valid state transitions: /// /// - `Startup` -> `Running` /// - `Startup` -> `Shutdown` /// - `Running` -> `Shutdown` #[derive(Debug)] enum ComplementRunnerState { /// The complement child process has not been started yet Startup, /// The complement child process is running, and we have not yet received /// a shutdown signal. Running(Box), /// We have received a shutdown signal. Shutdown, } /// Spawn complement chind process and handle it's output /// /// This is the "complement runner" thread, spawned by the [`run_complement`] /// function. fn run_complement_inner( sh: &Shell, out: &Path, docker_image: &str, test_count: u64, state: &Mutex, ) -> Result { let cmd = cmd!(sh, "go tool test2json complement.test -test.v=test2json") .env("COMPLEMENT_BASE_IMAGE", docker_image) .env("COMPLEMENT_SPAWN_HS_TIMEOUT", "5") .env("COMPLEMENT_ALWAYS_PRINT_SERVER_LOGS", "1"); eprintln!("$ {cmd}"); let stdout = { let mut state = state.lock().unwrap(); match &*state { ComplementRunnerState::Startup => (), ComplementRunnerState::Running(_) => unreachable!(), ComplementRunnerState::Shutdown => { return Err(miette!("complement run was canceled")) } } let mut cmd = Command::from(cmd); cmd.stdout(Stdio::piped()); let mut child = StdCommandWrap::from(cmd) .wrap(ProcessGroup::leader()) .spawn() .into_diagnostic() .wrap_err("error spawning complement process")?; let stdout = child.stdout().take().expect( "child process spawned with piped stdout should have stdout", ); *state = ComplementRunnerState::Running(child); stdout }; let lines = BufReader::new(stdout).lines(); let mut ctx = TestContext::new(out, test_count)?; for line in lines { let line = line .into_diagnostic() .wrap_err("error reading output from complement process")?; ctx.handle_line(&line)?; } Ok(ctx.results) } /// Schema from /// /// Only the fields that we need are included here. #[derive(Deserialize)] #[serde( rename_all = "snake_case", rename_all_fields = "PascalCase", tag = "Action" )] enum GoTestEvent { Run { test: Option, }, Pass { test: Option, }, Fail { test: Option, }, Skip { test: Option, }, Output { test: Option, output: String, }, #[serde(other)] OtherAction, } #[derive(Copy, Clone, Display, EnumString, Eq, PartialEq, Debug)] #[strum(serialize_all = "UPPERCASE")] pub(crate) enum TestResult { Pass, Fail, Skip, } struct TestContext { pb: ProgressBar, pass_count: u64, fail_count: u64, skip_count: u64, // We do not need a specific method to flush this before dropping // `TestContext`, because the file is only written from the // `update_summary_file` method. This method always calls flush on // a non-error path, and the file is left in an inconsistent state on an // error anyway. summary_file: BufWriter, log_dir: PathBuf, results: TestResults, } /// Returns a string to use for displaying a test name /// /// From the test2json docs: /// /// > The Test field, if present, specifies the test, example, or benchmark /// > function that caused the event. Events for the overall package test do not /// > set Test. /// /// For events that do not have a `Test` field, we display their test name as /// `"GLOBAL"` instead. fn test_str(test: &Option) -> &str { if let Some(test) = test { test } else { "GLOBAL" } } /// Returns whether a test name is a toplevel test (as opposed to a subtest) fn test_is_toplevel(test: &str) -> bool { !test.contains('/') } impl TestContext { fn new(out: &Path, test_count: u64) -> Result { // TODO: figure out how to display ETA without it fluctuating wildly. let style = ProgressStyle::with_template( "({msg}) {pos}/{len} [{elapsed}] {wide_bar}", ) .expect("static progress bar template should be valid") .progress_chars("##-"); let pb = ProgressBar::new(test_count).with_style(style); pb.enable_steady_tick(Duration::from_secs(1)); let summary_file = File::create(out.join("summary.tsv")) .into_diagnostic() .wrap_err("failed to create summary file in output dir")?; let summary_file = BufWriter::new(summary_file); let log_dir = out.join("logs"); let ctx = TestContext { pb, pass_count: 0, fail_count: 0, skip_count: 0, log_dir, summary_file, results: BTreeMap::new(), }; ctx.update_progress(); Ok(ctx) } fn update_progress(&self) { self.pb .set_position(self.pass_count + self.fail_count + self.skip_count); self.pb.set_message(format!( "PASS {}, FAIL {}, SKIP {}", self.pass_count, self.fail_count, self.skip_count )); } fn update_summary_file(&mut self) -> Result<()> { // Truncate the file to clear existing contents self.summary_file .get_mut() .seek(SeekFrom::Start(0)) .into_diagnostic()?; self.summary_file.get_mut().set_len(0).into_diagnostic()?; write_summary(&mut self.summary_file, &self.results)?; self.summary_file.flush().into_diagnostic()?; Ok(()) } fn handle_test_result( &mut self, test: &str, result: TestResult, ) -> Result<()> { self.pb.println(format!("=== {result}\t{test}")); self.results.insert(test.to_owned(), result); // 'complement.test -test.list' is only able to count toplevel tests // ahead-of-time, so we don't include subtests in the pass/fail/skip // counts. if test_is_toplevel(test) { match result { TestResult::Pass => self.pass_count += 1, TestResult::Fail => self.fail_count += 1, TestResult::Skip => self.skip_count += 1, } self.update_progress(); } self.update_summary_file().wrap_err("error writing summary file")?; Ok(()) } fn handle_test_output(&mut self, test: &str, output: &str) -> Result<()> { let path = self.log_dir.join(test).with_extension("txt"); // Some tests have a '/' in their name, so create the extra dirs if they // don't already exist. let parent_dir = path.parent().expect( "log file path should have parent. At worst, the toplevel dir is \ $out/logs/.", ); fs::create_dir_all(parent_dir).into_diagnostic().wrap_err_with( || { format!( "error creating directory at {parent_dir:?} for log file \ {path:?}" ) }, )?; let mut log_file = File::options() .create(true) .append(true) .open(&path) .into_diagnostic() .wrap_err_with(|| format!("error creating log file at {path:?}"))?; log_file.write_all(output.as_bytes()).into_diagnostic().wrap_err_with( || format!("error writing to log file at {path:?}"), )?; Ok(()) } fn handle_event(&mut self, event: GoTestEvent) -> Result<()> { match event { GoTestEvent::OtherAction => (), GoTestEvent::Run { test, } => { self.pb.println(format!("=== RUN \t{}", test_str(&test))); } GoTestEvent::Pass { test, } => { self.handle_test_result(test_str(&test), TestResult::Pass)?; } GoTestEvent::Fail { test, } => { self.handle_test_result(test_str(&test), TestResult::Fail)?; } GoTestEvent::Skip { test, } => { self.handle_test_result(test_str(&test), TestResult::Skip)?; } GoTestEvent::Output { test, output, } => { let test = test_str(&test); self.handle_test_output(test, &output).wrap_err_with(|| { format!( "failed to write test output to a log file for test \ {test:?}" ) })?; } } Ok(()) } /// Processes a line of output from `test2json` fn handle_line(&mut self, line: &str) -> Result<()> { match serde_json::from_str(line) { Ok(event) => self.handle_event(event)?, Err(e) => { let label = LabeledSpan::at_offset(e.column() - 1, "error here"); let report = miette!(labels = vec![label], "{e}",) .with_source_code(line.to_owned()) .wrap_err( "failed to parse go test2json event from complement \ tests. Ignoring this event.", ); eprintln!("{report:?}"); } }; Ok(()) } }