kill dangling docker containers in complement run

This is a stupid hack and I hate it, but we do need to support
concurrent complement runs if we want to do this in CI.
This commit is contained in:
Benjamin Lee 2024-06-22 14:21:50 -07:00
parent e6f5aa6150
commit 2e03b39cdd
No known key found for this signature in database
GPG key ID: FB9624E2885D55A4
5 changed files with 117 additions and 23 deletions

1
Cargo.lock generated
View file

@ -3906,6 +3906,7 @@ dependencies = [
"indicatif",
"miette",
"process-wrap",
"rand",
"serde",
"serde_json",
"signal-hook",

View file

@ -10,6 +10,7 @@ clap.workspace = true
miette.workspace = true
process-wrap.workspace = true
indicatif.workspace = true
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
signal-hook.workspace = true

View file

@ -3,7 +3,8 @@ use std::{
path::{Path, PathBuf},
};
use miette::{miette, IntoDiagnostic, Result, WrapErr};
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
use serde::Deserialize;
use xshell::{cmd, Shell};
mod docker;
@ -11,7 +12,7 @@ mod summary;
mod test2json;
use self::{
docker::load_docker_image,
docker::{load_docker_image, retag_docker_image},
summary::{compare_summary, read_summary},
test2json::{count_complement_tests, run_complement},
};
@ -76,6 +77,8 @@ pub(crate) fn main(args: Args, sh: &Shell) -> Result<()> {
let docker_image = load_docker_image(sh, &toplevel).wrap_err(
"failed to build and load complement-grapevine docker image",
)?;
let docker_image = retag_docker_image(sh, &docker_image)
.wrap_err("failed to retag docker image")?;
let test_count = count_complement_tests(sh, &docker_image)
.wrap_err("failed to determine total complement test count")?;
let results = run_complement(sh, &args.out, &docker_image, test_count)
@ -86,6 +89,25 @@ pub(crate) fn main(args: Args, sh: &Shell) -> Result<()> {
Ok(())
}
/// Deserialize a single-line json string using [`serde_json::from_str`] and
/// convert the error to a miette diagnostic.
///
/// On failure, the returned report carries a copy of `line` as its source
/// code and a label at the position where `serde_json` reported the error.
///
/// # Panics
/// Panics if `line` contains a newline.
fn from_json_line<'a, T: Deserialize<'a>>(line: &'a str) -> Result<T> {
    assert!(
        !line.contains('\n'),
        "from_json_line requires single-line json source"
    );
    serde_json::from_str(line).map_err(|e| {
        // Needs single-line input so that we don't have to deal with converting
        // line/column to a span offset.
        //
        // serde_json columns are 1-based, but it reports column 0 in some
        // cases (for example, EOF on an empty line). Saturate so that case
        // maps to offset 0 instead of underflowing.
        let offset = e.column().saturating_sub(1);
        let label = LabeledSpan::at_offset(offset, "error here");
        miette!(labels = vec![label], "{e}").with_source_code(line.to_owned())
    })
}
/// Ensures that output directory exists and is empty
///
/// If the directory does not exist, it will be created. If it is not empty, an

View file

@ -3,8 +3,12 @@
use std::path::Path;
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
use rand::{distributions::Alphanumeric, thread_rng, Rng};
use serde::Deserialize;
use xshell::{cmd, Shell};
use super::from_json_line;
/// Build the 'grapevine-complement' OCI image and load it into the docker
/// daemon.
pub(crate) fn load_docker_image(sh: &Shell, toplevel: &Path) -> Result<String> {
@ -56,3 +60,74 @@ pub(crate) fn load_docker_image(sh: &Shell, toplevel: &Path) -> Result<String> {
.to_owned();
Ok(docker_image)
}
/// Retags the docker image with a random tag. Returns the new image reference.
///
/// This is useful so that we can uniquely identify the set of docker containers
/// spawned by a complement run. Without using a unique tag, there is no way to
/// determine which docker containers to kill if a run is cancelled, since other
/// concurrent complement runs may have created containers with the same image.
///
/// # Errors
/// Returns an error if `image` is not of the form `{repository}:{tag}`, or if
/// the `docker image tag` command fails.
pub(crate) fn retag_docker_image(sh: &Shell, image: &str) -> Result<String> {
    let mut rng = thread_rng();
    let new_tag: String =
        (0..16).map(|_| char::from(rng.sample(Alphanumeric))).collect();
    // Split at the *last* colon: the repository part may itself contain a
    // colon when it includes a registry port (e.g. "localhost:5000/repo:tag").
    // A "tag" containing '/' means that last colon was actually the port
    // separator of an untagged reference, so treat that as malformed.
    let (repo, _old_tag) = image
        .rsplit_once(':')
        .filter(|(_, tag)| !tag.contains('/'))
        .ok_or_else(|| {
            miette!(
                "Docker image reference was not in the expected format. \
                 Expected \"{{repository}}:{{tag}}\", got {image:?}"
            )
        })?;
    let new_image = format!("{repo}:{new_tag}");
    cmd!(sh, "docker image tag {image} {new_image}").run().into_diagnostic()?;
    Ok(new_image)
}
/// Kills all docker containers using a particular image.
///
/// This can be used to clean up dangling docker images after a cancelled
/// complement run, but it's important that the image reference be unique. See
/// the [`retag_docker_image`] function for a discussion of this.
pub(crate) fn kill_docker_containers(sh: &Shell, image: &str) -> Result<()> {
#[derive(Deserialize)]
struct ContainerInfo {
#[serde(rename = "ID")]
id: String,
#[serde(rename = "Image")]
image: String,
}
// --filter ancestor={image} doesn't work here, because images with the same
// image id will be picked up even if their image reference (repo:tag) are
// different. We need to list all the containers and filter them ourselves.
let containers = cmd!(sh, "docker container ls --format json")
.read()
.into_diagnostic()
.wrap_err("error listing running docker containers")?;
let containers = containers
.lines()
.map(from_json_line)
.collect::<Result<Vec<ContainerInfo>, _>>()
.wrap_err(
"error parsing docker container info from 'docker container ls' \
output",
)?;
let our_containers = containers
.into_iter()
.filter(|container| container.image == image)
.map(|container| container.id)
.collect::<Vec<_>>();
if !our_containers.is_empty() {
// Ignore non-zero exit status because 'docker kill' will fail if
// containers already exited before sending the signal, which is
// fine.
cmd!(sh, "docker kill --signal=SIGKILL {our_containers...}")
.ignore_status()
.run()
.into_diagnostic()
.wrap_err("error killing docker containers")?;
}
Ok(())
}

View file

@ -18,7 +18,7 @@ use std::{
};
use indicatif::{ProgressBar, ProgressStyle};
use miette::{miette, IntoDiagnostic, LabeledSpan, Result, WrapErr};
use miette::{miette, IntoDiagnostic, Result, WrapErr};
use process_wrap::std::{ProcessGroup, StdChildWrapper, StdCommandWrap};
use serde::Deserialize;
use signal_hook::{
@ -29,7 +29,11 @@ use signal_hook::{
use strum::{Display, EnumString};
use xshell::{cmd, Shell};
use super::summary::{write_summary, TestResults};
use super::{
docker::kill_docker_containers,
from_json_line,
summary::{write_summary, TestResults},
};
/// Returns the total number of complement tests that will be run
///
@ -142,12 +146,9 @@ pub(crate) fn run_complement(
}
}
// TODO: kill dangling docker containers
eprintln!(
"WARNING: complement may have left dangling docker \
containers. Cleanup for these is planned, but has not been \
implemented yet. You need to identify and kill them manually"
);
kill_docker_containers(sh, docker_image).wrap_err(
"failed to kill dangling complement docker containers",
)?;
true
} else {
@ -500,20 +501,14 @@ impl TestContext {
/// Processes a line of output from `test2json`
fn handle_line(&mut self, line: &str) -> Result<()> {
self.write_raw_log_line(line)?;
match serde_json::from_str(line) {
let result = from_json_line(line).wrap_err(
"failed to parse go test2json event from complement tests. \
Ignoring this event",
);
match result {
Ok(event) => self.handle_event(event)?,
Err(e) => {
let label =
LabeledSpan::at_offset(e.column() - 1, "error here");
let report = miette!(labels = vec![label], "{e}",)
.with_source_code(line.to_owned())
.wrap_err(
"failed to parse go test2json event from complement \
tests. Ignoring this event.",
);
eprintln!("{report:?}");
}
};
Err(e) => eprintln!("{e:?}"),
}
Ok(())
}
}