From 2e5755c9ce9373978f1e02b3c4b5a30e10e455c1 Mon Sep 17 00:00:00 2001
From: Alex Huszagh <ahuszagh@gmail.com>
Date: Fri, 8 Jul 2022 03:04:19 -0500
Subject: [PATCH] Adds numerous bug fixes for remote cross.

Adds simpler container and volume cleanup, and avoiding keeping anonymous volumes for `/cargo/bin` present after the container exited. It also ensures containers are stopped even if a signal is sent to the `cross` command (except `SIGKILL`), and ensures that containers exit quicker if possible. This is done by using the presence of a TTY, and if it is present, running the default entrypoint, which responds to signals. If we do not have a TTY, the image will not respond to signals, so we send the container a `SIGKILL` via `docker stop` using a an instant timeout (0).

This also fixes an issue with stale fingerprint data: if a persistent data volume was deleted and then recreated, previously the project data would not be copied over. This meant that the project could not be found, and it would require users to manually delete the fingerprint data or use single-used volumes.

This also checks if the container has been scheduled for deletion (the drop may not have completed), and errors out early if more calls are made to the container.

This also properly handles `docker rm` calls if the `docker stop` command worked without needing a `SIGKILL`: it silences the output of the error if the container wasn't found.
---
 src/bin/commands/containers.rs |  21 +++-
 src/docker/remote.rs           | 220 ++++++++++++++++++++++-----------
 src/docker/shared.rs           |   4 +-
 src/errors.rs                  |  62 ++++++++++
 4 files changed, 229 insertions(+), 78 deletions(-)

diff --git a/src/bin/commands/containers.rs b/src/bin/commands/containers.rs
index 796894698..566fb1c23 100644
--- a/src/bin/commands/containers.rs
+++ b/src/bin/commands/containers.rs
@@ -396,7 +396,7 @@ pub fn create_persistent_volume(
     let state = docker::remote::container_state(engine, &container, msg_info)?;
     if !state.is_stopped() {
         msg_info.warn("container {container} was running.")?;
-        docker::remote::container_stop(engine, &container, msg_info)?;
+        docker::remote::container_stop_default(engine, &container, msg_info)?;
     }
     if state.exists() {
         msg_info.warn("container {container} was exited.")?;
@@ -407,14 +407,24 @@ pub fn create_persistent_volume(
     let mount_prefix = docker::remote::MOUNT_PREFIX;
     let mut docker = docker::subcommand(engine, "run");
     docker.args(&["--name", &container]);
+    docker.arg("--rm");
     docker.args(&["-v", &format!("{}:{}", volume, mount_prefix)]);
     docker.arg("-d");
-    if io::Stdin::is_atty() && io::Stdout::is_atty() && io::Stderr::is_atty() {
+    let is_tty = io::Stdin::is_atty() && io::Stdout::is_atty() && io::Stderr::is_atty();
+    if is_tty {
         docker.arg("-t");
     }
     docker.arg(docker::UBUNTU_BASE);
-    // ensure the process never exits until we stop it
-    docker.args(&["sh", "-c", "sleep infinity"]);
+    if !is_tty {
+        // ensure the process never exits until we stop it
+        // we only need this infinite loop if we don't allocate
+        // a TTY. this has a few issues though: now, the
+        // container no longer responds to signals, so the
+        // container will need to be sig-killed.
+        docker.args(&["sh", "-c", "sleep infinity"]);
+    }
+    // store first, since failing to non-existing container is fine
+    docker::remote::create_container_deleter(engine.clone(), container.clone());
     docker.run_and_get_status(msg_info, false)?;
 
     docker::remote::copy_volume_container_xargo(
@@ -443,8 +453,7 @@ pub fn create_persistent_volume(
         msg_info,
     )?;
 
-    docker::remote::container_stop(engine, &container, msg_info)?;
-    docker::remote::container_rm(engine, &container, msg_info)?;
+    docker::remote::drop_container(is_tty, msg_info);
 
     Ok(())
 }
diff --git a/src/docker/remote.rs b/src/docker/remote.rs
index 100290dc9..b4cc21dc5 100644
--- a/src/docker/remote.rs
+++ b/src/docker/remote.rs
@@ -1,7 +1,8 @@
 use std::collections::BTreeMap;
 use std::io::{self, BufRead, Read, Write};
 use std::path::{Path, PathBuf};
-use std::process::ExitStatus;
+use std::process::{Command, ExitStatus, Output};
+use std::sync::atomic::{AtomicBool, Ordering};
 use std::{env, fs, time};
 
 use eyre::Context;
@@ -21,31 +22,37 @@ use crate::{Host, Target};
 
 // the mount directory for the data volume.
 pub const MOUNT_PREFIX: &str = "/cross";
+// default timeout to stop a container (in seconds)
+pub const DEFAULT_TIMEOUT: u32 = 2;
+// instant kill in case of a non-graceful exit
+pub const NO_TIMEOUT: u32 = 0;
+
+// we need to specify drops for the containers, but we
+// also need to ensure the drops are called on a
+// termination handler. we use an atomic bool to ensure
+// that the drop only gets called once, even if we have
+// the signal handle invoked multiple times or it fails.
+pub(crate) static mut CONTAINER: Option<DeleteContainer> = None;
+pub(crate) static mut CONTAINER_EXISTS: AtomicBool = AtomicBool::new(false);
 
 // it's unlikely that we ever need to erase a line in the destructors,
 // and it's better than keep global state everywhere, or keeping a ref
 // cell which could have already deleted a line
-struct DeleteVolume<'a>(&'a Engine, &'a VolumeId, ColorChoice, Verbosity);
+pub(crate) struct DeleteContainer(Engine, String, u32, ColorChoice, Verbosity);
 
-impl<'a> Drop for DeleteVolume<'a> {
+impl Drop for DeleteContainer {
     fn drop(&mut self) {
-        if let VolumeId::Discard(id) = self.1 {
-            let mut msg_info = MessageInfo::new(self.2, self.3);
-            volume_rm(self.0, id, &mut msg_info).ok();
+        // SAFETY: safe, since guarded by a thread-safe atomic swap.
+        unsafe {
+            if CONTAINER_EXISTS.swap(false, Ordering::SeqCst) {
+                let mut msg_info = MessageInfo::new(self.3, self.4);
+                container_stop(&self.0, &self.1, self.2, &mut msg_info).ok();
+                container_rm(&self.0, &self.1, &mut msg_info).ok();
+            }
         }
     }
 }
 
-struct DeleteContainer<'a>(&'a Engine, &'a str, ColorChoice, Verbosity);
-
-impl<'a> Drop for DeleteContainer<'a> {
-    fn drop(&mut self) {
-        let mut msg_info = MessageInfo::new(self.2, self.3);
-        container_stop(self.0, self.1, &mut msg_info).ok();
-        container_rm(self.0, self.1, &mut msg_info).ok();
-    }
-}
-
 #[derive(Debug, PartialEq, Eq)]
 pub enum ContainerState {
     Created,
@@ -82,36 +89,77 @@ impl ContainerState {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 enum VolumeId {
     Keep(String),
-    Discard(String),
+    Discard,
 }
 
 impl VolumeId {
-    fn create(
-        engine: &Engine,
-        toolchain: &str,
-        container: &str,
-        msg_info: &mut MessageInfo,
-    ) -> Result<Self> {
+    fn create(engine: &Engine, toolchain: &str, msg_info: &mut MessageInfo) -> Result<Self> {
         if volume_exists(engine, toolchain, msg_info)? {
             Ok(Self::Keep(toolchain.to_owned()))
         } else {
-            Ok(Self::Discard(container.to_owned()))
+            Ok(Self::Discard)
         }
     }
 }
 
-impl AsRef<str> for VolumeId {
-    fn as_ref(&self) -> &str {
-        match self {
-            Self::Keep(s) => s,
-            Self::Discard(s) => s,
+// prevent further commands from running if we handled
+// a signal earlier, and the volume is exited.
+// this isn't required, but avoids unnecessary
+// commands while the container is cleaning up.
+macro_rules! bail_container_exited {
+    () => {{
+        if !container_exists() {
+            eyre::bail!("container already exited due to signal");
         }
+    }};
+}
+
+pub fn create_container_deleter(engine: Engine, container: String) {
+    // SAFETY: safe, since single-threaded execution.
+    unsafe {
+        CONTAINER_EXISTS.store(true, Ordering::Relaxed);
+        CONTAINER = Some(DeleteContainer(
+            engine,
+            container,
+            NO_TIMEOUT,
+            ColorChoice::Never,
+            Verbosity::Quiet,
+        ));
     }
 }
 
+pub fn drop_container(is_tty: bool, msg_info: &mut MessageInfo) {
+    // SAFETY: safe, since single-threaded execution.
+    unsafe {
+        // relax the no-timeout and lack of output
+        if let Some(container) = &mut CONTAINER {
+            if is_tty {
+                container.2 = DEFAULT_TIMEOUT;
+            }
+            container.3 = msg_info.color_choice;
+            container.4 = msg_info.verbosity;
+        }
+        CONTAINER = None;
+    }
+}
+
+fn container_exists() -> bool {
+    // SAFETY: safe, not mutating an atomic bool
+    // this can be more relaxed: just used to ensure
+    // that we don't make unnecessary calls, which are
+    // safe even if executed, after we've signaled a
+    // drop to our container.
+    unsafe { CONTAINER_EXISTS.load(Ordering::Relaxed) }
+}
+
+fn subcommand_or_exit(engine: &Engine, cmd: &str) -> Result<Command> {
+    bail_container_exited!();
+    Ok(subcommand(engine, cmd))
+}
+
 fn create_volume_dir(
     engine: &Engine,
     container: &str,
@@ -119,7 +167,7 @@ fn create_volume_dir(
     msg_info: &mut MessageInfo,
 ) -> Result<ExitStatus> {
     // make our parent directory if needed
-    subcommand(engine, "exec")
+    subcommand_or_exit(engine, "exec")?
         .arg(container)
         .args(&["sh", "-c", &format!("mkdir -p '{}'", dir.as_posix()?)])
         .run_and_get_status(msg_info, false)
@@ -133,7 +181,7 @@ fn copy_volume_files(
     dst: &Path,
     msg_info: &mut MessageInfo,
 ) -> Result<ExitStatus> {
-    subcommand(engine, "cp")
+    subcommand_or_exit(engine, "cp")?
         .arg("-a")
         .arg(src.to_utf8()?)
         .arg(format!("{container}:{}", dst.as_posix()?))
@@ -165,7 +213,7 @@ fn container_path_exists(
     path: &Path,
     msg_info: &mut MessageInfo,
 ) -> Result<bool> {
-    Ok(subcommand(engine, "exec")
+    Ok(subcommand_or_exit(engine, "exec")?
         .arg(container)
         .args(&["bash", "-c", &format!("[[ -d '{}' ]]", path.as_posix()?)])
         .run_and_get_status(msg_info, true)?
@@ -609,12 +657,12 @@ rm \"{PATH}\"
 
     // need to avoid having hundreds of files on the command, so
     // just provide a single file name.
-    subcommand(engine, "cp")
+    subcommand_or_exit(engine, "cp")?
         .arg(tempfile.path())
         .arg(format!("{container}:{PATH}"))
         .run_and_get_status(msg_info, true)?;
 
-    subcommand(engine, "exec")
+    subcommand_or_exit(engine, "exec")?
         .arg(container)
         .args(&["sh", "-c", &script.join("\n")])
         .run_and_get_status(msg_info, true)
@@ -642,7 +690,9 @@ fn copy_volume_container_project(
             fs::create_dir_all(&parent)?;
             let fingerprint = parent.join(container);
             let current = get_project_fingerprint(src, copy_cache)?;
-            if fingerprint.exists() {
+            // need to check if the container path exists, otherwise we might
+            // have stale data: the persistent volume was deleted & recreated.
+            if fingerprint.exists() && container_path_exists(engine, container, dst, msg_info)? {
                 let previous = parse_project_fingerprint(&fingerprint)?;
                 let (changed, removed) = get_fingerprint_difference(&previous, &current);
                 write_project_fingerprint(&fingerprint, &current)?;
@@ -658,7 +708,7 @@ fn copy_volume_container_project(
                 copy_all(msg_info)?;
             }
         }
-        VolumeId::Discard(_) => {
+        VolumeId::Discard => {
             copy_all(msg_info)?;
         }
     }
@@ -676,6 +726,14 @@ fn run_and_get_status(
         .run_and_get_status(msg_info, true)
 }
 
+fn run_and_get_output(
+    engine: &Engine,
+    args: &[&str],
+    msg_info: &mut MessageInfo,
+) -> Result<Output> {
+    command(engine).args(args).run_and_get_output(msg_info)
+}
+
 pub fn volume_create(
     engine: &Engine,
     volume: &str,
@@ -689,26 +747,42 @@ pub fn volume_rm(engine: &Engine, volume: &str, msg_info: &mut MessageInfo) -> R
 }
 
 pub fn volume_exists(engine: &Engine, volume: &str, msg_info: &mut MessageInfo) -> Result<bool> {
-    command(engine)
-        .args(&["volume", "inspect", volume])
-        .run_and_get_output(msg_info)
+    run_and_get_output(engine, &["volume", "inspect", volume], msg_info)
         .map(|output| output.status.success())
 }
 
 pub fn container_stop(
+    engine: &Engine,
+    container: &str,
+    timeout: u32,
+    msg_info: &mut MessageInfo,
+) -> Result<ExitStatus> {
+    run_and_get_status(
+        engine,
+        &["stop", container, "--time", &timeout.to_string()],
+        msg_info,
+    )
+}
+
+pub fn container_stop_default(
     engine: &Engine,
     container: &str,
     msg_info: &mut MessageInfo,
 ) -> Result<ExitStatus> {
-    run_and_get_status(engine, &["stop", container], msg_info)
+    // we want a faster timeout, since this might happen in signal
+    // handler. our containers normally clean up pretty fast, it's
+    // only without a pseudo-tty that they don't.
+    container_stop(engine, container, DEFAULT_TIMEOUT, msg_info)
 }
 
+// if stop succeeds without a timeout, this can have a spurious error
+// that is, if the container no longer exists. just silence this.
 pub fn container_rm(
     engine: &Engine,
     container: &str,
     msg_info: &mut MessageInfo,
 ) -> Result<ExitStatus> {
-    run_and_get_status(engine, &["rm", container], msg_info)
+    run_and_get_output(engine, &["rm", container], msg_info).map(|output| output.status)
 }
 
 pub fn container_state(
@@ -810,37 +884,35 @@ pub(crate) fn run(
 
     // 1. get our unique identifiers and cleanup from a previous run.
     // this can happen if we didn't gracefully exit before
+    // note that since we use `docker run --rm`, it's very
+    // unlikely the container state existed before.
     let toolchain_id = unique_toolchain_identifier(&dirs.sysroot)?;
     let container = unique_container_identifier(target, &paths.metadata, dirs)?;
-    let volume = VolumeId::create(engine, &toolchain_id, &container, msg_info)?;
+    let volume = VolumeId::create(engine, &toolchain_id, msg_info)?;
     let state = container_state(engine, &container, msg_info)?;
     if !state.is_stopped() {
         msg_info.warn(format_args!("container {container} was running."))?;
-        container_stop(engine, &container, msg_info)?;
+        container_stop_default(engine, &container, msg_info)?;
     }
     if state.exists() {
         msg_info.warn(format_args!("container {container} was exited."))?;
         container_rm(engine, &container, msg_info)?;
     }
-    if let VolumeId::Discard(ref id) = volume {
-        if volume_exists(engine, id, msg_info)? {
-            msg_info.warn(format_args!("temporary volume {id} existed."))?;
-            volume_rm(engine, id, msg_info)?;
-        }
-    }
 
     // 2. create our volume to copy all our data over to
-    if let VolumeId::Discard(ref id) = volume {
-        volume_create(engine, id, msg_info).wrap_err("when creating volume")?;
-    }
-    let _volume_deletter = DeleteVolume(engine, &volume, msg_info.color_choice, msg_info.verbosity);
+    // we actually use an anonymous volume, so it's auto-cleaned up,
+    // if we're using a discarded volume.
 
     // 3. create our start container command here
     let mut docker = subcommand(engine, "run");
     docker_userns(&mut docker);
     docker.args(&["--name", &container]);
-    docker.args(&["-v", &format!("{}:{mount_prefix}", volume.as_ref())]);
-    docker_envvars(&mut docker, &options.config, target, msg_info)?;
+    docker.arg("--rm");
+    let volume_mount = match volume {
+        VolumeId::Keep(ref id) => format!("{id}:{mount_prefix}"),
+        VolumeId::Discard => mount_prefix.to_owned(),
+    };
+    docker.args(&["-v", &volume_mount]);
 
     let mut volumes = vec![];
     let mount_volumes = docker_mount(
@@ -866,21 +938,24 @@ pub(crate) fn run(
     }
 
     docker.arg("-d");
-    if io::Stdin::is_atty() && io::Stdout::is_atty() && io::Stderr::is_atty() {
+    let is_tty = io::Stdin::is_atty() && io::Stdout::is_atty() && io::Stderr::is_atty();
+    if is_tty {
         docker.arg("-t");
     }
 
-    docker
-        .arg(&image_name(&options.config, target)?)
+    docker.arg(&image_name(&options.config, target)?);
+    if !is_tty {
         // ensure the process never exits until we stop it
-        .args(&["sh", "-c", "sleep infinity"])
-        .run_and_get_status(msg_info, true)?;
-    let _container_deletter = DeleteContainer(
-        engine,
-        &container,
-        msg_info.color_choice,
-        msg_info.verbosity,
-    );
+        // we only need this infinite loop if we don't allocate
+        // a TTY. this has a few issues though: now, the
+        // container no longer responds to signals, so the
+        // container will need to be sig-killed.
+        docker.args(&["sh", "-c", "sleep infinity"]);
+    }
+
+    // store first, since failing to non-existing container is fine
+    create_container_deleter(engine.clone(), container.clone());
+    docker.run_and_get_status(msg_info, true)?;
 
     // 4. copy all mounted volumes over
     let copy_cache = env::var("CROSS_REMOTE_COPY_CACHE")
@@ -894,7 +969,7 @@ pub(crate) fn run(
         }
     };
     let mount_prefix_path = mount_prefix.as_ref();
-    if let VolumeId::Discard(_) = volume {
+    if let VolumeId::Discard = volume {
         copy_volume_container_xargo(
             engine,
             &container,
@@ -1083,7 +1158,7 @@ symlink_recurse \"${{prefix}}\"
     for (src, dst) in to_symlink {
         symlink.push(format!("ln -s \"{}\" \"{}\"", src.as_posix()?, dst));
     }
-    subcommand(engine, "exec")
+    subcommand_or_exit(engine, "exec")?
         .arg(&container)
         .args(&["sh", "-c", &symlink.join("\n")])
         .run_and_get_status(msg_info, false)
@@ -1092,9 +1167,11 @@ symlink_recurse \"${{prefix}}\"
     // 6. execute our cargo command inside the container
     let mut docker = subcommand(engine, "exec");
     docker_user_id(&mut docker, engine.kind);
+    docker_envvars(&mut docker, &options.config, target, msg_info)?;
     docker_cwd(&mut docker, &paths, mount_volumes)?;
     docker.arg(&container);
     docker.args(&["sh", "-c", &format!("PATH=$PATH:/rust/bin {:?}", cmd)]);
+    bail_container_exited!();
     let status = docker
         .run_and_get_status(msg_info, false)
         .map_err(Into::into);
@@ -1104,8 +1181,9 @@ symlink_recurse \"${{prefix}}\"
     let skip_artifacts = env::var("CROSS_REMOTE_SKIP_BUILD_ARTIFACTS")
         .map(|s| bool_from_envvar(&s))
         .unwrap_or_default();
+    bail_container_exited!();
     if !skip_artifacts && container_path_exists(engine, &container, &target_dir, msg_info)? {
-        subcommand(engine, "cp")
+        subcommand_or_exit(engine, "cp")?
             .arg("-a")
             .arg(&format!("{container}:{}", target_dir.as_posix()?))
             .arg(
@@ -1118,5 +1196,7 @@ symlink_recurse \"${{prefix}}\"
             .map_err::<eyre::ErrReport, _>(Into::into)?;
     }
 
+    drop_container(is_tty, msg_info);
+
     status
 }
diff --git a/src/docker/shared.rs b/src/docker/shared.rs
index 6d26c34a9..be23bb54b 100644
--- a/src/docker/shared.rs
+++ b/src/docker/shared.rs
@@ -349,9 +349,9 @@ pub fn command(engine: &Engine) -> Command {
     command
 }
 
-pub fn subcommand(engine: &Engine, subcommand: &str) -> Command {
+pub fn subcommand(engine: &Engine, cmd: &str) -> Command {
     let mut command = command(engine);
-    command.arg(subcommand);
+    command.arg(cmd);
     command
 }
 
diff --git a/src/errors.rs b/src/errors.rs
index 7137469f0..c0c5aaf88 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -1,3 +1,4 @@
+use crate::docker::remote;
 use crate::temp;
 
 use std::sync::atomic::{AtomicBool, Ordering};
@@ -25,6 +26,67 @@ unsafe fn termination_handler() {
         temp::clean();
     }
 
+    // tl;dr this is a long explanation to say this is thread-safe.
+    // due to the risk for UB with this code, it's important we get
+    // this right.
+    //
+    // this code is wrapped in a sequentially-consistent ordering
+    // for strong guarantees about signal safety. since all atomics
+    // are guaranteed to be lock free, and must ensure consistent swaps
+    // among all threads, this means that we should only ever have one
+    // entry into the drop, which makes the external commands safe.
+    //
+    // to quote the reference on signal safety for linux:
+    // > In general, a function is async-signal-safe either because it
+    // > is reentrant or because it is atomic with respect to signals
+    // > (i.e., its execution can't be interrupted by a signal handler).
+    //
+    // https://man7.org/linux/man-pages/man7/signal-safety.7.html
+    //
+    // our operations are atomic, and this is the generally-recommended
+    // approach for signal-safe functions that modify global state (note
+    // that this is C, but the same rules apply except volatile vars):
+    // https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers
+    //
+    // even if the execution of the child process was interrupted by a
+    // signal handler, it's an external process that doesn't modify the
+    // environment variables of the parent, nor is it likely to lock
+    // (except on windows). therefore, for most cases, this code inside
+    // the atomic lock guard will still be lock-free, and therefore async
+    // signal-safe. in general, the implementations for spawning a child
+    // process in rust have the following logic:
+    // 1. get a rw-lock for the environment variables, and read them.
+    // 2. exec the child process
+    //
+    // the rw-lock allows any number of readers, which since we're not
+    // writing any environment variables, should be async-signal safe:
+    // it won't deadlock. it could technically lock if we had something
+    // writing environment variables to a child process, and the execution
+    // was multi-threaded, but we simply don't do that.
+    //
+    // for spawning the child process, on unix, the spawn is done via
+    // `posix_spawnp`, which is async-signal safe on linux, although
+    // it is not guaranteed to be async-signal safe on POSIX in general:
+    //      https://bugzilla.kernel.org/show_bug.cgi?id=25292
+    //
+    // even for POSIX, it's basically thread-safe for our invocations,
+    // since we do not modify the environment for our invocations:
+    // >  It is also complicated to modify the environment of a multi-
+    // > threaded process temporarily, since all threads must agree
+    // > when it is safe for the environment to be changed. However,
+    // > this cost is only borne by those invocations of posix_spawn()
+    // > and posix_spawnp() that use the additional functionality.
+    // > Since extensive modifications are not the usual case,
+    // > and are particularly unlikely in time-critical code,
+    // > keeping much of the environment control out of posix_spawn()
+    // > and posix_spawnp() is appropriate design.
+    //
+    // https://pubs.opengroup.org/onlinepubs/009695399/functions/posix_spawn.html
+    //
+    // on windows, a non-reentrant static mutex is used, so it is
+    // definitely not thread safe, but this should not matter.
+    remote::CONTAINER = None;
+
     // EOWNERDEAD, seems to be the same on linux, macos, and bash on windows.
     std::process::exit(130);
 }