From ccac015db516dfe0971c3691cf462d9f4791b2f7 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 9 Dec 2021 18:02:54 -0500 Subject: [PATCH 1/2] container/deploy: Add error context On general principle to help pin down errors. --- lib/src/container/deploy.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/src/container/deploy.rs b/lib/src/container/deploy.rs index 020657e5..39b2b688 100644 --- a/lib/src/container/deploy.rs +++ b/lib/src/container/deploy.rs @@ -3,6 +3,7 @@ use super::OstreeImageReference; use crate::container::store::PrepareResult; use anyhow::Result; +use fn_error_context::context; use ostree::glib; /// The key in the OSTree origin which holds a serialized [`super::OstreeImageReference`]. @@ -30,6 +31,7 @@ pub struct DeployOpts<'a> { /// Write a container image to an OSTree deployment. /// /// This API is currently intended for only an initial deployment. +#[context("Performing deployment")] pub async fn deploy( sysroot: &ostree::Sysroot, stateroot: &str, From a27dac83831297a6e83bd25c5b6b1b842249ad4d Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 9 Dec 2021 18:11:48 -0500 Subject: [PATCH 2/2] containers: Better handle errors from worker and/or driver I was seeing this in a `cosa build`: ``` + rpm-ostree ex-container image deploy --imgref ostree-unverified-image:oci-archive:/var/srv/walters/builds/rhcos-master/builds/410.84.202112092014-0-1/x86_64/rhcos-410.84.202112092014-0-ostree.x86_64.ociarchive --stateroot rhcos --sysroot /tmp/rootfs --karg=random.trust_cpu=on --karg=console=tty0 --karg=console=ttyS0,115200n8 --karg=ignition.platform.id=qemu '--karg=$ignition_firstboot' error: Performing deployment: remote error: write |1: broken pipe ``` which is not useful. This is really a brutal hack around the fact that an error can occur on either our side or in the proxy. But if an error occurs on our side, then we will close the pipe, which will *also* cause the proxy to error out. 
What we really want is for the proxy to tell us when it got an error from us closing the pipe. Or, we could store that state on our side. Both are slightly tricky, so we have this (again) hacky thing where we just search for `broken pipe` in the error text. Or to restate all of the above - what this function does is check to see if the worker function had an error *and* if the proxy had an error, but if the proxy's error ends in `broken pipe` then it means the only real error is from the worker. Now: ``` + rpm-ostree ex-container image deploy --imgref ostree-unverified-image:oci-archive:/var/srv/walters/builds/rhcos-master/builds/410.84.202112092014-0-1/x86_64/rhcos-410.84.202112092014-0-ostree.x86_64.ociarchive --stateroot rhcos --sysroot /tmp/rootfs --karg=random.trust_cpu=on --karg=console=tty0 --karg=console=ttyS0,115200n8 --karg=ignition.platform.id=qemu '--karg=$ignition_firstboot' error: Performing deployment: Parsing blob sha256:9448e2c9ad473c7d63d7d7789eadd28e5ae72f37eb8a1c4901b7bd76764e9bd0: object 4f/5cc466c863110ecd782153cc3a126ba8593d531dde621539a2d1a290b6482b.file: Processing content object 4f5cc466c863110ecd782153cc3a126ba8593d531dde621539a2d1a290b6482b: Writing content object: Corrupted file object; checksum expected='4f5cc466c863110ecd782153cc3a126ba8593d531dde621539a2d1a290b6482b' actual='63e8321f6ed6f189d37d98d61e782e6e8c9031103c97c983c696de6ca42702f4' ``` (But why is that object corrupted? Don't know yet, that's an exciting new problem!) 
--- lib/src/container/store.rs | 13 +++++----- lib/src/container/unencapsulate.rs | 41 +++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/lib/src/container/store.rs b/lib/src/container/store.rs index e852b0b3..28525f9f 100644 --- a/lib/src/container/store.rs +++ b/lib/src/container/store.rs @@ -279,10 +279,9 @@ impl LayeredImageImporter { ) .await?; let importer = crate::tar::import_tar(&self.repo, blob, None); - let (commit, driver) = tokio::join!(importer, driver); - driver?; - let commit = - commit.with_context(|| format!("Parsing blob {}", base_layer_ref.digest()))?; + let commit = super::unencapsulate::join_fetch(importer, driver) + .await + .with_context(|| format!("Parsing blob {}", base_layer_ref.digest()))?; // TODO support ref writing in tar import self.repo.set_ref_immediate( None, @@ -314,9 +313,9 @@ impl LayeredImageImporter { }; let w = crate::tar::write_tar(&self.repo, blob, layer.ostree_ref.as_str(), Some(opts)); - let (r, driver) = tokio::join!(w, driver); - let r = r.with_context(|| format!("Parsing layer blob {}", layer.digest()))?; - driver?; + let r = super::unencapsulate::join_fetch(w, driver) + .await + .with_context(|| format!("Parsing layer blob {}", layer.digest()))?; layer_commits.push(r.commit); if !r.filtered.is_empty() { let filtered = HashMap::from_iter(r.filtered.into_iter()); diff --git a/lib/src/container/unencapsulate.rs b/lib/src/container/unencapsulate.rs index 4ccf5e57..880ee522 100644 --- a/lib/src/container/unencapsulate.rs +++ b/lib/src/container/unencapsulate.rs @@ -130,6 +130,41 @@ fn require_one_layer_blob(manifest: &oci_image::ImageManifest) -> Result<&oci_im } } +/// Use this to process potential errors from a worker and a driver. +/// This is really a brutal hack around the fact that an error can occur +/// on either our side or in the proxy. But if an error occurs on our +/// side, then we will close the pipe, which will *also* cause the proxy +/// to error out. 
+/// +/// What we really want is for the proxy to tell us when it got an +/// error from us closing the pipe. Or, we could store that state +/// on our side. Both are slightly tricky, so we have this (again) +/// hacky thing where we just search for `broken pipe` in the error text. +/// +/// Or to restate all of the above - what this function does is check +/// to see if the worker function had an error *and* if the proxy +/// had an error, but if the proxy's error ends in `broken pipe` +/// then it means the only real error is from the worker. +pub(crate) async fn join_fetch<T>( + worker: impl Future<Output = Result<T>>, + driver: impl Future<Output = Result<()>>, +) -> Result<T> { + let (worker, driver) = tokio::join!(worker, driver); + match (worker, driver) { + (Ok(t), Ok(())) => Ok(t), + (Err(worker), Err(driver)) => { + let text = driver.root_cause().to_string(); + if text.ends_with("broken pipe") { + Err(worker) + } else { + Err(worker.context(format!("proxy failure: {} and client error", text))) + } + } + (Ok(_), Err(driver)) => Err(driver), + (Err(worker), Ok(())) => Err(worker), + } +} + /// Configuration for container fetches. #[derive(Debug, Default)] pub struct UnencapsulateOptions { @@ -219,9 +254,9 @@ async fn unencapsulate_from_manifest_impl( SignatureSource::ContainerPolicy | SignatureSource::ContainerPolicyAllowInsecure => {} } let import = crate::tar::import_tar(repo, blob, Some(taropts)); - let (import, driver) = tokio::join!(import, driver); - driver?; - let ostree_commit = import.with_context(|| format!("Parsing blob {}", layer.digest()))?; + let ostree_commit = join_fetch(import, driver) + .await + .with_context(|| format!("Parsing blob {}", layer.digest()))?; event!(Level::DEBUG, "created commit {}", ostree_commit); Ok(ostree_commit)