Skip to content

Commit

Permalink
[antlir2][gc] go back to delete-on-next-build scheme
Browse files Browse the repository at this point in the history
Summary:
Go back to just asking buck2 to not clean up outputs from previous actions and
use that symlink to delete newly-unnecessary subvols.

This might leak some subvolumes over time, but a shift to fully-rootless image
builds will almost definitely require a large rework of how image contents are
stored on disk anyway, so we can resolve it then. In the meantime, this should
make it extremely unlikely that we'll see subvolumes getting deleted when they
should not be, which we are seeing today.

Test Plan:
Images build
```
❯ buck2 build --show-output fbcode//antlir/antlir2/facebook/images/build_appliance/centos9:build-appliance
Buck UI: https://www.internalfb.com/buck2/1101b21e-d4e1-467f-b1fd-8fd9a0c10638
Network: Up: 522B  Down: 0B  (reSessionID-36bafe8f-f7e2-4919-9a1a-744061ac3cc1)
Jobs completed: 14. Time elapsed: 10.3s.
Cache hits: 0%. Commands: 9 (cached: 0, remote: 0, local: 9). Fallback: 3/9
BUILD SUCCEEDED
fbcode//antlir/antlir2/facebook/images/build_appliance/centos9:build-appliance buck-out/v2/gen/fbcode/de11e7c77e0a6d35/antlir/antlir2/facebook/images/build_appliance/centos9/__build-appliance__/subvol-compile_compile

[email protected] in fbsource
❯ ls buck-out/v2/gen/fbcode/de11e7c77e0a6d35/antlir/antlir2/facebook/images/build_appliance/centos9/__build-appliance__/subvol-compile_compile
afs  __antlir2__  bin  boot  dev  etc  home  lib  lib64  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
```

Reviewed By: sergeyfd

Differential Revision: D52259387

fbshipit-source-id: 9cfdce8eb753fd29409b552a24add60a248c508f
  • Loading branch information
vmagro authored and facebook-github-bot committed Dec 19, 2023
1 parent 9a04e3a commit f71ace4
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 419 deletions.
36 changes: 29 additions & 7 deletions antlir/antlir2/antlir2/src/cmd/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ use nix::mount::MsFlags;
use nix::sched::unshare;
use nix::sched::CloneFlags;
use tracing::debug;
use tracing::trace;
use tracing::warn;

use super::compile::Compile;
use super::compile::CompileExternal;
Expand Down Expand Up @@ -171,6 +173,33 @@ impl Map {
})??;
debug!("map finished, making subvol {subvol:?} readonly");
rootless.as_root(|| subvol.set_readonly(true).context("while making subvol r/o"))??;

if self.setup.output.exists() {
trace!("removing existing output {}", self.setup.output.display());
rootless.as_root(|| {
// Don't fail if the old subvol couldn't be deleted, just print
// a warning. We really don't want to fail someone's build if
// the only thing that went wrong is not being able to delete
// the last version of it.
match Subvolume::open(&self.setup.output) {
Ok(old_subvol) => {
if let Err(e) = old_subvol.delete() {
warn!(
"couldn't delete old subvol '{}': {e:?}",
self.setup.output.display()
);
}
}
Err(e) => {
warn!(
"couldn't open old subvol '{}': {e:?}",
self.setup.output.display()
);
}
}
})?;
}

debug!(
"linking {} -> {}",
self.setup.output.display(),
Expand All @@ -180,13 +209,6 @@ impl Map {
std::os::unix::fs::symlink(subvol.path(), &self.setup.output)
.context("while making symlink")?;

working_volume
.keep_path_alive(subvol.path(), &self.setup.output)
.context("while setting up refcount")?;
working_volume
.collect_garbage()
.context("while garbage collecting old outputs")?;

Ok(())
}
}
25 changes: 21 additions & 4 deletions antlir/antlir2/antlir2_packager/src/sendstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,30 @@ impl PackageFormat for Sendstream {
Ok::<_, anyhow::Error>(subvol)
})??;

if self.subvol_symlink.exists() {
trace!("removing existing output {}", self.subvol_symlink.display());
rootless.as_root(|| match Subvolume::open(&self.subvol_symlink) {
Ok(old_subvol) => {
if let Err(e) = old_subvol.delete() {
warn!(
"couldn't delete old subvol '{}': {e:?}",
self.subvol_symlink.display()
);
}
}
Err(e) => {
warn!(
"couldn't open old subvol '{}': {e:?}",
self.subvol_symlink.display()
);
}
})?;
}

let _ = std::fs::remove_file(&self.subvol_symlink);
std::os::unix::fs::symlink(subvol.path(), &self.subvol_symlink)
.context("while symlinking packaged subvol")?;

working_volume
.keep_path_alive(&final_path, out)
.context("while setting up gc refcount")?;

Ok(())
}
}
40 changes: 29 additions & 11 deletions antlir/antlir2/antlir2_receive/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use nix::sched::unshare;
use nix::sched::CloneFlags;
use nix::unistd::Uid;
use tracing::trace;
use tracing::warn;
use tracing_subscriber::prelude::*;

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -72,6 +73,7 @@ impl Receive {
.allocate_new_path()
.context("while allocating new path for subvol")?;
trace!("WorkingVolume gave us new path {}", dst.display());

Ok(dst)
}

Expand Down Expand Up @@ -166,19 +168,35 @@ impl Receive {

drop(root);

if self.output.exists() {
trace!("removing existing output {}", self.output.display());
rootless.as_root(|| {
// Don't fail if the old subvol couldn't be deleted, just print
// a warning. We really don't want to fail someone's build if
// the only thing that went wrong is not being able to delete
// the last version of it.
match Subvolume::open(&self.output) {
Ok(old_subvol) => {
if let Err(e) = old_subvol.delete() {
warn!(
"couldn't delete old subvol '{}': {e:?}",
self.output.display()
);
}
}
Err(e) => {
warn!(
"couldn't open old subvol '{}': {e:?}",
self.output.display()
);
}
}
})?;
}

let _ = std::fs::remove_file(&self.output);
std::os::unix::fs::symlink(subvol.path(), &self.output).context("while making symlink")?;

rootless.as_root(|| {
working_volume.keep_path_alive(&dst, &self.output)?;
trace!(
"marked path {} with keepalive {}",
dst.display(),
self.output.display()
);
working_volume
.collect_garbage()
.context("while garbage collecting old outputs")
})??;
Ok(())
}
}
Expand Down
75 changes: 1 addition & 74 deletions antlir/antlir2/antlir2_working_volume/BUCK
Original file line number Diff line number Diff line change
@@ -1,87 +1,14 @@
load("//antlir/antlir2/bzl/feature:defs.bzl", "feature")
load("//antlir/antlir2/bzl/image:defs.bzl", "image")
load("//antlir/antlir2/testing:image_test.bzl", "image_rust_test")
load("//antlir/bzl:build_defs.bzl", "rust_binary", "rust_library", "third_party")
load("//antlir/bzl:build_defs.bzl", "rust_library")

oncall("antlir")

rust_library(
name = "antlir2_working_volume",
srcs = glob(["src/**/*.rs"]),
deps = [
"anyhow",
"nix",
"serde",
"serde_json",
"thiserror",
"tracing",
"uuid",
"//antlir/antlir2/antlir2_btrfs:antlir2_btrfs",
],
)

rust_binary(
name = "garbage-collect",
srcs = ["bin/garbage_collect.rs"],
deps = [
"anyhow",
"clap",
"tracing",
"tracing-subscriber",
":antlir2_working_volume",
],
)

image.layer(
name = "test-layer",
features = [
feature.ensure_dirs_exist(dirs = "/sbin"),
feature.install(
src = "//antlir:empty",
dst = "/sbin/nologin",
mode = "a+rx",
),
feature.user_add(
home_dir = "/",
primary_group = "antlir",
uid = 1000,
username = "antlir",
),
feature.group_add(
gid = 1000,
groupname = "antlir",
),
feature.rpms_install(rpms = ["basesystem"]),
feature.ensure_dirs_exist(
dirs = "/empty_repo",
group = "antlir",
user = "antlir",
),
feature.ensure_dirs_exist(
dirs = "/repo/antlir2-out",
group = "antlir",
user = "antlir",
),
feature.ensure_dirs_exist(
dirs = "/links/",
group = "antlir",
user = "antlir",
),
],
flavor = "//antlir/antlir2/test_images:test-image-flavor",
)

image_rust_test(
name = "tests",
srcs = ["tests.rs"],
crate_root = "tests.rs",
layer = ":test-layer",
run_as_user = "antlir",
deps = [
":antlir2_working_volume",
third_party.library(
"nix",
platform = "rust",
),
],
)
30 changes: 0 additions & 30 deletions antlir/antlir2/antlir2_working_volume/bin/garbage_collect.rs

This file was deleted.

Loading

0 comments on commit f71ace4

Please sign in to comment.