From 61f089af2c629d130acfb63db8ae5571ddd6447a Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Thu, 26 Mar 2020 12:51:18 -0400 Subject: [PATCH] osmet: efficient packing/unpacking of CoreOS metal images Introduce a new `osmet` command with two primary subcommands: `pack` and `unpack`. Together, these allow offline bare metal installation of CoreOS images while only slightly increasing the size of the initramfs. The `pack` subcommand takes as input a block device of a CoreOS metal image and the expected checksum to match. It mounts the root partition from that device and generates a smaller version of the metal image itself with the OSTree objects "punched out" (this is called the "punched image" in the code). The command outputs this smaller version as well as a lookup table of where the OSTree objects belonged into an "osmet" binary file. The `unpack` subcommand takes as input an osmet binary file and a path to an OSTree repo and reconstructs the metal image, bit for bit. In practice this command is mostly useful for testing. The following patch will teach the `install` command to use the osmet path by default, which is how users will interact with this. 
--- Cargo.lock | 72 +++++ Cargo.toml | 5 + src/blockdev.rs | 66 +++- src/cmdline.rs | 187 ++++++++++- src/errors.rs | 2 + src/install.rs | 2 +- src/main.rs | 4 + src/osmet/fiemap.rs | 177 +++++++++++ src/osmet/file.rs | 212 +++++++++++++ src/osmet/io_helpers.rs | 189 +++++++++++ src/osmet/mod.rs | 680 ++++++++++++++++++++++++++++++++++++++++ src/osmet/unpacker.rs | 266 ++++++++++++++++ src/source.rs | 90 +++++- 13 files changed, 1937 insertions(+), 15 deletions(-) create mode 100644 src/osmet/fiemap.rs create mode 100644 src/osmet/file.rs create mode 100644 src/osmet/io_helpers.rs create mode 100644 src/osmet/mod.rs create mode 100644 src/osmet/unpacker.rs diff --git a/Cargo.lock b/Cargo.lock index ca7f6669c..b8086f64a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,16 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" +[[package]] +name = "bincode" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5753e2a71534719bf3f4e57006c3a4f0d2c672a4b676eec84161f763eca87dbf" +dependencies = [ + "byteorder", + "serde", +] + [[package]] name = "bitflags" version = "1.2.1" @@ -151,14 +161,18 @@ checksum = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" name = "coreos-installer" version = "0.1.4-alpha.0" dependencies = [ + "bincode", "byte-unit", "clap", "cpio", "error-chain", "flate2", "hex", + "libc", "maplit", "nix", + "openssl", + "pipe", "progress-streams", "regex", "reqwest", @@ -167,6 +181,7 @@ dependencies = [ "sha2", "tempfile", "url", + "walkdir", "xz2", ] @@ -185,6 +200,25 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ec7fcd21571dc78f96cc96243cab8d8f035247c3efd16c687be154c3fa9efa" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" 
+version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" +dependencies = [ + "cfg-if", + "lazy_static", +] + [[package]] name = "digest" version = "0.8.1" @@ -727,6 +761,15 @@ version = "0.1.0-alpha.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5894c618ce612a3fa23881b152b608bafb8c56cfc22f434a3ba3120b40f7b587" +[[package]] +name = "pipe" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9f763a706963c2af5e4e7f5b29a93a42809568b857d73ab8c0c4ecf8edf7f8f" +dependencies = [ + "crossbeam-channel", +] + [[package]] name = "pkg-config" version = "0.3.17" @@ -878,6 +921,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.18" @@ -1172,6 +1224,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +[[package]] +name = "walkdir" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +dependencies = [ + "same-file", + "winapi 0.3.8", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -1294,6 +1357,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa515c5163a99cc82bab70fd3bfdd36d827be85de63737b40fcef2ce084a436e" +dependencies = [ + "winapi 0.3.8", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 8632db1db..596dc4ccf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,13 +24,17 @@ path = "src/main.rs" lto = true [dependencies] +bincode = "^1.2" byte-unit = "^3.0" clap = "^2.33" cpio = "^0.2" error-chain = { version = "^0.12", default-features = false } flate2 = "^1.0" hex = "^0.4" +libc = "0.2.67" nix = "^0.17" +openssl = "^0.10.28" +pipe = "^0.2.0" progress-streams = "^1.1" regex = "^1.3" reqwest = { version = "^0.10", features = ["blocking"] } @@ -39,6 +43,7 @@ serde_json = "^1.0" sha2 = "^0.8" tempfile = "^3.1" url = "^2.1" +walkdir = "^2.3" xz2 = "^0.1" [dev-dependencies] diff --git a/src/blockdev.rs b/src/blockdev.rs index 091ebce86..80c4b6ed7 100644 --- a/src/blockdev.rs +++ b/src/blockdev.rs @@ -13,12 +13,14 @@ // limitations under the License. use error_chain::bail; +use nix::sys::stat::{major, minor}; use nix::{errno::Errno, mount}; use regex::Regex; use std::collections::HashMap; use std::convert::TryInto; -use std::fs::{remove_dir, File, OpenOptions}; -use std::num::NonZeroU32; +use std::fs::{metadata, read_to_string, remove_dir, File, OpenOptions}; +use std::num::{NonZeroU32, NonZeroU64}; +use std::os::linux::fs::MetadataExt; use std::os::raw::c_int; use std::os::unix::fs::FileTypeExt; use std::os::unix::io::AsRawFd; @@ -147,6 +149,51 @@ impl Mount { pub fn mountpoint(&self) -> &Path { self.mountpoint.as_path() } + + pub fn get_partition_offsets(&self) -> Result<(u64, u64)> { + let dev = metadata(&self.device) + .chain_err(|| format!("getting metadata for {}", &self.device))? 
+ .st_rdev(); + let maj: u64 = major(dev); + let min: u64 = minor(dev); + + let start = read_sysfs_dev_block_value_u64(maj, min, "start")?; + let size = read_sysfs_dev_block_value_u64(maj, min, "size")?; + + // We multiply by 512 here: the kernel values are always in 512 blocks, regardless of the + // actual sector size of the block device. We keep the values as bytes to make things + // easier. + let start_offset: u64 = start + .checked_mul(512) + .ok_or_else(|| "start offset mult overflow")?; + let end_offset: u64 = start_offset + .checked_add( + size.checked_mul(512) + .ok_or_else(|| "end offset mult overflow")?, + ) + .ok_or_else(|| "end offset add overflow")?; + Ok((start_offset, end_offset)) + } +} + +fn read_sysfs_dev_block_value_u64(maj: u64, min: u64, field: &str) -> Result { + let s = read_sysfs_dev_block_value(maj, min, field).chain_err(|| { + format!( + "reading partition {}:{} {} value from sysfs", + maj, min, field + ) + })?; + Ok(s.parse().chain_err(|| { + format!( + "parsing partition {}:{} {} value \"{}\" as u64", + maj, min, field, &s + ) + })?) +} + +fn read_sysfs_dev_block_value(maj: u64, min: u64, field: &str) -> Result { + let path = PathBuf::from(format!("/sys/dev/block/{}:{}/{}", maj, min, field)); + Ok(read_to_string(&path)?.trim_end().into()) } impl Drop for Mount { @@ -202,6 +249,7 @@ pub fn reread_partition_table(file: &mut File) -> Result<()> { Ok(()) } +/// Get the sector size of the block device at a given path. pub fn get_sector_size_for_path(device: &Path) -> Result { let dev = OpenOptions::new() .read(true) @@ -235,13 +283,25 @@ pub fn get_sector_size(file: &File) -> Result { } } +/// Get the size of a block device. 
+pub fn get_block_device_size(file: &File) -> Result { + let fd = file.as_raw_fd(); + let mut size: libc::size_t = 0; + match unsafe { ioctl::blkgetsize64(fd, &mut size) } { + // just cast using `as`: there is no platform we care about today where size_t > 64bits + Ok(_) => NonZeroU64::new(size as u64).ok_or_else(|| "found block size of zero".into()), + Err(e) => Err(Error::with_chain(e, "getting block size")), + } +} + // create unsafe ioctl wrappers #[allow(clippy::missing_safety_doc)] mod ioctl { use super::c_int; - use nix::{ioctl_none, ioctl_read_bad, request_code_none}; + use nix::{ioctl_none, ioctl_read, ioctl_read_bad, request_code_none}; ioctl_none!(blkrrpart, 0x12, 95); ioctl_read_bad!(blksszget, request_code_none!(0x12, 104), c_int); + ioctl_read!(blkgetsize64, 0x12, 114, libc::size_t); } pub fn udev_settle() -> Result<()> { diff --git a/src/cmdline.rs b/src/cmdline.rs index 649f02a45..fc3e7d424 100644 --- a/src/cmdline.rs +++ b/src/cmdline.rs @@ -29,6 +29,9 @@ pub enum Config { IsoEmbed(IsoEmbedConfig), IsoShow(IsoShowConfig), IsoRemove(IsoRemoveConfig), + OsmetFiemap(OsmetFiemapConfig), + OsmetPack(OsmetPackConfig), + OsmetUnpack(OsmetUnpackConfig), } pub struct InstallConfig { @@ -71,6 +74,23 @@ pub struct IsoRemoveConfig { pub output: Option, } +pub struct OsmetFiemapConfig { + pub file: String, +} + +pub struct OsmetPackConfig { + pub output: String, + pub device: String, + pub checksum: String, + pub description: String, +} + +pub struct OsmetUnpackConfig { + pub repo: String, + pub osmet: String, + pub device: String, +} + /// Parse command-line arguments. 
pub fn parse_args() -> Result { let uname = nix::sys::utsname::uname(); @@ -94,7 +114,6 @@ pub fn parse_args() -> Result { .long("stream") .value_name("name") .help("Fedora CoreOS stream") - .default_value("stable") .takes_value(true), ) .arg( @@ -163,6 +182,11 @@ pub fn parse_args() -> Result { .next_line_help(true), // so we can stay under 80 chars ) // obscure options without short names + .arg( + Arg::with_name("offline") + .long("offline") + .help("Force offline installation"), + ) .arg( Arg::with_name("insecure") .long("insecure") @@ -357,6 +381,90 @@ pub fn parse_args() -> Result { ), ), ) + .subcommand( + SubCommand::with_name("osmet") + .about("Efficient CoreOS metal disk image packing using OSTree commits") + // users shouldn't be interacting with this command normally + .setting(AppSettings::Hidden) + .subcommand( + SubCommand::with_name("pack") + .about("Create osmet file from CoreOS block device") + .arg( + Arg::with_name("output") + .long("output") + .value_name("FILE") + .required(true) // could output to stdout if missing? 
+ .help("Path to osmet file to write") + .takes_value(true), + ) + .arg( + // XXX: rebase on top of + // https://github.com/coreos/coreos-installer/pull/178 and use the same + // type-digest format + Arg::with_name("checksum") + .long("checksum") + .value_name("SHA256") + .required(true) + .help("Expected SHA256 of block device") + .takes_value(true), + ) + .arg( + Arg::with_name("description") + .long("description") + .value_name("TEXT") + .required(true) + .help("Description of OS") + .takes_value(true), + ) + // positional args + .arg( + Arg::with_name("device") + .help("Source device") + .value_name("DEV") + .required(true) + .takes_value(true), + ), + ) + .subcommand( + SubCommand::with_name("unpack") + .about("Generate raw metal image from osmet file and OSTree repo") + .arg( + Arg::with_name("osmet") + .help("osmet file") + .value_name("PATH") + .required(true) + .long("osmet") + .takes_value(true), + ) + // positional args + .arg( + Arg::with_name("repo") + .help("OSTree repo") + .value_name("PATH") + .required(true) + .takes_value(true), + ) + .arg( + Arg::with_name("device") + .help("Destination device") + .value_name("DEV") + .required(true) + .takes_value(true), + ), + ) + .subcommand( + SubCommand::with_name("fiemap") + .about("Print file extent mapping of specific file") + // positional args + .arg( + Arg::with_name("file") + .help("File to map") + .value_name("PATH") + .required(true) + .takes_value(true), + ), + ), + ) .get_matches(); match app_matches.subcommand() { @@ -369,6 +477,12 @@ pub fn parse_args() -> Result { ("remove", Some(matches)) => parse_iso_remove(&matches), _ => bail!("unrecognized 'iso' subcommand"), }, + ("osmet", Some(osmet_matches)) => match osmet_matches.subcommand() { + ("pack", Some(matches)) => parse_osmet_pack(&matches), + ("unpack", Some(matches)) => parse_osmet_unpack(&matches), + ("fiemap", Some(matches)) => parse_osmet_fiemap(&matches), + _ => bail!("unrecognized 'osmet' subcommand"), + }, _ => bail!("unrecognized 
subcommand"), } } @@ -378,6 +492,13 @@ fn parse_install(matches: &ArgMatches) -> Result { .value_of("device") .map(String::from) .expect("device missing"); + let architecture = matches + .value_of("architecture") + .expect("architecture missing"); + + let sector_size = get_sector_size_for_path(Path::new(&device)) + .chain_err(|| format!("getting sector size of {}", &device))? + .get(); // Build image location. Ideally we'd use conflicts_with (and an // ArgGroup for streams), but that doesn't play well with default @@ -390,16 +511,18 @@ fn parse_install(matches: &ArgMatches) -> Result { let image_url = Url::parse(matches.value_of("image-url").expect("image-url missing")) .chain_err(|| "parsing image URL")?; Box::new(UrlLocation::new(&image_url)) + } else if matches.is_present("offline") { + match OsmetLocation::new(architecture, sector_size)? { + Some(osmet) => Box::new(osmet), + None => bail!("cannot perform offline install; metadata missing"), + } } else { let base_url = if let Some(stream_base_url) = matches.value_of("stream-base-url") { Some(Url::parse(stream_base_url).chain_err(|| "parsing stream base URL")?) } else { None }; - let format = match get_sector_size_for_path(Path::new(&device)) - .chain_err(|| format!("getting sector size of {}", &device))? 
- .get() - { + let format = match sector_size { 4096 => "4k.raw.xz", 512 => "raw.xz", n => { @@ -412,12 +535,9 @@ fn parse_install(matches: &ArgMatches) -> Result { "raw.xz" } }; - Box::new(StreamLocation::new( - matches.value_of("stream").expect("stream missing"), - matches - .value_of("architecture") - .expect("architecture missing"), + matches.value_of("stream").unwrap_or("stable"), + architecture, "metal", format, base_url.as_ref(), @@ -535,3 +655,50 @@ fn parse_iso_remove(matches: &ArgMatches) -> Result { output: matches.value_of("output").map(String::from), })) } + +fn parse_osmet_pack(matches: &ArgMatches) -> Result { + Ok(Config::OsmetPack(OsmetPackConfig { + output: matches + .value_of("output") + .map(String::from) + .expect("output missing"), + device: matches + .value_of("device") + .map(String::from) + .expect("device missing"), + checksum: matches + .value_of("checksum") + .map(String::from) + .expect("checksum missing"), + description: matches + .value_of("description") + .map(String::from) + .expect("description missing"), + })) +} + +fn parse_osmet_unpack(matches: &ArgMatches) -> Result { + Ok(Config::OsmetUnpack(OsmetUnpackConfig { + repo: matches + .value_of("repo") + .map(String::from) + .expect("repo missing"), + osmet: matches + .value_of("osmet") + .map(String::from) + .expect("osmet file missing"), + device: matches + .value_of("device") + .map(String::from) + .expect("device missing"), + })) +} + +fn parse_osmet_fiemap(matches: &ArgMatches) -> Result { + Ok(Config::OsmetFiemap(OsmetFiemapConfig { + file: matches + .value_of("file") + .map(String::from) + .expect("file missing"), + })) +} diff --git a/src/errors.rs b/src/errors.rs index f991f70b6..d8e8a34fb 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -21,5 +21,7 @@ error_chain! 
{ Reqwest(reqwest::Error); Url(url::ParseError); Nix(nix::Error); + WalkDir(walkdir::Error); + Parse(std::num::ParseIntError); } } diff --git a/src/install.rs b/src/install.rs index f2e037006..a8fae9ccf 100644 --- a/src/install.rs +++ b/src/install.rs @@ -90,7 +90,7 @@ pub fn install(config: &InstallConfig) -> Result<()> { if !sources.is_empty() { bail!("found multiple artifacts"); } - if source.signature.is_none() { + if source.signature.is_none() && config.location.require_signature() { if config.insecure { eprintln!("Signature not found; skipping verification as requested"); } else { diff --git a/src/main.rs b/src/main.rs index 4e5ef7234..2145cb4ec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,6 +18,7 @@ mod download; mod errors; mod install; mod iso; +mod osmet; mod source; mod verify; @@ -37,5 +38,8 @@ fn run() -> Result<()> { Config::IsoEmbed(c) => iso::iso_embed(&c), Config::IsoShow(c) => iso::iso_show(&c), Config::IsoRemove(c) => iso::iso_remove(&c), + Config::OsmetFiemap(c) => osmet::osmet_fiemap(&c), + Config::OsmetPack(c) => osmet::osmet_pack(&c), + Config::OsmetUnpack(c) => osmet::osmet_unpack(&c), } } diff --git a/src/osmet/fiemap.rs b/src/osmet/fiemap.rs new file mode 100644 index 000000000..0c750148e --- /dev/null +++ b/src/osmet/fiemap.rs @@ -0,0 +1,177 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::ffi::OsStr; +use std::fs::OpenOptions; +use std::os::unix::io::AsRawFd; +use std::os::unix::io::RawFd; + +use serde::{Deserialize, Serialize}; + +use crate::errors::*; + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +pub(super) struct Extent { + pub logical: u64, + pub physical: u64, + pub length: u64, +} + +pub(super) fn fiemap_path(path: &OsStr) -> Result> { + let file = OpenOptions::new() + .read(true) + .open(path) + .chain_err(|| format!("opening {:?}", path))?; + + let fd = file.as_raw_fd(); + Ok(fiemap(fd).chain_err(|| format!("mapping {:?}", path))?) +} + +/// Returns the `Extent`s associated with the given file. Note that the physical offsets are +/// relative to the partition start on which the file resides. +fn fiemap(fd: RawFd) -> Result> { + let mut m = ffi::fiemap::new(); + let mut extents: Vec = Vec::new(); + + loop { + m.fm_start = match extents.iter().last() { + Some(extent) => extent.logical + extent.length, + None => 0, + }; + + // just add FS_IOC_FIEMAP in the error msg; higher-level callers will provide more context + unsafe { ffi::ioctl::fs_ioc_fiemap(fd, &mut m).chain_err(|| "ioctl(FS_IOC_FIEMAP)")? }; + if m.fm_mapped_extents == 0 { + break; + } + + let mut found_last = false; + for extent in m.fm_extents.iter().take(m.fm_mapped_extents as usize) { + // These three are not strictly errors; we could just ignore them and let them be part + // of the packed image. Though let's error out for now, so that (1) we notice them and + // investigate if they do occur, and (2) we don't end up in scenarios where lots of + // extents fall in those buckets and we end up with hyperinflated osmet binaries. 
+ if extent.fe_flags & ffi::FIEMAP_EXTENT_NOT_ALIGNED > 0 { + return Err("extent not aligned".into()); + } else if extent.fe_flags & ffi::FIEMAP_EXTENT_MERGED > 0 { + return Err("file does not support extents".into()); + } else if extent.fe_flags & ffi::FIEMAP_EXTENT_ENCODED > 0 { + return Err("extent encoded".into()); + // the ones below this, we do not expect to hit on a "dead" ro rootfs + } else if extent.fe_flags & ffi::FIEMAP_EXTENT_DELALLOC > 0 { + return Err("extent not allocated yet".into()); + } else if extent.fe_flags & ffi::FIEMAP_EXTENT_UNWRITTEN > 0 { + return Err("extent preallocated".into()); + } else if extent.fe_flags & ffi::FIEMAP_EXTENT_UNKNOWN > 0 { + return Err("extent inaccessible".into()); + } + + extents.push(Extent { + logical: extent.fe_logical, + physical: extent.fe_physical, + length: extent.fe_length, + }); + + if extent.fe_flags & ffi::FIEMAP_EXTENT_LAST > 0 { + found_last = true; + } + } + + if found_last { + break; + } + } + + Ok(extents) +} + +// nest it so it's private to us (ioctl! always declares as `pub`) +mod ffi { + use std::mem::{size_of, zeroed}; + + // The 32 here is somewhat arbitrary; it comes out to a bit less than a 2k buffer for the + // whole struct. filefrag uses 16k on the stack, e4defrag uses ~220k on the heap. But we + // can be much less hungry since we don't expect to operate on fragmented filesystems. That + // way we can comfortably allocate on the stack. + const EXTENT_COUNT: usize = 32; + + // This is a hack to get the size of the fiemap struct *without* the extents array. We could + // use offset_of(fiemap, fm_extents) once that's available as a `const fn`. + const FIEMAP_SIZE: u32 = + ((size_of::() as u32) - (size_of::<[fiemap_extent; EXTENT_COUNT]>() as u32)); + + // https://github.com/torvalds/linux/blob/0a679e13ea30f85a1aef0669ee0c5a9fd7860b34/include/uapi/linux/fs.h#L208 + // We have to use _bad! here because we don't want the macro to use size_of:: directly. 
+ #[allow(clippy::missing_safety_doc)] + pub mod ioctl { + use nix::{ioctl_readwrite_bad, request_code_readwrite}; + ioctl_readwrite_bad!( + fs_ioc_fiemap, + request_code_readwrite!(b'f', 11, super::FIEMAP_SIZE), + super::fiemap + ); + } + + // make this a submod so we can apply dead_code on the whole bunch + #[allow(dead_code)] + #[allow(clippy::unreadable_literal)] + pub mod fiemap_extent_flags { + pub const FIEMAP_EXTENT_LAST: u32 = 0x00000001; // Last extent in file. + pub const FIEMAP_EXTENT_UNKNOWN: u32 = 0x00000002; // Data location unknown. + pub const FIEMAP_EXTENT_DELALLOC: u32 = 0x00000004; // Location still pending. Sets EXTENT_UNKNOWN. + pub const FIEMAP_EXTENT_ENCODED: u32 = 0x00000008; // Data can not be read while fs is unmounted + pub const FIEMAP_EXTENT_DATA_ENCRYPTED: u32 = 0x00000080; // Data is encrypted by fs. Sets EXTENT_NO_BYPASS. + pub const FIEMAP_EXTENT_NOT_ALIGNED: u32 = 0x00000100; // Extent offsets may not be block aligned. + pub const FIEMAP_EXTENT_DATA_INLINE: u32 = 0x00000200; // Data mixed with metadata. Sets EXTENT_NOT_ALIGNED. + pub const FIEMAP_EXTENT_DATA_TAIL: u32 = 0x00000400; // Multiple files in block. Sets EXTENT_NOT_ALIGNED. + pub const FIEMAP_EXTENT_UNWRITTEN: u32 = 0x00000800; // Space allocated, but no data (i.e. zero). + pub const FIEMAP_EXTENT_MERGED: u32 = 0x00001000; // File does not natively support extents. Result merged for efficiency. + pub const FIEMAP_EXTENT_SHARED: u32 = 0x00002000; // Space shared with other files. 
+ } + pub use fiemap_extent_flags::*; + + // https://github.com/torvalds/linux/blob/0a679e13ea30f85a1aef0669ee0c5a9fd7860b34/Documentation/filesystems/fiemap.txt#L15 + #[repr(C)] + #[derive(Debug)] + pub struct fiemap { + pub fm_start: u64, // logical offset (inclusive) at which to start mapping (in) + pub fm_length: u64, // logical length of mapping which userspace cares about (in) + pub fm_flags: u32, // FIEMAP_FLAG_* flags for request (in/out) + pub fm_mapped_extents: u32, // number of extents that were mapped (out) + pub fm_extent_count: u32, // size of fm_extents array (in) + pub fm_reserved: u32, + pub fm_extents: [fiemap_extent; EXTENT_COUNT], // array of mapped extents (out) + } + + // https://github.com/torvalds/linux/blob/0a679e13ea30f85a1aef0669ee0c5a9fd7860b34/Documentation/filesystems/fiemap.txt#L80 + #[repr(C)] + #[derive(Debug)] + pub struct fiemap_extent { + pub fe_logical: u64, // logical offset in bytes for the start of the extent + pub fe_physical: u64, // physical offset in bytes for the start of the extent + pub fe_length: u64, // length in bytes for the extent + pub fe_reserved64: [u64; 2], + pub fe_flags: u32, // FIEMAP_EXTENT_* flags for this extent + pub fe_reserved: [u32; 3], + } + + impl fiemap { + pub fn new() -> Self { + let mut r: Self = unsafe { zeroed() }; + r.fm_extent_count = EXTENT_COUNT as u32; + r.fm_length = std::u64::MAX; + r + } + } +} diff --git a/src/osmet/file.rs b/src/osmet/file.rs new file mode 100644 index 000000000..f5467b83f --- /dev/null +++ b/src/osmet/file.rs @@ -0,0 +1,212 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fs::{File, OpenOptions}; +use std::io::{self, BufReader, BufWriter, Read}; +use std::path::Path; + +use clap::crate_version; +use error_chain::bail; +use serde::{Deserialize, Serialize}; +use xz2::read::XzDecoder; + +use super::*; + +/// Magic header value for osmet binary. +const OSMET_FILE_HEADER_MAGIC: [u8; 8] = *b"OSMET\0\0\0"; + +/// Basic versioning. Used as a safety check that we're unpacking something we understand. Bump +/// this when making changes to the format. +const OSMET_FILE_VERSION: u32 = 1; + +/// We currently use bincode for serialization. Note bincode does not support backwards compatible +/// changes well. However we do not currently care about backcompatibility. If that changes, we +/// should change serializer. +#[derive(Serialize, Deserialize, Debug)] +pub(super) struct OsmetFileHeader { + magic: [u8; 8], + version: u32, + /// For informational purposes only. + app_version: String, + /// Required sector size of target block device during unpacking. + pub(super) sector_size: u32, + pub(super) os_description: String, + pub(super) os_architecture: String, +} + +impl OsmetFileHeader { + pub(super) fn new(sector_size: u32, os_description: &str) -> Self { + Self { + magic: OSMET_FILE_HEADER_MAGIC, + version: OSMET_FILE_VERSION, + app_version: crate_version!().into(), + sector_size, + os_description: os_description.into(), + // There's an assumption here that the OS we're packing is for the same + // architecture on which we're running. 
This holds, because packing is done by cosa, + // which today doesn't support cross-building. But the osmet format and algorithm + // itself actually doesn't care about the target architecture. In the future, a more + // correct approach is to read this directly from the e.g. coreos-assembler.basearch + // in the commit metadata on the source disk. + os_architecture: nix::sys::utsname::uname().machine().into(), + } + } +} + +pub(super) fn osmet_file_write( + path: &Path, + header: OsmetFileHeader, + osmet: Osmet, + mut xzpacked_image: File, +) -> Result<()> { + validate_osmet(&osmet).chain_err(|| "validating before writing")?; + + // would be nice to opportunistically do open(O_TMPFILE) then linkat here, but the tempfile API + // doesn't provide that API: https://github.com/Stebalien/tempfile/pull/31 + let mut f = BufWriter::new( + tempfile::Builder::new() + .prefix("coreos-installer-osmet") + .suffix(".partial") + .tempfile_in(path.parent().unwrap())?, + ); + + // little endian is the default, but make it explicit + let mut config = bincode::config(); + config.little_endian(); + config + .serialize_into(&mut f, &header) + .chain_err(|| "failed to serialize osmet file header")?; + config + .serialize_into(&mut f, &osmet) + .chain_err(|| "failed to serialize osmet")?; + + // and followed by the xz-compressed packed image + io::copy(&mut xzpacked_image, &mut f)?; + + f.into_inner() + .chain_err(|| "failed to flush write buffer")? + .persist(path) + .chain_err(|| format!("failed to persist tempfile to {:?}", path))?; + + Ok(()) +} + +/// Reads in the header, and does some basic sanity checking. 
+fn read_and_check_header( + mut f: &mut impl Read, + config: &bincode::Config, +) -> Result { + let header: OsmetFileHeader = config + .deserialize_from(&mut f) + .chain_err(|| "failed to deserialize osmet file")?; + if header.magic != OSMET_FILE_HEADER_MAGIC { + bail!("not an OSMET file!"); + } + if header.version != OSMET_FILE_VERSION { + bail!("incompatible OSMET file version {}", header.version); + } + + Ok(header) +} + +pub(super) fn osmet_file_read_header(path: &Path) -> Result { + let mut f = BufReader::with_capacity( + 8192, + OpenOptions::new() + .read(true) + .open(path) + .chain_err(|| format!("opening {:?}", path))?, + ); + + // little endian is the default, but make it explicit + let mut config = bincode::config(); + config.little_endian(); + + Ok(read_and_check_header(&mut f, &config)?) +} + +pub(super) fn osmet_file_read(path: &Path) -> Result<(OsmetFileHeader, Osmet, impl Read + Send)> { + let mut f = BufReader::with_capacity( + 8192, + OpenOptions::new() + .read(true) + .open(path) + .chain_err(|| format!("opening {:?}", path))?, + ); + + // little endian is the default, but make it explicit + let mut config = bincode::config(); + config.little_endian(); + + let header = read_and_check_header(&mut f, &config)?; + let osmet: Osmet = config + .deserialize_from(&mut f) + .chain_err(|| "failed to deserialize osmet file")?; + + validate_osmet(&osmet).chain_err(|| "validating after reading")?; + Ok((header, osmet, XzDecoder::new(f))) +} + +fn validate_osmet(osmet: &Osmet) -> Result<()> { + if osmet.partitions.is_empty() { + bail!("OSMET file has no partitions!"); + } + + // sanity-check partitions and mappings are in canonical form + let mut cursor: u64 = 0; + for (i, partition) in osmet.partitions.iter().enumerate() { + if cursor > partition.start_offset { + bail!( + "cursor past partition start: {} vs {}", + cursor, + partition.start_offset + ); + } + cursor = cursor + .checked_add( + verify_canonical(&partition.mappings).chain_err(|| 
format!("partition {}", i))?, + ) + .ok_or_else(|| format!("overflow after partition {}", i))?; + if cursor > partition.end_offset { + bail!( + "cursor past partition end: {} vs {}", + cursor, + partition.end_offset + ); + } + cursor = partition.end_offset; + } + + Ok(()) +} + +fn verify_canonical(mappings: &[Mapping]) -> Result { + let mut cursor: u64 = 0; + for (i, mapping) in mappings.iter().enumerate() { + if cursor > mapping.extent.physical { + bail!( + "cursor past mapping start: {} vs {}", + cursor, + mapping.extent.physical + ); + } + cursor = mapping + .extent + .physical + .checked_add(mapping.extent.length) + .ok_or_else(|| format!("overflow after mapping {}", i))?; + } + + Ok(cursor) +} diff --git a/src/osmet/io_helpers.rs b/src/osmet/io_helpers.rs new file mode 100644 index 000000000..c755e2bab --- /dev/null +++ b/src/osmet/io_helpers.rs @@ -0,0 +1,189 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::convert::{TryFrom, TryInto};
+use std::fs::OpenOptions;
+use std::io::{self, ErrorKind, Read, Write};
+use std::os::unix::io::AsRawFd;
+use std::path::Path;
+
+use error_chain::bail;
+use openssl::hash::{Hasher, MessageDigest};
+use serde::{Deserialize, Serialize};
+
+use super::*;
+
+#[derive(Serialize, Deserialize, Debug, PartialEq, Clone, Default)]
+pub struct Sha256Digest([u8; 32]);
+
+impl TryFrom<Hasher> for Sha256Digest {
+    type Error = Error;
+
+    fn try_from(mut hasher: Hasher) -> std::result::Result<Self, Self::Error> {
+        let digest = hasher.finish().chain_err(|| "finishing hash")?;
+        Ok(Sha256Digest(
+            digest
+                .as_ref()
+                .try_into()
+                .chain_err(|| "converting to SHA256")?,
+        ))
+    }
+}
+
+/// This is like `std::io::copy()`, but limits the number of bytes copied over. The `Read` trait has
+/// `take()`, but that takes ownership of the reader. We also take a buf to avoid re-initializing a
+/// block each time (std::io::copy() gets around this by using MaybeUninit, but that requires using
+/// nightly and unsafe functions).
+pub fn copy_n(
+    reader: &mut impl Read,
+    writer: &mut impl Write,
+    mut n: u64,
+    buf: &mut [u8],
+) -> Result<u64> {
+    let mut written = 0;
+    loop {
+        if n == 0 {
+            return Ok(written);
+        }
+        let bufn = if n < (buf.len() as u64) {
+            &mut buf[..n as usize]
+        } else {
+            &mut buf[..]
+        };
+        let len = match reader.read(bufn) {
+            Ok(0) => return Ok(written),
+            Ok(len) => len,
+            Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
+            Err(e) => return Err(e.into()),
+        };
+        assert!(len as u64 <= n);
+        writer.write_all(&bufn[..len])?;
+        written += len as u64;
+        n -= len as u64;
+    }
+}
+
+/// This is like `copy_n()` but errors if the number of bytes copied is less than expected.
+pub fn copy_exactly_n( + reader: &mut impl Read, + writer: &mut impl Write, + n: u64, + buf: &mut [u8], +) -> Result { + let bytes_copied = copy_n(reader, writer, n, buf)?; + if bytes_copied != n { + bail!( + "expected to copy {} bytes but instead copied {} bytes", + n, + bytes_copied + ); + } + Ok(n) +} + +// ab/cdef....file --> 0xabcdef... +pub fn object_path_to_checksum(path: &Path) -> Result { + let chksum2 = path + .parent() + .unwrap() + .file_name() + .unwrap() + .to_str() + .unwrap(); + let chksum62 = path + .file_stem() + .unwrap() + .to_str() + .ok_or_else(|| format!("invalid non-UTF-8 object filename: {:?}", path))?; + if chksum2.len() != 2 || chksum62.len() != 62 { + bail!("Malformed object path {:?}", path); + } + + let mut bin_chksum = [0u8; 32]; + bin_chksum[0] = u8::from_str_radix(chksum2, 16)?; + for i in 0..31 { + bin_chksum[i + 1] = u8::from_str_radix(&chksum62[i * 2..(i + 1) * 2], 16)?; + } + + Ok(Sha256Digest(bin_chksum)) +} + +// 0xabcdef... --> ab/cdef....file +pub fn checksum_to_object_path(chksum: &Sha256Digest, buf: &mut Vec) -> Result<()> { + write!(buf, "{:02x}/", chksum.0[0])?; + for i in 1..32 { + write!(buf, "{:02x}", chksum.0[i])?; + } + write!(buf, ".file")?; + Ok(()) +} + +pub fn checksum_to_string(chksum: &Sha256Digest) -> Result { + let mut buf: Vec = Vec::with_capacity(64); + for i in 0..32 { + write!(buf, "{:02x}", chksum.0[i])?; + } + Ok(String::from_utf8(buf).expect("valid utf-8")) +} + +pub fn get_path_digest(path: &Path) -> Result { + let mut f = OpenOptions::new() + .read(true) + .open(path) + .chain_err(|| format!("opening {:?}", path))?; + + // tell kernel to optimize for sequential reading + if unsafe { libc::posix_fadvise(f.as_raw_fd(), 0, 0, libc::POSIX_FADV_SEQUENTIAL) } < 0 { + eprintln!( + "posix_fadvise(SEQUENTIAL) failed (errno {}) -- ignoring...", + nix::errno::errno() + ); + } + + let mut hasher = Hasher::new(MessageDigest::sha256()).chain_err(|| "creating SHA256 hasher")?; + io::copy(&mut f, &mut hasher)?; + 
Ok(hasher.try_into()?) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + #[test] + fn test_checksum_to_object_path() { + let mut chksum = Sha256Digest([0u8; 32]); + let mut buf: Vec = Vec::new(); + + // all zeros + checksum_to_object_path(&chksum, &mut buf).unwrap(); + assert_eq!( + Path::new(OsStr::from_bytes(buf.as_slice())), + Path::new("00/00000000000000000000000000000000000000000000000000000000000000.file") + ); + buf.truncate(0); + + // not all zeros + chksum.0[0] = 0xff; + chksum.0[1] = 0xfe; + chksum.0[31] = 0xfd; + checksum_to_object_path(&chksum, &mut buf).unwrap(); + assert_eq!( + Path::new(OsStr::from_bytes(buf.as_slice())), + Path::new("ff/fe0000000000000000000000000000000000000000000000000000000000fd.file") + ); + buf.truncate(0); + } +} diff --git a/src/osmet/mod.rs b/src/osmet/mod.rs new file mode 100644 index 000000000..3e6467a99 --- /dev/null +++ b/src/osmet/mod.rs @@ -0,0 +1,680 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Note the following terms are in use in this module: +// - the "unpacked" image refers to the fully blown up metal image (as it'd be read from a block +// device) +// - extents for which we already have a mapping are "skipped" +// - the "packed" image refers to the metal image with all the extents for which we already have a +// mapping skipped +// - the "xzpacked" image is the packed image compressed with xz + +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::convert::TryInto; +use std::fs::{File, OpenOptions}; +use std::io::{self, Seek, SeekFrom, Write}; +use std::os::unix::fs::FileTypeExt; +use std::path::{Path, PathBuf}; + +use error_chain::bail; +use nix::mount; +use serde::{Deserialize, Serialize}; +use walkdir::WalkDir; +use xz2::write::XzEncoder; + +use crate::blockdev::*; +use crate::cmdline::*; +use crate::errors::*; + +mod fiemap; +mod file; +mod io_helpers; +mod unpacker; + +use crate::osmet::fiemap::*; +use crate::osmet::file::*; +use crate::osmet::io_helpers::*; +use crate::osmet::unpacker::*; + +// just re-export OsmetUnpacker +pub use crate::osmet::unpacker::OsmetUnpacker; + +#[derive(Serialize, Deserialize, Debug)] +struct Mapping { + extent: Extent, + object: Sha256Digest, +} + +#[derive(Serialize, Deserialize, Debug)] +struct OsmetPartition { + start_offset: u64, + end_offset: u64, + mappings: Vec, +} + +#[derive(Serialize, Deserialize, Debug)] +struct Osmet { + /// Partitions for which we've registered mappings. + partitions: Vec, + /// Checksum of the final disk image. Used for unpacking verification. + checksum: Sha256Digest, + /// Size of the final disk image. Used for unpacking verification. + size: u64, +} + +pub fn osmet_fiemap(config: &OsmetFiemapConfig) -> Result<()> { + eprintln!("{:?}", fiemap_path(config.file.as_str().as_ref())?); + Ok(()) +} + +pub fn osmet_pack(config: &OsmetPackConfig) -> Result<()> { + // First, mount the two main partitions we want to suck out data from: / and /boot. 
Note
+    // MS_RDONLY; this also ensures that the partition isn't already mounted rw elsewhere.
+    let boot = mount_partition_by_label(&config.device, "boot", mount::MsFlags::MS_RDONLY)?;
+    let root = mount_partition_by_label(&config.device, "root", mount::MsFlags::MS_RDONLY)?;
+
+    // now, we do a first scan of the boot partition and pick up files over a certain size
+    let boot_files = prescan_boot_partition(&boot)?;
+
+    // generate the primary OSTree object <--> disk block mappings, and also try to match up boot
+    // files with OSTree objects
+    let (root_partition, mapped_boot_files) = scan_root_partition(&root, boot_files)?;
+
+    let boot_partition = scan_boot_partition(&boot, mapped_boot_files)?;
+
+    let partitions = vec![boot_partition, root_partition];
+
+    // create a first tempfile to store the packed image
+    eprintln!("Packing image");
+    let (mut packed_image, size) =
+        write_packed_image_to_file(Path::new(&config.device), &partitions)?;
+
+    // verify that unpacking the packed image again yields the expected checksum
+    eprintln!("Verifying that repacked image matches digest");
+    let (checksum, unpacked_size) =
+        get_unpacked_image_digest(&mut packed_image, &partitions, &root)?;
+    packed_image
+        .seek(SeekFrom::Start(0))
+        .chain_err(|| "seeking back to start of packed image")?;
+
+    if unpacked_size != size {
+        bail!(
+            "unpacking test: got {} bytes but expected {}",
+            unpacked_size,
+            size
+        );
+    }
+
+    let checksum_str = checksum_to_string(&checksum)?;
+    if checksum_str != config.checksum {
+        bail!(
+            "unpacking test: got checksum {} but expected {}",
+            checksum_str,
+            &config.checksum
+        );
+    }
+
+    let sector_size = get_sector_size_for_path(Path::new(&config.device))?.get();
+    let header = OsmetFileHeader::new(sector_size, &config.description);
+
+    // create final Osmet object to serialize
+    let osmet = Osmet {
+        partitions,
+        checksum,
+        size,
+    };
+
+    osmet_file_write(Path::new(&config.output), header, osmet, packed_image)?;
+    eprintln!("Packing successful!");
+
+    Ok(())
+}
+
+pub fn osmet_unpack(config: &OsmetUnpackConfig) -> Result<()> { + // open output device for writing + let mut dev = OpenOptions::new() + .write(true) + .open(Path::new(&config.device)) + .chain_err(|| format!("opening {:?}", &config.device))?; + + if !dev + .metadata() + .chain_err(|| format!("getting metadata for {:?}", &config.device))? + .file_type() + .is_block_device() + { + bail!("{:?} is not a block device", &config.device); + } + + let mut unpacker = OsmetUnpacker::new(Path::new(&config.osmet), Path::new(&config.repo))?; + io::copy(&mut unpacker, &mut dev) + .chain_err(|| format!("copying to block device {}", &config.device))?; + + Ok(()) +} + +pub fn find_matching_osmet_in_dir( + osmet_dir: &Path, + architecture: &str, + sector_size: u32, +) -> Result> { + for entry in WalkDir::new(osmet_dir).max_depth(1) { + let entry = entry.chain_err(|| format!("walking {:?}", osmet_dir))?; + + if !entry.file_type().is_file() { + continue; + } + + let header = osmet_file_read_header(entry.path())?; + if header.os_architecture == architecture && header.sector_size == sector_size { + return Ok(Some((entry.into_path(), header.os_description))); + } + } + + Ok(None) +} + +fn scan_root_partition( + root: &Mount, + mut boot_files: HashMap, +) -> Result<(OsmetPartition, HashMap)> { + // query the trivial stuff first + let (start_offset, end_offset) = root.get_partition_offsets()?; + + // we only hash boot files if there's a potential match with an OSTree object, so we keep a + // cache to avoid recomputing it multiple times + let mut cached_boot_files_digests: HashMap = HashMap::new(); + + // boot files we were able to match up with OSTree objects + let mut mapped_boot_files: HashMap = HashMap::new(); + + let objects_dir = root.mountpoint().join("ostree/repo/objects"); + + let mut mappings: Vec = vec![]; + let mut mapped_file_count = 0; + let mut empty_file_count = 0; + for entry in WalkDir::new(objects_dir) { + let entry = entry.chain_err(|| "walking objects/ dir")?; + + if 
!entry.file_type().is_file() { + continue; + } + + if entry.path().extension() != Some("file".as_ref()) { + continue; + } + + let extents = fiemap_path(entry.path().as_os_str())?; + if extents.is_empty() { + empty_file_count += 1; + continue; + } + + let object = object_path_to_checksum(entry.path()) + .chain_err(|| format!("invalid object path {:?}", entry.path()))?; + + for extent in extents { + mappings.push(Mapping { + extent, + object: object.clone(), + }); + } + + // and check if this matches a boot file + let len = entry + .metadata() + .chain_err(|| format!("getting metadata for {:?}", entry.path()))? + .len(); + if let Entry::Occupied(boot_entry) = boot_files.entry(len) { + // we can't use Entry::or_insert_with() here because get_path_digest() is fallible + let boot_file_digest = match cached_boot_files_digests.entry(len) { + Entry::Vacant(e) => e.insert(get_path_digest(boot_entry.get())?), + Entry::Occupied(e) => e.into_mut(), + }; + if get_path_digest(entry.path())? == *boot_file_digest { + mapped_boot_files.insert(boot_entry.remove(), object.clone()); + } + } + + mapped_file_count += 1; + } + + eprintln!( + "Total OSTree objects scanned from /root: {} ({} mapped, {} empty)", + mapped_file_count + empty_file_count, + mapped_file_count, + empty_file_count + ); + + eprintln!( + "Total OSTree objects found in /boot: {}", + mapped_boot_files.len() + ); + + canonicalize(&mut mappings); + + // would be cool to detect and report fragmented vs sparse files here too + eprintln!("Total /root extents: {}", mappings.len()); + + Ok(( + OsmetPartition { + start_offset, + end_offset, + mappings, + }, + mapped_boot_files, + )) +} + +fn prescan_boot_partition(boot: &Mount) -> Result> { + let mut files: HashMap = HashMap::new(); + + for entry in WalkDir::new(boot.mountpoint()) { + let entry = entry.chain_err(|| "walking /boot")?; + + if !entry.file_type().is_file() { + continue; + } + + let len = entry + .metadata() + .chain_err(|| format!("getting metadata for {:?}", 
entry.path()))? + .len(); + + // The 1024 is chosen semi-arbitrarily; really, as long as the file is larger than the size + // of one serialized `Mapping` (assuming no fragmentation), which is 56 bytes, we save + // space. But we're not guaranteed an OSTree object match, and incur more overhead for + // diminishing returns. + if len > 1024 { + files.entry(len).or_insert_with(|| entry.into_path()); + } + } + + Ok(files) +} + +fn scan_boot_partition( + boot: &Mount, + mut boot_files: HashMap, +) -> Result { + // query the trivial stuff first + let (start_offset, end_offset) = boot.get_partition_offsets()?; + + let mut mappings: Vec = vec![]; + + for (path, object) in boot_files.drain() { + for extent in fiemap_path(path.as_path().as_os_str())? { + mappings.push(Mapping { + extent, + object: object.clone(), + }); + } + } + + canonicalize(&mut mappings); + + eprintln!("Total /boot extents: {}", mappings.len()); + + Ok(OsmetPartition { + start_offset, + end_offset, + mappings, + }) +} + +/// Writes the disk image, with the extents for which we have mappings for skipped. +fn write_packed_image_to_file( + block_device: &Path, + partitions: &[OsmetPartition], +) -> Result<(File, u64)> { + let mut xz_tmpf = XzEncoder::new( + // ideally this would use O_TMPFILE, but since tempfile *needs* to create a named tempfile, + // let's give it a descriptive name and extension + tempfile::Builder::new() + .prefix("coreos-installer-xzpacked") + .suffix(".raw.xz") + .tempfile() + .chain_err(|| "allocating packed image tempfile")? 
+ // and here we delete it on disk so we just have an fd to it + .into_file(), + 9, + ); + + let mut dev = OpenOptions::new() + .read(true) + .open(&block_device) + .chain_err(|| format!("opening {:?}", block_device))?; + + let total_bytes_skipped = write_packed_image(&mut dev, &mut xz_tmpf, partitions)?; + + xz_tmpf + .try_finish() + .chain_err(|| "trying to finish xz stream")?; + + // sanity check that the number of bytes written + packed match up with block device size + let blksize = get_block_device_size(&dev) + .chain_err(|| format!("querying block device size of {:?}", block_device))?; + let total_bytes_written = xz_tmpf.total_in(); + if total_bytes_written + total_bytes_skipped != blksize.get() { + bail!( + "bytes written + bytes skipped != block device size: {} + {} vs {}", + total_bytes_written, + total_bytes_skipped, + blksize + ); + } + + eprintln!("Total bytes skipped: {}", total_bytes_skipped); + eprintln!("Total bytes written: {}", total_bytes_written); + eprintln!("Total bytes written (compressed): {}", xz_tmpf.total_out()); + + let mut tmpf = xz_tmpf.finish().chain_err(|| "finishing xz stream")?; + tmpf.seek(SeekFrom::Start(0)) + .chain_err(|| "seeking back to start of tempfile")?; + + Ok((tmpf, blksize.get())) +} + +fn write_packed_image( + dev: &mut File, + w: &mut impl Write, + partitions: &[OsmetPartition], +) -> Result { + let mut buf = [0u8; 8192]; + + let mut cursor: u64 = 0; + let mut total_bytes_skipped: u64 = 0; + for (i, partition) in partitions.iter().enumerate() { + // first copy everything up to the start of the partition + assert!(partition.start_offset >= cursor); + copy_exactly_n(dev, w, partition.start_offset - cursor, &mut buf)?; + total_bytes_skipped += write_packed_image_partition(dev, w, partition, &mut buf) + .chain_err(|| format!("packing partition {}", i))?; + cursor = partition.end_offset; + } + + // and finally write out the remainder of the disk + io::copy(dev, w).chain_err(|| "copying remainder of disk")?; + + 
Ok(total_bytes_skipped) +} + +fn write_packed_image_partition( + dev: &mut File, + w: &mut impl Write, + partition: &OsmetPartition, + buf: &mut [u8], +) -> Result { + let mut total_bytes_skipped = 0; + + // and this is where the real fun begins! + let mut cursor = partition.start_offset; + for mapping in partition.mappings.iter() { + // make offset relative to start of disk, not partition + let extent_start = mapping.extent.physical + partition.start_offset; + assert!(extent_start >= cursor); + if cursor < extent_start { + cursor += copy_exactly_n(dev, w, extent_start - cursor, buf) + .chain_err(|| "while writing in between extents")?; + } + + // this is the crucial space-saving step; we skip over the extent we have a mapping for + dev.seek(SeekFrom::Current(mapping.extent.length.try_into().unwrap())) + .chain_err(|| format!("while skipping extent: {:?}", mapping.extent))?; + total_bytes_skipped += mapping.extent.length; + cursor += mapping.extent.length; + } + + assert!(cursor <= partition.end_offset); + + // and now just transfer the rest of the partition + copy_exactly_n(dev, w, partition.end_offset - cursor, buf) + .chain_err(|| "copying remainder of partition")?; + + Ok(total_bytes_skipped) +} + +fn canonicalize(mappings: &mut Vec) { + if mappings.is_empty() { + // technically nothing to do... 
but this is highly suspicious, so log it + eprintln!("No mappings to canonicalize"); + return; + } + + // first, we need the mappings sorted by physical offset, then length (longest first) + mappings.sort_unstable_by(|a, b| { + a.extent + .physical + .cmp(&b.extent.physical) + .then_with(|| a.extent.length.cmp(&b.extent.length).reverse()) + }); + + let mut clamped_mappings_count = 0; + let mut mappings_to_delete: Vec = Vec::new(); + let mut last_mapping_physical_end = mappings[0].extent.physical + mappings[0].extent.length; + for (i, mapping) in mappings.iter_mut().enumerate().skip(1) { + let mapping_physical_end = mapping.extent.physical + mapping.extent.length; + // first check if the extent is wholly-contained by the previous one + if mapping_physical_end <= last_mapping_physical_end { + mappings_to_delete.push(i); + } else { + // If the extent's start has an overlap with the previous one, clamp it. Optimally, + // we'd want to favour larger extents since it's lower overhead when unpacking. But + // really, OSTree objects normally have no reflinked extents between them (though this + // would be an interesting question to investigate) -- this naive handling provides a + // fallback so we don't just barf if we do hit that case. 
+ if mapping.extent.physical < last_mapping_physical_end { + let n = last_mapping_physical_end - mapping.extent.physical; + mapping.extent.logical += n; + mapping.extent.physical += n; + mapping.extent.length -= n; + clamped_mappings_count += 1; + } + last_mapping_physical_end = mapping_physical_end; + } + } + + eprintln!("Duplicate extents dropped: {}", mappings_to_delete.len()); + eprintln!("Overlapping extents clamped: {}", clamped_mappings_count); + + for i in mappings_to_delete.into_iter().rev() { + mappings.remove(i); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::default::Default; + + #[test] + fn test_canonicalize() { + let mut mappings: Vec = Vec::new(); + mappings.push(Mapping { + extent: Extent { + logical: 100, + physical: 100, + length: 50, + }, + object: Sha256Digest::default(), + }); + canonicalize(&mut mappings); + assert_eq!(mappings.len(), 1); + assert_eq!( + mappings[0].extent, + Extent { + logical: 100, + physical: 100, + length: 50 + } + ); + + mappings.push(Mapping { + extent: Extent { + logical: 100, + physical: 100, + length: 10, + }, + object: Sha256Digest::default(), + }); + mappings.push(Mapping { + extent: Extent { + logical: 110, + physical: 110, + length: 10, + }, + object: Sha256Digest::default(), + }); + mappings.push(Mapping { + extent: Extent { + logical: 140, + physical: 140, + length: 10, + }, + object: Sha256Digest::default(), + }); + canonicalize(&mut mappings); + assert_eq!(mappings.len(), 1); + assert_eq!( + mappings[0].extent, + Extent { + logical: 100, + physical: 100, + length: 50 + } + ); + + mappings.push(Mapping { + extent: Extent { + logical: 140, + physical: 140, + length: 20, + }, + object: Sha256Digest::default(), + }); + mappings.push(Mapping { + extent: Extent { + logical: 150, + physical: 150, + length: 20, + }, + object: Sha256Digest::default(), + }); + canonicalize(&mut mappings); + assert_eq!(mappings.len(), 3); + assert_eq!( + mappings[0].extent, + Extent { + logical: 100, + physical: 100, + 
length: 50 + } + ); + assert_eq!( + mappings[1].extent, + Extent { + logical: 150, + physical: 150, + length: 10 + } + ); + assert_eq!( + mappings[2].extent, + Extent { + logical: 160, + physical: 160, + length: 10 + } + ); + } + + #[test] + fn test_copy_n() { + let mut sink = std::io::sink(); + let mut buf = [0u8; 50]; + + let data = [0u8; 30]; + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 0, &mut buf).unwrap(), + 0 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 1, &mut buf).unwrap(), + 1 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 29, &mut buf).unwrap(), + 29 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 30, &mut buf).unwrap(), + 30 + ); + assert_eq!(copy_n(&mut &data[..], &mut sink, 31, &mut buf).unwrap(), 30); + assert_eq!(copy_n(&mut &data[..], &mut sink, 49, &mut buf).unwrap(), 30); + assert_eq!(copy_n(&mut &data[..], &mut sink, 50, &mut buf).unwrap(), 30); + assert_eq!(copy_n(&mut &data[..], &mut sink, 51, &mut buf).unwrap(), 30); + + let data = [0u8; 50]; + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 0, &mut buf).unwrap(), + 0 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 1, &mut buf).unwrap(), + 1 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 49, &mut buf).unwrap(), + 49 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 50, &mut buf).unwrap(), + 50 + ); + assert_eq!(copy_n(&mut &data[..], &mut sink, 51, &mut buf).unwrap(), 50); + + let data = [0u8; 80]; + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 0, &mut buf).unwrap(), + 0 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 1, &mut buf).unwrap(), + 1 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 49, &mut buf).unwrap(), + 49 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 50, &mut buf).unwrap(), + 50 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 51, &mut buf).unwrap(), + 51 + ); + assert_eq!( + copy_exactly_n(&mut 
&data[..], &mut sink, 79, &mut buf).unwrap(), + 79 + ); + assert_eq!( + copy_exactly_n(&mut &data[..], &mut sink, 80, &mut buf).unwrap(), + 80 + ); + assert_eq!(copy_n(&mut &data[..], &mut sink, 81, &mut buf).unwrap(), 80); + } +} diff --git a/src/osmet/unpacker.rs b/src/osmet/unpacker.rs new file mode 100644 index 000000000..c70852e14 --- /dev/null +++ b/src/osmet/unpacker.rs @@ -0,0 +1,266 @@ +// Copyright 2020 Red Hat, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::convert::{TryFrom, TryInto}; +use std::ffi::OsStr; +use std::fs::{File, OpenOptions}; +use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write}; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +use std::path::{Path, PathBuf}; +use std::thread; + +use error_chain::bail; +use openssl::hash::{Hasher, MessageDigest}; +use xz2::read::XzDecoder; + +use super::*; + +/// Path to OSTree repo of sysroot. 
+const SYSROOT_OSTREE_REPO: &str = "/sysroot/ostree/repo"; + +pub struct OsmetUnpacker { + thread_handle: Option>>, + reader: pipe::PipeReader, + length: u64, +} + +impl OsmetUnpacker { + pub fn new(osmet: &Path, repo: &Path) -> Result { + let (_, osmet, xzpacked_image) = osmet_file_read(&osmet)?; + Self::new_impl(osmet, xzpacked_image, repo) + } + + pub fn new_from_sysroot(osmet: &Path) -> Result { + let (_, osmet, xzpacked_image) = osmet_file_read(&osmet)?; + Self::new_impl(osmet, xzpacked_image, Path::new(SYSROOT_OSTREE_REPO)) + } + + fn new_impl( + osmet: Osmet, + packed_image: impl Read + Send + 'static, + repo: &Path, + ) -> Result { + let (reader, writer) = pipe::pipe(); + + let length = osmet.size; + let repo = repo.to_owned(); + let thread_handle = Some(thread::spawn(move || -> Result<()> { + osmet_unpack_to_writer(osmet, packed_image, repo, writer) + })); + + Ok(Self { + thread_handle, + reader, + length, + }) + } + + pub fn length(&self) -> u64 { + self.length + } +} + +impl Read for OsmetUnpacker { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let n = self.reader.read(buf)?; + if n == 0 { + match self + .thread_handle + .take() + .expect("pending thread") + .join() + .expect("joining thread") + { + Ok(_) => Ok(0), + Err(e) => Err(io::Error::new( + ErrorKind::Other, + format!("while unpacking: {}", e), + )), + } + } else { + Ok(n) + } + } +} + +pub(super) fn get_unpacked_image_digest( + xzpacked_image: &mut File, + partitions: &[OsmetPartition], + root: &Mount, +) -> Result<(Sha256Digest, u64)> { + let mut hasher = Hasher::new(MessageDigest::sha256()).chain_err(|| "creating SHA256 hasher")?; + let repo = root.mountpoint().join("ostree/repo"); + let mut packed_image = XzDecoder::new(xzpacked_image); + let n = write_unpacked_image(&mut packed_image, &mut hasher, &partitions, &repo)?; + Ok((hasher.try_into()?, n)) +} + +struct WriteHasher { + writer: W, + hasher: Hasher, +} + +impl Write for WriteHasher { + fn write(&mut self, buf: &[u8]) -> 
io::Result { + if buf.is_empty() { + return Ok(0); + } + + let n = self.writer.write(buf)?; + self.hasher.write_all(&buf[..n])?; + + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.writer.flush()?; + self.hasher.flush()?; + Ok(()) + } +} + +impl TryFrom> for Sha256Digest { + type Error = Error; + + fn try_from(wrapper: WriteHasher) -> std::result::Result { + Sha256Digest::try_from(wrapper.hasher) + } +} + +fn osmet_unpack_to_writer( + osmet: Osmet, + mut packed_image: impl Read, + repo: PathBuf, + writer: impl Write, +) -> Result<()> { + let hasher = Hasher::new(MessageDigest::sha256()).chain_err(|| "creating SHA256 hasher")?; + + let mut w = WriteHasher { writer, hasher }; + + let n = write_unpacked_image(&mut packed_image, &mut w, &osmet.partitions, &repo)?; + if n != osmet.size { + bail!("wrote {} bytes but expected {}", n, osmet.size); + } + + let final_checksum: Sha256Digest = w.try_into()?; + if final_checksum != osmet.checksum { + bail!( + "expected final checksum {:?}, but got {:?}", + osmet.checksum, + final_checksum + ); + } + + Ok(()) +} + +fn write_unpacked_image( + packed_image: &mut impl Read, + w: &mut impl Write, + partitions: &[OsmetPartition], + repo: &Path, +) -> Result { + let mut buf = [0u8; 8192]; + + // start streaming writes to device, interspersing OSTree objects + let mut cursor: u64 = 0; + for partition in partitions { + assert!(partition.start_offset >= cursor); + cursor += copy_exactly_n(packed_image, w, partition.start_offset - cursor, &mut buf)?; + cursor += write_partition(w, partition, packed_image, repo, &mut buf)?; + } + + // and copy the rest + cursor += io::copy(packed_image, w)?; + + Ok(cursor) +} + +fn write_partition( + w: &mut impl Write, + partition: &OsmetPartition, + packed_image: &mut impl Read, + ostree_repo: &Path, + buf: &mut [u8], +) -> Result { + // Set up a reusable buffer for building object paths instead of re-allocating each time. 
It's
+    // easier to maintain it as a Vec<u8> than a PathBuf so we can just use e.g. `write!()`.
+    let mut object_pathbuf = {
+        let mut repo = Path::new(ostree_repo).to_path_buf();
+        repo.push("objects");
+        repo.into_os_string().into_vec()
+    };
+    object_pathbuf.push(b'/');
+    let object_pathbuf_n = object_pathbuf.len();
+
+    let mut cursor = partition.start_offset;
+    for mapping in partition.mappings.iter() {
+        let extent_start = mapping.extent.physical + partition.start_offset;
+        assert!(extent_start >= cursor);
+        if cursor < extent_start {
+            cursor += copy_exactly_n(packed_image, w, extent_start - cursor, buf)?;
+        }
+
+        checksum_to_object_path(&mapping.object, &mut object_pathbuf)?;
+        cursor += write_partition_mapping(
+            &mapping.extent,
+            Path::new(OsStr::from_bytes(object_pathbuf.as_slice())),
+            w,
+            buf,
+        )?;
+        object_pathbuf.truncate(object_pathbuf_n);
+    }
+
+    // and copy the rest of the partition
+    assert!(partition.end_offset >= cursor);
+    cursor += copy_exactly_n(packed_image, w, partition.end_offset - cursor, buf)?;
+
+    // subtract back the partition offset here so we only return the actual size of the partition
+    Ok(cursor - partition.start_offset)
+}
+
+fn write_partition_mapping(
+    extent: &Extent,
+    object: &Path,
+    w: &mut impl Write,
+    buf: &mut [u8],
+) -> Result<u64> {
+    // really, we should be e.g. caching the last N used objects here as open fds so we don't
+    // re-open them each time; in practice we don't really encounter much fragmentation, so we can
+    // afford to be lazy and keep the code simpler
+    let mut object = OpenOptions::new()
+        .read(true)
+        .open(object)
+        .chain_err(|| format!("opening {:?}", object))?;
+
+    let mut objlen = object
+        .metadata()
+        .chain_err(|| format!("getting metadata for {:?}", object))?
+ .len(); + + if extent.logical > 0 { + object.seek(SeekFrom::Start(extent.logical))?; + objlen -= extent.logical; + } + + let mut n = 0; + if objlen < extent.length { + n += copy_exactly_n(&mut object, w, objlen, buf)?; + n += copy_exactly_n(&mut io::repeat(0), w, extent.length - objlen, buf)?; + } else { + n += copy_exactly_n(&mut object, w, extent.length, buf)?; + } + + Ok(n) +} diff --git a/src/source.rs b/src/source.rs index e486acbda..fa3f117b0 100644 --- a/src/source.rs +++ b/src/source.rs @@ -19,11 +19,12 @@ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use std::fs::OpenOptions; use std::io::{Read, Seek, SeekFrom}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::time::Duration; use crate::cmdline::*; use crate::errors::*; +use crate::osmet::*; /// Completion timeout for HTTP requests (4 hours). const HTTP_COMPLETION_TIMEOUT: Duration = Duration::from_secs(4 * 60 * 60); @@ -31,9 +32,17 @@ const HTTP_COMPLETION_TIMEOUT: Duration = Duration::from_secs(4 * 60 * 60); /// Default base URL to Fedora CoreOS streams metadata. const DEFAULT_STREAM_BASE_URL: &str = "https://builds.coreos.fedoraproject.org/streams/"; +/// Directory in which we look for osmet files. 
+const OSMET_FILES_DIR: &str = "/run/coreos-installer/osmet"; + pub trait ImageLocation: Display { // Obtain image lengths and signatures and start fetching the images fn sources(&self) -> Result>; + + // Whether GPG signature verification is required by default + fn require_signature(&self) -> bool { + true + } } // Local image source @@ -43,6 +52,14 @@ pub struct FileLocation { sig_path: String, } +// Local osmet image source +pub struct OsmetLocation { + osmet_path: PathBuf, + architecture: String, + sector_size: u32, + description: String, +} + // Remote image source #[derive(Debug)] pub struct UrlLocation { @@ -286,6 +303,77 @@ impl ImageLocation for StreamLocation { } } +impl OsmetLocation { + pub fn new(architecture: &str, sector_size: u32) -> Result> { + let osmet_dir = Path::new(OSMET_FILES_DIR); + if !osmet_dir.exists() { + return Ok(None); + } + + if let Some((osmet_path, description)) = + find_matching_osmet_in_dir(osmet_dir, architecture, sector_size)? + { + Ok(Some(Self { + osmet_path, + architecture: architecture.into(), + sector_size, + description, + })) + } else { + Ok(None) + } + } +} + +impl Display for OsmetLocation { + fn fmt(&self, f: &mut Formatter<'_>) -> ::std::fmt::Result { + write!( + f, + "Installing {} {} ({}-byte sectors)", + self.description, self.architecture, self.sector_size + ) + } +} + +impl ImageLocation for OsmetLocation { + fn sources(&self) -> Result> { + let unpacker = OsmetUnpacker::new_from_sysroot(Path::new(&self.osmet_path))?; + + let filename = { + let stem = self.osmet_path.file_stem().ok_or_else(|| { + // This really should never happen since for us to get here, we must've found a + // valid osmet file... But let's still just error out instead of assert in case + // somehow this doesn't hold true in the future and a user hits this. 
+ format!( + "can't create new .raw filename from osmet path {:?}", + &self.osmet_path + ) + })?; + // really we don't need to care about UTF-8 here, but ImageSource right now does + let mut filename: String = stem + .to_str() + .ok_or_else(|| format!("non-UTF-8 osmet file stem: {:?}", stem))? + .into(); + filename.push_str(".raw"); + filename + }; + let length = unpacker.length(); + Ok(vec![ImageSource { + reader: Box::new(unpacker), + length_hint: Some(length), + signature: None, + filename, + artifact_type: "disk".to_string(), + }]) + } + + // For osmet, we don't require GPG verification since we trust osmet files placed in the + // OSMET_FILES_DIR. + fn require_signature(&self) -> bool { + false + } +} + /// Subcommand to list objects available in stream metadata. pub fn list_stream(config: &ListStreamConfig) -> Result<()> { #[derive(PartialEq, Eq, PartialOrd, Ord)]