This repository has been archived by the owner on Nov 7, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This analyzes an ostree commit and splits it into chunks suitable for output to separate layers in an OCI image.
- Loading branch information
Showing
7 changed files
with
593 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,399 @@ | ||
//! Split an OSTree commit into separate chunks | ||
|
||
// SPDX-License-Identifier: Apache-2.0 OR MIT | ||
|
||
use std::borrow::Borrow; | ||
use std::collections::{BTreeMap, BTreeSet}; | ||
use std::rc::Rc; | ||
|
||
use crate::objgv::*; | ||
use anyhow::Result; | ||
use camino::Utf8PathBuf; | ||
use gvariant::aligned_bytes::TryAsAligned; | ||
use gvariant::{Marker, Structure}; | ||
use ostree; | ||
use ostree::prelude::*; | ||
use ostree::{gio, glib}; | ||
|
||
const FIRMWARE: &str = "/usr/lib/firmware"; | ||
const MODULES: &str = "/usr/lib/modules"; | ||
|
||
const QUERYATTRS: &str = "standard::name,standard::type"; | ||
|
||
/// Size in bytes of the smallest chunk we will emit. | ||
// pub(crate) const MIN_CHUNK_SIZE: u32 = 10 * 1024; | ||
/// Maximum number of layers (chunks) we will use. | ||
// We take half the limit of 128. | ||
// https://github.com/ostreedev/ostree-rs-ext/issues/69 | ||
pub(crate) const MAX_CHUNKS: u32 = 64; | ||
|
||
/// Size in bytes for the minimum size for chunks | ||
#[allow(dead_code)] | ||
pub(crate) const DEFAULT_MIN_CHUNK: usize = 10 * 1024; | ||
|
||
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone)] | ||
pub(crate) struct RcStr(Rc<str>); | ||
|
||
impl Borrow<str> for RcStr { | ||
fn borrow(&self) -> &str { | ||
&*self.0 | ||
} | ||
} | ||
|
||
impl From<&str> for RcStr { | ||
fn from(s: &str) -> Self { | ||
Self(Rc::from(s)) | ||
} | ||
} | ||
|
||
#[derive(Debug, Default)] | ||
pub(crate) struct Chunk { | ||
pub(crate) name: String, | ||
pub(crate) content: BTreeMap<RcStr, (u64, Vec<Utf8PathBuf>)>, | ||
pub(crate) size: u64, | ||
} | ||
|
||
#[derive(Debug)] | ||
pub(crate) enum Meta { | ||
DirTree(RcStr), | ||
DirMeta(RcStr), | ||
} | ||
|
||
impl Meta { | ||
pub(crate) fn objtype(&self) -> ostree::ObjectType { | ||
match self { | ||
Meta::DirTree(_) => ostree::ObjectType::DirTree, | ||
Meta::DirMeta(_) => ostree::ObjectType::DirMeta, | ||
} | ||
} | ||
|
||
pub(crate) fn checksum(&self) -> &str { | ||
match self { | ||
Meta::DirTree(v) => &*v.0, | ||
Meta::DirMeta(v) => &*v.0, | ||
} | ||
} | ||
} | ||
|
||
#[derive(Debug, Default)] | ||
pub(crate) struct Chunking { | ||
pub(crate) metadata_size: u64, | ||
pub(crate) commit: Box<str>, | ||
pub(crate) meta: Vec<Meta>, | ||
pub(crate) remainder: Chunk, | ||
pub(crate) chunks: Vec<Chunk>, | ||
} | ||
|
||
// pub(crate) struct ChunkConfig { | ||
// pub(crate) min_size: u32, | ||
// pub(crate) max_chunks: u32, | ||
// } | ||
// | ||
// impl Default for ChunkConfig { | ||
// fn default() -> Self { | ||
// Self { | ||
// min_size: MIN_CHUNK_SIZE, | ||
// max_chunks: MAX_CHUNKS, | ||
// } | ||
// } | ||
// } | ||
|
||
#[derive(Default)] | ||
struct Generation { | ||
path: Utf8PathBuf, | ||
metadata_size: u64, | ||
meta: Vec<Meta>, | ||
dirtree_found: BTreeSet<RcStr>, | ||
dirmeta_found: BTreeSet<RcStr>, | ||
} | ||
|
||
fn generate_chunking_recurse( | ||
repo: &ostree::Repo, | ||
gen: &mut Generation, | ||
chunk: &mut Chunk, | ||
dt: &glib::Variant, | ||
) -> Result<()> { | ||
let dt = dt.data_as_bytes(); | ||
let dt = dt.try_as_aligned()?; | ||
let dt = gv_dirtree!().cast(dt); | ||
let (files, dirs) = dt.to_tuple(); | ||
// A reusable buffer to avoid heap allocating these | ||
let mut hexbuf = [0u8; 64]; | ||
for file in files { | ||
let (name, csum) = file.to_tuple(); | ||
let fpath = gen.path.join(name.to_str()); | ||
hex::encode_to_slice(csum, &mut hexbuf)?; | ||
let checksum = std::str::from_utf8(&hexbuf)?; | ||
let (_, meta, _) = repo.load_file(checksum, gio::NONE_CANCELLABLE)?; | ||
// SAFETY: We know this API returns this value; it only has a return nullable because the | ||
// caller can pass NULL to skip it. | ||
let meta = meta.unwrap(); | ||
let size = meta.size() as u64; | ||
let entry = chunk.content.entry(RcStr::from(checksum)).or_default(); | ||
entry.0 = size; | ||
let first = entry.1.is_empty(); | ||
if first { | ||
chunk.size += size; | ||
} | ||
entry.1.push(fpath); | ||
} | ||
for item in dirs { | ||
let (name, contents_csum, meta_csum) = item.to_tuple(); | ||
let name = name.to_str(); | ||
// Extend our current path | ||
gen.path.push(name); | ||
hex::encode_to_slice(contents_csum, &mut hexbuf)?; | ||
let checksum_s = std::str::from_utf8(&hexbuf)?; | ||
if !gen.dirtree_found.contains(checksum_s) { | ||
let checksum = RcStr::from(checksum_s); | ||
gen.dirtree_found.insert(RcStr::clone(&checksum)); | ||
gen.meta.push(Meta::DirTree(checksum)); | ||
let child_v = repo.load_variant(ostree::ObjectType::DirTree, checksum_s)?; | ||
gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64; | ||
generate_chunking_recurse(repo, gen, chunk, &child_v)?; | ||
} | ||
hex::encode_to_slice(meta_csum, &mut hexbuf)?; | ||
let checksum_s = std::str::from_utf8(&hexbuf)?; | ||
if !gen.dirtree_found.contains(checksum_s) { | ||
let checksum = RcStr::from(checksum_s); | ||
gen.dirmeta_found.insert(RcStr::clone(&checksum)); | ||
let child_v = repo.load_variant(ostree::ObjectType::DirMeta, checksum_s)?; | ||
gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64; | ||
gen.meta.push(Meta::DirMeta(checksum)); | ||
} | ||
// We did a push above, so pop must succeed. | ||
assert!(gen.path.pop()); | ||
} | ||
Ok(()) | ||
} | ||
|
||
impl Chunk { | ||
fn new(name: &str) -> Self { | ||
Chunk { | ||
name: name.to_string(), | ||
..Default::default() | ||
} | ||
} | ||
|
||
fn move_obj(&mut self, dest: &mut Self, checksum: &str) -> bool { | ||
// In most cases, we expect the object to exist in the source. However, it's | ||
// conveneient here to simply ignore objects which were already moved into | ||
// a chunk. | ||
if let Some((name, (size, paths))) = self.content.remove_entry(checksum) { | ||
let v = dest.content.insert(name, (size, paths)); | ||
debug_assert!(v.is_none()); | ||
self.size -= size; | ||
dest.size += size; | ||
true | ||
} else { | ||
false | ||
} | ||
} | ||
|
||
// fn split(self) -> (Self, Self) { | ||
// todo!() | ||
// } | ||
} | ||
|
||
fn find_kernel_dir( | ||
root: &gio::File, | ||
cancellable: Option<&gio::Cancellable>, | ||
) -> Result<Option<gio::File>> { | ||
let moddir = root.resolve_relative_path(MODULES); | ||
let e = moddir.enumerate_children( | ||
"standard::name", | ||
gio::FileQueryInfoFlags::NOFOLLOW_SYMLINKS, | ||
cancellable, | ||
)?; | ||
let mut r = None; | ||
for child in e.clone() { | ||
let child = &child?; | ||
let childpath = e.child(child); | ||
if child.file_type() == gio::FileType::Directory { | ||
if r.replace(childpath).is_some() { | ||
anyhow::bail!("Found multiple subdirectories in {}", MODULES); | ||
} | ||
} | ||
} | ||
Ok(r) | ||
} | ||
|
||
impl Chunking { | ||
/// Generate an initial single chunk. | ||
pub(crate) fn new(repo: &ostree::Repo, rev: &str) -> Result<Self> { | ||
// Find the target commit | ||
let rev = repo.resolve_rev(rev, false)?.unwrap(); | ||
|
||
// Load and parse the commit object | ||
let (commit_v, _) = repo.load_commit(&rev)?; | ||
let commit_v = commit_v.data_as_bytes(); | ||
let commit_v = commit_v.try_as_aligned()?; | ||
let commit = gv_commit!().cast(commit_v); | ||
let commit = commit.to_tuple(); | ||
|
||
// Find the root directory tree | ||
let contents_checksum = &hex::encode(commit.6); | ||
let contents_v = repo.load_variant(ostree::ObjectType::DirTree, contents_checksum)?; | ||
|
||
// Load it all into a single chunk | ||
let mut gen: Generation = Default::default(); | ||
gen.path = Utf8PathBuf::from("/"); | ||
let mut chunk: Chunk = Default::default(); | ||
generate_chunking_recurse(repo, &mut gen, &mut chunk, &contents_v)?; | ||
|
||
let chunking = Chunking { | ||
commit: Box::from(rev.as_str()), | ||
metadata_size: gen.metadata_size, | ||
meta: gen.meta, | ||
remainder: chunk, | ||
..Default::default() | ||
}; | ||
Ok(chunking) | ||
} | ||
|
||
fn remaining(&self) -> u32 { | ||
MAX_CHUNKS.saturating_sub(self.chunks.len() as u32) | ||
} | ||
|
||
/// Find the object named by `path` in `src`, and move it to `dest`. | ||
fn extend_chunk( | ||
repo: &ostree::Repo, | ||
src: &mut Chunk, | ||
dest: &mut Chunk, | ||
path: &ostree::RepoFile, | ||
) -> Result<()> { | ||
let cancellable = gio::NONE_CANCELLABLE; | ||
let ft = path.query_file_type(gio::FileQueryInfoFlags::NOFOLLOW_SYMLINKS, cancellable); | ||
if ft == gio::FileType::Directory { | ||
let e = path.enumerate_children( | ||
QUERYATTRS, | ||
gio::FileQueryInfoFlags::NOFOLLOW_SYMLINKS, | ||
cancellable, | ||
)?; | ||
for child in e { | ||
let childi = child?; | ||
let child = path.child(childi.name()); | ||
let child = child.downcast::<ostree::RepoFile>().unwrap(); | ||
Self::extend_chunk(repo, src, dest, &child)?; | ||
} | ||
} else { | ||
let checksum = path.checksum().unwrap(); | ||
src.move_obj(dest, checksum.as_str()); | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Create a new chunk from the provided filesystem paths. | ||
pub(crate) fn chunk_paths<'a>( | ||
&mut self, | ||
repo: &ostree::Repo, | ||
paths: impl IntoIterator<Item = &'a gio::File>, | ||
name: &str, | ||
cancellable: Option<&gio::Cancellable>, | ||
) -> Result<()> { | ||
// Do nothing if we've hit our max. | ||
if self.remaining() == 0 { | ||
return Ok(()); | ||
} | ||
|
||
let mut chunk = Chunk::new(name); | ||
for path in paths { | ||
if !path.query_exists(cancellable) { | ||
continue; | ||
} | ||
let child = path.downcast_ref::<ostree::RepoFile>().unwrap(); | ||
Self::extend_chunk(repo, &mut self.remainder, &mut chunk, &child)?; | ||
} | ||
if !chunk.content.is_empty() { | ||
self.chunks.push(chunk); | ||
} | ||
Ok(()) | ||
} | ||
|
||
fn chunk_kernel_initramfs( | ||
&mut self, | ||
repo: &ostree::Repo, | ||
root: &gio::File, | ||
cancellable: Option<&gio::Cancellable>, | ||
) -> Result<()> { | ||
let moddir = if let Some(m) = find_kernel_dir(root, cancellable)? { | ||
m | ||
} else { | ||
return Ok(()); | ||
}; | ||
// The initramfs has a dependency on userspace *and* kernel, so we | ||
// should chunk the kernel separately. | ||
let initramfs = &moddir.resolve_relative_path("initramfs.img"); | ||
self.chunk_paths(repo, [initramfs], "initramfs", cancellable)?; | ||
// Gather all of the rest of the kernel as a single chunk | ||
self.chunk_paths(repo, [&moddir], "kernel", cancellable) | ||
} | ||
|
||
/// Apply built-in heuristics to automatically create chunks. | ||
pub(crate) fn auto_chunk(&mut self, repo: &ostree::Repo) -> Result<()> { | ||
let cancellable = gio::NONE_CANCELLABLE; | ||
let root = &repo.read_commit(&self.commit, cancellable)?.0; | ||
|
||
// Grab all of linux-firmware; it's the largest thing in FCOS. | ||
let firmware = root.resolve_relative_path(FIRMWARE); | ||
self.chunk_paths(repo, [&firmware], "firmware", cancellable)?; | ||
|
||
// Kernel and initramfs | ||
self.chunk_kernel_initramfs(repo, root, cancellable)?; | ||
|
||
self.large_files(20, 1)?; | ||
|
||
Ok(()) | ||
} | ||
|
||
/// Gather large files (up to `max` chunks) as a percentage (1-99) of total size. | ||
pub(crate) fn large_files(&mut self, max: u32, percentage: u32) -> Result<()> { | ||
let max = max.min(self.remaining()); | ||
if max == 0 { | ||
return Ok(()); | ||
} | ||
|
||
let mut large_objects = Vec::new(); | ||
let total_size = self.remainder.size; | ||
let largefile_limit = (total_size * (percentage * 100) as u64) / total_size; | ||
dbg!(largefile_limit); | ||
for (objid, (size, _names)) in &self.remainder.content { | ||
if *size > largefile_limit { | ||
large_objects.push((*size, objid.clone())); | ||
} | ||
} | ||
large_objects.sort_by(|a, b| a.0.cmp(&b.0)); | ||
for (_size, objid) in large_objects.iter().rev().take(max as usize) { | ||
let mut chunk = { | ||
let (_size, names) = self.remainder.content.get(objid).unwrap(); | ||
let name = &names[0]; | ||
Chunk::new(name.as_str()) | ||
}; | ||
let moved = self.remainder.move_obj(&mut chunk, objid.borrow()); | ||
// The object only exists once, so we must have moved it. | ||
assert!(moved); | ||
self.chunks.push(chunk); | ||
} | ||
Ok(()) | ||
} | ||
|
||
pub(crate) fn take_chunks(&mut self) -> Vec<Chunk> { | ||
let mut r = Vec::new(); | ||
std::mem::swap(&mut self.chunks, &mut r); | ||
r | ||
} | ||
} | ||
|
||
pub(crate) fn print(src: &Chunking) { | ||
println!("Metadata: {}", glib::format_size(src.metadata_size)); | ||
for (n, chunk) in src.chunks.iter().enumerate() { | ||
let sz = glib::format_size(chunk.size); | ||
println!("Chunk {}: \"{}\": objects:{} size:{}", n, chunk.name, chunk.content.len(), sz); | ||
} | ||
let sz = glib::format_size(src.remainder.size); | ||
println!( | ||
"Remainder: objects:{} size:{}", | ||
src.remainder.content.len(), | ||
sz | ||
); | ||
} |
Oops, something went wrong.