Skip to content

Commit

Permalink
compose: Hash all treefile externals and flattened manifest
Browse files Browse the repository at this point in the history
Move hashing to the Rust side so that we can easily hash over the final
set of inputs after parsing. This means that we now hash over all the
externals, like `add-files` references, any `postprocess-script` script,
and `passwd` and `group` files.

The original motivation for this was that hashing over a reserialized
version of the treefile was not deterministic now that treefiles include
hash tables (i.e. `add-commit-metadata`). So I initially included each
individual treefile as part of the hash.

I realized afterwards that just switching to `BTreeMap` fixes this, which
lets us keep hashing only the final flattened reserialized treefile and
thus still ignore comments and whitespace. But since I already wrote the patch,
and it fixes a real issue today... here we are.

One notable change though is that we now hash the treefile in non-pretty
mode to increase the chances that the serialized form remains stable.
Ironically, this change is likely to cause a no-op commit once it gets
to pipelines which iterate quickly. All for the greater good though.
  • Loading branch information
jlebon committed Jul 9, 2019
1 parent 71e585f commit 8b3753f
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 35 deletions.
67 changes: 62 additions & 5 deletions rust/src/treefile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const INCLUDE_MAXDEPTH: u32 = 50;
/// a TreeComposeConfig.
struct TreefileExternals {
postprocess_script: Option<fs::File>,
add_files: collections::HashMap<String, fs::File>,
add_files: collections::BTreeMap<String, fs::File>,
passwd: Option<fs::File>,
group: Option<fs::File>,
}
Expand All @@ -56,6 +56,8 @@ pub struct Treefile {
rojig_spec: Option<CUtf8Buf>,
serialized: CUtf8Buf,
externals: TreefileExternals,
// This is a checksum over *all* the treefile inputs (recursed treefiles + externals).
checksum: CUtf8Buf,
}

// We only use this while parsing
Expand Down Expand Up @@ -239,7 +241,7 @@ fn treefile_parse<P: AsRef<Path>>(
} else {
None
};
let mut add_files: collections::HashMap<String, fs::File> = collections::HashMap::new();
let mut add_files: BTreeMap<String, fs::File> = BTreeMap::new();
if let Some(ref add_file_names) = tf.add_files.as_ref() {
for (name, _) in add_file_names.iter() {
add_files.insert(name.clone(), utils::open_file(filename.with_file_name(name))?);
Expand Down Expand Up @@ -361,9 +363,7 @@ fn treefile_merge_externals(dest: &mut TreefileExternals, src: &mut TreefileExte
}

// add-files is an array and hence has append semantics.
for (k, v) in src.add_files.drain() {
dest.add_files.insert(k, v);
}
dest.add_files.append(&mut src.add_files);

// passwd/group are basic values
if dest.passwd.is_none() {
Expand Down Expand Up @@ -432,6 +432,13 @@ impl Treefile {
(None, None)
};
let serialized = Treefile::serialize_json_string(&parsed.config)?;
// Notice we hash the *reserialization* of the final flattened treefile only so that e.g.
// comments/whitespace/hash table key reorderings don't trigger a respin. We could take
// this further by using a custom `serialize_with` for Vecs where ordering doesn't matter
// (or just sort the Vecs).
let mut hasher = glib::Checksum::new(glib::ChecksumType::Sha256);
parsed.config.hasher_update(&mut hasher)?;
parsed.externals.hasher_update(&mut hasher)?;
Ok(Box::new(Treefile {
primary_dfd: dfd,
parsed: parsed.config,
Expand All @@ -440,6 +447,7 @@ impl Treefile {
rojig_spec: rojig_spec,
serialized: serialized,
externals: parsed.externals,
checksum: CUtf8Buf::from_string(hasher.get_string().unwrap()),
}))
}

Expand Down Expand Up @@ -516,6 +524,42 @@ for x in *; do mv ${{x}} %{{buildroot}}%{{_prefix}}/lib/ostree-jigdo/%{{name}};
}
}

/// Feed the entire contents of `f` into `hasher`, leaving the file's
/// offset back at the start so later readers (e.g. the C side consuming
/// add-files fds) see the full file.
///
/// Fix: rewind *before* hashing too — if the fd had already been read
/// from, we would previously hash only the remaining tail, producing a
/// checksum that silently omitted part of the input.
fn hash_file(hasher: &mut glib::Checksum, mut f: &fs::File) -> Fallible<()> {
    // Ensure we hash from the beginning regardless of the current offset.
    f.seek(io::SeekFrom::Start(0))?;
    let mut reader = io::BufReader::with_capacity(128 * 1024, f);
    loop {
        // have to scope fill_buf() so we can consume() below
        let n = {
            let buf = reader.fill_buf()?;
            hasher.update(buf);
            buf.len()
        };
        if n == 0 {
            break;
        }
        reader.consume(n);
    }
    // Rewind again so subsequent consumers read from the start.
    f.seek(io::SeekFrom::Start(0))?;
    Ok(())
}

impl TreefileExternals {
    /// Fold every external input (postprocess script, passwd, group, then
    /// each add-files payload) into `hasher`. The iteration order is fixed
    /// so the resulting checksum is stable across runs.
    fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> {
        let optional_files = self
            .postprocess_script
            .iter()
            .chain(self.passwd.iter())
            .chain(self.group.iter());
        // add_files is a BTreeMap, so values() is deterministically ordered.
        for file in optional_files.chain(self.add_files.values()) {
            hash_file(hasher, file)?;
        }
        Ok(())
    }
}

/// For increased readability in YAML/JSON, we support whitespace in individual
/// array elements.
fn whitespace_split_packages(pkgs: &[String]) -> Vec<String> {
Expand Down Expand Up @@ -767,6 +811,13 @@ impl TreeComposeConfig {

Ok(self)
}

/// Hash the canonical JSON serialization of this (flattened) config.
/// We deliberately serialize in non-pretty mode to maximize the chance
/// that the byte representation stays stable across releases.
/// https://github.com/projectatomic/rpm-ostree/pull/1865
fn hasher_update(&self, hasher: &mut glib::Checksum) -> Fallible<()> {
    let serialized = serde_json::to_vec(self)?;
    hasher.update(&serialized);
    Ok(())
}
}

#[cfg(test)]
Expand Down Expand Up @@ -1228,6 +1279,12 @@ mod ffi {
.unwrap_or(ptr::null_mut())
}

/// C entry point: return the checksum over all treefile inputs as a
/// NUL-terminated UTF-8 string. The returned pointer borrows from `tf`
/// and must not be freed by the caller; it is valid until
/// `ror_treefile_free` is invoked.
#[no_mangle]
pub extern "C" fn ror_treefile_get_checksum(tf: *mut Treefile) -> *const libc::c_char {
    ref_from_raw_ptr(tf).checksum.as_ptr()
}

#[no_mangle]
pub extern "C" fn ror_treefile_free(tf: *mut Treefile) {
if tf.is_null() {
Expand Down
33 changes: 3 additions & 30 deletions src/app/rpmostree-composeutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,36 +58,9 @@ rpmostree_composeutil_checksum (HyGoal goal,
{
g_autoptr(GChecksum) checksum = g_checksum_new (G_CHECKSUM_SHA256);

/* Hash in the raw treefile; this means reordering the input packages
* or adding a comment will cause a recompose, but let's be conservative
* here.
*/
g_checksum_update (checksum, (guint8*)ror_treefile_get_json_string (tf), -1);

if (json_object_has_member (treefile, "add-files"))
{
JsonArray *add_files = json_object_get_array_member (treefile, "add-files");
guint i, len = json_array_get_length (add_files);
for (i = 0; i < len; i++)
{
JsonArray *add_el = json_array_get_array_element (add_files, i);
if (!add_el)
return glnx_throw (error, "Element in add-files is not an array");
const char *src = _rpmostree_jsonutil_array_require_string_element (add_el, 0, error);
if (!src)
return FALSE;

int src_fd = ror_treefile_get_add_file_fd (tf, src);
g_assert_cmpint (src_fd, !=, -1);
g_autoptr(GBytes) bytes = glnx_fd_readall_bytes (src_fd, NULL, FALSE);
gsize len;
const guint8* buf = g_bytes_get_data (bytes, &len);
g_checksum_update (checksum, (const guint8 *) buf, len);
}

}

/* FIXME; we should also hash the post script */
/* Hash in the treefile inputs (this includes all externals like postprocess, add-files,
* etc... and the final flattened treefile -- see treefile.rs for more details). */
g_checksum_update (checksum, (guint8*)ror_treefile_get_checksum (tf), -1);

/* Hash in each package */
if (!rpmostree_dnf_add_checksum_goal (checksum, goal, NULL, error))
Expand Down

0 comments on commit 8b3753f

Please sign in to comment.