From 8df80d9c12d828edfa9cd45ab58a56717be54cef Mon Sep 17 00:00:00 2001 From: Alessandro Decina Date: Wed, 20 Mar 2024 09:39:33 +1100 Subject: [PATCH] accounts-db: unpack_archive: unpack accounts straight into their final destination (#289) * accounts-db: unpack_archive: avoid extra iteration on each path We used to do a iterator.clone().any(...) followed by iterator.collect(). Merge the two and avoid an extra iteration and re-parsing of the path. * accounts-db: unpack_archive: unpack accounts straight into their final destination We used to unpack accounts into account_path/accounts/ then rename to account_path/. We now unpack them into their final destination directly and avoid the rename syscall. --- accounts-db/src/hardened_unpack.rs | 76 ++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/accounts-db/src/hardened_unpack.rs b/accounts-db/src/hardened_unpack.rs index 39eca4f9cdf3d9..ebdafe675f0512 100644 --- a/accounts-db/src/hardened_unpack.rs +++ b/accounts-db/src/hardened_unpack.rs @@ -112,27 +112,26 @@ where // first by ourselves when there are odd paths like including `..` or / // for our clearer pattern matching reasoning: // https://docs.rs/tar/0.4.26/src/tar/entry.rs.html#371 - let parts = path.components().map(|p| match p { - CurDir => Some("."), - Normal(c) => c.to_str(), - _ => None, // Prefix (for Windows) and RootDir are forbidden - }); + let parts = path + .components() + .map(|p| match p { + CurDir => Ok("."), + Normal(c) => c.to_str().ok_or(()), + _ => Err(()), // Prefix (for Windows) and RootDir are forbidden + }) + .collect::, _>>(); // Reject old-style BSD directory entries that aren't explicitly tagged as directories let legacy_dir_entry = entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/"); let kind = entry.header().entry_type(); let reject_legacy_dir_entry = legacy_dir_entry && (kind != Directory); - - if parts.clone().any(|p| p.is_none()) || reject_legacy_dir_entry { + let (Ok(parts), false) = (parts, reject_legacy_dir_entry) else { return Err(UnpackError::Archive(format!( "invalid path found: {path_str:?}" ))); - } + }; - let parts: Vec<_> = parts.map(|p| p.unwrap()).collect(); - let account_filename = - (parts.len() == 2 && parts[0] == "accounts").then(|| PathBuf::from(parts[1])); let unpack_dir = match entry_checker(parts.as_slice(), kind) { UnpackPath::Invalid => { return Err(UnpackError::Archive(format!( @@ -159,13 +158,24 @@ where )?; total_count = checked_total_count_increment(total_count, limit_count)?; - let target = sanitize_path(&entry.path()?, unpack_dir)?; // ? handles file system errors - if target.is_none() { + let account_filename = match parts.as_slice() { + ["accounts", account_filename] => Some(PathBuf::from(account_filename)), + _ => None, + }; + let entry_path = if let Some(account) = account_filename { + // Special case account files. We're unpacking an account entry inside one of the + // account_paths returned by `entry_checker`. We want to unpack into + // account_path/ instead of account_path/accounts/ so we strip the + // accounts/ prefix. + sanitize_path(&account, unpack_dir) + } else { + sanitize_path(&path, unpack_dir) + }?; // ? handles file system errors + let Some(entry_path) = entry_path else { continue; // skip it - } - let target = target.unwrap(); + }; - let unpack = entry.unpack(target); + let unpack = entry.unpack(&entry_path); check_unpack_result(unpack.map(|_unpack| true)?, path_str)?; // Sanitize permissions. @@ -173,16 +183,7 @@ where GNUSparse | Regular => 0o644, _ => 0o755, }; - let entry_path_buf = unpack_dir.join(entry.path()?); - set_perms(&entry_path_buf, mode)?; - - let entry_path = if let Some(account_filename) = account_filename { - let stripped_path = unpack_dir.join(account_filename); // strip away "accounts" - fs::rename(&entry_path_buf, &stripped_path)?; - stripped_path - } else { - entry_path_buf - }; + set_perms(&entry_path, mode)?; // Process entry after setting permissions entry_processor(entry_path); @@ -1029,4 +1030,27 @@ mod tests { if message == "too many files in snapshot: 1000000000000" ); } + + #[test] + fn test_archive_unpack_account_path() { + let mut header = Header::new_gnu(); + header.set_path("accounts/123.456").unwrap(); + header.set_size(4); + header.set_cksum(); + let data: &[u8] = &[1, 2, 3, 4]; + + let mut archive = Builder::new(Vec::new()); + archive.append(&header, data).unwrap(); + let result = with_finalize_and_unpack(archive, |ar, tmp| { + unpack_snapshot_with_processors( + ar, + tmp, + &[tmp.join("accounts_dest")], + None, + |_, _| {}, + |path| assert_eq!(path, tmp.join("accounts_dest/123.456")), + ) + }); + assert_matches!(result, Ok(())); + } }