diff --git a/Cargo.lock b/Cargo.lock index 7c5af8bc9a6f18..c07b07bbf653a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2461,6 +2461,15 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "mmarinus" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b8098e6e7a3823fa237ef9569010d3af3894a1ae54c92f0c220b9e6357f9473" +dependencies = [ + "libc", +] + [[package]] name = "modular-bitfield" version = "0.11.2" @@ -5625,6 +5634,7 @@ dependencies = [ "libsecp256k1", "log", "memmap2", + "mmarinus", "num-derive", "num-traits", "num_cpus", diff --git a/programs/bpf/Cargo.lock b/programs/bpf/Cargo.lock index 6331f861d84662..2b73e6c1dbe100 100644 --- a/programs/bpf/Cargo.lock +++ b/programs/bpf/Cargo.lock @@ -2191,6 +2191,15 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "mmarinus" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b8098e6e7a3823fa237ef9569010d3af3894a1ae54c92f0c220b9e6357f9473" +dependencies = [ + "libc", +] + [[package]] name = "modular-bitfield" version = "0.11.2" @@ -5002,6 +5011,7 @@ dependencies = [ "lazy_static", "log", "memmap2", + "mmarinus", "num-derive", "num-traits", "num_cpus", diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index cf26ece5921635..1cff4178ffe1ca 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -62,6 +62,9 @@ zstd = "0.11.1" crate-type = ["lib"] name = "solana_runtime" +[target.'cfg(unix)'.dependencies] +mmarinus = "0.4.0" + [dev-dependencies] assert_matches = "1.5.0" ed25519-dalek = "=1.0.1" diff --git a/runtime/src/shared_buffer_reader.rs b/runtime/src/shared_buffer_reader.rs index c6251d202ed6fa..4471ccfb1813e4 100644 --- a/runtime/src/shared_buffer_reader.rs +++ b/runtime/src/shared_buffer_reader.rs @@ -24,7 +24,7 @@ use { // # bytes allocated and populated by reading ahead const TOTAL_BUFFER_BUDGET_DEFAULT: usize = 2_000_000_000; // data is read-ahead and saved in chunks of this many bytes -const CHUNK_SIZE_DEFAULT: usize = 100_000_000; +const CHUNK_SIZE_DEFAULT: usize = 50_000_000; type OneSharedBuffer = Arc>; diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 534c47742d3a20..76b48a97e7a036 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -781,8 +781,8 @@ pub struct BankFromArchiveTimings { pub verify_snapshot_bank_us: u64, } -// From testing, 4 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later. -const PARALLEL_UNTAR_READERS_DEFAULT: usize = 4; +// From testing, 8 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later. +const PARALLEL_UNTAR_READERS_DEFAULT: usize = 8; /// Rebuild bank from snapshot archives. Handles either just a full snapshot, or both a full /// snapshot and an incremental snapshot. @@ -1451,6 +1451,7 @@ fn unpack_snapshot_local T>( unpack_snapshot(&mut archive, ledger_dir, account_paths, parallel_selector) }) .collect::>(); + let mut unpacked_append_vec_map = UnpackedAppendVecMap::new(); for h in all_unpacked_append_vec_map { unpacked_append_vec_map.extend(h?); @@ -1459,12 +1460,29 @@ fn unpack_snapshot_local T>( Ok(unpacked_append_vec_map) } +#[cfg(not(target_os = "linux"))] fn untar_snapshot_in>( snapshot_tar: P, unpack_dir: &Path, account_paths: &[PathBuf], archive_format: ArchiveFormat, parallel_divisions: usize, +) -> Result { + untar_snapshot_file( + snapshot_tar.as_ref(), + unpack_dir, + account_paths, + archive_format, + parallel_divisions, + ) +} + +fn untar_snapshot_file( + snapshot_tar: &Path, + unpack_dir: &Path, + account_paths: &[PathBuf], + archive_format: ArchiveFormat, + parallel_divisions: usize, ) -> Result { let open_file = || File::open(&snapshot_tar).unwrap(); let account_paths_map = match archive_format { @@ -1496,6 +1514,99 @@ fn untar_snapshot_in>( Ok(account_paths_map) } +#[cfg(target_os = "linux")] +fn untar_snapshot_in>( + snapshot_tar: P, + unpack_dir: &Path, + account_paths: &[PathBuf], + archive_format: ArchiveFormat, + parallel_divisions: usize, +) -> Result { + let ret = untar_snapshot_mmap( + snapshot_tar.as_ref(), + unpack_dir, + account_paths, + archive_format, + parallel_divisions, + ); + + if ret.is_ok() { + ret + } else { + warn!( + "Failed to memory map the snapshot file: {}", + snapshot_tar.as_ref().display(), + ); + + untar_snapshot_file( + snapshot_tar.as_ref(), + unpack_dir, + account_paths, + archive_format, + parallel_divisions, + ) + } +} + +#[cfg(target_os = "linux")] +impl From> for SnapshotError { + fn from(_: mmarinus::Error) -> SnapshotError { + SnapshotError::Io(std::io::Error::new(ErrorKind::Other, "mmap failure")) + } +} + +#[cfg(target_os = "linux")] +fn untar_snapshot_mmap( + snapshot_tar: &Path, + unpack_dir: &Path, + account_paths: &[PathBuf], + archive_format: ArchiveFormat, + parallel_divisions: usize, +) -> Result { + use { + mmarinus::{perms, Map, Private}, + std::slice, + }; + + let mmap = Map::load(&snapshot_tar, Private, perms::Read)?; + + // `unpack_snapshot_local` takes a BufReader creator, which requires a + // static lifetime because of its background reader thread. Therefore, we + // can't pass the &mmap. Instead, we construct and pass a a slice + // explicitly. However, the following code is guaranteed to be safe because + // the lifetime of mmap last till the end of the function while the usage of + // mmap, BufReader's lifetime only last within fn unpack_snapshot_local. + let len = &mmap[..].len(); + let ptr = &mmap[0] as *const u8; + let slice = unsafe { slice::from_raw_parts(ptr, *len) }; + + let account_paths_map = match archive_format { + ArchiveFormat::TarBzip2 => unpack_snapshot_local( + || BzDecoder::new(slice), + unpack_dir, + account_paths, + parallel_divisions, + )?, + ArchiveFormat::TarGzip => unpack_snapshot_local( + || GzDecoder::new(slice), + unpack_dir, + account_paths, + parallel_divisions, + )?, + ArchiveFormat::TarZstd => unpack_snapshot_local( + || zstd::stream::read::Decoder::new(slice).unwrap(), + unpack_dir, + account_paths, + parallel_divisions, + )?, + ArchiveFormat::Tar => { + unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)? + } + }; + + Ok(account_paths_map) +} + fn verify_unpacked_snapshots_dir_and_version( unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion, ) -> Result<(SnapshotVersion, BankSnapshotInfo)> {