Skip to content

Commit

Permalink
Use memory map to speed up snapshot untar (#24889)
Browse files Browse the repository at this point in the history
* mmap

* add no_os_memory_reporting_args to ledger-tool

* use safe memory map package for unix. use regular file for windows

* fallback to untaring with filebuf when mmap fails

* update comments
  • Loading branch information
HaoranYi authored May 12, 2022
1 parent cde15ff commit 3367e44
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 3 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions programs/bpf/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ zstd = "0.11.1"
crate-type = ["lib"]
name = "solana_runtime"

[target.'cfg(unix)'.dependencies]
mmarinus = "0.4.0"

[dev-dependencies]
assert_matches = "1.5.0"
ed25519-dalek = "=1.0.1"
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/shared_buffer_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use {
// # bytes allocated and populated by reading ahead
const TOTAL_BUFFER_BUDGET_DEFAULT: usize = 2_000_000_000;
// data is read-ahead and saved in chunks of this many bytes
const CHUNK_SIZE_DEFAULT: usize = 100_000_000;
const CHUNK_SIZE_DEFAULT: usize = 50_000_000;

type OneSharedBuffer = Arc<Vec<u8>>;

Expand Down
115 changes: 113 additions & 2 deletions runtime/src/snapshot_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,8 +781,8 @@ pub struct BankFromArchiveTimings {
pub verify_snapshot_bank_us: u64,
}

// From testing, 4 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later.
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 4;
// From testing, 8 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores. This may need to be tuned later.
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 8;

/// Rebuild bank from snapshot archives. Handles either just a full snapshot, or both a full
/// snapshot and an incremental snapshot.
Expand Down Expand Up @@ -1451,6 +1451,7 @@ fn unpack_snapshot_local<T: 'static + Read + std::marker::Send, F: Fn() -> T>(
unpack_snapshot(&mut archive, ledger_dir, account_paths, parallel_selector)
})
.collect::<Vec<_>>();

let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
for h in all_unpacked_append_vec_map {
unpacked_append_vec_map.extend(h?);
Expand All @@ -1459,12 +1460,29 @@ fn unpack_snapshot_local<T: 'static + Read + std::marker::Send, F: Fn() -> T>(
Ok(unpacked_append_vec_map)
}

#[cfg(not(target_os = "linux"))]
/// Untars a snapshot archive into `unpack_dir`.
///
/// On non-Linux targets there is no memory-map fast path, so this simply
/// delegates to the file-based implementation.
fn untar_snapshot_in<P: AsRef<Path>>(
    snapshot_tar: P,
    unpack_dir: &Path,
    account_paths: &[PathBuf],
    archive_format: ArchiveFormat,
    parallel_divisions: usize,
) -> Result<UnpackedAppendVecMap> {
    let tar_path = snapshot_tar.as_ref();
    untar_snapshot_file(
        tar_path,
        unpack_dir,
        account_paths,
        archive_format,
        parallel_divisions,
    )
}

fn untar_snapshot_file(
snapshot_tar: &Path,
unpack_dir: &Path,
account_paths: &[PathBuf],
archive_format: ArchiveFormat,
parallel_divisions: usize,
) -> Result<UnpackedAppendVecMap> {
let open_file = || File::open(&snapshot_tar).unwrap();
let account_paths_map = match archive_format {
Expand Down Expand Up @@ -1496,6 +1514,99 @@ fn untar_snapshot_in<P: AsRef<Path>>(
Ok(account_paths_map)
}

#[cfg(target_os = "linux")]
/// Untars a snapshot archive into `unpack_dir`.
///
/// On Linux, first tries the memory-map fast path (`untar_snapshot_mmap`).
/// If mapping the archive fails for any reason, logs the failure (including
/// the underlying error, so the cause is not lost) and falls back to the
/// buffered-file implementation.
fn untar_snapshot_in<P: AsRef<Path>>(
    snapshot_tar: P,
    unpack_dir: &Path,
    account_paths: &[PathBuf],
    archive_format: ArchiveFormat,
    parallel_divisions: usize,
) -> Result<UnpackedAppendVecMap> {
    match untar_snapshot_mmap(
        snapshot_tar.as_ref(),
        unpack_dir,
        account_paths,
        archive_format,
        parallel_divisions,
    ) {
        Ok(unpacked_append_vec_map) => Ok(unpacked_append_vec_map),
        Err(err) => {
            // NOTE(review): assumes `SnapshotError` derives `Debug`
            // (thiserror-style error enums in this crate do) — confirm.
            warn!(
                "Failed to memory map the snapshot file: {}: {:?}",
                snapshot_tar.as_ref().display(),
                err,
            );
            untar_snapshot_file(
                snapshot_tar.as_ref(),
                unpack_dir,
                account_paths,
                archive_format,
                parallel_divisions,
            )
        }
    }
}

#[cfg(target_os = "linux")]
impl<T> From<mmarinus::Error<T>> for SnapshotError {
    /// Collapses any `mmarinus` mapping failure into a generic I/O
    /// `SnapshotError`; the caller only needs to know the mmap attempt
    /// failed so it can fall back to the file-based untar path.
    fn from(_: mmarinus::Error<T>) -> SnapshotError {
        let io_error = std::io::Error::new(ErrorKind::Other, "mmap failure");
        SnapshotError::Io(io_error)
    }
}

#[cfg(target_os = "linux")]
/// Untars a snapshot archive by memory-mapping it and decompressing straight
/// out of the mapping, avoiding per-read file I/O.
///
/// Returns the map of unpacked append-vec files, or an error if the archive
/// cannot be memory mapped (callers fall back to `untar_snapshot_file`).
fn untar_snapshot_mmap(
    snapshot_tar: &Path,
    unpack_dir: &Path,
    account_paths: &[PathBuf],
    archive_format: ArchiveFormat,
    parallel_divisions: usize,
) -> Result<UnpackedAppendVecMap> {
    use {
        mmarinus::{perms, Map, Private},
        std::slice,
    };

    let mmap = Map::load(&snapshot_tar, Private, perms::Read)?;

    // `unpack_snapshot_local` takes a BufReader creator, which requires a
    // 'static lifetime because of its background reader thread. Therefore, we
    // can't pass `&mmap` directly. Instead, we construct and pass a raw slice
    // over the mapping.
    //
    // SAFETY: `mmap` stays alive until the end of this function, and every
    // reader built from `slice` is consumed inside `unpack_snapshot_local`,
    // which returns before `mmap` is dropped — so the slice never outlives
    // the mapping it points into. Deriving the pointer via `as_ptr()` (rather
    // than `&mmap[0]`) also avoids a panic on a zero-length mapping.
    let bytes: &[u8] = &mmap[..];
    let slice = unsafe { slice::from_raw_parts(bytes.as_ptr(), bytes.len()) };

    let account_paths_map = match archive_format {
        ArchiveFormat::TarBzip2 => unpack_snapshot_local(
            || BzDecoder::new(slice),
            unpack_dir,
            account_paths,
            parallel_divisions,
        )?,
        ArchiveFormat::TarGzip => unpack_snapshot_local(
            || GzDecoder::new(slice),
            unpack_dir,
            account_paths,
            parallel_divisions,
        )?,
        ArchiveFormat::TarZstd => unpack_snapshot_local(
            || zstd::stream::read::Decoder::new(slice).unwrap(),
            unpack_dir,
            account_paths,
            parallel_divisions,
        )?,
        ArchiveFormat::Tar => {
            unpack_snapshot_local(|| slice, unpack_dir, account_paths, parallel_divisions)?
        }
    };

    Ok(account_paths_map)
}

fn verify_unpacked_snapshots_dir_and_version(
unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
) -> Result<(SnapshotVersion, BankSnapshotInfo)> {
Expand Down

0 comments on commit 3367e44

Please sign in to comment.