From 88f6a7a45997fd5f56e5f9a0b4432f52f9f2a8b2 Mon Sep 17 00:00:00 2001 From: Brooks Date: Mon, 11 Mar 2024 17:09:26 -0400 Subject: [PATCH] Removes holding storages in AccountsHashVerifier for fastboot (#120) --- accounts-db/src/lib.rs | 1 - accounts-db/src/starting_snapshot_storages.rs | 19 ----- core/src/accounts_hash_verifier.rs | 44 ---------- core/src/validator.rs | 43 ++++------ core/tests/epoch_accounts_hash.rs | 2 - core/tests/snapshots.rs | 2 - ledger-tool/src/ledger_utils.rs | 32 +++----- ledger/src/bank_forks_utils.rs | 81 +++++-------------- 8 files changed, 52 insertions(+), 172 deletions(-) delete mode 100644 accounts-db/src/starting_snapshot_storages.rs diff --git a/accounts-db/src/lib.rs b/accounts-db/src/lib.rs index b7994fe4354118..7883f852d1e3f2 100644 --- a/accounts-db/src/lib.rs +++ b/accounts-db/src/lib.rs @@ -37,7 +37,6 @@ pub mod secondary_index; pub mod shared_buffer_reader; pub mod sorted_storages; pub mod stake_rewards; -pub mod starting_snapshot_storages; pub mod storable_accounts; pub mod tiered_storage; pub mod utils; diff --git a/accounts-db/src/starting_snapshot_storages.rs b/accounts-db/src/starting_snapshot_storages.rs deleted file mode 100644 index cc5e26c61872b7..00000000000000 --- a/accounts-db/src/starting_snapshot_storages.rs +++ /dev/null @@ -1,19 +0,0 @@ -use {crate::accounts_db::AccountStorageEntry, std::sync::Arc}; - -/// Snapshot storages that the node loaded from -/// -/// This is used to support fastboot. Since fastboot reuses existing storages, we must carefully -/// handle the storages used to load at startup. If we do not handle these storages properly, -/// restarting from the same local state (i.e. bank snapshot) may fail. -#[derive(Debug)] -pub enum StartingSnapshotStorages { - /// Starting from genesis has no storages yet - Genesis, - /// Starting from a snapshot archive always extracts the storages from the archive, so no - /// special handling is necessary to preserve them. - Archive, - /// Starting from local state must preserve the loaded storages. These storages must *not* be - /// recycled or removed prior to taking the next snapshot, otherwise restarting from the same - /// bank snapshot may fail. - Fastboot(Vec>), -} diff --git a/core/src/accounts_hash_verifier.rs b/core/src/accounts_hash_verifier.rs index f5572d94a3c7d1..20adba99835eeb 100644 --- a/core/src/accounts_hash_verifier.rs +++ b/core/src/accounts_hash_verifier.rs @@ -9,7 +9,6 @@ use { IncrementalAccountsHash, }, sorted_storages::SortedStorages, - starting_snapshot_storages::StartingSnapshotStorages, }, solana_measure::measure_us, solana_runtime::{ @@ -43,7 +42,6 @@ impl AccountsHashVerifier { accounts_package_sender: Sender, accounts_package_receiver: Receiver, snapshot_package_sender: Option>, - starting_snapshot_storages: StartingSnapshotStorages, exit: Arc, snapshot_config: SnapshotConfig, ) -> Self { @@ -53,14 +51,6 @@ impl AccountsHashVerifier { .name("solAcctHashVer".to_string()) .spawn(move || { info!("AccountsHashVerifier has started"); - // To support fastboot, we must ensure the storages used in the latest POST snapshot are - // not recycled nor removed early. Hold an Arc of their AppendVecs to prevent them from - // expiring. - let mut fastboot_storages = match starting_snapshot_storages { - StartingSnapshotStorages::Genesis => None, - StartingSnapshotStorages::Archive => None, - StartingSnapshotStorages::Fastboot(storages) => Some(storages), - }; loop { if exit.load(Ordering::Relaxed) { break; @@ -81,14 +71,6 @@ impl AccountsHashVerifier { info!("handling accounts package: {accounts_package:?}"); let enqueued_time = accounts_package.enqueued.elapsed(); - // If this accounts package is for a snapshot, then clone the storages to - // save for fastboot. - let snapshot_storages_for_fastboot = accounts_package - .snapshot_info - .is_some() - .then(|| accounts_package.snapshot_storages.clone()); - - let slot = accounts_package.slot; let (_, handling_time_us) = measure_us!(Self::process_accounts_package( accounts_package, snapshot_package_sender.as_ref(), @@ -96,25 +78,6 @@ impl AccountsHashVerifier { &exit, )); - if let Some(snapshot_storages_for_fastboot) = snapshot_storages_for_fastboot { - // Get the number of storages that are being kept alive for fastboot. - // Looking at the storage Arc's strong reference count, we know that one - // ref is for fastboot, and one ref is for snapshot packaging. If there - // are no others, then the storage will be kept alive because of fastboot. - let num_storages_kept_alive = snapshot_storages_for_fastboot - .iter() - .filter(|storage| Arc::strong_count(storage) == 2) - .count(); - let num_storages_total = snapshot_storages_for_fastboot.len(); - fastboot_storages = Some(snapshot_storages_for_fastboot); - datapoint_info!( - "fastboot", - ("slot", slot, i64), - ("num_storages_total", num_storages_total, i64), - ("num_storages_kept_alive", num_storages_kept_alive, i64), - ); - } - datapoint_info!( "accounts_hash_verifier", ( @@ -132,13 +95,6 @@ impl AccountsHashVerifier { ); } info!("AccountsHashVerifier has stopped"); - debug!( - "Number of storages kept alive for fastboot: {}", - fastboot_storages - .as_ref() - .map(|storages| storages.len()) - .unwrap_or(0) - ); }) .unwrap(); Self { diff --git a/core/src/validator.rs b/core/src/validator.rs index a1c8293f86cb3a..3d2a93daecba2f 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -35,7 +35,6 @@ use { accounts_index::AccountSecondaryIndexes, accounts_update_notifier_interface::AccountsUpdateNotifier, hardened_unpack::{open_genesis_config, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE}, - starting_snapshot_storages::StartingSnapshotStorages, utils::{move_and_async_delete_path, move_and_async_delete_path_contents}, }, solana_client::connection_cache::{ConnectionCache, Protocol}, @@ -691,7 +690,6 @@ impl Validator { completed_slots_receiver, leader_schedule_cache, starting_snapshot_hashes, - starting_snapshot_storages, TransactionHistoryServices { transaction_status_sender, transaction_status_service, @@ -781,7 +779,6 @@ impl Validator { accounts_package_sender.clone(), accounts_package_receiver, snapshot_package_sender, - starting_snapshot_storages, exit.clone(), config.snapshot_config.clone(), ); @@ -1770,7 +1767,6 @@ fn load_blockstore( CompletedSlotsReceiver, LeaderScheduleCache, Option, - StartingSnapshotStorages, TransactionHistoryServices, blockstore_processor::ProcessOptions, BlockstoreRootScan, @@ -1860,27 +1856,23 @@ fn load_blockstore( let entry_notifier_service = entry_notifier .map(|entry_notifier| EntryNotifierService::new(entry_notifier, exit.clone())); - let ( - bank_forks, - mut leader_schedule_cache, - starting_snapshot_hashes, - starting_snapshot_storages, - ) = bank_forks_utils::load_bank_forks( - &genesis_config, - &blockstore, - config.account_paths.clone(), - Some(&config.snapshot_config), - &process_options, - transaction_history_services - .cache_block_meta_sender - .as_ref(), - entry_notifier_service - .as_ref() - .map(|service| service.sender()), - accounts_update_notifier, - exit, - ) - .map_err(|err| err.to_string())?; + let (bank_forks, mut leader_schedule_cache, starting_snapshot_hashes) = + bank_forks_utils::load_bank_forks( + &genesis_config, + &blockstore, + config.account_paths.clone(), + Some(&config.snapshot_config), + &process_options, + transaction_history_services + .cache_block_meta_sender + .as_ref(), + entry_notifier_service + .as_ref() + .map(|service| service.sender()), + accounts_update_notifier, + exit, + ) + .map_err(|err| err.to_string())?; // Before replay starts, set the callbacks in each of the banks in BankForks so that // all dropped banks come through the `pruned_banks_receiver` channel. This way all bank @@ -1906,7 +1898,6 @@ fn load_blockstore( completed_slots_receiver, leader_schedule_cache, starting_snapshot_hashes, - starting_snapshot_storages, transaction_history_services, process_options, blockstore_root_scan, diff --git a/core/tests/epoch_accounts_hash.rs b/core/tests/epoch_accounts_hash.rs index 25e97689923bb0..76b5e4c30dd018 100755 --- a/core/tests/epoch_accounts_hash.rs +++ b/core/tests/epoch_accounts_hash.rs @@ -9,7 +9,6 @@ use { accounts_hash::CalcAccountsHashConfig, accounts_index::AccountSecondaryIndexes, epoch_accounts_hash::EpochAccountsHash, - starting_snapshot_storages::StartingSnapshotStorages, }, solana_core::{ accounts_hash_verifier::AccountsHashVerifier, @@ -197,7 +196,6 @@ impl BackgroundServices { accounts_package_sender.clone(), accounts_package_receiver, Some(snapshot_package_sender), - StartingSnapshotStorages::Genesis, exit.clone(), snapshot_config.clone(), ); diff --git a/core/tests/snapshots.rs b/core/tests/snapshots.rs index 1607ebd3fa2094..730277e2c12a65 100644 --- a/core/tests/snapshots.rs +++ b/core/tests/snapshots.rs @@ -11,7 +11,6 @@ use { accounts_hash::AccountsHash, accounts_index::AccountSecondaryIndexes, epoch_accounts_hash::EpochAccountsHash, - starting_snapshot_storages::StartingSnapshotStorages, }, solana_core::{ accounts_hash_verifier::AccountsHashVerifier, @@ -1044,7 +1043,6 @@ fn test_snapshots_with_background_services( accounts_package_sender, accounts_package_receiver, Some(snapshot_package_sender), - StartingSnapshotStorages::Genesis, exit.clone(), snapshot_test_config.snapshot_config.clone(), ); diff --git a/ledger-tool/src/ledger_utils.rs b/ledger-tool/src/ledger_utils.rs index 8a8302d7e4e94b..c05cc6c2d64cd0 100644 --- a/ledger-tool/src/ledger_utils.rs +++ b/ledger-tool/src/ledger_utils.rs @@ -268,24 +268,19 @@ pub fn load_and_process_ledger( }; let exit = Arc::new(AtomicBool::new(false)); - let ( - bank_forks, - leader_schedule_cache, - starting_snapshot_hashes, - starting_snapshot_storages, - .., - ) = bank_forks_utils::load_bank_forks( - genesis_config, - blockstore.as_ref(), - account_paths, - snapshot_config.as_ref(), - &process_options, - None, - None, // Maybe support this later, though - accounts_update_notifier, - exit.clone(), - ) - .map_err(LoadAndProcessLedgerError::LoadBankForks)?; + let (bank_forks, leader_schedule_cache, starting_snapshot_hashes, ..) = + bank_forks_utils::load_bank_forks( + genesis_config, + blockstore.as_ref(), + account_paths, + snapshot_config.as_ref(), + &process_options, + None, + None, // Maybe support this later, though + accounts_update_notifier, + exit.clone(), + ) + .map_err(LoadAndProcessLedgerError::LoadBankForks)?; let block_verification_method = value_t!( arg_matches, "block_verification_method", @@ -330,7 +325,6 @@ pub fn load_and_process_ledger( accounts_package_sender.clone(), accounts_package_receiver, None, - starting_snapshot_storages, exit.clone(), SnapshotConfig::new_load_only(), ); diff --git a/ledger/src/bank_forks_utils.rs b/ledger/src/bank_forks_utils.rs index b30f90986bb9c2..17412c1801ac68 100644 --- a/ledger/src/bank_forks_utils.rs +++ b/ledger/src/bank_forks_utils.rs @@ -10,10 +10,7 @@ use { use_snapshot_archives_at_startup::{self, UseSnapshotArchivesAtStartup}, }, log::*, - solana_accounts_db::{ - accounts_update_notifier_interface::AccountsUpdateNotifier, - starting_snapshot_storages::StartingSnapshotStorages, - }, + solana_accounts_db::accounts_update_notifier_interface::AccountsUpdateNotifier, solana_runtime::{ accounts_background_service::AbsRequestSender, bank_forks::BankForks, @@ -70,7 +67,6 @@ pub type LoadResult = result::Result< Arc>, LeaderScheduleCache, Option, - StartingSnapshotStorages, ), BankForksUtilsError, >; @@ -92,13 +88,7 @@ pub fn load( accounts_update_notifier: Option, exit: Arc, ) -> LoadResult { - let ( - bank_forks, - leader_schedule_cache, - starting_snapshot_hashes, - starting_snapshot_storages, - .., - ) = load_bank_forks( + let (bank_forks, leader_schedule_cache, starting_snapshot_hashes, ..) = load_bank_forks( genesis_config, blockstore, account_paths, @@ -121,12 +111,7 @@ pub fn load( ) .map_err(BankForksUtilsError::ProcessBlockstoreFromRoot)?; - Ok(( - bank_forks, - leader_schedule_cache, - starting_snapshot_hashes, - starting_snapshot_storages, - )) + Ok((bank_forks, leader_schedule_cache, starting_snapshot_hashes)) } #[allow(clippy::too_many_arguments)] @@ -176,7 +161,7 @@ pub fn load_bank_forks( )) } - let (bank_forks, starting_snapshot_hashes, starting_snapshot_storages) = + let (bank_forks, starting_snapshot_hashes) = if let Some((full_snapshot_archive_info, incremental_snapshot_archive_info)) = get_snapshots_to_load(snapshot_config) { @@ -188,22 +173,17 @@ pub fn load_bank_forks( ); std::fs::create_dir_all(&snapshot_config.bank_snapshots_dir) .expect("create bank snapshots dir"); - let (bank_forks, starting_snapshot_hashes, starting_snapshot_storages) = - bank_forks_from_snapshot( - full_snapshot_archive_info, - incremental_snapshot_archive_info, - genesis_config, - account_paths, - snapshot_config, - process_options, - accounts_update_notifier, - exit, - )?; - ( - bank_forks, - Some(starting_snapshot_hashes), - starting_snapshot_storages, - ) + let (bank_forks, starting_snapshot_hashes) = bank_forks_from_snapshot( + full_snapshot_archive_info, + incremental_snapshot_archive_info, + genesis_config, + account_paths, + snapshot_config, + process_options, + accounts_update_notifier, + exit, + )?; + (bank_forks, Some(starting_snapshot_hashes)) } else { info!("Processing ledger from genesis"); let bank_forks = blockstore_processor::process_blockstore_for_bank_0( @@ -222,7 +202,7 @@ pub fn load_bank_forks( .root_bank() .set_startup_verification_complete(); - (bank_forks, None, StartingSnapshotStorages::Genesis) + (bank_forks, None) }; let mut leader_schedule_cache = @@ -238,12 +218,7 @@ pub fn load_bank_forks( .for_each(|hard_fork_slot| root_bank.register_hard_fork(*hard_fork_slot)); } - Ok(( - bank_forks, - leader_schedule_cache, - starting_snapshot_hashes, - starting_snapshot_storages, - )) + Ok((bank_forks, leader_schedule_cache, starting_snapshot_hashes)) } #[allow(clippy::too_many_arguments)] @@ -256,14 +231,7 @@ fn bank_forks_from_snapshot( process_options: &ProcessOptions, accounts_update_notifier: Option, exit: Arc, -) -> Result< - ( - Arc>, - StartingSnapshotHashes, - StartingSnapshotStorages, - ), - BankForksUtilsError, -> { +) -> Result<(Arc>, StartingSnapshotHashes), BankForksUtilsError> { // Fail hard here if snapshot fails to load, don't silently continue if account_paths.is_empty() { return Err(BankForksUtilsError::AccountPathsNotPresent); @@ -289,7 +257,7 @@ fn bank_forks_from_snapshot( .unwrap_or(true), }; - let (bank, starting_snapshot_storages) = if will_startup_from_snapshot_archives { + let bank = if will_startup_from_snapshot_archives { // Given that we are going to boot from an archive, the append vecs held in the snapshot dirs for fast-boot should // be released. They will be released by the account_background_service anyway. But in the case of the account_paths // using memory-mounted file system, they are not released early enough to give space for the new append-vecs from @@ -324,7 +292,7 @@ fn bank_forks_from_snapshot( .map(|archive| archive.path().display().to_string()) .unwrap_or("none".to_string()), })?; - (bank, StartingSnapshotStorages::Archive) + bank } else { let bank_snapshot = latest_bank_snapshot.ok_or_else(|| BankForksUtilsError::NoBankSnapshotDirectory { @@ -378,8 +346,7 @@ fn bank_forks_from_snapshot( // snapshot archive next time, which is safe. snapshot_utils::purge_all_bank_snapshots(&snapshot_config.bank_snapshots_dir); - let storages = bank.get_snapshot_storages(None); - (bank, StartingSnapshotStorages::Fastboot(storages)) + bank }; let full_snapshot_hash = FullSnapshotHash(( @@ -398,9 +365,5 @@ fn bank_forks_from_snapshot( incremental: incremental_snapshot_hash, }; - Ok(( - BankForks::new_rw_arc(bank), - starting_snapshot_hashes, - starting_snapshot_storages, - )) + Ok((BankForks::new_rw_arc(bank), starting_snapshot_hashes)) }