diff --git a/core/src/consensus.rs b/core/src/consensus.rs
index b5868ffc9267b2..d19e049947e2ea 100644
--- a/core/src/consensus.rs
+++ b/core/src/consensus.rs
@@ -969,7 +969,7 @@ impl Tower {
         if let Some(last_voted_slot) = self.last_voted_slot() {
             if tower_root <= replayed_root {
                 // Normally, we goes into this clause with possible help of
-                // reconcile_blockstore_roots_with_tower()
+                // reconcile_blockstore_roots_with_external_source()
                 if slot_history.check(last_voted_slot) == Check::TooOld {
                     // We could try hard to anchor with other older votes, but opt to simplify the
                     // following logic
@@ -1221,45 +1221,61 @@ impl TowerError {
     }
 }
 
+#[derive(Debug)]
+pub enum ExternalRootSource {
+    Tower(Slot),
+    HardFork(Slot),
+}
+
+impl ExternalRootSource {
+    fn root(&self) -> Slot {
+        match self {
+            ExternalRootSource::Tower(slot) => *slot,
+            ExternalRootSource::HardFork(slot) => *slot,
+        }
+    }
+}
+
 // Given an untimely crash, tower may have roots that are not reflected in blockstore,
 // or the reverse of this.
 // That's because we don't impose any ordering guarantee or any kind of write barriers
 // between tower (plain old POSIX fs calls) and blockstore (through RocksDB), when
 // `ReplayState::handle_votable_bank()` saves tower before setting blockstore roots.
-pub fn reconcile_blockstore_roots_with_tower(
-    tower: &Tower,
+pub fn reconcile_blockstore_roots_with_external_source(
+    external_source: ExternalRootSource,
     blockstore: &Blockstore,
+    last_blockstore_root: &mut Slot,
 ) -> blockstore_db::Result<()> {
-    let tower_root = tower.root();
-    let last_blockstore_root = blockstore.last_root();
-    if last_blockstore_root < tower_root {
-        // Ensure tower_root itself to exist and be marked as rooted in the blockstore
+    let external_root = external_source.root();
+    if *last_blockstore_root < external_root {
+        // Ensure that external_root itself exists and is marked as rooted in the blockstore,
         // in addition to its ancestors.
-        let new_roots: Vec<_> = AncestorIterator::new_inclusive(tower_root, blockstore)
-            .take_while(|current| match current.cmp(&last_blockstore_root) {
+        let new_roots: Vec<_> = AncestorIterator::new_inclusive(external_root, blockstore)
+            .take_while(|current| match current.cmp(last_blockstore_root) {
                 Ordering::Greater => true,
                 Ordering::Equal => false,
                 Ordering::Less => panic!(
                     "couldn't find a last_blockstore_root upwards from: {}!?",
-                    tower_root
+                    external_root
                 ),
             })
             .collect();
         if !new_roots.is_empty() {
             info!(
-                "Reconciling slots as root based on tower root: {:?} ({}..{}) ",
-                new_roots, tower_root, last_blockstore_root
+                "Reconciling slots as root based on external root: {:?} ({}..{}) ",
+                new_roots, external_root, last_blockstore_root
            );
             blockstore.set_roots(new_roots.iter())?;
+            *last_blockstore_root = blockstore.last_root();
         } else {
             // This indicates we're in bad state; but still don't panic here.
             // That's because we might have a chance of recovering properly with
             // newer snapshot.
             warn!(
-                "Couldn't find any ancestor slots from tower root ({}) \
+                "Couldn't find any ancestor slots from external source ({:?}) \
                  towards blockstore root ({}); blockstore pruned or only \
-                 tower moved into new ledger?",
-                tower_root, last_blockstore_root,
+                 tower moved into a new ledger or a hard fork just happened?",
+                external_source, last_blockstore_root,
             );
         }
     }
@@ -2737,7 +2753,12 @@ pub mod test {
         let mut tower = Tower::default();
         tower.vote_state.root_slot = Some(4);
-        reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap();
+        reconcile_blockstore_roots_with_external_source(
+            ExternalRootSource::Tower(tower.root()),
+            &blockstore,
+            &mut blockstore.last_root(),
+        )
+        .unwrap();
 
         assert!(!blockstore.is_root(0));
         assert!(blockstore.is_root(1));
@@ -2769,7 +2790,12 @@ pub mod test {
         let mut tower = Tower::default();
         tower.vote_state.root_slot = Some(4);
-        reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap();
+        reconcile_blockstore_roots_with_external_source(
+            ExternalRootSource::Tower(tower.root()),
+            &blockstore,
+            &mut blockstore.last_root(),
+        )
+        .unwrap();
     }
     Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
 }
@@ -2792,7 +2818,12 @@ pub mod test {
         let mut tower = Tower::default();
         tower.vote_state.root_slot = Some(4);
         assert_eq!(blockstore.last_root(), 0);
-        reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap();
+        reconcile_blockstore_roots_with_external_source(
+            ExternalRootSource::Tower(tower.root()),
+            &blockstore,
+            &mut blockstore.last_root(),
+        )
+        .unwrap();
         assert_eq!(blockstore.last_root(), 0);
     }
     Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
diff --git a/core/src/validator.rs b/core/src/validator.rs
index 9bd7bd072f1594..22349f654f9aaf 100644
--- a/core/src/validator.rs
+++ b/core/src/validator.rs
@@ -6,7 +6,7 @@ use {
         cache_block_meta_service::{CacheBlockMetaSender, CacheBlockMetaService},
         cluster_info_vote_listener::VoteTracker,
         completed_data_sets_service::CompletedDataSetsService,
-        consensus::{reconcile_blockstore_roots_with_tower, Tower},
+        consensus::{reconcile_blockstore_roots_with_external_source, ExternalRootSource, Tower},
         cost_model::CostModel,
         rewards_recorder_service::{RewardsRecorderSender, RewardsRecorderService},
         sample_performance_service::SamplePerformanceService,
@@ -158,6 +158,7 @@ pub struct ValidatorConfig {
     pub validator_exit: Arc<RwLock<Exit>>,
     pub no_wait_for_vote_to_start_leader: bool,
     pub accounts_shrink_ratio: AccountShrinkThreshold,
+    pub no_hard_fork_blockstore_root_reconcilation_for_local_cluster_test: bool,
 }
 
 impl Default for ValidatorConfig {
@@ -217,6 +218,7 @@ impl Default for ValidatorConfig {
             no_wait_for_vote_to_start_leader: true,
             accounts_shrink_ratio: AccountShrinkThreshold::default(),
             accounts_db_config: None,
+            no_hard_fork_blockstore_root_reconcilation_for_local_cluster_test: false,
         }
     }
 }
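How the two reconciliation sources are meant to be driven together can be seen in new_banks_from_ledger() further down. As a minimal sketch only — the wrapper function, its name, and the module paths below are illustrative, not part of this patch — the call pattern looks like this:

use {
    solana_core::consensus::{reconcile_blockstore_roots_with_external_source, ExternalRootSource},
    solana_ledger::{blockstore::Blockstore, blockstore_db},
    solana_sdk::clock::Slot,
};

// Hypothetical wrapper: reconcile from the tower first, then (on a hard fork
// restart) from the hard fork slot, threading one stashed pre-boot root through both.
fn reconcile_on_boot(
    blockstore: &Blockstore,
    tower_root: Slot,
    hard_fork_slot: Option<Slot>,
) -> blockstore_db::Result<()> {
    // stash the pre-boot blockstore root once; each call advances it after setting roots
    let mut last_blockstore_root = blockstore.last_root();
    reconcile_blockstore_roots_with_external_source(
        ExternalRootSource::Tower(tower_root),
        blockstore,
        &mut last_blockstore_root,
    )?;
    if let Some(slot) = hard_fork_slot {
        reconcile_blockstore_roots_with_external_source(
            ExternalRootSource::HardFork(slot),
            blockstore,
            &mut last_blockstore_root,
        )?;
    }
    Ok(())
}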
@@ -1005,14 +1007,16 @@ fn post_process_restored_tower(
         .and_then(|tower| {
             let root_bank = bank_forks.root_bank();
             let slot_history = root_bank.get_slot_history();
+            // make sure the tower isn't corrupted before the hard fork check below
             let tower = tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history);
+            // detect a cluster-wide restart (hard fork) indirectly via wait_for_supermajority...
             if let Some(wait_slot_for_supermajority) = config.wait_for_supermajority {
-                if root_bank.slot() == wait_slot_for_supermajority {
+                if wait_slot_for_supermajority == root_bank.slot() {
                     // intentionally fail to restore tower; we're supposedly in a new hard fork; past
                     // out-of-chain vote state doesn't make sense at all
                     // what if --wait-for-supermajority again if the validator restarted?
-                    let message = format!("Hardfork is detected; discarding tower restoration result: {:?}", tower);
+                    let message = format!("Hard fork detected; discarding tower restoration result: {:?}", tower);
                     datapoint_error!(
                         "tower_error",
                         (
@@ -1143,11 +1147,22 @@ fn new_banks_from_ledger(
     )
     .expect("Failed to open ledger database");
     blockstore.set_no_compaction(config.no_rocksdb_compaction);
+    // The following boot sequence (especially BankForks) could set roots, so stash the original
+    // blockstore root away here as soon as possible.
+    let mut last_blockstore_root = blockstore.last_root();
 
     let restored_tower = Tower::restore(config.tower_storage.as_ref(), validator_identity);
     if let Ok(tower) = &restored_tower {
-        reconcile_blockstore_roots_with_tower(tower, &blockstore).unwrap_or_else(|err| {
-            error!("Failed to reconcile blockstore with tower: {:?}", err);
+        reconcile_blockstore_roots_with_external_source(
+            ExternalRootSource::Tower(tower.root()),
+            &blockstore,
+            &mut last_blockstore_root,
+        )
+        .unwrap_or_else(|err| {
+            error!(
+                "Failed to reconcile blockstore according to tower: {:?}",
+                err
+            );
             abort()
         });
     }
@@ -1266,6 +1281,24 @@ fn new_banks_from_ledger(
         );
     }
 
+    if let Some(wait_slot_for_supermajority) = config.wait_for_supermajority {
+        if wait_slot_for_supermajority == bank_forks.root_bank().slot()
+            && !config.no_hard_fork_blockstore_root_reconcilation_for_local_cluster_test
+        {
+            reconcile_blockstore_roots_with_external_source(
+                ExternalRootSource::HardFork(wait_slot_for_supermajority),
+                &blockstore,
+                &mut last_blockstore_root,
+            )
+            .unwrap_or_else(|err| {
+                error!(
+                    "Failed to reconcile blockstore according to hard fork: {:?}",
+                    err
+                );
+                abort()
+            });
+        }
+    }
+
     let tower = post_process_restored_tower(
         restored_tower,
         validator_identity,
diff --git a/ledger/src/blockstore.rs b/ledger/src/blockstore.rs
index f385c1b65b287c..f43917f5dc194d 100644
--- a/ledger/src/blockstore.rs
+++ b/ledger/src/blockstore.rs
@@ -576,13 +576,28 @@ impl Blockstore {
         Ok(slot_iterator.take_while(move |((shred_slot, _), _)| *shred_slot == slot))
     }
 
-    pub fn rooted_slot_iterator(&self, slot: Slot) -> Result<impl Iterator<Item = Slot> + '_> {
+    fn prepare_rooted_slot_iterator(
+        &self,
+        slot: Slot,
+        direction: IteratorDirection,
+    ) -> Result<impl Iterator<Item = Slot> + '_> {
         let slot_iterator = self
             .db
-            .iter::<cf::Root>(IteratorMode::From(slot, IteratorDirection::Forward))?;
+            .iter::<cf::Root>(IteratorMode::From(slot, direction))?;
         Ok(slot_iterator.map(move |(rooted_slot, _)| rooted_slot))
     }
 
+    pub fn rooted_slot_iterator(&self, slot: Slot) -> Result<impl Iterator<Item = Slot> + '_> {
+        self.prepare_rooted_slot_iterator(slot, IteratorDirection::Forward)
+    }
+
+    pub fn reversed_rooted_slot_iterator(
+        &self,
+        slot: Slot,
+    ) -> Result<impl Iterator<Item = Slot> + '_> {
+        self.prepare_rooted_slot_iterator(slot, IteratorDirection::Reverse)
+    }
+
     fn get_recovery_data_shreds(
         index: &mut Index,
         set_index: u64,
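The new reversed_rooted_slot_iterator() is what the gap-detection test further below uses to walk roots backwards from a root common to both nodes. A minimal usage sketch (the helper and its bindings are illustrative, not part of the patch):

use {solana_ledger::blockstore::Blockstore, solana_sdk::clock::Slot};

// Illustrative only: collect rooted slots downwards from `common_root`, newest first.
fn roots_down_from(blockstore: &Blockstore, common_root: Slot) -> Vec<Slot> {
    blockstore
        .reversed_rooted_slot_iterator(common_root)
        .unwrap()
        .collect()
}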
diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs
index 9129773b987548..eadcff4a5cb8bf 100644
--- a/local-cluster/src/local_cluster.rs
+++ b/local-cluster/src/local_cluster.rs
@@ -17,15 +17,18 @@ use {
         gossip_service::discover_cluster,
     },
     solana_ledger::create_new_tmp_ledger,
-    solana_runtime::genesis_utils::{
-        create_genesis_config_with_vote_accounts_and_cluster_type, GenesisConfigInfo,
-        ValidatorVoteKeypairs,
+    solana_runtime::{
+        genesis_utils::{
+            create_genesis_config_with_vote_accounts_and_cluster_type, GenesisConfigInfo,
+            ValidatorVoteKeypairs,
+        },
+        snapshot_config::SnapshotConfig,
     },
     solana_sdk::{
         account::Account,
         account::AccountSharedData,
         client::SyncClient,
-        clock::{DEFAULT_DEV_SLOTS_PER_EPOCH, DEFAULT_TICKS_PER_SLOT},
+        clock::{Slot, DEFAULT_DEV_SLOTS_PER_EPOCH, DEFAULT_TICKS_PER_SLOT},
         commitment_config::CommitmentConfig,
         epoch_schedule::EpochSchedule,
         genesis_config::{ClusterType, GenesisConfig},
@@ -50,10 +53,13 @@ use {
         collections::HashMap,
         io::{Error, ErrorKind, Result},
         iter,
+        path::{Path, PathBuf},
         sync::{Arc, RwLock},
     },
 };
 
+const DUMMY_SNAPSHOT_CONFIG_PATH_MARKER: &str = "dummy";
+
 pub struct ClusterConfig {
     /// The validator config that should be applied to every node in the cluster
     pub validator_configs: Vec<ValidatorConfig>,
@@ -128,6 +134,23 @@ impl LocalCluster {
         Self::new(&mut config, socket_addr_space)
     }
 
+    fn sync_ledger_path_across_nested_config_fields(
+        config: &mut ValidatorConfig,
+        ledger_path: &Path,
+    ) {
+        config.account_paths = vec![ledger_path.join("accounts")];
+        config.tower_storage = Arc::new(FileTowerStorage::new(ledger_path.to_path_buf()));
+        if let Some(snapshot_config) = &mut config.snapshot_config {
+            let dummy: PathBuf = DUMMY_SNAPSHOT_CONFIG_PATH_MARKER.into();
+            if snapshot_config.snapshot_archives_dir == dummy {
+                snapshot_config.snapshot_archives_dir = ledger_path.to_path_buf();
+            }
+            if snapshot_config.bank_snapshots_dir == dummy {
+                snapshot_config.bank_snapshots_dir = ledger_path.join("snapshot");
+            }
+        }
+    }
+
     pub fn new(config: &mut ClusterConfig, socket_addr_space: SocketAddrSpace) -> Self {
         assert_eq!(config.validator_configs.len(), config.node_stakes.len());
         let mut validator_keys = {
@@ -215,8 +238,7 @@ impl LocalCluster {
         let leader_contact_info = leader_node.info.clone();
         let mut leader_config = safe_clone_config(&config.validator_configs[0]);
         leader_config.rpc_addrs = Some((leader_node.info.rpc, leader_node.info.rpc_pubsub));
-        leader_config.account_paths = vec![leader_ledger_path.join("accounts")];
-        leader_config.tower_storage = Arc::new(FileTowerStorage::new(leader_ledger_path.clone()));
+        Self::sync_ledger_path_across_nested_config_fields(&mut leader_config, &leader_ledger_path);
         let leader_keypair = Arc::new(Keypair::from_bytes(&leader_keypair.to_bytes()).unwrap());
         let leader_vote_keypair =
             Arc::new(Keypair::from_bytes(&leader_vote_keypair.to_bytes()).unwrap());
@@ -376,8 +398,7 @@ impl LocalCluster {
         let mut config = safe_clone_config(validator_config);
         config.rpc_addrs = Some((validator_node.info.rpc, validator_node.info.rpc_pubsub));
-        config.account_paths = vec![ledger_path.join("accounts")];
-        config.tower_storage = Arc::new(FileTowerStorage::new(ledger_path.clone()));
+        Self::sync_ledger_path_across_nested_config_fields(&mut config, &ledger_path);
         let voting_keypair = voting_keypair.unwrap();
         let validator_server = Validator::new(
             validator_node,
@@ -408,7 +429,7 @@ impl LocalCluster {
         validator_pubkey
     }
 
-    pub fn ledger_path(&self, validator_pubkey: &Pubkey) -> std::path::PathBuf {
+    pub fn ledger_path(&self, validator_pubkey: &Pubkey) -> PathBuf {
         self.validators
             .get(validator_pubkey)
             .unwrap()
@@ -639,6 +660,19 @@ impl LocalCluster {
             )),
         }
     }
+
+    pub fn create_dummy_load_only_snapshot_config() -> SnapshotConfig {
+        // DUMMY_SNAPSHOT_CONFIG_PATH_MARKER will be replaced with a real value as part
+        // of the cluster node lifecycle.
+        // Some placeholder is needed until then...
+        SnapshotConfig {
+            full_snapshot_archive_interval_slots: Slot::MAX,
+            incremental_snapshot_archive_interval_slots: Slot::MAX,
+            snapshot_archives_dir: DUMMY_SNAPSHOT_CONFIG_PATH_MARKER.into(),
+            bank_snapshots_dir: DUMMY_SNAPSHOT_CONFIG_PATH_MARKER.into(),
+            ..SnapshotConfig::default()
+        }
+    }
 }
 
 impl Cluster for LocalCluster {
@@ -713,10 +747,10 @@ impl Cluster for LocalCluster {
     ) -> ClusterValidatorInfo {
         // Restart the node
         let validator_info = &cluster_validator_info.info;
-        cluster_validator_info.config.account_paths =
-            vec![validator_info.ledger_path.join("accounts")];
-        cluster_validator_info.config.tower_storage =
-            Arc::new(FileTowerStorage::new(validator_info.ledger_path.clone()));
+        LocalCluster::sync_ledger_path_across_nested_config_fields(
+            &mut cluster_validator_info.config,
+            &validator_info.ledger_path,
+        );
         let restarted_node = Validator::new(
             node,
             validator_info.keypair.clone(),
diff --git a/local-cluster/src/validator_configs.rs b/local-cluster/src/validator_configs.rs
index 581b7763ef2369..9f31d3c5f09577 100644
--- a/local-cluster/src/validator_configs.rs
+++ b/local-cluster/src/validator_configs.rs
@@ -59,6 +59,8 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
         no_wait_for_vote_to_start_leader: config.no_wait_for_vote_to_start_leader,
         accounts_shrink_ratio: config.accounts_shrink_ratio,
         accounts_db_config: config.accounts_db_config.clone(),
+        no_hard_fork_blockstore_root_reconcilation_for_local_cluster_test: config
+            .no_hard_fork_blockstore_root_reconcilation_for_local_cluster_test,
     }
 }
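Putting the local-cluster pieces together: a test opts into snapshot loading with the placeholder config, and LocalCluster rewrites the "dummy" paths to the node's ledger directory via sync_ledger_path_across_nested_config_fields() when the node is started or restarted. A minimal sketch, mirroring do_test_hard_fork_with_or_without_gap_in_roots() in the new tests below (bindings illustrative; assumes the test file's usual imports):

// Illustrative fragment: the placeholder snapshot paths are swapped for each
// node's ledger directory by LocalCluster at (re)start time.
let validator_config = ValidatorConfig {
    snapshot_config: Some(LocalCluster::create_dummy_load_only_snapshot_config()),
    ..ValidatorConfig::default()
};
let mut config = ClusterConfig {
    cluster_lamports: 100_000,
    node_stakes: vec![60, 40],
    validator_configs: make_identical_validator_configs(&validator_config, 2),
    ..ClusterConfig::default()
};
let _cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);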
diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs
index bc5a43321e17d6..bc978553c33802 100644
--- a/local-cluster/tests/local_cluster.rs
+++ b/local-cluster/tests/local_cluster.rs
@@ -31,8 +31,10 @@ use {
     },
     solana_ledger::{
         ancestor_iterator::AncestorIterator,
+        bank_forks_utils,
         blockstore::{Blockstore, PurgeType},
         blockstore_db::AccessType,
+        blockstore_processor::ProcessOptions,
         leader_schedule::FixedSchedule,
         leader_schedule::LeaderSchedule,
     },
@@ -43,10 +45,11 @@ use {
         validator_configs::*,
     },
     solana_runtime::{
+        hardened_unpack::open_genesis_config,
         snapshot_archive_info::SnapshotArchiveInfoGetter,
         snapshot_config::SnapshotConfig,
         snapshot_package::SnapshotType,
-        snapshot_utils::{self, ArchiveFormat},
+        snapshot_utils::{self, ArchiveFormat, SnapshotVersion},
     },
     solana_sdk::{
         account::AccountSharedData,
@@ -55,6 +58,7 @@ use {
         commitment_config::CommitmentConfig,
         epoch_schedule::MINIMUM_SLOTS_PER_EPOCH,
         genesis_config::ClusterType,
+        hard_forks::HardForks,
         hash::Hash,
         poh_config::PohConfig,
         pubkey::Pubkey,
@@ -70,7 +74,7 @@ use {
         iter,
         path::{Path, PathBuf},
         sync::atomic::{AtomicBool, Ordering},
-        sync::Arc,
+        sync::{mpsc::channel, Arc},
         thread::{sleep, Builder, JoinHandle},
         time::{Duration, Instant},
     },
@@ -3670,6 +3674,8 @@ fn test_hard_fork_invalidates_tower() {
     let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
 
     // setup hard fork at slot < a previously rooted slot!
+    // a hard fork earlier than the root is very unrealistic in the wild, but it's handy for
+    // testing the persistent tower's lockout behavior...
     let hard_fork_slot = min_root - 5;
     let hard_fork_slots = Some(vec![hard_fork_slot]);
     let mut hard_forks = solana_sdk::hard_forks::HardForks::default();
@@ -3738,6 +3744,256 @@ fn test_hard_fork_invalidates_tower() {
         .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
 }
 
+fn create_simple_snapshot_config(ledger_path: &Path) -> SnapshotConfig {
+    SnapshotConfig {
+        snapshot_archives_dir: ledger_path.to_path_buf(),
+        bank_snapshots_dir: ledger_path.join("snapshot"),
+        ..SnapshotConfig::default()
+    }
+}
+
+fn create_snapshot_to_hard_fork(ledger_path: &Path, snapshot_slot: Slot, hardforks: Vec<Slot>) {
+    let process_options = ProcessOptions {
+        dev_halt_at_slot: Some(snapshot_slot),
+        new_hard_forks: Some(hardforks),
+        poh_verify: false,
+        ..ProcessOptions::default()
+    };
+    let genesis_config = open_genesis_config(ledger_path, u64::max_value());
+    let blockstore =
+        Blockstore::open_with_access_type(ledger_path, AccessType::PrimaryOnly, None, true)
+            .unwrap_or_else(|e| {
+                panic!("Failed to open ledger at {:?}, err: {}", ledger_path, e);
+            });
+    let snapshot_config = Some(create_simple_snapshot_config(ledger_path));
+    let (accounts_package_sender, _) = channel();
+    let (bank_forks, ..) = bank_forks_utils::load(
+        &genesis_config,
+        &blockstore,
+        vec![blockstore.ledger_path().join("accounts")],
+        None,
+        snapshot_config.as_ref(),
+        process_options,
+        None,
+        None,
+        accounts_package_sender,
+    )
+    .unwrap();
+    let bank = bank_forks.get(snapshot_slot).unwrap().clone();
+    let full_snapshot_archive_info = snapshot_utils::bank_to_full_snapshot_archive(
+        ledger_path,
+        &bank,
+        Some(SnapshotVersion::default()),
+        ledger_path,
+        ArchiveFormat::TarZstd,
+        1,
+        1,
+    )
+    .unwrap();
+    info!(
+        "Successfully created snapshot for slot {}, hash {}: {}",
+        bank.slot(),
+        bank.hash(),
+        full_snapshot_archive_info.path().display(),
+    );
+}
+
+fn do_test_hard_fork_with_or_without_gap_in_roots(with_gap: bool) {
+    solana_logger::setup_with_default(RUST_LOG_FILTER);
+
+    // First set up the cluster with 2 nodes
+    let slots_per_epoch = 2048;
+    let node_stakes = vec![60, 40];
+
+    let validator_keys = vec![
+        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
+        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
+    ]
+    .iter()
+    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
+    .take(node_stakes.len())
+    .collect::<Vec<_>>();
+    let validators = validator_keys
+        .iter()
+        .map(|(kp, _)| kp.pubkey())
+        .collect::<Vec<_>>();
+
+    let validator_a_pubkey = validators[0];
+    let validator_b_pubkey = validators[1];
+
+    let validator_config = ValidatorConfig {
+        snapshot_config: Some(LocalCluster::create_dummy_load_only_snapshot_config()),
+        no_hard_fork_blockstore_root_reconcilation_for_local_cluster_test: with_gap,
+        ..ValidatorConfig::default()
+    };
+    let mut config = ClusterConfig {
+        cluster_lamports: 100_000,
+        node_stakes: node_stakes.clone(),
+        validator_configs: make_identical_validator_configs(&validator_config, node_stakes.len()),
+        validator_keys: Some(validator_keys),
+        slots_per_epoch,
+        stakers_slot_offset: slots_per_epoch,
+        skip_warmup_slots: true,
+        ..ClusterConfig::default()
+    };
+    let cluster = std::sync::Arc::new(std::sync::Mutex::new(LocalCluster::new(
+        &mut config,
+        SocketAddrSpace::Unspecified,
+    )));
+
+    let val_a_ledger_path = cluster.lock().unwrap().ledger_path(&validator_a_pubkey);
+    let val_b_ledger_path = cluster.lock().unwrap().ledger_path(&validator_b_pubkey);
+
+    let min_last_vote = 45;
+    let min_root = 10;
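+    // with these thresholds, the hard fork slot chosen below (min_last_vote - 5) normally lands
+    // above what validator A has rooted but below its last vote; unlike
+    // test_hard_fork_invalidates_tower() above, the fork here is not placed below a rooted slot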
+    loop {
+        sleep(Duration::from_millis(100));
+
+        if let Some((last_vote, _)) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) {
+            if last_vote >= min_last_vote
+                && root_in_tower(&val_a_ledger_path, &validator_a_pubkey) > Some(min_root)
+            {
+                break;
+            }
+        }
+    }
+
+    let mut validator_a_info = cluster.lock().unwrap().exit_node(&validator_a_pubkey);
+    let mut validator_b_info = cluster.lock().unwrap().exit_node(&validator_b_pubkey);
+
+    let hard_fork_slot = min_last_vote - 5;
+    let hard_fork_slots = Some(vec![hard_fork_slot]);
+    let mut hard_forks = HardForks::default();
+    hard_forks.register(hard_fork_slot);
+
+    let expected_shred_version = solana_sdk::shred_version::compute_shred_version(
+        &cluster.lock().unwrap().genesis_config.hash(),
+        Some(&hard_forks),
+    );
+
+    // create hard-forked snapshot only for validator a, emulating the manual cluster restart
+    // procedure with `solana-ledger-tool create-snapshot`
+    create_snapshot_to_hard_fork(&val_a_ledger_path, hard_fork_slot, vec![hard_fork_slot]);
+
+    // strictly speaking, new_hard_forks isn't needed for validator a.
+    // but when snapshot loading isn't working, you might see:
+    //   shred version mismatch: expected NNNN found: MMMM
+    //validator_a_info.config.new_hard_forks = hard_fork_slots.clone();
+
+    // effectively pass the --hard-fork parameter to validator b
+    validator_b_info.config.new_hard_forks = hard_fork_slots;
+
+    validator_a_info.config.wait_for_supermajority = Some(hard_fork_slot);
+    validator_a_info.config.expected_shred_version = Some(expected_shred_version);
+
+    validator_b_info.config.wait_for_supermajority = Some(hard_fork_slot);
+    validator_b_info.config.expected_shred_version = Some(expected_shred_version);
+
+    // restart validator A first
+    let cluster_for_a = cluster.clone();
+    // Spawn a thread because wait_for_supermajority blocks in Validator::new()!
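+    // (with stakes of 60/40, validator A alone is below the supermajority threshold, so
+    // Validator::new() for A cannot return until validator B is also brought back; restarting A
+    // inline here would deadlock the test)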
+    let thread = std::thread::spawn(move || {
+        let restart_context = cluster_for_a
+            .lock()
+            .unwrap()
+            .create_restart_context(&validator_a_pubkey, &mut validator_a_info);
+        let restarted_validator_info = LocalCluster::restart_node_with_context(
+            validator_a_info,
+            restart_context,
+            SocketAddrSpace::Unspecified,
+        );
+        cluster_for_a
+            .lock()
+            .unwrap()
+            .add_node(&validator_a_pubkey, restarted_validator_info);
+    });
+
+    // test that validator A actually waits for supermajority
+    let mut last_vote = None;
+    for _ in 0..10 {
+        sleep(Duration::from_millis(1000));
+
+        let (new_last_vote, _) =
+            last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
+        if let Some(last_vote) = last_vote {
+            assert_eq!(last_vote, new_last_vote);
+        } else {
+            last_vote = Some(new_last_vote);
+        }
+    }
+
+    // restart validator B normally
+    cluster.lock().unwrap().restart_node(
+        &validator_b_pubkey,
+        validator_b_info,
+        SocketAddrSpace::Unspecified,
+    );
+
+    // validator A should now start, so join its thread here
+    thread.join().unwrap();
+
+    // new slots should be rooted after the hard-fork cluster relaunch
+    cluster
+        .lock()
+        .unwrap()
+        .check_for_new_roots(16, "hard fork", SocketAddrSpace::Unspecified);
+    drop(cluster);
+
+    let (common_last_vote, common_root) = {
+        let (last_vote_a, _) = last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
+        let (last_vote_b, _) = last_vote_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
+        let root_a = root_in_tower(&val_a_ledger_path, &validator_a_pubkey).unwrap();
+        let root_b = root_in_tower(&val_b_ledger_path, &validator_b_pubkey).unwrap();
+        (last_vote_a.min(last_vote_b), root_a.min(root_b))
+    };
+
+    let blockstore_a = Blockstore::open(&val_a_ledger_path).unwrap();
+    let blockstore_b = Blockstore::open(&val_b_ledger_path).unwrap();
+
+    // collect all slots
+    let slots_a = AncestorIterator::new(common_last_vote, &blockstore_a).collect::<Vec<_>>();
+    let roots_a = blockstore_a
+        .reversed_rooted_slot_iterator(common_root)
+        .unwrap()
+        .collect::<Vec<_>>();
+    let slots_b = AncestorIterator::new(common_last_vote, &blockstore_b).collect::<Vec<_>>();
+    let roots_b = blockstore_b
+        .reversed_rooted_slot_iterator(common_root)
+        .unwrap()
+        .collect::<Vec<_>>();
+
+    // compare all slots
+    if !with_gap {
+        assert_eq!((slots_a, roots_a), (slots_b, roots_b));
+    } else {
+        // rough way to detect a gap with mostly similar slots...
+        assert_eq!(
+            (slots_a.first().unwrap(), roots_a.first().unwrap()),
+            (slots_b.first().unwrap(), roots_b.first().unwrap())
+        );
+        assert_eq!(
+            (slots_a.last().unwrap(), roots_a.last().unwrap()),
+            (slots_b.last().unwrap(), roots_b.last().unwrap())
+        );
+        assert_ne!((slots_a, roots_a), (slots_b, roots_b));
+    }
+}
+
+// the following two tests are rather fragile, as they depend on successful loading of the
+// hard-forked snapshot. so, to make sure they aren't silently broken, the expected failure
+// (gap) case is exercised as well, via the two conditioned test invocations below.
+#[test]
+#[serial]
+fn test_hard_fork_without_gap_in_roots() {
+    do_test_hard_fork_with_or_without_gap_in_roots(false);
+}
+
+#[test]
+#[serial]
+fn test_hard_fork_with_gap_in_roots() {
+    do_test_hard_fork_with_or_without_gap_in_roots(true);
+}
+
 #[test]
 #[serial]
 fn test_no_optimistic_confirmation_violation_with_tower() {
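On what the with_gap assertions above are really probing: reconcile_blockstore_roots_with_external_source(ExternalRootSource::HardFork(..)) marks the hard fork slot and its blockstore ancestors as roots at boot, keeping the rooted sequence connected to the pre-restart roots; with that reconciliation disabled, the in-between slots can stay unrooted. A hypothetical helper along these lines could check for such a gap directly — it is not part of the patch (the tests above deliberately use a rougher first()/last() comparison instead) and assumes the test file's existing imports:

// Hypothetical helper: walk roots newest-to-oldest and require that every blockstore
// ancestor strictly between two consecutive roots is itself rooted; a skipped
// hard-fork reconciliation leaves such ancestors unrooted.
fn roots_have_no_gap(blockstore: &Blockstore, from_root: Slot) -> bool {
    let roots: Vec<Slot> = blockstore
        .reversed_rooted_slot_iterator(from_root)
        .unwrap()
        .collect();
    roots.windows(2).all(|pair| {
        AncestorIterator::new(pair[0], blockstore)
            .take_while(|slot| *slot > pair[1])
            .all(|slot| blockstore.is_root(slot))
    })
}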