diff --git a/core/src/consensus.rs b/core/src/consensus.rs index 31c4e3bf36082e..690dbe6ddf79e9 100644 --- a/core/src/consensus.rs +++ b/core/src/consensus.rs @@ -1423,6 +1423,8 @@ pub mod test { &AbsRequestSender::default(), None, &mut self.heaviest_subtree_fork_choice, + &mut true, + &mut Vec::new(), ) } diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 7313067f69818e..a5793f34012fc7 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -37,6 +37,7 @@ use solana_sdk::{ genesis_config::ClusterType, hash::Hash, pubkey::Pubkey, + signature::Signature, signature::{Keypair, Signer}, timing::timestamp, transaction::Transaction, @@ -57,6 +58,7 @@ use std::{ pub const MAX_ENTRY_RECV_PER_ITER: usize = 512; pub const SUPERMINORITY_THRESHOLD: f64 = 1f64 / 3f64; pub const MAX_UNCONFIRMED_SLOTS: usize = 5; +const MAX_VOTE_SIGNATURES: usize = 200; #[derive(PartialEq, Debug)] pub(crate) enum HeaviestForkFailures { @@ -105,6 +107,7 @@ pub struct ReplayStageConfig { pub rewards_recorder_sender: Option, pub cache_block_time_sender: Option, pub bank_notification_sender: Option, + pub wait_for_vote_to_start_leader: bool, } #[derive(Default)] @@ -265,6 +268,7 @@ impl ReplayStage { rewards_recorder_sender, cache_block_time_sender, bank_notification_sender, + wait_for_vote_to_start_leader, } = config; trace!("replay stage"); @@ -294,6 +298,8 @@ impl ReplayStage { let mut partition_exists = false; let mut skipped_slots_info = SkippedSlotsInfo::default(); let mut replay_timing = ReplayTiming::default(); + let mut voted_signatures = Vec::new(); + let mut has_new_vote_been_rooted = !wait_for_vote_to_start_leader; loop { let allocated = thread_mem_usage::Allocatedp::default(); @@ -481,6 +487,8 @@ impl ReplayStage { &mut heaviest_subtree_fork_choice, &cache_block_time_sender, &bank_notification_sender, + &mut voted_signatures, + &mut has_new_vote_been_rooted, ); }; voting_time.stop(); @@ -572,6 +580,7 @@ impl ReplayStage { &progress, 
&retransmit_slots_sender, &mut skipped_slots_info, + has_new_vote_been_rooted, ); let poh_bank = poh_recorder.lock().unwrap().bank(); @@ -887,7 +896,12 @@ impl ReplayStage { progress_map: &ProgressMap, retransmit_slots_sender: &RetransmitSlotsSender, skipped_slots_info: &mut SkippedSlotsInfo, + has_new_vote_been_rooted: bool, ) { + if !has_new_vote_been_rooted { + info!("Haven't landed a vote, so skipping my leader slot"); + return; + } // all the individual calls to poh_recorder.lock() are designed to // increase granularity, decrease contention @@ -1102,6 +1116,8 @@ impl ReplayStage { heaviest_subtree_fork_choice: &mut HeaviestSubtreeForkChoice, cache_block_time_sender: &Option, bank_notification_sender: &Option, + vote_signatures: &mut Vec, + has_new_vote_been_rooted: &mut bool, ) { if bank.is_empty() { inc_new_counter_info!("replay_stage-voted_empty_bank", 1); @@ -1154,6 +1170,8 @@ impl ReplayStage { accounts_background_request_sender, highest_confirmed_root, heaviest_subtree_fork_choice, + has_new_vote_been_rooted, + vote_signatures, ); subscriptions.notify_roots(rooted_slots); if let Some(sender) = bank_notification_sender { @@ -1183,6 +1201,8 @@ impl ReplayStage { last_vote, &tower_slots, switch_fork_decision, + vote_signatures, + *has_new_vote_been_rooted, ); } @@ -1194,6 +1214,8 @@ impl ReplayStage { vote: Vote, tower: &[Slot], switch_fork_decision: &SwitchForkDecision, + vote_signatures: &mut Vec, + has_new_vote_been_rooted: bool, ) { if authorized_voter_keypairs.is_empty() { return; @@ -1263,6 +1285,14 @@ impl ReplayStage { let mut vote_tx = Transaction::new_with_payer(&[vote_ix], Some(&node_keypair.pubkey())); + if !has_new_vote_been_rooted { + vote_signatures.push(vote_tx.signatures[0]); + if vote_signatures.len() > MAX_VOTE_SIGNATURES { + vote_signatures.remove(0); + } + } else { + vote_signatures.clear(); + } let blockhash = bank.last_blockhash(); vote_tx.partial_sign(&[node_keypair.as_ref()], blockhash); 
vote_tx.partial_sign(&[authorized_voter_keypair.as_ref()], blockhash); @@ -1866,6 +1896,8 @@ impl ReplayStage { accounts_background_request_sender: &AbsRequestSender, highest_confirmed_root: Option, heaviest_subtree_fork_choice: &mut HeaviestSubtreeForkChoice, + has_new_vote_been_rooted: &mut bool, + voted_signatures: &mut Vec, ) { bank_forks.write().unwrap().set_root( new_root, @@ -1873,6 +1905,18 @@ impl ReplayStage { highest_confirmed_root, ); let r_bank_forks = bank_forks.read().unwrap(); + let new_root_bank = &r_bank_forks[new_root]; + if !*has_new_vote_been_rooted { + for signature in voted_signatures.iter() { + if new_root_bank.get_signature_status(signature).is_some() { + *has_new_vote_been_rooted = true; + break; + } + } + if *has_new_vote_been_rooted { + std::mem::take(voted_signatures); + } + } progress.handle_new_root(&r_bank_forks); heaviest_subtree_fork_choice.set_root(new_root); } @@ -2280,6 +2324,8 @@ pub(crate) mod tests { &AbsRequestSender::default(), None, &mut heaviest_subtree_fork_choice, + &mut true, + &mut Vec::new(), ); assert_eq!(bank_forks.read().unwrap().root(), root); assert_eq!(progress.len(), 1); @@ -2324,6 +2370,8 @@ pub(crate) mod tests { &AbsRequestSender::default(), Some(confirmed_root), &mut heaviest_subtree_fork_choice, + &mut true, + &mut Vec::new(), ); assert_eq!(bank_forks.read().unwrap().root(), root); assert!(bank_forks.read().unwrap().get(confirmed_root).is_some()); diff --git a/core/src/test_validator.rs b/core/src/test_validator.rs index 560dd675669482..4d2c378990c320 100644 --- a/core/src/test_validator.rs +++ b/core/src/test_validator.rs @@ -422,6 +422,7 @@ impl TestValidator { warp_slot: config.warp_slot, bpf_jit: !config.no_bpf_jit, validator_exit: config.validator_exit.clone(), + no_wait_for_vote_to_start_leader: true, ..ValidatorConfig::default() }; diff --git a/core/src/tvu.rs b/core/src/tvu.rs index 4b033b20321b42..3844de8747583a 100644 --- a/core/src/tvu.rs +++ b/core/src/tvu.rs @@ -84,6 +84,7 @@ pub struct 
TvuConfig { pub use_index_hash_calculation: bool, pub rocksdb_compaction_interval: Option, pub rocksdb_max_compaction_jitter: Option, + pub wait_for_vote_to_start_leader: bool, } impl Tvu { @@ -254,6 +255,7 @@ impl Tvu { rewards_recorder_sender, cache_block_time_sender, bank_notification_sender, + wait_for_vote_to_start_leader: tvu_config.wait_for_vote_to_start_leader, }; let replay_stage = ReplayStage::new( diff --git a/core/src/validator.rs b/core/src/validator.rs index 2cf6fa07e8243b..4645c3c15e1b3a 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -130,6 +130,7 @@ pub struct ValidatorConfig { pub accounts_db_use_index_hash_calculation: bool, pub tpu_coalesce_ms: u64, pub validator_exit: Arc>, + pub no_wait_for_vote_to_start_leader: bool, } impl Default for ValidatorConfig { @@ -184,6 +185,7 @@ impl Default for ValidatorConfig { accounts_db_use_index_hash_calculation: true, tpu_coalesce_ms: DEFAULT_TPU_COALESCE_MS, validator_exit: Arc::new(RwLock::new(ValidatorExit::default())), + no_wait_for_vote_to_start_leader: true, } } } @@ -628,15 +630,20 @@ impl Validator { check_poh_speed(&genesis_config, None); } - if wait_for_supermajority( + let waited_for_supermajority = if let Ok(waited) = wait_for_supermajority( config, &bank, &cluster_info, rpc_override_health_check, &start_progress, ) { + waited + } else { abort(); - } + }; + + let wait_for_vote_to_start_leader = + !waited_for_supermajority && !config.no_wait_for_vote_to_start_leader; let poh_service = PohService::new( poh_recorder.clone(), @@ -721,6 +728,7 @@ impl Validator { use_index_hash_calculation: config.accounts_db_use_index_hash_calculation, rocksdb_compaction_interval: config.rocksdb_compaction_interval, rocksdb_max_compaction_jitter: config.rocksdb_compaction_interval, + wait_for_vote_to_start_leader, }, &max_slots, ); @@ -1287,17 +1295,28 @@ fn initialize_rpc_transaction_history_services( } } -// Return true on error, indicating the validator should exit. 
+#[derive(Debug, PartialEq)] +enum ValidatorError { + BadExpectedBankHash, + NotEnoughLedgerData, +} + +// Return whether the validator waited on other nodes to start. In this case +// it should not wait for one of its votes to land to produce blocks +// because if the whole network is waiting, then it will stall. +// +// Error indicates that a bad hash was encountered or another condition +// that is unrecoverable and the validator should exit. fn wait_for_supermajority( config: &ValidatorConfig, bank: &Bank, cluster_info: &ClusterInfo, rpc_override_health_check: Arc, start_progress: &Arc>, -) -> bool { +) -> Result { if let Some(wait_for_supermajority) = config.wait_for_supermajority { match wait_for_supermajority.cmp(&bank.slot()) { - std::cmp::Ordering::Less => return false, + std::cmp::Ordering::Less => return Ok(false), std::cmp::Ordering::Greater => { error!( "Ledger does not have enough data to wait for supermajority, \ @@ -1305,12 +1324,12 @@ fn wait_for_supermajority( bank.slot(), wait_for_supermajority ); - return true; + return Err(ValidatorError::NotEnoughLedgerData); } _ => {} } } else { - return false; + return Ok(false); } if let Some(expected_bank_hash) = config.expected_bank_hash { @@ -1320,7 +1339,7 @@ fn wait_for_supermajority( bank.hash(), expected_bank_hash ); - return true; + return Err(ValidatorError::BadExpectedBankHash); } } @@ -1345,7 +1364,7 @@ fn wait_for_supermajority( sleep(Duration::new(1, 0)); } rpc_override_health_check.store(false, Ordering::Relaxed); - false + Ok(true) } fn report_target_features() { @@ -1636,17 +1655,21 @@ mod tests { &cluster_info, rpc_override_health_check.clone(), &start_progress, - )); + ) + .unwrap()); // bank=0, wait=1, should fail config.wait_for_supermajority = Some(1); - assert!(wait_for_supermajority( - &config, - &bank, - &cluster_info, - rpc_override_health_check.clone(), - &start_progress, - )); + assert_eq!( + wait_for_supermajority( + &config, + &bank, + &cluster_info, +
rpc_override_health_check.clone(), + &start_progress, + ), + Err(ValidatorError::NotEnoughLedgerData) + ); // bank=1, wait=0, should pass, bank is past the wait slot let bank = Bank::new_from_parent(&bank, &Pubkey::default(), 1); @@ -1657,18 +1680,22 @@ mod tests { &cluster_info, rpc_override_health_check.clone(), &start_progress, - )); + ) + .unwrap()); // bank=1, wait=1, equal, but bad hash provided config.wait_for_supermajority = Some(1); config.expected_bank_hash = Some(hash(&[1])); - assert!(wait_for_supermajority( - &config, - &bank, - &cluster_info, - rpc_override_health_check, - &start_progress, - )); + assert_eq!( + wait_for_supermajority( + &config, + &bank, + &cluster_info, + rpc_override_health_check, + &start_progress, + ), + Err(ValidatorError::BadExpectedBankHash) + ); } #[test] diff --git a/local-cluster/src/validator_configs.rs b/local-cluster/src/validator_configs.rs index 61bb05c9919a21..a4d4afcfdcc583 100644 --- a/local-cluster/src/validator_configs.rs +++ b/local-cluster/src/validator_configs.rs @@ -53,6 +53,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig { tpu_coalesce_ms: config.tpu_coalesce_ms, validator_exit: Arc::new(RwLock::new(ValidatorExit::default())), poh_hashes_per_batch: config.poh_hashes_per_batch, + no_wait_for_vote_to_start_leader: config.no_wait_for_vote_to_start_leader, } } diff --git a/multinode-demo/bootstrap-validator.sh b/multinode-demo/bootstrap-validator.sh index 9cbdecadd74bdd..b6d1516b20b9c2 100755 --- a/multinode-demo/bootstrap-validator.sh +++ b/multinode-demo/bootstrap-validator.sh @@ -105,6 +105,7 @@ args+=( --vote-account "$vote_account" --rpc-faucet-address 127.0.0.1:9900 --no-poh-speed-test + --no-wait-for-vote-to-start-leader ) default_arg --gossip-port 8001 default_arg --log - diff --git a/run.sh b/run.sh index f7fe98f1339436..917e5fcd6fde0e 100755 --- a/run.sh +++ b/run.sh @@ -105,6 +105,7 @@ args=( --init-complete-file "$dataDir"/init-completed --snapshot-compression none 
--require-tower + --no-wait-for-vote-to-start-leader ) # shellcheck disable=SC2086 solana-validator "${args[@]}" $SOLANA_RUN_SH_VALIDATOR_ARGS & diff --git a/validator/src/main.rs b/validator/src/main.rs index e9e68078ea3018..13205f70045f9d 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1368,6 +1368,14 @@ pub fn main() { .help("After processing the ledger and the next slot is SLOT, wait until a \ supermajority of stake is visible on gossip before starting PoH"), ) + .arg( + Arg::with_name("no_wait_for_vote_to_start_leader") + .hidden(true) + .long("no-wait-for-vote-to-start-leader") + .help("If the validator starts up with no ledger, it will wait to start block + production until it sees a vote land in a rooted slot. This prevents + double signing. Turn off to risk double signing a block."), + ) .arg( Arg::with_name("hard_forks") .long("hard-fork") @@ -1997,6 +2005,7 @@ pub fn main() { accounts_db_test_hash_calculation: matches.is_present("accounts_db_test_hash_calculation"), accounts_db_use_index_hash_calculation: matches.is_present("accounts_db_index_hashing"), tpu_coalesce_ms, + no_wait_for_vote_to_start_leader: matches.is_present("no_wait_for_vote_to_start_leader"), ..ValidatorConfig::default() };