From 5b14f0043895d799203e0341515c2793bf489f53 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Thu, 20 Jul 2023 14:08:10 +0200 Subject: [PATCH 01/50] fix(db-tool): Tool to run DB migrations --- tools/database/src/commands.rs | 12 ++++++++++++ tools/database/src/lib.rs | 1 + tools/database/src/run_migrations.rs | 25 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 tools/database/src/run_migrations.rs diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index e13642b0a37..8c3e946ab24 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -1,5 +1,6 @@ use crate::adjust_database::ChangeDbKindCommand; use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand; +use crate::run_migrations::RunMigrationsCommand; use clap::Parser; use std::path::PathBuf; @@ -17,6 +18,9 @@ enum SubCommand { /// Change DbKind of hot or cold db. ChangeDbKind(ChangeDbKindCommand), + + /// Run migrations. + RunMigrations(RunMigrationsCommand), } impl DatabaseCommand { @@ -31,6 +35,14 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, &near_config) } + SubCommand::RunMigrations(cmd) => { + let mut near_config = nearcore::config::load_config( + &home, + near_chain_configs::GenesisValidationMode::UnsafeFast, + ) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + cmd.run(home, &mut near_config) + } } } } diff --git a/tools/database/src/lib.rs b/tools/database/src/lib.rs index 644f087a3f7..a9261186b50 100644 --- a/tools/database/src/lib.rs +++ b/tools/database/src/lib.rs @@ -1,3 +1,4 @@ mod adjust_database; mod analyse_data_size_distribution; pub mod commands; +mod run_migrations; diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs new file mode 100644 index 00000000000..5d713cc12aa --- /dev/null +++ b/tools/database/src/run_migrations.rs @@ -0,0 +1,25 @@ +use near_store::metadata::DbKind; +use near_store::{Mode, NodeStorage}; +use nearcore::{migrations, NearConfig, open_storage}; +use std::path::Path; + +/// This can potentially support db specified not in config, but in command line. +/// `ChangeRelative { path: Path, archive: bool }` +/// But it is a pain to implement, because of all the current storage possibilities. +/// So, I'll leave it as a TODO(posvyatokum): implement relative path DbSelector. +/// This can be a useful workaround for config modification. +#[derive(clap::Subcommand)] +enum DbSelector { + ChangeHot, + ChangeCold, +} + +#[derive(clap::Args)] +pub(crate) struct RunMigrationsCommand { } + +impl RunMigrationsCommand { + pub(crate) fn run(&self, home_dir: &Path, near_config: &mut NearConfig) -> anyhow::Result<()> { + // Opening the storage applies any pending DB migrations. + let _storage = open_storage(home_dir, near_config)?; + Ok(()) + } +} From e51a6891dbb80a4e9b42cccc3407c1ffb06395dc Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 13 Jul 2023 14:22:41 +0200 Subject: [PATCH 02/50] feat: simple nightshade v2 - shard layout with 5 shards (#9274) Introduced a new protocol version called SimpleNightshadeV2, guarded it behind the rust feature `protocol_feature_simple_nightshade_v2` and added it to nightly. Refactored the AllEpochConfig::for_protocol_version a bit and added the SimpleNightshadeV2 shard layout to it. Note that I'm only hiding the SimpleNightshadeV2 behind the rust feature, I'm not planning on adding it everywhere. I'm reusing the same ShardLayout::V1 structure, just with bumped version and an extra boundary account.
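As a side note for reviewers (illustration only, not part of the change): the way ShardLayout::V1 boundary accounts partition the account-id space is what makes the one extra boundary produce a fifth shard. The sketch below is a self-contained stand-in for that logic using plain string comparison; the helper name and the example accounts other than the boundaries are made up.

```rust
// Minimal sketch of boundary-account semantics (hypothetical helper, not nearcore code):
// with N sorted boundary accounts there are N + 1 shards, and an account belongs to the
// shard whose index equals the number of boundaries that are <= the account id.
fn account_to_shard(account_id: &str, boundary_accounts: &[&str]) -> usize {
    boundary_accounts
        .iter()
        .position(|&boundary| account_id < boundary)
        .unwrap_or(boundary_accounts.len())
}

fn main() {
    // Boundaries from this patch; adding "sweat" splits the old last shard into two.
    let boundaries = ["aurora", "aurora-0", "kkuuue2akv_1630967379.near", "sweat"];
    assert_eq!(account_to_shard("alice.near", &boundaries), 0);
    assert_eq!(account_to_shard("aurora", &boundaries), 1);
    assert_eq!(account_to_shard("near.social", &boundaries), 3);
    assert_eq!(account_to_shard("token.sweat", &boundaries), 4);
    println!("accounts map to 5 shards by boundary comparison");
}
```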
This should allow for smooth development since we won't need to guard all of the new code behind the new rust feature. I tested it manually and some sort of resharding did happen. I'm yet to fully appreciate what exactly happened and if it's any good, as well as add some proper tests. I'll do that in separate PRs. test repro instructions: ``` - get the current layout in json by running the print_shard_layout_all test and put it in $SHARD_LAYOUT - generate localnet setup with 4 shards and 1 validator - in the genesis file overwrite: - .epoch_length=10 - .use_production_config=true - .shard_layout=$SHARD_LAYOUT - build neard with nightly not enabled - run neard for at least one epoch - build neard with nightly enabled - run neard - watch resharding happening (only enabled debug logs for "catchup" target) - see new shard layout in the debug page ``` ![Screenshot 2023-07-11 at 15 34 36](https://github.com/near/nearcore/assets/1555986/5b83d645-4fdf-4994-a215-a500c0c0092f) resharding logs: https://gist.github.com/wacban/7b3a8c74c80f99003c71b92bea44539f --- chain/chain/src/chain.rs | 10 ++- chain/chain/src/tests/simple_chain.rs | 4 +- core/primitives-core/Cargo.toml | 3 + core/primitives-core/src/version.rs | 6 +- core/primitives/Cargo.toml | 2 + core/primitives/src/epoch_manager.rs | 80 ++++++++++++++--------- core/primitives/src/shard_layout.rs | 93 +++++++++++++++++++++++++++ nearcore/Cargo.toml | 6 +- neard/Cargo.toml | 2 + 9 files changed, 170 insertions(+), 36 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 8ca68647e1f..779dc948317 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -2436,6 +2436,7 @@ impl Chain { let (is_caught_up, state_dl_info, need_state_snapshot) = if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { + debug!(target: "chain", "block {} is the first block of an epoch", block.hash()); if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? 
{ // The previous block is not caught up for the next epoch relative to the previous // block, which is the current epoch for this block, so this block cannot be applied @@ -2668,9 +2669,12 @@ impl Chain { // if shard layout will change the next epoch, we should catch up the shard regardless // whether we already have the shard's state this epoch, because we need to generate // new states for shards split from the current shard for the next epoch - shard_tracker.will_care_about_shard(me.as_ref(), parent_hash, shard_id, true) - && (will_shard_layout_change - || !shard_tracker.care_about_shard(me.as_ref(), parent_hash, shard_id, true)) + let will_care_about_shard = + shard_tracker.will_care_about_shard(me.as_ref(), parent_hash, shard_id, true); + let does_care_about_shard = + shard_tracker.care_about_shard(me.as_ref(), parent_hash, shard_id, true); + + will_care_about_shard && (will_shard_layout_change || !does_care_about_shard) } /// Check if any block with missing chunk is ready to be processed and start processing these blocks diff --git a/chain/chain/src/tests/simple_chain.rs b/chain/chain/src/tests/simple_chain.rs index 2b3875319c8..b01ad79cc78 100644 --- a/chain/chain/src/tests/simple_chain.rs +++ b/chain/chain/src/tests/simple_chain.rs @@ -48,7 +48,7 @@ fn build_chain() { // cargo insta test --accept -p near-chain --features nightly -- tests::simple_chain::build_chain let hash = chain.head().unwrap().last_block_hash; if cfg!(feature = "nightly") { - insta::assert_display_snapshot!(hash, @"3Dkg6hjpnYvMuoyEdSLnEXza6Ct2ZV9xoridA37AJzSz"); + insta::assert_display_snapshot!(hash, @"86ZZBdNhwHbXDXdTjFZxGbddSy4qLpoxpWtqJtYwYXX"); } else { insta::assert_display_snapshot!(hash, @"8GP6PcFavb4pqeofMFjDyKUQnfVZtwPWsVA4V47WNbRn"); } @@ -78,7 +78,7 @@ fn build_chain() { let hash = chain.head().unwrap().last_block_hash; if cfg!(feature = "nightly") { - insta::assert_display_snapshot!(hash, @"6uCZwfkpE8qV54n5MvZXqTt8RMHYDduX4eE7quNzgLNk"); + insta::assert_display_snapshot!(hash, @"8XW5k1JDHWPXkRcGwb6PTEgwggnppAW1qwWgwiqPY286"); } else { insta::assert_display_snapshot!(hash, @"319JoVaUej5iXmrZMeaZBPMeBLePQzJofA5Y1ztdyPw9"); } diff --git a/core/primitives-core/Cargo.toml b/core/primitives-core/Cargo.toml index 9d8da94d9c9..eca9aa693f8 100644 --- a/core/primitives-core/Cargo.toml +++ b/core/primitives-core/Cargo.toml @@ -35,11 +35,14 @@ default = [] protocol_feature_fix_staking_threshold = [] protocol_feature_fix_contract_loading_cost = [] protocol_feature_reject_blocks_with_outdated_protocol_version = [] +protocol_feature_simple_nightshade_v2 = [] + nightly = [ "nightly_protocol", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", "protocol_feature_reject_blocks_with_outdated_protocol_version", + "protocol_feature_simple_nightshade_v2", ] nightly_protocol = [ diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index 678efdacd8b..1a5bc78f9d5 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -120,6 +120,8 @@ pub enum ProtocolFeature { FixContractLoadingCost, #[cfg(feature = "protocol_feature_reject_blocks_with_outdated_protocol_version")] RejectBlocksWithOutdatedProtocolVersions, + #[cfg(feature = "protocol_feature_simple_nightshade_v2")] + SimpleNightshadeV2, } impl ProtocolFeature { @@ -170,6 +172,8 @@ impl ProtocolFeature { ProtocolFeature::FixContractLoadingCost => 129, #[cfg(feature = "protocol_feature_reject_blocks_with_outdated_protocol_version")] 
ProtocolFeature::RejectBlocksWithOutdatedProtocolVersions => 132, + #[cfg(feature = "protocol_feature_simple_nightshade_v2")] + ProtocolFeature::SimpleNightshadeV2 => 135, } } } @@ -182,7 +186,7 @@ const STABLE_PROTOCOL_VERSION: ProtocolVersion = 62; /// Largest protocol version supported by the current binary. pub const PROTOCOL_VERSION: ProtocolVersion = if cfg!(feature = "nightly_protocol") { // On nightly, pick big enough version to support all features. - 136 + 137 } else { // Enable all stable features. STABLE_PROTOCOL_VERSION diff --git a/core/primitives/Cargo.toml b/core/primitives/Cargo.toml index 16522bd8498..0f073bf0213 100644 --- a/core/primitives/Cargo.toml +++ b/core/primitives/Cargo.toml @@ -47,11 +47,13 @@ dump_errors_schema = ["near-rpc-error-macro/dump_errors_schema"] protocol_feature_fix_staking_threshold = ["near-primitives-core/protocol_feature_fix_staking_threshold"] protocol_feature_fix_contract_loading_cost = ["near-primitives-core/protocol_feature_fix_contract_loading_cost"] protocol_feature_reject_blocks_with_outdated_protocol_version = ["near-primitives-core/protocol_feature_reject_blocks_with_outdated_protocol_version"] +protocol_feature_simple_nightshade_v2 = ["near-primitives-core/protocol_feature_simple_nightshade_v2"] nightly = [ "nightly_protocol", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", "protocol_feature_reject_blocks_with_outdated_protocol_version", + "protocol_feature_simple_nightshade_v2", "near-fmt/nightly", "near-primitives-core/nightly", "near-vm-runner/nightly", diff --git a/core/primitives/src/epoch_manager.rs b/core/primitives/src/epoch_manager.rs index 3d48750695c..74f7d3f1946 100644 --- a/core/primitives/src/epoch_manager.rs +++ b/core/primitives/src/epoch_manager.rs @@ -91,39 +91,61 @@ impl AllEpochConfig { } pub fn for_protocol_version(&self, protocol_version: ProtocolVersion) -> EpochConfig { - // if SimpleNightshade is enabled, we override genesis shard config with - // the simple nightshade shard config let mut config = self.genesis_epoch_config.clone(); - if self.use_production_config { - if checked_feature!("stable", SimpleNightshade, protocol_version) { - config.shard_layout = ShardLayout::get_simple_nightshade_layout(); - config.num_block_producer_seats_per_shard = vec![ - config.num_block_producer_seats; - config.shard_layout.num_shards() - as usize - ]; - config.avg_hidden_validator_seats_per_shard = - vec![0; config.shard_layout.num_shards() as usize]; - } - if checked_feature!("stable", ChunkOnlyProducers, protocol_version) { - // On testnet, genesis config set num_block_producer_seats to 200 - // This is to bring it back to 100 to be the same as on mainnet - config.num_block_producer_seats = 100; - // Technically, after ChunkOnlyProducers is enabled, this field is no longer used - // We still set it here just in case - config.num_block_producer_seats_per_shard = - vec![100; config.shard_layout.num_shards() as usize]; - config.block_producer_kickout_threshold = 80; - config.chunk_producer_kickout_threshold = 80; - config.validator_selection_config.num_chunk_only_producer_seats = 200; - } - - if checked_feature!("stable", MaxKickoutStake, protocol_version) { - config.validator_max_kickout_stake_perc = 30; - } + if !self.use_production_config { + return config; } + + Self::config_nightshade(&mut config, protocol_version); + + Self::config_chunk_only_producers(&mut config, protocol_version); + + Self::config_max_kickout_stake(&mut config, protocol_version); + config } + + fn 
config_nightshade(config: &mut EpochConfig, protocol_version: ProtocolVersion) { + #[cfg(feature = "protocol_feature_simple_nightshade_v2")] + if checked_feature!("stable", SimpleNightshadeV2, protocol_version) { + Self::config_nightshade_impl(config, ShardLayout::get_simple_nightshade_layout_v2()); + return; + } + + if checked_feature!("stable", SimpleNightshade, protocol_version) { + Self::config_nightshade_impl(config, ShardLayout::get_simple_nightshade_layout()); + return; + } + } + + fn config_nightshade_impl(config: &mut EpochConfig, shard_layout: ShardLayout) { + let num_shards = shard_layout.num_shards() as usize; + let num_block_producer_seats = config.num_block_producer_seats; + config.shard_layout = shard_layout; + config.num_block_producer_seats_per_shard = vec![num_block_producer_seats; num_shards]; + config.avg_hidden_validator_seats_per_shard = vec![0; num_shards]; + } + + fn config_chunk_only_producers(config: &mut EpochConfig, protocol_version: u32) { + if checked_feature!("stable", ChunkOnlyProducers, protocol_version) { + let num_shards = config.shard_layout.num_shards() as usize; + // On testnet, genesis config set num_block_producer_seats to 200 + // This is to bring it back to 100 to be the same as on mainnet + config.num_block_producer_seats = 100; + // Technically, after ChunkOnlyProducers is enabled, this field is no longer used + // We still set it here just in case + config.num_block_producer_seats_per_shard = vec![100; num_shards]; + config.block_producer_kickout_threshold = 80; + config.chunk_producer_kickout_threshold = 80; + config.validator_selection_config.num_chunk_only_producer_seats = 200; + } + } + + fn config_max_kickout_stake(config: &mut EpochConfig, protocol_version: u32) { + if checked_feature!("stable", MaxKickoutStake, protocol_version) { + config.validator_max_kickout_stake_perc = 30; + } + } } /// Additional configuration parameters for the new validator selection diff --git a/core/primitives/src/shard_layout.rs b/core/primitives/src/shard_layout.rs index e7fe33a2a1a..f34800c53ca 100644 --- a/core/primitives/src/shard_layout.rs +++ b/core/primitives/src/shard_layout.rs @@ -156,6 +156,22 @@ impl ShardLayout { ) } + /// Returns the simple nightshade layout, version 2, that will be used in production. + /// This is work in progress and the exact way of splitting is yet to be determined. + pub fn get_simple_nightshade_layout_v2() -> ShardLayout { + ShardLayout::v1( + // TODO(resharding) - find the right boundary to split shards in + // place of just "sweat". Likely somewhere in between near.social + // and sweatcoin. + vec!["aurora", "aurora-0", "kkuuue2akv_1630967379.near", "sweat"] + .into_iter() + .map(|s| s.parse().unwrap()) + .collect(), + Some(vec![vec![0], vec![1], vec![2], vec![3, 4]]), + 2, + ) + } + /// Given a parent shard id, return the shard uids for the shards in the current shard layout that /// are split from this parent shard. 
If this shard layout has no parent shard layout, return None pub fn get_split_shard_uids(&self, parent_shard_id: ShardId) -> Option> { @@ -547,4 +563,81 @@ mod tests { fn parse_account_ids(ids: &[&str]) -> Vec { ids.into_iter().map(|a| a.parse().unwrap()).collect() } + + #[test] + fn test_shard_layout_all() { + let v0 = ShardLayout::v0(1, 0); + let v1 = ShardLayout::get_simple_nightshade_layout(); + let v2 = ShardLayout::get_simple_nightshade_layout_v2(); + + insta::assert_snapshot!(serde_json::to_string_pretty(&v0).unwrap(), @r###" + { + "V0": { + "num_shards": 1, + "version": 0 + } + } + "###); + insta::assert_snapshot!(serde_json::to_string_pretty(&v1).unwrap(), @r###" + { + "V1": { + "boundary_accounts": [ + "aurora", + "aurora-0", + "kkuuue2akv_1630967379.near" + ], + "shards_split_map": [ + [ + 0, + 1, + 2, + 3 + ] + ], + "to_parent_shard_map": [ + 0, + 0, + 0, + 0 + ], + "version": 1 + } + } + "###); + insta::assert_snapshot!(serde_json::to_string_pretty(&v2).unwrap(), @r###" + { + "V1": { + "boundary_accounts": [ + "aurora", + "aurora-0", + "kkuuue2akv_1630967379.near", + "sweat" + ], + "shards_split_map": [ + [ + 0 + ], + [ + 1 + ], + [ + 2 + ], + [ + 3, + 4 + ] + ], + "to_parent_shard_map": [ + 0, + 1, + 2, + 3, + 3 + ], + "version": 2 + } + } + "###); + } } diff --git a/nearcore/Cargo.toml b/nearcore/Cargo.toml index fe71c1c10e1..a76bc0d68b7 100644 --- a/nearcore/Cargo.toml +++ b/nearcore/Cargo.toml @@ -111,12 +111,16 @@ protocol_feature_fix_staking_threshold = [ protocol_feature_fix_contract_loading_cost = [ "near-vm-runner/protocol_feature_fix_contract_loading_cost", ] -serialize_all_state_changes = ["near-store/serialize_all_state_changes"] +protocol_feature_simple_nightshade_v2 = [ + "near-primitives/protocol_feature_simple_nightshade_v2", +] +serialize_all_state_changes = ["near-store/serialize_all_state_changes"] nightly = [ "nightly_protocol", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", + "protocol_feature_simple_nightshade_v2", "serialize_all_state_changes", "near-async/nightly", "near-chain-configs/nightly", diff --git a/neard/Cargo.toml b/neard/Cargo.toml index 6173df7493e..4f330cb2277 100644 --- a/neard/Cargo.toml +++ b/neard/Cargo.toml @@ -70,11 +70,13 @@ delay_detector = ["nearcore/delay_detector"] rosetta_rpc = ["nearcore/rosetta_rpc"] json_rpc = ["nearcore/json_rpc"] protocol_feature_fix_staking_threshold = ["nearcore/protocol_feature_fix_staking_threshold"] +protocol_feature_simple_nightshade_v2 = ["nearcore/protocol_feature_simple_nightshade_v2"] serialize_all_state_changes = ["nearcore/serialize_all_state_changes"] nightly = [ "nightly_protocol", "protocol_feature_fix_staking_threshold", + "protocol_feature_simple_nightshade_v2", "serialize_all_state_changes", "near-chain-configs/nightly", "near-client/nightly", From 832f0c3896af338f4fb4345a33f3dd26a89e00ff Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 13 Jul 2023 15:38:43 +0200 Subject: [PATCH 03/50] refactor: small refactorings and improvements (#9296) - Renamed a lot of "dl_info" and 'to_dl" to "state_sync_info". I'm too afraid to ask what "dl" stands for but either way it's very confusing. (it could be download). I'm not sure I fully appreciate the difference between state sync, catchup and download and I'm open for a better suggestion how to rename those. - In the LocalnetCmd I added logic to generate default LogConfig - to get rid of a pesky log message about this config missing when starting neard. 
- In docs, renamed `SyncJobActor` to `SyncJobsActor` which is the correct name. - Allowing the `stable_hash` to be unused. It's only unused on macOS so we need to keep it but let's not generate a warning. All of the failed builds (red cross) below are due to this. cc @andrei-near shall we add some automation to notify us when builds are failing? Should this build be also part of PR-buildkite? ![Screenshot 2023-07-13 at 15 03 36](https://github.com/near/nearcore/assets/1555986/3adf18bf-6adc-4bf3-9996-55dc2ac8ad68) --- Cargo.lock | 1 + chain/chain/src/block_processing_utils.rs | 2 +- chain/chain/src/chain.rs | 34 +++++++++++------------ chain/chain/src/store.rs | 24 ++++++++-------- core/o11y/Cargo.toml | 1 + core/o11y/src/log_config.rs | 10 +++++++ docs/architecture/how/resharding.md | 4 +-- docs/architecture/how/sync.md | 8 +++--- nearcore/src/config.rs | 15 +++++++--- nearcore/src/dyn_config.rs | 2 +- runtime/near-vm-runner/src/utils.rs | 2 ++ 11 files changed, 62 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca23e3e37e2..ff92cbade30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3976,6 +3976,7 @@ dependencies = [ "opentelemetry-semantic-conventions", "prometheus", "serde", + "serde_json", "smartstring 1.0.1", "strum", "thiserror", diff --git a/chain/chain/src/block_processing_utils.rs b/chain/chain/src/block_processing_utils.rs index 6bb1b3ccc8c..c93c97caa1c 100644 --- a/chain/chain/src/block_processing_utils.rs +++ b/chain/chain/src/block_processing_utils.rs @@ -18,7 +18,7 @@ pub(crate) const MAX_PROCESSING_BLOCKS: usize = 5; /// Contains information from preprocessing a block pub(crate) struct BlockPreprocessInfo { pub(crate) is_caught_up: bool, - pub(crate) state_dl_info: Option, + pub(crate) state_sync_info: Option, pub(crate) incoming_receipts: HashMap>, pub(crate) challenges_result: ChallengesResult, pub(crate) challenged_blocks: Vec, diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 779dc948317..0220741c1bc 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -1069,13 +1069,13 @@ impl Chain { /// Return a StateSyncInfo that includes the information needed for syncing state for shards needed /// in the next epoch. - fn get_state_dl_info( + fn get_state_sync_info( &self, me: &Option, block: &Block, ) -> Result, Error> { let prev_hash = *block.header().prev_hash(); - let shards_to_dl = Chain::get_shards_to_dl_state( + let shards_to_state_sync = Chain::get_shards_to_state_sync( self.epoch_manager.as_ref(), &self.shard_tracker, me, @@ -1083,7 +1083,7 @@ impl Chain { )?; let prev_block = self.get_block(&prev_hash)?; - if prev_block.chunks().len() != block.chunks().len() && !shards_to_dl.is_empty() { + if prev_block.chunks().len() != block.chunks().len() && !shards_to_state_sync.is_empty() { // Currently, the state sync algorithm assumes that the number of chunks do not change // between the epoch being synced to and the last epoch. 
// For example, if shard layout changes at the beginning of epoch T, validators @@ -1100,14 +1100,14 @@ impl Chain { ); debug_assert!(false); } - if shards_to_dl.is_empty() { + if shards_to_state_sync.is_empty() { Ok(None) } else { - debug!(target: "chain", "Downloading state for {:?}, I'm {:?}", shards_to_dl, me); + debug!(target: "chain", "Downloading state for {:?}, I'm {:?}", shards_to_state_sync, me); - let state_dl_info = StateSyncInfo { + let state_sync_info = StateSyncInfo { epoch_tail_hash: *block.header().hash(), - shards: shards_to_dl + shards: shards_to_state_sync .iter() .map(|shard_id| { let chunk = &prev_block.chunks()[*shard_id as usize]; @@ -1116,7 +1116,7 @@ impl Chain { .collect(), }; - Ok(Some(state_dl_info)) + Ok(Some(state_sync_info)) } } @@ -2434,7 +2434,7 @@ impl Chain { return Err(Error::InvalidBlockHeight(prev_height)); } - let (is_caught_up, state_dl_info, need_state_snapshot) = + let (is_caught_up, state_sync_info, need_state_snapshot) = if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { debug!(target: "chain", "block {} is the first block of an epoch", block.hash()); if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? { @@ -2447,10 +2447,10 @@ impl Chain { // For the first block of the epoch we check if we need to start download states for // shards that we will care about in the next epoch. If there is no state to be downloaded, // we consider that we are caught up, otherwise not - let state_dl_info = self.get_state_dl_info(me, block)?; + let state_sync_info = self.get_state_sync_info(me, block)?; let is_genesis = prev_prev_hash == CryptoHash::default(); let need_state_snapshot = !is_genesis; - (state_dl_info.is_none(), state_dl_info, need_state_snapshot) + (state_sync_info.is_none(), state_sync_info, need_state_snapshot) } else { (self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false) }; @@ -2538,7 +2538,7 @@ impl Chain { apply_chunk_work, BlockPreprocessInfo { is_caught_up, - state_dl_info, + state_sync_info, incoming_receipts, challenges_result, challenged_blocks, @@ -2637,7 +2637,7 @@ impl Chain { /// in the current epoch that will be split into a future shard that `me` will track. /// 2) Shard layout will be the same. In this case, the method returns all shards that `me` will /// track in the next epoch but not this epoch - fn get_shards_to_dl_state( + fn get_shards_to_state_sync( epoch_manager: &dyn EpochManagerAdapter, shard_tracker: &ShardTracker, me: &Option, @@ -3529,7 +3529,7 @@ impl Chain { debug!(target: "chain", "Catching up: removing prev={:?} from the queue. 
I'm {:?}", block_hash, me); chain_store_update.remove_prev_block_to_catchup(*block_hash); } - chain_store_update.remove_state_dl_info(*epoch_first_block); + chain_store_update.remove_state_sync_info(*epoch_first_block); chain_store_update.commit()?; @@ -5180,7 +5180,7 @@ impl<'a> ChainUpdate<'a> { let BlockPreprocessInfo { is_caught_up, - state_dl_info, + state_sync_info, incoming_receipts, challenges_result, challenged_blocks, @@ -5199,8 +5199,8 @@ impl<'a> ChainUpdate<'a> { Arc::new(receipt_proofs), ); } - if let Some(state_dl_info) = state_dl_info { - self.chain_store_update.add_state_dl_info(state_dl_info); + if let Some(state_sync_info) = state_sync_info { + self.chain_store_update.add_state_sync_info(state_sync_info); } self.chain_store_update.save_block_extra(block.hash(), BlockExtra { challenges_result }); diff --git a/chain/chain/src/store.rs b/chain/chain/src/store.rs index 9ae7e069582..7221a222e68 100644 --- a/chain/chain/src/store.rs +++ b/chain/chain/src/store.rs @@ -1249,8 +1249,8 @@ pub struct ChainStoreUpdate<'a> { remove_blocks_to_catchup: Vec<(CryptoHash, CryptoHash)>, // A prev_hash to be removed with all the hashes associated with it remove_prev_blocks_to_catchup: Vec, - add_state_dl_infos: Vec, - remove_state_dl_infos: Vec, + add_state_sync_infos: Vec, + remove_state_sync_infos: Vec, challenged_blocks: HashSet, } @@ -1273,8 +1273,8 @@ impl<'a> ChainStoreUpdate<'a> { add_blocks_to_catchup: vec![], remove_blocks_to_catchup: vec![], remove_prev_blocks_to_catchup: vec![], - add_state_dl_infos: vec![], - remove_state_dl_infos: vec![], + add_state_sync_infos: vec![], + remove_state_sync_infos: vec![], challenged_blocks: HashSet::default(), } } @@ -1917,12 +1917,12 @@ impl<'a> ChainStoreUpdate<'a> { self.remove_prev_blocks_to_catchup.push(hash); } - pub fn add_state_dl_info(&mut self, info: StateSyncInfo) { - self.add_state_dl_infos.push(info); + pub fn add_state_sync_info(&mut self, info: StateSyncInfo) { + self.add_state_sync_infos.push(info); } - pub fn remove_state_dl_info(&mut self, hash: CryptoHash) { - self.remove_state_dl_infos.push(hash); + pub fn remove_state_sync_info(&mut self, hash: CryptoHash) { + self.remove_state_sync_infos.push(hash); } pub fn save_challenged_block(&mut self, hash: CryptoHash) { @@ -3019,14 +3019,14 @@ impl<'a> ChainStoreUpdate<'a> { prev_table.push(new_hash); store_update.set_ser(DBCol::BlocksToCatchup, prev_hash.as_ref(), &prev_table)?; } - for state_dl_info in self.add_state_dl_infos.drain(..) { + for state_sync_info in self.add_state_sync_infos.drain(..) { store_update.set_ser( DBCol::StateDlInfos, - state_dl_info.epoch_tail_hash.as_ref(), - &state_dl_info, + state_sync_info.epoch_tail_hash.as_ref(), + &state_sync_info, )?; } - for hash in self.remove_state_dl_infos.drain(..) { + for hash in self.remove_state_sync_infos.drain(..) 
{ store_update.delete(DBCol::StateDlInfos, hash.as_ref()); } for hash in self.challenged_blocks.drain() { diff --git a/core/o11y/Cargo.toml b/core/o11y/Cargo.toml index f870a012c61..907b5f2c5a6 100644 --- a/core/o11y/Cargo.toml +++ b/core/o11y/Cargo.toml @@ -24,6 +24,7 @@ opentelemetry-otlp.workspace = true opentelemetry-semantic-conventions.workspace = true prometheus.workspace = true serde.workspace = true +serde_json.workspace = true strum.workspace = true thiserror.workspace = true tokio.workspace = true diff --git a/core/o11y/src/log_config.rs b/core/o11y/src/log_config.rs index 46845bffe63..33b9f029824 100644 --- a/core/o11y/src/log_config.rs +++ b/core/o11y/src/log_config.rs @@ -1,4 +1,6 @@ use serde::{Deserialize, Serialize}; +use std::path::Path; +use std::{fs::File, io::Write}; /// Configures logging. #[derive(Default, Serialize, Deserialize, Clone, Debug)] @@ -11,3 +13,11 @@ pub struct LogConfig { /// Verbosity level of collected traces. pub opentelemetry_level: Option, } + +impl LogConfig { + pub fn write_to_file(&self, path: &Path) -> std::io::Result<()> { + let mut file = File::create(path)?; + let str = serde_json::to_string_pretty(self)?; + file.write_all(str.as_bytes()) + } +} diff --git a/docs/architecture/how/resharding.md b/docs/architecture/how/resharding.md index 090e99f1785..b9d5ad09220 100644 --- a/docs/architecture/how/resharding.md +++ b/docs/architecture/how/resharding.md @@ -38,12 +38,12 @@ do it more often than on new releases? It all starts in ``preprocess_block`` - if the node sees, that the block it is about to preprocess is the first block of the epoch (X+1) - it calls -``get_state_dl_info``, which is responsible for figuring out which shards will +``get_state_sync_info``, which is responsible for figuring out which shards will be needed in next epoch (X+2). This is the moment, when node can request new shards that it didn't track before (using StateSync) - and if it detects that the shard layout would change in the next epoch, it also involves the StateSync - but skips the download part (as it already has the data) - and starts from state splitting. -StateSync in this phase would send the ``StateSplitRequest`` to the ``SyncJobActor`` (you can think about the SyncJobActor as a background thread). +StateSync in this phase would send the ``StateSplitRequest`` to the ``SyncJobsActor`` (you can think about the ``SyncJobsActor`` as a background thread). We'd use the background thread to do the state splitting: the goal is to change the one trie (that represents the state of the current shard) - to multiple tries (one for each of the new shards). diff --git a/docs/architecture/how/sync.md b/docs/architecture/how/sync.md index c6e030c8dbd..f0a585d67ae 100644 --- a/docs/architecture/how/sync.md +++ b/docs/architecture/how/sync.md @@ -238,14 +238,14 @@ initiates the syncing process for these shards. After the state is downloaded, One thing to note is that `run_catchup` is located at `ClientActor`, but intensive work such as applying state parts and applying blocks is actually -offloaded to `SyncJobActor` in another thread, because we don’t want +offloaded to `SyncJobsActor` in another thread, because we don’t want `ClientActor` to be blocked by this. `run_catchup` is simply responsible for -scheduling `SyncJobActor` to do the intensive job. Note that `SyncJobActor` is +scheduling `SyncJobsActor` to do the intensive job. Note that `SyncJobsActor` is state-less, it doesn’t have write access to the chain. 
It will return the changes that need to be made as part of the response to `ClientActor`, and `ClientActor` is responsible for applying these changes. This is to ensure only one thread (`ClientActor`) has write access to the chain state. However, this also adds a -lot of limits, for example, `SyncJobActor` can only be scheduled to apply one +lot of limits, for example, `SyncJobsActor` can only be scheduled to apply one block at a time. Because `run_catchup` is only scheduled to run every 100ms, the speed of catching up blocks is limited to 100ms per block, even when blocks applying can be faster. Similar constraints happen to apply state parts. @@ -262,7 +262,7 @@ Second, even though `run_catchup` is scheduled to run every 100ms, the call can be delayed if ClientActor has messages in its actix queue. A better way to do this is to move the scheduling of `run_catchup` to `check_triggers`. -Third, because of how `run_catchup` interacts with `SyncJobActor`, `run_catchup` +Third, because of how `run_catchup` interacts with `SyncJobsActor`, `run_catchup` can catch up at most one block every 100 ms. This is because we don’t want to write to `ChainStore` in multiple threads. However, the changes that catching up blocks make do not interfere with regular block processing and they can be diff --git a/nearcore/src/config.rs b/nearcore/src/config.rs index 6120ccec52d..72b2eb3e2cf 100644 --- a/nearcore/src/config.rs +++ b/nearcore/src/config.rs @@ -1,4 +1,5 @@ use crate::download_file::{run_download_file, FileDownloadError}; +use crate::dyn_config::LOG_CONFIG_FILENAME; use anyhow::{anyhow, bail, Context}; use near_chain_configs::{ get_initial_supply, ClientConfig, GCConfig, Genesis, GenesisConfig, GenesisValidationMode, @@ -10,6 +11,7 @@ use near_crypto::{InMemorySigner, KeyFile, KeyType, PublicKey, Signer}; use near_jsonrpc::RpcConfig; use near_network::config::NetworkConfig; use near_network::tcp; +use near_o11y::log_config::LogConfig; use near_primitives::account::{AccessKey, Account}; use near_primitives::hash::CryptoHash; #[cfg(test)] @@ -1272,23 +1274,28 @@ pub fn init_testnet_configs( archive, tracked_shards, ); + let log_config = LogConfig::default(); for i in 0..(num_validator_seats + num_non_validator_seats) as usize { + let config = &configs[i]; let node_dir = dir.join(format!("{}{}", prefix, i)); fs::create_dir_all(node_dir.clone()).expect("Failed to create directory"); validator_signers[i] - .write_to_file(&node_dir.join(&configs[i].validator_key_file)) + .write_to_file(&node_dir.join(&config.validator_key_file)) .expect("Error writing validator key file"); network_signers[i] - .write_to_file(&node_dir.join(&configs[i].node_key_file)) + .write_to_file(&node_dir.join(&config.node_key_file)) .expect("Error writing key file"); for key in &shard_keys { key.write_to_file(&node_dir.join(format!("{}_key.json", key.account_id))) .expect("Error writing shard file"); } - genesis.to_file(&node_dir.join(&configs[i].genesis_file)); - configs[i].write_to_file(&node_dir.join(CONFIG_FILENAME)).expect("Error writing config"); + genesis.to_file(&node_dir.join(&config.genesis_file)); + config.write_to_file(&node_dir.join(CONFIG_FILENAME)).expect("Error writing config"); + log_config + .write_to_file(&node_dir.join(LOG_CONFIG_FILENAME)) + .expect("Error writing log config"); info!(target: "near", "Generated node key, validator key, genesis file in {}", node_dir.display()); } } diff --git a/nearcore/src/dyn_config.rs b/nearcore/src/dyn_config.rs index 4a98142319b..3357cf97a25 100644 --- 
a/nearcore/src/dyn_config.rs +++ b/nearcore/src/dyn_config.rs @@ -5,7 +5,7 @@ use near_o11y::log_config::LogConfig; use serde::Deserialize; use std::path::{Path, PathBuf}; -const LOG_CONFIG_FILENAME: &str = "log_config.json"; +pub const LOG_CONFIG_FILENAME: &str = "log_config.json"; /// This function gets called at the startup and each time a config needs to be reloaded. pub fn read_updateable_configs( diff --git a/runtime/near-vm-runner/src/utils.rs b/runtime/near-vm-runner/src/utils.rs index 8ab0b33d8e2..40a529b3bdd 100644 --- a/runtime/near-vm-runner/src/utils.rs +++ b/runtime/near-vm-runner/src/utils.rs @@ -1,5 +1,7 @@ use std::hash::{Hash, Hasher}; +// This method is not used on macOS so it's fine to allow it to be unused. +#[allow(dead_code)] pub(crate) fn stable_hash(value: T) -> u64 { // This is ported over from the previous uses, that relied on near-stable-hasher. // The need for stability here can certainly be discussed, and it could probably be replaced with DefaultHasher. From d36b1c36889264fa83a8057f3e29ddcf2b37a9c8 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 14 Jul 2023 09:15:12 +0200 Subject: [PATCH 04/50] refactor: refactoring and commenting some resharding code (#9299) --- chain/chain/src/chain.rs | 128 +++++++++++++++------------ chain/client-primitives/src/types.rs | 7 +- chain/client/src/client.rs | 116 +++++++++++++++--------- chain/client/src/sync/state.rs | 2 +- core/o11y/src/lib.rs | 4 +- nearcore/src/runtime/mod.rs | 1 + 6 files changed, 151 insertions(+), 107 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 0220741c1bc..4e9ba507b6a 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -489,6 +489,15 @@ impl Drop for Chain { let _ = self.blocks_in_processing.wait_for_all_blocks(); } } + +/// PreprocessBlockResult is a tuple where +/// the first element is a vector of jobs to apply chunks +/// the second element is BlockPreprocessInfo +type PreprocessBlockResult = ( + Vec Result + Send + 'static>>, + BlockPreprocessInfo, +); + impl Chain { pub fn make_genesis_block( epoch_manager: &dyn EpochManagerAdapter, @@ -2356,49 +2365,43 @@ impl Chain { invalid_chunks: &mut Vec, block_received_time: Instant, state_patch: SandboxStatePatch, - ) -> Result< - ( - Vec Result + Send + 'static>>, - BlockPreprocessInfo, - ), - Error, - > { + ) -> Result { + let header = block.header(); + // see if the block is already in processing or if there are too many blocks being processed self.blocks_in_processing.add_dry_run(block.hash())?; - debug!(target: "chain", num_approvals = block.header().num_approvals(), "Preprocess block"); + debug!(target: "chain", num_approvals = header.num_approvals(), "Preprocess block"); // Check that we know the epoch of the block before we try to get the header // (so that a block from unknown epoch doesn't get marked as an orphan) - if !self.epoch_manager.epoch_exists(block.header().epoch_id()) { - return Err(Error::EpochOutOfBounds(block.header().epoch_id().clone())); + if !self.epoch_manager.epoch_exists(header.epoch_id()) { + return Err(Error::EpochOutOfBounds(header.epoch_id().clone())); } - if block.chunks().len() - != self.epoch_manager.num_shards(block.header().epoch_id())? as usize - { + if block.chunks().len() != self.epoch_manager.num_shards(header.epoch_id())? as usize { return Err(Error::IncorrectNumberOfChunkHeaders); } // Check if we have already processed this block previously. 
- check_known(self, block.header().hash())?.map_err(|e| Error::BlockKnown(e))?; + check_known(self, header.hash())?.map_err(|e| Error::BlockKnown(e))?; // Delay hitting the db for current chain head until we know this block is not already known. let head = self.head()?; - let is_next = block.header().prev_hash() == &head.last_block_hash; + let is_next = header.prev_hash() == &head.last_block_hash; // Sandbox allows fast-forwarding, so only enable when not within sandbox if !cfg!(feature = "sandbox") { // A heuristic to prevent block height to jump too fast towards BlockHeight::max and cause // overflow-related problems - let block_height = block.header().height(); + let block_height = header.height(); if block_height > head.height + self.epoch_length * 20 { return Err(Error::InvalidBlockHeight(block_height)); } } // Block is an orphan if we do not know about the previous full block. - if !is_next && !self.block_exists(block.header().prev_hash())? { + if !is_next && !self.block_exists(header.prev_hash())? { // Before we add the block to the orphan pool, do some checks: // 1. Block header is signed by the block producer for height. // 2. Chunk headers in block body match block header. @@ -2406,23 +2409,23 @@ impl Chain { // Not checked: // - Block producer could be slashed // - Chunk header signatures could be wrong - if !self.partial_verify_orphan_header_signature(block.header())? { + if !self.partial_verify_orphan_header_signature(header)? { return Err(Error::InvalidSignature); } block.check_validity()?; // TODO: enable after #3729 and #3863 - // self.verify_orphan_header_approvals(&block.header())?; + // self.verify_orphan_header_approvals(&header)?; return Err(Error::Orphan); } let epoch_protocol_version = - self.epoch_manager.get_epoch_protocol_version(block.header().epoch_id())?; + self.epoch_manager.get_epoch_protocol_version(header.epoch_id())?; if epoch_protocol_version > PROTOCOL_VERSION { panic!("The client protocol version is older than the protocol version of the network. Please update nearcore. Client protocol version:{}, network protocol version {}", PROTOCOL_VERSION, epoch_protocol_version); } // First real I/O expense. - let prev = self.get_previous_header(block.header())?; + let prev = self.get_previous_header(header)?; let prev_hash = *prev.hash(); let prev_prev_hash = *prev.prev_hash(); let prev_gas_price = prev.gas_price(); @@ -2435,42 +2438,24 @@ impl Chain { } let (is_caught_up, state_sync_info, need_state_snapshot) = - if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { - debug!(target: "chain", "block {} is the first block of an epoch", block.hash()); - if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? { - // The previous block is not caught up for the next epoch relative to the previous - // block, which is the current epoch for this block, so this block cannot be applied - // at all yet, needs to be orphaned - return Err(Error::Orphan); - } + self.get_catchup_and_state_sync_infos(header, prev_hash, prev_prev_hash, me, block)?; - // For the first block of the epoch we check if we need to start download states for - // shards that we will care about in the next epoch. 
If there is no state to be downloaded, - // we consider that we are caught up, otherwise not - let state_sync_info = self.get_state_sync_info(me, block)?; - let is_genesis = prev_prev_hash == CryptoHash::default(); - let need_state_snapshot = !is_genesis; - (state_sync_info.is_none(), state_sync_info, need_state_snapshot) - } else { - (self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false) - }; + self.check_if_challenged_block_on_chain(header)?; - self.check_if_challenged_block_on_chain(block.header())?; - - debug!(target: "chain", "{:?} Process block {}, is_caught_up: {}", me, block.hash(), is_caught_up); + debug!(target: "chain", block_hash = ?header.hash(), me=?me, is_caught_up=is_caught_up, "Process block"); // Check the header is valid before we proceed with the full block. - self.validate_header(block.header(), provenance, challenges)?; + self.validate_header(header, provenance, challenges)?; self.epoch_manager.verify_block_vrf( - block.header().epoch_id(), - block.header().height(), + header.epoch_id(), + header.height(), &prev_random_value, block.vrf_value(), block.vrf_proof(), )?; - if block.header().random_value() != &hash(block.vrf_value().0.as_ref()) { + if header.random_value() != &hash(block.vrf_value().0.as_ref()) { return Err(Error::InvalidRandomnessBeaconOutput); } @@ -2483,8 +2468,7 @@ impl Chain { return Err(e); } - let protocol_version = - self.epoch_manager.get_epoch_protocol_version(block.header().epoch_id())?; + let protocol_version = self.epoch_manager.get_epoch_protocol_version(header.epoch_id())?; if !block.verify_gas_price( prev_gas_price, self.block_economics_config.min_gas_price(protocol_version), @@ -2495,7 +2479,7 @@ impl Chain { return Err(Error::InvalidGasPrice); } let minted_amount = if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { - Some(self.epoch_manager.get_epoch_minted_amount(block.header().next_epoch_id())?) + Some(self.epoch_manager.get_epoch_minted_amount(header.next_epoch_id())?) } else { None }; @@ -2505,11 +2489,8 @@ impl Chain { return Err(Error::InvalidGasPrice); } - let (challenges_result, challenged_blocks) = self.verify_challenges( - block.challenges(), - block.header().epoch_id(), - block.header().prev_hash(), - )?; + let (challenges_result, challenged_blocks) = + self.verify_challenges(block.challenges(), header.epoch_id(), header.prev_hash())?; let prev_block = self.get_block(&prev_hash)?; @@ -2519,7 +2500,7 @@ impl Chain { let incoming_receipts = self.collect_incoming_receipts_from_block(me, block)?; // Check if block can be finalized and drop it otherwise. - self.check_if_finalizable(block.header())?; + self.check_if_finalizable(header)?; let apply_chunk_work = self.apply_chunks_preprocessing( me, @@ -2550,6 +2531,35 @@ impl Chain { )) } + fn get_catchup_and_state_sync_infos( + &self, + header: &BlockHeader, + prev_hash: CryptoHash, + prev_prev_hash: CryptoHash, + me: &Option, + block: &MaybeValidated, + ) -> Result<(bool, Option, bool), Error> { + if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { + debug!(target: "chain", block_hash=?header.hash(), "block is the first block of an epoch"); + if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? 
{ + // The previous block is not caught up for the next epoch relative to the previous + // block, which is the current epoch for this block, so this block cannot be applied + // at all yet, needs to be orphaned + return Err(Error::Orphan); + } + + // For the first block of the epoch we check if we need to start download states for + // shards that we will care about in the next epoch. If there is no state to be downloaded, + // we consider that we are caught up, otherwise not + let state_sync_info = self.get_state_sync_info(me, block)?; + let is_genesis = prev_prev_hash == CryptoHash::default(); + let need_state_snapshot = !is_genesis; + Ok((state_sync_info.is_none(), state_sync_info, need_state_snapshot)) + } else { + Ok((self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false)) + } + } + /// Check if we can request chunks for this orphan. Conditions are /// 1) Orphans that with outstanding missing chunks request has not exceed `MAX_ORPHAN_MISSING_CHUNKS` /// 2) we haven't already requested missing chunks for the orphan @@ -2685,7 +2695,9 @@ impl Chain { apply_chunks_done_callback: DoneApplyChunkCallback, ) { let blocks = self.blocks_with_missing_chunks.ready_blocks(); - debug!(target:"chain", "Got {} blocks that were missing chunks but now are ready.", blocks.len()); + if !blocks.is_empty() { + debug!(target:"chain", "Got {} blocks that were missing chunks but now are ready.", blocks.len()); + } for block in blocks { let block_hash = *block.block.header().hash(); let height = block.block.header().height(); @@ -5165,7 +5177,7 @@ impl<'a> ChainUpdate<'a> { &mut self, me: &Option, block: &Block, - preprocess_block_info: BlockPreprocessInfo, + block_preprocess_info: BlockPreprocessInfo, apply_chunks_results: Vec>, ) -> Result, Error> { let prev_hash = block.header().prev_hash(); @@ -5185,7 +5197,7 @@ impl<'a> ChainUpdate<'a> { challenges_result, challenged_blocks, .. - } = preprocess_block_info; + } = block_preprocess_info; if !is_caught_up { debug!(target: "chain", %prev_hash, hash = %*block.hash(), "Add block to catch up"); diff --git a/chain/client-primitives/src/types.rs b/chain/client-primitives/src/types.rs index 0f78bdeff35..8f5ba42b441 100644 --- a/chain/client-primitives/src/types.rs +++ b/chain/client-primitives/src/types.rs @@ -185,8 +185,11 @@ pub struct StateSplitApplyingStatus { /// Stores status of shard sync and statuses of downloading shards. #[derive(Clone, Debug)] pub struct ShardSyncDownload { - /// Stores all download statuses. If we are downloading state parts, its length equals the number of state parts. - /// Otherwise it is 1, since we have only one piece of data to download, like shard state header. + /// Stores all download statuses. If we are downloading state parts, its + /// length equals the number of state parts. Otherwise it is 1, since we + /// have only one piece of data to download, like shard state header. It + /// could be 0 when we are not downloading anything but rather splitting a + /// shard as part of resharding. 
pub downloads: Vec, pub status: ShardSyncStatus, } diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 9873288dd32..2a181021530 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -53,6 +53,7 @@ use near_primitives::hash::CryptoHash; use near_primitives::merkle::{merklize, MerklePath, PartialMerkleTree}; use near_primitives::network::PeerId; use near_primitives::receipt::Receipt; +use near_primitives::sharding::StateSyncInfo; use near_primitives::sharding::{ ChunkHash, EncodedShardChunk, PartialEncodedChunk, ReedSolomonWrapper, ShardChunk, ShardChunkHeader, ShardInfo, @@ -2123,65 +2124,38 @@ impl Client { let me = &self.validator_signer.as_ref().map(|x| x.validator_id().clone()); for (sync_hash, state_sync_info) in self.chain.store().iterate_state_sync_infos()? { assert_eq!(sync_hash, state_sync_info.epoch_tail_hash); - let network_adapter1 = self.network_adapter.clone(); - - let new_shard_sync = { - let prev_hash = *self.chain.get_block(&sync_hash)?.header().prev_hash(); - let need_to_split_states = - self.epoch_manager.will_shard_layout_change(&prev_hash)?; - if need_to_split_states { - // If the client already has the state for this epoch, skip the downloading phase - let new_shard_sync = state_sync_info - .shards - .iter() - .filter_map(|ShardInfo(shard_id, _)| { - let shard_id = *shard_id; - if self.shard_tracker.care_about_shard( - me.as_ref(), - &prev_hash, - shard_id, - true, - ) { - Some(( - shard_id, - ShardSyncDownload { - downloads: vec![], - status: ShardSyncStatus::StateSplitScheduling, - }, - )) - } else { - None - } - }) - .collect(); - // For colour decorators to work, they need to printed directly. Otherwise the decorators get escaped, garble output and don't add colours. - debug!(target: "catchup", progress_per_shard = ?format_shard_sync_phase_per_shard(&new_shard_sync, false), "Need to split states for shards"); - new_shard_sync - } else { - debug!(target: "catchup", "do not need to split states for shards"); - HashMap::new() - } - }; + let network_adapter = self.network_adapter.clone(); + + let shards_to_split = self.get_shards_to_split(sync_hash, &state_sync_info, me)?; let state_sync_timeout = self.config.state_sync_timeout; let epoch_id = self.chain.get_block(&sync_hash)?.header().epoch_id().clone(); - let (state_sync, new_shard_sync, blocks_catch_up_state) = + + // TODO(resharding) what happens to the shards_to_split here when + // catchup_state_syncs already contains an entry for the sync hash? + // Does it get overwritten? Are we guaranteed that the existing + // entry contains the same data? + let (state_sync, shards_to_split, blocks_catch_up_state) = self.catchup_state_syncs.entry(sync_hash).or_insert_with(|| { ( StateSync::new( - network_adapter1, + network_adapter, state_sync_timeout, &self.config.chain_id, &self.config.state_sync.sync, ), - new_shard_sync, + shards_to_split, BlocksCatchUpState::new(sync_hash, epoch_id), ) }); // For colour decorators to work, they need to printed directly. Otherwise the decorators get escaped, garble output and don't add colours. - debug!(target: "catchup", ?me, ?sync_hash, progress_per_shard = ?format_shard_sync_phase_per_shard(&new_shard_sync, false), "Catchup"); - + debug!(target: "catchup", ?me, ?sync_hash, progress_per_shard = ?format_shard_sync_phase_per_shard(&shards_to_split, false), "Catchup"); let use_colour = matches!(self.config.log_summary_style, LogSummaryStyle::Colored); + + // Initialize the new shard sync to contain the shards to split at + // first. 
It will get updated with the shard sync download status + // for other shards later. + let new_shard_sync = shards_to_split; match state_sync.run( me, sync_hash, @@ -2229,6 +2203,60 @@ impl Client { Ok(()) } + /// This method checks which of the shards requested for state sync are already present. + /// Any shard that is currently tracked needs not to be downloaded again. + /// + /// The hidden logic here is that shards that are marked for state sync but + /// are currently tracked are actually marked for splitting. Please see the + /// comment on [`Chain::get_shards_to_state_sync`] for further explanation. + /// + /// Returns a map from the shard_id to ShardSyncDownload only for those + /// shards that need to be split. + fn get_shards_to_split( + &mut self, + sync_hash: CryptoHash, + state_sync_info: &StateSyncInfo, + me: &Option, + ) -> Result, Error> { + let prev_hash = *self.chain.get_block(&sync_hash)?.header().prev_hash(); + let need_to_split_states = self.epoch_manager.will_shard_layout_change(&prev_hash)?; + + if !need_to_split_states { + debug!(target: "catchup", "do not need to split states for shards"); + return Ok(HashMap::new()); + } + + // If the client already has the state for this epoch, skip the downloading phase + let shards_to_split = state_sync_info + .shards + .iter() + .filter_map(|ShardInfo(shard_id, _)| self.should_split_shard(shard_id, me, prev_hash)) + .collect(); + // For colour decorators to work, they need to printed directly. Otherwise the decorators get escaped, garble output and don't add colours. + debug!(target: "catchup", progress_per_shard = ?format_shard_sync_phase_per_shard(&shards_to_split, false), "Need to split states for shards"); + Ok(shards_to_split) + } + + /// Shard should be split if state sync was requested for it but we already + /// track it. + fn should_split_shard( + &mut self, + shard_id: &u64, + me: &Option, + prev_hash: CryptoHash, + ) -> Option<(u64, ShardSyncDownload)> { + let shard_id = *shard_id; + if self.shard_tracker.care_about_shard(me.as_ref(), &prev_hash, shard_id, true) { + let shard_sync_download = ShardSyncDownload { + downloads: vec![], + status: ShardSyncStatus::StateSplitScheduling, + }; + Some((shard_id, shard_sync_download)) + } else { + None + } + } + /// When accepting challenge, we verify that it's valid given signature with current validators. pub fn process_challenge(&mut self, _challenge: Challenge) -> Result<(), Error> { // TODO(2445): Enable challenges when they are working correctly. 
diff --git a/chain/client/src/sync/state.rs b/chain/client/src/sync/state.rs index 6a13e55d7f3..96acb702b63 100644 --- a/chain/client/src/sync/state.rs +++ b/chain/client/src/sync/state.rs @@ -414,7 +414,7 @@ impl StateSync { let part_id = msg.part_id.idx; if msg.sync_hash != sync_hash { debug!(target: "sync", - "Recieved message for other sync hash: shard_id {}, part_id {} expected sync_hash {} recieved sync_hash {}.", + "Received message for other sync hash: shard_id {}, part_id {} expected sync_hash {} received sync_hash {}.", &shard_id, &part_id, &sync_hash, diff --git a/core/o11y/src/lib.rs b/core/o11y/src/lib.rs index 3ed87e730fe..9403e230187 100644 --- a/core/o11y/src/lib.rs +++ b/core/o11y/src/lib.rs @@ -459,10 +459,10 @@ pub fn reload_log_config(config: Option<&log_config::LogConfig>) { }; match result { Ok(_) => { - println!("Updated the logging layer according to `log_config.json`"); + tracing::info!("Updated the logging layer according to `log_config.json`"); } Err(err) => { - println!("Failed to update the logging layer according to the changed `log_config.json`. Errors: {:?}", err); + tracing::info!("Failed to update the logging layer according to the changed `log_config.json`. Errors: {:?}", err); } } } diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index 4d54f9b6da9..8a25701f4b0 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -1179,6 +1179,7 @@ impl RuntimeAdapter for NightshadeRuntime { next_epoch_shard_layout: &ShardLayout, state_split_status: Arc, ) -> Result, Error> { + // TODO(resharding) use flat storage to split the trie here let trie = self.tries.get_view_trie_for_shard(shard_uid, *state_root); let shard_id = shard_uid.shard_id(); let new_shards = next_epoch_shard_layout From de0d9ba5b53d5f26813328059ec43d3022bcfcc3 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 14 Jul 2023 14:26:46 +0300 Subject: [PATCH 05/50] near-vm-runner: move protocol-sensitive error schemas to near-primitives (#9295) This allows us to drop a dependency on the `near-account-id` and `near-rpc-error-macro` crates and brings us ever-so-slightly closer to having a contract runtime suitable for limited replayability. But more importantly this also solves a long-term pain point in the contract runtime where we never really felt too confident modifying errors that are output from the contract runtime due to our fears about it possibly affecting the protocol output. Now that the schemas are outside of `nearcore/runtime` there's also a neat rule of thumb: anything goes inside `nearcore/runtime` (as far as errors are concerned).
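For illustration only (not part of this change, and using hypothetical type names rather than the real nearcore ones): the pattern this split enables is a stable, protocol-visible error type that lives outside the runtime, with the runtime's internal errors mapped into it at the boundary, so the internal side can be refactored freely without touching the schema.

```rust
// Sketch with made-up names: `ProtocolVisibleError` stands in for the schema-bearing
// types now kept in near-primitives, `RuntimeInternalError` for whatever the contract
// runtime keeps internally.
#[derive(Debug)]
enum ProtocolVisibleError {
    MethodNotFound,
    GasExceeded,
}

#[derive(Debug)]
enum RuntimeInternalError {
    MethodResolve(String),
    OutOfGas { used: u64, limit: u64 },
}

// The only protocol-sensitive piece is this mapping at the edge; everything on the
// internal side can change shape without affecting the externally visible errors.
impl From<RuntimeInternalError> for ProtocolVisibleError {
    fn from(err: RuntimeInternalError) -> Self {
        match err {
            RuntimeInternalError::MethodResolve(_) => ProtocolVisibleError::MethodNotFound,
            RuntimeInternalError::OutOfGas { .. } => ProtocolVisibleError::GasExceeded,
        }
    }
}

fn main() {
    let internal = RuntimeInternalError::OutOfGas { used: 10, limit: 5 };
    let visible: ProtocolVisibleError = internal.into();
    println!("{visible:?}");
}
```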
--- Cargo.lock | 2 - chain/jsonrpc/res/rpc_errors_schema.json | 854 +++++++++--------- core/primitives/src/errors.rs | 298 +++++- .../limit_contract_functions_number.rs | 7 +- .../src/tests/runtime/test_evil_contracts.rs | 5 +- .../src/tests/standard_cases/mod.rs | 12 +- runtime/near-vm-runner/Cargo.toml | 4 - runtime/near-vm-runner/src/logic/errors.rs | 215 ++--- .../src/logic/tests/promises.rs | 13 +- runtime/runtime/Cargo.toml | 1 - runtime/runtime/src/actions.rs | 8 +- 11 files changed, 815 insertions(+), 604 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff92cbade30..c212b8e5a95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4361,11 +4361,9 @@ dependencies = [ "hex", "loupe", "memoffset 0.8.0", - "near-account-id", "near-crypto", "near-primitives", "near-primitives-core", - "near-rpc-error-macro", "near-stdx", "near-test-contracts", "near-vm-compiler", diff --git a/chain/jsonrpc/res/rpc_errors_schema.json b/chain/jsonrpc/res/rpc_errors_schema.json index 5bd88fcd74b..1279c61751e 100644 --- a/chain/jsonrpc/res/rpc_errors_schema.json +++ b/chain/jsonrpc/res/rpc_errors_schema.json @@ -1,425 +1,5 @@ { "schema": { - "AltBn128InvalidInput": { - "name": "AltBn128InvalidInput", - "subtypes": [], - "props": { - "msg": "" - } - }, - "BadUTF16": { - "name": "BadUTF16", - "subtypes": [], - "props": {} - }, - "BadUTF8": { - "name": "BadUTF8", - "subtypes": [], - "props": {} - }, - "BalanceExceeded": { - "name": "BalanceExceeded", - "subtypes": [], - "props": {} - }, - "CallIndirectOOB": { - "name": "CallIndirectOOB", - "subtypes": [], - "props": {} - }, - "CannotAppendActionToJointPromise": { - "name": "CannotAppendActionToJointPromise", - "subtypes": [], - "props": {} - }, - "CannotReturnJointPromise": { - "name": "CannotReturnJointPromise", - "subtypes": [], - "props": {} - }, - "CodeDoesNotExist": { - "name": "CodeDoesNotExist", - "subtypes": [], - "props": { - "account_id": "" - } - }, - "CompilationError": { - "name": "CompilationError", - "subtypes": [ - "CodeDoesNotExist", - "PrepareError", - "WasmerCompileError" - ], - "props": {} - }, - "ContractSizeExceeded": { - "name": "ContractSizeExceeded", - "subtypes": [], - "props": { - "limit": "", - "size": "" - } - }, - "Deprecated": { - "name": "Deprecated", - "subtypes": [], - "props": { - "method_name": "" - } - }, - "Deserialization": { - "name": "Deserialization", - "subtypes": [], - "props": {} - }, - "ECRecoverError": { - "name": "ECRecoverError", - "subtypes": [], - "props": { - "msg": "" - } - }, - "Ed25519VerifyInvalidInput": { - "name": "Ed25519VerifyInvalidInput", - "subtypes": [], - "props": { - "msg": "" - } - }, - "EmptyMethodName": { - "name": "EmptyMethodName", - "subtypes": [], - "props": {} - }, - "GasExceeded": { - "name": "GasExceeded", - "subtypes": [], - "props": {} - }, - "GasInstrumentation": { - "name": "GasInstrumentation", - "subtypes": [], - "props": {} - }, - "GasLimitExceeded": { - "name": "GasLimitExceeded", - "subtypes": [], - "props": {} - }, - "GenericTrap": { - "name": "GenericTrap", - "subtypes": [], - "props": {} - }, - "GuestPanic": { - "name": "GuestPanic", - "subtypes": [], - "props": { - "panic_msg": "" - } - }, - "HostError": { - "name": "HostError", - "subtypes": [ - "BadUTF16", - "BadUTF8", - "GasExceeded", - "GasLimitExceeded", - "BalanceExceeded", - "EmptyMethodName", - "GuestPanic", - "IntegerOverflow", - "InvalidPromiseIndex", - "CannotAppendActionToJointPromise", - "CannotReturnJointPromise", - "InvalidPromiseResultIndex", - "InvalidRegisterId", - "IteratorWasInvalidated", - 
"MemoryAccessViolation", - "InvalidReceiptIndex", - "InvalidIteratorIndex", - "InvalidAccountId", - "InvalidMethodName", - "InvalidPublicKey", - "ProhibitedInView", - "NumberOfLogsExceeded", - "KeyLengthExceeded", - "ValueLengthExceeded", - "TotalLogLengthExceeded", - "NumberPromisesExceeded", - "NumberInputDataDependenciesExceeded", - "ReturnedValueLengthExceeded", - "ContractSizeExceeded", - "Deprecated", - "ECRecoverError", - "AltBn128InvalidInput", - "Ed25519VerifyInvalidInput" - ], - "props": {} - }, - "IllegalArithmetic": { - "name": "IllegalArithmetic", - "subtypes": [], - "props": {} - }, - "IncorrectCallIndirectSignature": { - "name": "IncorrectCallIndirectSignature", - "subtypes": [], - "props": {} - }, - "IndirectCallToNull": { - "name": "IndirectCallToNull", - "subtypes": [], - "props": {} - }, - "Instantiate": { - "name": "Instantiate", - "subtypes": [], - "props": {} - }, - "IntegerOverflow": { - "name": "IntegerOverflow", - "subtypes": [], - "props": {} - }, - "InternalMemoryDeclared": { - "name": "InternalMemoryDeclared", - "subtypes": [], - "props": {} - }, - "InvalidAccountId": { - "name": "InvalidAccountId", - "subtypes": [], - "props": { - "account_id": "" - } - }, - "InvalidIteratorIndex": { - "name": "InvalidIteratorIndex", - "subtypes": [], - "props": { - "iterator_index": "" - } - }, - "InvalidMethodName": { - "name": "InvalidMethodName", - "subtypes": [], - "props": {} - }, - "InvalidPromiseIndex": { - "name": "InvalidPromiseIndex", - "subtypes": [], - "props": { - "promise_idx": "" - } - }, - "InvalidPromiseResultIndex": { - "name": "InvalidPromiseResultIndex", - "subtypes": [], - "props": { - "result_idx": "" - } - }, - "InvalidPublicKey": { - "name": "InvalidPublicKey", - "subtypes": [], - "props": {} - }, - "InvalidReceiptIndex": { - "name": "InvalidReceiptIndex", - "subtypes": [], - "props": { - "receipt_index": "" - } - }, - "InvalidRegisterId": { - "name": "InvalidRegisterId", - "subtypes": [], - "props": { - "register_id": "" - } - }, - "IteratorWasInvalidated": { - "name": "IteratorWasInvalidated", - "subtypes": [], - "props": { - "iterator_index": "" - } - }, - "KeyLengthExceeded": { - "name": "KeyLengthExceeded", - "subtypes": [], - "props": { - "length": "", - "limit": "" - } - }, - "Memory": { - "name": "Memory", - "subtypes": [], - "props": {} - }, - "MemoryAccessViolation": { - "name": "MemoryAccessViolation", - "subtypes": [], - "props": {} - }, - "MemoryOutOfBounds": { - "name": "MemoryOutOfBounds", - "subtypes": [], - "props": {} - }, - "MethodEmptyName": { - "name": "MethodEmptyName", - "subtypes": [], - "props": {} - }, - "MethodInvalidSignature": { - "name": "MethodInvalidSignature", - "subtypes": [], - "props": {} - }, - "MethodNotFound": { - "name": "MethodNotFound", - "subtypes": [], - "props": {} - }, - "MethodResolveError": { - "name": "MethodResolveError", - "subtypes": [ - "MethodEmptyName", - "MethodNotFound", - "MethodInvalidSignature" - ], - "props": {} - }, - "MisalignedAtomicAccess": { - "name": "MisalignedAtomicAccess", - "subtypes": [], - "props": {} - }, - "NumberInputDataDependenciesExceeded": { - "name": "NumberInputDataDependenciesExceeded", - "subtypes": [], - "props": { - "limit": "", - "number_of_input_data_dependencies": "" - } - }, - "NumberOfLogsExceeded": { - "name": "NumberOfLogsExceeded", - "subtypes": [], - "props": { - "limit": "" - } - }, - "NumberPromisesExceeded": { - "name": "NumberPromisesExceeded", - "subtypes": [], - "props": { - "limit": "", - "number_of_promises": "" - } - }, - "PrepareError": { - "name": 
"PrepareError", - "subtypes": [ - "Serialization", - "Deserialization", - "InternalMemoryDeclared", - "GasInstrumentation", - "StackHeightInstrumentation", - "Instantiate", - "Memory", - "TooManyFunctions", - "TooManyLocals" - ], - "props": {} - }, - "ProhibitedInView": { - "name": "ProhibitedInView", - "subtypes": [], - "props": { - "method_name": "" - } - }, - "ReturnedValueLengthExceeded": { - "name": "ReturnedValueLengthExceeded", - "subtypes": [], - "props": { - "length": "", - "limit": "" - } - }, - "Serialization": { - "name": "Serialization", - "subtypes": [], - "props": {} - }, - "StackHeightInstrumentation": { - "name": "StackHeightInstrumentation", - "subtypes": [], - "props": {} - }, - "StackOverflow": { - "name": "StackOverflow", - "subtypes": [], - "props": {} - }, - "TooManyFunctions": { - "name": "TooManyFunctions", - "subtypes": [], - "props": {} - }, - "TooManyLocals": { - "name": "TooManyLocals", - "subtypes": [], - "props": {} - }, - "TotalLogLengthExceeded": { - "name": "TotalLogLengthExceeded", - "subtypes": [], - "props": { - "length": "", - "limit": "" - } - }, - "Unreachable": { - "name": "Unreachable", - "subtypes": [], - "props": {} - }, - "ValueLengthExceeded": { - "name": "ValueLengthExceeded", - "subtypes": [], - "props": { - "length": "", - "limit": "" - } - }, - "WasmTrap": { - "name": "WasmTrap", - "subtypes": [ - "Unreachable", - "IncorrectCallIndirectSignature", - "MemoryOutOfBounds", - "CallIndirectOOB", - "IllegalArithmetic", - "MisalignedAtomicAccess", - "IndirectCallToNull", - "StackOverflow", - "GenericTrap" - ], - "props": {} - }, - "WasmerCompileError": { - "name": "WasmerCompileError", - "subtypes": [], - "props": { - "msg": "" - } - }, "AccessKeyNotFound": { "name": "AccessKeyNotFound", "subtypes": [], @@ -524,6 +104,28 @@ "total_number_of_bytes": "" } }, + "AltBn128InvalidInput": { + "name": "AltBn128InvalidInput", + "subtypes": [], + "props": { + "msg": "" + } + }, + "BadUTF16": { + "name": "BadUTF16", + "subtypes": [], + "props": {} + }, + "BadUTF8": { + "name": "BadUTF8", + "subtypes": [], + "props": {} + }, + "BalanceExceeded": { + "name": "BalanceExceeded", + "subtypes": [], + "props": {} + }, "BalanceMismatchError": { "name": "BalanceMismatchError", "subtypes": [], @@ -542,6 +144,45 @@ "tx_burnt_amount": "" } }, + "CallIndirectOOB": { + "name": "CallIndirectOOB", + "subtypes": [], + "props": {} + }, + "CannotAppendActionToJointPromise": { + "name": "CannotAppendActionToJointPromise", + "subtypes": [], + "props": {} + }, + "CannotReturnJointPromise": { + "name": "CannotReturnJointPromise", + "subtypes": [], + "props": {} + }, + "CodeDoesNotExist": { + "name": "CodeDoesNotExist", + "subtypes": [], + "props": { + "account_id": "" + } + }, + "CompilationError": { + "name": "CompilationError", + "subtypes": [ + "CodeDoesNotExist", + "PrepareError", + "WasmerCompileError" + ], + "props": {} + }, + "ContractSizeExceeded": { + "name": "ContractSizeExceeded", + "subtypes": [], + "props": { + "limit": "", + "size": "" + } + }, "CostOverflow": { "name": "CostOverflow", "subtypes": [], @@ -635,6 +276,37 @@ "subtypes": [], "props": {} }, + "Deprecated": { + "name": "Deprecated", + "subtypes": [], + "props": { + "method_name": "" + } + }, + "Deserialization": { + "name": "Deserialization", + "subtypes": [], + "props": {} + }, + "ECRecoverError": { + "name": "ECRecoverError", + "subtypes": [], + "props": { + "msg": "" + } + }, + "Ed25519VerifyInvalidInput": { + "name": "Ed25519VerifyInvalidInput", + "subtypes": [], + "props": { + "msg": "" + } + }, + 
"EmptyMethodName": { + "name": "EmptyMethodName", + "subtypes": [], + "props": {} + }, "Expired": { "name": "Expired", "subtypes": [], @@ -648,16 +320,102 @@ "limit": "" } }, - "FunctionCallMethodNameLengthExceeded": { - "name": "FunctionCallMethodNameLengthExceeded", + "FunctionCallMethodNameLengthExceeded": { + "name": "FunctionCallMethodNameLengthExceeded", + "subtypes": [], + "props": { + "length": "", + "limit": "" + } + }, + "FunctionCallZeroAttachedGas": { + "name": "FunctionCallZeroAttachedGas", + "subtypes": [], + "props": {} + }, + "GasExceeded": { + "name": "GasExceeded", + "subtypes": [], + "props": {} + }, + "GasInstrumentation": { + "name": "GasInstrumentation", + "subtypes": [], + "props": {} + }, + "GasLimitExceeded": { + "name": "GasLimitExceeded", + "subtypes": [], + "props": {} + }, + "GenericTrap": { + "name": "GenericTrap", + "subtypes": [], + "props": {} + }, + "GuestPanic": { + "name": "GuestPanic", + "subtypes": [], + "props": { + "panic_msg": "" + } + }, + "HostError": { + "name": "HostError", + "subtypes": [ + "BadUTF16", + "BadUTF8", + "GasExceeded", + "GasLimitExceeded", + "BalanceExceeded", + "EmptyMethodName", + "GuestPanic", + "IntegerOverflow", + "InvalidPromiseIndex", + "CannotAppendActionToJointPromise", + "CannotReturnJointPromise", + "InvalidPromiseResultIndex", + "InvalidRegisterId", + "IteratorWasInvalidated", + "MemoryAccessViolation", + "InvalidReceiptIndex", + "InvalidIteratorIndex", + "InvalidAccountId", + "InvalidMethodName", + "InvalidPublicKey", + "ProhibitedInView", + "NumberOfLogsExceeded", + "KeyLengthExceeded", + "ValueLengthExceeded", + "TotalLogLengthExceeded", + "NumberPromisesExceeded", + "NumberInputDataDependenciesExceeded", + "ReturnedValueLengthExceeded", + "ContractSizeExceeded", + "Deprecated", + "ECRecoverError", + "AltBn128InvalidInput", + "Ed25519VerifyInvalidInput" + ], + "props": {} + }, + "IllegalArithmetic": { + "name": "IllegalArithmetic", + "subtypes": [], + "props": {} + }, + "IncorrectCallIndirectSignature": { + "name": "IncorrectCallIndirectSignature", + "subtypes": [], + "props": {} + }, + "IndirectCallToNull": { + "name": "IndirectCallToNull", "subtypes": [], - "props": { - "length": "", - "limit": "" - } + "props": {} }, - "FunctionCallZeroAttachedGas": { - "name": "FunctionCallZeroAttachedGas", + "Instantiate": { + "name": "Instantiate", "subtypes": [], "props": {} }, @@ -670,6 +428,16 @@ "stake": "" } }, + "IntegerOverflow": { + "name": "IntegerOverflow", + "subtypes": [], + "props": {} + }, + "InternalMemoryDeclared": { + "name": "InternalMemoryDeclared", + "subtypes": [], + "props": {} + }, "InvalidAccessKeyError": { "name": "InvalidAccessKeyError", "subtypes": [ @@ -682,6 +450,11 @@ ], "props": {} }, + "InvalidAccountId": { + "name": "InvalidAccountId", + "subtypes": [], + "props": {} + }, "InvalidChain": { "name": "InvalidChain", "subtypes": [], @@ -694,6 +467,18 @@ "account_id": "" } }, + "InvalidIteratorIndex": { + "name": "InvalidIteratorIndex", + "subtypes": [], + "props": { + "iterator_index": "" + } + }, + "InvalidMethodName": { + "name": "InvalidMethodName", + "subtypes": [], + "props": {} + }, "InvalidNonce": { "name": "InvalidNonce", "subtypes": [], @@ -709,6 +494,32 @@ "account_id": "" } }, + "InvalidPromiseIndex": { + "name": "InvalidPromiseIndex", + "subtypes": [], + "props": { + "promise_idx": "" + } + }, + "InvalidPromiseResultIndex": { + "name": "InvalidPromiseResultIndex", + "subtypes": [], + "props": { + "result_idx": "" + } + }, + "InvalidPublicKey": { + "name": "InvalidPublicKey", + 
"subtypes": [], + "props": {} + }, + "InvalidReceiptIndex": { + "name": "InvalidReceiptIndex", + "subtypes": [], + "props": { + "receipt_index": "" + } + }, "InvalidReceiverId": { "name": "InvalidReceiverId", "subtypes": [], @@ -716,6 +527,13 @@ "account_id": "" } }, + "InvalidRegisterId": { + "name": "InvalidRegisterId", + "subtypes": [], + "props": { + "register_id": "" + } + }, "InvalidSignature": { "name": "InvalidSignature", "subtypes": [], @@ -748,6 +566,21 @@ ], "props": {} }, + "IteratorWasInvalidated": { + "name": "IteratorWasInvalidated", + "subtypes": [], + "props": { + "iterator_index": "" + } + }, + "KeyLengthExceeded": { + "name": "KeyLengthExceeded", + "subtypes": [], + "props": { + "length": "", + "limit": "" + } + }, "LackBalanceForState": { "name": "LackBalanceForState", "subtypes": [], @@ -756,6 +589,31 @@ "amount": "" } }, + "Memory": { + "name": "Memory", + "subtypes": [], + "props": {} + }, + "MemoryAccessViolation": { + "name": "MemoryAccessViolation", + "subtypes": [], + "props": {} + }, + "MemoryOutOfBounds": { + "name": "MemoryOutOfBounds", + "subtypes": [], + "props": {} + }, + "MethodEmptyName": { + "name": "MethodEmptyName", + "subtypes": [], + "props": {} + }, + "MethodInvalidSignature": { + "name": "MethodInvalidSignature", + "subtypes": [], + "props": {} + }, "MethodNameMismatch": { "name": "MethodNameMismatch", "subtypes": [], @@ -763,6 +621,25 @@ "method_name": "" } }, + "MethodNotFound": { + "name": "MethodNotFound", + "subtypes": [], + "props": {} + }, + "MethodResolveError": { + "name": "MethodResolveError", + "subtypes": [ + "MethodEmptyName", + "MethodNotFound", + "MethodInvalidSignature" + ], + "props": {} + }, + "MisalignedAtomicAccess": { + "name": "MisalignedAtomicAccess", + "subtypes": [], + "props": {} + }, "NonceTooLarge": { "name": "NonceTooLarge", "subtypes": [], @@ -790,6 +667,29 @@ "signer_id": "" } }, + "NumberInputDataDependenciesExceeded": { + "name": "NumberInputDataDependenciesExceeded", + "subtypes": [], + "props": { + "limit": "", + "number_of_input_data_dependencies": "" + } + }, + "NumberOfLogsExceeded": { + "name": "NumberOfLogsExceeded", + "subtypes": [], + "props": { + "limit": "" + } + }, + "NumberPromisesExceeded": { + "name": "NumberPromisesExceeded", + "subtypes": [], + "props": { + "limit": "", + "number_of_promises": "" + } + }, "OnlyImplicitAccountCreationAllowed": { "name": "OnlyImplicitAccountCreationAllowed", "subtypes": [], @@ -797,6 +697,28 @@ "account_id": "" } }, + "PrepareError": { + "name": "PrepareError", + "subtypes": [ + "Serialization", + "Deserialization", + "InternalMemoryDeclared", + "GasInstrumentation", + "StackHeightInstrumentation", + "Instantiate", + "Memory", + "TooManyFunctions", + "TooManyLocals" + ], + "props": {} + }, + "ProhibitedInView": { + "name": "ProhibitedInView", + "subtypes": [], + "props": { + "method_name": "" + } + }, "ReceiptValidationError": { "name": "ReceiptValidationError", "subtypes": [ @@ -823,6 +745,19 @@ "subtypes": [], "props": {} }, + "ReturnedValueLengthExceeded": { + "name": "ReturnedValueLengthExceeded", + "subtypes": [], + "props": { + "length": "", + "limit": "" + } + }, + "Serialization": { + "name": "Serialization", + "subtypes": [], + "props": {} + }, "SignerDoesNotExist": { "name": "SignerDoesNotExist", "subtypes": [], @@ -830,6 +765,34 @@ "signer_id": "" } }, + "StackHeightInstrumentation": { + "name": "StackHeightInstrumentation", + "subtypes": [], + "props": {} + }, + "StackOverflow": { + "name": "StackOverflow", + "subtypes": [], + "props": {} + }, + 
"TooManyFunctions": { + "name": "TooManyFunctions", + "subtypes": [], + "props": {} + }, + "TooManyLocals": { + "name": "TooManyLocals", + "subtypes": [], + "props": {} + }, + "TotalLogLengthExceeded": { + "name": "TotalLogLengthExceeded", + "subtypes": [], + "props": { + "length": "", + "limit": "" + } + }, "TotalNumberOfActionsExceeded": { "name": "TotalNumberOfActionsExceeded", "subtypes": [], @@ -879,6 +842,11 @@ ], "props": {} }, + "Unreachable": { + "name": "Unreachable", + "subtypes": [], + "props": {} + }, "UnsuitableStakingKey": { "name": "UnsuitableStakingKey", "subtypes": [], @@ -894,6 +862,36 @@ "version": "" } }, + "ValueLengthExceeded": { + "name": "ValueLengthExceeded", + "subtypes": [], + "props": { + "length": "", + "limit": "" + } + }, + "WasmTrap": { + "name": "WasmTrap", + "subtypes": [ + "Unreachable", + "IncorrectCallIndirectSignature", + "MemoryOutOfBounds", + "CallIndirectOOB", + "IllegalArithmetic", + "MisalignedAtomicAccess", + "IndirectCallToNull", + "StackOverflow", + "GenericTrap" + ], + "props": {} + }, + "WasmerCompileError": { + "name": "WasmerCompileError", + "subtypes": [], + "props": { + "msg": "" + } + }, "Closed": { "name": "Closed", "subtypes": [], diff --git a/core/primitives/src/errors.rs b/core/primitives/src/errors.rs index f1678de9336..9ede1561aba 100644 --- a/core/primitives/src/errors.rs +++ b/core/primitives/src/errors.rs @@ -5,7 +5,6 @@ use borsh::{BorshDeserialize, BorshSerialize}; use near_crypto::PublicKey; use near_primitives_core::types::ProtocolVersion; use near_rpc_error_macro::RpcError; -use near_vm_runner::logic::errors::FunctionCallErrorSer; use std::fmt::{Debug, Display}; /// Error returned in the ExecutionOutcome in case of failure @@ -469,7 +468,7 @@ pub enum ActionErrorKind { minimum_stake: Balance, }, /// An error occurred during a `FunctionCall` Action, parameter is debug message. - FunctionCallError(FunctionCallErrorSer), + FunctionCallError(FunctionCallError), /// Error occurs when a new `ActionReceipt` created by the `FunctionCall` action fails /// receipt validation. NewReceiptValidationError(ReceiptValidationError), @@ -897,3 +896,298 @@ impl From for EpochError { EpochError::IOErr(error.to_string()) } } + +#[derive( + Debug, + Clone, + PartialEq, + Eq, + BorshDeserialize, + BorshSerialize, + RpcError, + serde::Deserialize, + serde::Serialize, +)] +/// Error that can occur while preparing or executing Wasm smart-contract. +pub enum PrepareError { + /// Error happened while serializing the module. + Serialization, + /// Error happened while deserializing the module. + Deserialization, + /// Internal memory declaration has been found in the module. + InternalMemoryDeclared, + /// Gas instrumentation failed. + /// + /// This most likely indicates the module isn't valid. + GasInstrumentation, + /// Stack instrumentation failed. + /// + /// This most likely indicates the module isn't valid. + StackHeightInstrumentation, + /// Error happened during instantiation. + /// + /// This might indicate that `start` function trapped, or module isn't + /// instantiable and/or unlinkable. + Instantiate, + /// Error creating memory. + Memory, + /// Contract contains too many functions. + TooManyFunctions, + /// Contract contains too many locals. 
+ TooManyLocals, +} + +/// A kind of a trap happened during execution of a binary +#[derive( + Debug, + Clone, + PartialEq, + Eq, + BorshDeserialize, + BorshSerialize, + RpcError, + serde::Deserialize, + serde::Serialize, + strum::IntoStaticStr, +)] +pub enum WasmTrap { + /// An `unreachable` opcode was executed. + Unreachable, + /// Call indirect incorrect signature trap. + IncorrectCallIndirectSignature, + /// Memory out of bounds trap. + MemoryOutOfBounds, + /// Call indirect out of bounds trap. + CallIndirectOOB, + /// An arithmetic exception, e.g. divided by zero. + IllegalArithmetic, + /// Misaligned atomic access trap. + MisalignedAtomicAccess, + /// Indirect call to null. + IndirectCallToNull, + /// Stack overflow. + StackOverflow, + /// Generic trap. + GenericTrap, +} + +#[derive( + Debug, + Clone, + PartialEq, + Eq, + BorshDeserialize, + BorshSerialize, + RpcError, + serde::Deserialize, + serde::Serialize, + strum::IntoStaticStr, +)] +pub enum HostError { + /// String encoding is bad UTF-16 sequence + BadUTF16, + /// String encoding is bad UTF-8 sequence + BadUTF8, + /// Exceeded the prepaid gas + GasExceeded, + /// Exceeded the maximum amount of gas allowed to burn per contract + GasLimitExceeded, + /// Exceeded the account balance + BalanceExceeded, + /// Tried to call an empty method name + EmptyMethodName, + /// Smart contract panicked + GuestPanic { panic_msg: String }, + /// IntegerOverflow happened during a contract execution + IntegerOverflow, + /// `promise_idx` does not correspond to existing promises + InvalidPromiseIndex { promise_idx: u64 }, + /// Actions can only be appended to non-joint promise. + CannotAppendActionToJointPromise, + /// Returning joint promise is currently prohibited + CannotReturnJointPromise, + /// Accessed invalid promise result index + InvalidPromiseResultIndex { result_idx: u64 }, + /// Accessed invalid register id + InvalidRegisterId { register_id: u64 }, + /// Iterator `iterator_index` was invalidated after its creation by performing a mutable operation on trie + IteratorWasInvalidated { iterator_index: u64 }, + /// Accessed memory outside the bounds + MemoryAccessViolation, + /// VM Logic returned an invalid receipt index + InvalidReceiptIndex { receipt_index: u64 }, + /// Iterator index `iterator_index` does not exist + InvalidIteratorIndex { iterator_index: u64 }, + /// VM Logic returned an invalid account id + InvalidAccountId, + /// VM Logic returned an invalid method name + InvalidMethodName, + /// VM Logic provided an invalid public key + InvalidPublicKey, + /// `method_name` is not allowed in view calls + ProhibitedInView { method_name: String }, + /// The total number of logs will exceed the limit. + NumberOfLogsExceeded { limit: u64 }, + /// The storage key length exceeded the limit. + KeyLengthExceeded { length: u64, limit: u64 }, + /// The storage value length exceeded the limit. + ValueLengthExceeded { length: u64, limit: u64 }, + /// The total log length exceeded the limit. + TotalLogLengthExceeded { length: u64, limit: u64 }, + /// The maximum number of promises within a FunctionCall exceeded the limit. + NumberPromisesExceeded { number_of_promises: u64, limit: u64 }, + /// The maximum number of input data dependencies exceeded the limit. + NumberInputDataDependenciesExceeded { number_of_input_data_dependencies: u64, limit: u64 }, + /// The returned value length exceeded the limit. + ReturnedValueLengthExceeded { length: u64, limit: u64 }, + /// The contract size for DeployContract action exceeded the limit. 
+ ContractSizeExceeded { size: u64, limit: u64 }, + /// The host function was deprecated. + Deprecated { method_name: String }, + /// General errors for ECDSA recover. + ECRecoverError { msg: String }, + /// Invalid input to alt_bn128 familiy of functions (e.g., point which isn't + /// on the curve). + AltBn128InvalidInput { msg: String }, + /// Invalid input to ed25519 signature verification function (e.g. signature cannot be + /// derived from bytes). + Ed25519VerifyInvalidInput { msg: String }, +} + +#[derive( + Debug, + Clone, + PartialEq, + Eq, + BorshDeserialize, + BorshSerialize, + RpcError, + serde::Deserialize, + serde::Serialize, + strum::IntoStaticStr, +)] +pub enum MethodResolveError { + MethodEmptyName, + MethodNotFound, + MethodInvalidSignature, +} + +#[derive( + Debug, + Clone, + PartialEq, + Eq, + BorshDeserialize, + BorshSerialize, + RpcError, + serde::Deserialize, + serde::Serialize, + strum::IntoStaticStr, +)] +pub enum CompilationError { + CodeDoesNotExist { + account_id: AccountId, + }, + PrepareError(PrepareError), + /// This is for defense in depth. + /// We expect our runtime-independent preparation code to fully catch all invalid wasms, + /// but, if it ever misses something we’ll emit this error + WasmerCompileError { + msg: String, + }, +} + +/// Serializable version of `near-vm-runner::FunctionCallError`. +/// +/// Must never reorder/remove elements, can only add new variants at the end (but do that very +/// carefully). It describes stable serialization format, and only used by serialization logic. +#[derive( + Debug, + Clone, + PartialEq, + Eq, + BorshDeserialize, + BorshSerialize, + serde::Serialize, + serde::Deserialize, +)] +pub enum FunctionCallError { + /// Wasm compilation error + CompilationError(CompilationError), + /// Wasm binary env link error + /// + /// Note: this is only to deserialize old data, use execution error for new data + LinkError { + msg: String, + }, + /// Import/export resolve error + MethodResolveError(MethodResolveError), + /// A trap happened during execution of a binary + /// + /// Note: this is only to deserialize old data, use execution error for new data + WasmTrap(WasmTrap), + WasmUnknownError, + /// Note: this is only to deserialize old data, use execution error for new data + HostError(HostError), + // Unused, can be reused by a future error but must be exactly one error to keep ExecutionError + // error borsh serialized at correct index + _EVMError, + ExecutionError(String), +} + +impl From for MethodResolveError { + fn from(outer_err: near_vm_runner::logic::errors::MethodResolveError) -> Self { + use near_vm_runner::logic::errors::MethodResolveError as MRE; + match outer_err { + MRE::MethodEmptyName => Self::MethodEmptyName, + MRE::MethodNotFound => Self::MethodNotFound, + MRE::MethodInvalidSignature => Self::MethodInvalidSignature, + } + } +} + +impl From for PrepareError { + fn from(outer_err: near_vm_runner::logic::errors::PrepareError) -> Self { + use near_vm_runner::logic::errors::PrepareError as PE; + match outer_err { + PE::Serialization => Self::Serialization, + PE::Deserialization => Self::Deserialization, + PE::InternalMemoryDeclared => Self::InternalMemoryDeclared, + PE::GasInstrumentation => Self::GasInstrumentation, + PE::StackHeightInstrumentation => Self::StackHeightInstrumentation, + PE::Instantiate => Self::Instantiate, + PE::Memory => Self::Memory, + PE::TooManyFunctions => Self::TooManyFunctions, + PE::TooManyLocals => Self::TooManyLocals, + } + } +} + +impl From for CompilationError { + fn 
from(outer_err: near_vm_runner::logic::errors::CompilationError) -> Self { + use near_vm_runner::logic::errors::CompilationError as CE; + match outer_err { + CE::CodeDoesNotExist { account_id } => Self::CodeDoesNotExist { + account_id: account_id.parse().expect("account_id in error must be valid"), + }, + CE::PrepareError(pe) => Self::PrepareError(pe.into()), + CE::WasmerCompileError { msg } => Self::WasmerCompileError { msg }, + } + } +} + +impl From for FunctionCallError { + fn from(outer_err: near_vm_runner::logic::errors::FunctionCallError) -> Self { + use near_vm_runner::logic::errors::FunctionCallError as FCE; + match outer_err { + FCE::CompilationError(e) => Self::CompilationError(e.into()), + FCE::MethodResolveError(e) => Self::MethodResolveError(e.into()), + // Note: We deliberately collapse all execution errors for + // serialization to make the DB representation less dependent + // on specific types in Rust code. + FCE::HostError(ref _e) => Self::ExecutionError(outer_err.to_string()), + FCE::LinkError { msg } => Self::ExecutionError(format!("Link Error: {}", msg)), + FCE::WasmTrap(ref _e) => Self::ExecutionError(outer_err.to_string()), + } + } +} diff --git a/integration-tests/src/tests/client/features/limit_contract_functions_number.rs b/integration-tests/src/tests/client/features/limit_contract_functions_number.rs index 5d5af25b4d5..29a7ad5dacd 100644 --- a/integration-tests/src/tests/client/features/limit_contract_functions_number.rs +++ b/integration-tests/src/tests/client/features/limit_contract_functions_number.rs @@ -4,11 +4,12 @@ use assert_matches::assert_matches; use near_chain::ChainGenesis; use near_chain_configs::Genesis; use near_client::test_utils::TestEnv; -use near_primitives::errors::{ActionErrorKind, TxExecutionError}; +use near_primitives::errors::{ + ActionErrorKind, CompilationError, FunctionCallError, PrepareError, TxExecutionError, +}; use near_primitives::runtime::config_store::RuntimeConfigStore; use near_primitives::version::ProtocolFeature; use near_primitives::views::FinalExecutionStatus; -use near_vm_runner::logic::errors::{CompilationError, FunctionCallErrorSer, PrepareError}; use nearcore::config::GenesisExt; fn verify_contract_limits_upgrade( @@ -64,7 +65,7 @@ fn verify_contract_limits_upgrade( status => panic!("expected transaction to fail, got {:?}", status), }; match e.kind { - ActionErrorKind::FunctionCallError(FunctionCallErrorSer::CompilationError( + ActionErrorKind::FunctionCallError(FunctionCallError::CompilationError( CompilationError::PrepareError(e), )) if e == expected_prepare_err => (), kind => panic!("got unexpected action error kind: {:?}", kind), diff --git a/integration-tests/src/tests/runtime/test_evil_contracts.rs b/integration-tests/src/tests/runtime/test_evil_contracts.rs index ef0ad89a2a4..e6c11c6783f 100644 --- a/integration-tests/src/tests/runtime/test_evil_contracts.rs +++ b/integration-tests/src/tests/runtime/test_evil_contracts.rs @@ -1,7 +1,6 @@ use crate::node::{Node, RuntimeNode}; -use near_primitives::errors::{ActionError, ActionErrorKind}; +use near_primitives::errors::{ActionError, ActionErrorKind, FunctionCallError}; use near_primitives::views::FinalExecutionStatus; -use near_vm_runner::logic::errors::FunctionCallErrorSer; use std::mem::size_of; use assert_matches::assert_matches; @@ -129,7 +128,7 @@ fn test_evil_abort() { FinalExecutionStatus::Failure( ActionError { index: Some(0), - kind: ActionErrorKind::FunctionCallError(FunctionCallErrorSer::ExecutionError( + kind: 
ActionErrorKind::FunctionCallError(FunctionCallError::ExecutionError( "String encoding is bad UTF-16 sequence.".to_string() )) } diff --git a/integration-tests/src/tests/standard_cases/mod.rs b/integration-tests/src/tests/standard_cases/mod.rs index 38e94ba2a76..65a91e5ee20 100644 --- a/integration-tests/src/tests/standard_cases/mod.rs +++ b/integration-tests/src/tests/standard_cases/mod.rs @@ -9,7 +9,8 @@ use near_jsonrpc_primitives::errors::ServerError; use near_primitives::account::{AccessKey, AccessKeyPermission, FunctionCallPermission}; use near_primitives::config::{ActionCosts, ExtCosts}; use near_primitives::errors::{ - ActionError, ActionErrorKind, InvalidAccessKeyError, InvalidTxError, TxExecutionError, + ActionError, ActionErrorKind, FunctionCallError, InvalidAccessKeyError, InvalidTxError, + MethodResolveError, TxExecutionError, }; use near_primitives::hash::{hash, CryptoHash}; use near_primitives::types::{AccountId, Balance, TrieNodesCount}; @@ -17,7 +18,6 @@ use near_primitives::views::{ AccessKeyView, AccountView, ExecutionMetadataView, FinalExecutionOutcomeView, FinalExecutionStatus, }; -use near_vm_runner::logic::errors::{FunctionCallErrorSer, MethodResolveError}; use nearcore::config::{NEAR_BASE, TESTING_INIT_BALANCE, TESTING_INIT_STAKE}; use crate::node::Node; @@ -89,7 +89,7 @@ pub fn test_smart_contract_panic(node: impl Node) { FinalExecutionStatus::Failure( ActionError { index: Some(0), - kind: ActionErrorKind::FunctionCallError(FunctionCallErrorSer::ExecutionError( + kind: ActionErrorKind::FunctionCallError(FunctionCallError::ExecutionError( "Smart contract panicked: WAT?".to_string() )) } @@ -127,7 +127,7 @@ pub fn test_smart_contract_bad_method_name(node: impl Node) { FinalExecutionStatus::Failure( ActionError { index: Some(0), - kind: ActionErrorKind::FunctionCallError(FunctionCallErrorSer::MethodResolveError( + kind: ActionErrorKind::FunctionCallError(FunctionCallError::MethodResolveError( MethodResolveError::MethodNotFound )) } @@ -151,7 +151,7 @@ pub fn test_smart_contract_empty_method_name_with_no_tokens(node: impl Node) { FinalExecutionStatus::Failure( ActionError { index: Some(0), - kind: ActionErrorKind::FunctionCallError(FunctionCallErrorSer::MethodResolveError( + kind: ActionErrorKind::FunctionCallError(FunctionCallError::MethodResolveError( MethodResolveError::MethodEmptyName )) } @@ -175,7 +175,7 @@ pub fn test_smart_contract_empty_method_name_with_tokens(node: impl Node) { FinalExecutionStatus::Failure( ActionError { index: Some(0), - kind: ActionErrorKind::FunctionCallError(FunctionCallErrorSer::MethodResolveError( + kind: ActionErrorKind::FunctionCallError(FunctionCallError::MethodResolveError( MethodResolveError::MethodEmptyName )) } diff --git a/runtime/near-vm-runner/Cargo.toml b/runtime/near-vm-runner/Cargo.toml index b739c731ff2..95c3dd4f7d1 100644 --- a/runtime/near-vm-runner/Cargo.toml +++ b/runtime/near-vm-runner/Cargo.toml @@ -36,10 +36,8 @@ tracing.workspace = true wasmparser.workspace = true wasmtime = { workspace = true, optional = true } -near-account-id.workspace = true near-crypto.workspace = true near-primitives-core.workspace = true -near-rpc-error-macro.workspace = true # Old versions of pwasm-utils we need to preserve backwards compatibility under # old protocol versions. @@ -126,8 +124,6 @@ nightly = [ sandbox = [] io_trace = [] -dump_errors_schema = ["near-rpc-error-macro/dump_errors_schema"] - # Use this feature to enable counting of fees and costs applied. 
costs_counting = [] diff --git a/runtime/near-vm-runner/src/logic/errors.rs b/runtime/near-vm-runner/src/logic/errors.rs index 42a8388f7aa..6dd35cc65ec 100644 --- a/runtime/near-vm-runner/src/logic/errors.rs +++ b/runtime/near-vm-runner/src/logic/errors.rs @@ -1,6 +1,4 @@ use borsh::{BorshDeserialize, BorshSerialize}; -use near_account_id::AccountId; -use near_rpc_error_macro::RpcError; use std::any::Any; use std::fmt::{self, Error, Formatter}; use std::io; @@ -55,44 +53,7 @@ pub enum FunctionCallError { HostError(HostError), } -/// Serializable version of `FunctionCallError`. Must never reorder/remove elements, can only -/// add new variants at the end (but do that very carefully). -/// It describes stable serialization format, and only used by serialization logic. -#[derive( - Debug, - Clone, - PartialEq, - Eq, - BorshDeserialize, - BorshSerialize, - serde::Serialize, - serde::Deserialize, -)] -pub enum FunctionCallErrorSer { - /// Wasm compilation error - CompilationError(CompilationError), - /// Wasm binary env link error - /// - /// Note: this is only to deserialize old data, use execution error for new data - LinkError { - msg: String, - }, - /// Import/export resolve error - MethodResolveError(MethodResolveError), - /// A trap happened during execution of a binary - /// - /// Note: this is only to deserialize old data, use execution error for new data - WasmTrap(WasmTrap), - WasmUnknownError, - /// Note: this is only to deserialize old data, use execution error for new data - HostError(HostError), - // Unused, can be reused by a future error but must be exactly one error to keep ExecutionError - // error borsh serialized at correct index - _EVMError, - ExecutionError(String), -} - -#[derive(Debug, strum::IntoStaticStr, thiserror::Error)] +#[derive(Debug, thiserror::Error, strum::IntoStaticStr)] pub enum CacheError { #[error("cache read error")] ReadError(#[source] io::Error), @@ -104,18 +65,7 @@ pub enum CacheError { SerializationError { hash: [u8; 32] }, } /// A kind of a trap happened during execution of a binary -#[derive( - Debug, - Clone, - PartialEq, - Eq, - BorshDeserialize, - BorshSerialize, - RpcError, - serde::Deserialize, - serde::Serialize, - strum::IntoStaticStr, -)] +#[derive(Debug, Clone, PartialEq, Eq, strum::IntoStaticStr)] pub enum WasmTrap { /// An `unreachable` opcode was executed. Unreachable, @@ -137,39 +87,17 @@ pub enum WasmTrap { GenericTrap, } -#[derive( - Debug, - Clone, - PartialEq, - Eq, - BorshDeserialize, - BorshSerialize, - RpcError, - serde::Deserialize, - serde::Serialize, - strum::IntoStaticStr, -)] +#[derive(Debug, Clone, PartialEq, Eq, strum::IntoStaticStr)] pub enum MethodResolveError { MethodEmptyName, MethodNotFound, MethodInvalidSignature, } -#[derive( - Debug, - Clone, - PartialEq, - Eq, - BorshDeserialize, - BorshSerialize, - RpcError, - serde::Deserialize, - serde::Serialize, - strum::IntoStaticStr, -)] +#[derive(Debug, Clone, PartialEq, Eq, BorshDeserialize, BorshSerialize, strum::IntoStaticStr)] pub enum CompilationError { CodeDoesNotExist { - account_id: AccountId, + account_id: Box, }, PrepareError(PrepareError), /// This is for defense in depth. @@ -180,17 +108,7 @@ pub enum CompilationError { }, } -#[derive( - Debug, - Clone, - PartialEq, - Eq, - BorshDeserialize, - BorshSerialize, - RpcError, - serde::Deserialize, - serde::Serialize, -)] +#[derive(Debug, Clone, PartialEq, Eq, BorshDeserialize, BorshSerialize)] /// Error that can occur while preparing or executing Wasm smart-contract. 
pub enum PrepareError { /// Error happened while serializing the module. @@ -220,18 +138,7 @@ pub enum PrepareError { TooManyLocals, } -#[derive( - Debug, - Clone, - PartialEq, - Eq, - BorshDeserialize, - BorshSerialize, - RpcError, - serde::Deserialize, - serde::Serialize, - strum::IntoStaticStr, -)] +#[derive(Debug, Clone, PartialEq, Eq, strum::IntoStaticStr)] pub enum HostError { /// String encoding is bad UTF-16 sequence BadUTF16, @@ -259,8 +166,6 @@ pub enum HostError { InvalidPromiseResultIndex { result_idx: u64 }, /// Accessed invalid register id InvalidRegisterId { register_id: u64 }, - /// Iterator `iterator_index` was invalidated after its creation by performing a mutable operation on trie - IteratorWasInvalidated { iterator_index: u64 }, /// Accessed memory outside the bounds MemoryAccessViolation, /// VM Logic returned an invalid receipt index @@ -323,27 +228,6 @@ pub enum InconsistentStateError { IntegerOverflow, } -impl From for FunctionCallErrorSer { - fn from(outer_err: FunctionCallError) -> Self { - match outer_err { - FunctionCallError::CompilationError(e) => FunctionCallErrorSer::CompilationError(e), - FunctionCallError::MethodResolveError(e) => FunctionCallErrorSer::MethodResolveError(e), - // Note: We deliberately collapse all execution errors for - // serialization to make the DB representation less dependent - // on specific types in Rust code. - FunctionCallError::HostError(ref _e) => { - FunctionCallErrorSer::ExecutionError(outer_err.to_string()) - } - FunctionCallError::LinkError { msg } => { - FunctionCallErrorSer::ExecutionError(format!("Link Error: {}", msg)) - } - FunctionCallError::WasmTrap(ref _e) => { - FunctionCallErrorSer::ExecutionError(outer_err.to_string()) - } - } - } -} - impl From for VMLogicError { fn from(err: HostError) -> Self { VMLogicError::HostError(err) @@ -476,36 +360,81 @@ impl std::fmt::Display for HostError { BadUTF8 => write!(f, "String encoding is bad UTF-8 sequence."), BadUTF16 => write!(f, "String encoding is bad UTF-16 sequence."), GasExceeded => write!(f, "Exceeded the prepaid gas."), - GasLimitExceeded => write!(f, "Exceeded the maximum amount of gas allowed to burn per contract."), + GasLimitExceeded => { + write!(f, "Exceeded the maximum amount of gas allowed to burn per contract.") + } BalanceExceeded => write!(f, "Exceeded the account balance."), EmptyMethodName => write!(f, "Tried to call an empty method name."), GuestPanic { panic_msg } => write!(f, "Smart contract panicked: {}", panic_msg), IntegerOverflow => write!(f, "Integer overflow."), - InvalidIteratorIndex { iterator_index } => write!(f, "Iterator index {:?} does not exist", iterator_index), - InvalidPromiseIndex { promise_idx } => write!(f, "{:?} does not correspond to existing promises", promise_idx), - CannotAppendActionToJointPromise => write!(f, "Actions can only be appended to non-joint promise."), - CannotReturnJointPromise => write!(f, "Returning joint promise is currently prohibited."), - InvalidPromiseResultIndex { result_idx } => write!(f, "Accessed invalid promise result index: {:?}", result_idx), - InvalidRegisterId { register_id } => write!(f, "Accessed invalid register id: {:?}", register_id), - IteratorWasInvalidated { iterator_index } => write!(f, "Iterator {:?} was invalidated after its creation by performing a mutable operation on trie", iterator_index), + InvalidIteratorIndex { iterator_index } => { + write!(f, "Iterator index {:?} does not exist", iterator_index) + } + InvalidPromiseIndex { promise_idx } => { + write!(f, "{:?} does not correspond 
to existing promises", promise_idx) + } + CannotAppendActionToJointPromise => { + write!(f, "Actions can only be appended to non-joint promise.") + } + CannotReturnJointPromise => { + write!(f, "Returning joint promise is currently prohibited.") + } + InvalidPromiseResultIndex { result_idx } => { + write!(f, "Accessed invalid promise result index: {:?}", result_idx) + } + InvalidRegisterId { register_id } => { + write!(f, "Accessed invalid register id: {:?}", register_id) + } MemoryAccessViolation => write!(f, "Accessed memory outside the bounds."), - InvalidReceiptIndex { receipt_index } => write!(f, "VM Logic returned an invalid receipt index: {:?}", receipt_index), + InvalidReceiptIndex { receipt_index } => { + write!(f, "VM Logic returned an invalid receipt index: {:?}", receipt_index) + } InvalidAccountId => write!(f, "VM Logic returned an invalid account id"), InvalidMethodName => write!(f, "VM Logic returned an invalid method name"), InvalidPublicKey => write!(f, "VM Logic provided an invalid public key"), - ProhibitedInView { method_name } => write!(f, "{} is not allowed in view calls", method_name), - NumberOfLogsExceeded { limit } => write!(f, "The number of logs will exceed the limit {}", limit), - KeyLengthExceeded { length, limit } => write!(f, "The length of a storage key {} exceeds the limit {}", length, limit), - ValueLengthExceeded { length, limit } => write!(f, "The length of a storage value {} exceeds the limit {}", length, limit), - TotalLogLengthExceeded{ length, limit } => write!(f, "The length of a log message {} exceeds the limit {}", length, limit), - NumberPromisesExceeded { number_of_promises, limit } => write!(f, "The number of promises within a FunctionCall {} exceeds the limit {}", number_of_promises, limit), - NumberInputDataDependenciesExceeded { number_of_input_data_dependencies, limit } => write!(f, "The number of input data dependencies {} exceeds the limit {}", number_of_input_data_dependencies, limit), - ReturnedValueLengthExceeded { length, limit } => write!(f, "The length of a returned value {} exceeds the limit {}", length, limit), - ContractSizeExceeded { size, limit } => write!(f, "The size of a contract code in DeployContract action {} exceeds the limit {}", size, limit), - Deprecated {method_name}=> write!(f, "Attempted to call deprecated host function {}", method_name), + ProhibitedInView { method_name } => { + write!(f, "{} is not allowed in view calls", method_name) + } + NumberOfLogsExceeded { limit } => { + write!(f, "The number of logs will exceed the limit {}", limit) + } + KeyLengthExceeded { length, limit } => { + write!(f, "The length of a storage key {} exceeds the limit {}", length, limit) + } + ValueLengthExceeded { length, limit } => { + write!(f, "The length of a storage value {} exceeds the limit {}", length, limit) + } + TotalLogLengthExceeded { length, limit } => { + write!(f, "The length of a log message {} exceeds the limit {}", length, limit) + } + NumberPromisesExceeded { number_of_promises, limit } => write!( + f, + "The number of promises within a FunctionCall {} exceeds the limit {}", + number_of_promises, limit + ), + NumberInputDataDependenciesExceeded { number_of_input_data_dependencies, limit } => { + write!( + f, + "The number of input data dependencies {} exceeds the limit {}", + number_of_input_data_dependencies, limit + ) + } + ReturnedValueLengthExceeded { length, limit } => { + write!(f, "The length of a returned value {} exceeds the limit {}", length, limit) + } + ContractSizeExceeded { size, limit } => write!( 
+ f, + "The size of a contract code in DeployContract action {} exceeds the limit {}", + size, limit + ), + Deprecated { method_name } => { + write!(f, "Attempted to call deprecated host function {}", method_name) + } AltBn128InvalidInput { msg } => write!(f, "AltBn128 invalid input: {}", msg), ECRecoverError { msg } => write!(f, "ECDSA recover error: {}", msg), - Ed25519VerifyInvalidInput { msg } => write!(f, "ED25519 signature verification error: {}", msg), + Ed25519VerifyInvalidInput { msg } => { + write!(f, "ED25519 signature verification error: {}", msg) + } } } } diff --git a/runtime/near-vm-runner/src/logic/tests/promises.rs b/runtime/near-vm-runner/src/logic/tests/promises.rs index 98b068e79c7..d85b7c5e762 100644 --- a/runtime/near-vm-runner/src/logic/tests/promises.rs +++ b/runtime/near-vm-runner/src/logic/tests/promises.rs @@ -4,17 +4,16 @@ use crate::logic::tests::vm_logic_builder::VMLogicBuilder; use crate::logic::types::PromiseResult; use crate::logic::VMLogic; use borsh::BorshSerialize; -use near_account_id::AccountId; use near_crypto::PublicKey; use serde_json; -#[derive(serde::Serialize)] -struct ReceiptView<'a> { - receiver_id: &'a AccountId, - actions: &'a [Action], -} +fn vm_receipts<'a>(logic: &'a VMLogic) -> Vec { + #[derive(serde::Serialize)] + struct ReceiptView<'a, T> { + receiver_id: T, + actions: &'a [Action], + } -fn vm_receipts<'a>(logic: &'a VMLogic) -> Vec> { logic .receipt_manager() .action_receipts diff --git a/runtime/runtime/Cargo.toml b/runtime/runtime/Cargo.toml index 22a7dc366ec..9321c338d1a 100644 --- a/runtime/runtime/Cargo.toml +++ b/runtime/runtime/Cargo.toml @@ -40,7 +40,6 @@ nightly = [ "near-vm-runner/nightly", ] default = [] -dump_errors_schema = ["near-vm-runner/dump_errors_schema"] nightly_protocol = [ "near-chain-configs/nightly_protocol", "near-o11y/nightly_protocol", diff --git a/runtime/runtime/src/actions.rs b/runtime/runtime/src/actions.rs index f03f4ef9d9b..82386289880 100644 --- a/runtime/runtime/src/actions.rs +++ b/runtime/runtime/src/actions.rs @@ -31,8 +31,7 @@ use near_store::{ StorageError, TrieUpdate, }; use near_vm_runner::logic::errors::{ - CompilationError, FunctionCallError, FunctionCallErrorSer, InconsistentStateError, - VMRunnerError, + CompilationError, FunctionCallError, InconsistentStateError, VMRunnerError, }; use near_vm_runner::logic::types::PromiseResult; use near_vm_runner::logic::{ActionCosts, VMContext, VMOutcome}; @@ -58,7 +57,7 @@ pub(crate) fn execute_function_call( Ok(Some(code)) => code, Ok(None) => { let error = FunctionCallError::CompilationError(CompilationError::CodeDoesNotExist { - account_id: account_id.clone(), + account_id: account_id.as_str().into(), }); return Ok(VMOutcome::nop_outcome(error)); } @@ -241,8 +240,7 @@ pub(crate) fn action_function_call( } // Update action result with the abort error converted to the // transaction runtime's format of errors. 
- let ser: FunctionCallErrorSer = err.into(); - let action_err: ActionError = ActionErrorKind::FunctionCallError(ser).into(); + let action_err: ActionError = ActionErrorKind::FunctionCallError(err.into()).into(); result.result = Err(action_err); } result.gas_burnt = safe_add_gas(result.gas_burnt, outcome.burnt_gas)?; From aee8ab32899ac7c130f1d1a008d2f0b1fb3baca2 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 14 Jul 2023 16:10:49 +0300 Subject: [PATCH 06/50] rust: 1.70.0 -> 1.71.0 (#9302) Announcement: https://blog.rust-lang.org/2023/07/13/Rust-1.71.0.html Notable breakages for us involve tightened down lints and replacement of the `clippy::integer_arithtmetic` lint with a more general `clippy::arithmentic_side_effects` lint. The latter was particularly angry about `curve25519-dalek` crate which only exposes unchecked arithmetic operations. I had no clue what the expected behaviour there is (wrapping? a panic?) so I simply allowed the lint for now, but somebody should definitely take a look at it in the future cc @abacabadabacaba --- Cargo.toml | 2 +- chain/chunks/src/lib.rs | 2 +- core/crypto/src/lib.rs | 2 +- core/crypto/src/vrf.rs | 15 ++++++++++ core/o11y/src/lib.rs | 2 +- core/store/src/trie/split_state.rs | 4 +-- core/store/src/trie/trie_storage.rs | 3 +- .../near-vm-runner/src/logic/tests/context.rs | 2 +- runtime/near-vm-runner/src/near_vm_runner.rs | 4 +-- runtime/near-vm-runner/src/tests/cache.rs | 28 +++++++++---------- runtime/near-vm-runner/src/wasmer2_runner.rs | 4 +-- .../compiler-singlepass/src/codegen_x64.rs | 2 +- .../near-vm/test-api/src/sys/import_object.rs | 12 ++++---- runtime/near-vm/vm/src/table.rs | 4 +-- .../emu-cost/Dockerfile | 2 +- rust-toolchain.toml | 2 +- tools/rpctypegen/core/src/lib.rs | 2 +- utils/stdx/src/lib.rs | 2 +- 18 files changed, 53 insertions(+), 41 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f00b71ad712..864c06a6b0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ version = "0.0.0" # managed by cargo-workspaces, see below authors = ["Near Inc "] edition = "2021" -rust-version = "1.70.0" +rust-version = "1.71.0" repository = "https://github.com/near/nearcore" license = "MIT OR Apache-2.0" diff --git a/chain/chunks/src/lib.rs b/chain/chunks/src/lib.rs index 1872b6cdc0a..710d5c13b52 100644 --- a/chain/chunks/src/lib.rs +++ b/chain/chunks/src/lib.rs @@ -224,7 +224,7 @@ impl RequestPool { pub fn fetch(&mut self, current_time: time::Instant) -> Vec<(ChunkHash, ChunkRequestInfo)> { let mut removed_requests = HashSet::::default(); let mut requests = Vec::new(); - for (chunk_hash, mut chunk_request) in self.requests.iter_mut() { + for (chunk_hash, chunk_request) in self.requests.iter_mut() { if current_time - chunk_request.added >= self.max_duration { debug!(target: "chunks", "Evicted chunk requested that was never fetched {} (shard_id: {})", chunk_hash.0, chunk_request.shard_id); removed_requests.insert(chunk_hash.clone()); diff --git a/core/crypto/src/lib.rs b/core/crypto/src/lib.rs index 1cfbfdbf5b8..c29d0b8a906 100644 --- a/core/crypto/src/lib.rs +++ b/core/crypto/src/lib.rs @@ -1,4 +1,4 @@ -#![deny(clippy::integer_arithmetic)] +#![deny(clippy::arithmetic_side_effects)] pub use errors::{ParseKeyError, ParseKeyTypeError, ParseSignatureError}; pub use key_file::KeyFile; diff --git a/core/crypto/src/vrf.rs b/core/crypto/src/vrf.rs index 7f79bddcb70..4745ebc80bd 100644 --- a/core/crypto/src/vrf.rs +++ b/core/crypto/src/vrf.rs @@ -26,6 +26,9 @@ impl PublicKey { self.is_valid(input.borrow(), value, proof) } + // FIXME: no 
clear fix is available here -- the underlying library runs a non-trivial amount of + // unchecked arithmetic inside and provides no apparent way to do it in a checked manner. + #[allow(clippy::arithmetic_side_effects)] fn is_valid(&self, input: &[u8], value: &Value, proof: &Proof) -> bool { let p = unwrap_or_return_false!(unpack(&value.0)); let (r, c) = unwrap_or_return_false!(unpack(&proof.0)); @@ -38,6 +41,10 @@ impl PublicKey { } } +// FIXME: no clear fix is available here -- the underlying library runs a non-trivial amount of +// unchecked arithmetic inside and provides no apparent way to do it in a checked or wrapping +// manner. +#[allow(clippy::arithmetic_side_effects)] fn basemul(s: Scalar) -> Point { &s * > } @@ -64,6 +71,10 @@ impl SecretKey { self.compute(input.borrow()) } + // FIXME: no clear fix is available here -- the underlying library runs a non-trivial amount of + // unchecked arithmetic inside and provides no apparent way to do it in a checked or wrapping + // manner. + #[allow(clippy::arithmetic_side_effects)] fn compute(&self, input: &[u8]) -> Value { Value(basemul(safe_invert(self.0 + self.1.offset(input))).pack()) } @@ -72,6 +83,10 @@ impl SecretKey { self.compute_with_proof(input.borrow()) } + // FIXME: no clear fix is available here -- the underlying library runs a non-trivial amount of + // unchecked arithmetic inside and provides no apparent way to do it in a checked or wrapping + // manner. + #[allow(clippy::arithmetic_side_effects)] fn compute_with_proof(&self, input: &[u8]) -> (Value, Proof) { let x = self.0 + self.1.offset(input); let inv = safe_invert(x); diff --git a/core/o11y/src/lib.rs b/core/o11y/src/lib.rs index 9403e230187..0ef1f85ba36 100644 --- a/core/o11y/src/lib.rs +++ b/core/o11y/src/lib.rs @@ -1,5 +1,5 @@ #![doc = include_str!("../README.md")] -#![deny(clippy::integer_arithmetic)] +#![deny(clippy::arithmetic_side_effects)] pub use context::*; use near_crypto::PublicKey; diff --git a/core/store/src/trie/split_state.rs b/core/store/src/trie/split_state.rs index 5841418f7bd..e857e42484a 100644 --- a/core/store/src/trie/split_state.rs +++ b/core/store/src/trie/split_state.rs @@ -227,7 +227,7 @@ fn apply_delayed_receipts_to_split_states_impl( } // we already checked that new_shard_uid is in trie_updates and delayed_receipts_indices // so we can safely unwrap here - let mut delayed_receipts_indices = + let delayed_receipts_indices = delayed_receipts_indices_by_shard.get_mut(&new_shard_uid).unwrap(); set( trie_updates.get_mut(&new_shard_uid).unwrap(), @@ -254,7 +254,7 @@ fn apply_delayed_receipts_to_split_states_impl( ); return Err(StorageError::StorageInconsistentState(err)); } - let mut delayed_receipts_indices = + let delayed_receipts_indices = delayed_receipts_indices_by_shard.get_mut(&new_shard_uid).unwrap(); let trie_update = trie_updates.get_mut(&new_shard_uid).unwrap(); diff --git a/core/store/src/trie/trie_storage.rs b/core/store/src/trie/trie_storage.rs index cd3f0e64277..15eb9b7fcb7 100644 --- a/core/store/src/trie/trie_storage.rs +++ b/core/store/src/trie/trie_storage.rs @@ -10,7 +10,6 @@ use near_primitives::challenge::PartialState; use near_primitives::hash::CryptoHash; use near_primitives::shard_layout::ShardUId; use near_primitives::types::{ShardId, TrieCacheMode, TrieNodesCount}; -use std::borrow::Borrow; use std::cell::{Cell, RefCell}; use std::collections::{HashMap, HashSet, VecDeque}; use std::rc::Rc; @@ -614,7 +613,7 @@ impl TrieStorage for TrieCachingStorage { // (`storage_read_value_byte`) ~= (500 * 10**12 / 5611005) / 2**20 ~= 85 
MB. // All values are given as of 16/03/2022. We may consider more precise limit for the chunk cache as well. self.inc_db_read_nodes(); - if let TrieCacheMode::CachingChunk = self.cache_mode.borrow().get() { + if let TrieCacheMode::CachingChunk = self.cache_mode.get() { self.chunk_cache.borrow_mut().insert(*hash, val.clone()); }; diff --git a/runtime/near-vm-runner/src/logic/tests/context.rs b/runtime/near-vm-runner/src/logic/tests/context.rs index 24026698ffa..fe6ec44ee51 100644 --- a/runtime/near-vm-runner/src/logic/tests/context.rs +++ b/runtime/near-vm-runner/src/logic/tests/context.rs @@ -95,7 +95,7 @@ fn test_attached_deposit_view() { #[track_caller] fn test_view(amount: u128) { let mut logic_builder = VMLogicBuilder::default(); - let mut context = &mut logic_builder.context; + let context = &mut logic_builder.context; context.view_config = Some(ViewConfig { max_gas_burnt: VMLimitConfig::test().max_gas_burnt }); context.account_balance = 0; diff --git a/runtime/near-vm-runner/src/near_vm_runner.rs b/runtime/near-vm-runner/src/near_vm_runner.rs index 8562c2dd5d8..7db593f2d1b 100644 --- a/runtime/near-vm-runner/src/near_vm_runner.rs +++ b/runtime/near-vm-runner/src/near_vm_runner.rs @@ -214,11 +214,11 @@ impl NearVmConfig { } // We use following scheme for the bits forming seed: -// kind << 10, kind is 1 for Wasmer2, 2 for NearVm +// kind << 29, kind 2 is for NearVm // major version << 6 // minor version const VM_CONFIG: NearVmConfig = NearVmConfig { - seed: (2 << 10) | (1 << 6) | 2, + seed: (2 << 29) | (2 << 6) | 0, engine: NearVmEngine::Universal, compiler: NearVmCompiler::Singlepass, }; diff --git a/runtime/near-vm-runner/src/tests/cache.rs b/runtime/near-vm-runner/src/tests/cache.rs index b1894762a41..d2960d017c0 100644 --- a/runtime/near-vm-runner/src/tests/cache.rs +++ b/runtime/near-vm-runner/src/tests/cache.rs @@ -120,13 +120,13 @@ fn test_wasmer2_artifact_output_stability() { ]; let mut got_prepared_hashes = Vec::with_capacity(seeds.len()); let compiled_hashes = [ - 16241863964906842660, - 9891733092817574479, - 17353479639813695155, - 14282522049460604929, - 10549554738494211661, - 15523181531223292814, - 1999054137996096555, + 10064221885882795403, + 3125775751094251057, + 10028445138356098295, + 12076298193069645776, + 5262356478082097591, + 15002713309850850128, + 17666356303775050986, ]; let mut got_compiled_hashes = Vec::with_capacity(seeds.len()); for seed in seeds { @@ -191,13 +191,13 @@ fn test_near_vm_artifact_output_stability() { ]; let mut got_prepared_hashes = Vec::with_capacity(seeds.len()); let compiled_hashes = [ - 11507498784243099762, - 14031545576101638739, - 2630687984789910827, - 15828343131478480720, - 6078633865191114650, - 1749545310758671460, - 15841184848317093324, + 4853457605418485197, + 13732980080772388685, + 13113947215618315585, + 14806575926393320657, + 12949634280637067071, + 6571507299571270433, + 2426595065881413005, ]; let mut got_compiled_hashes = Vec::with_capacity(seeds.len()); for seed in seeds { diff --git a/runtime/near-vm-runner/src/wasmer2_runner.rs b/runtime/near-vm-runner/src/wasmer2_runner.rs index 9bd44cffa70..1f8107dfa1b 100644 --- a/runtime/near-vm-runner/src/wasmer2_runner.rs +++ b/runtime/near-vm-runner/src/wasmer2_runner.rs @@ -215,11 +215,11 @@ impl Wasmer2Config { } // We use following scheme for the bits forming seed: -// kind << 10, kind is 1 for Wasmer2 +// kind << 29, kind is 1 for Wasmer2 // major version << 6 // minor version const WASMER2_CONFIG: Wasmer2Config = Wasmer2Config { - seed: (1 << 10) | (11 << 6) | 0, + 
seed: (1 << 29) | (12 << 6) | 0, engine: WasmerEngine::Universal, compiler: WasmerCompiler::Singlepass, }; diff --git a/runtime/near-vm/compiler-singlepass/src/codegen_x64.rs b/runtime/near-vm/compiler-singlepass/src/codegen_x64.rs index 882697a9841..37602fa3a97 100644 --- a/runtime/near-vm/compiler-singlepass/src/codegen_x64.rs +++ b/runtime/near-vm/compiler-singlepass/src/codegen_x64.rs @@ -4921,7 +4921,7 @@ impl<'a> FuncGen<'a> { } } - let mut frame = self.control_stack.last_mut().unwrap(); + let frame = self.control_stack.last_mut().unwrap(); let released: &[Location] = &self.value_stack[frame.value_stack_depth..]; self.machine.release_locations(self.assembler, released); diff --git a/runtime/near-vm/test-api/src/sys/import_object.rs b/runtime/near-vm/test-api/src/sys/import_object.rs index ae5cd5587f6..c0ff74faa23 100644 --- a/runtime/near-vm/test-api/src/sys/import_object.rs +++ b/runtime/near-vm/test-api/src/sys/import_object.rs @@ -2,7 +2,7 @@ //! manipulate and access a wasm module's imports including memories, tables, globals, and //! functions. use near_vm_vm::{Export, NamedResolver}; -use std::borrow::{Borrow, BorrowMut}; +use std::borrow::BorrowMut; use std::collections::VecDeque; use std::collections::{hash_map::Entry, HashMap}; use std::fmt; @@ -59,8 +59,7 @@ impl ImportObject { /// import_object.get_export("module", "name"); /// ``` pub fn get_export(&self, module: &str, name: &str) -> Option { - let guard = self.map.lock().unwrap(); - let map_ref = guard.borrow(); + let map_ref = self.map.lock().unwrap(); if map_ref.contains_key(module) { let namespace = map_ref[module].as_ref(); return namespace.get_namespace_export(name); @@ -70,7 +69,7 @@ impl ImportObject { /// Returns true if the ImportObject contains namespace with the provided name. pub fn contains_namespace(&self, name: &str) -> bool { - self.map.lock().unwrap().borrow().contains_key(name) + self.map.lock().unwrap().contains_key(name) } /// Register anything that implements `LikeNamespace` as a namespace. @@ -103,8 +102,7 @@ impl ImportObject { fn get_objects(&self) -> VecDeque<((String, String), Export)> { let mut out = VecDeque::new(); - let guard = self.map.lock().unwrap(); - let map = guard.borrow(); + let map = self.map.lock().unwrap(); for (name, ns) in map.iter() { for (id, exp) in ns.get_namespace_exports() { out.push_back(((name.clone(), id), exp)); @@ -168,7 +166,7 @@ impl fmt::Debug for ImportObject { } f.debug_struct("ImportObject") - .field("map", &SecretMap::new(self.map.lock().unwrap().borrow().len())) + .field("map", &SecretMap::new(self.map.lock().unwrap().len())) .finish() } } diff --git a/runtime/near-vm/vm/src/table.rs b/runtime/near-vm/vm/src/table.rs index 24cfbee950c..8316208e4db 100644 --- a/runtime/near-vm/vm/src/table.rs +++ b/runtime/near-vm/vm/src/table.rs @@ -10,7 +10,7 @@ use crate::trap::{Trap, TrapCode}; use crate::vmcontext::VMTableDefinition; use crate::VMExternRef; use near_vm_types::{ExternRef, TableType, Type as ValType}; -use std::borrow::{Borrow, BorrowMut}; +use std::borrow::BorrowMut; use std::cell::UnsafeCell; use std::convert::TryFrom; use std::fmt; @@ -337,7 +337,7 @@ impl Table for LinearTable { /// Returns `None` if the index is out of bounds. 
fn get(&self, index: u32) -> Option { let vec_guard = self.vec.lock().unwrap(); - let raw_data = vec_guard.borrow().get(index as usize).cloned()?; + let raw_data = vec_guard.get(index as usize).cloned()?; Some(match self.table.ty { ValType::ExternRef => { TableElement::ExternRef(unsafe { raw_data.extern_ref.ref_clone() }.into()) diff --git a/runtime/runtime-params-estimator/emu-cost/Dockerfile b/runtime/runtime-params-estimator/emu-cost/Dockerfile index 6b3856e7936..8fca02119f4 100644 --- a/runtime/runtime-params-estimator/emu-cost/Dockerfile +++ b/runtime/runtime-params-estimator/emu-cost/Dockerfile @@ -1,5 +1,5 @@ # our local base image -FROM rust:1.70.0 +FROM rust:1.71.0 LABEL description="Container for builds" diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 02fb9bb7c5d..70a43b798fa 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -2,6 +2,6 @@ # This specifies the version of Rust we use to build. # Individual crates in the workspace may support a lower version, as indicated by `rust-version` field in each crate's `Cargo.toml`. # The version specified below, should be at least as high as the maximum `rust-version` within the workspace. -channel = "1.70.0" +channel = "1.71.0" components = [ "rustfmt" ] targets = [ "wasm32-unknown-unknown" ] diff --git a/tools/rpctypegen/core/src/lib.rs b/tools/rpctypegen/core/src/lib.rs index 8c9a57db45b..f1b879814be 100644 --- a/tools/rpctypegen/core/src/lib.rs +++ b/tools/rpctypegen/core/src/lib.rs @@ -65,7 +65,7 @@ pub fn parse_error_type(schema: &mut BTreeMap, input: &Derive } } for e in direct_error_types { - let mut error_type = error_type_name(schema, e.name.clone()); + let error_type = error_type_name(schema, e.name.clone()); error_type.name = e.name; error_type.props = e.props; } diff --git a/utils/stdx/src/lib.rs b/utils/stdx/src/lib.rs index c7625dba324..a31eb9a7581 100644 --- a/utils/stdx/src/lib.rs +++ b/utils/stdx/src/lib.rs @@ -1,6 +1,6 @@ //! `stdx` crate contains polyfills which should really be in std, //! but currently aren't for one reason or another. -#![deny(clippy::integer_arithmetic)] +#![deny(clippy::arithmetic_side_effects)] // TODO(mina86): Replace usage of the split functions by split_array_ref et al // methods of array and slice types once those are stabilised. From 2981f5d2f8b53097bccb28e38172e85948790882 Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Fri, 14 Jul 2023 16:16:12 +0200 Subject: [PATCH 07/50] fix(state-sync): Always use flat storage when catching up (#9311) The original code made the use of flat storage conditional on the node tracking that shard this epoch. If a node prepares to track shard S next epoch E, then it downloads its state (E-1) and applies chunks in order. To apply chunks correctly in a way compatible with the rest of the network, it needs to be using flat storage. Also add a metric for the latest block processed during catchup. Also fix `view-state apply-range` tool not to fail because of getting delayed indices. Also reduce verbosity of the inlining migration. 
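For illustration, here is a minimal, self-contained Rust sketch of the decision this change removes; `CatchupContext` and `use_flat_storage_for_catchup` are made-up names for this sketch, not the actual nearcore types or functions:

```
/// Illustrative sketch only; these are not the real nearcore types.
struct CatchupContext {
    /// The condition the old code consulted before enabling flat storage.
    cares_about_shard_this_epoch: bool,
}

/// Decides whether chunks applied during catchup go through flat storage.
fn use_flat_storage_for_catchup(ctx: &CatchupContext) -> bool {
    // Old behaviour (removed): use flat storage only when the node already
    // tracks the shard this epoch, i.e. `ctx.cares_about_shard_this_epoch`.
    // New behaviour: always use flat storage, so a node catching up for a
    // shard it will only track next epoch applies chunks the same way as
    // nodes that already track it.
    let _ = ctx.cares_about_shard_this_epoch;
    true
}

fn main() {
    let ctx = CatchupContext { cares_about_shard_this_epoch: false };
    assert!(use_flat_storage_for_catchup(&ctx));
}
```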
--- chain/chain/src/chain.rs | 15 ++++++++++----- chain/chain/src/metrics.rs | 7 +++++++ core/store/src/flat/inlining_migration.rs | 2 +- tools/state-viewer/src/apply_chain_range.rs | 8 ++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 4e9ba507b6a..d08ad654a3c 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -3408,9 +3408,13 @@ impl Chain { blocks_catch_up_state: &mut BlocksCatchUpState, block_catch_up_scheduler: &dyn Fn(BlockCatchUpRequest), ) -> Result<(), Error> { - debug!(target:"catchup", "catch up blocks: pending blocks: {:?}, processed {:?}, scheduled: {:?}, done: {:?}", - blocks_catch_up_state.pending_blocks, blocks_catch_up_state.processed_blocks.keys().collect::>(), - blocks_catch_up_state.scheduled_blocks, blocks_catch_up_state.done_blocks.len()); + tracing::debug!( + target: "catchup", + pending_blocks = ?blocks_catch_up_state.pending_blocks, + processed_blocks = ?blocks_catch_up_state.processed_blocks.keys().collect::>(), + scheduled_blocks = ?blocks_catch_up_state.scheduled_blocks, + done_blocks = blocks_catch_up_state.done_blocks.len(), + "catch up blocks"); let mut processed_blocks = HashMap::new(); for (queued_block, results) in blocks_catch_up_state.processed_blocks.drain() { // If this block is parent of some blocks in processing that need to be caught up, @@ -3461,6 +3465,7 @@ impl Chain { Default::default(), &mut Vec::new(), )?; + metrics::SCHEDULED_CATCHUP_BLOCK.set(block.header().height() as i64); blocks_catch_up_state.scheduled_blocks.insert(pending_block); block_catch_up_scheduler(BlockCatchUpRequest { sync_hash: *sync_hash, @@ -3983,7 +3988,7 @@ impl Chain { true, is_first_block_with_chunk_of_version, state_patch, - cares_about_shard_this_epoch, + true, ) { Ok(apply_result) => { let apply_split_result_or_state_changes = @@ -4044,7 +4049,7 @@ impl Chain { false, false, state_patch, - cares_about_shard_this_epoch, + true, ) { Ok(apply_result) => { let apply_split_result_or_state_changes = diff --git a/chain/chain/src/metrics.rs b/chain/chain/src/metrics.rs index e38c70a68c1..d6ddce5867b 100644 --- a/chain/chain/src/metrics.rs +++ b/chain/chain/src/metrics.rs @@ -110,3 +110,10 @@ pub static STATE_PART_ELAPSED: Lazy = Lazy::new(|| { pub static NUM_INVALID_BLOCKS: Lazy = Lazy::new(|| { try_create_int_gauge("near_num_invalid_blocks", "Number of invalid blocks").unwrap() }); +pub(crate) static SCHEDULED_CATCHUP_BLOCK: Lazy = Lazy::new(|| { + try_create_int_gauge( + "near_catchup_scheduled_block_height", + "Tracks the progress of blocks catching up", + ) + .unwrap() +}); diff --git a/core/store/src/flat/inlining_migration.rs b/core/store/src/flat/inlining_migration.rs index 4886d5e2026..5b0c15c8919 100644 --- a/core/store/src/flat/inlining_migration.rs +++ b/core/store/src/flat/inlining_migration.rs @@ -269,7 +269,7 @@ pub fn inline_flat_state_values( } store_update.commit().expect("failed to commit inlined values"); assert!(flat_storage_manager.set_flat_state_updates_mode(true)); - tracing::info!(target: "store", "Unlocked flat storage after the inlining migration"); + tracing::debug!(target: "store", "Unlocked flat storage after the inlining migration"); inlined_total_count += inlined_batch_count; batch_duration = batch_inlining_start.elapsed(); FLAT_STATE_PAUSED_DURATION.observe(batch_duration.as_secs_f64()); diff --git a/tools/state-viewer/src/apply_chain_range.rs b/tools/state-viewer/src/apply_chain_range.rs index 4b392d5dbc0..eb9d05a3f1b 100644 --- 
a/tools/state-viewer/src/apply_chain_range.rs +++ b/tools/state-viewer/src/apply_chain_range.rs @@ -11,7 +11,7 @@ use near_primitives::transaction::{Action, ExecutionOutcomeWithId, ExecutionOutc use near_primitives::trie_key::TrieKey; use near_primitives::types::chunk_extra::ChunkExtra; use near_primitives::types::{BlockHeight, ShardId}; -use near_store::{get, DBCol, Store}; +use near_store::{DBCol, Store}; use nearcore::NightshadeRuntime; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::fs::File; @@ -288,8 +288,8 @@ fn apply_block_from_range( let state_update = runtime_adapter.get_tries().new_trie_update(shard_uid, *chunk_extra.state_root()); - let delayed_indices: Option = - get(&state_update, &TrieKey::DelayedReceiptIndices).unwrap(); + let delayed_indices = + near_store::get::(&state_update, &TrieKey::DelayedReceiptIndices); match existing_chunk_extra { Some(existing_chunk_extra) => { @@ -321,7 +321,7 @@ fn apply_block_from_range( apply_result.total_gas_burnt, chunk_present, apply_result.processed_delayed_receipts.len(), - delayed_indices.map_or(0, |d| d.next_available_index - d.first_index), + delayed_indices.unwrap_or(None).map_or(0, |d| d.next_available_index - d.first_index), apply_result.trie_changes.state_changes().len(), ), ); From 2ab8fccb42f9d188b457adc51dac7c865f39323a Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Fri, 14 Jul 2023 16:54:53 +0200 Subject: [PATCH 08/50] fix(state-snapshot): Tool to make DB snapshots (#9308) Co-authored-by: near-bulldozer[bot] <73298989+near-bulldozer[bot]@users.noreply.github.com> --- Cargo.lock | 1 + core/store/src/opener.rs | 2 +- tools/database/Cargo.toml | 1 + tools/database/README.md | 23 +++++++- tools/database/src/commands.rs | 12 +++++ tools/database/src/lib.rs | 1 + tools/database/src/make_snapshot.rs | 83 +++++++++++++++++++++++++++++ 7 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 tools/database/src/make_snapshot.rs diff --git a/Cargo.lock b/Cargo.lock index c212b8e5a95..f6ce6b2c8e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3624,6 +3624,7 @@ dependencies = [ "nearcore", "rayon", "strum", + "tempfile", ] [[package]] diff --git a/core/store/src/opener.rs b/core/store/src/opener.rs index 762d0223ef9..a8710695bfb 100644 --- a/core/store/src/opener.rs +++ b/core/store/src/opener.rs @@ -604,7 +604,7 @@ pub fn checkpoint_hot_storage_and_cleanup_columns( config.path = Some(checkpoint_path); let archive = hot_store.get_db_kind()? == Some(DbKind::Archive); let opener = StoreOpener::new(checkpoint_base_path, archive, &config, None); - let node_storage = opener.open()?; + let node_storage = opener.open_in_mode(Mode::ReadWriteExisting)?; if let Some(columns_to_keep) = columns_to_keep { let columns_to_keep_set: std::collections::HashSet = diff --git a/tools/database/Cargo.toml b/tools/database/Cargo.toml index 7f004de84bc..5ef1a5b4f1c 100644 --- a/tools/database/Cargo.toml +++ b/tools/database/Cargo.toml @@ -13,6 +13,7 @@ anyhow.workspace = true clap.workspace = true rayon.workspace = true strum.workspace = true +tempfile.workspace = true nearcore.workspace = true near-store.workspace = true diff --git a/tools/database/README.md b/tools/database/README.md index 3fdaf09c4d2..29cc2c1664f 100644 --- a/tools/database/README.md +++ b/tools/database/README.md @@ -11,7 +11,7 @@ of keys and values within RocksDB. 
To run the script, use the following example: ```bash -cargo run --bin neard -- --home /home/ubuntu/.nerd database analyse-data-size-distribution --column State --top_k 50 +cargo run --bin neard -- --home /home/ubuntu/.near database analyse-data-size-distribution --column State --top_k 50 ``` The arguments are as follows: @@ -45,7 +45,10 @@ It is intended as a collection of commands that perform small db modifications. ### change-db-kind Changes DbKind of a DB described in config (cold or hot). Example usage: -`neard database change-db-kind --new-kind RPC change-cold` +```bash +cargo run --bin neard -- --home /home/ubuntu/.near database change-db-kind --new-kind RPC change-cold +``` + In this example we change DbKind of the cold db to RPC (for some reason). Notice, that you cannot perform this exact command twice in a row, because you will not be able to open cold db in the first place. @@ -59,3 +62,19 @@ Then you can call `neard database change-db-kind --new-kind Cold change-hot`. Notice that even though in your mind this db is cold, in your config this db hot, so you have to pass `change-hot`. +## Make a DB Snapshot + +Makes a copy of a DB (hot store only) at a specified location. If the +destination is within the same filesystem, the copy will be made instantly and +take no additional disk space due to hardlinking all the files. + +Example usage: +```bash +cargo run --bin neard -- --home /home/ubuntu/.near database make_snapshot --destination /home/ubuntu/.near/data/snapshot +``` + +In this example all `.sst` files from `/home/ubuntu/.near/data` will be also +available in `/home/ubuntu/.near/data/snapshot` + +This command can be helpful before attempting activities that can potentially +corrupt the database. diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 8c3e946ab24..0544f246aff 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -1,5 +1,6 @@ use crate::adjust_database::ChangeDbKindCommand; use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand; +use crate::make_snapshot::MakeSnapshotCommand; use crate::run_migrations::RunMigrationsCommand; use clap::Parser; use std::path::PathBuf; @@ -19,6 +20,9 @@ enum SubCommand { /// Change DbKind of hot or cold db. 
ChangeDbKind(ChangeDbKindCommand), + /// Make snapshot of the database + MakeSnapshot(MakeSnapshotCommand), + /// Run migrations, RunMigrations(RunMigrationsCommand), } @@ -35,6 +39,14 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, &near_config) } + SubCommand::MakeSnapshot(cmd) => { + let near_config = nearcore::config::load_config( + &home, + near_chain_configs::GenesisValidationMode::UnsafeFast, + ) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + cmd.run(home, near_config.config.archive, &near_config.config.store) + } SumCommand::RunMigrationsCommand(cmd) => { let mut near_config = nearcore::config::load_config( &home, diff --git a/tools/database/src/lib.rs b/tools/database/src/lib.rs index a9261186b50..a8c8ed15ec5 100644 --- a/tools/database/src/lib.rs +++ b/tools/database/src/lib.rs @@ -1,4 +1,5 @@ mod adjust_database; mod analyse_data_size_distribution; pub mod commands; +mod make_snapshot; mod run_migrations; diff --git a/tools/database/src/make_snapshot.rs b/tools/database/src/make_snapshot.rs new file mode 100644 index 00000000000..8f755a95d88 --- /dev/null +++ b/tools/database/src/make_snapshot.rs @@ -0,0 +1,83 @@ +use near_store::{checkpoint_hot_storage_and_cleanup_columns, Mode, NodeStorage, StoreConfig}; +use std::path::{Path, PathBuf}; + +#[derive(clap::Args)] +pub(crate) struct MakeSnapshotCommand { + /// Destination directory. + #[clap(long)] + destination: PathBuf, +} + +impl MakeSnapshotCommand { + pub(crate) fn run( + &self, + home_dir: &Path, + archive: bool, + store_config: &StoreConfig, + ) -> anyhow::Result<()> { + let opener = NodeStorage::opener(home_dir, archive, store_config, None); + let node_storage = opener.open_in_mode(Mode::ReadWriteExisting)?; + checkpoint_hot_storage_and_cleanup_columns( + &node_storage.get_hot_store(), + &self.destination, + None, + )?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use crate::make_snapshot::MakeSnapshotCommand; + use near_store::{DBCol, Mode, NodeStorage, StoreConfig}; + + /// Populates a DB, makes a checkpoint, makes changes to the DB. + /// Checks that the checkpoint DB can be opened and doesn't contain the latest changes. + #[test] + fn test() { + let home_dir = tempfile::tempdir().unwrap(); + let store_config = StoreConfig::test_config(); + let opener = NodeStorage::opener(home_dir.path(), false, &store_config, None); + + let keys = vec![vec![0], vec![1], vec![2], vec![3]]; + + { + // Populate the DB. + let node_storage = opener.open().unwrap(); + let mut store_update = node_storage.get_hot_store().store_update(); + for key in &keys { + store_update.insert(DBCol::Block, key, &vec![42]); + } + store_update.commit().unwrap(); + println!("Populated"); + // Drops node_storage, which unlocks the DB. + } + + let destination = home_dir.path().join("data").join("snapshot"); + let cmd = MakeSnapshotCommand { destination: destination.clone() }; + cmd.run(home_dir.path(), false, &store_config).unwrap(); + println!("Made a checkpoint"); + + { + // Make a change to the original DB. 
+ let node_storage = opener.open().unwrap(); + let mut store_update = node_storage.get_hot_store().store_update(); + store_update.delete_all(DBCol::Block); + store_update.commit().unwrap(); + println!("Deleted"); + } + + let node_storage = opener.open_in_mode(Mode::ReadOnly).unwrap(); + let snapshot_node_storage = NodeStorage::opener(&destination, false, &store_config, None) + .open_in_mode(Mode::ReadOnly) + .unwrap(); + for key in keys { + let exists_original = node_storage.get_hot_store().exists(DBCol::Block, &key).unwrap(); + let exists_snapshot = + snapshot_node_storage.get_hot_store().exists(DBCol::Block, &key).unwrap(); + println!("{exists_original},{exists_snapshot},{key:?}"); + assert!(!exists_original); + assert!(exists_snapshot); + } + } +} From a3c8ab5895f21b1008d776574d80852e5227a53c Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Fri, 14 Jul 2023 17:20:03 +0200 Subject: [PATCH 09/50] chore(estimator): remove TTN read estimation (#9307) Since we have flat storage for reads, we no longer charge for touched trie nodes (TTN) on reads. Remove the gas estimation for it. More specifically, we used to estimate TTN cost as `max(read_ttn, write_ttn)` and therefore had 3 numbers reported. (read, write, combined). Now we only need a single number reported. The removed code (read TTN estimation) also didn't work anymore, as it didn't actually touch any trie nodes, and hence an assertion was triggered. ``` thread 'main' panicked at 'assertion failed: nodes_touched_delta as usize >= 2 * final_key_len - 10', runtime/runtime-params-estimator/src/trie.rs:118:5 stack backtrace: 0: rust_begin_unwind at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/std/src/panicking.rs:575:5 1: core::panicking::panic_fmt at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/core/src/panicking.rs:64:14 2: core::panicking::panic at /rustc/9eb3afe9ebe9c7d2b84b71002d44f4a0edac95e0/library/core/src/panicking.rs:114:5 3: runtime_params_estimator::touching_trie_node_read 4: runtime_params_estimator::touching_trie_node 5: runtime_params_estimator::run_estimation 6: runtime_params_estimator::main ``` We "fix" it by removing the code. --- runtime/runtime-params-estimator/src/cost.rs | 20 ++----- .../src/estimator_context.rs | 1 - runtime/runtime-params-estimator/src/lib.rs | 23 ++------ .../src/transaction_builder.rs | 8 --- runtime/runtime-params-estimator/src/trie.rs | 54 ------------------- 5 files changed, 7 insertions(+), 99 deletions(-) diff --git a/runtime/runtime-params-estimator/src/cost.rs b/runtime/runtime-params-estimator/src/cost.rs index 4ac5bfed272..4913e96e749 100644 --- a/runtime/runtime-params-estimator/src/cost.rs +++ b/runtime/runtime-params-estimator/src/cost.rs @@ -546,8 +546,10 @@ pub enum Cost { /// `storage_write` or `storage_remove`. The fee is paid once for each /// unique trie node accessed. /// - /// Estimation: Take the maximum of estimations for `TouchingTrieNodeRead` - /// and `TouchingTrieNodeWrite` + /// Estimation: Prepare an account that has many keys stored that are + /// prefixes from each other. Then measure write cost for the shortest and + /// the longest key. The gas estimation difference is divided by the + /// difference of actually touched nodes. 
TouchingTrieNode, /// It is similar to `TouchingTrieNode`, but it is charged instead of this /// cost when we can guarantee that trie node is cached in memory, which @@ -562,20 +564,6 @@ pub enum Cost { /// for this are a bit involved but roughly speaking, it just forces values /// out of CPU caches so that they are always read from memory. ReadCachedTrieNode, - /// Helper estimation for `TouchingTrieNode` - /// - /// Estimation: Prepare an account that has many keys stored that are - /// prefixes from each other. Then measure access cost for the shortest and - /// the longest key. The gas estimation difference is divided by the - /// difference of actually touched nodes. - TouchingTrieNodeRead, - /// Helper estimation for `TouchingTrieNode` - /// - /// Estimation: Prepare an account that has many keys stored that are - /// prefixes from each other. Then measure write cost for the shortest and - /// the longest key. The gas estimation difference is divided by the - /// difference of actually touched nodes. - TouchingTrieNodeWrite, /// Estimates `promise_and_base` which is charged for every call to /// `promise_and`. This should cover the base cost for creating receipt /// dependencies. diff --git a/runtime/runtime-params-estimator/src/estimator_context.rs b/runtime/runtime-params-estimator/src/estimator_context.rs index 700fb311476..1885726e74a 100644 --- a/runtime/runtime-params-estimator/src/estimator_context.rs +++ b/runtime/runtime-params-estimator/src/estimator_context.rs @@ -43,7 +43,6 @@ pub(crate) struct CachedCosts { pub(crate) compile_cost_base_per_byte_v2: Option<(GasCost, GasCost)>, pub(crate) gas_metering_cost_base_per_op: Option<(GasCost, GasCost)>, pub(crate) apply_block: Option, - pub(crate) touching_trie_node_read: Option, pub(crate) touching_trie_node_write: Option, pub(crate) ed25519_verify_base: Option, } diff --git a/runtime/runtime-params-estimator/src/lib.rs b/runtime/runtime-params-estimator/src/lib.rs index d5986d5ad95..35e37370f33 100644 --- a/runtime/runtime-params-estimator/src/lib.rs +++ b/runtime/runtime-params-estimator/src/lib.rs @@ -244,8 +244,6 @@ static ALL_COSTS: &[(Cost, fn(&mut EstimatorContext) -> GasCost)] = &[ (Cost::StorageRemoveRetValueByte, storage_remove_ret_value_byte), (Cost::TouchingTrieNode, touching_trie_node), (Cost::ReadCachedTrieNode, read_cached_trie_node), - (Cost::TouchingTrieNodeRead, touching_trie_node_read), - (Cost::TouchingTrieNodeWrite, touching_trie_node_write), (Cost::ApplyBlock, apply_block_cost), (Cost::ContractCompileBase, contract_compile_base), (Cost::ContractCompileBytes, contract_compile_bytes), @@ -1168,24 +1166,9 @@ fn storage_remove_ret_value_byte(ctx: &mut EstimatorContext) -> GasCost { } fn touching_trie_node(ctx: &mut EstimatorContext) -> GasCost { - let read = touching_trie_node_read(ctx); - let write = touching_trie_node_write(ctx); - return std::cmp::max(read, write); -} - -fn touching_trie_node_read(ctx: &mut EstimatorContext) -> GasCost { - if let Some(cost) = ctx.cached.touching_trie_node_read.clone() { - return cost; - } - let warmup_iters = ctx.config.warmup_iters_per_block; - let measured_iters = ctx.config.iter_per_block; - // Number of bytes in the final key. Will create 2x that many nodes. - // Picked somewhat arbitrarily, balancing estimation time vs accuracy. 
- let final_key_len = 1000; - let cost = trie::read_node_from_db(ctx, warmup_iters, measured_iters, final_key_len); - - ctx.cached.touching_trie_node_read = Some(cost.clone()); - cost + // TTN write cost = TTN cost because we no longer charge it on reads since + // flat storage for reads was introduced + touching_trie_node_write(ctx) } fn touching_trie_node_write(ctx: &mut EstimatorContext) -> GasCost { diff --git a/runtime/runtime-params-estimator/src/transaction_builder.rs b/runtime/runtime-params-estimator/src/transaction_builder.rs index 7f680c83454..8c18d5e2f7a 100644 --- a/runtime/runtime-params-estimator/src/transaction_builder.rs +++ b/runtime/runtime-params-estimator/src/transaction_builder.rs @@ -100,14 +100,6 @@ impl TransactionBuilder { self.transaction_from_function_call(account, "account_storage_insert_key", arg) } - /// Transaction that checks existence of a given key under an account. - /// The account must have the test contract deployed. - pub(crate) fn account_has_key(&mut self, account: AccountId, key: &str) -> SignedTransaction { - let arg = (key.len() as u64).to_le_bytes().into_iter().chain(key.bytes()).collect(); - - self.transaction_from_function_call(account, "account_storage_has_key", arg) - } - pub(crate) fn rng(&mut self) -> ThreadRng { rand::thread_rng() } diff --git a/runtime/runtime-params-estimator/src/trie.rs b/runtime/runtime-params-estimator/src/trie.rs index 5b21233a52b..ca606bd96ff 100644 --- a/runtime/runtime-params-estimator/src/trie.rs +++ b/runtime/runtime-params-estimator/src/trie.rs @@ -5,7 +5,6 @@ use near_primitives::hash::hash; use near_primitives::types::TrieCacheMode; use near_store::{TrieCachingStorage, TrieStorage}; use near_vm_runner::logic::ExtCosts; -use std::iter; use std::sync::atomic::{AtomicUsize, Ordering}; static SINK: AtomicUsize = AtomicUsize::new(0); @@ -69,59 +68,6 @@ pub(crate) fn write_node( cost } -pub(crate) fn read_node_from_db( - ctx: &mut EstimatorContext, - warmup_iters: usize, - measured_iters: usize, - final_key_len: usize, -) -> GasCost { - let block_latency = 0; - let overhead = overhead_per_measured_block(ctx, block_latency); - let mut testbed = ctx.testbed(); - let tb = testbed.transaction_builder(); - // Prepare a long chain in the trie - let signer = tb.random_account(); - let key = "j".repeat(final_key_len); - let mut setup_block = Vec::new(); - for key_len in 0..final_key_len { - let key = &key.as_bytes()[..key_len]; - let value = b"0"; - setup_block.push(tb.account_insert_key(signer.clone(), key, value)); - } - let mut blocks = Vec::with_capacity(1 + 2 * warmup_iters + 2 * measured_iters); - blocks.push(setup_block); - blocks.extend( - iter::repeat_with(|| vec![tb.account_has_key(signer.clone(), &key[0..1])]) - .take(measured_iters + warmup_iters), - ); - blocks.extend( - iter::repeat_with(|| vec![tb.account_has_key(signer.clone(), &key)]) - .take(measured_iters + warmup_iters), - ); - let results = &testbed.measure_blocks(blocks, block_latency)[1..]; - let (short_key_results, long_key_results) = results.split_at(measured_iters + warmup_iters); - let (cost_short_key, ext_cost_short_key) = aggregate_per_block_measurements( - 1, - short_key_results[warmup_iters..].to_vec(), - Some(overhead.clone()), - ); - let (cost_long_key, ext_cost_long_key) = aggregate_per_block_measurements( - 1, - long_key_results[warmup_iters..].to_vec(), - Some(overhead), - ); - let nodes_touched_delta = ext_cost_long_key[&ExtCosts::touching_trie_node] - - ext_cost_short_key[&ExtCosts::touching_trie_node]; - // The exact number of 
touched nodes is a implementation that we don't want - // to test here but it should be close to 2*final_key_len - assert!(nodes_touched_delta as usize <= 2 * final_key_len + 10); - assert!(nodes_touched_delta as usize >= 2 * final_key_len - 10); - let cost_delta = - cost_long_key.saturating_sub(&cost_short_key, &NonNegativeTolerance::PER_MILLE); - let cost = cost_delta / nodes_touched_delta; - cost -} - pub(crate) fn read_node_from_chunk_cache(testbed: &mut Testbed) -> GasCost { let debug = testbed.config.debug; let iters = 200; From a6684d0706c5c54eeda5242ce8ee711ccd58b382 Mon Sep 17 00:00:00 2001 From: Anton Puhach Date: Fri, 14 Jul 2023 17:58:12 +0200 Subject: [PATCH 10/50] feat: expose more RocksDB properties (#9279) This expose more RocksDB properties as prometheus metrics to enable better observability around RocksDB internals: [grafana dashboard](https://nearinc.grafana.net/d/e6676bfd-2eca-46f4-91eb-02cb1714e058/rocksdb-internals). In particular this enables us to track total RocksDB memory usage, which is useful to look at when making RocksDB configuration changes or troubleshooting increased neard memory consumption. See [the dashboard](https://nearinc.grafana.net/d/f0afab7d-1333-4234-9161-598911f64328/rocksdb-ram-usage) for more details. --- core/store/src/db/rocksdb.rs | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/core/store/src/db/rocksdb.rs b/core/store/src/db/rocksdb.rs index b50c7467dde..54d48d86cfd 100644 --- a/core/store/src/db/rocksdb.rs +++ b/core/store/src/db/rocksdb.rs @@ -4,7 +4,9 @@ use crate::{metadata, metrics, DBCol, StoreConfig, StoreStatistics, Temperature} use ::rocksdb::{ BlockBasedOptions, Cache, ColumnFamily, Env, IteratorMode, Options, ReadOptions, WriteBatch, DB, }; +use once_cell::sync::Lazy; use std::io; +use std::ops::Deref; use std::path::Path; use strum::IntoEnumIterator; use tracing::warn; @@ -15,8 +17,29 @@ pub(crate) mod snapshot; /// List of integer RocskDB properties we’re reading when collecting statistics. /// /// In the end, they are exported as Prometheus metrics. -const CF_PROPERTY_NAMES: [&'static std::ffi::CStr; 1] = - [::rocksdb::properties::LIVE_SST_FILES_SIZE]; +static CF_PROPERTY_NAMES: Lazy> = Lazy::new(|| { + use ::rocksdb::properties; + let mut ret = Vec::new(); + ret.extend_from_slice( + &[ + properties::LIVE_SST_FILES_SIZE, + properties::ESTIMATE_LIVE_DATA_SIZE, + properties::COMPACTION_PENDING, + properties::NUM_RUNNING_COMPACTIONS, + properties::ESTIMATE_PENDING_COMPACTION_BYTES, + properties::ESTIMATE_TABLE_READERS_MEM, + properties::BLOCK_CACHE_CAPACITY, + properties::BLOCK_CACHE_USAGE, + properties::CUR_SIZE_ACTIVE_MEM_TABLE, + properties::SIZE_ALL_MEM_TABLES, + ] + .map(std::ffi::CStr::to_owned), + ); + for level in 0..=6 { + ret.push(properties::num_files_at_level(level)); + } + ret +}); pub struct RocksDB { db: DB, @@ -538,7 +561,7 @@ impl RocksDB { /// Gets every int property in CF_PROPERTY_NAMES for every column in DBCol. fn get_cf_statistics(&self, result: &mut StoreStatistics) { - for prop_name in CF_PROPERTY_NAMES { + for prop_name in CF_PROPERTY_NAMES.deref() { let values = self .cf_handles() .filter_map(|(col, handle)| { From 8ba86abce2116ca083aa26a672ae2ce605c52150 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Fri, 14 Jul 2023 22:32:58 +0200 Subject: [PATCH 11/50] chain: remove deprecated near_peer_message_received_total metric (#9312) The metric has been deprecated since 1.30. Users should use near_peer_message_received_by_type_total instead. 
--- chain/network/src/peer/peer_actor.rs | 1 - chain/network/src/stats/metrics.rs | 8 -------- 2 files changed, 9 deletions(-) diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index a7e157b99e7..823799fc54c 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -1526,7 +1526,6 @@ impl actix::Handler for PeerActor { // Message type agnostic stats. { metrics::PEER_DATA_RECEIVED_BYTES.inc_by(msg.len() as u64); - metrics::PEER_MESSAGE_RECEIVED_TOTAL.inc(); tracing::trace!(target: "network", msg_len=msg.len()); self.tracker.lock().increment_received(&self.clock, msg.len() as u64); } diff --git a/chain/network/src/stats/metrics.rs b/chain/network/src/stats/metrics.rs index fcd088bb4a2..23e0cc35f32 100644 --- a/chain/network/src/stats/metrics.rs +++ b/chain/network/src/stats/metrics.rs @@ -168,14 +168,6 @@ pub(crate) static PEER_MESSAGE_RECEIVED_BY_TYPE_BYTES: Lazy = Laz ) .unwrap() }); -// TODO(mina86): This has been deprecated in 1.30. Remove at 1.32 or so. -pub(crate) static PEER_MESSAGE_RECEIVED_TOTAL: Lazy = Lazy::new(|| { - try_create_int_counter( - "near_peer_message_received_total", - "Deprecated; aggregate near_peer_message_received_by_type_total instead", - ) - .unwrap() -}); pub(crate) static PEER_MESSAGE_RECEIVED_BY_TYPE_TOTAL: Lazy = Lazy::new(|| { try_create_int_counter_vec( "near_peer_message_received_by_type_total", From 0318ff820390ccc208381767d23fca50d17feaf6 Mon Sep 17 00:00:00 2001 From: wacban Date: Sat, 15 Jul 2023 13:08:58 +0200 Subject: [PATCH 12/50] refactor: improvements to logging (#9309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are plenty of log lines that don't fit in a single line, even on a quite wide monitor. This is an attempt to improve that. - Removed a few variables in tracing spans that were redundant - already included in parent span. - Removed `apply_transactions_with_optional_storage_proof` span that immediately enters `process_state_update` and doesn't provide much value. - Set the test formatter to use a new custom time formatter that only prints seconds and milliseconds since the test started. The default one prints full date, time, and nanoseconds. - Mini refactor of the sharding_upgrade.rs that I'm just trying to sneak through. These tests are the inspiration for improving the spam log since I can't parse it. - **RFC: changed the log level of the `process_receipt` log to `trace!`. This is very subjective but my reasoning is that if a log line appears more that a few times per block, then if should have the trace level.** Since it's runtime related, cc @jakmeier @nagisa, are you fine with that change? For any of those I can be convinced otherwise, please shout. 
new log lines look like this: ``` 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=1}:process_state_update: runtime: epoch_height=4 epoch_id=EpochId(4kD9) current_protocol_version=48 is_first_block_of_version=false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=2}:process_state_update: runtime: epoch_height=4 epoch_id=EpochId(4kD9) current_protocol_version=48 is_first_block_of_version=false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=3}:process_state_update: runtime: is next_block_epoch_start false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=2}:process_state_update:apply{num_transactions=0}: runtime: close time.busy=39.2µs time.idle=3.04µs 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=3}:process_state_update: runtime: epoch_height=4 epoch_id=EpochId(4kD9) current_protocol_version=48 is_first_block_of_version=false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=1}:process_state_update:apply{num_transactions=0}: runtime: close time.busy=71.0µs time.idle=2.67µs 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=3}:process_state_update:apply{num_transactions=0}: runtime: close time.busy=62.2µs time.idle=3.58µs ``` (with the exception of hashes, I have them shortened locally, but I'm not including that in this PR) On a sidenote, I quite like tracing spans but we may be overdoing it a bit. --- chain/chain/src/types.rs | 5 -- chain/client/src/client.rs | 4 +- core/o11y/src/testonly.rs | 42 ++++++++- .../src/tests/client/sharding_upgrade.rs | 88 ++++++++++++------- nearcore/src/runtime/mod.rs | 3 +- runtime/runtime/src/lib.rs | 6 +- 6 files changed, 99 insertions(+), 49 deletions(-) diff --git a/chain/chain/src/types.rs b/chain/chain/src/types.rs index b3327ac0e98..59fc4c2da3c 100644 --- a/chain/chain/src/types.rs +++ b/chain/chain/src/types.rs @@ -335,11 +335,6 @@ pub trait RuntimeAdapter: Send + Sync { state_patch: SandboxStatePatch, use_flat_storage: bool, ) -> Result { - let _span = tracing::debug_span!( - target: "runtime", - "apply_transactions", - shard_id) - .entered(); let _timer = metrics::APPLYING_CHUNKS_TIME.with_label_values(&[&shard_id.to_string()]).start_timer(); self.apply_transactions_with_optional_storage_proof( diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 2a181021530..8128c5097d9 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -851,8 +851,6 @@ impl Client { debug!( target: "client", - height=next_height, - shard_id, me=%validator_signer.validator_id(), chunk_hash=%encoded_chunk.chunk_hash().0, %prev_block_hash, @@ -1571,7 +1569,7 @@ impl Client { if &chunk_proposer == &validator_id { let _span = tracing::debug_span!( target: "client", - "on_block_accepted_produce_chunk", + "on_block_accepted", prev_block_hash = ?*block.hash(), ?shard_id) .entered(); diff --git a/core/o11y/src/testonly.rs b/core/o11y/src/testonly.rs index 17e484f533d..2dddbc9f86c 100644 --- a/core/o11y/src/testonly.rs +++ b/core/o11y/src/testonly.rs @@ -1,7 +1,14 @@ mod tracing_capture; +use std::time::Instant; + use crate::use_color_auto; -use tracing_subscriber::{fmt as subscriber_fmt, EnvFilter}; +use core::fmt::Result; +use tracing_subscriber::fmt; +use tracing_subscriber::fmt::format::Writer; +use tracing_subscriber::fmt::time::FormatTime; + +use tracing_subscriber::EnvFilter; pub use tracing_capture::TracingCapture; 
@@ -18,11 +25,12 @@ fn setup_subscriber_from_filter(mut env_filter: EnvFilter) { } } - let _ = subscriber_fmt::Subscriber::builder() + let _ = fmt::Subscriber::builder() .with_ansi(use_color_auto()) - .with_span_events(subscriber_fmt::format::FmtSpan::CLOSE) + .with_span_events(fmt::format::FmtSpan::CLOSE) .with_env_filter(env_filter) - .with_writer(subscriber_fmt::TestWriter::new()) + .with_writer(fmt::TestWriter::new()) + .with_timer(TestUptime::default()) .try_init(); } @@ -47,3 +55,29 @@ pub fn init_integration_logger() { let env_filter = EnvFilter::new("actix_web=warn,info"); setup_subscriber_from_filter(env_filter); } + +/// Shameless copy paste of the Uptime timer in the tracing subscriber with +/// adjusted time formatting. It measures time since the subscriber is configured. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub struct TestUptime { + epoch: Instant, +} + +impl Default for TestUptime { + fn default() -> Self { + TestUptime { epoch: Instant::now() } + } +} + +impl From for TestUptime { + fn from(epoch: Instant) -> Self { + TestUptime { epoch } + } +} + +impl FormatTime for TestUptime { + fn format_time(&self, w: &mut Writer<'_>) -> Result { + let e = self.epoch.elapsed(); + write!(w, "{:2}.{:03}s", e.as_secs(), e.subsec_millis()) + } +} diff --git a/integration-tests/src/tests/client/sharding_upgrade.rs b/integration-tests/src/tests/client/sharding_upgrade.rs index 7e9ba79ce7d..283493023e4 100644 --- a/integration-tests/src/tests/client/sharding_upgrade.rs +++ b/integration-tests/src/tests/client/sharding_upgrade.rs @@ -484,39 +484,16 @@ fn test_shard_layout_upgrade_simple() { let initial_accounts = test_env.initial_accounts.clone(); let generate_create_accounts_txs: &mut dyn FnMut(usize, bool) -> Vec = &mut |max_size: usize, check_accounts: bool| -> Vec { - let size = rng.gen_range(0..max_size) + 1; - std::iter::repeat_with(|| loop { - let signer_account = initial_accounts.choose(&mut rng).unwrap(); - let signer0 = InMemorySigner::from_seed( - signer_account.clone(), - KeyType::ED25519, - &signer_account.to_string(), - ); - let account_id = gen_account(&mut rng, b"abcdefghijkmn"); - if all_accounts.insert(account_id.clone()) { - let signer = InMemorySigner::from_seed( - account_id.clone(), - KeyType::ED25519, - account_id.as_ref(), - ); - let tx = SignedTransaction::create_account( - nonce, - signer_account.clone(), - account_id.clone(), - NEAR_BASE, - signer.public_key(), - &signer0, - genesis_hash, - ); - if check_accounts { - accounts_to_check.push(account_id); - } - nonce += 1; - return tx; - } - }) - .take(size) - .collect() + generate_create_accounts_txs( + &mut rng, + genesis_hash, + &initial_accounts, + &mut accounts_to_check, + &mut all_accounts, + &mut nonce, + max_size, + check_accounts, + ) }; // add transactions until after sharding upgrade finishes @@ -541,6 +518,51 @@ fn test_shard_layout_upgrade_simple() { test_env.check_split_states_artifacts(); } +fn generate_create_accounts_txs( + mut rng: &mut rand::rngs::ThreadRng, + genesis_hash: CryptoHash, + initial_accounts: &Vec, + accounts_to_check: &mut Vec, + all_accounts: &mut HashSet, + nonce: &mut u64, + max_size: usize, + check_accounts: bool, +) -> Vec { + let size = rng.gen_range(0..max_size) + 1; + std::iter::repeat_with(|| loop { + let signer_account = initial_accounts.choose(&mut rng).unwrap(); + let signer0 = InMemorySigner::from_seed( + signer_account.clone(), + KeyType::ED25519, + &signer_account.to_string(), + ); + let account_id = gen_account(&mut rng, b"abcdefghijkmn"); + if 
all_accounts.insert(account_id.clone()) { + let signer = InMemorySigner::from_seed( + account_id.clone(), + KeyType::ED25519, + account_id.as_ref(), + ); + let tx = SignedTransaction::create_account( + *nonce, + signer_account.clone(), + account_id.clone(), + NEAR_BASE, + signer.public_key(), + &signer0, + genesis_hash, + ); + if check_accounts { + accounts_to_check.push(account_id); + } + *nonce += 1; + return tx; + } + }) + .take(size) + .collect() +} + const GAS_1: u64 = 300_000_000_000_000; const GAS_2: u64 = GAS_1 / 3; diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index 8a25701f4b0..22499a27bb2 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -324,8 +324,7 @@ impl NightshadeRuntime { let epoch_manager = self.epoch_manager.read(); let shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; debug!(target: "runtime", - "block height: {}, is next_block_epoch_start {}", - block_height, + "is next_block_epoch_start {}", epoch_manager.is_next_block_epoch_start(prev_block_hash).unwrap() ); diff --git a/runtime/runtime/src/lib.rs b/runtime/runtime/src/lib.rs index 70a2039fee4..34c28f3de9a 100644 --- a/runtime/runtime/src/lib.rs +++ b/runtime/runtime/src/lib.rs @@ -1313,12 +1313,12 @@ impl Runtime { target: "runtime", "process_receipt", receipt_id = %receipt.receipt_id, - node_counter = ?state_update.trie().get_trie_nodes_count(), predecessor = %receipt.predecessor_id, receiver = %receipt.receiver_id, id = %receipt.receipt_id, ) .entered(); + let node_counter_before = state_update.trie().get_trie_nodes_count(); let result = self.process_receipt( state_update, apply_state, @@ -1328,7 +1328,9 @@ impl Runtime { &mut stats, epoch_info_provider, ); - tracing::debug!(target: "runtime", node_counter = ?state_update.trie().get_trie_nodes_count()); + let node_counter_after = state_update.trie().get_trie_nodes_count(); + tracing::trace!(target: "runtime", ?node_counter_before, ?node_counter_after); + if let Some(outcome_with_id) = result? { *total_gas_burnt = safe_add_gas(*total_gas_burnt, outcome_with_id.outcome.gas_burnt)?; From 04b630ac1532faf53e6c23c5e9ac901fae54530c Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 17 Jul 2023 14:41:26 +0200 Subject: [PATCH 13/50] nearcore: remove old deprecation notice about network.external_address (#9315) Users have had enough time to update their config files to no longer specify network.external_address. The comment dictates the warning should be removed by the end of 2022 which was half a year ago. --- nearcore/src/config.rs | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/nearcore/src/config.rs b/nearcore/src/config.rs index 72b2eb3e2cf..ae677c815e7 100644 --- a/nearcore/src/config.rs +++ b/nearcore/src/config.rs @@ -465,20 +465,7 @@ impl Config { })?; let config: Config = serde_ignored::deserialize( &mut serde_json::Deserializer::from_str(&json_str_without_comments), - |field| { - let field = field.to_string(); - // TODO(mina86): Remove this deprecation notice some time by the - // end of 2022. 
- if field == "network.external_address" { - warn!( - target: "neard", - "{}: {field} is deprecated; please remove it from the config file", - path.display(), - ); - } else { - unrecognised_fields.push(field); - } - }, + |field| unrecognised_fields.push(field.to_string()), ) .map_err(|_| ValidationError::ConfigFileError { error_message: format!("Failed to deserialize config from {}", path.display()), From d707f2410d6225e23af3ad41ac826a5ffae040a7 Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:08:14 +0200 Subject: [PATCH 14/50] fix(state-sync): Test showing that state sync can't always generate state parts (#9294) Extracted a test from #9237 . No fix is available yet. --- .../src/tests/nearcore/sync_state_nodes.rs | 156 +++++++++++++++++- 1 file changed, 153 insertions(+), 3 deletions(-) diff --git a/integration-tests/src/tests/nearcore/sync_state_nodes.rs b/integration-tests/src/tests/nearcore/sync_state_nodes.rs index 8f21986fbdc..1971ed2f408 100644 --- a/integration-tests/src/tests/nearcore/sync_state_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_state_nodes.rs @@ -2,14 +2,24 @@ use crate::test_helpers::heavy_test; use actix::{Actor, System}; use futures::{future, FutureExt}; use near_actix_test_utils::run_actix; +use near_chain::types::RuntimeAdapter; +use near_chain::{ChainGenesis, Provenance}; use near_chain_configs::ExternalStorageLocation::Filesystem; use near_chain_configs::{DumpConfig, ExternalStorageConfig, Genesis, SyncConfig}; -use near_client::GetBlock; +use near_client::test_utils::TestEnv; +use near_client::{GetBlock, ProcessTxResponse}; +use near_crypto::{InMemorySigner, KeyType}; +use near_epoch_manager::{EpochManager, EpochManagerHandle}; use near_network::tcp; use near_network::test_utils::{convert_boot_nodes, wait_or_timeout, WaitOrTimeoutActor}; -use near_o11y::testonly::init_integration_logger; +use near_o11y::testonly::{init_integration_logger, init_test_logger}; use near_o11y::WithSpanContextExt; -use nearcore::{config::GenesisExt, load_test_config, start_with_config}; +use near_primitives::state_part::PartId; +use near_primitives::syncing::get_num_state_parts; +use near_primitives::transaction::SignedTransaction; +use near_primitives::utils::MaybeValidated; +use near_store::{NodeStorage, Store}; +use nearcore::{config::GenesisExt, load_test_config, start_with_config, NightshadeRuntime}; use std::ops::ControlFlow; use std::sync::{Arc, RwLock}; use std::time::Duration; @@ -530,3 +540,143 @@ fn sync_state_dump() { }); }); } + +#[test] +#[ignore] +// Test that state sync behaves well when the chunks are absent at the end of the epoch. +// The test actually fails and the code needs fixing. +fn test_dump_epoch_missing_chunk_in_last_block() { + heavy_test(|| { + init_test_logger(); + let epoch_length = 10; + + for num_last_chunks_missing in 0..5 { + assert!(num_last_chunks_missing < epoch_length); + let mut genesis = + Genesis::test(vec!["test0".parse().unwrap(), "test1".parse().unwrap()], 1); + genesis.config.epoch_length = epoch_length; + let chain_genesis = ChainGenesis::new(&genesis); + + let num_clients = 2; + let env_objects = + (0..num_clients) + .map(|_| { + let tmp_dir = tempfile::tempdir().unwrap(); + // Use default StoreConfig rather than NodeStorage::test_opener so we’re using the + // same configuration as in production. 
+ let store = + NodeStorage::opener(&tmp_dir.path(), false, &Default::default(), None) + .open() + .unwrap() + .get_hot_store(); + let epoch_manager = + EpochManager::new_arc_handle(store.clone(), &genesis.config); + let runtime = NightshadeRuntime::test( + tmp_dir.path(), + store.clone(), + &genesis, + epoch_manager.clone(), + ) as Arc; + (tmp_dir, store, epoch_manager, runtime) + }) + .collect::, + Arc, + )>>(); + + let stores = env_objects.iter().map(|x| x.1.clone()).collect(); + let epoch_managers = env_objects.iter().map(|x| x.2.clone()).collect(); + let runtimes = env_objects.iter().map(|x| x.3.clone()).collect(); + + let mut env = TestEnv::builder(chain_genesis) + .clients_count(num_clients) + .stores(stores) + .epoch_managers(epoch_managers) + .runtimes(runtimes) + .use_state_snapshots() + .build(); + + let genesis_block = env.clients[0].chain.get_block_by_height(0).unwrap(); + let mut blocks = vec![genesis_block.clone()]; + let signer = + InMemorySigner::from_seed("test0".parse().unwrap(), KeyType::ED25519, "test0"); + let target_height = epoch_length + 1; + for i in 1..=target_height { + let block = env.clients[0].produce_block(i).unwrap().unwrap(); + blocks.push(block.clone()); + if (i % epoch_length) != 0 + && epoch_length - (i % epoch_length) <= num_last_chunks_missing + { + // Don't produce chunks for the last blocks of an epoch. + env.clients[0] + .process_block_test_no_produce_chunk( + MaybeValidated::from(block.clone()), + Provenance::PRODUCED, + ) + .unwrap(); + tracing::info!( + "Block {i}: {:?} -- produced no chunk", + block.header().epoch_id() + ); + } else { + env.process_block(0, block.clone(), Provenance::PRODUCED); + tracing::info!( + "Block {i}: {:?} -- also produced a chunk", + block.header().epoch_id() + ); + } + env.process_block(1, block, Provenance::NONE); + + let tx = SignedTransaction::send_money( + i + 1, + "test0".parse().unwrap(), + "test1".parse().unwrap(), + &signer, + 1, + *genesis_block.hash(), + ); + assert_eq!(env.clients[0].process_tx(tx, false, false), ProcessTxResponse::ValidTx); + } + + // Simulate state sync + + // No blocks were skipped, therefore we can compute the block height of the first block of the current epoch. + let sync_hash_height = ((target_height / epoch_length) * epoch_length + 1) as usize; + let sync_hash = *blocks[sync_hash_height].hash(); + assert_ne!( + blocks[sync_hash_height].header().epoch_id(), + blocks[sync_hash_height - 1].header().epoch_id() + ); + + let state_sync_header = + env.clients[0].chain.get_state_response_header(0, sync_hash).unwrap(); + let state_root = state_sync_header.chunk_prev_state_root(); + let state_root_node = state_sync_header.state_root_node(); + let num_parts = get_num_state_parts(state_root_node.memory_usage); + // Check that state parts can be obtained. + let state_parts: Vec<_> = (0..num_parts) + .map(|i| { + // This should obviously not fail, aka succeed. 
+ env.clients[0].chain.get_state_response_part(0, i, sync_hash).unwrap() + }) + .collect(); + + env.clients[1].chain.reset_data_pre_state_sync(sync_hash).unwrap(); + let epoch_id = blocks.last().unwrap().header().epoch_id(); + for i in 0..num_parts { + env.clients[1] + .runtime_adapter + .apply_state_part( + 0, + &state_root, + PartId::new(i, num_parts), + &state_parts[i as usize], + &epoch_id, + ) + .unwrap(); + } + } + }); +} From 8e7262aefd8aaf4f93610154528a020c418f2848 Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Mon, 17 Jul 2023 10:28:58 -0400 Subject: [PATCH 15/50] fix(locust): wait for base on_locust_init() to finish before other init fns (#9313) the base on_locust_init() function sets `environment.master_funding_account`, and other init functions expect it to be set when they're run. When that isn't the case, you can get this sort of error: ``` Traceback (most recent call last): File "/home/ubuntu/.local/lib/python3.8/site-packages/locust/event.py", line 40, in fire handler(**kwargs) File "/home/ubuntu/nearcore/pytest/tests/loadtest/locust/common/social.py", line 261, in on_locust_init funding_account = environment.master_funding_account AttributeError: 'Environment' object has no attribute 'master_funding_account ``` This error can even happen in the master, before the workers have been started, and it might be related to this issue (which has been closed due to inactivity): https://github.com/locustio/locust/issues/1730. That bug mentions that `User`s get started before on_locust_init() runs, but maybe for similar reasons, we can't guarantee the order in which each on_locust_init() function will run. This doesn't seem to happen every time, and it hasn't really been triggered on MacOS, only on Linux. But this makes it kind of a blocker for setting this test up on cloud VMs (where this bug has been observed) --- pytest/tests/loadtest/locust/common/base.py | 13 +++++++++++-- pytest/tests/loadtest/locust/common/congestion.py | 1 + pytest/tests/loadtest/locust/common/ft.py | 3 ++- pytest/tests/loadtest/locust/common/social.py | 3 ++- pytest/tests/loadtest/locust/common/sweat.py | 3 ++- 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pytest/tests/loadtest/locust/common/base.py b/pytest/tests/loadtest/locust/common/base.py index 440a54c9ca2..9894ed7c4f6 100644 --- a/pytest/tests/loadtest/locust/common/base.py +++ b/pytest/tests/loadtest/locust/common/base.py @@ -11,6 +11,7 @@ import pathlib import requests import sys +import threading import time import typing import unittest @@ -550,8 +551,7 @@ def init_account_generator(parsed_options): # called once per process before user initialization -@events.init.add_listener -def on_locust_init(environment, **kwargs): +def do_on_locust_init(environment): node = NearNodeProxy(environment) master_funding_key = key.Key.from_json_file( @@ -594,6 +594,15 @@ def on_locust_init(environment, **kwargs): environment.master_funding_account = master_funding_account +INIT_DONE = threading.Event() + + +@events.init.add_listener +def on_locust_init(environment, **kwargs): + do_on_locust_init(environment) + INIT_DONE.set() + + # Add custom CLI args here, will be available in `environment.parsed_options` @events.init_command_line_parser.add_listener def _(parser): diff --git a/pytest/tests/loadtest/locust/common/congestion.py b/pytest/tests/loadtest/locust/common/congestion.py index 96b933b775c..781d1372c41 100644 --- a/pytest/tests/loadtest/locust/common/congestion.py +++ b/pytest/tests/loadtest/locust/common/congestion.py @@ -66,6 
+66,7 @@ def sender_account(self) -> base.Account: @events.init.add_listener def on_locust_init(environment, **kwargs): + base.INIT_DONE.wait() # `master_funding_account` is the same on all runners, allowing to share a # single instance of congestion contract. funding_account = environment.master_funding_account diff --git a/pytest/tests/loadtest/locust/common/ft.py b/pytest/tests/loadtest/locust/common/ft.py index b7c959ac02e..037c1bdb477 100644 --- a/pytest/tests/loadtest/locust/common/ft.py +++ b/pytest/tests/loadtest/locust/common/ft.py @@ -6,7 +6,7 @@ sys.path.append(str(pathlib.Path(__file__).resolve().parents[4] / 'lib')) import key -from common.base import Account, Deploy, NearNodeProxy, NearUser, FunctionCall +from common.base import Account, Deploy, NearNodeProxy, NearUser, FunctionCall, INIT_DONE class FTContract: @@ -116,6 +116,7 @@ def sender_account(self) -> Account: @events.init.add_listener def on_locust_init(environment, **kwargs): + INIT_DONE.wait() node = NearNodeProxy(environment) ft_contract_code = environment.parsed_options.fungible_token_wasm num_ft_contracts = environment.parsed_options.num_ft_contracts diff --git a/pytest/tests/loadtest/locust/common/social.py b/pytest/tests/loadtest/locust/common/social.py index 56eef25809e..5e9cd643996 100644 --- a/pytest/tests/loadtest/locust/common/social.py +++ b/pytest/tests/loadtest/locust/common/social.py @@ -11,7 +11,7 @@ from account import TGAS, NEAR_BASE import key -from common.base import Account, Deploy, NearNodeProxy, Transaction, FunctionCall +from common.base import Account, Deploy, NearNodeProxy, Transaction, FunctionCall, INIT_DONE from locust import events, runners from transaction import create_function_call_action @@ -256,6 +256,7 @@ def test_post(self): @events.init.add_listener def on_locust_init(environment, **kwargs): + INIT_DONE.wait() # `master_funding_account` is the same on all runners, allowing to share a # single instance of SocialDB in its `social` sub account funding_account = environment.master_funding_account diff --git a/pytest/tests/loadtest/locust/common/sweat.py b/pytest/tests/loadtest/locust/common/sweat.py index 973e7670a15..f711e4ab298 100644 --- a/pytest/tests/loadtest/locust/common/sweat.py +++ b/pytest/tests/loadtest/locust/common/sweat.py @@ -1,6 +1,6 @@ import typing from common.ft import FTContract, InitFTAccount -from common.base import Account, NearNodeProxy, NearUser, FunctionCall, MultiFunctionCall +from common.base import Account, NearNodeProxy, NearUser, FunctionCall, MultiFunctionCall, INIT_DONE import locust import sys import pathlib @@ -130,6 +130,7 @@ def args(self) -> dict: @events.init.add_listener def on_locust_init(environment, **kwargs): + INIT_DONE.wait() node = NearNodeProxy(environment) worker_id = getattr(environment.runner, "worker_index", "_master") run_id = environment.parsed_options.run_id From 8402f750389598113608330ca8f01da7be0ab14b Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Mon, 17 Jul 2023 16:49:12 +0200 Subject: [PATCH 16/50] fix(state-sync): Simplify storage format of state sync dump progress (#9289) No reason why `StateSyncDumpProgress` had to be stored as `Some(x)` instead of simply `x` --- chain/chain/src/store.rs | 7 +++++-- nearcore/src/state_sync.rs | 8 ++------ tools/state-viewer/src/scan_db.rs | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/chain/chain/src/store.rs b/chain/chain/src/store.rs index 7221a222e68..48ca877888f 100644 --- a/chain/chain/src/store.rs +++ 
b/chain/chain/src/store.rs @@ -886,7 +886,7 @@ impl ChainStore { pub fn get_state_sync_dump_progress( &self, shard_id: ShardId, - ) -> Result, Error> { + ) -> Result { option_to_not_found( self.store .get_ser(DBCol::BlockMisc, &ChainStore::state_sync_dump_progress_key(shard_id)), @@ -902,7 +902,10 @@ impl ChainStore { ) -> Result<(), Error> { let mut store_update = self.store.store_update(); let key = ChainStore::state_sync_dump_progress_key(shard_id); - store_update.set_ser(DBCol::BlockMisc, &key, &value)?; + match value { + None => store_update.delete(DBCol::BlockMisc, &key), + Some(value) => store_update.set_ser(DBCol::BlockMisc, &key, &value)?, + } store_update.commit().map_err(|err| err.into()) } } diff --git a/nearcore/src/state_sync.rs b/nearcore/src/state_sync.rs index eeb356853aa..482d075fcd5 100644 --- a/nearcore/src/state_sync.rs +++ b/nearcore/src/state_sync.rs @@ -174,7 +174,7 @@ fn get_current_state( epoch_manager: Arc, ) -> Result, Error> { let was_last_epoch_dumped = match chain.store().get_state_sync_dump_progress(*shard_id) { - Ok(Some(StateSyncDumpProgress::AllDumped { epoch_id, .. })) => Some(epoch_id), + Ok(StateSyncDumpProgress::AllDumped { epoch_id, .. }) => Some(epoch_id), _ => None, }; @@ -338,11 +338,7 @@ async fn state_sync_dump( &shard_id, epoch_height, Some(state_part.len()), - num_parts - .checked_sub( - parts_to_dump.len().checked_add(1).unwrap() as u64, - ) - .unwrap(), + num_parts.checked_sub(parts_to_dump.len() as u64).unwrap(), num_parts, ); dumped_any_state_part = true; diff --git a/tools/state-viewer/src/scan_db.rs b/tools/state-viewer/src/scan_db.rs index 62b4cd57368..67e900cc461 100644 --- a/tools/state-viewer/src/scan_db.rs +++ b/tools/state-viewer/src/scan_db.rs @@ -231,7 +231,7 @@ fn format_block_misc_value<'a>(key: &'a [u8], value: &'a [u8]) -> Box::try_from_slice(value).unwrap()) } else if key.starts_with(near_store::STATE_SYNC_DUMP_KEY) { - Box::new(Option::::try_from_slice(value).unwrap()) + Box::new(StateSyncDumpProgress::try_from_slice(value).unwrap()) } else { Box::new(value) } From 86ea6e38fd974900a93b448db102cd8b91c9df43 Mon Sep 17 00:00:00 2001 From: robin-near <111538878+robin-near@users.noreply.github.com> Date: Mon, 17 Jul 2023 08:36:46 -0700 Subject: [PATCH 17/50] Fix proxy-based nayduck tests so that they can run on non-unix systems. (#9314) Before this, running proxy-based nayduck tests (such as proxy_simple.py) fails on Mac because on Mac, multiprocessing.Process uses spawn, not fork, and our tests were written in a way that was unfriendly to spawn: 1. the entry point was not protected by `if __name__ == '__main__':`, causing spawned processes to re-execute the main module's code; 2. shared memory was not properly passed to the child process - we relied on referencing the same global variable which only worked with the fork implementation. This PR fixes these. Also, re-enable two tests which are now fixed. 
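For context, here is a minimal, runnable sketch of the spawn-friendly pattern the tests are moved to; the snippet is illustrative (it only mirrors the shape of the test code, e.g. the `success` shared value), not an excerpt from the tests:

```
import multiprocessing
from functools import partial


def worker(success):
    # Under the spawn start method the child re-imports the main module, so
    # any shared state must be passed to it explicitly rather than read from
    # a module-level global.
    with success.get_lock():
        success.value = 1


if __name__ == '__main__':
    # Guarding the entry point keeps spawned children from re-running the
    # setup code when they re-import this module.
    success = multiprocessing.Value('i', 0)
    p = multiprocessing.Process(target=partial(worker, success=success))
    p.start()
    p.join()
    assert success.value == 1
```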
--- nightly/pytest-sanity.txt | 14 +- pytest/tests/sanity/block_chunk_signature.py | 11 +- pytest/tests/sanity/network_drop_package.py | 40 +++--- pytest/tests/sanity/proxy_example.py | 24 ++-- pytest/tests/sanity/proxy_restart.py | 9 +- pytest/tests/sanity/proxy_simple.py | 26 ++-- pytest/tests/sanity/sync_ban.py | 139 ++++++++++--------- 7 files changed, 145 insertions(+), 118 deletions(-) diff --git a/nightly/pytest-sanity.txt b/nightly/pytest-sanity.txt index 2c90746239e..c8f521b6fb0 100644 --- a/nightly/pytest-sanity.txt +++ b/nightly/pytest-sanity.txt @@ -45,9 +45,8 @@ pytest --timeout=3600 sanity/state_sync_massive_validator.py pytest --timeout=3600 sanity/state_sync_massive.py --features nightly pytest --timeout=3600 sanity/state_sync_massive_validator.py --features nightly -# TODO(#8211) - tests broken due to bad behavior in chunk fetching - re-enable when that PR is submitted. -# pytest sanity/sync_chunks_from_archival.py -# pytest sanity/sync_chunks_from_archival.py --features nightly +pytest sanity/sync_chunks_from_archival.py +pytest sanity/sync_chunks_from_archival.py --features nightly pytest sanity/rpc_tx_forwarding.py pytest sanity/rpc_tx_forwarding.py --features nightly pytest --timeout=240 sanity/skip_epoch.py @@ -98,11 +97,10 @@ pytest sanity/proxy_restart.py pytest sanity/proxy_restart.py --features nightly pytest sanity/network_drop_package.py pytest sanity/network_drop_package.py --features nightly -# TODO: enable them when we fix the issue with proxy shutdown (#2942) -# pytest --timeout=900 sanity/sync_ban.py true -# pytest --timeout=900 sanity/sync_ban.py true --features nightly -# pytest --timeout=900 sanity/sync_ban.py false -# pytest --timeout=900 sanity/sync_ban.py false --features nightly +pytest --timeout=900 sanity/sync_ban.py true +pytest --timeout=900 sanity/sync_ban.py true --features nightly +pytest --timeout=900 sanity/sync_ban.py false +pytest --timeout=900 sanity/sync_ban.py false --features nightly pytest sanity/block_chunk_signature.py pytest sanity/block_chunk_signature.py --features nightly pytest sanity/concurrent_function_calls.py diff --git a/pytest/tests/sanity/block_chunk_signature.py b/pytest/tests/sanity/block_chunk_signature.py index 39aa112adba..981931cf8f0 100755 --- a/pytest/tests/sanity/block_chunk_signature.py +++ b/pytest/tests/sanity/block_chunk_signature.py @@ -29,9 +29,10 @@ async def handle(self, msg, fr, to): return True -nodes = start_cluster(2, 0, 1, None, [], {}, Handler) +if __name__ == '__main__': + nodes = start_cluster(2, 0, 1, None, [], {}, Handler) -time.sleep(5) -h0 = nodes[0].get_latest_block(verbose=True).height -h1 = nodes[1].get_latest_block(verbose=True).height -assert h0 <= 3 and h1 <= 3 + time.sleep(5) + h0 = nodes[0].get_latest_block(verbose=True).height + h1 = nodes[1].get_latest_block(verbose=True).height + assert h0 <= 3 and h1 <= 3 diff --git a/pytest/tests/sanity/network_drop_package.py b/pytest/tests/sanity/network_drop_package.py index 41cce809dd6..d4da9f80cba 100755 --- a/pytest/tests/sanity/network_drop_package.py +++ b/pytest/tests/sanity/network_drop_package.py @@ -3,6 +3,7 @@ import multiprocessing import logging import pathlib +from functools import partial sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib')) @@ -13,8 +14,6 @@ from multiprocessing import Value TIMEOUT = 90 -success = Value('i', 0) -height = Value('i', 0) # Ratio of message that are dropped to simulate bad network performance DROP_RATIO = 0.05 @@ -22,7 +21,9 @@ class Handler(ProxyHandler): - def __init__(self, 
*args, **kwargs): + def __init__(self, *args, success=None, **kwargs): + assert success is not None + self.success = success super().__init__(*args, **kwargs) self.dropped = 0 self.total = 0 @@ -31,16 +32,11 @@ async def handle(self, msg, fr, to): if msg.enum == 'Block': h = msg.Block.BlockV2.header.inner_lite().height - with height.get_lock(): - if h > height.value: - height.value = h - logging.info(f"Height: {h}") - - with success.get_lock(): - if h >= 10 and success.value == 0: + with self.success.get_lock(): + if h >= 10 and self.success.value == 0: logging.info( f'SUCCESS DROP={self.dropped} TOTAL={self.total}') - success.value = 1 + self.success.value = 1 drop = random.random() < DROP_RATIO and 'Handshake' not in msg.enum @@ -51,16 +47,20 @@ async def handle(self, msg, fr, to): return not drop -start_cluster(3, 0, 1, None, [["epoch_length", 500]], {}, Handler) +if __name__ == '__main__': + success = Value('i', 0) + + start_cluster(3, 0, 1, None, [["epoch_length", 500]], {}, + partial(Handler, success=success)) -started = time.time() + started = time.time() -while True: - logging.info(f"Time: {time.time() - started:0.2}, Fin: {success.value}") - assert time.time() - started < TIMEOUT - time.sleep(1) + while True: + logging.info(f"Time: {time.time() - started:0.2}, Fin: {success.value}") + assert time.time() - started < TIMEOUT + time.sleep(1) - if success.value == 1: - break + if success.value == 1: + break -logging.info("Success") + logging.info("Success") diff --git a/pytest/tests/sanity/proxy_example.py b/pytest/tests/sanity/proxy_example.py index cb25b04d444..7ba03fd0dc9 100755 --- a/pytest/tests/sanity/proxy_example.py +++ b/pytest/tests/sanity/proxy_example.py @@ -20,12 +20,13 @@ from multiprocessing import Value TIMEOUT = 30 -success = Value('i', 0) class Handler(ProxyHandler): - def __init__(self, *args, **kwargs): + def __init__(self, *args, success=None, **kwargs): + assert success is not None + self.success = success super().__init__(*args, **kwargs) self.peers_response = 0 @@ -43,18 +44,21 @@ async def handle(self, msg, fr, to): self.peers_response += 1 logger.info(f"Total PeersResponses = {self.peers_response}") if self.peers_response == 2: - success.value = 1 + self.success.value = 1 return True -start_cluster(2, 0, 1, None, [], {}, Handler) +if __name__ == '__main__': + success = Value('i', 0) + start_cluster(2, 0, 1, None, [], {}, + functools.partial(Handler, success=success)) -started = time.time() + started = time.time() -while True: - assert time.time() - started < TIMEOUT - time.sleep(1) + while True: + assert time.time() - started < TIMEOUT + time.sleep(1) - if success.value == 1: - break + if success.value == 1: + break diff --git a/pytest/tests/sanity/proxy_restart.py b/pytest/tests/sanity/proxy_restart.py index 43ef881271b..76ee8a883f4 100755 --- a/pytest/tests/sanity/proxy_restart.py +++ b/pytest/tests/sanity/proxy_restart.py @@ -14,9 +14,10 @@ TARGET_HEIGHT = 20 -nodes = start_cluster(2, 0, 1, None, [], {}, ProxyHandler) +if __name__ == '__main__': + nodes = start_cluster(2, 0, 1, None, [], {}, ProxyHandler) -nodes[1].kill() -nodes[1].start(boot_node=nodes[0]) + nodes[1].kill() + nodes[1].start(boot_node=nodes[0]) -utils.wait_for_blocks(nodes[1], target=TARGET_HEIGHT) + utils.wait_for_blocks(nodes[1], target=TARGET_HEIGHT) diff --git a/pytest/tests/sanity/proxy_simple.py b/pytest/tests/sanity/proxy_simple.py index 7f8951be426..8c6462b74bb 100755 --- a/pytest/tests/sanity/proxy_simple.py +++ b/pytest/tests/sanity/proxy_simple.py @@ -9,6 +9,7 @@ from cluster 
import start_cluster from configured_logger import logger +from functools import partial from peer import * from proxy import ProxyHandler @@ -16,28 +17,35 @@ from utils import obj_to_string TIMEOUT = 30 -success = Value('i', 0) class Handler(ProxyHandler): + def __init__(self, *args, success=None, **kwargs): + assert success is not None + self.success = success + super().__init__(*args, **kwargs) + async def handle(self, msg, fr, to): if msg.enum == 'Block': h = msg.Block.BlockV2.header.inner_lite().height logger.info(f"Height: {h}") if h >= 10: logger.info('SUCCESS') - success.value = 1 + self.success.value = 1 return True -start_cluster(2, 0, 1, None, [], {}, Handler) +if __name__ == '__main__': + success = Value('i', 0) + + start_cluster(2, 0, 1, None, [], {}, partial(Handler, success=success)) -started = time.time() + started = time.time() -while True: - assert time.time() - started < TIMEOUT - time.sleep(1) + while True: + assert time.time() - started < TIMEOUT + time.sleep(1) - if success.value == 1: - break + if success.value == 1: + break diff --git a/pytest/tests/sanity/sync_ban.py b/pytest/tests/sanity/sync_ban.py index d9199c42569..707fcc8ae55 100755 --- a/pytest/tests/sanity/sync_ban.py +++ b/pytest/tests/sanity/sync_ban.py @@ -19,97 +19,112 @@ from proxy import ProxyHandler import utils -should_ban = sys.argv[1] == 'true' - TIMEOUT = 300 EPOCH_LENGTH = 50 BAN_STRING = 'ban a fraudulent peer' -should_sync = Value('i', False) - class Handler(ProxyHandler): + def __init__(self, *args, should_sync=None, should_ban=None, **kwargs): + assert should_sync is not None + self.should_sync = should_sync + assert should_ban is not None + self.should_ban = should_ban + super().__init__(*args, **kwargs) + async def handle(self, msg, fr, to): if msg is not None: if msg.enum == 'Block': loop = asyncio.get_running_loop() send = functools.partial(self.do_send_message, msg, 1) - if should_sync.value: + if self.should_sync.value: loop.call_later(1, send) return False elif msg.enum == 'BlockRequest': loop = asyncio.get_running_loop() send = functools.partial(self.do_send_message, msg, 0) - if should_sync.value: + if self.should_sync.value: loop.call_later(6, send) return False elif msg.enum == 'BlockHeaders': - if should_ban: + if self.should_ban: return False loop = asyncio.get_running_loop() send = functools.partial(self.do_send_message, msg, 1) - if should_sync.value: + if self.should_sync.value: loop.call_later(2, send) return False return True -node0_config = { - "consensus": { - "min_block_production_delay": { - "secs": 0, - "nanos": 400000000 +if __name__ == '__main__': + should_ban = sys.argv[1] == 'true' + node0_config = { + "consensus": { + "min_block_production_delay": { + "secs": 0, + "nanos": 1000000000 + }, + }, + } + node1_config = { + "consensus": { + "header_sync_initial_timeout": { + "secs": 3, + "nanos": 0 + }, + "header_sync_stall_ban_timeout": { + "secs": 5, + "nanos": 0 + } }, + "tracked_shards": [0] } -} -node1_config = { - "consensus": { - "header_sync_initial_timeout": { - "secs": 3, - "nanos": 0 + + should_sync = Value('i', False) + nodes = start_cluster( + 1, 1, 1, None, [["epoch_length", EPOCH_LENGTH]], { + 0: node0_config, + 1: node1_config }, - "header_sync_stall_ban_timeout": { - "secs": 5, - "nanos": 0 - } - }, - "tracked_shards": [0] -} -nodes = start_cluster(1, 1, 1, None, [["epoch_length", EPOCH_LENGTH]], { - 0: node0_config, - 1: node1_config -}, Handler) - -utils.wait_for_blocks(nodes[0], target=110, poll_interval=2) - -should_sync.value = True - 
-logger.info("sync node 1") - -start = time.time() - -tracker0 = utils.LogTracker(nodes[0]) -tracker1 = utils.LogTracker(nodes[1]) - -while True: - assert time.time() - start < TIMEOUT - - if should_ban: - if tracker1.check(BAN_STRING): - break - else: - cur_height = nodes[0].get_latest_block().height - node1_height = nodes[1].get_latest_block().height - if (abs(node1_height - cur_height) < 5 and - status1['sync_info']['syncing'] is False): - break - time.sleep(2) - -if not should_ban and (tracker0.check(BAN_STRING) or - tracker1.check(BAN_STRING)): - assert False, "unexpected ban of peers" - -logger.info('shutting down') -time.sleep(10) + functools.partial(Handler, + should_sync=should_sync, + should_ban=should_ban)) + + utils.wait_for_blocks(nodes[0], target=30, poll_interval=2) + + should_sync.value = True + + logger.info("sync node 1") + + start = time.time() + + tracker0 = utils.LogTracker(nodes[0]) + tracker1 = utils.LogTracker(nodes[1]) + + while True: + assert time.time() - start < TIMEOUT + + if should_ban: + if tracker1.check(BAN_STRING): + break + else: + cur_height = nodes[0].get_latest_block().height + node1_height = nodes[1].get_latest_block().height + status1 = nodes[1].get_status() + print( + f"Sync: node 1 at block {node1_height}, node 0 at block {cur_height}; waiting for node 1 to catch up" + ) + if (abs(node1_height - cur_height) < 5 and + status1['sync_info']['syncing'] is False): + break + time.sleep(2) + + if not should_ban and (tracker0.check(BAN_STRING) or + tracker1.check(BAN_STRING)): + assert False, "unexpected ban of peers" + + # logger.info('shutting down') + # time.sleep(10) From 2df7a684cb29efa0dc44c1ea466131e996538015 Mon Sep 17 00:00:00 2001 From: wacban Date: Mon, 17 Jul 2023 18:12:50 +0200 Subject: [PATCH 18/50] fix: fixed nayduck test state_sync_fail.py for nightly build (#9320) In #9274 I introduced simple nightshade V2 layout and added it to the nightly build. This broke the nayduck test state_sync_fail.py. Here is the fix for it. The test performs resharding and then checks some postconditions. It broke because it attempted to reshard from V0 shard layout to V2 shard layout. This doesn't work because ShardLayout contains shard split map that only makes sense when resharding from a shard layout version to the immediate next. The fix is to check what is the protocol version supported in the binary and depending on it reshard from V0 to V1 or from V1 to V2. --- pytest/lib/cluster.py | 20 +++++ pytest/tests/sanity/state_sync_fail.py | 118 ++++++++++++++++++++++--- 2 files changed, 127 insertions(+), 11 deletions(-) diff --git a/pytest/lib/cluster.py b/pytest/lib/cluster.py index 09590f0c7a1..05c30ead51e 100644 --- a/pytest/lib/cluster.py +++ b/pytest/lib/cluster.py @@ -946,3 +946,23 @@ def load_config(): else: logger.info(f"Use default config {config}") return config + + +# Returns the protocol version of the binary. 
+def get_binary_protocol_version(config) -> typing.Optional[int]: + binary_name = config.get('binary_name', 'neard') + near_root = config.get('near_root') + binary_path = os.path.join(near_root, binary_name) + + # Get the protocol version of the binary + # The --version output looks like this: + # neard (release trunk) (build 1.1.0-3884-ge93793a61-modified) (rustc 1.71.0) (protocol 137) (db 37) + out = subprocess.check_output([binary_path, "--version"], text=True) + out = out.replace('(', '') + out = out.replace(')', '') + tokens = out.split() + n = len(tokens) + for i in range(n): + if tokens[i] == "protocol" and i + 1 < n: + return int(tokens[i + 1]) + return None \ No newline at end of file diff --git a/pytest/tests/sanity/state_sync_fail.py b/pytest/tests/sanity/state_sync_fail.py index 367075ee50f..521431cc95f 100755 --- a/pytest/tests/sanity/state_sync_fail.py +++ b/pytest/tests/sanity/state_sync_fail.py @@ -1,15 +1,19 @@ #!/usr/bin/env python3 -# Spins up a node, wait until sharding is upgrade -# and spins up another node -# check that the node can't be started because it cannot state sync to the epoch -# after the sharding upgrade + +# Spins up a node, waits until sharding is upgraded and spins up another node. +# Check that the node can't be started because it cannot state sync to the epoch +# after the sharding upgrade. + +# Depending on the version of the binary (default or nightly) it will perform +# resharding from V0 (1 shard) to V1 (4 shards) or from V1 (4 shards) to V2 (5 +# shards). import sys, time import pathlib sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib')) -from cluster import init_cluster, spin_up_node, load_config +from cluster import init_cluster, spin_up_node, load_config, get_binary_protocol_version from configured_logger import logger import requests import utils @@ -17,13 +21,104 @@ EPOCH_LENGTH = 10 START_AT_BLOCK = int(EPOCH_LENGTH * 2.5) +V1_PROTOCOL_VERSION = 48 +V2_PROTOCOL_VERSION = 135 + +V0_SHARD_LAYOUT = {"V0": {"num_shards": 1, "version": 0}} +V1_SHARD_LAYOUT = { + "V1": { + "boundary_accounts": [ + "aurora", "aurora-0", "kkuuue2akv_1630967379.near" + ], + "shards_split_map": [[0, 1, 2, 3]], + "to_parent_shard_map": [0, 0, 0, 0], + "version": 1 + } +} + + +def append_shard_layout_config_changes( + binary_protocol_version, + genesis_config_changes, +): + if binary_protocol_version >= V2_PROTOCOL_VERSION: + logger.info("Testing migration from V1 to V2.") + # Set the initial protocol version to a version just before V2. + genesis_config_changes.append([ + "protocol_version", + V2_PROTOCOL_VERSION - 1, + ]) + genesis_config_changes.append([ + "shard_layout", + V1_SHARD_LAYOUT, + ]) + genesis_config_changes.append([ + "num_block_producer_seats_per_shard", + [1, 1, 1, 1], + ]) + genesis_config_changes.append([ + "avg_hidden_validator_seats_per_shard", + [0, 0, 0, 0], + ]) + print(genesis_config_changes) + return + + if binary_protocol_version >= V1_PROTOCOL_VERSION: + logger.info("Testing migration from V0 to V1.") + # Set the initial protocol version to a version just before V1. 
+ genesis_config_changes.append([ + "protocol_version", + V1_PROTOCOL_VERSION - 1, + ]) + genesis_config_changes.append([ + "shard_layout", + V0_SHARD_LAYOUT, + ]) + genesis_config_changes.append([ + "num_block_producer_seats_per_shard", + [100], + ]) + genesis_config_changes.append([ + "avg_hidden_validator_seats_per_shard", + [0], + ]) + print(genesis_config_changes) + return + + assert False + + +def get_genesis_config_changes(binary_protocol_version): + genesis_config_changes = [ + ["min_gas_price", 0], + ["max_inflation_rate", [0, 1]], + ["epoch_length", EPOCH_LENGTH], + ["use_production_config", True], + ["block_producer_kickout_threshold", 80], + ] + + append_shard_layout_config_changes( + binary_protocol_version, + genesis_config_changes, + ) + + print(genesis_config_changes) + + return genesis_config_changes + + config = load_config() + +binary_protocol_version = get_binary_protocol_version(config) +assert binary_protocol_version is not None + near_root, node_dirs = init_cluster( - 2, 1, 1, config, - [["min_gas_price", 0], ["max_inflation_rate", [0, 1]], - ["epoch_length", EPOCH_LENGTH], ["protocol_version", 47], - ["use_production_config", True], ["block_producer_kickout_threshold", 80]], - { + num_nodes=2, + num_observers=1, + num_shards=4, + config=config, + genesis_config_changes=get_genesis_config_changes(binary_protocol_version), + client_config_changes={ 0: { "tracked_shards": [0], "state_sync_enabled": True, @@ -42,7 +137,8 @@ "state_sync_enabled": True, "store.state_snapshot_enabled": True, } - }) + }, +) started = time.time() From 33b15e1785cf13941e46ec02fa2fc1acccbc8411 Mon Sep 17 00:00:00 2001 From: Anton Puhach Date: Mon, 17 Jul 2023 22:51:17 +0200 Subject: [PATCH 19/50] feat: add database tool subcommand for State read perf testing (#9276) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a tool used to evaluate State read performance as part of `neard database` CLI. For more details on the approach see [the Methodology section](https://github.com/near/nearcore/discussions/9235). Also includes some minor refactoring around database tool.
Example executions ``` ubuntu@pugachag-mainnet:~/nearcore$ ./target/quick-release/neard database state-perf --help Run performance test for State column reads Usage: neard database state-perf [OPTIONS] Options: -s, --samples Number of requsts to use for the performance evaluation. Increasing this value results in more precise measurements, but longer test execution [default: 10000] -w, --warmup-samples Number of requests to use for database warmup. Those requests will be excluded from the measurements [default: 1000] -h, --help Print help ubuntu@pugachag-mainnet:~/nearcore$ ./target/quick-release/neard database state-perf 2023-07-12T10:21:15.258765Z INFO neard: version="trunk" build="44a09bf39" latest_protocol=62 2023-07-12T10:21:15.292835Z INFO db: Opened a new RocksDB instance. num_instances=1 Start State perf test Generate 11000 requests to State █████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ 11000/11000 Finished requests generation █████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ 11000/11000 Finished State perf test overall | avg observed_latency: 1.45039ms, block_read_time: 1.196571ms, samples with merge: 1596 (15.96%) block_read_count: 0, samples: 7 (0.07%): | avg observed_latency: 36.126µs, block_read_time: 0ns, samples with merge: 4 (57.14%) block_read_count: 1, samples: 4613 (46.13%): | avg observed_latency: 886.908µs, block_read_time: 790.738µs, samples with merge: 36 (0.78%) block_read_count: 2, samples: 1962 (19.62%): | avg observed_latency: 1.383988ms, block_read_time: 1.221933ms, samples with merge: 904 (46.08%) block_read_count: 3, samples: 1375 (13.75%): | avg observed_latency: 1.526996ms, block_read_time: 1.271185ms, samples with merge: 363 (26.40%) block_read_count: 4, samples: 1361 (13.61%): | avg observed_latency: 1.575212ms, block_read_time: 1.207766ms, samples with merge: 148 (10.87%) block_read_count: 5, samples: 221 (2.21%): | avg observed_latency: 2.080291ms, block_read_time: 1.660845ms, samples with merge: 89 (40.27%) block_read_count: 6, samples: 382 (3.82%): | avg observed_latency: 6.281688ms, block_read_time: 4.545931ms, samples with merge: 28 (7.33%) block_read_count: 7, samples: 41 (0.41%): | avg observed_latency: 6.709164ms, block_read_time: 4.897512ms, samples with merge: 14 (34.15%) block_read_count: 8, samples: 13 (0.13%): | avg observed_latency: 6.569955ms, block_read_time: 4.73201ms, samples with merge: 7 (53.85%) block_read_count: 9, samples: 3 (0.03%): | avg observed_latency: 7.457121ms, block_read_time: 5.517267ms, samples with merge: 2 (66.67%) block_read_count: 10, samples: 22 (0.22%): | avg observed_latency: 9.602637ms, block_read_time: 6.658604ms, samples with merge: 1 (4.55%) 2023-07-12T10:21:46.995873Z INFO db: Closed a RocksDB instance. num_instances=0 ```
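A small post-processing sketch (not part of the tool) showing how the per-bucket lines in the report above can be reproduced from raw samples; each sample is assumed to be the block-read count, observed latency, block-read time and a merge flag recorded for one read, mirroring what the perf context records per request.

```python
# Illustrative aggregation only; field names and units are assumptions, not
# the tool's API. Latencies are taken in nanoseconds and printed in ms.
from collections import defaultdict


def summarize(samples):
    # samples: list of (block_read_count, observed_ns, block_read_ns, has_merge)
    buckets = defaultdict(list)
    for block_reads, observed_ns, block_read_ns, has_merge in samples:
        buckets[block_reads].append((observed_ns, block_read_ns, has_merge))
    total = len(samples)
    for block_reads in sorted(buckets):
        rows = buckets[block_reads]
        avg_observed = sum(r[0] for r in rows) / len(rows) / 1e6
        avg_block_read = sum(r[1] for r in rows) / len(rows) / 1e6
        merges = sum(1 for r in rows if r[2])
        print(f"block_read_count: {block_reads}, "
              f"samples: {len(rows)} ({100 * len(rows) / total:.2f}%): "
              f"avg observed_latency: {avg_observed:.3f}ms, "
              f"block_read_time: {avg_block_read:.3f}ms, "
              f"samples with merge: {merges} ({100 * merges / len(rows):.2f}%)")
```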
--- Cargo.lock | 4 + tools/database/Cargo.toml | 9 +- tools/database/README.md | 6 +- tools/database/src/adjust_database.rs | 7 +- .../src/analyse_data_size_distribution.rs | 19 +- tools/database/src/commands.rs | 17 +- tools/database/src/lib.rs | 2 + tools/database/src/state_perf.rs | 195 ++++++++++++++++++ tools/database/src/utils.rs | 16 ++ 9 files changed, 248 insertions(+), 27 deletions(-) create mode 100644 tools/database/src/state_perf.rs create mode 100644 tools/database/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index f6ce6b2c8e3..4fa13804157 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3619,10 +3619,14 @@ version = "0.0.0" dependencies = [ "anyhow", "clap 4.2.4", + "indicatif", "near-chain-configs", + "near-primitives", "near-store", "nearcore", + "rand 0.8.5", "rayon", + "rocksdb", "strum", "tempfile", ] diff --git a/tools/database/Cargo.toml b/tools/database/Cargo.toml index 5ef1a5b4f1c..7ae15618165 100644 --- a/tools/database/Cargo.toml +++ b/tools/database/Cargo.toml @@ -11,24 +11,29 @@ publish = false [dependencies] anyhow.workspace = true clap.workspace = true +indicatif.workspace = true +rand.workspace = true rayon.workspace = true +rocksdb.workspace = true strum.workspace = true tempfile.workspace = true nearcore.workspace = true -near-store.workspace = true near-chain-configs.workspace = true - +near-store.workspace = true +near-primitives.workspace = true [features] nightly = [ "nightly_protocol", "near-chain-configs/nightly", + "near-primitives/nightly", "near-store/nightly", "nearcore/nightly", ] nightly_protocol = [ "near-chain-configs/nightly_protocol", + "near-primitives/nightly_protocol", "near-store/nightly_protocol", "nearcore/nightly_protocol", ] diff --git a/tools/database/README.md b/tools/database/README.md index 29cc2c1664f..7642f841445 100644 --- a/tools/database/README.md +++ b/tools/database/README.md @@ -2,7 +2,7 @@ A set of tools useful when working with the underlying database. -## Analyse Database +## Analyse data size distribution The analyse database script provides an efficient way to assess the size distribution of keys and values within RocksDB. @@ -78,3 +78,7 @@ available in `/home/ubuntu/.near/data/snapshot` This command can be helpful before attempting activities that can potentially corrupt the database. + +## State read perf +A tool for performance testing hot storage RocksDB State column reads. +Use help to get more details: `neard database state-perf --help` diff --git a/tools/database/src/adjust_database.rs b/tools/database/src/adjust_database.rs index 9ba452cfa36..133d88ef386 100644 --- a/tools/database/src/adjust_database.rs +++ b/tools/database/src/adjust_database.rs @@ -1,6 +1,5 @@ use near_store::metadata::DbKind; use near_store::NodeStorage; -use nearcore::NearConfig; use std::path::Path; /// This can potentially support db specified not in config, but in command line. 
@@ -25,7 +24,11 @@ pub(crate) struct ChangeDbKindCommand { } impl ChangeDbKindCommand { - pub(crate) fn run(&self, home_dir: &Path, near_config: &NearConfig) -> anyhow::Result<()> { + pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> { + let near_config = nearcore::config::load_config( + &home_dir, + near_chain_configs::GenesisValidationMode::UnsafeFast, + )?; let opener = NodeStorage::opener( home_dir, near_config.config.archive, diff --git a/tools/database/src/analyse_data_size_distribution.rs b/tools/database/src/analyse_data_size_distribution.rs index 08ce67731fb..4acdbeb6ff2 100644 --- a/tools/database/src/analyse_data_size_distribution.rs +++ b/tools/database/src/analyse_data_size_distribution.rs @@ -1,6 +1,6 @@ use clap::Parser; use near_store::db::{Database, RocksDB}; -use near_store::{DBCol, StoreConfig}; +use near_store::DBCol; use rayon::prelude::*; use std::collections::HashMap; use std::path::PathBuf; @@ -8,14 +8,16 @@ use std::sync::{Arc, Mutex}; use std::{panic, println}; use strum::IntoEnumIterator; +use crate::utils::open_rocksdb; + #[derive(Parser)] pub(crate) struct AnalyseDataSizeDistributionCommand { - #[arg(short, long)] /// If specified only this column will be analysed + #[arg(short, long)] column: Option, - #[arg(short, long, default_value_t = 100)] /// Number of count sizes to output + #[arg(short, long, default_value_t = 100)] top_k: usize, } @@ -195,16 +197,7 @@ fn get_column_families(input_col: &Option) -> Vec { impl AnalyseDataSizeDistributionCommand { pub(crate) fn run(&self, home: &PathBuf) -> anyhow::Result<()> { - // Set db options for maximum read performance - let store_config = StoreConfig::default(); - let db = RocksDB::open( - home, - &store_config, - near_store::Mode::ReadOnly, - near_store::Temperature::Hot, - ) - .unwrap(); - + let db = open_rocksdb(home)?; let column_families = get_column_families(&self.column); let results = read_all_pairs(&db, &column_families); results.print_results(self.top_k); diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 0544f246aff..9ed84bd8285 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -2,6 +2,7 @@ use crate::adjust_database::ChangeDbKindCommand; use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand; use crate::make_snapshot::MakeSnapshotCommand; use crate::run_migrations::RunMigrationsCommand; +use crate::state_perf::StatePerfCommand; use clap::Parser; use std::path::PathBuf; @@ -25,26 +26,23 @@ enum SubCommand { /// Run migrations, RunMigrations(RunMigrationsCommand), + + /// Run performance test for State column reads. + /// Uses RocksDB data specified via --home argument. 
+ StatePerf(StatePerfCommand), } impl DatabaseCommand { pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> { match &self.subcmd { SubCommand::AnalyseDataSizeDistribution(cmd) => cmd.run(home), - SubCommand::ChangeDbKind(cmd) => { - let near_config = nearcore::config::load_config( - &home, - near_chain_configs::GenesisValidationMode::UnsafeFast, - ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - cmd.run(home, &near_config) - } + SubCommand::ChangeDbKind(cmd) => cmd.run(home), SubCommand::MakeSnapshot(cmd) => { let near_config = nearcore::config::load_config( &home, near_chain_configs::GenesisValidationMode::UnsafeFast, ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } SumCommand::RunMigrationsCommand(cmd) => { @@ -55,6 +53,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, &mut near_config) } + SubCommand::StatePerf(cmd) => cmd.run(home), } } } diff --git a/tools/database/src/lib.rs b/tools/database/src/lib.rs index a8c8ed15ec5..f4bb1914908 100644 --- a/tools/database/src/lib.rs +++ b/tools/database/src/lib.rs @@ -3,3 +3,5 @@ mod analyse_data_size_distribution; pub mod commands; mod make_snapshot; mod run_migrations; +mod state_perf; +mod utils; diff --git a/tools/database/src/state_perf.rs b/tools/database/src/state_perf.rs new file mode 100644 index 00000000000..3997752208b --- /dev/null +++ b/tools/database/src/state_perf.rs @@ -0,0 +1,195 @@ +use clap::Parser; +use indicatif::{ProgressBar, ProgressIterator}; +use std::collections::BTreeMap; +use std::fmt::{Display, Write}; +use std::path::Path; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use near_primitives::shard_layout::{ShardLayout, ShardUId}; +use near_primitives::state::ValueRef; +use rand::rngs::StdRng; +use rand::seq::SliceRandom; +use rand::SeedableRng; + +use near_store::flat::store_helper::iter_flat_state_entries; +use near_store::{Store, TrieStorage}; + +use crate::utils::open_rocksdb; + +#[derive(Parser)] +pub(crate) struct StatePerfCommand { + /// Number of requests to use for the performance evaluation. + /// Increasing this value results in more precise measurements, but longer test execution. + #[arg(short, long, default_value_t = 10000)] + samples: usize, + + /// Number of requests to use for the database warmup. + /// Those requests will be excluded from the measurements. 
+ #[arg(short, long, default_value_t = 1000)] + warmup_samples: usize, +} + +impl StatePerfCommand { + pub(crate) fn run(&self, home: &Path) -> anyhow::Result<()> { + let rocksdb = Arc::new(open_rocksdb(home)?); + let store = near_store::NodeStorage::new(rocksdb).get_hot_store(); + eprintln!("Start State perf test"); + let mut perf_context = PerfContext::new(); + let total_samples = self.warmup_samples + self.samples; + for (sample_i, (shard_uid, value_ref)) in + generate_state_requests(store.clone(), total_samples).into_iter().enumerate().progress() + { + let trie_storage = near_store::TrieDBStorage::new(store.clone(), shard_uid); + let include_sample = sample_i >= self.warmup_samples; + if include_sample { + perf_context.reset(); + } + trie_storage.retrieve_raw_bytes(&value_ref.hash).unwrap(); + if include_sample { + perf_context.record(); + } + } + eprintln!("Finished State perf test"); + println!("{}", perf_context.format()); + Ok(()) + } +} + +struct PerfContext { + rocksdb_context: rocksdb::perf::PerfContext, + start: Instant, + measurements_per_block_reads: BTreeMap, + measurements_overall: Measurements, +} + +#[derive(Default)] +struct Measurements { + samples: usize, + total_observed_latency: Duration, + total_read_block_latency: Duration, + samples_with_merge: usize, +} + +impl Measurements { + fn record( + &mut self, + observed_latency: Duration, + read_block_latency: Duration, + has_merge: bool, + ) { + self.samples += 1; + self.total_observed_latency += observed_latency; + self.total_read_block_latency += read_block_latency; + if has_merge { + self.samples_with_merge += 1; + } + } + + fn avg_observed_latency(&self) -> Duration { + self.total_observed_latency / (self.samples as u32) + } + + fn avg_read_block_latency(&self) -> Duration { + self.total_read_block_latency / (self.samples as u32) + } +} + +impl Display for Measurements { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "avg observed_latency: {:?}, block_read_time: {:?}, samples with merge: {}", + self.avg_observed_latency(), + self.avg_read_block_latency(), + format_samples(self.samples_with_merge, self.samples) + ) + } +} + +impl PerfContext { + fn new() -> Self { + rocksdb::perf::set_perf_stats(rocksdb::perf::PerfStatsLevel::EnableTime); + Self { + rocksdb_context: rocksdb::perf::PerfContext::default(), + start: Instant::now(), + measurements_per_block_reads: BTreeMap::new(), + measurements_overall: Measurements::default(), + } + } + + fn reset(&mut self) { + self.rocksdb_context.reset(); + self.start = Instant::now(); + } + + fn record(&mut self) { + let observed_latency = self.start.elapsed(); + let block_read_cnt = + self.rocksdb_context.metric(rocksdb::PerfMetric::BlockReadCount) as usize; + let read_block_latency = + Duration::from_nanos(self.rocksdb_context.metric(rocksdb::PerfMetric::BlockReadTime)); + assert!(observed_latency > read_block_latency); + // This is a hack to check if at least one merge operator was executed during this request, + // will be replaced by a proper metric after `internal_merge_point_lookup_count` is added to + // rust-rocksdb + let has_merge = + self.rocksdb_context.metric(rocksdb::PerfMetric::MergeOperatorTimeNanos) > 0; + self.measurements_per_block_reads.entry(block_read_cnt).or_default().record( + observed_latency, + read_block_latency, + has_merge, + ); + self.measurements_overall.record(observed_latency, read_block_latency, has_merge); + } + + fn format(&self) -> String { + let mut ret = String::new(); + writeln!(&mut ret, "overall | 
{}", self.measurements_overall).unwrap(); + for (&block_read_cnt, measurements) in &self.measurements_per_block_reads { + writeln!( + &mut ret, + "block_read_count: {block_read_cnt}, samples: {}: | {}", + format_samples(measurements.samples, self.measurements_overall.samples), + measurements + ) + .unwrap(); + } + ret + } +} + +fn generate_state_requests(store: Store, samples: usize) -> Vec<(ShardUId, ValueRef)> { + eprintln!("Generate {samples} requests to State"); + let shard_uids = ShardLayout::get_simple_nightshade_layout().get_shard_uids(); + let mut ret = Vec::new(); + let progress = ProgressBar::new(samples as u64); + for &shard_uid in &shard_uids { + let shard_samples = samples / shard_uids.len(); + let mut keys_read = std::collections::HashSet::new(); + for value_ref in iter_flat_state_entries(shard_uid, &store, None, None) + .flat_map(|res| res.map(|(_, value)| value.to_value_ref())) + { + if value_ref.length > 4096 || !keys_read.insert(value_ref.hash) { + continue; + } + ret.push((shard_uid, value_ref)); + progress.inc(1); + if keys_read.len() == shard_samples { + break; + } + } + } + progress.finish(); + // Shuffle to avoid clustering requests to the same shard + ret.shuffle(&mut StdRng::seed_from_u64(42)); + eprintln!("Finished requests generation"); + ret +} + +fn format_samples(positive: usize, total: usize) -> String { + format!( + "{positive} ({:.2}%)", + if total == 0 { 0.0 } else { 100.0 * positive as f64 / total as f64 } + ) +} diff --git a/tools/database/src/utils.rs b/tools/database/src/utils.rs new file mode 100644 index 00000000000..0de9852ec88 --- /dev/null +++ b/tools/database/src/utils.rs @@ -0,0 +1,16 @@ +use std::path::Path; + +pub(crate) fn open_rocksdb(home: &Path) -> anyhow::Result { + let config = nearcore::config::Config::from_file_skip_validation( + &home.join(nearcore::config::CONFIG_FILENAME), + )?; + let store_config = &config.store; + let db_path = store_config.path.as_ref().cloned().unwrap_or_else(|| home.join("data")); + let rocksdb = near_store::db::RocksDB::open( + &db_path, + store_config, + near_store::Mode::ReadOnly, + near_store::Temperature::Hot, + )?; + Ok(rocksdb) +} From edb3d79d14dff738f16e4d33542a50baf32299d4 Mon Sep 17 00:00:00 2001 From: Saketh Are Date: Mon, 17 Jul 2023 21:04:04 -0400 Subject: [PATCH 20/50] RoutingTable V2: Distance Vector Routing (#9187) ### Suggested Review Path 1. Browse the (relatively small) changes outside of the `chain/network/src/routing` folder to understand the external surface of the new RoutingTableV2 component. 2. Check out the architecture diagram and event flows documented below. 3. Read the documentation for the EdgeCache component and understand the 3 purposes it serves. The primary role of this component is to support efficient implementation of the routing protocol. 4. Review the RoutingTableV2 component and understand how DistanceVectors are ingested and created. This is the core of the new routing protocol. 5. Return to the EdgeCache and review its implementation. 6. Revisit the call-sites outside of the routing folder. 
### Architecture ![image](https://github-production-user-asset-6210df.s3.amazonaws.com/3241341/244770041-ee661c90-667c-4db7-b8ac-678c90e75830.png) ### Event Flows - Network Topology Changes - Three Kinds: Peer Connected, Peer Disconnected, received a PeerMessage with new DistanceVector - These are triggered by PeerActor and flow into PeerManagerActor then into the demux - Demux sends batches of updates (up to every 1 second) to the RoutingTableV2 - RoutingTable processes entire batch, expires any outdated routes (relying on too-old edges), then generates updated RoutingTableView and local DistanceVector - If the local DistanceVector changes, it is then broadcast to all peers - Handle RoutedMessage - Received by the PeerActor, which calls into PeerManagerActor for routing decisions - Record the "previous hop" (the peer from which we received this message) in the RouteBackCache - Select a "next hop" from the RoutingTableView and forward the message - Handle response to a RoutedMessage - Received by the PeerActor, which calls into PeerManagerActor for routing decisions - Fetch the "previous hop" from the RouteBackCache and relay the response back to the originating peer for the original message - Connection started - When two nodes A and B connect, each spawns a PeerActor managing the connection - A sends a partially signed edge, which B then signs to produce a complete signed edge - B adds the signed edge to its local routing table, triggering re-computation of routes - B broadcasts its updated DistanceVector, which provides A (and other nodes) with the signed edge - Connection stopped - Node A loses connection to some node B (either B stopped running, or the specific connection was broken) - Node A executes fix_local_edges and notices the lost connection, triggering re-computation of routes - A broadcasts its updated DistanceVector, informing other nodes of the latest routes it has - If B is still running, it will go through the same steps described for A - If B is not running, the other nodes connected to it will process a disconnection (just like A) ### Configurable Parameters To be finalized after further testing in larger topologies: - Minimum interval between routing table reconstruction: 1 second - Time after which edges are considered expired: 30 minutes - How often to refresh the nonces on edges: 10 minutes - How often to check consistency of routing table's local edges with the connection pool: every 1 minute ### Resources - [Design document](https://docs.google.com/document/d/192NdoknskSLavttwOZk40TSYvx2R1if4xNZ51sCNFkI/edit#heading=h.j4e0bgwl42pg) - [Zulip thread](https://near.zulipchat.com/#narrow/stream/297663-pagoda.2Fnetwork/topic/Updated.20thoughts.20on.20TIER2.20routing) with further design discussion #### Future Extensions - [ ] Set up metrics we want to collect - [ ] Implement a debug-ui view showing contents of the V2 routing table - [ ] Implement pruning of non-validator leafs - [ ] Add handling of unreliable peers - [ ] Deprecate the old RoutingTable - [ ] Deprecate negative/tombstone edges --- .../src/network_protocol/borsh_conv.rs | 3 + chain/network/src/network_protocol/edge.rs | 2 +- chain/network/src/network_protocol/mod.rs | 27 + .../src/network_protocol/network.proto | 24 + .../proto_conv/peer_message.rs | 73 +- chain/network/src/peer/peer_actor.rs | 42 +- .../src/peer_manager/network_state/mod.rs | 94 ++- .../src/peer_manager/network_state/routing.rs | 67 +- .../src/peer_manager/peer_manager_actor.rs | 2 +- chain/network/src/routing/edge_cache/mod.rs | 360 
++++++++++ .../src/routing/edge_cache/testonly.rs | 61 ++ chain/network/src/routing/edge_cache/tests.rs | 282 ++++++++ chain/network/src/routing/graph_v2/mod.rs | 674 ++++++++++++++++++ .../network/src/routing/graph_v2/testonly.rs | 81 +++ chain/network/src/routing/graph_v2/tests.rs | 613 ++++++++++++++++ chain/network/src/routing/mod.rs | 3 + chain/network/src/types.rs | 1 + 17 files changed, 2375 insertions(+), 34 deletions(-) create mode 100644 chain/network/src/routing/edge_cache/mod.rs create mode 100644 chain/network/src/routing/edge_cache/testonly.rs create mode 100644 chain/network/src/routing/edge_cache/tests.rs create mode 100644 chain/network/src/routing/graph_v2/mod.rs create mode 100644 chain/network/src/routing/graph_v2/testonly.rs create mode 100644 chain/network/src/routing/graph_v2/tests.rs diff --git a/chain/network/src/network_protocol/borsh_conv.rs b/chain/network/src/network_protocol/borsh_conv.rs index 37eba6b95c8..de3c579312b 100644 --- a/chain/network/src/network_protocol/borsh_conv.rs +++ b/chain/network/src/network_protocol/borsh_conv.rs @@ -177,6 +177,9 @@ impl From<&mem::PeerMessage> for net::PeerMessage { net::PeerMessage::SyncRoutingTable(rtu.into()) } mem::PeerMessage::RequestUpdateNonce(e) => net::PeerMessage::RequestUpdateNonce(e), + mem::PeerMessage::DistanceVector(_) => { + panic!("DistanceVector is not supported in Borsh encoding") + } // This message is not supported, we translate it to an empty RoutingTableUpdate. mem::PeerMessage::SyncAccountsData(_) => { diff --git a/chain/network/src/network_protocol/edge.rs b/chain/network/src/network_protocol/edge.rs index 6c7022c0bd4..19163b85aaa 100644 --- a/chain/network/src/network_protocol/edge.rs +++ b/chain/network/src/network_protocol/edge.rs @@ -78,7 +78,7 @@ impl Edge { pub fn make_fake_edge(peer0: PeerId, peer1: PeerId, nonce: u64) -> Self { Self(Arc::new(EdgeInner { - key: (peer0, peer1), + key: if peer0 < peer1 { (peer0, peer1) } else { (peer1, peer0) }, nonce, signature0: Signature::empty(KeyType::ED25519), signature1: Signature::empty(KeyType::ED25519), diff --git a/chain/network/src/network_protocol/mod.rs b/chain/network/src/network_protocol/mod.rs index 4063c40f192..a9ec99db71b 100644 --- a/chain/network/src/network_protocol/mod.rs +++ b/chain/network/src/network_protocol/mod.rs @@ -288,6 +288,32 @@ impl RoutingTableUpdate { Self { edges, accounts } } } + +/// Denotes a network path to `destination` of length `distance`. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct AdvertisedPeerDistance { + pub destination: PeerId, + pub distance: u32, +} + +/// Struct shared by a peer listing the distances it has to other peers +/// in the NEAR network. +/// +/// It includes a collection of signed edges forming a spanning tree +/// which verifiably achieves the advertised routing distances. +/// +/// The distances in the tree may be the same or better than the advertised +/// distances; see routing::graph_v2::tests::inconsistent_peers. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct DistanceVector { + /// PeerId of the node sending the message. + pub root: PeerId, + /// List of distances the root has to other peers in the network. + pub distances: Vec, + /// Spanning tree of signed edges achieving the claimed distances (or better). + pub edges: Vec, +} + /// Structure representing handshake between peers. #[derive(PartialEq, Eq, Clone, Debug)] pub struct Handshake { @@ -361,6 +387,7 @@ pub enum PeerMessage { LastEdge(Edge), /// Contains accounts and edge information. 
SyncRoutingTable(RoutingTableUpdate), + DistanceVector(DistanceVector), RequestUpdateNonce(PartialEdgeInfo), SyncAccountsData(SyncAccountsData), diff --git a/chain/network/src/network_protocol/network.proto b/chain/network/src/network_protocol/network.proto index ee84e2cab0d..84f135c176c 100644 --- a/chain/network/src/network_protocol/network.proto +++ b/chain/network/src/network_protocol/network.proto @@ -260,6 +260,29 @@ message RoutingTableUpdate { repeated AnnounceAccount accounts = 2; } +// Denotes an available route to `destination` of length `distance` +message AdvertisedPeerDistance { + PublicKey destination = 1; + uint32 distance = 2; +} + +/// Message shared by a peer listing the distances it has to other peers +/// in the NEAR network. +/// +/// It includes a collection of signed edges forming a spanning tree +/// which verifiably achieves the advertised routing distances. +/// +/// The distances in the tree may be the same or better than the advertised +/// distances; see routing::graph_v2::tests::inconsistent_peers. +message DistanceVector { + // PeerId of the node sending the message. + PublicKey root = 1; + // List of distances the root has to other peers in the network. + repeated AdvertisedPeerDistance distances = 2; + // Spanning tree of signed edges achieving the claimed distances (or better). + repeated Edge edges = 3; +} + // TODO: document it. message UpdateNonceRequest { PartialEdgeInfo partial_edge_info = 1; @@ -417,6 +440,7 @@ message PeerMessage { HandshakeFailure handshake_failure = 5; LastEdge last_edge = 6; RoutingTableUpdate sync_routing_table = 7; + DistanceVector distance_vector = 28; UpdateNonceRequest update_nonce_request = 8; UpdateNonceResponse update_nonce_response = 9; diff --git a/chain/network/src/network_protocol/proto_conv/peer_message.rs b/chain/network/src/network_protocol/proto_conv/peer_message.rs index e7d8f59ea4c..3af21a855ce 100644 --- a/chain/network/src/network_protocol/proto_conv/peer_message.rs +++ b/chain/network/src/network_protocol/proto_conv/peer_message.rs @@ -4,7 +4,8 @@ use super::*; use crate::network_protocol::proto; use crate::network_protocol::proto::peer_message::Message_type as ProtoMT; use crate::network_protocol::{ - Disconnect, PeerMessage, PeersRequest, PeersResponse, RoutingTableUpdate, SyncAccountsData, + AdvertisedPeerDistance, Disconnect, DistanceVector, PeerMessage, PeersRequest, PeersResponse, + RoutingTableUpdate, SyncAccountsData, }; use crate::network_protocol::{RoutedMessage, RoutedMessageV2}; use borsh::{BorshDeserialize as _, BorshSerialize as _}; @@ -45,6 +46,70 @@ impl TryFrom<&proto::RoutingTableUpdate> for RoutingTableUpdate { ////////////////////////////////////////// +#[derive(thiserror::Error, Debug)] +pub enum ParseAdvertisedPeerDistanceError { + #[error("destination {0}")] + Destination(ParseRequiredError), +} + +impl From<&AdvertisedPeerDistance> for proto::AdvertisedPeerDistance { + fn from(x: &AdvertisedPeerDistance) -> Self { + Self { + destination: MF::some((&x.destination).into()), + distance: x.distance, + ..Default::default() + } + } +} + +impl TryFrom<&proto::AdvertisedPeerDistance> for AdvertisedPeerDistance { + type Error = ParseAdvertisedPeerDistanceError; + fn try_from(x: &proto::AdvertisedPeerDistance) -> Result { + Ok(Self { + destination: try_from_required(&x.destination).map_err(Self::Error::Destination)?, + distance: x.distance, + }) + } +} + +////////////////////////////////////////// + +////////////////////////////////////////// + +#[derive(thiserror::Error, Debug)] +pub enum 
ParseDistanceVectorError { + #[error("root {0}")] + Root(ParseRequiredError), + #[error("distances {0}")] + Distances(ParseVecError), + #[error("edges {0}")] + Edges(ParseVecError), +} + +impl From<&DistanceVector> for proto::DistanceVector { + fn from(x: &DistanceVector) -> Self { + Self { + root: MF::some((&x.root).into()), + distances: x.distances.iter().map(Into::into).collect(), + edges: x.edges.iter().map(Into::into).collect(), + ..Default::default() + } + } +} + +impl TryFrom<&proto::DistanceVector> for DistanceVector { + type Error = ParseDistanceVectorError; + fn try_from(x: &proto::DistanceVector) -> Result { + Ok(Self { + root: try_from_required(&x.root).map_err(Self::Error::Root)?, + distances: try_from_slice(&x.distances).map_err(Self::Error::Distances)?, + edges: try_from_slice(&x.edges).map_err(Self::Error::Edges)?, + }) + } +} + +////////////////////////////////////////// + impl From<&BlockHeader> for proto::BlockHeader { fn from(x: &BlockHeader) -> Self { Self { borsh: x.try_to_vec().unwrap(), ..Default::default() } @@ -93,6 +158,7 @@ impl From<&PeerMessage> for proto::PeerMessage { ..Default::default() }), PeerMessage::SyncRoutingTable(rtu) => ProtoMT::SyncRoutingTable(rtu.into()), + PeerMessage::DistanceVector(spt) => ProtoMT::DistanceVector(spt.into()), PeerMessage::RequestUpdateNonce(pei) => { ProtoMT::UpdateNonceRequest(proto::UpdateNonceRequest { partial_edge_info: MF::some(pei.into()), @@ -182,6 +248,8 @@ pub enum ParsePeerMessageError { LastEdge(ParseRequiredError), #[error("sync_routing_table: {0}")] SyncRoutingTable(ParseRoutingTableUpdateError), + #[error("shortest_path_tree: {0}")] + DistanceVector(ParseDistanceVectorError), #[error("update_nonce_requrest: {0}")] UpdateNonceRequest(ParseRequiredError), #[error("update_nonce_response: {0}")] @@ -230,6 +298,9 @@ impl TryFrom<&proto::PeerMessage> for PeerMessage { ProtoMT::SyncRoutingTable(rtu) => PeerMessage::SyncRoutingTable( rtu.try_into().map_err(Self::Error::SyncRoutingTable)?, ), + ProtoMT::DistanceVector(spt) => { + PeerMessage::DistanceVector(spt.try_into().map_err(Self::Error::DistanceVector)?) 
+ } ProtoMT::UpdateNonceRequest(unr) => PeerMessage::RequestUpdateNonce( try_from_required(&unr.partial_edge_info) .map_err(Self::Error::UpdateNonceRequest)?, diff --git a/chain/network/src/peer/peer_actor.rs b/chain/network/src/peer/peer_actor.rs index 823799fc54c..24de627b1de 100644 --- a/chain/network/src/peer/peer_actor.rs +++ b/chain/network/src/peer/peer_actor.rs @@ -3,9 +3,9 @@ use crate::concurrency::atomic_cell::AtomicCell; use crate::concurrency::demux; use crate::config::PEERS_RESPONSE_MAX_PEERS; use crate::network_protocol::{ - Edge, EdgeState, Encoding, OwnedAccount, ParsePeerMessageError, PartialEdgeInfo, - PeerChainInfoV2, PeerIdOrHash, PeerInfo, PeersRequest, PeersResponse, RawRoutedMessage, - RoutedMessageBody, RoutingTableUpdate, StateResponseInfo, SyncAccountsData, + DistanceVector, Edge, EdgeState, Encoding, OwnedAccount, ParsePeerMessageError, + PartialEdgeInfo, PeerChainInfoV2, PeerIdOrHash, PeerInfo, PeersRequest, PeersResponse, + RawRoutedMessage, RoutedMessageBody, RoutingTableUpdate, StateResponseInfo, SyncAccountsData, }; use crate::peer::stream; use crate::peer::tracker::Tracker; @@ -15,6 +15,7 @@ use crate::peer_manager::peer_manager_actor::Event; use crate::peer_manager::peer_manager_actor::MAX_TIER2_PEERS; use crate::private_actix::{RegisterPeerError, SendMessage}; use crate::routing::edge::verify_nonce; +use crate::routing::NetworkTopologyChange; use crate::shards_manager::ShardsManagerRequestFromNetwork; use crate::stats::metrics; use crate::tcp; @@ -1202,6 +1203,18 @@ impl PeerActor { .push(Event::MessageProcessed(conn.tier, peer_msg)); })); } + PeerMessage::DistanceVector(dv) => { + let clock = self.clock.clone(); + let conn = conn.clone(); + let network_state = self.network_state.clone(); + ctx.spawn(wrap_future(async move { + Self::handle_distance_vector(&clock, &network_state, conn.clone(), dv).await; + network_state + .config + .event_sink + .push(Event::MessageProcessed(conn.tier, peer_msg)); + })); + } PeerMessage::SyncAccountsData(msg) => { metrics::SYNC_ACCOUNTS_DATA .with_label_values(&[ @@ -1303,7 +1316,7 @@ impl PeerActor { return; } - self.network_state.tier2_add_route_back(&self.clock, &conn, msg.as_ref()); + self.network_state.add_route_back(&self.clock, &conn, msg.as_ref()); if for_me { // Handle Ping and Pong message if they are for us without sending to client. // i.e. 
Return false in case of Ping and Pong @@ -1378,6 +1391,27 @@ impl PeerActor { Ok(accounts) => network_state.add_accounts(accounts).await, } } + + async fn handle_distance_vector( + clock: &time::Clock, + network_state: &Arc, + conn: Arc, + distance_vector: DistanceVector, + ) { + let _span = tracing::trace_span!(target: "network", "handle_distance_vector").entered(); + + if conn.peer_info.id != distance_vector.root { + conn.stop(Some(ReasonForBan::InvalidDistanceVector)); + return; + } + + if let Err(ban_reason) = network_state + .update_routes(&clock, NetworkTopologyChange::PeerAdvertisedDistances(distance_vector)) + .await + { + conn.stop(Some(ban_reason)); + } + } } impl actix::Actor for PeerActor { diff --git a/chain/network/src/peer_manager/network_state/mod.rs b/chain/network/src/peer_manager/network_state/mod.rs index 1a75017627c..e6c98dccc55 100644 --- a/chain/network/src/peer_manager/network_state/mod.rs +++ b/chain/network/src/peer_manager/network_state/mod.rs @@ -16,6 +16,7 @@ use crate::peer_manager::peer_manager_actor::Event; use crate::peer_manager::peer_store; use crate::private_actix::RegisterPeerError; use crate::routing::route_back_cache::RouteBackCache; +use crate::routing::NetworkTopologyChange; use crate::shards_manager::ShardsManagerRequestFromNetwork; use crate::stats::metrics; use crate::store; @@ -118,6 +119,9 @@ pub(crate) struct NetworkState { pub pending_reconnect: Mutex>, /// A graph of the whole NEAR network. pub graph: Arc, + /// A sparsified graph of the whole NEAR network. + /// TODO(saketh): deprecate graph above, rename this to RoutingTable + pub graph_v2: Arc, /// Hashes of the body of recently received routed messages. /// It allows us to determine whether messages arrived faster over TIER1 or TIER2 network. @@ -141,8 +145,11 @@ pub(crate) struct NetworkState { /// Mutex which prevents overlapping calls to tier1_advertise_proxies. tier1_advertise_proxies_mutex: tokio::sync::Mutex<()>, - /// Demultiplexer aggregating calls to add_edges(). + /// Demultiplexer aggregating calls to add_edges(), for V1 routing protocol add_edges_demux: demux::Demux, Result<(), ReasonForBan>>, + /// Demultiplexer aggregating calls to update_routes(), for V2 routing protocol + update_routes_demux: + demux::Demux>, /// Mutex serializing calls to set_chain_info(), which mutates a bunch of stuff non-atomically. /// TODO(gprusak): make it use synchronization primitives in some more canonical way. @@ -170,6 +177,10 @@ impl NetworkState { }, store.clone(), )), + graph_v2: Arc::new(crate::routing::GraphV2::new(crate::routing::GraphConfigV2 { + node_id: config.node_id(), + prune_edges_after: Some(PRUNE_EDGES_AFTER), + })), genesis_id, client, shards_manager_adapter, @@ -190,6 +201,7 @@ impl NetworkState { txns_since_last_block: AtomicUsize::new(0), whitelist_nodes, add_edges_demux: demux::Demux::new(config.routing_table_update_rate_limit), + update_routes_demux: demux::Demux::new(config.routing_table_update_rate_limit), set_chain_info_mutex: Mutex::new(()), config, created_at: clock.now(), @@ -318,9 +330,13 @@ impl NetworkState { // First verify and broadcast the edge of the connection, so that in case // it is invalid, the connection is not added to the pool. // TODO(gprusak): consider actually banning the peer for consistency. 
- this.add_edges(&clock, vec![edge]) + this.add_edges(&clock, vec![edge.clone()]) .await .map_err(|_: ReasonForBan| RegisterPeerError::InvalidEdge)?; + // Update the V2 routing table + this.update_routes(&clock, NetworkTopologyChange::PeerConnected(peer_info.id.clone(), edge.clone())) + .await.map_err(|_: ReasonForBan| RegisterPeerError::InvalidEdge)?; + // Insert to the local connection pool this.tier2.insert_ready(conn.clone()).map_err(RegisterPeerError::PoolError)?; // Write to the peer store this.peer_store.peer_connected(&clock, peer_info); @@ -364,6 +380,11 @@ impl NetworkState { } } + // Update the V2 routing table + this.update_routes(&clock, NetworkTopologyChange::PeerDisconnected(peer_id.clone())) + .await + .unwrap(); + // Save the fact that we are disconnecting to the PeerStore. let res = match reason { ClosingReason::Ban(ban_reason) => { @@ -494,30 +515,34 @@ impl NetworkState { }; return self.tier1.send_message(peer_id, Arc::new(PeerMessage::Routed(msg))); } - tcp::Tier::T2 => match self.tier2_find_route(&clock, &msg.target) { - Ok(peer_id) => { - // Remember if we expect a response for this message. - if msg.author == my_peer_id && msg.expect_response() { - tracing::trace!(target: "network", ?msg, "initiate route back"); - self.tier2_route_back.lock().insert(clock, msg.hash(), my_peer_id); + tcp::Tier::T2 => { + match self.tier2_find_route(&clock, &msg.target) { + Ok(peer_id) => { + // Remember if we expect a response for this message. + if msg.author == my_peer_id && msg.expect_response() { + tracing::trace!(target: "network", ?msg, "initiate route back"); + self.tier2_route_back.lock().insert(clock, msg.hash(), my_peer_id); + } + return self + .tier2 + .send_message(peer_id, Arc::new(PeerMessage::Routed(msg))); + } + Err(find_route_error) => { + // TODO(MarX, #1369): Message is dropped here. Define policy for this case. + metrics::MessageDropped::NoRouteFound.inc(&msg.body); + + tracing::debug!(target: "network", + account_id = ?self.config.validator.as_ref().map(|v|v.account_id()), + to = ?msg.target, + reason = ?find_route_error, + known_peers = ?self.graph.routing_table.reachable_peers(), + msg = ?msg.body, + "Drop signed message" + ); + return false; } - return self.tier2.send_message(peer_id, Arc::new(PeerMessage::Routed(msg))); - } - Err(find_route_error) => { - // TODO(MarX, #1369): Message is dropped here. Define policy for this case. 
- metrics::MessageDropped::NoRouteFound.inc(&msg.body); - - tracing::debug!(target: "network", - account_id = ?self.config.validator.as_ref().map(|v|v.account_id()), - to = ?msg.target, - reason = ?find_route_error, - known_peers = ?self.graph.routing_table.reachable_peers(), - msg = ?msg.body, - "Drop signed message" - ); - return false; } - }, + } } } @@ -700,6 +725,27 @@ impl NetworkState { for t in tasks { let _ = t.await; } + + // Now that `graph` has been synchronized with the state of the local connections, + // use it as the source of truth to fix the local state in `graph_v2` + let mut tasks = vec![]; + let node_id = this.config.node_id(); + for edge in graph.local_edges.values() { + let other_peer = edge.other(&node_id).unwrap(); + tasks.push(match edge.edge_type() { + EdgeState::Active => this.update_routes( + &clock, + NetworkTopologyChange::PeerConnected(other_peer.clone(), edge.clone()), + ), + EdgeState::Removed => this.update_routes( + &clock, + NetworkTopologyChange::PeerDisconnected(other_peer.clone()), + ), + }); + } + for t in tasks { + let _ = t.await; + } }) .await .unwrap() diff --git a/chain/network/src/peer_manager/network_state/routing.rs b/chain/network/src/peer_manager/network_state/routing.rs index 9132c6b4aa7..1826a955030 100644 --- a/chain/network/src/peer_manager/network_state/routing.rs +++ b/chain/network/src/peer_manager/network_state/routing.rs @@ -1,17 +1,20 @@ use super::NetworkState; use crate::network_protocol::{ - Edge, EdgeState, PartialEdgeInfo, PeerMessage, RoutedMessageV2, RoutingTableUpdate, + DistanceVector, Edge, EdgeState, PartialEdgeInfo, PeerMessage, RoutedMessageV2, + RoutingTableUpdate, }; use crate::peer_manager::connection; use crate::peer_manager::network_state::PeerIdOrHash; use crate::peer_manager::peer_manager_actor::Event; use crate::routing::routing_table_view::FindRouteError; +use crate::routing::NetworkTopologyChange; use crate::stats::metrics; use crate::tcp; use crate::types::ReasonForBan; use near_async::time; use near_primitives::hash::CryptoHash; use near_primitives::network::{AnnounceAccount, PeerId}; +use std::collections::HashSet; use std::sync::Arc; impl NetworkState { @@ -28,6 +31,15 @@ impl NetworkState { } } + // TODO(saketh-are): eventually, this should be blocking, as it should be up to the caller + // whether to wait for the broadcast to finish, or run it in parallel with sth else. + fn broadcast_distance_vector(&self, distance_vector: DistanceVector) { + let msg = Arc::new(PeerMessage::DistanceVector(distance_vector)); + for conn in self.tier2.load().ready.values() { + conn.send_message(msg.clone()); + } + } + /// Adds AnnounceAccounts (without validating them) to the routing table. /// Then it broadcasts all the AnnounceAccounts that haven't been seen before. 
pub async fn add_accounts(self: &Arc, accounts: Vec) { @@ -82,6 +94,12 @@ impl NetworkState { edge_info.signature, ); self.add_edges(&clock, vec![edge.clone()]).await?; + self.update_routes( + &clock, + NetworkTopologyChange::PeerConnected(peer_id.clone(), edge.clone()), + ) + .await?; + Ok(edge) } @@ -133,7 +151,10 @@ impl NetworkState { ) -> Result { match target { PeerIdOrHash::PeerId(peer_id) => { - self.graph.routing_table.find_next_hop_for_target(peer_id) + match self.graph.routing_table.find_next_hop_for_target(peer_id) { + Ok(peer_id) => Ok(peer_id), + Err(_) => self.graph_v2.routing_table.find_next_hop_for_target(peer_id), + } } PeerIdOrHash::Hash(hash) => self .tier2_route_back @@ -143,7 +164,10 @@ impl NetworkState { } } - pub(crate) fn tier2_add_route_back( + /// Accepts a routed message. If we expect a response for the message, writes an entry in + /// the appropriate RouteBackCache recording the peer node from which the message came. + /// The cache entry will later be used to route back the response to the message. + pub(crate) fn add_route_back( &self, clock: &time::Clock, conn: &connection::Connection, @@ -164,4 +188,41 @@ impl NetworkState { pub(crate) fn compare_route_back(&self, hash: CryptoHash, peer_id: &PeerId) -> bool { self.tier2_route_back.lock().get(&hash).map_or(false, |value| value == peer_id) } + + /// Accepts NetworkTopologyChange events. + /// Changes are batched via the `update_routes_demux`, then passed to the V2 routing table. + /// If an updated DistanceVector is returned by the routing table, broadcasts it to peers. + /// If an error occurs while processing a DistanceVector advertised by a peer, bans the peer. + pub async fn update_routes( + self: &Arc, + clock: &time::Clock, + event: NetworkTopologyChange, + ) -> Result<(), ReasonForBan> { + let this = self.clone(); + let clock = clock.clone(); + self.update_routes_demux + .call(event, |events: Vec| async move { + let (to_broadcast, oks) = + this.graph_v2.batch_process_network_changes(&clock, events).await; + + if let Some(my_distance_vector) = to_broadcast { + this.broadcast_distance_vector(my_distance_vector); + } + + oks.iter() + .map(|ok| match ok { + true => Ok(()), + false => Err(ReasonForBan::InvalidDistanceVector), + }) + .collect() + }) + .await + .unwrap_or(Ok(())) + } + + /// Update the routing protocols with a set of peers to avoid routing through. + pub fn set_unreliable_peers(&self, unreliable_peers: HashSet) { + self.graph.set_unreliable_peers(unreliable_peers.clone()); + self.graph_v2.set_unreliable_peers(unreliable_peers); + } } diff --git a/chain/network/src/peer_manager/peer_manager_actor.rs b/chain/network/src/peer_manager/peer_manager_actor.rs index b30277bc2f5..4021c0348e2 100644 --- a/chain/network/src/peer_manager/peer_manager_actor.rs +++ b/chain/network/src/peer_manager/peer_manager_actor.rs @@ -614,7 +614,7 @@ impl PeerManagerActor { // Find peers that are not reliable (too much behind) - and make sure that we're not routing messages through them. 
let unreliable_peers = self.unreliable_peers(); metrics::PEER_UNRELIABLE.set(unreliable_peers.len() as i64); - self.state.graph.set_unreliable_peers(unreliable_peers); + self.state.set_unreliable_peers(unreliable_peers); let new_interval = min(max_interval, interval * EXPONENTIAL_BACKOFF_RATIO); diff --git a/chain/network/src/routing/edge_cache/mod.rs b/chain/network/src/routing/edge_cache/mod.rs new file mode 100644 index 00000000000..d81afa6b13c --- /dev/null +++ b/chain/network/src/routing/edge_cache/mod.rs @@ -0,0 +1,360 @@ +use crate::network_protocol::Edge; +use near_primitives::network::PeerId; +use std::collections::hash_map::{Entry, Iter}; +use std::collections::{HashMap, HashSet}; + +#[cfg(test)] +mod testonly; +#[cfg(test)] +mod tests; + +// Connections in the network are bi-directional between a pair of peers (peer0, peer1). +// For keys in the EdgeCache, we maintain the invariant that peer0 < peer1 +#[derive(Clone, Eq, Hash, PartialEq)] +pub(crate) struct EdgeKey { + peer0: PeerId, + peer1: PeerId, +} + +impl From<&(PeerId, PeerId)> for EdgeKey { + fn from(peers: &(PeerId, PeerId)) -> EdgeKey { + let (peer0, peer1) = peers.clone(); + if peer0 < peer1 { + Self { peer0, peer1 } + } else { + Self { peer1, peer0 } + } + } +} + +/// The EdgeCache stores the latest spanning tree shared by each direct peer of the local node. +/// The trees are stored in `active_trees` as lists of EdgeKeys. +/// A separate map `active_edges` is kept mapping EdgeKeys to complete signed Edge objects. +/// This struct is used to store a signed Edge object along with a `refcount`; the number of +/// spanning trees which contain the edge. +#[derive(Clone)] +struct ActiveEdge { + edge: Edge, + refcount: u32, +} + +/// Cache of all known edges in the network. +/// +/// Edges in the network come to us in the form of signed tuples +/// (PeerId, PeerId, nonce: u64) +/// The two nodes connected by the edge both sign the tuple as proof of the connection's +/// existence. The nonce is a unix timestamp, letting us know the time at which the +/// connection was signed. +/// +/// We maintain multiple representations of the network, each serving different purposes. +/// +/// 1) `verified_nonces`: +/// A mapping from (PeerId, PeerId) to the latest nonce we have verified ourselves +/// for that pair of nodes. It allows the local node to avoid repeating computationally +/// expensive cryptographic verification of signatures. +/// +/// Storing only the verified nonce (and not the actual Edge object with signatures) +/// is a memory optimization. We can trust existence of these edges as we have seen and +/// verified the signatures locally at some point, but we cannot provide trustless proof +/// to a peer that these edges exist in the network. +/// +/// 2) `active_edges` +/// A mapping from (PeerId, PeerId) to complete Edge objects. It does not contain all known +/// edges, but rather a subset which the local node may wish to subsequently re-broadcast +/// to peers in the network. +/// +/// In particular, for each direct peer of the local node, the set of edges appearing in the +/// most recent spanning tree advertised by the peer are kept in memory. +/// +/// 3) `p2id` +/// A mapping from known PeerIds to distinct integer (u32) ids 0,1,2,... +/// The `p2id` mapping is used to allow indexing nodes into Vecs rather than HashMaps, +/// improving performance and reducing memory usage of the routing protocol implementation. 
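For intuition, a minimal sketch of the dense-id scheme described above. This is illustrative only: `String` stands in for `PeerId`, and ids are released explicitly, whereas the `EdgeCache` defined next drives the same bookkeeping from per-edge reference counts and node degrees.

```rust
use std::collections::HashMap;

// Peers get small u32 labels so per-node data can live in Vecs rather than HashMaps.
// Labels are handed back through a free list once a peer is no longer referenced.
struct IdAllocator {
    assigned: HashMap<String, u32>, // stand-in for the p2id map keyed by PeerId
    next: u32,
    unused: Vec<u32>,
}

impl IdAllocator {
    fn new() -> Self {
        Self { assigned: HashMap::new(), next: 0, unused: Vec::new() }
    }

    fn get_or_create(&mut self, peer: &str) -> u32 {
        if let Some(id) = self.assigned.get(peer) {
            return *id;
        }
        // Reuse a freed id if one is available, otherwise mint a fresh one
        let id = self.unused.pop().unwrap_or_else(|| {
            let id = self.next;
            self.next += 1;
            id
        });
        self.assigned.insert(peer.to_string(), id);
        id
    }

    fn release(&mut self, peer: &str) {
        if let Some(id) = self.assigned.remove(peer) {
            self.unused.push(id);
        }
    }
}

fn main() {
    let mut ids = IdAllocator::new();
    let a = ids.get_or_create("peer-a"); // assigned 0
    let b = ids.get_or_create("peer-b"); // assigned 1
    ids.release("peer-a");
    let c = ids.get_or_create("peer-c"); // reuses the freed id 0
    assert_eq!((a, b, c), (0, 1, 0));
}
```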
+pub struct EdgeCache { + verified_nonces: im::HashMap, + active_edges: im::HashMap, + + // Mapping from neighbor PeerId to the latest spanning tree advertised by the peer, + // used to decide which edges are active. The key set of `active_edges` is the + // union of the value set of `active_trees`. + active_trees: HashMap>, + + /// Mapping from PeerId to assigned u32 id + p2id: HashMap, + /// Mapping from u32 id to the number of distinct active edges for the node + degree: Vec, + /// List of unused u32 ids + unused: Vec, +} + +impl EdgeCache { + pub fn new(local_node_id: PeerId) -> Self { + // Initializes the EdgeCache assigning id 0 to the local node + Self { + verified_nonces: Default::default(), + active_edges: Default::default(), + active_trees: HashMap::new(), + p2id: HashMap::from([(local_node_id, 0)]), + degree: vec![0], + unused: vec![], + } + } + + /// Accepts a verified edge and updates the state of `verified_nonces`. + pub fn write_verified_nonce(&mut self, edge: &Edge) { + self.verified_nonces.insert(edge.key().into(), edge.nonce()); + } + + /// Accepts an edge. Returns true iff we already have a verified nonce + /// for the edge's key which is at least as new as the edge's nonce. + pub fn has_edge_nonce_or_newer(&self, edge: &Edge) -> bool { + self.verified_nonces + .get(&edge.key().into()) + .map_or(false, |cached_nonce| cached_nonce >= &edge.nonce()) + } + + /// Returns the u32 id associated with the given PeerId. + /// Expects that an id was already assigned; will error otherwise. + pub(crate) fn get_id(&self, peer: &PeerId) -> u32 { + *self.p2id.get(peer).unwrap() + } + + /// Id 0 is always assigned to the local node. + pub(crate) fn get_local_node_id(&self) -> u32 { + 0 + } + + /// Returns the u32 id associated with the given PeerId, assigning one if necessary. + pub(crate) fn get_or_create_id(&mut self, peer: &PeerId) -> u32 { + match self.p2id.entry(peer.clone()) { + Entry::Occupied(occupied) => *occupied.get(), + Entry::Vacant(vacant) => { + let id = if let Some(id) = self.unused.pop() { + // If some unused id is available, take it + assert!(self.degree[id as usize] == 0); + id + } else { + // Otherwise, create a new one + let id = self.degree.len() as u32; + self.degree.push(0); + id + }; + + vacant.insert(id); + id + } + } + } + + /// Iterates over all peers appearing in the given spanning tree, + /// assigning ids to those which don't have one already. + pub(crate) fn create_ids_for_tree(&mut self, root: &PeerId, edges: &Vec) { + self.get_or_create_id(root); + + edges.iter().for_each(|edge| { + let (peer0, peer1) = edge.key(); + self.get_or_create_id(peer0); + self.get_or_create_id(peer1); + }); + } + + /// Checks for and frees any assigned ids which have degree 0. + /// Id 0 remains assigned to the local node, regardless of degree. + pub(crate) fn free_unused_ids(&mut self) { + assert!(self.get_local_node_id() == 0); + + // Erase entries from the `p2id` map + self.p2id.retain(|_, id| *id == 0 || self.degree[*id as usize] != 0); + + // Shrink max_id if possible + let mut max_id = self.max_id(); + while max_id >= 2 && self.degree[max_id - 1] == 0 { + max_id -= 1; + } + self.degree.truncate(max_id); + + // Reconstruct the list of unused ids + self.unused.clear(); + for id in 1..self.max_id() { + if self.degree[id] == 0 { + self.unused.push(id as u32); + } + } + } + + /// Called when storing an active edge. + /// Increments the degrees for the connected peers, assigning ids if necessary. 
+ fn increment_degrees_for_key(&mut self, key: &EdgeKey) { + let id0 = self.get_or_create_id(&key.peer0) as usize; + let id1 = self.get_or_create_id(&key.peer1) as usize; + self.degree[id0] += 1; + self.degree[id1] += 1; + } + + /// Called when erasing an active edge. + /// Decrements the degrees for the connected peers. + fn decrement_degrees_for_key(&mut self, key: &EdgeKey) { + self.decrement_degree(&key.peer0); + self.decrement_degree(&key.peer1); + } + + /// Decrements the degree for the given peer. + /// If the degree reaches 0, frees the peer's id for reuse. + fn decrement_degree(&mut self, peer_id: &PeerId) { + let id = self.get_id(peer_id) as usize; + assert!(self.degree[id] > 0); + self.degree[id] -= 1; + + // If the degree for the id reaches 0, free it. + // The local node is always mapped to 0. + if self.degree[id] == 0 && id != 0 { + self.p2id.remove(peer_id); + self.unused.push(id as u32); + } + } + + /// Inserts a copy of the given edge to `active_edges`. + /// If it's the first copy, increments degrees for the incident nodes. + fn insert_active_edge(&mut self, edge: &Edge) { + let is_newly_active = match self.active_edges.entry(edge.key().into()) { + im::hashmap::Entry::Occupied(mut occupied) => { + let val: &mut ActiveEdge = occupied.get_mut(); + if edge.nonce() > val.edge.nonce() { + val.edge = edge.clone(); + } + val.refcount += 1; + false + } + im::hashmap::Entry::Vacant(vacant) => { + vacant.insert(ActiveEdge { edge: edge.clone(), refcount: 1 }); + true + } + }; + if is_newly_active { + self.increment_degrees_for_key(&edge.key().into()); + } + } + + /// Removes an edge with the given EdgeKey from the active edge cache. + /// If the last such edge is removed, decrements degrees for the incident nodes. + fn remove_active_edge(&mut self, key: &EdgeKey) { + let is_newly_inactive = match self.active_edges.entry(key.clone()) { + im::hashmap::Entry::Occupied(mut occupied) => { + let val: &mut ActiveEdge = occupied.get_mut(); + assert!(val.refcount > 0); + if val.refcount == 1 { + occupied.remove_entry(); + true + } else { + val.refcount -= 1; + false + } + } + im::hashmap::Entry::Vacant(_) => { + assert!(false); + false + } + }; + if is_newly_inactive { + self.decrement_degrees_for_key(key); + } + } + + /// Stores the key-value pair (peer_id, edges) in the EdgeCache's `active_trees` map, overwriting + /// any previous entry for the same peer. Updates `active_edges` accordingly. + pub fn update_tree(&mut self, peer_id: &PeerId, tree: &Vec) { + // Insert the new edges before removing any old ones. + // Nodes are pruned from the `p2id` mapping as soon as all edges incident with them are + // removed. If we removed the edges in the old tree first, we might unlabel and relabel a + // node unnecessarily. Inserting the new edges first minimizes churn on `p2id`. + for edge in tree { + self.insert_active_edge(edge); + } + + let edge_keys: Vec = tree.iter().map(|edge| edge.key().into()).collect(); + + if let Some(old_edge_keys) = self.active_trees.insert(peer_id.clone(), edge_keys) { + // If a previous tree was present, process removal of its edges + for key in &old_edge_keys { + self.remove_active_edge(key); + } + } + } + + /// Removes the tree stored for the given peer, if there is one. 
+ pub fn remove_tree(&mut self, peer_id: &PeerId) { + if let Some(edges) = self.active_trees.remove(peer_id) { + for e in &edges { + self.remove_active_edge(e); + } + } + } + + /// Upper bound on mapped u32 ids; not inclusive + pub fn max_id(&self) -> usize { + self.degree.len() + } + + /// Iterator over the (PeerId, u32) mapping + pub fn iter_peers(&self) -> Iter<'_, PeerId, u32> { + self.p2id.iter() + } + + /// Number of known edges in the network + pub fn known_edges_ct(&self) -> usize { + self.verified_nonces.len() + } + + /// Prunes entries with nonces older than `prune_nonces_older_than` + /// from `verified_nonces` + pub fn prune_old_edges(&mut self, prune_nonces_older_than: u64) { + // Drop any entries with old nonces from the verified_nonces cache + self.verified_nonces.retain(|_, nonce| nonce >= &prune_nonces_older_than); + } + + /// Accepts a mapping over the set of reachable PeerIds in the network + /// to the shortest path lengths to those peers. + /// + /// Constructs a tree from among the `active_edges` which has the same + /// reachability and the same distances or better. + /// + /// Returns None if the input is inconsistent with the state of the cache + /// (reachability or distances are not consistent with the `active_edges`). + pub fn construct_spanning_tree(&self, distance: &HashMap) -> Option> { + let mut edges = Vec::::new(); + let mut has_edge = HashSet::::new(); + + // Make sure some node has distance 0 + let mut has_root = false; + for (_, val) in distance { + if *val == 0 { + has_root = true; + } + } + + // Iterate through the known useful edges. + // We want to find for each node in the tree some edge + // which connects it to another node with smaller distance. + for (edge_key, active_edge) in &self.active_edges { + if let (Some(dist0), Some(dist1)) = + (distance.get(&edge_key.peer0), distance.get(&edge_key.peer1)) + { + if dist0 < dist1 && !has_edge.contains(&edge_key.peer1) { + has_edge.insert(edge_key.peer1.clone()); + edges.push(active_edge.edge.clone()); + } + + if dist1 < dist0 && !has_edge.contains(&edge_key.peer0) { + has_edge.insert(edge_key.peer0.clone()); + edges.push(active_edge.edge.clone()); + } + } + } + + if has_root && has_edge.len() + 1 == distance.len() { + Some(edges) + } else { + None + } + } +} diff --git a/chain/network/src/routing/edge_cache/testonly.rs b/chain/network/src/routing/edge_cache/testonly.rs new file mode 100644 index 00000000000..6aa60e3cc7d --- /dev/null +++ b/chain/network/src/routing/edge_cache/testonly.rs @@ -0,0 +1,61 @@ +use crate::routing::edge_cache::{EdgeCache, EdgeKey}; +use crate::types::Edge; +use near_primitives::network::PeerId; + +impl EdgeCache { + pub(crate) fn is_active(&self, edge: &Edge) -> bool { + self.active_edges.contains_key(&edge.key().into()) + } + + pub(crate) fn get_nonce_for_active_edge(&self, key: &EdgeKey) -> Option { + self.active_edges.get(key).map(|val| val.edge.nonce()) + } + + pub(crate) fn check_mapping_external(&self, mapped_nodes: &Vec) { + // Check the mapped ids for externally visible properties of the mapping + let mut assigned_ids: Vec = + mapped_nodes.iter().map(|peer_id| self.get_id(peer_id)).collect(); + assigned_ids.sort(); + assigned_ids.dedup(); + assert_eq!(mapped_nodes.len(), assigned_ids.len()); + for id in assigned_ids { + assert!(id < (self.max_id() as u32)); + } + } + + pub(crate) fn check_mapping_internal(&self, mapped_nodes: &Vec) { + // Check internally that the set of mapped nodes is exactly those which are expected + assert_eq!(mapped_nodes.len(), self.p2id.len()); 
+ for peer_id in mapped_nodes { + assert!(self.p2id.contains_key(&peer_id)); + } + + // Check internally that the mapped ids and unused ids together are precisely 0,1,2,... + let universe = Vec::from_iter(0..(self.max_id() as u32)); + let mut actual_ids: Vec = self.p2id.values().cloned().collect(); + actual_ids.append(&mut self.unused.clone()); + actual_ids.sort(); + assert_eq!(universe, actual_ids); + + // An id should be in use iff it's id 0 (the local node's id) + // or if it's assigned to some node incident with an active edge + for id in universe { + let should_be_used = id == 0 || self.degree[id as usize] != 0; + assert_eq!(should_be_used, !self.unused.contains(&id)); + } + + // Check exact consistency of the degree counts with the active edges + let mut expected_degree = vec![0; self.max_id()]; + for (key, edge) in &self.active_edges { + assert!(edge.refcount > 0); + expected_degree[self.get_id(&key.peer0) as usize] += 1; + expected_degree[self.get_id(&key.peer1) as usize] += 1; + } + assert_eq!(expected_degree, self.degree); + } + + pub(crate) fn check_mapping(&self, mapped_nodes: Vec) { + self.check_mapping_external(&mapped_nodes); + self.check_mapping_internal(&mapped_nodes); + } +} diff --git a/chain/network/src/routing/edge_cache/tests.rs b/chain/network/src/routing/edge_cache/tests.rs new file mode 100644 index 00000000000..3c9d5d08ea9 --- /dev/null +++ b/chain/network/src/routing/edge_cache/tests.rs @@ -0,0 +1,282 @@ +use crate::routing::edge_cache::*; +use crate::test_utils::random_peer_id; +use crate::testonly::make_rng; +use rand::Rng; +use std::collections::HashSet; + +#[test] +fn has_edge_nonce_or_newer() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node0.clone(), node1, 456); + + let mut ec = EdgeCache::new(node0); + + // Initially empty + assert!(!ec.has_edge_nonce_or_newer(&edge0)); + assert!(!ec.has_edge_nonce_or_newer(&edge1)); + + // Write the older nonce + ec.write_verified_nonce(&edge0); + assert!(ec.has_edge_nonce_or_newer(&edge0)); + assert!(!ec.has_edge_nonce_or_newer(&edge1)); + + // Write the newer nonce + ec.write_verified_nonce(&edge1); + assert!(ec.has_edge_nonce_or_newer(&edge0)); + assert!(ec.has_edge_nonce_or_newer(&edge1)); +} + +#[test] +fn update_active_edge_nonce() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node0.clone(), node1, 456); + + assert_eq!(edge0.key(), edge1.key()); + let key: EdgeKey = edge0.key().into(); + + let mut ec = EdgeCache::new(node0); + + // First insert with the older nonce + ec.insert_active_edge(&edge0); + assert_eq!(Some(123), ec.get_nonce_for_active_edge(&key)); + + // Insert another copy of the same edge with a newer nonce + ec.insert_active_edge(&edge1); + assert_eq!(Some(456), ec.get_nonce_for_active_edge(&key)); + + // Insert with the older nonce again; should not overwrite + ec.insert_active_edge(&edge0); + assert_eq!(Some(456), ec.get_nonce_for_active_edge(&key)); + + // Remove a copy; should still remember it + ec.remove_active_edge(&key); + assert_eq!(Some(456), ec.get_nonce_for_active_edge(&key)); + + // Remove another copy; should still remember it + ec.remove_active_edge(&key); + assert_eq!(Some(456), ec.get_nonce_for_active_edge(&key)); + + // Remove final copy + ec.remove_active_edge(&key); + assert_eq!(None, ec.get_nonce_for_active_edge(&key)); +} + 
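A small aside on the nonce convention these tests lean on: nonces are unix timestamps, so "newer" is simply "numerically larger", and expiry (see `prune_old_edges` above and `prune_expired_peer_distances` later in this patch) reduces to a comparison against a cutoff. A minimal sketch with hypothetical values:

```rust
fn main() {
    let edge_nonce: u64 = 1_690_000_000; // unix time at which the edge was signed
    let now: u64 = 1_691_000_000;        // hypothetical current unix time
    let prune_edges_after: u64 = 30 * 24 * 3600; // hypothetical retention window (30 days)

    let prune_nonces_older_than = now.saturating_sub(prune_edges_after);
    let expired = edge_nonce < prune_nonces_older_than;

    // cutoff 1_688_408_000 < nonce 1_690_000_000, so the edge is still fresh
    assert!(!expired);
}
```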
+#[test] +fn test_p2id_mapping() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + let node3 = random_peer_id(); + + // Set up a simple line graph 0--1--2--3 + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 456); + let edge2 = Edge::make_fake_edge(node2.clone(), node3.clone(), 789); + + // Set up the EdgeCache with node0 as the local node + let mut ec = EdgeCache::new(node0.clone()); + ec.check_mapping(vec![node0.clone()]); + + // Insert and remove a single edge + ec.insert_active_edge(&edge0); + ec.check_mapping(vec![node0.clone(), node1.clone()]); + ec.remove_active_edge(&edge0.key().into()); + ec.check_mapping(vec![node0.clone()]); + + // Insert all edges (0--1--2--3) + ec.insert_active_edge(&edge0); + ec.insert_active_edge(&edge1); + ec.insert_active_edge(&edge2); + ec.check_mapping(vec![node0.clone(), node1.clone(), node2.clone(), node3.clone()]); + + // Remove edge1; all nodes are still active (0--1 2--3) + ec.remove_active_edge(&edge1.key().into()); + ec.check_mapping(vec![node0.clone(), node1.clone(), node2.clone(), node3.clone()]); + + // Remove edge0; node1 will no longer be active (0 1 2--3) + ec.remove_active_edge(&edge0.key().into()); + ec.check_mapping(vec![node0.clone(), node2.clone(), node3.clone()]); + + // Insert edge1; reactivates node1 (0 1--2--3) + ec.insert_active_edge(&edge1); + ec.check_mapping(vec![node0.clone(), node1.clone(), node2.clone(), node3]); + + // Remove edge2; deactivates only node2 (0 1--2 3) + ec.remove_active_edge(&edge2.key().into()); + ec.check_mapping(vec![node0.clone(), node1, node2]); + + // Remove edge1; only the local node should remain mapped (0 1 2 3) + ec.remove_active_edge(&edge1.key().into()); + ec.check_mapping(vec![node0]); +} + +#[test] +fn reuse_ids() { + let max_node_ct = 5; + + let mut rng = make_rng(921853233); + let rng = &mut rng; + + let local_node_id = random_peer_id(); + let mut ec = EdgeCache::new(local_node_id.clone()); + + // Run multiple iterations of inserting and deleting sets of edges + for _ in 0..25 { + // Generate some random PeerIds; should have at least 2 so we can make some edges + let node_ct = rng.gen::() % (max_node_ct - 2) + 2; + let mut peer_ids: Vec = (1..node_ct).map(|_| random_peer_id()).collect(); + peer_ids.push(local_node_id.clone()); + + // Generate some random edges + let edge_ct = rng.gen::() % 10 + 0; + let edges: Vec = (0..edge_ct) + .map(|_| { + // Generate two distinct indices at random in (0..node_ct) + let peer0 = rng.gen::() % (node_ct - 1); + let peer1 = peer0 + 1 + (rng.gen::() % (node_ct - peer0 - 1)); + + // Make an edge with the chosen nodes and a random nonce + Edge::make_fake_edge( + peer_ids[peer0].clone(), + peer_ids[peer1].clone(), + rng.gen::(), + ) + }) + .collect(); + + let mut active = HashSet::::from([local_node_id.clone()]); + for e in &edges { + // Insert the edge to the EdgeCache + ec.insert_active_edge(e); + + // Update our record of active nodes + let (peer0, peer1) = e.key().clone(); + active.insert(peer0); + active.insert(peer1); + + ec.check_mapping(Vec::from_iter(active.clone())); + + // u32 ids should be reused across iterations + assert!(ec.max_id() <= max_node_ct); + } + + for e in &edges { + ec.remove_active_edge(&e.key().into()); + } + } +} + +#[test] +fn free_unused_after_create_for_tree() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let edge = 
Edge::make_fake_edge(node1.clone(), node2.clone(), 123); + + // Initialize the edge cache and check that just the local node has an id + let mut ec = EdgeCache::new(node0.clone()); + ec.check_mapping(vec![node0.clone()]); + + // Create and check ids for the tree 1--2 + ec.create_ids_for_tree(&node1, &vec![edge]); + ec.check_mapping_external(&vec![node0.clone(), node1, node2]); + + // Free unused ids + ec.free_unused_ids(); + ec.check_mapping(vec![node0]); +} + +#[test] +fn overwrite_shortest_path_tree() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let mut ec = EdgeCache::new(node0.clone()); + + let edge0 = Edge::make_fake_edge(node0, node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 123); + + // Write an SPT for node1 advertising node2 behind it; 0--1--2 + ec.update_tree(&node1, &vec![edge0.clone(), edge1.clone()]); + + assert!(ec.is_active(&edge1)); + assert!(ec.p2id.contains_key(&node2)); + + // Now write an SPT for node1 without the connection to node2; 0--1 2 + ec.update_tree(&node1, &vec![edge0]); + + // edge1 should have been pruned from node0's `active_edges` map + assert!(!ec.is_active(&edge1)); + // node2 should have been pruned from node0's `p2id` mapping + assert!(!ec.p2id.contains_key(&node2)); +} + +fn assert_eq_unordered(a: Vec, b: Vec) { + for x in &a { + assert!(b.contains(x)); + } + for x in &b { + assert!(a.contains(x)); + } +} + +#[test] +fn test_construct_shortest_path_tree() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + let node3 = random_peer_id(); + + // Set up a simple line graph 0--1--2--3 + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 456); + let edge2 = Edge::make_fake_edge(node2.clone(), node3.clone(), 789); + + // Set up the EdgeCache with node0 as the local node + let mut ec = EdgeCache::new(node0.clone()); + ec.check_mapping(vec![node0.clone()]); + + // Insert the edges to the cache + ec.insert_active_edge(&edge0); + ec.insert_active_edge(&edge1); + ec.insert_active_edge(&edge2); + + // Construct tree 0--1--2--3 + assert_eq_unordered( + ec.construct_spanning_tree(&HashMap::from([ + (node0.clone(), 0), + (node1.clone(), 1), + (node2.clone(), 2), + (node3.clone(), 3), + ])) + .unwrap(), + vec![edge0.clone(), edge1.clone(), edge2.clone()], + ); + + // Add direct edges to node2 and node3 + let edge02 = Edge::make_fake_edge(node0.clone(), node2.clone(), 123); + let edge03 = Edge::make_fake_edge(node0.clone(), node3.clone(), 456); + + ec.insert_active_edge(&edge02); + ec.insert_active_edge(&edge03); + + // Construct tree 0--{1,2,3} + assert_eq_unordered( + ec.construct_spanning_tree(&HashMap::from([ + (node0, 0), + (node1, 1), + (node2, 1), + (node3, 1), + ])) + .unwrap(), + vec![edge0, edge02, edge03], + ); +} diff --git a/chain/network/src/routing/graph_v2/mod.rs b/chain/network/src/routing/graph_v2/mod.rs new file mode 100644 index 00000000000..60490803325 --- /dev/null +++ b/chain/network/src/routing/graph_v2/mod.rs @@ -0,0 +1,674 @@ +use crate::concurrency::runtime::Runtime; +use crate::network_protocol; +use crate::network_protocol::{AdvertisedPeerDistance, Edge, EdgeState}; +use crate::routing::edge_cache::EdgeCache; +use crate::routing::routing_table_view::RoutingTableView; +use crate::stats::metrics; +use arc_swap::ArcSwap; +use near_async::time; +use near_primitives::network::PeerId; +use parking_lot::Mutex; +use 
std::collections::VecDeque; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +#[cfg(not(test))] +use crate::concurrency; +#[cfg(not(test))] +use rayon::iter::ParallelBridge; + +#[cfg(test)] +mod testonly; +#[cfg(test)] +mod tests; + +pub type NextHopTable = HashMap>; + +#[derive(Clone)] +pub struct GraphConfigV2 { + pub node_id: PeerId, + pub prune_edges_after: Option, +} + +pub enum NetworkTopologyChange { + PeerConnected(PeerId, Edge), + PeerDisconnected(PeerId), + PeerAdvertisedDistances(network_protocol::DistanceVector), +} + +/// Locally stored properties of a received network_protocol::DistanceVector message +struct PeerDistances { + /// Advertised distances indexed by the local EdgeCache's peer to id mapping. + pub distance: Vec>, + /// The lowest nonce among all edges used to validate the distances. + /// For simplicity, used to expire the entire distance vector at once. + pub min_nonce: u64, +} + +struct Inner { + config: GraphConfigV2, + + /// Data structure maintaing information about the entire known network + edge_cache: EdgeCache, + + /// Edges of the local node's direct connections + local_edges: HashMap, + /// Distances advertised by the local node's direct peers + peer_distances: HashMap, + + /// Distances from the local node to other nodes + my_distances: HashMap, + /// The latest DistanceVector advertised by the local node + my_distance_vector: network_protocol::DistanceVector, +} + +impl Inner { + /// Function which verifies signed edges. + /// Returns true iff all the edges provided were valid. + /// + /// This method implements a security measure against an adversary sending invalid edges. + /// It verifies edges in parallel until the first invalid edge is found. It adds nonces + /// for all the edges verified so far to the cache, but drops all the remaining ones. This way + /// the wasted work (verification of invalid edges) is constant, no matter how large the input + /// size is. + /// + /// Edge verification is expensive, and it would be an attack vector if we dropped on the + /// floor valid edges verified so far: an attacker could prepare a message containing + /// a lot of valid edges, except for the last one, and send it repeatedly to a node. + /// The node would then validate all the edges every time, then reject the whole set + /// because just the last edge was invalid. Instead, we cache all the edges verified so + /// far and return an error only afterwards. + #[cfg(not(test))] + fn verify_and_cache_edge_nonces(&mut self, edges: &Vec) -> bool { + metrics::EDGE_UPDATES.inc_by(edges.len() as u64); + + // Collect only those edges which are new to us for verification. + let mut unverified_edges = Vec::::new(); + for e in edges { + // V2 routing protocol only shares Active edges + // TODO(saketh): deprecate tombstones entirely + if e.edge_type() != EdgeState::Active { + return false; + } + + if !self.edge_cache.has_edge_nonce_or_newer(e) { + unverified_edges.push(e.clone()); + } + } + + // Verify the new edges in parallel on rayon. + // Stop at first invalid edge. + let (verified_edges, ok) = concurrency::rayon::run_blocking(move || { + concurrency::rayon::try_map(unverified_edges.into_iter().par_bridge(), |e| { + if e.verify() { + Some(e) + } else { + None + } + }) + }); + + // Store the verified nonces in the cache + verified_edges.iter().for_each(|e| self.edge_cache.write_verified_nonce(e)); + + ok + } + + /// Function computing basic properties of a tree. + /// + /// Accepts a root node and a list of edges specifying a tree. 
If the edges form + /// a valid tree containing the specified `root`, returns a pair of vectors + /// (distance, first_step). Otherwise, returns None. + /// + /// Nodes are indexed into the vectors according to the peer to id mapping in the EdgeCache. + /// If `tree_edges` contain some previously unseen peers, new ids are allocated for them. + /// + /// For each node in the tree, `distance` indicates the length of the path + /// from the root to the node. Nodes outside the tree have distance None. + /// + /// For each node in the tree, `first_step` indicates the root's neighbor on the path + /// from the root to the node. The root of the tree, as well as any nodes outside + /// the tree, have a first_step of None. + pub(crate) fn calculate_tree_distances( + &mut self, + root: &PeerId, + tree_edges: &Vec, + ) -> Option<(Vec>, Vec>)> { + // Prepare for graph traversal by ensuring all PeerIds in the tree have a u32 label + self.edge_cache.create_ids_for_tree(root, tree_edges); + + // Build adjacency-list representation of the edges + let mut adjacency = vec![Vec::::new(); self.edge_cache.max_id()]; + for edge in tree_edges { + let (peer0, peer1) = edge.key(); + let id0 = self.edge_cache.get_id(peer0); + let id1 = self.edge_cache.get_id(peer1); + adjacency[id0 as usize].push(id1); + adjacency[id1 as usize].push(id0); + } + + // Compute distances from the root by breadth-first search + let mut distance: Vec> = vec![None; self.edge_cache.max_id()]; + let mut first_step: Vec> = vec![None; self.edge_cache.max_id()]; + { + let root_id = self.edge_cache.get_id(root); + let mut queue = VecDeque::new(); + queue.push_back(root_id); + distance[root_id as usize] = Some(0); + + while let Some(cur_peer) = queue.pop_front() { + let cur_peer = cur_peer as usize; + // The unwrap here is safe because anything pushed to the queue has a distance + let cur_distance = distance[cur_peer].unwrap(); + + for &neighbor in &adjacency[cur_peer] { + let neighbor = neighbor as usize; + if distance[neighbor].is_none() { + distance[neighbor] = Some(cur_distance + 1); + first_step[neighbor] = first_step[cur_peer].or(Some(neighbor as u32)); + queue.push_back(neighbor as u32); + } + } + } + } + + // Check that the edges in `tree_edges` actually form a tree containing `root` + let mut num_reachable_nodes = 0; + for &dist in &distance { + if dist.is_some() { + num_reachable_nodes += 1; + } + } + if num_reachable_nodes != tree_edges.len() + 1 { + return None; + } + + Some((distance, first_step)) + } + + /// Given a DistanceVector message, validates the advertised distances against the spanning tree. + /// + /// If valid, returns a vector of distances indexed according to the local node's EdgeCache's + /// peer to id mapping. Otherwise, returns None. + /// + /// Removes any advertised routes which go through the local node; it doesn't make sense + /// to forward to a neighbor who will just sent the message right back to us. 
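For reference, a minimal self-contained sketch (toy `usize` node ids, no `EdgeCache` indirection) of the breadth-first traversal performed by `calculate_tree_distances` above: starting from a root, compute each reachable node's distance and the root's neighbor through which it was first reached.

```rust
use std::collections::VecDeque;

fn bfs_distances(
    n: usize,
    adjacency: &[Vec<usize>],
    root: usize,
) -> (Vec<Option<u32>>, Vec<Option<usize>>) {
    let mut distance = vec![None; n];
    let mut first_step = vec![None; n];
    let mut queue = VecDeque::new();
    distance[root] = Some(0);
    queue.push_back(root);
    while let Some(cur) = queue.pop_front() {
        let cur_distance = distance[cur].unwrap();
        for &next in &adjacency[cur] {
            if distance[next].is_none() {
                distance[next] = Some(cur_distance + 1);
                // the root's direct neighbors are their own first step
                first_step[next] = first_step[cur].or(Some(next));
                queue.push_back(next);
            }
        }
    }
    (distance, first_step)
}

fn main() {
    // line graph 0--1--2, rooted at node 0
    let adjacency = vec![vec![1], vec![0, 2], vec![1]];
    let (distance, first_step) = bfs_distances(3, &adjacency, 0);
    assert_eq!(distance, vec![Some(0), Some(1), Some(2)]);
    assert_eq!(first_step, vec![None, Some(1), Some(1)]);
}
```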
+ pub(crate) fn validate_routing_distances( + &mut self, + distance_vector: &network_protocol::DistanceVector, + ) -> Option>> { + // A valid DistanceVector must contain distinct, correctly signed edges + let original_len = distance_vector.edges.len(); + let edges = Edge::deduplicate(distance_vector.edges.clone()); + if edges.len() != original_len || !self.verify_and_cache_edge_nonces(&edges) { + return None; + } + + // Check validity of the spanning tree and compute its basic properties + let tree_traversal = self.calculate_tree_distances(&distance_vector.root, &edges); + let (tree_distance, first_step) = tree_traversal?; + + // Verify that the advertised distances are corroborated by the spanning tree distances + let mut advertised_distances: Vec> = vec![None; self.edge_cache.max_id()]; + for entry in &distance_vector.distances { + let destination_id = self.edge_cache.get_or_create_id(&entry.destination) as usize; + advertised_distances[destination_id] = Some(entry.distance); + } + let mut consistent = true; + for id in 0..self.edge_cache.max_id() { + if let Some(advertised_distance) = advertised_distances[id] { + // The tree must have a route, but it can be shorter than the advertised distance + consistent &= tree_distance[id] + .is_some_and(|tree_distance| tree_distance <= advertised_distance); + } else { + consistent &= tree_distance[id].is_none(); + } + } + // After this point, we know that the DistanceVector message is valid + if !consistent { + return None; + } + + // Now, prune any advertised routes which go through the local node; it doesn't make + // sense to forward a message to a neighbor who will send it back to us + let local_node_id = self.edge_cache.get_local_node_id() as usize; + for id in 0..self.edge_cache.max_id() { + if id != local_node_id + && first_step[id].is_some_and(|first_step| first_step == local_node_id as u32) + { + advertised_distances[id] = None; + } + } + + Some(advertised_distances) + } + + /// Accepts a validated DistanceVector and its `advertised_distances`. + /// Updates the status of the direct connection between the local node and the direct peer. + /// If the peer can be used for forwarding, stores the advertised distances. + /// Returns true iff the distances are stored. + fn store_validated_peer_distances( + &mut self, + distance_vector: &network_protocol::DistanceVector, + mut advertised_distances: Vec>, + ) -> bool { + let local_node_id = self.edge_cache.get_local_node_id() as usize; + + // A direct peer's distance vector which advertises an indirect path to the local node + // is outdated and can be ignored. + if advertised_distances[local_node_id].is_some_and(|distance| distance > 1) { + // TODO(saketh): We could try to be more clever here and do some surgery on the tree + // to replace the indirect path and speed up convergence of the routing protocol. 
+ return false; + } + + // Look in the spanning tree for the direct edge between the local node and the root + let tree_edge = distance_vector.edges.iter().find(|edge| { + edge.contains_peer(&self.config.node_id) && edge.contains_peer(&distance_vector.root) + }); + + // If the tree has more recent state for the direct edge, replace the local state + if let Some(tree_edge) = tree_edge { + self.local_edges + .entry(distance_vector.root.clone()) + .and_modify(|local_edge| { + if tree_edge.nonce() > local_edge.nonce() { + *local_edge = tree_edge.clone(); + } + }) + .or_insert(tree_edge.clone()); + } + + // Without a direct edge, we cannot use the distances advertised by the peer + let Some(local_edge) = self.local_edges.get(&distance_vector.root) else { + return false; + }; + if local_edge.edge_type() == EdgeState::Removed { + return false; + } + + // If the spanning tree doesn't already include the direct edge, add it + let mut spanning_tree = distance_vector.edges.clone(); + if tree_edge.is_none() { + if !advertised_distances[local_node_id].is_none() { + debug_assert!(false); + return false; + } + + spanning_tree.push(local_edge.clone()); + advertised_distances[local_node_id] = Some(1); + } + + // .min().unwrap() is safe here because the tree is now guaranteed to at least + // include the direct edge between the local node and the peer + debug_assert!(!spanning_tree.is_empty()); + let min_nonce = spanning_tree.iter().map(|e| e.nonce()).min().unwrap(); + + // Store the tree used to validate the distances. + self.edge_cache.update_tree(&distance_vector.root, &spanning_tree); + // Store the validated distances + self.peer_distances.insert( + distance_vector.root.clone(), + PeerDistances { distance: advertised_distances, min_nonce }, + ); + + true + } + + /// Verifies the given DistanceVector. + /// Returns a boolean indicating whether the DistanceVector was valid. + /// If applicable, stores the advertised distances for forwarding. + fn handle_distance_vector( + &mut self, + distance_vector: &network_protocol::DistanceVector, + ) -> bool { + // Basic sanity check; `distance_vector` should come from some other peer + if self.config.node_id == distance_vector.root { + return false; + } + + // Validate the advertised distances against the accompanying spanning tree + let validated_distances = self.validate_routing_distances(distance_vector); + + let is_valid = validated_distances.is_some(); + + let stored = match validated_distances { + Some(distances) => self.store_validated_peer_distances(&distance_vector, distances), + None => false, + }; + + if !stored { + // Free ids which may have been allocated to perform validation + self.edge_cache.free_unused_ids(); + } + + return is_valid; + } + + /// Handles disconnection of a peer. + /// - Updates the state of `local_edges`. + /// - Erases the peer's latest spanning tree, if there is one, from `edge_cache`. + /// - Erases the advertised distances for the peer. 
+ pub(crate) fn remove_direct_peer(&mut self, peer_id: &PeerId) { + if let Some(edge) = self.local_edges.get_mut(peer_id) { + // TODO(saketh): refactor Edge once the old routing protocol is deprecated + if edge.edge_type() != EdgeState::Removed { + let (peer0, peer1) = edge.key().clone(); + // V2 routing protocol doesn't broadcast tombstones; don't bother to sign them + *edge = Edge::make_fake_edge(peer0, peer1, edge.nonce() + 1); + } + assert!(edge.edge_type() == EdgeState::Removed); + } + + self.edge_cache.remove_tree(peer_id); + self.peer_distances.remove(peer_id); + } + + /// Handles connection of a new peer or nonce refresh for an existing one. + /// - Updates the state of `local_edges`. + /// - Adds or updates the nonce in the `edge_cache`. + /// - If we don't already have a DistanceVector for this peer, initializes one. + pub(crate) fn add_or_update_direct_peer(&mut self, peer_id: PeerId, edge: Edge) -> bool { + assert_eq!(edge.edge_type(), EdgeState::Active); + + // We have this nonce or a newer one already; ignore the update entirely + if self.edge_cache.has_edge_nonce_or_newer(&edge) { + return true; + } + + // Reject invalid edge + if !self.verify_and_cache_edge_nonces(&vec![edge.clone()]) { + return false; + } + + // Update the state of `local_edges` + self.local_edges.insert(peer_id.clone(), edge.clone()); + + // If we don't already have a DistanceVector received from this peer, + // create one for it and process it as if we received it + if !self.peer_distances.contains_key(&peer_id) { + self.handle_distance_vector(&network_protocol::DistanceVector { + root: peer_id.clone(), + distances: vec![ + // The peer has distance 0 to itself + AdvertisedPeerDistance { destination: peer_id, distance: 0 }, + // The peer is distance 1 from this node + AdvertisedPeerDistance { + destination: self.config.node_id.clone(), + distance: 1, + }, + ], + edges: vec![edge], + }); + } + + true + } + + pub(crate) fn handle_network_change( + &mut self, + _clock: &time::Clock, + update: &NetworkTopologyChange, + ) -> bool { + match update { + NetworkTopologyChange::PeerConnected(peer_id, edge) => { + self.add_or_update_direct_peer(peer_id.clone(), edge.clone()) + } + NetworkTopologyChange::PeerDisconnected(peer_id) => { + self.remove_direct_peer(peer_id); + true + } + NetworkTopologyChange::PeerAdvertisedDistances(distance_vector) => { + self.handle_distance_vector(distance_vector) + } + } + } + + /// Computes and returns "next hops" for all reachable destinations in the network. + /// Accepts a set of "unreliable peers" to avoid routing through. + /// TODO: Actually avoid the unreliable peers + /// + /// Returns the NextHopTable along with a mapping from the reachable nodes in the + /// network to their shortest-path distances. + pub(crate) fn compute_next_hops( + &mut self, + _unreliable_peers: &HashSet, + ) -> (NextHopTable, HashMap) { + let max_id = self.edge_cache.max_id(); + let local_node_id = self.edge_cache.get_local_node_id() as usize; + + // Calculate the min distance to each routable node + let mut min_distance: Vec> = vec![None; max_id]; + min_distance[local_node_id] = Some(0); + for (_, entry) in &mut self.peer_distances { + // The peer to id mapping in the edge_cache is dynamic. We can still use previous distance + // calculations because a node incident to an active edge won't be relabelled. However, + // we may need to resize the distance vector. 
+ entry.distance.resize(max_id, None); + + for id in 0..max_id { + if let Some(peer_distance) = entry.distance[id] { + if !min_distance[id] + .is_some_and(|min_distance| min_distance <= peer_distance + 1) + { + min_distance[id] = Some(peer_distance + 1); + } + } + } + } + + // Compute the next hop table + let mut next_hops_by_id: Vec> = vec![vec![]; self.edge_cache.max_id()]; + for id in 0..max_id { + if let Some(id_distance) = min_distance[id] { + for (peer_id, entry) in &self.peer_distances { + if entry.distance[id] + .is_some_and(|peer_distance| peer_distance + 1 == id_distance) + { + next_hops_by_id[id].push(peer_id.clone()); + } + } + } + } + let mut next_hops = HashMap::>::new(); + for (peer_id, id) in self.edge_cache.iter_peers() { + if !next_hops_by_id[*id as usize].is_empty() { + next_hops.insert(peer_id.clone(), next_hops_by_id[*id as usize].clone()); + } + } + + // Build a PeerId-keyed map of distances + let mut distance: HashMap = HashMap::new(); + for (peer_id, id) in self.edge_cache.iter_peers() { + if let Some(peer_distance) = min_distance[*id as usize] { + distance.insert(peer_id.clone(), peer_distance); + } + } + + (next_hops, distance) + } + + /// Each DistanceVector advertised by a peer includes a collection of edges + /// used to validate the advertised distances. + /// + /// Edges are timestamped when signed and we consider them to be expired + /// once a duration of `self.config.prune_edges_after` has passed. + /// + /// This function checks `peer_distances` for any DistanceVectors containing + /// expired edges. Any such DistanceVectors are removed in their entirety. + /// + /// Also removes old edges from `local_edges` and from the EdgeCache. + fn prune_expired_peer_distances(&mut self, clock: &time::Clock) { + if let Some(prune_edges_after) = self.config.prune_edges_after { + let prune_nonces_older_than = + (clock.now_utc() - prune_edges_after).unix_timestamp() as u64; + + let peers_to_remove: Vec = self + .peer_distances + .iter() + .filter_map(|(peer, entry)| { + if entry.min_nonce < prune_nonces_older_than { + Some(peer.clone()) + } else { + None + } + }) + .collect(); + + for peer_id in &peers_to_remove { + self.remove_direct_peer(peer_id); + } + + self.local_edges.retain(|_, edge| edge.nonce() >= prune_nonces_older_than); + + self.edge_cache.prune_old_edges(prune_nonces_older_than); + } + } + + /// Constructs an instance of network_protocol::DistanceVector advertising the given distances. + /// Returns None iff the `edge_cache` cannot construct a spanning tree achieving the distances. + fn construct_distance_vector_message( + &self, + distances: &HashMap, + ) -> Option { + Some(network_protocol::DistanceVector { + root: self.config.node_id.clone(), + // Collect distances for all known reachable nodes + distances: distances + .iter() + .map(|(destination, distance)| AdvertisedPeerDistance { + destination: destination.clone(), + distance: *distance, + }) + .collect(), + // Construct a spanning tree of signed edges achieving the claimed distances + edges: self.edge_cache.construct_spanning_tree(distances)?, + }) + } + + /// Given the latest computed `distances`, updates `my_distances` and `my_distance_vector`. + /// If distances have changed, returns a DistanceVector message to be broadcast to peers. 
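For intuition, a minimal sketch (toy `&str` peer names and plain `HashMap`s, instead of the dense-id vectors used above) of the next-hop rule implemented by `compute_next_hops`: a direct peer is a valid next hop toward a destination iff forwarding through it attains the minimum distance achievable via any direct peer.

```rust
use std::collections::HashMap;

fn main() {
    // distances advertised by each direct peer: peer -> (destination -> distance)
    let peer_distances: HashMap<&str, HashMap<&str, u32>> = HashMap::from([
        ("p1", HashMap::from([("p1", 0), ("d", 1)])),
        ("p2", HashMap::from([("p2", 0), ("d", 2)])),
    ]);

    // minimum distance to "d" achievable via any direct peer (+1 for the first hop)
    let best = peer_distances
        .values()
        .filter_map(|dist| dist.get("d").map(|d| d + 1))
        .min()
        .unwrap();

    // next hops toward "d": exactly those peers achieving that minimum
    let next_hops: Vec<&str> = peer_distances
        .iter()
        .filter(|(_, dist)| dist.get("d").map(|d| d + 1) == Some(best))
        .map(|(peer, _)| *peer)
        .collect();

    assert_eq!(best, 2);
    assert_eq!(next_hops, vec!["p1"]);
}
```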
+ fn update_distances( + &mut self, + distances: HashMap, + ) -> Option { + if self.my_distances == distances { + return None; + } + + let distance_vector = self.construct_distance_vector_message(&distances)?; + + self.my_distances = distances; + self.my_distance_vector = distance_vector; + + Some(self.my_distance_vector.clone()) + } + + /// Prunes expired peer distances, then recomputes the distances for the local node. + /// Returns the recomputed NextHopTable. + /// If distances have changed, returns an updated DistanceVector to be broadcast. + pub(crate) fn compute_routes( + &mut self, + clock: &time::Clock, + unreliable_peers: &HashSet, + ) -> (NextHopTable, Option) { + let _update_time = metrics::ROUTING_TABLE_RECALCULATION_HISTOGRAM.start_timer(); + + // First prune any peer distances which have expired + self.prune_expired_peer_distances(&clock); + + // Recompute the NextHopTable + let (next_hops, distances) = self.compute_next_hops(unreliable_peers); + + // Store the newly computed distances and construct a DistanceVector message for broadcast + let to_broadcast = self.update_distances(distances); + + // Update metrics after update + metrics::ROUTING_TABLE_RECALCULATIONS.inc(); + metrics::PEER_REACHABLE.set(next_hops.len() as i64); + metrics::EDGE_TOTAL.set(self.edge_cache.known_edges_ct() as i64); + + (next_hops, to_broadcast) + } +} + +pub(crate) struct GraphV2 { + inner: Arc>, + unreliable_peers: ArcSwap>, + pub routing_table: RoutingTableView, + + runtime: Runtime, +} + +impl GraphV2 { + pub fn new(config: GraphConfigV2) -> Self { + let local_node = config.node_id.clone(); + let edge_cache = EdgeCache::new(local_node.clone()); + + let my_distance_vector = network_protocol::DistanceVector { + root: local_node.clone(), + distances: vec![AdvertisedPeerDistance { + destination: local_node.clone(), + distance: 0, + }], + edges: vec![], + }; + + Self { + routing_table: RoutingTableView::new(), + inner: Arc::new(Mutex::new(Inner { + config, + edge_cache, + local_edges: HashMap::new(), + peer_distances: HashMap::new(), + my_distances: HashMap::from([(local_node, 0)]), + my_distance_vector, + })), + unreliable_peers: ArcSwap::default(), + runtime: Runtime::new(), + } + } + + pub fn set_unreliable_peers(&self, unreliable_peers: HashSet) { + self.unreliable_peers.store(Arc::new(unreliable_peers)); + } + + /// Accepts and processes a batch of NetworkTopologyChanges. + /// Each update is verified and, if valid, the advertised distances are stored. + /// After all updates are processed, recomputes the local node's next hop table. + /// + /// May return a new DistanceVector for the local node, to be broadcasted to peers. + /// Does so iff routing distances have changed due to the processed updates. + /// + /// Returns (distance_vector, oks) where + /// * distance_vector is an Option to be broadcasted + /// * oks.len() == distance_vectors.len() and oks[i] is true iff distance_vectors[i] was valid + pub async fn batch_process_network_changes( + self: &Arc, + clock: &time::Clock, + updates: Vec, + ) -> (Option, Vec) { + // TODO(saketh): Consider whether we can move this to rayon. 
+ let this = self.clone(); + let clock = clock.clone(); + self.runtime + .handle + .spawn_blocking(move || { + let mut inner = this.inner.lock(); + + let oks = updates + .iter() + .map(|update| inner.handle_network_change(&clock, update)) + .collect(); + + let (next_hops, to_broadcast) = + inner.compute_routes(&clock, &this.unreliable_peers.load()); + + this.routing_table.update(next_hops.into()); + + (to_broadcast, oks) + }) + .await + .unwrap() + } +} diff --git a/chain/network/src/routing/graph_v2/testonly.rs b/chain/network/src/routing/graph_v2/testonly.rs new file mode 100644 index 00000000000..831c20d9a7f --- /dev/null +++ b/chain/network/src/routing/graph_v2/testonly.rs @@ -0,0 +1,81 @@ +use crate::network_protocol; +use crate::routing::graph_v2::AdvertisedPeerDistance; +use crate::routing::graph_v2::Inner; +use crate::routing::{GraphV2, NetworkTopologyChange, NextHopTable}; +use crate::types::Edge; +use near_async::time; +use near_primitives::network::PeerId; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +impl Inner { + pub(crate) fn verify_and_cache_edge_nonces(&mut self, edges: &Vec) -> bool { + // In tests we make fake edges and don't bother to sign them + for edge in edges { + self.edge_cache.write_verified_nonce(edge); + } + true + } +} + +impl GraphV2 { + pub(crate) fn compute_next_hops(&self) -> (NextHopTable, HashMap) { + self.inner.lock().compute_next_hops(&HashSet::new()) + } + + pub(crate) fn update_distance_vector( + &self, + root: PeerId, + distances: Vec, + edges: Vec, + ) -> bool { + self.inner.lock().handle_distance_vector(&network_protocol::DistanceVector { + root, + distances, + edges, + }) + } + + pub(crate) async fn process_network_event( + self: &Arc, + event: NetworkTopologyChange, + ) -> Option { + let clock = time::FakeClock::default(); + let (to_broadcast, oks) = + self.batch_process_network_changes(&clock.clock(), vec![event]).await; + assert!(oks[0]); + to_broadcast + } + + pub(crate) async fn process_invalid_network_event( + self: &Arc, + event: NetworkTopologyChange, + ) { + let clock = time::FakeClock::default(); + let (_, oks) = self.batch_process_network_changes(&clock.clock(), vec![event]).await; + assert!(!oks[0]); + } + + // Checks that the DistanceVector message for the local node is valid + // and correctly advertises the node's available routes. 
+ pub(crate) fn verify_own_distance_vector( + &self, + expected_distances: HashMap, + distance_vector: &network_protocol::DistanceVector, + ) { + let mut inner = self.inner.lock(); + + assert_eq!(expected_distances, inner.my_distances); + + let mut expected_distances_by_id: Vec> = vec![None; inner.edge_cache.max_id()]; + for (peer_id, distance) in expected_distances.iter() { + let id = inner.edge_cache.get_id(peer_id) as usize; + expected_distances_by_id[id] = Some(*distance); + } + + assert_eq!( + expected_distances_by_id, + inner.validate_routing_distances(distance_vector).unwrap() + ); + } +} diff --git a/chain/network/src/routing/graph_v2/tests.rs b/chain/network/src/routing/graph_v2/tests.rs new file mode 100644 index 00000000000..92bf8a044e3 --- /dev/null +++ b/chain/network/src/routing/graph_v2/tests.rs @@ -0,0 +1,613 @@ +use crate::network_protocol; +use crate::network_protocol::AdvertisedPeerDistance; +use crate::routing::{GraphConfigV2, GraphV2, NetworkTopologyChange}; +use crate::test_utils::expected_routing_tables; +use crate::test_utils::random_peer_id; +use crate::types::Edge; +use near_primitives::network::PeerId; +use std::collections::HashMap; +use std::sync::Arc; + +// Calls `calculate_tree_distances` on the given `root` and `edges`. +// Verifies that the calculated distances and first steps match those in `expected`. +fn verify_calculate_tree_distances( + expected: Option)>>, + root: PeerId, + edges: Vec, +) { + let graph = GraphV2::new(GraphConfigV2 { node_id: random_peer_id(), prune_edges_after: None }); + let mut inner = graph.inner.lock(); + + let calculated = inner.calculate_tree_distances(&root, &edges); + match expected { + Some(ref expected) => { + let (distance, first_step) = calculated.unwrap(); + + // Check for the expected entries + for (node, (expected_distance, expected_first_step)) in expected { + let id = inner.edge_cache.get_id(node) as usize; + + // Map the expected first step to its internal label + let expected_first_step = + expected_first_step.as_ref().map(|peer_id| inner.edge_cache.get_id(&peer_id)); + + // Expected distance should match the calculated one + assert_eq!(*expected_distance, distance[id].unwrap()); + + // Expected first step should match the calculated one + assert_eq!(expected_first_step, first_step[id]); + } + + // Make sure there are no unexpected entries + let mut calculated_reachable_nodes = 0; + for id in 0..inner.edge_cache.max_id() { + if distance[id].is_some() || first_step[id].is_some() { + calculated_reachable_nodes += 1; + } + } + assert_eq!(calculated_reachable_nodes, expected.len()); + } + None => { + assert_eq!(None, calculated); + } + } +} + +#[test] +fn calculate_tree_distances() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 123); + let edge2 = Edge::make_fake_edge(node0.clone(), node2.clone(), 123); + + // Test behavior of distance calculation on an empty tree + verify_calculate_tree_distances( + Some(HashMap::from([(node0.clone(), (0, None))])), + node0.clone(), + vec![], + ); + + // Test behavior of distance calculation on a simple tree 0--1 + verify_calculate_tree_distances( + Some(HashMap::from([ + (node0.clone(), (0, None)), + (node1.clone(), (1, Some(node1.clone()))), + ])), + node0.clone(), + vec![edge0.clone()], + ); + + // Distance calculation should reject a tree which doesn't contain the root + 
verify_calculate_tree_distances(None, node0.clone(), vec![edge1.clone()]); + + // Test behavior of distance calculation on a line graph 0--1--2 + verify_calculate_tree_distances( + Some(HashMap::from([ + (node0.clone(), (0, None)), + (node1.clone(), (1, Some(node1.clone()))), + (node2.clone(), (2, Some(node1.clone()))), + ])), + node0.clone(), + vec![edge0.clone(), edge1.clone()], + ); + // Test again from root 1 in 0--1--2 + verify_calculate_tree_distances( + Some(HashMap::from([ + (node0.clone(), (1, Some(node0.clone()))), + (node1.clone(), (0, None)), + (node2.clone(), (1, Some(node2))), + ])), + node1, + vec![edge0.clone(), edge1.clone()], + ); + + // Distance calculation rejects non-trees + verify_calculate_tree_distances(None, node0, vec![edge0, edge1, edge2]); +} + +#[test] +fn compute_next_hops() { + let node0 = random_peer_id(); + let graph = GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None }); + + // Test behavior on a node with no peers + assert_eq!((HashMap::new(), HashMap::from([(node0.clone(), 0)])), graph.compute_next_hops()); + + // Add a peer node1; 0--1 + let node1 = random_peer_id(); + let edge01 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + assert!(graph.update_distance_vector( + node1.clone(), + vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 } + ], + vec![edge01.clone()] + )); + + let (next_hops, distance) = graph.compute_next_hops(); + assert!(expected_routing_tables(&next_hops, &[(node1.clone(), vec![node1.clone()])])); + assert_eq!(distance, HashMap::from([(node0.clone(), 0), (node1.clone(), 1)])); + + // Add another peer node2 advertising a node3 behind it; 0--2--3 + let node2 = random_peer_id(); + let node3 = random_peer_id(); + let edge02 = Edge::make_fake_edge(node0.clone(), node2.clone(), 123); + let edge23 = Edge::make_fake_edge(node2.clone(), node3.clone(), 123); + assert!(graph.update_distance_vector( + node2.clone(), + vec![ + AdvertisedPeerDistance { destination: node2.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node3.clone(), distance: 1 }, + ], + vec![edge02.clone(), edge23] + )); + + let (next_hops, distance) = graph.compute_next_hops(); + assert!(expected_routing_tables( + &next_hops, + &[ + (node1.clone(), vec![node1.clone()]), + (node2.clone(), vec![node2.clone()]), + (node3.clone(), vec![node2.clone()]), + ] + )); + assert_eq!( + distance, + HashMap::from([ + (node0.clone(), 0), + (node1.clone(), 1), + (node2.clone(), 1), + (node3.clone(), 2) + ]) + ); + + // Update the SPT for node1, also advertising node3 behind it; 0--1--3 + let edge13 = Edge::make_fake_edge(node1.clone(), node3.clone(), 123); + assert!(graph.update_distance_vector( + node1.clone(), + vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node3.clone(), distance: 1 }, + ], + vec![edge01, edge13] + )); + + let (next_hops, distance) = graph.compute_next_hops(); + assert!(expected_routing_tables( + &next_hops, + &[ + (node1.clone(), vec![node1.clone()]), + (node2.clone(), vec![node2.clone()]), + (node3.clone(), vec![node1.clone(), node2.clone()]), + ] + )); + assert_eq!( + distance, + HashMap::from([ + (node0.clone(), 0), + (node1.clone(), 1), + (node2.clone(), 1), + (node3.clone(), 2) + ]) + ); + + // Update the SPT for node2, 
removing the route to node3; 0--2 + assert!(graph.update_distance_vector( + node2.clone(), + vec![ + AdvertisedPeerDistance { destination: node2.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + ], + vec![edge02] + )); + + let (next_hops, distance) = graph.compute_next_hops(); + assert!(expected_routing_tables( + &next_hops, + &[ + (node1.clone(), vec![node1.clone()]), + (node2.clone(), vec![node2.clone()]), + (node3.clone(), vec![node1.clone()]), + ] + )); + assert_eq!(distance, HashMap::from([(node0, 0), (node1, 1), (node2, 1), (node3, 2)])); +} + +#[test] +fn compute_next_hops_discard_loop() { + let node0 = random_peer_id(); + let graph = GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None }); + + // Add a peer node1 which advertises node2 via node0; 2--0--1 + let node1 = random_peer_id(); + let node2 = random_peer_id(); + let edge01 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge02 = Edge::make_fake_edge(node0.clone(), node2.clone(), 123); + assert!(graph.update_distance_vector( + node1.clone(), + vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node2, distance: 2 }, + ], + vec![edge01, edge02] + )); + + // node2 should be ignored because the advertised route to it goes back through the local node + let (next_hops, distance) = graph.compute_next_hops(); + assert!(expected_routing_tables(&next_hops, &[(node1.clone(), vec![node1.clone()])])); + assert_eq!(distance, HashMap::from([(node0, 0), (node1, 1)])); +} + +#[tokio::test] +async fn test_process_network_event() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let graph = + Arc::new(GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None })); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 456); + + // Process a new connection 0--1 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerConnected(node1.clone(), edge0.clone())) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1)]), + &distance_vector_update, + ); + + // Receive a DistanceVector from node1 with node2 behind it; 0--1--2 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node2.clone(), distance: 1 }, + ], + edges: vec![edge0.clone(), edge1.clone()], + }, + )) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1), (node2.clone(), 2)]), + &distance_vector_update, + ); + + // Process a local update (nonce refresh) to the connection 0--1 + let edge0_refreshed = Edge::make_fake_edge(node0.clone(), node1.clone(), 789); + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerConnected(node1.clone(), edge0_refreshed)) + .await; + // This update doesn't trigger a broadcast because node0's available routes haven't changed + assert_eq!(None, distance_vector_update); + // node0's locally stored 
DistanceVector should have the route to node2 + let distance_vector_update = graph.inner.lock().my_distance_vector.clone(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1), (node2.clone(), 2)]), + &distance_vector_update, + ); + + // Process disconnection of node1 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerDisconnected(node1.clone())) + .await + .unwrap(); + graph.verify_own_distance_vector(HashMap::from([(node0.clone(), 0)]), &distance_vector_update); +} + +#[tokio::test] +async fn test_process_network_event_idempotent() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + + let graph = + Arc::new(GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None })); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + + // Process a new connection 0--1 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerConnected(node1.clone(), edge0.clone())) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1)]), + &distance_vector_update, + ); + // Process the same event without error + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerConnected(node1.clone(), edge0.clone())) + .await; + // This update doesn't trigger a broadcast because node0's available routes haven't changed + assert_eq!(None, distance_vector_update); + + // Process disconnection of node1 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerDisconnected(node1.clone())) + .await + .unwrap(); + graph.verify_own_distance_vector(HashMap::from([(node0.clone(), 0)]), &distance_vector_update); + // Process the same event without error + let distance_vector_update = + graph.process_network_event(NetworkTopologyChange::PeerDisconnected(node1.clone())).await; + // This update doesn't trigger a broadcast because node0's available routes haven't changed + assert_eq!(None, distance_vector_update); +} + +#[tokio::test] +async fn test_receive_distance_vector_before_processing_local_connection() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let graph = + Arc::new(GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None })); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 456); + + // Receive a DistanceVector from node1 with node2 behind it; 0--1--2 + // The local node has not processed a NetworkTopologyChange::PeerConnected event + // for node1, but it should handle this DistanceVector correctly anyway. 
+ let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node2.clone(), distance: 1 }, + ], + edges: vec![edge0.clone(), edge1.clone()], + }, + )) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1), (node2.clone(), 2)]), + &distance_vector_update, + ); +} + +#[tokio::test] +async fn test_receive_invalid_distance_vector() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let graph = + Arc::new(GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None })); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 456); + + graph + .process_invalid_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node2.clone(), distance: 1 }, + ], + // Missing edge + edges: vec![edge1.clone()], + }, + )) + .await; + + graph + .process_invalid_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + // Missing route shown by edges + distances: vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + ], + edges: vec![edge0.clone(), edge1.clone()], + }, + )) + .await; + + graph + .process_invalid_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + // Route length is shorter than shown by edges + AdvertisedPeerDistance { destination: node2.clone(), distance: 0 }, + ], + edges: vec![edge0.clone(), edge1.clone()], + }, + )) + .await; +} + +#[tokio::test] +async fn receive_distance_vector_without_route_to_local_node() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + + let graph = + Arc::new(GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None })); + + let edge0 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge1 = Edge::make_fake_edge(node1.clone(), node2.clone(), 456); + + // Broadcasting a distance vector which doesn't have a route to the receiving node + // is valid behavior, but it doesn't provide the receiving node any routes. 
+ let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + // No route to the receiving node node0 + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node2.clone(), distance: 1 }, + ], + edges: vec![edge1.clone()], + }, + )) + .await; + assert_eq!(None, distance_vector_update); + + // Let node0 realize it has a direct connection to node1 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerConnected(node1.clone(), edge0)) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1)]), + &distance_vector_update, + ); + + // Now the same advertised routes from the tree tree 1--2 can be handled by node0, + // which will combine it with the direct edge 0--1 to produce 0--1--2. + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + // No route to the receiving node node0 + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node2.clone(), distance: 1 }, + ], + edges: vec![edge1.clone()], + }, + )) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1), (node2.clone(), 2)]), + &distance_vector_update, + ); + + // node0 should also be able to handle node1's default DistanceVector with no edges + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }], + edges: vec![], + }, + )) + .await + .unwrap(); + graph.verify_own_distance_vector( + HashMap::from([(node0.clone(), 0), (node1.clone(), 1)]), + &distance_vector_update, + ); +} + +/// This test produces a situation in which it is not possible for node0 to construct a spanning +/// tree which is exactly consistent with its distance vector. +/// +/// node0 ends up with a distance of 3 to node4 and a distance of 1 to node2. +/// For either destination, node0 knows a chain of signed edges producing the claimed distance: +/// 0--1--2--4 +/// 0--2 +/// However, it is not possible to construct a tree containing both of these chains. +/// +/// We handle this by allowing the node to construct a spanning tree which achieves all of its +/// claimed distances _or better_. In this case, 0--2--4 is valid. +/// +/// The situation arises as a result of inconsistent states of node1 and node2: +/// - node1 is telling us that node2 has a connection to node4 +/// - node2 is telling us that it has no connection to node4 +/// +/// It is not the responsibility of node0 to decide who is right; perhaps the connection was lost +/// and node1 hasn't realized it yet, or perhaps the connection is newly formed and we haven't received +/// an update from node2 yet (note that direct latency for 0--2 may be worse than the latency 0--1--2). +/// +/// Instead, node0 trusts its peers to have the exact distances which they claim, and does not try to +/// deduce anything from the spanning trees they provide other than verifying the claimed distances. 
+#[tokio::test] +async fn inconsistent_peers() { + let node0 = random_peer_id(); + let node1 = random_peer_id(); + let node2 = random_peer_id(); + let node3 = random_peer_id(); + let node4 = random_peer_id(); + + let graph = + Arc::new(GraphV2::new(GraphConfigV2 { node_id: node0.clone(), prune_edges_after: None })); + + let edge01 = Edge::make_fake_edge(node0.clone(), node1.clone(), 123); + let edge02 = Edge::make_fake_edge(node0.clone(), node2.clone(), 123); + let edge12 = Edge::make_fake_edge(node1.clone(), node2.clone(), 123); + let edge13 = Edge::make_fake_edge(node1.clone(), node3.clone(), 123); + let edge24 = Edge::make_fake_edge(node2.clone(), node4.clone(), 123); + + // Receive a DistanceVector from node1 with routes to 2, 3, 4 behind it + // 0 -- 1 -- 3 + // \ + // 2 -- 4 + graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node1.clone(), + distances: vec![ + AdvertisedPeerDistance { destination: node1.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node2.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node3.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node4.clone(), distance: 2 }, + ], + edges: vec![edge01.clone(), edge12.clone(), edge13.clone(), edge24.clone()], + }, + )) + .await; + + // Receive a DistanceVector from node2 with routes to 1, 3 behind it + // 1 -- 3 + // / + // 0 -- 2 + // + // Notably, node2 does not advertise a route to node 4 + let distance_vector_update = graph + .process_network_event(NetworkTopologyChange::PeerAdvertisedDistances( + network_protocol::DistanceVector { + root: node2.clone(), + distances: vec![ + AdvertisedPeerDistance { destination: node2.clone(), distance: 0 }, + AdvertisedPeerDistance { destination: node0.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node1.clone(), distance: 1 }, + AdvertisedPeerDistance { destination: node3.clone(), distance: 2 }, + ], + edges: vec![edge02.clone(), edge12.clone(), edge13.clone()], + }, + )) + .await + .unwrap(); + + // Best available advertised route to each destination + let expected_routes = HashMap::from([ + (node0.clone(), 0), + (node1.clone(), 1), + (node2.clone(), 1), + (node3.clone(), 2), + (node4.clone(), 3), + ]); + + // There is no set of edges which produces a tree exactly consistent with `expected_routes`, + // but we should be able to construct a valid DistanceVector anyway + graph.verify_own_distance_vector(expected_routes, &distance_vector_update); +} diff --git a/chain/network/src/routing/mod.rs b/chain/network/src/routing/mod.rs index 7112ce2b92d..baf80b6a186 100644 --- a/chain/network/src/routing/mod.rs +++ b/chain/network/src/routing/mod.rs @@ -1,7 +1,10 @@ mod bfs; pub(crate) mod edge; +mod edge_cache; mod graph; +mod graph_v2; pub(crate) mod route_back_cache; pub mod routing_table_view; pub(crate) use graph::{Graph, GraphConfig, NextHopTable}; +pub(crate) use graph_v2::{GraphConfigV2, GraphV2, NetworkTopologyChange}; diff --git a/chain/network/src/types.rs b/chain/network/src/types.rs index 74189f43998..b4e3a679924 100644 --- a/chain/network/src/types.rs +++ b/chain/network/src/types.rs @@ -65,6 +65,7 @@ pub enum ReasonForBan { InvalidPeerId = 8, InvalidHash = 9, InvalidEdge = 10, + InvalidDistanceVector = 11, Blacklisted = 14, } From 612348aa37083f73c958d1e1b2274a17d6c4659e Mon Sep 17 00:00:00 2001 From: Yasir Date: Tue, 18 Jul 2023 12:52:41 +0300 Subject: [PATCH 21/50] fix: 
use logging instead of print statements (#9277) @frol I went through the related code, found this is the only required edit as we already set up logging services in the nearcore. --- core/o11y/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/o11y/src/lib.rs b/core/o11y/src/lib.rs index 0ef1f85ba36..2a255e6d2d7 100644 --- a/core/o11y/src/lib.rs +++ b/core/o11y/src/lib.rs @@ -462,7 +462,10 @@ pub fn reload_log_config(config: Option<&log_config::LogConfig>) { tracing::info!("Updated the logging layer according to `log_config.json`"); } Err(err) => { - tracing::info!("Failed to update the logging layer according to the changed `log_config.json`. Errors: {:?}", err); + eprintln!( + "Failed to update the logging layer according to the changed `log_config.json`. Errors: {:?}", + err + ); } } } From c935186ad416ba5cb0ea20f8c124e9a3a6395bd5 Mon Sep 17 00:00:00 2001 From: Aleksandr Logunov Date: Tue, 18 Jul 2023 15:45:00 +0400 Subject: [PATCH 22/50] refactor: todo to remove flat storage creation parameters (#9250) Recommend future readers to stop considering these parameters, because heavy flat storage migration already happened on all nodes in the ecosystem. So this case shouldn't complicate work like #9121. --- core/store/src/config.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/store/src/config.rs b/core/store/src/config.rs index 18fc123108f..77d20c6b331 100644 --- a/core/store/src/config.rs +++ b/core/store/src/config.rs @@ -85,14 +85,17 @@ pub struct StoreConfig { /// Number of threads to execute storage background migrations. /// Needed to create flat storage which need to happen in parallel /// with block processing. + /// TODO (#8826): remove, because creation successfully happened in 1.34. pub background_migration_threads: usize, /// Enables background flat storage creation. + /// TODO (#8826): remove, because creation successfully happened in 1.34. pub flat_storage_creation_enabled: bool, /// Duration to perform background flat storage creation step. Defines how /// frequently we check creation status and execute work related to it in /// main thread (scheduling and collecting state parts, catching up blocks, etc.). + /// TODO (#8826): remove, because creation successfully happened in 1.34. pub flat_storage_creation_period: Duration, /// Enables state snapshot at the beginning of epochs. From e254ae8aaf66c67a5158c929cd8c625e0c58e4d9 Mon Sep 17 00:00:00 2001 From: Jakob Meier Date: Tue, 18 Jul 2023 16:24:26 +0200 Subject: [PATCH 23/50] refactor(loadtest): backwards compatible type hints (#9323) `list[...]` in type hints only works for python 3.9 and up. For older python versions, we should use `typing.List[...]`. I first thought we should require newer python for locust tests, also using `match` (see #9125) but it seems we are somewhat dependent on older Ubuntu versions for now. At least I've been checking out code on gcp machines created by terraform templates and needed to patch the type hints to get the code running without installing a new python version. This PR makes the code fully backward compatible again by simply using the `typing` module which is available since python 3.5. 
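A minimal sketch of the pattern, for reference (the `shard_ids` helper below is hypothetical and not part of this change): subscripting the built-in `list` in an annotation fails at function definition time on Python 3.8 and older, while the `typing` alias works from 3.5 onwards.

```
import typing

# OK on Python 3.5+. The equivalent `-> list[int]` annotation is only
# accepted on Python 3.9+ (or behind `from __future__ import annotations`).
def shard_ids(count: int) -> typing.List[int]:
    return list(range(count))

print(shard_ids(4))  # [0, 1, 2, 3]
```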
--- pytest/tests/loadtest/locust/common/base.py | 2 +- pytest/tests/loadtest/locust/common/ft.py | 3 ++- pytest/tests/loadtest/locust/common/social.py | 7 ++++--- pytest/tests/loadtest/locust/common/sweat.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pytest/tests/loadtest/locust/common/base.py b/pytest/tests/loadtest/locust/common/base.py index 9894ed7c4f6..0f61d85d3cc 100644 --- a/pytest/tests/loadtest/locust/common/base.py +++ b/pytest/tests/loadtest/locust/common/base.py @@ -270,7 +270,7 @@ def send_tx(self, tx: Transaction, locust_name) -> dict: self.request_event.fire(**meta) return meta - def post_json(self, method: str, params: list[str]): + def post_json(self, method: str, params: typing.List[str]): j = { "method": method, "params": params, diff --git a/pytest/tests/loadtest/locust/common/ft.py b/pytest/tests/loadtest/locust/common/ft.py index 037c1bdb477..4572f1c30f8 100644 --- a/pytest/tests/loadtest/locust/common/ft.py +++ b/pytest/tests/loadtest/locust/common/ft.py @@ -1,6 +1,7 @@ import random import sys import pathlib +import typing from locust import events sys.path.append(str(pathlib.Path(__file__).resolve().parents[4] / 'lib')) @@ -46,7 +47,7 @@ def register_user(self, user: NearUser): def random_receiver(self, sender: str) -> str: return self.random_receivers(sender, 1)[0] - def random_receivers(self, sender: str, num) -> list[str]: + def random_receivers(self, sender: str, num) -> typing.List[str]: rng = random.Random() receivers = rng.sample(self.registered_users, num) # Sender must be != receiver but maybe there is no other registered user diff --git a/pytest/tests/loadtest/locust/common/social.py b/pytest/tests/loadtest/locust/common/social.py index 5e9cd643996..1d735824c68 100644 --- a/pytest/tests/loadtest/locust/common/social.py +++ b/pytest/tests/loadtest/locust/common/social.py @@ -2,6 +2,7 @@ import json import sys import pathlib +import typing import unittest sys.path.append(str(pathlib.Path(__file__).resolve().parents[4] / 'lib')) @@ -40,7 +41,7 @@ def args(self) -> dict: class Follow(SocialDbSet): def __init__(self, contract_id: str, sender: Account, - follow_list: list[str]): + follow_list: typing.List[str]): super().__init__(contract_id, sender) self.follow_list = follow_list @@ -118,7 +119,7 @@ def social_db_build_index_obj(key_list_pairs: dict) -> dict: A dict instead of a list of tuples doesn't work because keys can be duplicated. 
""" - def serialize_values(values: list[tuple[str, dict]]): + def serialize_values(values: typing.List[typing.Tuple[str, dict]]): return json.dumps([{"key": k, "value": v} for k, v in values]) return { @@ -151,7 +152,7 @@ def social_db_set_msg(sender: str, values: dict, index: dict) -> dict: return msg -def social_follow_args(sender: str, follow_list: list[str]) -> dict: +def social_follow_args(sender: str, follow_list: typing.List[str]) -> dict: follow_map = {} graph = [] notify = [] diff --git a/pytest/tests/loadtest/locust/common/sweat.py b/pytest/tests/loadtest/locust/common/sweat.py index f711e4ab298..ff60c8cd434 100644 --- a/pytest/tests/loadtest/locust/common/sweat.py +++ b/pytest/tests/loadtest/locust/common/sweat.py @@ -120,7 +120,7 @@ class SweatMintBatch(FunctionCall): """ def __init__(self, sweat_id: str, oracle: Account, - recipient_step_pairs: list[RecipientSteps]): + recipient_step_pairs: typing.List[RecipientSteps]): super().__init__(oracle, sweat_id, "record_batch") self.recipient_step_pairs = recipient_step_pairs From 365c69aeba392aeb061829ccfce78aa8017a9a63 Mon Sep 17 00:00:00 2001 From: Razvan Barbascu Date: Tue, 18 Jul 2023 23:02:38 +0100 Subject: [PATCH 24/50] feat(state-sync): Add config for number of downloads during catchup (#9318) We can limit the impact of state sync during catchup by turning this number down. This way validation of blocks will not be hindered while the node downloads the state. --- chain/client/src/client.rs | 2 ++ chain/client/src/sync/state.rs | 12 +++++++++--- chain/client/src/sync/state_sync_actor.rs | 0 core/chain-configs/src/client_config.rs | 11 ++++++++++- .../src/tests/nearcore/sync_state_nodes.rs | 1 + 5 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 chain/client/src/sync/state_sync_actor.rs diff --git a/chain/client/src/client.rs b/chain/client/src/client.rs index 8128c5097d9..0f7c589d574 100644 --- a/chain/client/src/client.rs +++ b/chain/client/src/client.rs @@ -271,6 +271,7 @@ impl Client { config.state_sync_timeout, &config.chain_id, &config.state_sync.sync, + false, ); let num_block_producer_seats = config.num_block_producer_seats as usize; let data_parts = epoch_manager.num_data_parts(); @@ -2140,6 +2141,7 @@ impl Client { state_sync_timeout, &self.config.chain_id, &self.config.state_sync.sync, + true, ), shards_to_split, BlocksCatchUpState::new(sync_hash, epoch_id), diff --git a/chain/client/src/sync/state.rs b/chain/client/src/sync/state.rs index 96acb702b63..9b2a02514f1 100644 --- a/chain/client/src/sync/state.rs +++ b/chain/client/src/sync/state.rs @@ -166,6 +166,7 @@ impl StateSync { timeout: TimeDuration, chain_id: &str, sync_config: &SyncConfig, + catchup: bool, ) -> Self { let inner = match sync_config { SyncConfig::Peers => StateSyncInner::Peers { @@ -175,6 +176,7 @@ impl StateSync { SyncConfig::ExternalStorage(ExternalStorageConfig { location, num_concurrent_requests, + num_concurrent_requests_during_catchup, }) => { let external = match location { ExternalStorageLocation::S3 { bucket, region } => { @@ -188,11 +190,14 @@ impl StateSync { ExternalConnection::Filesystem { root_dir: root_dir.clone() } } }; + let num_permits = if catchup { + *num_concurrent_requests_during_catchup + } else { + *num_concurrent_requests + } as usize; StateSyncInner::PartsFromExternal { chain_id: chain_id.to_string(), - semaphore: Arc::new(tokio::sync::Semaphore::new( - *num_concurrent_requests as usize, - )), + semaphore: Arc::new(tokio::sync::Semaphore::new(num_permits)), external, } } @@ -1414,6 +1419,7 @@ mod test { 
TimeDuration::from_secs(1), "chain_id", &SyncConfig::Peers, + false, ); let mut new_shard_sync = HashMap::new(); diff --git a/chain/client/src/sync/state_sync_actor.rs b/chain/client/src/sync/state_sync_actor.rs new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/chain-configs/src/client_config.rs b/core/chain-configs/src/client_config.rs index a425718d6fb..de6ff8377a8 100644 --- a/core/chain-configs/src/client_config.rs +++ b/core/chain-configs/src/client_config.rs @@ -26,6 +26,7 @@ pub const DEFAULT_GC_NUM_EPOCHS_TO_KEEP: u64 = 5; /// Default number of concurrent requests to external storage to fetch state parts. pub const DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL: u32 = 25; +pub const DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL: u32 = 5; /// Configuration for garbage collection. #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] @@ -77,14 +78,22 @@ fn default_num_concurrent_requests() -> u32 { DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL } +fn default_num_concurrent_requests_during_catchup() -> u32 { + DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL +} + #[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] pub struct ExternalStorageConfig { /// Location of state parts. pub location: ExternalStorageLocation, /// When fetching state parts from external storage, throttle fetch requests - /// to this many concurrent requests per shard. + /// to this many concurrent requests. #[serde(default = "default_num_concurrent_requests")] pub num_concurrent_requests: u32, + /// During catchup, the node will use a different number of concurrent requests + /// to reduce the performance impact of state sync. + #[serde(default = "default_num_concurrent_requests_during_catchup")] + pub num_concurrent_requests_during_catchup: u32, } #[derive(serde::Serialize, serde::Deserialize, Clone, Debug)] diff --git a/integration-tests/src/tests/nearcore/sync_state_nodes.rs b/integration-tests/src/tests/nearcore/sync_state_nodes.rs index 1971ed2f408..c3df857a62e 100644 --- a/integration-tests/src/tests/nearcore/sync_state_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_state_nodes.rs @@ -492,6 +492,7 @@ fn sync_state_dump() { root_dir: dump_dir.path().to_path_buf(), }, num_concurrent_requests: 10, + num_concurrent_requests_during_catchup: 1, }); let nearcore::NearNode { From 1477ec897c7cb035351e7a44e3bffcd6780770da Mon Sep 17 00:00:00 2001 From: Jure Bajic Date: Thu, 20 Jul 2023 12:01:02 +0200 Subject: [PATCH 25/50] chore: Update RocksDB to 0.21 (#9298) This update brings a lot of new changes: - Update to RocksDB 8.1.1 - `io_uring` enabled which can be tested - Added `load_latest` to open RocksDB with the latest options file - and other fixes No degradation was seen using a `perf-state` tool --- Cargo.lock | 35 +++++++++++++++++++++++++++++------ Cargo.toml | 2 +- core/store/src/db/rocksdb.rs | 7 +++---- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4fa13804157..1119744036a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -611,9 +611,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.60.1" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags 1.3.2", "cexpr", @@ -621,11 +621,13 @@ dependencies = [ "lazy_static", "lazycell", 
"peeking_take_while", + "prettyplease", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", + "syn 2.0.15", ] [[package]] @@ -2939,9 +2941,9 @@ dependencies = [ [[package]] name = "librocksdb-sys" -version = "0.8.0+7.4.4" +version = "0.11.0+8.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611804e4666a25136fcc5f8cf425ab4d26c7f74ea245ffe92ea23b85b6420b5d" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" dependencies = [ "bindgen", "bzip2-sys", @@ -2949,6 +2951,7 @@ dependencies = [ "glob", "libc", "libz-sys", + "lz4-sys", "tikv-jemalloc-sys", "zstd-sys", ] @@ -3077,6 +3080,16 @@ dependencies = [ "hashbrown 0.11.2", ] +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "lzma-sys" version = "0.1.17" @@ -5163,6 +5176,16 @@ dependencies = [ "output_vt100", ] +[[package]] +name = "prettyplease" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +dependencies = [ + "proc-macro2", + "syn 2.0.15", +] + [[package]] name = "primitive-types" version = "0.10.1" @@ -5754,9 +5777,9 @@ dependencies = [ [[package]] name = "rocksdb" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9562ea1d70c0cc63a34a22d977753b50cca91cc6b6527750463bd5dd8697bc" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" dependencies = [ "libc", "librocksdb-sys", diff --git a/Cargo.toml b/Cargo.toml index 864c06a6b0b..4a206dfe961 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -270,7 +270,7 @@ reqwest = { version = "0.11.14", features = ["blocking"] } ripemd = "0.1.1" rkyv = "0.7.31" rlimit = "0.7" -rocksdb = { version = "0.19.0", default-features = false, features = ["snappy", "lz4", "zstd", "zlib", "jemalloc"] } +rocksdb = { version = "0.21.0", default-features = false, features = ["snappy", "lz4", "zstd", "zlib", "jemalloc"] } runtime-tester = { path = "test-utils/runtime-tester" } rusqlite = { version = "0.27.0", features = ["bundled", "chrono", "functions"] } rustc-demangle = "0.1" diff --git a/core/store/src/db/rocksdb.rs b/core/store/src/db/rocksdb.rs index 54d48d86cfd..bd448f0a194 100644 --- a/core/store/src/db/rocksdb.rs +++ b/core/store/src/db/rocksdb.rs @@ -147,7 +147,7 @@ impl RocksDB { .map_err(into_other)?; if cfg!(feature = "single_thread_rocksdb") { // These have to be set after open db - let mut env = Env::default().unwrap(); + let mut env = Env::new().unwrap(); env.set_bottom_priority_background_threads(0); env.set_high_priority_background_threads(0); env.set_low_priority_background_threads(0); @@ -473,8 +473,7 @@ fn rocksdb_block_based_options( let mut block_opts = BlockBasedOptions::default(); block_opts.set_block_size(block_size.as_u64().try_into().unwrap()); // We create block_cache for each of 47 columns, so the total cache size is 32 * 47 = 1504mb - block_opts - .set_block_cache(&Cache::new_lru_cache(cache_size.as_u64().try_into().unwrap()).unwrap()); + block_opts.set_block_cache(&Cache::new_lru_cache(cache_size.as_u64().try_into().unwrap())); block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); block_opts.set_cache_index_and_filter_blocks(true); block_opts.set_bloom_filter(10.0, true); @@ -584,7 +583,7 @@ impl Drop for RocksDB { 
if cfg!(feature = "single_thread_rocksdb") { // RocksDB with only one thread stuck on wait some condition var // Turn on additional threads to proceed - let mut env = Env::default().unwrap(); + let mut env = Env::new().unwrap(); env.set_background_threads(4); } self.db.cancel_all_background_work(true); From 4c76e19c333b036a6fdbc040efd22418f87652e1 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Thu, 20 Jul 2023 14:14:27 +0200 Subject: [PATCH 26/50] fix(db-tool): Tool to run DB migrations --- tools/database/README.md | 10 ++++++++++ tools/database/src/commands.rs | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/database/README.md b/tools/database/README.md index 7642f841445..19523d59f8c 100644 --- a/tools/database/README.md +++ b/tools/database/README.md @@ -62,6 +62,16 @@ Then you can call `neard database change-db-kind --new-kind Cold change-hot`. Notice that even though in your mind this db is cold, in your config this db hot, so you have to pass `change-hot`. +### run-migrations +Opens the DB and runs migrations to bring it to the actual version expected by `neard` +Example usage: +```bash +cargo run --bin neard database run-migrations +``` + +For example, if the binary expects DB version `38`, but the DB is currently +version `36`, the command will open the DB, run migrations that bring the DB +from version `36` to version `38`, and then exits. ## Make a DB Snapshot Makes a copy of a DB (hot store only) at a specified location. If the diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 9ed84bd8285..047ba9bea3a 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -45,7 +45,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } - SumCommand::RunMigrationsCommand(cmd) => { + SubCommand::RunMigrationsCommand(cmd) => { let mut near_config = nearcore::config::load_config( &home, near_chain_configs::GenesisValidationMode::UnsafeFast, From c5a85e8417ec0334369a95270f90528949b30794 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Thu, 20 Jul 2023 14:32:07 +0200 Subject: [PATCH 27/50] fix(db-tool): Tool to run DB migrations --- tools/database/src/commands.rs | 9 +-------- tools/database/src/run_migrations.rs | 9 +++++++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 047ba9bea3a..eab6fb58e4d 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -45,14 +45,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } - SubCommand::RunMigrationsCommand(cmd) => { - let mut near_config = nearcore::config::load_config( - &home, - near_chain_configs::GenesisValidationMode::UnsafeFast, - ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - cmd.run(home, &mut near_config) - } + SubCommand::RunMigrations(cmd) => cmd.run(home), SubCommand::StatePerf(cmd) => cmd.run(home), } } diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index 5d713cc12aa..bc741cb150d 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -18,8 +18,13 @@ enum DbSelector { pub(crate) struct RunMigrationsCommand { } impl RunMigrationsCommand { - pub(crate) fn run(&self, home_dir: &Path, near_config: &mut NearConfig) -> 
anyhow::Result<()> { - let storage = open_storage(home_dir, near_config)?; + pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> { + let mut near_config = nearcore::config::load_config( + &home_dir, + near_chain_configs::GenesisValidationMode::UnsafeFast, + ) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + open_storage(home_dir, &mut near_config)?; Ok(()) } } From dcb483d61d7f959de51949702000254727fb185f Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Thu, 20 Jul 2023 14:46:33 +0200 Subject: [PATCH 28/50] fix(db-tool): Tool to run DB migrations --- tools/database/src/run_migrations.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index bc741cb150d..b3fcfa0dec8 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -1,6 +1,3 @@ -use near_store::metadata::DbKind; -use near_store::{Mode, NodeStorage}; -use nearcore::{migrations, NearConfig, open_storage}; use std::path::Path; /// This can potentially support db specified not in config, but in command line. @@ -24,7 +21,7 @@ impl RunMigrationsCommand { near_chain_configs::GenesisValidationMode::UnsafeFast, ) .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - open_storage(home_dir, &mut near_config)?; + nearcore::open_storage(home_dir, &mut near_config)?; Ok(()) } } From a1f5fe82bca8e1f1fdb609ea3d3f3ac248b0bf90 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Thu, 20 Jul 2023 16:21:54 +0200 Subject: [PATCH 29/50] fmt --- tools/database/src/run_migrations.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index b3fcfa0dec8..cf9f88bbc45 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -1,16 +1,5 @@ use std::path::Path; -/// This can potentially support db specified not in config, but in command line. -/// `ChangeRelative { path: Path, archive: bool }` -/// But it is a pain to implement, because of all the current storage possibilities. -/// So, I'll leave it as a TODO(posvyatokum): implement relative path DbSelector. -/// This can be useful workaround for config modification. 
-#[derive(clap::Subcommand)] -enum DbSelector { - ChangeHot, - ChangeCold, -} - #[derive(clap::Args)] pub(crate) struct RunMigrationsCommand { } From 24ebda2002f6bc73064796e0080685acc80e9476 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 17:15:31 +0200 Subject: [PATCH 30/50] fmt --- tools/database/src/run_migrations.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index cf9f88bbc45..a301d39260b 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -1,7 +1,7 @@ use std::path::Path; #[derive(clap::Args)] -pub(crate) struct RunMigrationsCommand { } +pub(crate) struct RunMigrationsCommand {} impl RunMigrationsCommand { pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> { @@ -9,7 +9,7 @@ impl RunMigrationsCommand { &home_dir, near_chain_configs::GenesisValidationMode::UnsafeFast, ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); nearcore::open_storage(home_dir, &mut near_config)?; Ok(()) } From 12a8e6f942e5b1fc8a4f9cecd37fd46d6f944b23 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Thu, 20 Jul 2023 14:08:10 +0200 Subject: [PATCH 31/50] fix(db-tool): Tool to run DB migrations --- tools/database/src/commands.rs | 24 ++++++++++-------------- tools/database/src/lib.rs | 3 --- tools/database/src/run_migrations.rs | 25 +++++++++++++++++-------- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index eab6fb58e4d..8c3e946ab24 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -1,8 +1,6 @@ use crate::adjust_database::ChangeDbKindCommand; use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand; -use crate::make_snapshot::MakeSnapshotCommand; use crate::run_migrations::RunMigrationsCommand; -use crate::state_perf::StatePerfCommand; use clap::Parser; use std::path::PathBuf; @@ -21,32 +19,30 @@ enum SubCommand { /// Change DbKind of hot or cold db. ChangeDbKind(ChangeDbKindCommand), - /// Make snapshot of the database - MakeSnapshot(MakeSnapshotCommand), - /// Run migrations, RunMigrations(RunMigrationsCommand), - - /// Run performance test for State column reads. - /// Uses RocksDB data specified via --home argument. 
- StatePerf(StatePerfCommand), } impl DatabaseCommand { pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> { match &self.subcmd { SubCommand::AnalyseDataSizeDistribution(cmd) => cmd.run(home), - SubCommand::ChangeDbKind(cmd) => cmd.run(home), - SubCommand::MakeSnapshot(cmd) => { + SubCommand::ChangeDbKind(cmd) => { let near_config = nearcore::config::load_config( &home, near_chain_configs::GenesisValidationMode::UnsafeFast, ) .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - cmd.run(home, near_config.config.archive, &near_config.config.store) + cmd.run(home, &near_config) + } + SumCommand::RunMigrationsCommand(cmd) => { + let mut near_config = nearcore::config::load_config( + &home, + near_chain_configs::GenesisValidationMode::UnsafeFast, + ) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + cmd.run(home, &mut near_config) } - SubCommand::RunMigrations(cmd) => cmd.run(home), - SubCommand::StatePerf(cmd) => cmd.run(home), } } } diff --git a/tools/database/src/lib.rs b/tools/database/src/lib.rs index f4bb1914908..a9261186b50 100644 --- a/tools/database/src/lib.rs +++ b/tools/database/src/lib.rs @@ -1,7 +1,4 @@ mod adjust_database; mod analyse_data_size_distribution; pub mod commands; -mod make_snapshot; mod run_migrations; -mod state_perf; -mod utils; diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index a301d39260b..5d713cc12aa 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -1,16 +1,25 @@ +use near_store::metadata::DbKind; +use near_store::{Mode, NodeStorage}; +use nearcore::{migrations, NearConfig, open_storage}; use std::path::Path; +/// This can potentially support db specified not in config, but in command line. +/// `ChangeRelative { path: Path, archive: bool }` +/// But it is a pain to implement, because of all the current storage possibilities. +/// So, I'll leave it as a TODO(posvyatokum): implement relative path DbSelector. +/// This can be useful workaround for config modification. +#[derive(clap::Subcommand)] +enum DbSelector { + ChangeHot, + ChangeCold, +} + #[derive(clap::Args)] -pub(crate) struct RunMigrationsCommand {} +pub(crate) struct RunMigrationsCommand { } impl RunMigrationsCommand { - pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> { - let mut near_config = nearcore::config::load_config( - &home_dir, - near_chain_configs::GenesisValidationMode::UnsafeFast, - ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - nearcore::open_storage(home_dir, &mut near_config)?; + pub(crate) fn run(&self, home_dir: &Path, near_config: &mut NearConfig) -> anyhow::Result<()> { + let storage = open_storage(home_dir, near_config)?; Ok(()) } } From 18c2333f3d33ee622c2c02fd23964855b9956e50 Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 13 Jul 2023 14:22:41 +0200 Subject: [PATCH 32/50] feat: simple nightshade v2 - shard layout with 5 shards (#9274) Introduced new protocol version called SimpleNightshadeV2, guarded it behind the rust feature `protocol_feature_simple_nightshade_v2` and added it to nightly. Refactored the AllEpochConfig::for_protocol_version a bit and added the SimpleNightshadeV2 shard layout to it. Note that I'm only hiding the SimpleNightshadeV2 behind the rust feature, I'm not planning on adding it everywhere. I'm reusing the same ShardLayout::V1 structure, just with bumped version and an extra boundary account. 
This should allow for smooth development since we won't need to guard all of the new code behind the new rust feature. I tested it manually and some sort of resharding did happen. I'm yet to fully appreciate what exactly happened and if it's any good, as well as add some proper tests. I'll do that in separate PRs. test repro instructions: ``` - get the current layout in json by running the print_shard_layout_all test and put it in $SHARD_LAYOUT - generate localnet setup with 4 shards and 1 validator - in the genesis file overwrite: - .epoch_length=10 - .use_production_config=true - .shard_layout=$SHARD_LAYOUT - build neard with nightly not enabled - run neard for at least one epoch - build neard with nightly enabled - run neard - watch resharding happening (only enabled debug logs for "catchup" target) - see new shard layout in the debug page ``` ![Screenshot 2023-07-11 at 15 34 36](https://github.com/near/nearcore/assets/1555986/5b83d645-4fdf-4994-a215-a500c0c0092f) resharding logs: https://gist.github.com/wacban/7b3a8c74c80f99003c71b92bea44539f --- chain/chain/src/chain.rs | 371 ++++++++++++-------------- chain/chain/src/tests/simple_chain.rs | 4 +- core/primitives-core/Cargo.toml | 2 - core/primitives-core/src/version.rs | 6 +- core/primitives/Cargo.toml | 2 - 5 files changed, 172 insertions(+), 213 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index a8becf1ecdb..779dc948317 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -27,17 +27,17 @@ use delay_detector::DelayDetector; use itertools::Itertools; use lru::LruCache; use near_chain_primitives::error::{BlockKnownError, Error, LogTransientStorageError}; +use near_client_primitives::types::StateSplitApplyingStatus; use near_epoch_manager::shard_tracker::ShardTracker; use near_epoch_manager::types::BlockHeaderInfo; use near_epoch_manager::EpochManagerAdapter; use near_o11y::log_assert; -use near_primitives::block::{genesis_chunks, BlockValidityError, Tip}; +use near_primitives::block::{genesis_chunks, Tip}; use near_primitives::challenge::{ BlockDoubleSign, Challenge, ChallengeBody, ChallengesResult, ChunkProofs, ChunkState, MaybeEncodedShardChunk, PartialState, SlashedValidator, }; use near_primitives::checked_feature; -use near_primitives::errors::EpochError; use near_primitives::hash::{hash, CryptoHash}; use near_primitives::merkle::{ combine_hash, merklize, verify_path, Direction, MerklePath, MerklePathItem, PartialMerkleTree, @@ -75,7 +75,7 @@ use near_store::flat::{ store_helper, FlatStateChanges, FlatStateDelta, FlatStateDeltaMetadata, FlatStorageError, FlatStorageReadyStatus, FlatStorageStatus, }; -use near_store::{get_genesis_state_roots, StorageError}; +use near_store::StorageError; use near_store::{DBCol, ShardTries, WrappedTrieChanges}; use once_cell::sync::OnceCell; use rand::seq::SliceRandom; @@ -449,8 +449,7 @@ pub struct Chain { apply_chunks_receiver: Receiver, /// Time when head was updated most recently. last_time_head_updated: Instant, - /// Prevents re-application of known-to-be-invalid blocks, so that in case of a - /// protocol issue we can recover faster by focusing on correct blocks. + invalid_blocks: LruCache, /// Support for sandbox's patch_state requests. 
@@ -490,31 +489,13 @@ impl Drop for Chain { let _ = self.blocks_in_processing.wait_for_all_blocks(); } } - -/// PreprocessBlockResult is a tuple where -/// the first element is a vector of jobs to apply chunks -/// the second element is BlockPreprocessInfo -type PreprocessBlockResult = ( - Vec Result + Send + 'static>>, - BlockPreprocessInfo, -); - -// Used only for verify_block_hash_and_signature. See that method. -#[derive(Clone, Copy, PartialEq, Eq)] -pub enum VerifyBlockHashAndSignatureResult { - Correct, - Incorrect, - CannotVerifyBecauseBlockIsOrphan, -} - impl Chain { pub fn make_genesis_block( epoch_manager: &dyn EpochManagerAdapter, runtime_adapter: &dyn RuntimeAdapter, chain_genesis: &ChainGenesis, ) -> Result { - let state_roots = get_genesis_state_roots(runtime_adapter.store())? - .expect("genesis should be initialized."); + let (_, state_roots) = runtime_adapter.genesis_state(); let genesis_chunks = genesis_chunks( state_roots, epoch_manager.num_shards(&EpochId::default())?, @@ -588,13 +569,9 @@ impl Chain { make_snapshot_callback: Option, ) -> Result { // Get runtime initial state and create genesis block out of it. - let state_roots = get_genesis_state_roots(runtime_adapter.store())? - .expect("genesis should be initialized."); - let mut store = ChainStore::new( - runtime_adapter.store().clone(), - chain_genesis.height, - chain_config.save_trie_changes, - ); + let (store, state_roots) = runtime_adapter.genesis_state(); + let mut store = + ChainStore::new(store, chain_genesis.height, chain_config.save_trie_changes); let genesis_chunks = genesis_chunks( state_roots.clone(), epoch_manager.num_shards(&EpochId::default())?, @@ -1090,27 +1067,15 @@ impl Chain { Ok(()) } - fn maybe_mark_block_invalid(&mut self, block_hash: CryptoHash, error: &Error) { - metrics::NUM_INVALID_BLOCKS.inc(); - // We only mark the block as invalid if the block has bad data (not for other errors that would - // not be the fault of the block itself), except when the block has a bad signature which means - // the block might not have been what the block producer originally produced. Either way, it's - // OK if we miss some cases here because this is just an optimization to avoid reprocessing - // known invalid blocks so the network recovers faster in case of any issues. - if error.is_bad_data() && !matches!(error, Error::InvalidSignature) { - self.invalid_blocks.put(block_hash, ()); - } - } - /// Return a StateSyncInfo that includes the information needed for syncing state for shards needed /// in the next epoch. - fn get_state_sync_info( + fn get_state_dl_info( &self, me: &Option, block: &Block, ) -> Result, Error> { let prev_hash = *block.header().prev_hash(); - let shards_to_state_sync = Chain::get_shards_to_state_sync( + let shards_to_dl = Chain::get_shards_to_dl_state( self.epoch_manager.as_ref(), &self.shard_tracker, me, @@ -1118,7 +1083,7 @@ impl Chain { )?; let prev_block = self.get_block(&prev_hash)?; - if prev_block.chunks().len() != block.chunks().len() && !shards_to_state_sync.is_empty() { + if prev_block.chunks().len() != block.chunks().len() && !shards_to_dl.is_empty() { // Currently, the state sync algorithm assumes that the number of chunks do not change // between the epoch being synced to and the last epoch. 
// For example, if shard layout changes at the beginning of epoch T, validators @@ -1135,14 +1100,14 @@ impl Chain { ); debug_assert!(false); } - if shards_to_state_sync.is_empty() { + if shards_to_dl.is_empty() { Ok(None) } else { - debug!(target: "chain", "Downloading state for {:?}, I'm {:?}", shards_to_state_sync, me); + debug!(target: "chain", "Downloading state for {:?}, I'm {:?}", shards_to_dl, me); - let state_sync_info = StateSyncInfo { + let state_dl_info = StateSyncInfo { epoch_tail_hash: *block.header().hash(), - shards: shards_to_state_sync + shards: shards_to_dl .iter() .map(|shard_id| { let chunk = &prev_block.chunks()[*shard_id as usize]; @@ -1151,7 +1116,7 @@ impl Chain { .collect(), }; - Ok(Some(state_sync_info)) + Ok(Some(state_dl_info)) } } @@ -1197,8 +1162,7 @@ impl Chain { } } } - block.check_validity().map_err(|e| >::into(e))?; - Ok(()) + block.check_validity().map_err(|e| e.into()) } /// Verify header signature when the epoch is known, but not the whole chain. @@ -1264,7 +1228,7 @@ impl Chain { return Err(Error::InvalidBlockFutureTime(header.timestamp())); } - // Check the signature. + // First I/O cost, delay as much as possible. if !self.epoch_manager.verify_header_signature(header)? { return Err(Error::InvalidSignature); } @@ -1442,56 +1406,6 @@ impl Chain { Ok(()) } - /// Verify that the block signature and block body hash matches. It makes sure that the block - /// content is not tampered by a middle man. - /// Returns Correct if the both check succeeds. Returns Incorrect if either check fails. - /// Returns CannotVerifyBecauseBlockIsOrphan, if we could not verify the signature because - /// the parent block is not yet available. - pub fn verify_block_hash_and_signature( - &self, - block: &Block, - ) -> Result { - // skip the verification if we are processing the genesis block - if block.hash() == self.genesis.hash() { - return Ok(VerifyBlockHashAndSignatureResult::Correct); - } - let epoch_id = match self.epoch_manager.get_epoch_id(block.header().prev_hash()) { - Ok(epoch_id) => epoch_id, - Err(EpochError::MissingBlock(missing_block)) - if &missing_block == block.header().prev_hash() => - { - return Ok(VerifyBlockHashAndSignatureResult::CannotVerifyBecauseBlockIsOrphan); - } - Err(err) => return Err(err.into()), - }; - let epoch_protocol_version = self.epoch_manager.get_epoch_protocol_version(&epoch_id)?; - // Check that block body hash matches the block body. This makes sure that the block body - // content is not tampered - if checked_feature!( - "protocol_feature_block_header_v4", - BlockHeaderV4, - epoch_protocol_version - ) { - let block_body_hash = block.compute_block_body_hash(); - if block_body_hash.is_none() { - tracing::warn!("Block version too old for block: {:?}", block.hash()); - return Ok(VerifyBlockHashAndSignatureResult::Incorrect); - } - if block.header().block_body_hash() != block_body_hash { - tracing::warn!("Invalid block body hash for block: {:?}", block.hash()); - return Ok(VerifyBlockHashAndSignatureResult::Incorrect); - } - } - - // Verify the signature. Since the signature is signed on the hash of block header, this check - // makes sure the block header content is not tampered - if !self.epoch_manager.verify_header_signature(block.header())? 
{ - tracing::error!("wrong signature"); - return Ok(VerifyBlockHashAndSignatureResult::Incorrect); - } - Ok(VerifyBlockHashAndSignatureResult::Correct) - } - /// Verify that `challenges` are valid /// If all challenges are valid, returns ChallengesResult, which comprises of the list of /// validators that need to be slashed and the list of blocks that are challenged. @@ -2078,20 +1992,6 @@ impl Chain { "start_process_block_impl", height = block_height) .entered(); - // 0) Before we proceed with any further processing, we first check that the block - // hash and signature matches to make sure the block is indeed produced by the assigned - // block producer. If not, we drop the block immediately - // Note that it may appear that we call verify_block_hash_signature twice, once in - // receive_block_impl, once here. The redundancy is because if a block is received as an orphan, - // the check in receive_block_impl will not be complete and the block will be stored in - // the orphan pool. When the orphaned block is ready to be processed, we must perform this check. - // Also note that we purposely separates the check from the rest of the block verification check in - // preprocess_block. - if self.verify_block_hash_and_signature(&block)? - == VerifyBlockHashAndSignatureResult::Incorrect - { - return Err(Error::InvalidSignature); - } // 1) preprocess the block where we verify that the block is valid and ready to be processed // No chain updates are applied at this step. @@ -2112,7 +2012,10 @@ impl Chain { preprocess_res } Err(e) => { - self.maybe_mark_block_invalid(*block.hash(), &e); + if e.is_bad_data() { + metrics::NUM_INVALID_BLOCKS.inc(); + self.invalid_blocks.put(*block.hash(), ()); + } preprocess_timer.stop_and_discard(); match &e { Error::Orphan => { @@ -2348,7 +2251,10 @@ impl Chain { let new_head = match self.postprocess_block_only(me, &block, block_preprocess_info, apply_results) { Err(err) => { - self.maybe_mark_block_invalid(*block.hash(), &err); + if err.is_bad_data() { + self.invalid_blocks.put(*block.hash(), ()); + metrics::NUM_INVALID_BLOCKS.inc(); + } self.blocks_delay_tracker.mark_block_errored(&block_hash, err.to_string()); return Err(err); } @@ -2450,43 +2356,49 @@ impl Chain { invalid_chunks: &mut Vec, block_received_time: Instant, state_patch: SandboxStatePatch, - ) -> Result { - let header = block.header(); - + ) -> Result< + ( + Vec Result + Send + 'static>>, + BlockPreprocessInfo, + ), + Error, + > { // see if the block is already in processing or if there are too many blocks being processed self.blocks_in_processing.add_dry_run(block.hash())?; - debug!(target: "chain", num_approvals = header.num_approvals(), "Preprocess block"); + debug!(target: "chain", num_approvals = block.header().num_approvals(), "Preprocess block"); // Check that we know the epoch of the block before we try to get the header // (so that a block from unknown epoch doesn't get marked as an orphan) - if !self.epoch_manager.epoch_exists(header.epoch_id()) { - return Err(Error::EpochOutOfBounds(header.epoch_id().clone())); + if !self.epoch_manager.epoch_exists(block.header().epoch_id()) { + return Err(Error::EpochOutOfBounds(block.header().epoch_id().clone())); } - if block.chunks().len() != self.epoch_manager.num_shards(header.epoch_id())? as usize { + if block.chunks().len() + != self.epoch_manager.num_shards(block.header().epoch_id())? as usize + { return Err(Error::IncorrectNumberOfChunkHeaders); } // Check if we have already processed this block previously. 
- check_known(self, header.hash())?.map_err(|e| Error::BlockKnown(e))?; + check_known(self, block.header().hash())?.map_err(|e| Error::BlockKnown(e))?; // Delay hitting the db for current chain head until we know this block is not already known. let head = self.head()?; - let is_next = header.prev_hash() == &head.last_block_hash; + let is_next = block.header().prev_hash() == &head.last_block_hash; // Sandbox allows fast-forwarding, so only enable when not within sandbox if !cfg!(feature = "sandbox") { // A heuristic to prevent block height to jump too fast towards BlockHeight::max and cause // overflow-related problems - let block_height = header.height(); + let block_height = block.header().height(); if block_height > head.height + self.epoch_length * 20 { return Err(Error::InvalidBlockHeight(block_height)); } } // Block is an orphan if we do not know about the previous full block. - if !is_next && !self.block_exists(header.prev_hash())? { + if !is_next && !self.block_exists(block.header().prev_hash())? { // Before we add the block to the orphan pool, do some checks: // 1. Block header is signed by the block producer for height. // 2. Chunk headers in block body match block header. @@ -2494,23 +2406,23 @@ impl Chain { // Not checked: // - Block producer could be slashed // - Chunk header signatures could be wrong - if !self.partial_verify_orphan_header_signature(header)? { + if !self.partial_verify_orphan_header_signature(block.header())? { return Err(Error::InvalidSignature); } block.check_validity()?; // TODO: enable after #3729 and #3863 - // self.verify_orphan_header_approvals(&header)?; + // self.verify_orphan_header_approvals(&block.header())?; return Err(Error::Orphan); } let epoch_protocol_version = - self.epoch_manager.get_epoch_protocol_version(header.epoch_id())?; + self.epoch_manager.get_epoch_protocol_version(block.header().epoch_id())?; if epoch_protocol_version > PROTOCOL_VERSION { panic!("The client protocol version is older than the protocol version of the network. Please update nearcore. Client protocol version:{}, network protocol version {}", PROTOCOL_VERSION, epoch_protocol_version); } // First real I/O expense. - let prev = self.get_previous_header(header)?; + let prev = self.get_previous_header(block.header())?; let prev_hash = *prev.hash(); let prev_prev_hash = *prev.prev_hash(); let prev_gas_price = prev.gas_price(); @@ -2522,25 +2434,43 @@ impl Chain { return Err(Error::InvalidBlockHeight(prev_height)); } - let (is_caught_up, state_sync_info, need_state_snapshot) = - self.get_catchup_and_state_sync_infos(header, prev_hash, prev_prev_hash, me, block)?; + let (is_caught_up, state_dl_info, need_state_snapshot) = + if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { + debug!(target: "chain", "block {} is the first block of an epoch", block.hash()); + if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? { + // The previous block is not caught up for the next epoch relative to the previous + // block, which is the current epoch for this block, so this block cannot be applied + // at all yet, needs to be orphaned + return Err(Error::Orphan); + } - self.check_if_challenged_block_on_chain(header)?; + // For the first block of the epoch we check if we need to start download states for + // shards that we will care about in the next epoch. 
If there is no state to be downloaded, + // we consider that we are caught up, otherwise not + let state_dl_info = self.get_state_dl_info(me, block)?; + let is_genesis = prev_prev_hash == CryptoHash::default(); + let need_state_snapshot = !is_genesis; + (state_dl_info.is_none(), state_dl_info, need_state_snapshot) + } else { + (self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false) + }; - debug!(target: "chain", block_hash = ?header.hash(), me=?me, is_caught_up=is_caught_up, "Process block"); + self.check_if_challenged_block_on_chain(block.header())?; + + debug!(target: "chain", "{:?} Process block {}, is_caught_up: {}", me, block.hash(), is_caught_up); // Check the header is valid before we proceed with the full block. - self.validate_header(header, provenance, challenges)?; + self.validate_header(block.header(), provenance, challenges)?; self.epoch_manager.verify_block_vrf( - header.epoch_id(), - header.height(), + block.header().epoch_id(), + block.header().height(), &prev_random_value, block.vrf_value(), block.vrf_proof(), )?; - if header.random_value() != &hash(block.vrf_value().0.as_ref()) { + if block.header().random_value() != &hash(block.vrf_value().0.as_ref()) { return Err(Error::InvalidRandomnessBeaconOutput); } @@ -2553,7 +2483,8 @@ impl Chain { return Err(e); } - let protocol_version = self.epoch_manager.get_epoch_protocol_version(header.epoch_id())?; + let protocol_version = + self.epoch_manager.get_epoch_protocol_version(block.header().epoch_id())?; if !block.verify_gas_price( prev_gas_price, self.block_economics_config.min_gas_price(protocol_version), @@ -2564,7 +2495,7 @@ impl Chain { return Err(Error::InvalidGasPrice); } let minted_amount = if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { - Some(self.epoch_manager.get_epoch_minted_amount(header.next_epoch_id())?) + Some(self.epoch_manager.get_epoch_minted_amount(block.header().next_epoch_id())?) } else { None }; @@ -2574,8 +2505,11 @@ impl Chain { return Err(Error::InvalidGasPrice); } - let (challenges_result, challenged_blocks) = - self.verify_challenges(block.challenges(), header.epoch_id(), header.prev_hash())?; + let (challenges_result, challenged_blocks) = self.verify_challenges( + block.challenges(), + block.header().epoch_id(), + block.header().prev_hash(), + )?; let prev_block = self.get_block(&prev_hash)?; @@ -2585,7 +2519,7 @@ impl Chain { let incoming_receipts = self.collect_incoming_receipts_from_block(me, block)?; // Check if block can be finalized and drop it otherwise. - self.check_if_finalizable(header)?; + self.check_if_finalizable(block.header())?; let apply_chunk_work = self.apply_chunks_preprocessing( me, @@ -2604,7 +2538,7 @@ impl Chain { apply_chunk_work, BlockPreprocessInfo { is_caught_up, - state_sync_info, + state_dl_info, incoming_receipts, challenges_result, challenged_blocks, @@ -2616,35 +2550,6 @@ impl Chain { )) } - fn get_catchup_and_state_sync_infos( - &self, - header: &BlockHeader, - prev_hash: CryptoHash, - prev_prev_hash: CryptoHash, - me: &Option, - block: &MaybeValidated, - ) -> Result<(bool, Option, bool), Error> { - if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { - debug!(target: "chain", block_hash=?header.hash(), "block is the first block of an epoch"); - if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? 
{ - // The previous block is not caught up for the next epoch relative to the previous - // block, which is the current epoch for this block, so this block cannot be applied - // at all yet, needs to be orphaned - return Err(Error::Orphan); - } - - // For the first block of the epoch we check if we need to start download states for - // shards that we will care about in the next epoch. If there is no state to be downloaded, - // we consider that we are caught up, otherwise not - let state_sync_info = self.get_state_sync_info(me, block)?; - let is_genesis = prev_prev_hash == CryptoHash::default(); - let need_state_snapshot = !is_genesis; - Ok((state_sync_info.is_none(), state_sync_info, need_state_snapshot)) - } else { - Ok((self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false)) - } - } - /// Check if we can request chunks for this orphan. Conditions are /// 1) Orphans that with outstanding missing chunks request has not exceed `MAX_ORPHAN_MISSING_CHUNKS` /// 2) we haven't already requested missing chunks for the orphan @@ -2732,7 +2637,7 @@ impl Chain { /// in the current epoch that will be split into a future shard that `me` will track. /// 2) Shard layout will be the same. In this case, the method returns all shards that `me` will /// track in the next epoch but not this epoch - fn get_shards_to_state_sync( + fn get_shards_to_dl_state( epoch_manager: &dyn EpochManagerAdapter, shard_tracker: &ShardTracker, me: &Option, @@ -2780,9 +2685,7 @@ impl Chain { apply_chunks_done_callback: DoneApplyChunkCallback, ) { let blocks = self.blocks_with_missing_chunks.ready_blocks(); - if !blocks.is_empty() { - debug!(target:"chain", "Got {} blocks that were missing chunks but now are ready.", blocks.len()); - } + debug!(target:"chain", "Got {} blocks that were missing chunks but now are ready.", blocks.len()); for block in blocks { let block_hash = *block.block.header().hash(); let height = block.block.header().height(); @@ -3428,6 +3331,53 @@ impl Chain { Ok(()) } + pub fn build_state_for_split_shards_preprocessing( + &self, + sync_hash: &CryptoHash, + shard_id: ShardId, + state_split_scheduler: &dyn Fn(StateSplitRequest), + state_split_status: Arc, + ) -> Result<(), Error> { + let (epoch_id, next_epoch_id) = { + let block_header = self.get_block_header(sync_hash)?; + (block_header.epoch_id().clone(), block_header.next_epoch_id().clone()) + }; + let shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; + let next_epoch_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; + let shard_uid = ShardUId::from_shard_id_and_layout(shard_id, &shard_layout); + let prev_hash = *self.get_block_header(sync_hash)?.prev_hash(); + let state_root = *self.get_chunk_extra(&prev_hash, &shard_uid)?.state_root(); + assert_ne!(shard_layout, next_epoch_shard_layout); + + state_split_scheduler(StateSplitRequest { + runtime_adapter: self.runtime_adapter.clone(), + sync_hash: *sync_hash, + shard_id, + shard_uid, + state_root, + next_epoch_shard_layout, + state_split_status, + }); + + Ok(()) + } + + pub fn build_state_for_split_shards_postprocessing( + &mut self, + sync_hash: &CryptoHash, + state_roots: Result, Error>, + ) -> Result<(), Error> { + let prev_hash = *self.get_block_header(sync_hash)?.prev_hash(); + let mut chain_update = self.chain_update(); + for (shard_uid, state_root) in state_roots? 
{ + // here we store the state roots in chunk_extra in the database for later use + let chunk_extra = ChunkExtra::new_with_only_state_root(&state_root); + chain_update.chain_store_update.save_chunk_extra(&prev_hash, &shard_uid, chunk_extra); + debug!(target:"chain", "Finish building split state for shard {:?} {:?} {:?} ", shard_uid, prev_hash, state_root); + } + chain_update.commit() + } + pub fn clear_downloaded_parts( &mut self, shard_id: ShardId, @@ -3446,13 +3396,9 @@ impl Chain { blocks_catch_up_state: &mut BlocksCatchUpState, block_catch_up_scheduler: &dyn Fn(BlockCatchUpRequest), ) -> Result<(), Error> { - tracing::debug!( - target: "catchup", - pending_blocks = ?blocks_catch_up_state.pending_blocks, - processed_blocks = ?blocks_catch_up_state.processed_blocks.keys().collect::>(), - scheduled_blocks = ?blocks_catch_up_state.scheduled_blocks, - done_blocks = blocks_catch_up_state.done_blocks.len(), - "catch up blocks"); + debug!(target:"catchup", "catch up blocks: pending blocks: {:?}, processed {:?}, scheduled: {:?}, done: {:?}", + blocks_catch_up_state.pending_blocks, blocks_catch_up_state.processed_blocks.keys().collect::>(), + blocks_catch_up_state.scheduled_blocks, blocks_catch_up_state.done_blocks.len()); let mut processed_blocks = HashMap::new(); for (queued_block, results) in blocks_catch_up_state.processed_blocks.drain() { // If this block is parent of some blocks in processing that need to be caught up, @@ -3503,7 +3449,6 @@ impl Chain { Default::default(), &mut Vec::new(), )?; - metrics::SCHEDULED_CATCHUP_BLOCK.set(block.header().height() as i64); blocks_catch_up_state.scheduled_blocks.insert(pending_block); block_catch_up_scheduler(BlockCatchUpRequest { sync_hash: *sync_hash, @@ -3584,7 +3529,7 @@ impl Chain { debug!(target: "chain", "Catching up: removing prev={:?} from the queue. I'm {:?}", block_hash, me); chain_store_update.remove_prev_block_to_catchup(*block_hash); } - chain_store_update.remove_state_sync_info(*epoch_first_block); + chain_store_update.remove_state_dl_info(*epoch_first_block); chain_store_update.commit()?; @@ -4026,7 +3971,7 @@ impl Chain { true, is_first_block_with_chunk_of_version, state_patch, - true, + cares_about_shard_this_epoch, ) { Ok(apply_result) => { let apply_split_result_or_state_changes = @@ -4087,7 +4032,7 @@ impl Chain { false, false, state_patch, - true, + cares_about_shard_this_epoch, ) { Ok(apply_result) => { let apply_split_result_or_state_changes = @@ -5220,7 +5165,7 @@ impl<'a> ChainUpdate<'a> { &mut self, me: &Option, block: &Block, - block_preprocess_info: BlockPreprocessInfo, + preprocess_block_info: BlockPreprocessInfo, apply_chunks_results: Vec>, ) -> Result, Error> { let prev_hash = block.header().prev_hash(); @@ -5235,12 +5180,12 @@ impl<'a> ChainUpdate<'a> { let BlockPreprocessInfo { is_caught_up, - state_sync_info, + state_dl_info, incoming_receipts, challenges_result, challenged_blocks, .. 
- } = block_preprocess_info; + } = preprocess_block_info; if !is_caught_up { debug!(target: "chain", %prev_hash, hash = %*block.hash(), "Add block to catch up"); @@ -5254,8 +5199,8 @@ impl<'a> ChainUpdate<'a> { Arc::new(receipt_proofs), ); } - if let Some(state_sync_info) = state_sync_info { - self.chain_store_update.add_state_sync_info(state_sync_info); + if let Some(state_dl_info) = state_dl_info { + self.chain_store_update.add_state_dl_info(state_dl_info); } self.chain_store_update.save_block_extra(block.hash(), BlockExtra { challenges_result }); @@ -5517,10 +5462,12 @@ impl<'a> ChainUpdate<'a> { let chunk_header = chunk.cloned_header(); let gas_limit = chunk_header.gas_limit(); - // This is set to false because the value is only relevant - // during protocol version RestoreReceiptsAfterFixApplyChunks. - // TODO(nikurt): Determine the value correctly. - let is_first_block_with_chunk_of_version = false; + let is_first_block_with_chunk_of_version = check_if_block_is_first_with_chunk_of_version( + &mut self.chain_store_update, + self.epoch_manager.as_ref(), + &chunk_header.prev_block_hash(), + shard_id, + )?; let apply_result = self.runtime_adapter.apply_transactions( shard_id, @@ -5718,6 +5665,26 @@ pub struct BlockCatchUpResponse { pub results: Vec>, } +#[derive(actix::Message)] +#[rtype(result = "()")] +pub struct StateSplitRequest { + pub runtime_adapter: Arc, + pub sync_hash: CryptoHash, + pub shard_id: ShardId, + pub shard_uid: ShardUId, + pub state_root: StateRoot, + pub next_epoch_shard_layout: ShardLayout, + pub state_split_status: Arc, +} + +#[derive(actix::Message)] +#[rtype(result = "()")] +pub struct StateSplitResponse { + pub sync_hash: CryptoHash, + pub shard_id: ShardId, + pub new_state_roots: Result, Error>, +} + /// Helper to track blocks catch up /// Lifetime of a block_hash is as follows: /// 1. 
It is added to pending blocks, either as first block of an epoch or because we (post) diff --git a/chain/chain/src/tests/simple_chain.rs b/chain/chain/src/tests/simple_chain.rs index f864bfff441..b01ad79cc78 100644 --- a/chain/chain/src/tests/simple_chain.rs +++ b/chain/chain/src/tests/simple_chain.rs @@ -48,7 +48,7 @@ fn build_chain() { // cargo insta test --accept -p near-chain --features nightly -- tests::simple_chain::build_chain let hash = chain.head().unwrap().last_block_hash; if cfg!(feature = "nightly") { - insta::assert_display_snapshot!(hash, @"GargNTMFiuET32KH5uPLFwMSU8xXtvrk6aGqgkPbRZg8"); + insta::assert_display_snapshot!(hash, @"86ZZBdNhwHbXDXdTjFZxGbddSy4qLpoxpWtqJtYwYXX"); } else { insta::assert_display_snapshot!(hash, @"8GP6PcFavb4pqeofMFjDyKUQnfVZtwPWsVA4V47WNbRn"); } @@ -78,7 +78,7 @@ fn build_chain() { let hash = chain.head().unwrap().last_block_hash; if cfg!(feature = "nightly") { - insta::assert_display_snapshot!(hash, @"2aurKZqRfPkZ3woNjA7Kf79wq5MYz98AohTYWoBFiG7o"); + insta::assert_display_snapshot!(hash, @"8XW5k1JDHWPXkRcGwb6PTEgwggnppAW1qwWgwiqPY286"); } else { insta::assert_display_snapshot!(hash, @"319JoVaUej5iXmrZMeaZBPMeBLePQzJofA5Y1ztdyPw9"); } diff --git a/core/primitives-core/Cargo.toml b/core/primitives-core/Cargo.toml index 2795feab9e1..eca9aa693f8 100644 --- a/core/primitives-core/Cargo.toml +++ b/core/primitives-core/Cargo.toml @@ -36,11 +36,9 @@ protocol_feature_fix_staking_threshold = [] protocol_feature_fix_contract_loading_cost = [] protocol_feature_reject_blocks_with_outdated_protocol_version = [] protocol_feature_simple_nightshade_v2 = [] -protocol_feature_block_header_v4 = [] nightly = [ "nightly_protocol", - "protocol_feature_block_header_v4", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", "protocol_feature_reject_blocks_with_outdated_protocol_version", diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index 5c25776ad4c..1a5bc78f9d5 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -122,8 +122,6 @@ pub enum ProtocolFeature { RejectBlocksWithOutdatedProtocolVersions, #[cfg(feature = "protocol_feature_simple_nightshade_v2")] SimpleNightshadeV2, - #[cfg(feature = "protocol_feature_block_header_v4")] - BlockHeaderV4, } impl ProtocolFeature { @@ -176,8 +174,6 @@ impl ProtocolFeature { ProtocolFeature::RejectBlocksWithOutdatedProtocolVersions => 132, #[cfg(feature = "protocol_feature_simple_nightshade_v2")] ProtocolFeature::SimpleNightshadeV2 => 135, - #[cfg(feature = "protocol_feature_block_header_v4")] - ProtocolFeature::BlockHeaderV4 => 138, } } } @@ -190,7 +186,7 @@ const STABLE_PROTOCOL_VERSION: ProtocolVersion = 62; /// Largest protocol version supported by the current binary. pub const PROTOCOL_VERSION: ProtocolVersion = if cfg!(feature = "nightly_protocol") { // On nightly, pick big enough version to support all features. - 138 + 137 } else { // Enable all stable features. 
STABLE_PROTOCOL_VERSION diff --git a/core/primitives/Cargo.toml b/core/primitives/Cargo.toml index 757d537d389..0f073bf0213 100644 --- a/core/primitives/Cargo.toml +++ b/core/primitives/Cargo.toml @@ -48,10 +48,8 @@ protocol_feature_fix_staking_threshold = ["near-primitives-core/protocol_feature protocol_feature_fix_contract_loading_cost = ["near-primitives-core/protocol_feature_fix_contract_loading_cost"] protocol_feature_reject_blocks_with_outdated_protocol_version = ["near-primitives-core/protocol_feature_reject_blocks_with_outdated_protocol_version"] protocol_feature_simple_nightshade_v2 = ["near-primitives-core/protocol_feature_simple_nightshade_v2"] -protocol_feature_block_header_v4 = ["near-primitives-core/protocol_feature_block_header_v4"] nightly = [ "nightly_protocol", - "protocol_feature_block_header_v4", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", "protocol_feature_reject_blocks_with_outdated_protocol_version", From 3dbc17d35ef69e23848fb2937f38d30692a8a140 Mon Sep 17 00:00:00 2001 From: wacban Date: Thu, 13 Jul 2023 15:38:43 +0200 Subject: [PATCH 33/50] refactor: small refactorings and improvements (#9296) - Renamed a lot of "dl_info" and 'to_dl" to "state_sync_info". I'm too afraid to ask what "dl" stands for but either way it's very confusing. (it could be download). I'm not sure I fully appreciate the difference between state sync, catchup and download and I'm open for a better suggestion how to rename those. - In the LocalnetCmd I added logic to generate default LogConfig - to get rid of a pesky log message about this config missing when starting neard. - In docs, renamed `SyncJobActor` to `SyncJobsActor` which is the correct name. - Allowing the `stable_hash` to be unused. It's only unused on macOS so we need to keep it but let's not generate a warning. All of the failed builds (red cross) below are due to this. cc @andrei-near shall we add some automation to notify us when builds are failing? Should this build be also part of PR-buildkite? ![Screenshot 2023-07-13 at 15 03 36](https://github.com/near/nearcore/assets/1555986/3adf18bf-6adc-4bf3-9996-55dc2ac8ad68) --- chain/chain/src/chain.rs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 779dc948317..0220741c1bc 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -1069,13 +1069,13 @@ impl Chain { /// Return a StateSyncInfo that includes the information needed for syncing state for shards needed /// in the next epoch. - fn get_state_dl_info( + fn get_state_sync_info( &self, me: &Option, block: &Block, ) -> Result, Error> { let prev_hash = *block.header().prev_hash(); - let shards_to_dl = Chain::get_shards_to_dl_state( + let shards_to_state_sync = Chain::get_shards_to_state_sync( self.epoch_manager.as_ref(), &self.shard_tracker, me, @@ -1083,7 +1083,7 @@ impl Chain { )?; let prev_block = self.get_block(&prev_hash)?; - if prev_block.chunks().len() != block.chunks().len() && !shards_to_dl.is_empty() { + if prev_block.chunks().len() != block.chunks().len() && !shards_to_state_sync.is_empty() { // Currently, the state sync algorithm assumes that the number of chunks do not change // between the epoch being synced to and the last epoch. 
// For example, if shard layout changes at the beginning of epoch T, validators @@ -1100,14 +1100,14 @@ impl Chain { ); debug_assert!(false); } - if shards_to_dl.is_empty() { + if shards_to_state_sync.is_empty() { Ok(None) } else { - debug!(target: "chain", "Downloading state for {:?}, I'm {:?}", shards_to_dl, me); + debug!(target: "chain", "Downloading state for {:?}, I'm {:?}", shards_to_state_sync, me); - let state_dl_info = StateSyncInfo { + let state_sync_info = StateSyncInfo { epoch_tail_hash: *block.header().hash(), - shards: shards_to_dl + shards: shards_to_state_sync .iter() .map(|shard_id| { let chunk = &prev_block.chunks()[*shard_id as usize]; @@ -1116,7 +1116,7 @@ impl Chain { .collect(), }; - Ok(Some(state_dl_info)) + Ok(Some(state_sync_info)) } } @@ -2434,7 +2434,7 @@ impl Chain { return Err(Error::InvalidBlockHeight(prev_height)); } - let (is_caught_up, state_dl_info, need_state_snapshot) = + let (is_caught_up, state_sync_info, need_state_snapshot) = if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { debug!(target: "chain", "block {} is the first block of an epoch", block.hash()); if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? { @@ -2447,10 +2447,10 @@ impl Chain { // For the first block of the epoch we check if we need to start download states for // shards that we will care about in the next epoch. If there is no state to be downloaded, // we consider that we are caught up, otherwise not - let state_dl_info = self.get_state_dl_info(me, block)?; + let state_sync_info = self.get_state_sync_info(me, block)?; let is_genesis = prev_prev_hash == CryptoHash::default(); let need_state_snapshot = !is_genesis; - (state_dl_info.is_none(), state_dl_info, need_state_snapshot) + (state_sync_info.is_none(), state_sync_info, need_state_snapshot) } else { (self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false) }; @@ -2538,7 +2538,7 @@ impl Chain { apply_chunk_work, BlockPreprocessInfo { is_caught_up, - state_dl_info, + state_sync_info, incoming_receipts, challenges_result, challenged_blocks, @@ -2637,7 +2637,7 @@ impl Chain { /// in the current epoch that will be split into a future shard that `me` will track. /// 2) Shard layout will be the same. In this case, the method returns all shards that `me` will /// track in the next epoch but not this epoch - fn get_shards_to_dl_state( + fn get_shards_to_state_sync( epoch_manager: &dyn EpochManagerAdapter, shard_tracker: &ShardTracker, me: &Option, @@ -3529,7 +3529,7 @@ impl Chain { debug!(target: "chain", "Catching up: removing prev={:?} from the queue. 
I'm {:?}", block_hash, me); chain_store_update.remove_prev_block_to_catchup(*block_hash); } - chain_store_update.remove_state_dl_info(*epoch_first_block); + chain_store_update.remove_state_sync_info(*epoch_first_block); chain_store_update.commit()?; @@ -5180,7 +5180,7 @@ impl<'a> ChainUpdate<'a> { let BlockPreprocessInfo { is_caught_up, - state_dl_info, + state_sync_info, incoming_receipts, challenges_result, challenged_blocks, @@ -5199,8 +5199,8 @@ impl<'a> ChainUpdate<'a> { Arc::new(receipt_proofs), ); } - if let Some(state_dl_info) = state_dl_info { - self.chain_store_update.add_state_dl_info(state_dl_info); + if let Some(state_sync_info) = state_sync_info { + self.chain_store_update.add_state_sync_info(state_sync_info); } self.chain_store_update.save_block_extra(block.hash(), BlockExtra { challenges_result }); From 44fbef5713051f8d82fcd8af83417c8234d9d456 Mon Sep 17 00:00:00 2001 From: wacban Date: Fri, 14 Jul 2023 09:15:12 +0200 Subject: [PATCH 34/50] refactor: refactoring and commenting some resharding code (#9299) --- chain/chain/src/chain.rs | 128 ++++++++++++---------- core/o11y/src/lib.rs | 7 +- nearcore/src/runtime/mod.rs | 205 +++++++++++++++++++++++++++++++----- 3 files changed, 251 insertions(+), 89 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 0220741c1bc..4e9ba507b6a 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -489,6 +489,15 @@ impl Drop for Chain { let _ = self.blocks_in_processing.wait_for_all_blocks(); } } + +/// PreprocessBlockResult is a tuple where +/// the first element is a vector of jobs to apply chunks +/// the second element is BlockPreprocessInfo +type PreprocessBlockResult = ( + Vec Result + Send + 'static>>, + BlockPreprocessInfo, +); + impl Chain { pub fn make_genesis_block( epoch_manager: &dyn EpochManagerAdapter, @@ -2356,49 +2365,43 @@ impl Chain { invalid_chunks: &mut Vec, block_received_time: Instant, state_patch: SandboxStatePatch, - ) -> Result< - ( - Vec Result + Send + 'static>>, - BlockPreprocessInfo, - ), - Error, - > { + ) -> Result { + let header = block.header(); + // see if the block is already in processing or if there are too many blocks being processed self.blocks_in_processing.add_dry_run(block.hash())?; - debug!(target: "chain", num_approvals = block.header().num_approvals(), "Preprocess block"); + debug!(target: "chain", num_approvals = header.num_approvals(), "Preprocess block"); // Check that we know the epoch of the block before we try to get the header // (so that a block from unknown epoch doesn't get marked as an orphan) - if !self.epoch_manager.epoch_exists(block.header().epoch_id()) { - return Err(Error::EpochOutOfBounds(block.header().epoch_id().clone())); + if !self.epoch_manager.epoch_exists(header.epoch_id()) { + return Err(Error::EpochOutOfBounds(header.epoch_id().clone())); } - if block.chunks().len() - != self.epoch_manager.num_shards(block.header().epoch_id())? as usize - { + if block.chunks().len() != self.epoch_manager.num_shards(header.epoch_id())? as usize { return Err(Error::IncorrectNumberOfChunkHeaders); } // Check if we have already processed this block previously. - check_known(self, block.header().hash())?.map_err(|e| Error::BlockKnown(e))?; + check_known(self, header.hash())?.map_err(|e| Error::BlockKnown(e))?; // Delay hitting the db for current chain head until we know this block is not already known. 
let head = self.head()?; - let is_next = block.header().prev_hash() == &head.last_block_hash; + let is_next = header.prev_hash() == &head.last_block_hash; // Sandbox allows fast-forwarding, so only enable when not within sandbox if !cfg!(feature = "sandbox") { // A heuristic to prevent block height to jump too fast towards BlockHeight::max and cause // overflow-related problems - let block_height = block.header().height(); + let block_height = header.height(); if block_height > head.height + self.epoch_length * 20 { return Err(Error::InvalidBlockHeight(block_height)); } } // Block is an orphan if we do not know about the previous full block. - if !is_next && !self.block_exists(block.header().prev_hash())? { + if !is_next && !self.block_exists(header.prev_hash())? { // Before we add the block to the orphan pool, do some checks: // 1. Block header is signed by the block producer for height. // 2. Chunk headers in block body match block header. @@ -2406,23 +2409,23 @@ impl Chain { // Not checked: // - Block producer could be slashed // - Chunk header signatures could be wrong - if !self.partial_verify_orphan_header_signature(block.header())? { + if !self.partial_verify_orphan_header_signature(header)? { return Err(Error::InvalidSignature); } block.check_validity()?; // TODO: enable after #3729 and #3863 - // self.verify_orphan_header_approvals(&block.header())?; + // self.verify_orphan_header_approvals(&header)?; return Err(Error::Orphan); } let epoch_protocol_version = - self.epoch_manager.get_epoch_protocol_version(block.header().epoch_id())?; + self.epoch_manager.get_epoch_protocol_version(header.epoch_id())?; if epoch_protocol_version > PROTOCOL_VERSION { panic!("The client protocol version is older than the protocol version of the network. Please update nearcore. Client protocol version:{}, network protocol version {}", PROTOCOL_VERSION, epoch_protocol_version); } // First real I/O expense. - let prev = self.get_previous_header(block.header())?; + let prev = self.get_previous_header(header)?; let prev_hash = *prev.hash(); let prev_prev_hash = *prev.prev_hash(); let prev_gas_price = prev.gas_price(); @@ -2435,42 +2438,24 @@ impl Chain { } let (is_caught_up, state_sync_info, need_state_snapshot) = - if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { - debug!(target: "chain", "block {} is the first block of an epoch", block.hash()); - if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? { - // The previous block is not caught up for the next epoch relative to the previous - // block, which is the current epoch for this block, so this block cannot be applied - // at all yet, needs to be orphaned - return Err(Error::Orphan); - } + self.get_catchup_and_state_sync_infos(header, prev_hash, prev_prev_hash, me, block)?; - // For the first block of the epoch we check if we need to start download states for - // shards that we will care about in the next epoch. 
If there is no state to be downloaded, - // we consider that we are caught up, otherwise not - let state_sync_info = self.get_state_sync_info(me, block)?; - let is_genesis = prev_prev_hash == CryptoHash::default(); - let need_state_snapshot = !is_genesis; - (state_sync_info.is_none(), state_sync_info, need_state_snapshot) - } else { - (self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false) - }; + self.check_if_challenged_block_on_chain(header)?; - self.check_if_challenged_block_on_chain(block.header())?; - - debug!(target: "chain", "{:?} Process block {}, is_caught_up: {}", me, block.hash(), is_caught_up); + debug!(target: "chain", block_hash = ?header.hash(), me=?me, is_caught_up=is_caught_up, "Process block"); // Check the header is valid before we proceed with the full block. - self.validate_header(block.header(), provenance, challenges)?; + self.validate_header(header, provenance, challenges)?; self.epoch_manager.verify_block_vrf( - block.header().epoch_id(), - block.header().height(), + header.epoch_id(), + header.height(), &prev_random_value, block.vrf_value(), block.vrf_proof(), )?; - if block.header().random_value() != &hash(block.vrf_value().0.as_ref()) { + if header.random_value() != &hash(block.vrf_value().0.as_ref()) { return Err(Error::InvalidRandomnessBeaconOutput); } @@ -2483,8 +2468,7 @@ impl Chain { return Err(e); } - let protocol_version = - self.epoch_manager.get_epoch_protocol_version(block.header().epoch_id())?; + let protocol_version = self.epoch_manager.get_epoch_protocol_version(header.epoch_id())?; if !block.verify_gas_price( prev_gas_price, self.block_economics_config.min_gas_price(protocol_version), @@ -2495,7 +2479,7 @@ impl Chain { return Err(Error::InvalidGasPrice); } let minted_amount = if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { - Some(self.epoch_manager.get_epoch_minted_amount(block.header().next_epoch_id())?) + Some(self.epoch_manager.get_epoch_minted_amount(header.next_epoch_id())?) } else { None }; @@ -2505,11 +2489,8 @@ impl Chain { return Err(Error::InvalidGasPrice); } - let (challenges_result, challenged_blocks) = self.verify_challenges( - block.challenges(), - block.header().epoch_id(), - block.header().prev_hash(), - )?; + let (challenges_result, challenged_blocks) = + self.verify_challenges(block.challenges(), header.epoch_id(), header.prev_hash())?; let prev_block = self.get_block(&prev_hash)?; @@ -2519,7 +2500,7 @@ impl Chain { let incoming_receipts = self.collect_incoming_receipts_from_block(me, block)?; // Check if block can be finalized and drop it otherwise. - self.check_if_finalizable(block.header())?; + self.check_if_finalizable(header)?; let apply_chunk_work = self.apply_chunks_preprocessing( me, @@ -2550,6 +2531,35 @@ impl Chain { )) } + fn get_catchup_and_state_sync_infos( + &self, + header: &BlockHeader, + prev_hash: CryptoHash, + prev_prev_hash: CryptoHash, + me: &Option, + block: &MaybeValidated, + ) -> Result<(bool, Option, bool), Error> { + if self.epoch_manager.is_next_block_epoch_start(&prev_hash)? { + debug!(target: "chain", block_hash=?header.hash(), "block is the first block of an epoch"); + if !self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)? 
{ + // The previous block is not caught up for the next epoch relative to the previous + // block, which is the current epoch for this block, so this block cannot be applied + // at all yet, needs to be orphaned + return Err(Error::Orphan); + } + + // For the first block of the epoch we check if we need to start download states for + // shards that we will care about in the next epoch. If there is no state to be downloaded, + // we consider that we are caught up, otherwise not + let state_sync_info = self.get_state_sync_info(me, block)?; + let is_genesis = prev_prev_hash == CryptoHash::default(); + let need_state_snapshot = !is_genesis; + Ok((state_sync_info.is_none(), state_sync_info, need_state_snapshot)) + } else { + Ok((self.prev_block_is_caught_up(&prev_prev_hash, &prev_hash)?, None, false)) + } + } + /// Check if we can request chunks for this orphan. Conditions are /// 1) Orphans that with outstanding missing chunks request has not exceed `MAX_ORPHAN_MISSING_CHUNKS` /// 2) we haven't already requested missing chunks for the orphan @@ -2685,7 +2695,9 @@ impl Chain { apply_chunks_done_callback: DoneApplyChunkCallback, ) { let blocks = self.blocks_with_missing_chunks.ready_blocks(); - debug!(target:"chain", "Got {} blocks that were missing chunks but now are ready.", blocks.len()); + if !blocks.is_empty() { + debug!(target:"chain", "Got {} blocks that were missing chunks but now are ready.", blocks.len()); + } for block in blocks { let block_hash = *block.block.header().hash(); let height = block.block.header().height(); @@ -5165,7 +5177,7 @@ impl<'a> ChainUpdate<'a> { &mut self, me: &Option, block: &Block, - preprocess_block_info: BlockPreprocessInfo, + block_preprocess_info: BlockPreprocessInfo, apply_chunks_results: Vec>, ) -> Result, Error> { let prev_hash = block.header().prev_hash(); @@ -5185,7 +5197,7 @@ impl<'a> ChainUpdate<'a> { challenges_result, challenged_blocks, .. - } = preprocess_block_info; + } = block_preprocess_info; if !is_caught_up { debug!(target: "chain", %prev_hash, hash = %*block.hash(), "Add block to catch up"); diff --git a/core/o11y/src/lib.rs b/core/o11y/src/lib.rs index 2a255e6d2d7..9403e230187 100644 --- a/core/o11y/src/lib.rs +++ b/core/o11y/src/lib.rs @@ -1,5 +1,5 @@ #![doc = include_str!("../README.md")] -#![deny(clippy::arithmetic_side_effects)] +#![deny(clippy::integer_arithmetic)] pub use context::*; use near_crypto::PublicKey; @@ -462,10 +462,7 @@ pub fn reload_log_config(config: Option<&log_config::LogConfig>) { tracing::info!("Updated the logging layer according to `log_config.json`"); } Err(err) => { - eprintln!( - "Failed to update the logging layer according to the changed `log_config.json`. Errors: {:?}", - err - ); + tracing::info!("Failed to update the logging layer according to the changed `log_config.json`. 
Errors: {:?}", err); } } } diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index 7391b95c729..8a25701f4b0 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -7,10 +7,13 @@ use errors::FromStateViewerErrors; use near_chain::types::{ApplySplitStateResult, ApplyTransactionResult, RuntimeAdapter, Tip}; use near_chain::Error; use near_chain_configs::{ - GenesisConfig, ProtocolConfig, DEFAULT_GC_NUM_EPOCHS_TO_KEEP, MIN_GC_NUM_EPOCHS_TO_KEEP, + Genesis, GenesisConfig, ProtocolConfig, DEFAULT_GC_NUM_EPOCHS_TO_KEEP, + MIN_GC_NUM_EPOCHS_TO_KEEP, }; +use near_client_primitives::types::StateSplitApplyingStatus; use near_crypto::PublicKey; use near_epoch_manager::{EpochManagerAdapter, EpochManagerHandle}; +use near_o11y::log_assert; use near_pool::types::PoolIterator; use near_primitives::account::{AccessKey, Account}; use near_primitives::challenge::ChallengesResult; @@ -26,11 +29,12 @@ use near_primitives::shard_layout::{ account_id_to_shard_id, account_id_to_shard_uid, ShardLayout, ShardUId, }; use near_primitives::state_part::PartId; +use near_primitives::syncing::{get_num_state_parts, STATE_PART_MEMORY_LIMIT}; use near_primitives::transaction::SignedTransaction; use near_primitives::types::validator_stake::ValidatorStakeIter; use near_primitives::types::{ AccountId, Balance, BlockHeight, EpochHeight, EpochId, EpochInfoProvider, Gas, MerkleHash, - ShardId, StateChangeCause, StateChangesForSplitStates, StateRoot, StateRootNode, + NumShards, ShardId, StateChangeCause, StateChangesForSplitStates, StateRoot, StateRootNode, }; use near_primitives::version::ProtocolVersion; use near_primitives::views::{ @@ -38,20 +42,24 @@ use near_primitives::views::{ ViewStateResult, }; use near_store::flat::FlatStorageManager; +use near_store::genesis::initialize_genesis_state; use near_store::metadata::DbKind; +use near_store::split_state::get_delayed_receipts; use near_store::{ + get_genesis_hash, get_genesis_state_roots, set_genesis_hash, set_genesis_state_roots, ApplyStatePartResult, DBCol, PartialStorage, ShardTries, StateSnapshotConfig, Store, StoreCompiledContractCache, Trie, TrieConfig, WrappedTrieChanges, COLD_HEAD_KEY, }; use near_vm_runner::logic::CompiledContractCache; use near_vm_runner::precompile_contract; use node_runtime::adapter::ViewRuntimeAdapter; +use node_runtime::config::RuntimeConfig; use node_runtime::state_viewer::TrieViewer; use node_runtime::{ validate_transaction, verify_and_charge_transaction, ApplyState, Runtime, ValidatorAccountsUpdate, }; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Instant; @@ -71,6 +79,7 @@ pub struct NightshadeRuntime { flat_storage_manager: FlatStorageManager, pub runtime: Runtime, epoch_manager: Arc, + genesis_state_roots: Vec, migration_data: Arc, gc_num_epochs_to_keep: u64, } @@ -93,8 +102,9 @@ impl NightshadeRuntime { StateSnapshotConfig::Disabled }; Self::new( + home_dir, store, - &config.genesis.config, + &config.genesis, epoch_manager, config.client_config.trie_viewer_state_size_limit, config.client_config.max_gas_burnt_view, @@ -106,8 +116,9 @@ impl NightshadeRuntime { } fn new( + home_dir: &Path, store: Store, - genesis_config: &GenesisConfig, + genesis: &Genesis, epoch_manager: Arc, trie_viewer_state_size_limit: Option, max_gas_burnt_view: Option, @@ -118,11 +129,21 @@ impl NightshadeRuntime { ) -> Arc { let runtime_config_store = match runtime_config_store { Some(store) => store, - None => 
RuntimeConfigStore::for_chain_id(&genesis_config.chain_id), + None => Self::create_runtime_config_store(&genesis.config.chain_id), }; let runtime = Runtime::new(); let trie_viewer = TrieViewer::new(trie_viewer_state_size_limit, max_gas_burnt_view); + let genesis_config = genesis.config.clone(); + assert_eq!( + genesis_config.shard_layout.num_shards(), + genesis_config.num_block_producer_seats_per_shard.len() as NumShards, + "genesis config shard_layout and num_block_producer_seats_per_shard indicate inconsistent number of shards {} vs {}", + genesis_config.shard_layout.num_shards(), + genesis_config.num_block_producer_seats_per_shard.len() as NumShards, + ); + let state_roots = + Self::initialize_genesis_state_if_needed(store.clone(), home_dir, genesis); let flat_storage_manager = FlatStorageManager::new(store.clone()); let tries = ShardTries::new_with_state_snapshot( store.clone(), @@ -140,9 +161,8 @@ impl NightshadeRuntime { tracing::error!(target: "runtime", ?err, "Failed to check if a state snapshot exists"); } - let migration_data = Arc::new(load_migration_data(&genesis_config.chain_id)); Arc::new(NightshadeRuntime { - genesis_config: genesis_config.clone(), + genesis_config, runtime_config_store, store, tries, @@ -150,7 +170,8 @@ impl NightshadeRuntime { trie_viewer, epoch_manager, flat_storage_manager, - migration_data, + genesis_state_roots: state_roots, + migration_data: Arc::new(load_migration_data(&genesis.config.chain_id)), gc_num_epochs_to_keep: gc_num_epochs_to_keep.max(MIN_GC_NUM_EPOCHS_TO_KEEP), }) } @@ -158,13 +179,14 @@ impl NightshadeRuntime { pub fn test_with_runtime_config_store( home_dir: &Path, store: Store, - genesis_config: &GenesisConfig, + genesis: &Genesis, epoch_manager: Arc, runtime_config_store: RuntimeConfigStore, ) -> Arc { Self::new( + home_dir, store, - genesis_config, + genesis, epoch_manager, None, None, @@ -183,18 +205,64 @@ impl NightshadeRuntime { pub fn test( home_dir: &Path, store: Store, - genesis_config: &GenesisConfig, + genesis: &Genesis, epoch_manager: Arc, ) -> Arc { Self::test_with_runtime_config_store( home_dir, store, - genesis_config, + genesis, epoch_manager, RuntimeConfigStore::test(), ) } + /// Create store of runtime configs for the given chain id. + /// + /// For mainnet and other chains except testnet we don't need to override runtime config for + /// first protocol versions. + /// For testnet, runtime config for genesis block was (incorrectly) different, that's why we + /// need to override it specifically to preserve compatibility. + fn create_runtime_config_store(chain_id: &str) -> RuntimeConfigStore { + match chain_id { + "testnet" => { + let genesis_runtime_config = RuntimeConfig::initial_testnet_config(); + RuntimeConfigStore::new(Some(&genesis_runtime_config)) + } + _ => RuntimeConfigStore::new(None), + } + } + + /// On first start: compute state roots, load genesis state into storage. + /// After that: return genesis state roots. The state is not guaranteed to be in storage, as + /// GC and state sync are allowed to delete it. 
+ fn initialize_genesis_state_if_needed( + store: Store, + home_dir: &Path, + genesis: &Genesis, + ) -> Vec { + let stored_hash = get_genesis_hash(&store).expect("Store failed on genesis intialization"); + if let Some(_hash) = stored_hash { + // TODO: re-enable this check (#4447) + //assert_eq!(hash, genesis_hash, "Storage already exists, but has a different genesis"); + get_genesis_state_roots(&store) + .expect("Store failed on genesis intialization") + .expect("Genesis state roots not found in storage") + } else { + let runtime_config_store = Self::create_runtime_config_store(&genesis.config.chain_id); + let runtime_config = runtime_config_store.get_config(genesis.config.protocol_version); + let store_usage_config = &runtime_config.fees.storage_usage_config; + let genesis_hash = genesis.json_hash(); + let state_roots = + initialize_genesis_state(store.clone(), home_dir, &store_usage_config, genesis); + let mut store_update = store.store_update(); + set_genesis_hash(&mut store_update, &genesis_hash); + set_genesis_state_roots(&mut store_update, &state_roots); + store_update.commit().expect("Store failed on genesis intialization"); + state_roots + } + } + fn get_shard_uid_from_prev_hash( &self, shard_id: ShardId, @@ -256,7 +324,8 @@ impl NightshadeRuntime { let epoch_manager = self.epoch_manager.read(); let shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; debug!(target: "runtime", - "is next_block_epoch_start {}", + "block height: {}, is next_block_epoch_start {}", + block_height, epoch_manager.is_next_block_epoch_start(prev_block_hash).unwrap() ); @@ -569,7 +638,38 @@ fn format_total_gas_burnt(gas: Gas) -> String { format!("{:.0}", ((gas as f64) / 1e14).ceil() * 100.0) } +fn apply_delayed_receipts<'a>( + tries: &ShardTries, + orig_shard_uid: ShardUId, + orig_state_root: StateRoot, + state_roots: HashMap, + account_id_to_shard_id: &(dyn Fn(&AccountId) -> ShardUId + 'a), +) -> Result, Error> { + let orig_trie_update = tries.new_trie_update_view(orig_shard_uid, orig_state_root); + + let mut start_index = None; + let mut new_state_roots = state_roots; + while let Some((next_index, receipts)) = + get_delayed_receipts(&orig_trie_update, start_index, STATE_PART_MEMORY_LIMIT)? 
+ { + let (store_update, updated_state_roots) = tries.apply_delayed_receipts_to_split_states( + &new_state_roots, + &receipts, + account_id_to_shard_id, + )?; + new_state_roots = updated_state_roots; + start_index = Some(next_index); + store_update.commit()?; + } + + Ok(new_state_roots) +} + impl RuntimeAdapter for NightshadeRuntime { + fn genesis_state(&self) -> (Store, Vec) { + (self.store.clone(), self.genesis_state_roots.clone()) + } + fn store(&self) -> &Store { &self.store } @@ -1072,6 +1172,63 @@ impl RuntimeAdapter for NightshadeRuntime { .collect()) } + fn build_state_for_split_shards( + &self, + shard_uid: ShardUId, + state_root: &StateRoot, + next_epoch_shard_layout: &ShardLayout, + state_split_status: Arc, + ) -> Result, Error> { + // TODO(resharding) use flat storage to split the trie here + let trie = self.tries.get_view_trie_for_shard(shard_uid, *state_root); + let shard_id = shard_uid.shard_id(); + let new_shards = next_epoch_shard_layout + .get_split_shard_uids(shard_id) + .ok_or(Error::InvalidShardId(shard_id))?; + let mut state_roots: HashMap<_, _> = + new_shards.iter().map(|shard_uid| (*shard_uid, Trie::EMPTY_ROOT)).collect(); + let split_shard_ids: HashSet<_> = new_shards.into_iter().collect(); + let checked_account_id_to_shard_id = |account_id: &AccountId| { + let new_shard_uid = account_id_to_shard_uid(account_id, next_epoch_shard_layout); + // check that all accounts in the shard are mapped the shards that this shard will split + // to according to shard layout + assert!( + split_shard_ids.contains(&new_shard_uid), + "Inconsistent shard_layout specs. Account {:?} in shard {:?} and in shard {:?}, but the former is not parent shard for the latter", + account_id, + shard_uid, + new_shard_uid, + ); + new_shard_uid + }; + + let state_root_node = trie.retrieve_root_node()?; + let num_parts = get_num_state_parts(state_root_node.memory_usage); + if state_split_status.total_parts.set(num_parts).is_err() { + log_assert!(false, "splitting state was done twice for shard {}", shard_id); + } + debug!(target: "runtime", "splitting state for shard {} to {} parts to build new states", shard_id, num_parts); + for part_id in 0..num_parts { + let trie_items = trie.get_trie_items_for_part(PartId::new(part_id, num_parts))?; + let (store_update, new_state_roots) = self.tries.add_values_to_split_states( + &state_roots, + trie_items.into_iter().map(|(key, value)| (key, Some(value))).collect(), + &checked_account_id_to_shard_id, + )?; + state_roots = new_state_roots; + store_update.commit()?; + state_split_status.done_parts.fetch_add(1, core::sync::atomic::Ordering::Relaxed); + } + state_roots = apply_delayed_receipts( + &self.tries, + shard_uid, + *state_root, + state_roots, + &checked_account_id_to_shard_id, + )?; + Ok(state_roots) + } + fn apply_state_part( &self, shard_id: ShardId, @@ -1261,26 +1418,24 @@ mod test { use near_primitives::test_utils::create_test_signer; use near_primitives::types::validator_stake::ValidatorStake; use near_store::flat::{FlatStateChanges, FlatStateDelta, FlatStateDeltaMetadata}; - use near_store::genesis::initialize_genesis_state; use num_rational::Ratio; use crate::config::{GenesisExt, TESTING_INIT_BALANCE, TESTING_INIT_STAKE}; - use near_chain_configs::{Genesis, DEFAULT_GC_NUM_EPOCHS_TO_KEEP}; + use near_chain_configs::DEFAULT_GC_NUM_EPOCHS_TO_KEEP; use near_crypto::{InMemorySigner, KeyType, Signer}; use near_o11y::testonly::init_test_logger; use near_primitives::block::Tip; use near_primitives::challenge::SlashedValidator; use 
near_primitives::transaction::{Action, DeleteAccountAction, StakeAction, TransferAction}; use near_primitives::types::{ - BlockHeightDelta, Nonce, NumShards, ValidatorId, ValidatorInfoIdentifier, - ValidatorKickoutReason, + BlockHeightDelta, Nonce, ValidatorId, ValidatorInfoIdentifier, ValidatorKickoutReason, }; use near_primitives::validator_signer::ValidatorSigner; use near_primitives::views::{ AccountView, CurrentEpochValidatorInfo, EpochValidatorInfo, NextEpochValidatorInfo, ValidatorKickoutView, }; - use near_store::{get_genesis_state_roots, NodeStorage}; + use near_store::NodeStorage; use super::*; @@ -1424,12 +1579,11 @@ mod test { } let genesis_total_supply = genesis.config.total_supply; let genesis_protocol_version = genesis.config.protocol_version; - - initialize_genesis_state(store.clone(), &genesis, Some(dir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::new( - store.clone(), - &genesis.config, + dir.path(), + store, + &genesis, epoch_manager.clone(), None, None, @@ -1443,7 +1597,7 @@ mod test { compaction_enabled: false, }, ); - let state_roots = get_genesis_state_roots(&store).unwrap().unwrap(); + let (store, state_roots) = runtime.genesis_state(); let genesis_hash = hash(&[0]); // Create flat storage. Naturally it happens on Chain creation, but here we test only Runtime behaviour @@ -2731,12 +2885,11 @@ mod test { let store = near_store::test_utils::create_test_store(); let tempdir = tempfile::tempdir().unwrap(); - initialize_genesis_state(store.clone(), &genesis, Some(tempdir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::test_with_runtime_config_store( tempdir.path(), store.clone(), - &genesis.config, + &genesis, epoch_manager.clone(), RuntimeConfigStore::new(None), ); From 408e2114ba01a85a921ba8ccdebdcd10b7753a89 Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Fri, 14 Jul 2023 16:10:49 +0300 Subject: [PATCH 35/50] rust: 1.70.0 -> 1.71.0 (#9302) Announcement: https://blog.rust-lang.org/2023/07/13/Rust-1.71.0.html Notable breakages for us involve tightened down lints and replacement of the `clippy::integer_arithtmetic` lint with a more general `clippy::arithmentic_side_effects` lint. The latter was particularly angry about `curve25519-dalek` crate which only exposes unchecked arithmetic operations. I had no clue what the expected behaviour there is (wrapping? a panic?) so I simply allowed the lint for now, but somebody should definitely take a look at it in the future cc @abacabadabacaba --- core/o11y/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/o11y/src/lib.rs b/core/o11y/src/lib.rs index 9403e230187..0ef1f85ba36 100644 --- a/core/o11y/src/lib.rs +++ b/core/o11y/src/lib.rs @@ -1,5 +1,5 @@ #![doc = include_str!("../README.md")] -#![deny(clippy::integer_arithmetic)] +#![deny(clippy::arithmetic_side_effects)] pub use context::*; use near_crypto::PublicKey; From 741963319f7e92c75150f8414ba2be6a919d4392 Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Fri, 14 Jul 2023 16:16:12 +0200 Subject: [PATCH 36/50] fix(state-sync): Always use flat storage when catching up (#9311) The original code made the use of flat storage conditional on the node tracking that shard this epoch. If a node prepares to track shard S next epoch E, then it downloads its state (E-1) and applies chunks in order. 
To apply chunks correctly in a way compatible with the rest of the network, it needs to be using flat storage. Also add a metric for the latest block processed during catchup. Also fix `view-state apply-range` tool not to fail because of getting delayed indices. Also reduce verbosity of the inlining migration. --- chain/chain/src/chain.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index 4e9ba507b6a..d08ad654a3c 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -3408,9 +3408,13 @@ impl Chain { blocks_catch_up_state: &mut BlocksCatchUpState, block_catch_up_scheduler: &dyn Fn(BlockCatchUpRequest), ) -> Result<(), Error> { - debug!(target:"catchup", "catch up blocks: pending blocks: {:?}, processed {:?}, scheduled: {:?}, done: {:?}", - blocks_catch_up_state.pending_blocks, blocks_catch_up_state.processed_blocks.keys().collect::>(), - blocks_catch_up_state.scheduled_blocks, blocks_catch_up_state.done_blocks.len()); + tracing::debug!( + target: "catchup", + pending_blocks = ?blocks_catch_up_state.pending_blocks, + processed_blocks = ?blocks_catch_up_state.processed_blocks.keys().collect::>(), + scheduled_blocks = ?blocks_catch_up_state.scheduled_blocks, + done_blocks = blocks_catch_up_state.done_blocks.len(), + "catch up blocks"); let mut processed_blocks = HashMap::new(); for (queued_block, results) in blocks_catch_up_state.processed_blocks.drain() { // If this block is parent of some blocks in processing that need to be caught up, @@ -3461,6 +3465,7 @@ impl Chain { Default::default(), &mut Vec::new(), )?; + metrics::SCHEDULED_CATCHUP_BLOCK.set(block.header().height() as i64); blocks_catch_up_state.scheduled_blocks.insert(pending_block); block_catch_up_scheduler(BlockCatchUpRequest { sync_hash: *sync_hash, @@ -3983,7 +3988,7 @@ impl Chain { true, is_first_block_with_chunk_of_version, state_patch, - cares_about_shard_this_epoch, + true, ) { Ok(apply_result) => { let apply_split_result_or_state_changes = @@ -4044,7 +4049,7 @@ impl Chain { false, false, state_patch, - cares_about_shard_this_epoch, + true, ) { Ok(apply_result) => { let apply_split_result_or_state_changes = From c972d75dcb6a75104abee66b8c782803c77c4cba Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Fri, 14 Jul 2023 16:54:53 +0200 Subject: [PATCH 37/50] fix(state-snapshot): Tool to make DB snapshots (#9308) Co-authored-by: near-bulldozer[bot] <73298989+near-bulldozer[bot]@users.noreply.github.com> --- tools/database/README.md | 17 +---------------- tools/database/src/commands.rs | 12 ++++++++++++ tools/database/src/lib.rs | 1 + 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/database/README.md b/tools/database/README.md index 071b1a697a4..29cc2c1664f 100644 --- a/tools/database/README.md +++ b/tools/database/README.md @@ -2,7 +2,7 @@ A set of tools useful when working with the underlying database. -## Analyse data size distribution +## Analyse Database The analyse database script provides an efficient way to assess the size distribution of keys and values within RocksDB. @@ -78,18 +78,3 @@ available in `/home/ubuntu/.near/data/snapshot` This command can be helpful before attempting activities that can potentially corrupt the database. 
- -### run-migrations -Opens the DB and runs migrations to bring it to the actual version expected by `neard` -Example usage: -```bash -cargo run --bin neard database run-migrations -``` - -For example, if the binary expects DB version `38`, but the DB is currently -version `36`, the command will open the DB, run migrations that bring the DB -from version `36` to version `38`, and then exits. - -## State read perf -A tool for performance testing hot storage RocksDB State column reads. -Use help to get more details: `neard database state-perf --help` diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 8c3e946ab24..0544f246aff 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -1,5 +1,6 @@ use crate::adjust_database::ChangeDbKindCommand; use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand; +use crate::make_snapshot::MakeSnapshotCommand; use crate::run_migrations::RunMigrationsCommand; use clap::Parser; use std::path::PathBuf; @@ -19,6 +20,9 @@ enum SubCommand { /// Change DbKind of hot or cold db. ChangeDbKind(ChangeDbKindCommand), + /// Make snapshot of the database + MakeSnapshot(MakeSnapshotCommand), + /// Run migrations, RunMigrations(RunMigrationsCommand), } @@ -35,6 +39,14 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, &near_config) } + SubCommand::MakeSnapshot(cmd) => { + let near_config = nearcore::config::load_config( + &home, + near_chain_configs::GenesisValidationMode::UnsafeFast, + ) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + cmd.run(home, near_config.config.archive, &near_config.config.store) + } SumCommand::RunMigrationsCommand(cmd) => { let mut near_config = nearcore::config::load_config( &home, diff --git a/tools/database/src/lib.rs b/tools/database/src/lib.rs index a9261186b50..a8c8ed15ec5 100644 --- a/tools/database/src/lib.rs +++ b/tools/database/src/lib.rs @@ -1,4 +1,5 @@ mod adjust_database; mod analyse_data_size_distribution; pub mod commands; +mod make_snapshot; mod run_migrations; From 03e7c0a8ca66982f81c86f4c4a6a50f2a370b027 Mon Sep 17 00:00:00 2001 From: wacban Date: Sat, 15 Jul 2023 13:08:58 +0200 Subject: [PATCH 38/50] refactor: improvements to logging (#9309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are plenty of log lines that don't fit in a single line, even on a quite wide monitor. This is an attempt to improve that. - Removed a few variables in tracing spans that were redundant - already included in parent span. - Removed `apply_transactions_with_optional_storage_proof` span that immediately enters `process_state_update` and doesn't provide much value. - Set the test formatter to use a new custom time formatter that only prints seconds and milliseconds since the test started. The default one prints full date, time, and nanoseconds. - Mini refactor of the sharding_upgrade.rs that I'm just trying to sneak through. These tests are the inspiration for improving the spam log since I can't parse it. - **RFC: changed the log level of the `process_receipt` log to `trace!`. This is very subjective but my reasoning is that if a log line appears more than a few times per block, then it should have the trace level.** Since it's runtime related, cc @jakmeier @nagisa, are you fine with that change? For any of those I can be convinced otherwise, please shout.
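For intuition, the elapsed-time formatter mentioned above amounts to printing seconds plus zero-padded milliseconds measured from a fixed start instant. A minimal standalone sketch of that idea (illustrative only, not the formatter actually added in this change; the function name is made up):

```rust
use std::fmt::Write as _;
use std::time::Instant;

/// Formats the time elapsed since `start` as seconds with millisecond
/// precision, e.g. "1.075s", instead of a full date-time with nanoseconds.
fn format_elapsed(start: Instant) -> String {
    let elapsed = start.elapsed();
    let mut out = String::new();
    write!(out, "{}.{:03}s", elapsed.as_secs(), elapsed.subsec_millis()).unwrap();
    out
}

fn main() {
    let test_start = Instant::now();
    // ... the test body would run here ...
    println!("{} DEBUG example_span: example log line", format_elapsed(test_start));
}
```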
new log lines look like this: ``` 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=1}:process_state_update: runtime: epoch_height=4 epoch_id=EpochId(4kD9) current_protocol_version=48 is_first_block_of_version=false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=2}:process_state_update: runtime: epoch_height=4 epoch_id=EpochId(4kD9) current_protocol_version=48 is_first_block_of_version=false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=3}:process_state_update: runtime: is next_block_epoch_start false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=2}:process_state_update:apply{num_transactions=0}: runtime: close time.busy=39.2µs time.idle=3.04µs 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=3}:process_state_update: runtime: epoch_height=4 epoch_id=EpochId(4kD9) current_protocol_version=48 is_first_block_of_version=false 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=1}:process_state_update:apply{num_transactions=0}: runtime: close time.busy=71.0µs time.idle=2.67µs 1.075s DEBUG do_apply_chunks{block_height=23 block_hash=9yH4}:new_chunk{shard_id=3}:process_state_update:apply{num_transactions=0}: runtime: close time.busy=62.2µs time.idle=3.58µs ``` (with the exception of hashes, I have them shortened locally, but I'm not including that in this PR) On a sidenote, I quite like tracing spans but we may be overdoing it a bit. --- nearcore/src/runtime/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index 8a25701f4b0..22499a27bb2 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -324,8 +324,7 @@ impl NightshadeRuntime { let epoch_manager = self.epoch_manager.read(); let shard_layout = epoch_manager.get_shard_layout(&epoch_id)?; debug!(target: "runtime", - "block height: {}, is next_block_epoch_start {}", - block_height, + "is next_block_epoch_start {}", epoch_manager.is_next_block_epoch_start(prev_block_hash).unwrap() ); From 798a4a3cc78955f58b5a84f6145a9eecd0a9d886 Mon Sep 17 00:00:00 2001 From: nikurt <86772482+nikurt@users.noreply.github.com> Date: Mon, 17 Jul 2023 15:08:14 +0200 Subject: [PATCH 39/50] fix(state-sync): Test showing that state sync can't always generate state parts (#9294) Extracted a test from #9237 . No fix is available yet. 
--- integration-tests/src/tests/nearcore/sync_state_nodes.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/integration-tests/src/tests/nearcore/sync_state_nodes.rs b/integration-tests/src/tests/nearcore/sync_state_nodes.rs index 47954f8805d..1971ed2f408 100644 --- a/integration-tests/src/tests/nearcore/sync_state_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_state_nodes.rs @@ -18,7 +18,6 @@ use near_primitives::state_part::PartId; use near_primitives::syncing::get_num_state_parts; use near_primitives::transaction::SignedTransaction; use near_primitives::utils::MaybeValidated; -use near_store::genesis::initialize_genesis_state; use near_store::{NodeStorage, Store}; use nearcore::{config::GenesisExt, load_test_config, start_with_config, NightshadeRuntime}; use std::ops::ControlFlow; @@ -493,7 +492,6 @@ fn sync_state_dump() { root_dir: dump_dir.path().to_path_buf(), }, num_concurrent_requests: 10, - num_concurrent_requests_during_catchup: 1, }); let nearcore::NearNode { @@ -571,13 +569,12 @@ fn test_dump_epoch_missing_chunk_in_last_block() { .open() .unwrap() .get_hot_store(); - initialize_genesis_state(store.clone(), &genesis, Some(tmp_dir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::test( tmp_dir.path(), store.clone(), - &genesis.config, + &genesis, epoch_manager.clone(), ) as Arc; (tmp_dir, store, epoch_manager, runtime) From a98274d7e30b4a98a3ea929742ca4f92079cc24b Mon Sep 17 00:00:00 2001 From: Anton Puhach Date: Mon, 17 Jul 2023 22:51:17 +0200 Subject: [PATCH 40/50] feat: add database tool subcommand for State read perf testing (#9276) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a tool used to evaluate State read performance as part of `neard database` CLI. For more details on the approach see [the Methodology section](https://github.com/near/nearcore/discussions/9235). Also includes some minor refactoring around database tool.
Example executions ``` ubuntu@pugachag-mainnet:~/nearcore$ ./target/quick-release/neard database state-perf --help Run performance test for State column reads Usage: neard database state-perf [OPTIONS] Options: -s, --samples Number of requsts to use for the performance evaluation. Increasing this value results in more precise measurements, but longer test execution [default: 10000] -w, --warmup-samples Number of requests to use for database warmup. Those requests will be excluded from the measurements [default: 1000] -h, --help Print help ubuntu@pugachag-mainnet:~/nearcore$ ./target/quick-release/neard database state-perf 2023-07-12T10:21:15.258765Z INFO neard: version="trunk" build="44a09bf39" latest_protocol=62 2023-07-12T10:21:15.292835Z INFO db: Opened a new RocksDB instance. num_instances=1 Start State perf test Generate 11000 requests to State █████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ 11000/11000 Finished requests generation █████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ 11000/11000 Finished State perf test overall | avg observed_latency: 1.45039ms, block_read_time: 1.196571ms, samples with merge: 1596 (15.96%) block_read_count: 0, samples: 7 (0.07%): | avg observed_latency: 36.126µs, block_read_time: 0ns, samples with merge: 4 (57.14%) block_read_count: 1, samples: 4613 (46.13%): | avg observed_latency: 886.908µs, block_read_time: 790.738µs, samples with merge: 36 (0.78%) block_read_count: 2, samples: 1962 (19.62%): | avg observed_latency: 1.383988ms, block_read_time: 1.221933ms, samples with merge: 904 (46.08%) block_read_count: 3, samples: 1375 (13.75%): | avg observed_latency: 1.526996ms, block_read_time: 1.271185ms, samples with merge: 363 (26.40%) block_read_count: 4, samples: 1361 (13.61%): | avg observed_latency: 1.575212ms, block_read_time: 1.207766ms, samples with merge: 148 (10.87%) block_read_count: 5, samples: 221 (2.21%): | avg observed_latency: 2.080291ms, block_read_time: 1.660845ms, samples with merge: 89 (40.27%) block_read_count: 6, samples: 382 (3.82%): | avg observed_latency: 6.281688ms, block_read_time: 4.545931ms, samples with merge: 28 (7.33%) block_read_count: 7, samples: 41 (0.41%): | avg observed_latency: 6.709164ms, block_read_time: 4.897512ms, samples with merge: 14 (34.15%) block_read_count: 8, samples: 13 (0.13%): | avg observed_latency: 6.569955ms, block_read_time: 4.73201ms, samples with merge: 7 (53.85%) block_read_count: 9, samples: 3 (0.03%): | avg observed_latency: 7.457121ms, block_read_time: 5.517267ms, samples with merge: 2 (66.67%) block_read_count: 10, samples: 22 (0.22%): | avg observed_latency: 9.602637ms, block_read_time: 6.658604ms, samples with merge: 1 (4.55%) 2023-07-12T10:21:46.995873Z INFO db: Closed a RocksDB instance. num_instances=0 ```
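Conceptually, the numbers above come from a warmup phase whose reads are discarded, followed by timed reads whose latencies are aggregated. A minimal sketch of that measurement loop (illustrative only; the real tool issues requests against the State column in RocksDB and additionally reports per-read block statistics, which this sketch omits):

```rust
use std::time::{Duration, Instant};

/// Run `warmup` un-timed requests, then time `samples` requests and return the
/// average observed latency.
fn measure<F: FnMut()>(mut read_one: F, warmup: usize, samples: usize) -> Duration {
    for _ in 0..warmup {
        read_one(); // warmup reads are excluded from the measurements
    }
    let mut total = Duration::ZERO;
    for _ in 0..samples {
        let started = Instant::now();
        read_one();
        total += started.elapsed();
    }
    total / samples as u32
}

fn main() {
    // Stand-in workload; the real tool performs a State column read here.
    let avg = measure(|| { std::hint::black_box(2 + 2); }, 1_000, 10_000);
    println!("avg observed_latency: {:?}", avg);
}
```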
--- tools/database/README.md | 6 +++++- tools/database/src/commands.rs | 17 ++++++++--------- tools/database/src/lib.rs | 2 ++ 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/database/README.md b/tools/database/README.md index 29cc2c1664f..7642f841445 100644 --- a/tools/database/README.md +++ b/tools/database/README.md @@ -2,7 +2,7 @@ A set of tools useful when working with the underlying database. -## Analyse Database +## Analyse data size distribution The analyse database script provides an efficient way to assess the size distribution of keys and values within RocksDB. @@ -78,3 +78,7 @@ available in `/home/ubuntu/.near/data/snapshot` This command can be helpful before attempting activities that can potentially corrupt the database. + +## State read perf +A tool for performance testing hot storage RocksDB State column reads. +Use help to get more details: `neard database state-perf --help` diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 0544f246aff..9ed84bd8285 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -2,6 +2,7 @@ use crate::adjust_database::ChangeDbKindCommand; use crate::analyse_data_size_distribution::AnalyseDataSizeDistributionCommand; use crate::make_snapshot::MakeSnapshotCommand; use crate::run_migrations::RunMigrationsCommand; +use crate::state_perf::StatePerfCommand; use clap::Parser; use std::path::PathBuf; @@ -25,26 +26,23 @@ enum SubCommand { /// Run migrations, RunMigrations(RunMigrationsCommand), + + /// Run performance test for State column reads. + /// Uses RocksDB data specified via --home argument. + StatePerf(StatePerfCommand), } impl DatabaseCommand { pub fn run(&self, home: &PathBuf) -> anyhow::Result<()> { match &self.subcmd { SubCommand::AnalyseDataSizeDistribution(cmd) => cmd.run(home), - SubCommand::ChangeDbKind(cmd) => { - let near_config = nearcore::config::load_config( - &home, - near_chain_configs::GenesisValidationMode::UnsafeFast, - ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - cmd.run(home, &near_config) - } + SubCommand::ChangeDbKind(cmd) => cmd.run(home), SubCommand::MakeSnapshot(cmd) => { let near_config = nearcore::config::load_config( &home, near_chain_configs::GenesisValidationMode::UnsafeFast, ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } SumCommand::RunMigrationsCommand(cmd) => { @@ -55,6 +53,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, &mut near_config) } + SubCommand::StatePerf(cmd) => cmd.run(home), } } } diff --git a/tools/database/src/lib.rs b/tools/database/src/lib.rs index a8c8ed15ec5..f4bb1914908 100644 --- a/tools/database/src/lib.rs +++ b/tools/database/src/lib.rs @@ -3,3 +3,5 @@ mod analyse_data_size_distribution; pub mod commands; mod make_snapshot; mod run_migrations; +mod state_perf; +mod utils; From 76127bf18acfa7f65ea6f8b553f397f4b60a365a Mon Sep 17 00:00:00 2001 From: Saketh Are Date: Mon, 17 Jul 2023 21:04:04 -0400 Subject: [PATCH 41/50] RoutingTable V2: Distance Vector Routing (#9187) ### Suggested Review Path 1. Browse the (relatively small) changes outside of the `chain/network/src/routing` folder to understand the external surface of the new RoutingTableV2 component. 2. Check out the architecture diagram and event flows documented below. 3. 
Read the documentation for the EdgeCache component and understand the 3 purposes it serves. The primary role of this component is to support efficient implementation of the routing protocol. 4. Review the RoutingTableV2 component and understand how DistanceVectors are ingested and created. This is the core of the new routing protocol. 5. Return to the EdgeCache and review its implementation. 6. Revisit the call-sites outside of the routing folder. ### Architecture ![image](https://github-production-user-asset-6210df.s3.amazonaws.com/3241341/244770041-ee661c90-667c-4db7-b8ac-678c90e75830.png) ### Event Flows - Network Topology Changes - Three Kinds: Peer Connected, Peer Disconnected, received a PeerMessage with new DistanceVector - These are triggered by PeerActor and flow into PeerManagerActor then into the demux - Demux sends batches of updates (up to every 1 second) to the RoutingTableV2 - RoutingTable processes entire batch, expires any outdated routes (relying on too-old edges), then generates updated RoutingTableView and local DistanceVector - If the local DistanceVector changes, it is then broadcast to all peers - Handle RoutedMessage - Received by the PeerActor, which calls into PeerManagerActor for routing decisions - Record the "previous hop" (the peer from which we received this message) in the RouteBackCache - Select a "next hop" from the RoutingTableView and forward the message - Handle response to a RoutedMessage - Received by the PeerActor, which calls into PeerManagerActor for routing decisions - Fetch the "previous hop" from the RouteBackCache and relay the response back to the originating peer for the original message - Connection started - When two nodes A and B connect, each spawns a PeerActor managing the connection - A sends a partially signed edge, which B then signs to produce a complete signed edge - B adds the signed edge to its local routing table, triggering re-computation of routes - B broadcasts its updated DistanceVector, which provides A (and other nodes) with the signed edge - Connection stopped - Node A loses connection to some node B (either B stopped running, or the specific connection was broken) - Node A executes fix_local_edges and notices the lost connection, triggering re-computation of routes - A broadcasts its updated DistanceVector, informing other nodes of the latest routes it has - If B is still running, it will go through the same steps described for A - If B is not running, the other nodes connected to it will process a disconnection (just like A) ### Configurable Parameters To be finalized after further testing in larger topologies: - Minimum interval between routing table reconstruction: 1 second - Time after which edges are considered expired: 30 minutes - How often to refresh the nonces on edges: 10 minutes - How often to check consistency of routing table's local edges with the connection pool: every 1 minute ### Resources - [Design document](https://docs.google.com/document/d/192NdoknskSLavttwOZk40TSYvx2R1if4xNZ51sCNFkI/edit#heading=h.j4e0bgwl42pg) - [Zulip thread](https://near.zulipchat.com/#narrow/stream/297663-pagoda.2Fnetwork/topic/Updated.20thoughts.20on.20TIER2.20routing) with further design discussion #### Future Extensions - [ ] Set up metrics we want to collect - [ ] Implement a debug-ui view showing contents of the V2 routing table - [ ] Implement pruning of non-validator leafs - [ ] Add handling of unreliable peers - [ ] Deprecate the old RoutingTable - [ ] Deprecate negative/tombstone edges --- 
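As a rough intuition for the route computation described in the event flows above (this is not the actual RoutingTableV2 implementation; the types and names below are simplified stand-ins), a node can recompute its own distance vector from the distance vectors advertised by its direct neighbors:

```rust
use std::collections::HashMap;

// Simplified stand-in for the real PeerId type.
type PeerId = String;

/// distance(me -> me) = 0, and for every other peer p,
/// distance(me -> p) = 1 + min over direct neighbors n of distance(n -> p).
fn recompute_distances(
    me: &PeerId,
    neighbor_distances: &HashMap<PeerId, HashMap<PeerId, u32>>,
) -> HashMap<PeerId, u32> {
    let mut my_distances = HashMap::new();
    my_distances.insert(me.clone(), 0);
    for (neighbor, advertised) in neighbor_distances {
        // The direct edge to the neighbor itself has length 1.
        let direct = my_distances.entry(neighbor.clone()).or_insert(u32::MAX);
        *direct = (*direct).min(1);
        for (peer, distance) in advertised {
            let through_neighbor = distance.saturating_add(1);
            my_distances
                .entry(peer.clone())
                .and_modify(|current| *current = (*current).min(through_neighbor))
                .or_insert(through_neighbor);
        }
    }
    my_distances
}

fn main() {
    // A is connected to B; B advertises that it can reach C at distance 1.
    let neighbors = HashMap::from([(
        "B".to_string(),
        HashMap::from([("B".to_string(), 0), ("C".to_string(), 1)]),
    )]);
    let distances = recompute_distances(&"A".to_string(), &neighbors);
    assert_eq!(distances[&"C".to_string()], 2); // A -> B -> C
}
```

The real component additionally has to remember which signed edges back each advertised distance, so that routes relying on expired edges can be dropped when the routing table is recomputed; that bookkeeping is omitted here.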
chain/network/src/routing/edge_cache/mod.rs | 24 ----------------- chain/network/src/routing/graph_v2/mod.rs | 30 --------------------- 2 files changed, 54 deletions(-) diff --git a/chain/network/src/routing/edge_cache/mod.rs b/chain/network/src/routing/edge_cache/mod.rs index 46a1df37a43..d81afa6b13c 100644 --- a/chain/network/src/routing/edge_cache/mod.rs +++ b/chain/network/src/routing/edge_cache/mod.rs @@ -1,6 +1,5 @@ use crate::network_protocol::Edge; use near_primitives::network::PeerId; -use near_primitives::views::{EdgeCacheView, LabeledEdgeView}; use std::collections::hash_map::{Entry, Iter}; use std::collections::{HashMap, HashSet}; @@ -358,27 +357,4 @@ impl EdgeCache { None } } - - pub(crate) fn get_debug_view(&self) -> EdgeCacheView { - EdgeCacheView { - peer_labels: self.p2id.clone(), - spanning_trees: self - .active_trees - .iter() - .map(|(peer_id, edge_keys)| { - ( - self.get_id(&peer_id), - edge_keys - .iter() - .map(|key| LabeledEdgeView { - peer0: self.get_id(&key.peer0), - peer1: self.get_id(&key.peer1), - nonce: *self.verified_nonces.get(&key).unwrap(), - }) - .collect(), - ) - }) - .collect(), - } - } } diff --git a/chain/network/src/routing/graph_v2/mod.rs b/chain/network/src/routing/graph_v2/mod.rs index 8412b262488..60490803325 100644 --- a/chain/network/src/routing/graph_v2/mod.rs +++ b/chain/network/src/routing/graph_v2/mod.rs @@ -7,7 +7,6 @@ use crate::stats::metrics; use arc_swap::ArcSwap; use near_async::time; use near_primitives::network::PeerId; -use near_primitives::views::{EdgeView, NetworkRoutesView, PeerDistancesView}; use parking_lot::Mutex; use std::collections::VecDeque; use std::collections::{HashMap, HashSet}; @@ -672,33 +671,4 @@ impl GraphV2 { .await .unwrap() } - - pub(crate) fn get_debug_view(&self) -> NetworkRoutesView { - let inner = self.inner.lock(); - NetworkRoutesView { - edge_cache: inner.edge_cache.get_debug_view(), - local_edges: inner - .local_edges - .iter() - .map(|(peer_id, edge)| { - let (peer0, peer1) = edge.key().clone(); - (peer_id.clone(), EdgeView { peer0, peer1, nonce: edge.nonce() }) - }) - .collect(), - peer_distances: inner - .peer_distances - .iter() - .map(|(peer_id, routes)| { - ( - peer_id.clone(), - PeerDistancesView { - distance: routes.distance.clone(), - min_nonce: routes.min_nonce, - }, - ) - }) - .collect(), - my_distances: inner.my_distances.clone(), - } - } } From 5eb5c8a6eaed113c0f4f1717a3ab334dec7be56c Mon Sep 17 00:00:00 2001 From: Razvan Barbascu Date: Tue, 18 Jul 2023 23:02:38 +0100 Subject: [PATCH 42/50] feat(state-sync): Add config for number of downloads during catchup (#9318) We can limit the impact of state sync during catchup by turning this number down. This way validation of blocks will not be hindered while the node downloads the state. 
--- integration-tests/src/tests/nearcore/sync_state_nodes.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/integration-tests/src/tests/nearcore/sync_state_nodes.rs b/integration-tests/src/tests/nearcore/sync_state_nodes.rs index 1971ed2f408..c3df857a62e 100644 --- a/integration-tests/src/tests/nearcore/sync_state_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_state_nodes.rs @@ -492,6 +492,7 @@ fn sync_state_dump() { root_dir: dump_dir.path().to_path_buf(), }, num_concurrent_requests: 10, + num_concurrent_requests_during_catchup: 1, }); let nearcore::NearNode { From 232d8a67a1555af3bea0244963ead20bb06ca050 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 18:22:25 +0200 Subject: [PATCH 43/50] Merge --- chain/chain/src/chain.rs | 198 +++++++++-------- chain/chain/src/tests/simple_chain.rs | 4 +- chain/network/src/routing/edge_cache/mod.rs | 24 +++ chain/network/src/routing/graph_v2/mod.rs | 30 +++ core/primitives-core/Cargo.toml | 2 + core/primitives-core/src/version.rs | 6 +- core/primitives/Cargo.toml | 2 + .../src/tests/nearcore/sync_state_nodes.rs | 4 +- nearcore/src/runtime/mod.rs | 202 +++--------------- tools/database/README.md | 12 ++ 10 files changed, 212 insertions(+), 272 deletions(-) diff --git a/chain/chain/src/chain.rs b/chain/chain/src/chain.rs index d08ad654a3c..a8becf1ecdb 100644 --- a/chain/chain/src/chain.rs +++ b/chain/chain/src/chain.rs @@ -27,17 +27,17 @@ use delay_detector::DelayDetector; use itertools::Itertools; use lru::LruCache; use near_chain_primitives::error::{BlockKnownError, Error, LogTransientStorageError}; -use near_client_primitives::types::StateSplitApplyingStatus; use near_epoch_manager::shard_tracker::ShardTracker; use near_epoch_manager::types::BlockHeaderInfo; use near_epoch_manager::EpochManagerAdapter; use near_o11y::log_assert; -use near_primitives::block::{genesis_chunks, Tip}; +use near_primitives::block::{genesis_chunks, BlockValidityError, Tip}; use near_primitives::challenge::{ BlockDoubleSign, Challenge, ChallengeBody, ChallengesResult, ChunkProofs, ChunkState, MaybeEncodedShardChunk, PartialState, SlashedValidator, }; use near_primitives::checked_feature; +use near_primitives::errors::EpochError; use near_primitives::hash::{hash, CryptoHash}; use near_primitives::merkle::{ combine_hash, merklize, verify_path, Direction, MerklePath, MerklePathItem, PartialMerkleTree, @@ -75,7 +75,7 @@ use near_store::flat::{ store_helper, FlatStateChanges, FlatStateDelta, FlatStateDeltaMetadata, FlatStorageError, FlatStorageReadyStatus, FlatStorageStatus, }; -use near_store::StorageError; +use near_store::{get_genesis_state_roots, StorageError}; use near_store::{DBCol, ShardTries, WrappedTrieChanges}; use once_cell::sync::OnceCell; use rand::seq::SliceRandom; @@ -449,7 +449,8 @@ pub struct Chain { apply_chunks_receiver: Receiver, /// Time when head was updated most recently. last_time_head_updated: Instant, - + /// Prevents re-application of known-to-be-invalid blocks, so that in case of a + /// protocol issue we can recover faster by focusing on correct blocks. invalid_blocks: LruCache, /// Support for sandbox's patch_state requests. @@ -498,13 +499,22 @@ type PreprocessBlockResult = ( BlockPreprocessInfo, ); +// Used only for verify_block_hash_and_signature. See that method. 
+#[derive(Clone, Copy, PartialEq, Eq)] +pub enum VerifyBlockHashAndSignatureResult { + Correct, + Incorrect, + CannotVerifyBecauseBlockIsOrphan, +} + impl Chain { pub fn make_genesis_block( epoch_manager: &dyn EpochManagerAdapter, runtime_adapter: &dyn RuntimeAdapter, chain_genesis: &ChainGenesis, ) -> Result { - let (_, state_roots) = runtime_adapter.genesis_state(); + let state_roots = get_genesis_state_roots(runtime_adapter.store())? + .expect("genesis should be initialized."); let genesis_chunks = genesis_chunks( state_roots, epoch_manager.num_shards(&EpochId::default())?, @@ -578,9 +588,13 @@ impl Chain { make_snapshot_callback: Option, ) -> Result { // Get runtime initial state and create genesis block out of it. - let (store, state_roots) = runtime_adapter.genesis_state(); - let mut store = - ChainStore::new(store, chain_genesis.height, chain_config.save_trie_changes); + let state_roots = get_genesis_state_roots(runtime_adapter.store())? + .expect("genesis should be initialized."); + let mut store = ChainStore::new( + runtime_adapter.store().clone(), + chain_genesis.height, + chain_config.save_trie_changes, + ); let genesis_chunks = genesis_chunks( state_roots.clone(), epoch_manager.num_shards(&EpochId::default())?, @@ -1076,6 +1090,18 @@ impl Chain { Ok(()) } + fn maybe_mark_block_invalid(&mut self, block_hash: CryptoHash, error: &Error) { + metrics::NUM_INVALID_BLOCKS.inc(); + // We only mark the block as invalid if the block has bad data (not for other errors that would + // not be the fault of the block itself), except when the block has a bad signature which means + // the block might not have been what the block producer originally produced. Either way, it's + // OK if we miss some cases here because this is just an optimization to avoid reprocessing + // known invalid blocks so the network recovers faster in case of any issues. + if error.is_bad_data() && !matches!(error, Error::InvalidSignature) { + self.invalid_blocks.put(block_hash, ()); + } + } + /// Return a StateSyncInfo that includes the information needed for syncing state for shards needed /// in the next epoch. fn get_state_sync_info( @@ -1171,7 +1197,8 @@ impl Chain { } } } - block.check_validity().map_err(|e| e.into()) + block.check_validity().map_err(|e| >::into(e))?; + Ok(()) } /// Verify header signature when the epoch is known, but not the whole chain. @@ -1237,7 +1264,7 @@ impl Chain { return Err(Error::InvalidBlockFutureTime(header.timestamp())); } - // First I/O cost, delay as much as possible. + // Check the signature. if !self.epoch_manager.verify_header_signature(header)? { return Err(Error::InvalidSignature); } @@ -1415,6 +1442,56 @@ impl Chain { Ok(()) } + /// Verify that the block signature and block body hash matches. It makes sure that the block + /// content is not tampered by a middle man. + /// Returns Correct if the both check succeeds. Returns Incorrect if either check fails. + /// Returns CannotVerifyBecauseBlockIsOrphan, if we could not verify the signature because + /// the parent block is not yet available. 
+ pub fn verify_block_hash_and_signature( + &self, + block: &Block, + ) -> Result { + // skip the verification if we are processing the genesis block + if block.hash() == self.genesis.hash() { + return Ok(VerifyBlockHashAndSignatureResult::Correct); + } + let epoch_id = match self.epoch_manager.get_epoch_id(block.header().prev_hash()) { + Ok(epoch_id) => epoch_id, + Err(EpochError::MissingBlock(missing_block)) + if &missing_block == block.header().prev_hash() => + { + return Ok(VerifyBlockHashAndSignatureResult::CannotVerifyBecauseBlockIsOrphan); + } + Err(err) => return Err(err.into()), + }; + let epoch_protocol_version = self.epoch_manager.get_epoch_protocol_version(&epoch_id)?; + // Check that block body hash matches the block body. This makes sure that the block body + // content is not tampered + if checked_feature!( + "protocol_feature_block_header_v4", + BlockHeaderV4, + epoch_protocol_version + ) { + let block_body_hash = block.compute_block_body_hash(); + if block_body_hash.is_none() { + tracing::warn!("Block version too old for block: {:?}", block.hash()); + return Ok(VerifyBlockHashAndSignatureResult::Incorrect); + } + if block.header().block_body_hash() != block_body_hash { + tracing::warn!("Invalid block body hash for block: {:?}", block.hash()); + return Ok(VerifyBlockHashAndSignatureResult::Incorrect); + } + } + + // Verify the signature. Since the signature is signed on the hash of block header, this check + // makes sure the block header content is not tampered + if !self.epoch_manager.verify_header_signature(block.header())? { + tracing::error!("wrong signature"); + return Ok(VerifyBlockHashAndSignatureResult::Incorrect); + } + Ok(VerifyBlockHashAndSignatureResult::Correct) + } + /// Verify that `challenges` are valid /// If all challenges are valid, returns ChallengesResult, which comprises of the list of /// validators that need to be slashed and the list of blocks that are challenged. @@ -2001,6 +2078,20 @@ impl Chain { "start_process_block_impl", height = block_height) .entered(); + // 0) Before we proceed with any further processing, we first check that the block + // hash and signature matches to make sure the block is indeed produced by the assigned + // block producer. If not, we drop the block immediately + // Note that it may appear that we call verify_block_hash_signature twice, once in + // receive_block_impl, once here. The redundancy is because if a block is received as an orphan, + // the check in receive_block_impl will not be complete and the block will be stored in + // the orphan pool. When the orphaned block is ready to be processed, we must perform this check. + // Also note that we purposely separates the check from the rest of the block verification check in + // preprocess_block. + if self.verify_block_hash_and_signature(&block)? + == VerifyBlockHashAndSignatureResult::Incorrect + { + return Err(Error::InvalidSignature); + } // 1) preprocess the block where we verify that the block is valid and ready to be processed // No chain updates are applied at this step. 
@@ -2021,10 +2112,7 @@ impl Chain { preprocess_res } Err(e) => { - if e.is_bad_data() { - metrics::NUM_INVALID_BLOCKS.inc(); - self.invalid_blocks.put(*block.hash(), ()); - } + self.maybe_mark_block_invalid(*block.hash(), &e); preprocess_timer.stop_and_discard(); match &e { Error::Orphan => { @@ -2260,10 +2348,7 @@ impl Chain { let new_head = match self.postprocess_block_only(me, &block, block_preprocess_info, apply_results) { Err(err) => { - if err.is_bad_data() { - self.invalid_blocks.put(*block.hash(), ()); - metrics::NUM_INVALID_BLOCKS.inc(); - } + self.maybe_mark_block_invalid(*block.hash(), &err); self.blocks_delay_tracker.mark_block_errored(&block_hash, err.to_string()); return Err(err); } @@ -3343,53 +3428,6 @@ impl Chain { Ok(()) } - pub fn build_state_for_split_shards_preprocessing( - &self, - sync_hash: &CryptoHash, - shard_id: ShardId, - state_split_scheduler: &dyn Fn(StateSplitRequest), - state_split_status: Arc, - ) -> Result<(), Error> { - let (epoch_id, next_epoch_id) = { - let block_header = self.get_block_header(sync_hash)?; - (block_header.epoch_id().clone(), block_header.next_epoch_id().clone()) - }; - let shard_layout = self.epoch_manager.get_shard_layout(&epoch_id)?; - let next_epoch_shard_layout = self.epoch_manager.get_shard_layout(&next_epoch_id)?; - let shard_uid = ShardUId::from_shard_id_and_layout(shard_id, &shard_layout); - let prev_hash = *self.get_block_header(sync_hash)?.prev_hash(); - let state_root = *self.get_chunk_extra(&prev_hash, &shard_uid)?.state_root(); - assert_ne!(shard_layout, next_epoch_shard_layout); - - state_split_scheduler(StateSplitRequest { - runtime_adapter: self.runtime_adapter.clone(), - sync_hash: *sync_hash, - shard_id, - shard_uid, - state_root, - next_epoch_shard_layout, - state_split_status, - }); - - Ok(()) - } - - pub fn build_state_for_split_shards_postprocessing( - &mut self, - sync_hash: &CryptoHash, - state_roots: Result, Error>, - ) -> Result<(), Error> { - let prev_hash = *self.get_block_header(sync_hash)?.prev_hash(); - let mut chain_update = self.chain_update(); - for (shard_uid, state_root) in state_roots? { - // here we store the state roots in chunk_extra in the database for later use - let chunk_extra = ChunkExtra::new_with_only_state_root(&state_root); - chain_update.chain_store_update.save_chunk_extra(&prev_hash, &shard_uid, chunk_extra); - debug!(target:"chain", "Finish building split state for shard {:?} {:?} {:?} ", shard_uid, prev_hash, state_root); - } - chain_update.commit() - } - pub fn clear_downloaded_parts( &mut self, shard_id: ShardId, @@ -5479,12 +5517,10 @@ impl<'a> ChainUpdate<'a> { let chunk_header = chunk.cloned_header(); let gas_limit = chunk_header.gas_limit(); - let is_first_block_with_chunk_of_version = check_if_block_is_first_with_chunk_of_version( - &mut self.chain_store_update, - self.epoch_manager.as_ref(), - &chunk_header.prev_block_hash(), - shard_id, - )?; + // This is set to false because the value is only relevant + // during protocol version RestoreReceiptsAfterFixApplyChunks. + // TODO(nikurt): Determine the value correctly. 
+ let is_first_block_with_chunk_of_version = false; let apply_result = self.runtime_adapter.apply_transactions( shard_id, @@ -5682,26 +5718,6 @@ pub struct BlockCatchUpResponse { pub results: Vec>, } -#[derive(actix::Message)] -#[rtype(result = "()")] -pub struct StateSplitRequest { - pub runtime_adapter: Arc, - pub sync_hash: CryptoHash, - pub shard_id: ShardId, - pub shard_uid: ShardUId, - pub state_root: StateRoot, - pub next_epoch_shard_layout: ShardLayout, - pub state_split_status: Arc, -} - -#[derive(actix::Message)] -#[rtype(result = "()")] -pub struct StateSplitResponse { - pub sync_hash: CryptoHash, - pub shard_id: ShardId, - pub new_state_roots: Result, Error>, -} - /// Helper to track blocks catch up /// Lifetime of a block_hash is as follows: /// 1. It is added to pending blocks, either as first block of an epoch or because we (post) diff --git a/chain/chain/src/tests/simple_chain.rs b/chain/chain/src/tests/simple_chain.rs index b01ad79cc78..f864bfff441 100644 --- a/chain/chain/src/tests/simple_chain.rs +++ b/chain/chain/src/tests/simple_chain.rs @@ -48,7 +48,7 @@ fn build_chain() { // cargo insta test --accept -p near-chain --features nightly -- tests::simple_chain::build_chain let hash = chain.head().unwrap().last_block_hash; if cfg!(feature = "nightly") { - insta::assert_display_snapshot!(hash, @"86ZZBdNhwHbXDXdTjFZxGbddSy4qLpoxpWtqJtYwYXX"); + insta::assert_display_snapshot!(hash, @"GargNTMFiuET32KH5uPLFwMSU8xXtvrk6aGqgkPbRZg8"); } else { insta::assert_display_snapshot!(hash, @"8GP6PcFavb4pqeofMFjDyKUQnfVZtwPWsVA4V47WNbRn"); } @@ -78,7 +78,7 @@ fn build_chain() { let hash = chain.head().unwrap().last_block_hash; if cfg!(feature = "nightly") { - insta::assert_display_snapshot!(hash, @"8XW5k1JDHWPXkRcGwb6PTEgwggnppAW1qwWgwiqPY286"); + insta::assert_display_snapshot!(hash, @"2aurKZqRfPkZ3woNjA7Kf79wq5MYz98AohTYWoBFiG7o"); } else { insta::assert_display_snapshot!(hash, @"319JoVaUej5iXmrZMeaZBPMeBLePQzJofA5Y1ztdyPw9"); } diff --git a/chain/network/src/routing/edge_cache/mod.rs b/chain/network/src/routing/edge_cache/mod.rs index d81afa6b13c..46a1df37a43 100644 --- a/chain/network/src/routing/edge_cache/mod.rs +++ b/chain/network/src/routing/edge_cache/mod.rs @@ -1,5 +1,6 @@ use crate::network_protocol::Edge; use near_primitives::network::PeerId; +use near_primitives::views::{EdgeCacheView, LabeledEdgeView}; use std::collections::hash_map::{Entry, Iter}; use std::collections::{HashMap, HashSet}; @@ -357,4 +358,27 @@ impl EdgeCache { None } } + + pub(crate) fn get_debug_view(&self) -> EdgeCacheView { + EdgeCacheView { + peer_labels: self.p2id.clone(), + spanning_trees: self + .active_trees + .iter() + .map(|(peer_id, edge_keys)| { + ( + self.get_id(&peer_id), + edge_keys + .iter() + .map(|key| LabeledEdgeView { + peer0: self.get_id(&key.peer0), + peer1: self.get_id(&key.peer1), + nonce: *self.verified_nonces.get(&key).unwrap(), + }) + .collect(), + ) + }) + .collect(), + } + } } diff --git a/chain/network/src/routing/graph_v2/mod.rs b/chain/network/src/routing/graph_v2/mod.rs index 60490803325..8412b262488 100644 --- a/chain/network/src/routing/graph_v2/mod.rs +++ b/chain/network/src/routing/graph_v2/mod.rs @@ -7,6 +7,7 @@ use crate::stats::metrics; use arc_swap::ArcSwap; use near_async::time; use near_primitives::network::PeerId; +use near_primitives::views::{EdgeView, NetworkRoutesView, PeerDistancesView}; use parking_lot::Mutex; use std::collections::VecDeque; use std::collections::{HashMap, HashSet}; @@ -671,4 +672,33 @@ impl GraphV2 { .await .unwrap() } + + pub(crate) fn 
get_debug_view(&self) -> NetworkRoutesView { + let inner = self.inner.lock(); + NetworkRoutesView { + edge_cache: inner.edge_cache.get_debug_view(), + local_edges: inner + .local_edges + .iter() + .map(|(peer_id, edge)| { + let (peer0, peer1) = edge.key().clone(); + (peer_id.clone(), EdgeView { peer0, peer1, nonce: edge.nonce() }) + }) + .collect(), + peer_distances: inner + .peer_distances + .iter() + .map(|(peer_id, routes)| { + ( + peer_id.clone(), + PeerDistancesView { + distance: routes.distance.clone(), + min_nonce: routes.min_nonce, + }, + ) + }) + .collect(), + my_distances: inner.my_distances.clone(), + } + } } diff --git a/core/primitives-core/Cargo.toml b/core/primitives-core/Cargo.toml index eca9aa693f8..2795feab9e1 100644 --- a/core/primitives-core/Cargo.toml +++ b/core/primitives-core/Cargo.toml @@ -36,9 +36,11 @@ protocol_feature_fix_staking_threshold = [] protocol_feature_fix_contract_loading_cost = [] protocol_feature_reject_blocks_with_outdated_protocol_version = [] protocol_feature_simple_nightshade_v2 = [] +protocol_feature_block_header_v4 = [] nightly = [ "nightly_protocol", + "protocol_feature_block_header_v4", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", "protocol_feature_reject_blocks_with_outdated_protocol_version", diff --git a/core/primitives-core/src/version.rs b/core/primitives-core/src/version.rs index 1a5bc78f9d5..5c25776ad4c 100644 --- a/core/primitives-core/src/version.rs +++ b/core/primitives-core/src/version.rs @@ -122,6 +122,8 @@ pub enum ProtocolFeature { RejectBlocksWithOutdatedProtocolVersions, #[cfg(feature = "protocol_feature_simple_nightshade_v2")] SimpleNightshadeV2, + #[cfg(feature = "protocol_feature_block_header_v4")] + BlockHeaderV4, } impl ProtocolFeature { @@ -174,6 +176,8 @@ impl ProtocolFeature { ProtocolFeature::RejectBlocksWithOutdatedProtocolVersions => 132, #[cfg(feature = "protocol_feature_simple_nightshade_v2")] ProtocolFeature::SimpleNightshadeV2 => 135, + #[cfg(feature = "protocol_feature_block_header_v4")] + ProtocolFeature::BlockHeaderV4 => 138, } } } @@ -186,7 +190,7 @@ const STABLE_PROTOCOL_VERSION: ProtocolVersion = 62; /// Largest protocol version supported by the current binary. pub const PROTOCOL_VERSION: ProtocolVersion = if cfg!(feature = "nightly_protocol") { // On nightly, pick big enough version to support all features. - 137 + 138 } else { // Enable all stable features. 
STABLE_PROTOCOL_VERSION diff --git a/core/primitives/Cargo.toml b/core/primitives/Cargo.toml index 0f073bf0213..757d537d389 100644 --- a/core/primitives/Cargo.toml +++ b/core/primitives/Cargo.toml @@ -48,8 +48,10 @@ protocol_feature_fix_staking_threshold = ["near-primitives-core/protocol_feature protocol_feature_fix_contract_loading_cost = ["near-primitives-core/protocol_feature_fix_contract_loading_cost"] protocol_feature_reject_blocks_with_outdated_protocol_version = ["near-primitives-core/protocol_feature_reject_blocks_with_outdated_protocol_version"] protocol_feature_simple_nightshade_v2 = ["near-primitives-core/protocol_feature_simple_nightshade_v2"] +protocol_feature_block_header_v4 = ["near-primitives-core/protocol_feature_block_header_v4"] nightly = [ "nightly_protocol", + "protocol_feature_block_header_v4", "protocol_feature_fix_contract_loading_cost", "protocol_feature_fix_staking_threshold", "protocol_feature_reject_blocks_with_outdated_protocol_version", diff --git a/integration-tests/src/tests/nearcore/sync_state_nodes.rs b/integration-tests/src/tests/nearcore/sync_state_nodes.rs index c3df857a62e..47954f8805d 100644 --- a/integration-tests/src/tests/nearcore/sync_state_nodes.rs +++ b/integration-tests/src/tests/nearcore/sync_state_nodes.rs @@ -18,6 +18,7 @@ use near_primitives::state_part::PartId; use near_primitives::syncing::get_num_state_parts; use near_primitives::transaction::SignedTransaction; use near_primitives::utils::MaybeValidated; +use near_store::genesis::initialize_genesis_state; use near_store::{NodeStorage, Store}; use nearcore::{config::GenesisExt, load_test_config, start_with_config, NightshadeRuntime}; use std::ops::ControlFlow; @@ -570,12 +571,13 @@ fn test_dump_epoch_missing_chunk_in_last_block() { .open() .unwrap() .get_hot_store(); + initialize_genesis_state(store.clone(), &genesis, Some(tmp_dir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::test( tmp_dir.path(), store.clone(), - &genesis, + &genesis.config, epoch_manager.clone(), ) as Arc; (tmp_dir, store, epoch_manager, runtime) diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index 22499a27bb2..7391b95c729 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -7,13 +7,10 @@ use errors::FromStateViewerErrors; use near_chain::types::{ApplySplitStateResult, ApplyTransactionResult, RuntimeAdapter, Tip}; use near_chain::Error; use near_chain_configs::{ - Genesis, GenesisConfig, ProtocolConfig, DEFAULT_GC_NUM_EPOCHS_TO_KEEP, - MIN_GC_NUM_EPOCHS_TO_KEEP, + GenesisConfig, ProtocolConfig, DEFAULT_GC_NUM_EPOCHS_TO_KEEP, MIN_GC_NUM_EPOCHS_TO_KEEP, }; -use near_client_primitives::types::StateSplitApplyingStatus; use near_crypto::PublicKey; use near_epoch_manager::{EpochManagerAdapter, EpochManagerHandle}; -use near_o11y::log_assert; use near_pool::types::PoolIterator; use near_primitives::account::{AccessKey, Account}; use near_primitives::challenge::ChallengesResult; @@ -29,12 +26,11 @@ use near_primitives::shard_layout::{ account_id_to_shard_id, account_id_to_shard_uid, ShardLayout, ShardUId, }; use near_primitives::state_part::PartId; -use near_primitives::syncing::{get_num_state_parts, STATE_PART_MEMORY_LIMIT}; use near_primitives::transaction::SignedTransaction; use near_primitives::types::validator_stake::ValidatorStakeIter; use near_primitives::types::{ AccountId, Balance, BlockHeight, EpochHeight, EpochId, EpochInfoProvider, Gas, MerkleHash, - NumShards, ShardId, StateChangeCause, 
StateChangesForSplitStates, StateRoot, StateRootNode, + ShardId, StateChangeCause, StateChangesForSplitStates, StateRoot, StateRootNode, }; use near_primitives::version::ProtocolVersion; use near_primitives::views::{ @@ -42,24 +38,20 @@ use near_primitives::views::{ ViewStateResult, }; use near_store::flat::FlatStorageManager; -use near_store::genesis::initialize_genesis_state; use near_store::metadata::DbKind; -use near_store::split_state::get_delayed_receipts; use near_store::{ - get_genesis_hash, get_genesis_state_roots, set_genesis_hash, set_genesis_state_roots, ApplyStatePartResult, DBCol, PartialStorage, ShardTries, StateSnapshotConfig, Store, StoreCompiledContractCache, Trie, TrieConfig, WrappedTrieChanges, COLD_HEAD_KEY, }; use near_vm_runner::logic::CompiledContractCache; use near_vm_runner::precompile_contract; use node_runtime::adapter::ViewRuntimeAdapter; -use node_runtime::config::RuntimeConfig; use node_runtime::state_viewer::TrieViewer; use node_runtime::{ validate_transaction, verify_and_charge_transaction, ApplyState, Runtime, ValidatorAccountsUpdate, }; -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Instant; @@ -79,7 +71,6 @@ pub struct NightshadeRuntime { flat_storage_manager: FlatStorageManager, pub runtime: Runtime, epoch_manager: Arc, - genesis_state_roots: Vec, migration_data: Arc, gc_num_epochs_to_keep: u64, } @@ -102,9 +93,8 @@ impl NightshadeRuntime { StateSnapshotConfig::Disabled }; Self::new( - home_dir, store, - &config.genesis, + &config.genesis.config, epoch_manager, config.client_config.trie_viewer_state_size_limit, config.client_config.max_gas_burnt_view, @@ -116,9 +106,8 @@ impl NightshadeRuntime { } fn new( - home_dir: &Path, store: Store, - genesis: &Genesis, + genesis_config: &GenesisConfig, epoch_manager: Arc, trie_viewer_state_size_limit: Option, max_gas_burnt_view: Option, @@ -129,21 +118,11 @@ impl NightshadeRuntime { ) -> Arc { let runtime_config_store = match runtime_config_store { Some(store) => store, - None => Self::create_runtime_config_store(&genesis.config.chain_id), + None => RuntimeConfigStore::for_chain_id(&genesis_config.chain_id), }; let runtime = Runtime::new(); let trie_viewer = TrieViewer::new(trie_viewer_state_size_limit, max_gas_burnt_view); - let genesis_config = genesis.config.clone(); - assert_eq!( - genesis_config.shard_layout.num_shards(), - genesis_config.num_block_producer_seats_per_shard.len() as NumShards, - "genesis config shard_layout and num_block_producer_seats_per_shard indicate inconsistent number of shards {} vs {}", - genesis_config.shard_layout.num_shards(), - genesis_config.num_block_producer_seats_per_shard.len() as NumShards, - ); - let state_roots = - Self::initialize_genesis_state_if_needed(store.clone(), home_dir, genesis); let flat_storage_manager = FlatStorageManager::new(store.clone()); let tries = ShardTries::new_with_state_snapshot( store.clone(), @@ -161,8 +140,9 @@ impl NightshadeRuntime { tracing::error!(target: "runtime", ?err, "Failed to check if a state snapshot exists"); } + let migration_data = Arc::new(load_migration_data(&genesis_config.chain_id)); Arc::new(NightshadeRuntime { - genesis_config, + genesis_config: genesis_config.clone(), runtime_config_store, store, tries, @@ -170,8 +150,7 @@ impl NightshadeRuntime { trie_viewer, epoch_manager, flat_storage_manager, - genesis_state_roots: state_roots, - migration_data: Arc::new(load_migration_data(&genesis.config.chain_id)), + migration_data, 
gc_num_epochs_to_keep: gc_num_epochs_to_keep.max(MIN_GC_NUM_EPOCHS_TO_KEEP), }) } @@ -179,14 +158,13 @@ impl NightshadeRuntime { pub fn test_with_runtime_config_store( home_dir: &Path, store: Store, - genesis: &Genesis, + genesis_config: &GenesisConfig, epoch_manager: Arc, runtime_config_store: RuntimeConfigStore, ) -> Arc { Self::new( - home_dir, store, - genesis, + genesis_config, epoch_manager, None, None, @@ -205,64 +183,18 @@ impl NightshadeRuntime { pub fn test( home_dir: &Path, store: Store, - genesis: &Genesis, + genesis_config: &GenesisConfig, epoch_manager: Arc, ) -> Arc { Self::test_with_runtime_config_store( home_dir, store, - genesis, + genesis_config, epoch_manager, RuntimeConfigStore::test(), ) } - /// Create store of runtime configs for the given chain id. - /// - /// For mainnet and other chains except testnet we don't need to override runtime config for - /// first protocol versions. - /// For testnet, runtime config for genesis block was (incorrectly) different, that's why we - /// need to override it specifically to preserve compatibility. - fn create_runtime_config_store(chain_id: &str) -> RuntimeConfigStore { - match chain_id { - "testnet" => { - let genesis_runtime_config = RuntimeConfig::initial_testnet_config(); - RuntimeConfigStore::new(Some(&genesis_runtime_config)) - } - _ => RuntimeConfigStore::new(None), - } - } - - /// On first start: compute state roots, load genesis state into storage. - /// After that: return genesis state roots. The state is not guaranteed to be in storage, as - /// GC and state sync are allowed to delete it. - fn initialize_genesis_state_if_needed( - store: Store, - home_dir: &Path, - genesis: &Genesis, - ) -> Vec { - let stored_hash = get_genesis_hash(&store).expect("Store failed on genesis intialization"); - if let Some(_hash) = stored_hash { - // TODO: re-enable this check (#4447) - //assert_eq!(hash, genesis_hash, "Storage already exists, but has a different genesis"); - get_genesis_state_roots(&store) - .expect("Store failed on genesis intialization") - .expect("Genesis state roots not found in storage") - } else { - let runtime_config_store = Self::create_runtime_config_store(&genesis.config.chain_id); - let runtime_config = runtime_config_store.get_config(genesis.config.protocol_version); - let store_usage_config = &runtime_config.fees.storage_usage_config; - let genesis_hash = genesis.json_hash(); - let state_roots = - initialize_genesis_state(store.clone(), home_dir, &store_usage_config, genesis); - let mut store_update = store.store_update(); - set_genesis_hash(&mut store_update, &genesis_hash); - set_genesis_state_roots(&mut store_update, &state_roots); - store_update.commit().expect("Store failed on genesis intialization"); - state_roots - } - } - fn get_shard_uid_from_prev_hash( &self, shard_id: ShardId, @@ -637,38 +569,7 @@ fn format_total_gas_burnt(gas: Gas) -> String { format!("{:.0}", ((gas as f64) / 1e14).ceil() * 100.0) } -fn apply_delayed_receipts<'a>( - tries: &ShardTries, - orig_shard_uid: ShardUId, - orig_state_root: StateRoot, - state_roots: HashMap, - account_id_to_shard_id: &(dyn Fn(&AccountId) -> ShardUId + 'a), -) -> Result, Error> { - let orig_trie_update = tries.new_trie_update_view(orig_shard_uid, orig_state_root); - - let mut start_index = None; - let mut new_state_roots = state_roots; - while let Some((next_index, receipts)) = - get_delayed_receipts(&orig_trie_update, start_index, STATE_PART_MEMORY_LIMIT)? 
- { - let (store_update, updated_state_roots) = tries.apply_delayed_receipts_to_split_states( - &new_state_roots, - &receipts, - account_id_to_shard_id, - )?; - new_state_roots = updated_state_roots; - start_index = Some(next_index); - store_update.commit()?; - } - - Ok(new_state_roots) -} - impl RuntimeAdapter for NightshadeRuntime { - fn genesis_state(&self) -> (Store, Vec) { - (self.store.clone(), self.genesis_state_roots.clone()) - } - fn store(&self) -> &Store { &self.store } @@ -1171,63 +1072,6 @@ impl RuntimeAdapter for NightshadeRuntime { .collect()) } - fn build_state_for_split_shards( - &self, - shard_uid: ShardUId, - state_root: &StateRoot, - next_epoch_shard_layout: &ShardLayout, - state_split_status: Arc, - ) -> Result, Error> { - // TODO(resharding) use flat storage to split the trie here - let trie = self.tries.get_view_trie_for_shard(shard_uid, *state_root); - let shard_id = shard_uid.shard_id(); - let new_shards = next_epoch_shard_layout - .get_split_shard_uids(shard_id) - .ok_or(Error::InvalidShardId(shard_id))?; - let mut state_roots: HashMap<_, _> = - new_shards.iter().map(|shard_uid| (*shard_uid, Trie::EMPTY_ROOT)).collect(); - let split_shard_ids: HashSet<_> = new_shards.into_iter().collect(); - let checked_account_id_to_shard_id = |account_id: &AccountId| { - let new_shard_uid = account_id_to_shard_uid(account_id, next_epoch_shard_layout); - // check that all accounts in the shard are mapped the shards that this shard will split - // to according to shard layout - assert!( - split_shard_ids.contains(&new_shard_uid), - "Inconsistent shard_layout specs. Account {:?} in shard {:?} and in shard {:?}, but the former is not parent shard for the latter", - account_id, - shard_uid, - new_shard_uid, - ); - new_shard_uid - }; - - let state_root_node = trie.retrieve_root_node()?; - let num_parts = get_num_state_parts(state_root_node.memory_usage); - if state_split_status.total_parts.set(num_parts).is_err() { - log_assert!(false, "splitting state was done twice for shard {}", shard_id); - } - debug!(target: "runtime", "splitting state for shard {} to {} parts to build new states", shard_id, num_parts); - for part_id in 0..num_parts { - let trie_items = trie.get_trie_items_for_part(PartId::new(part_id, num_parts))?; - let (store_update, new_state_roots) = self.tries.add_values_to_split_states( - &state_roots, - trie_items.into_iter().map(|(key, value)| (key, Some(value))).collect(), - &checked_account_id_to_shard_id, - )?; - state_roots = new_state_roots; - store_update.commit()?; - state_split_status.done_parts.fetch_add(1, core::sync::atomic::Ordering::Relaxed); - } - state_roots = apply_delayed_receipts( - &self.tries, - shard_uid, - *state_root, - state_roots, - &checked_account_id_to_shard_id, - )?; - Ok(state_roots) - } - fn apply_state_part( &self, shard_id: ShardId, @@ -1417,24 +1261,26 @@ mod test { use near_primitives::test_utils::create_test_signer; use near_primitives::types::validator_stake::ValidatorStake; use near_store::flat::{FlatStateChanges, FlatStateDelta, FlatStateDeltaMetadata}; + use near_store::genesis::initialize_genesis_state; use num_rational::Ratio; use crate::config::{GenesisExt, TESTING_INIT_BALANCE, TESTING_INIT_STAKE}; - use near_chain_configs::DEFAULT_GC_NUM_EPOCHS_TO_KEEP; + use near_chain_configs::{Genesis, DEFAULT_GC_NUM_EPOCHS_TO_KEEP}; use near_crypto::{InMemorySigner, KeyType, Signer}; use near_o11y::testonly::init_test_logger; use near_primitives::block::Tip; use near_primitives::challenge::SlashedValidator; use 
near_primitives::transaction::{Action, DeleteAccountAction, StakeAction, TransferAction}; use near_primitives::types::{ - BlockHeightDelta, Nonce, ValidatorId, ValidatorInfoIdentifier, ValidatorKickoutReason, + BlockHeightDelta, Nonce, NumShards, ValidatorId, ValidatorInfoIdentifier, + ValidatorKickoutReason, }; use near_primitives::validator_signer::ValidatorSigner; use near_primitives::views::{ AccountView, CurrentEpochValidatorInfo, EpochValidatorInfo, NextEpochValidatorInfo, ValidatorKickoutView, }; - use near_store::NodeStorage; + use near_store::{get_genesis_state_roots, NodeStorage}; use super::*; @@ -1578,11 +1424,12 @@ mod test { } let genesis_total_supply = genesis.config.total_supply; let genesis_protocol_version = genesis.config.protocol_version; + + initialize_genesis_state(store.clone(), &genesis, Some(dir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::new( - dir.path(), - store, - &genesis, + store.clone(), + &genesis.config, epoch_manager.clone(), None, None, @@ -1596,7 +1443,7 @@ mod test { compaction_enabled: false, }, ); - let (store, state_roots) = runtime.genesis_state(); + let state_roots = get_genesis_state_roots(&store).unwrap().unwrap(); let genesis_hash = hash(&[0]); // Create flat storage. Naturally it happens on Chain creation, but here we test only Runtime behaviour @@ -2884,11 +2731,12 @@ mod test { let store = near_store::test_utils::create_test_store(); let tempdir = tempfile::tempdir().unwrap(); + initialize_genesis_state(store.clone(), &genesis, Some(tempdir.path())); let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config); let runtime = NightshadeRuntime::test_with_runtime_config_store( tempdir.path(), store.clone(), - &genesis, + &genesis.config, epoch_manager.clone(), RuntimeConfigStore::new(None), ); diff --git a/tools/database/README.md b/tools/database/README.md index 7642f841445..9c4e3ec6dd6 100644 --- a/tools/database/README.md +++ b/tools/database/README.md @@ -79,6 +79,18 @@ available in `/home/ubuntu/.near/data/snapshot` This command can be helpful before attempting activities that can potentially corrupt the database. +### Run DB Migrations + +Opens the DB and runs migrations to bring it to the actual version expected by `neard` +Example usage: +```bash +cargo run --bin neard database run-migrations +``` + +For example, if the binary expects DB version `38`, but the DB is currently +version `36`, the command will open the DB, run migrations that bring the DB +from version `36` to version `38`, and then exits. + ## State read perf A tool for performance testing hot storage RocksDB State column reads. Use help to get more details: `neard database state-perf --help` From 578e1d77505f470e718f0f192ff38cade8493e41 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 18:22:52 +0200 Subject: [PATCH 44/50] Merge --- core/o11y/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/o11y/src/lib.rs b/core/o11y/src/lib.rs index 0ef1f85ba36..2a255e6d2d7 100644 --- a/core/o11y/src/lib.rs +++ b/core/o11y/src/lib.rs @@ -462,7 +462,10 @@ pub fn reload_log_config(config: Option<&log_config::LogConfig>) { tracing::info!("Updated the logging layer according to `log_config.json`"); } Err(err) => { - tracing::info!("Failed to update the logging layer according to the changed `log_config.json`. Errors: {:?}", err); + eprintln!( + "Failed to update the logging layer according to the changed `log_config.json`. 
Errors: {:?}", + err + ); } } } From 46bd0c064faa868ff4368d33e3ebb1ae2060616a Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 18:45:05 +0200 Subject: [PATCH 45/50] fmt --- tools/database/src/commands.rs | 2 +- tools/database/src/run_migrations.rs | 20 +++----------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 9ed84bd8285..c8ba10427c1 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -50,7 +50,7 @@ impl DatabaseCommand { &home, near_chain_configs::GenesisValidationMode::UnsafeFast, ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, &mut near_config) } SubCommand::StatePerf(cmd) => cmd.run(home), diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index 5d713cc12aa..aa250431290 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -1,25 +1,11 @@ -use near_store::metadata::DbKind; -use near_store::{Mode, NodeStorage}; -use nearcore::{migrations, NearConfig, open_storage}; use std::path::Path; -/// This can potentially support db specified not in config, but in command line. -/// `ChangeRelative { path: Path, archive: bool }` -/// But it is a pain to implement, because of all the current storage possibilities. -/// So, I'll leave it as a TODO(posvyatokum): implement relative path DbSelector. -/// This can be useful workaround for config modification. -#[derive(clap::Subcommand)] -enum DbSelector { - ChangeHot, - ChangeCold, -} - #[derive(clap::Args)] -pub(crate) struct RunMigrationsCommand { } +pub(crate) struct RunMigrationsCommand {} impl RunMigrationsCommand { - pub(crate) fn run(&self, home_dir: &Path, near_config: &mut NearConfig) -> anyhow::Result<()> { - let storage = open_storage(home_dir, near_config)?; + pub(crate) fn run(&self, home_dir: &Path, near_config: &mut nearcore::NearConfig) -> anyhow::Result<()> { + let storage = nearcore::open_storage(home_dir, near_config)?; Ok(()) } } From 2687ca31d44a27665457f52dab2e980c67cac39b Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 18:46:11 +0200 Subject: [PATCH 46/50] fmt --- tools/database/src/commands.rs | 2 +- tools/database/src/run_migrations.rs | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index c8ba10427c1..28ed436725e 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -45,7 +45,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } - SumCommand::RunMigrationsCommand(cmd) => { + SubCommand::RunMigrationsCommand(cmd) => { let mut near_config = nearcore::config::load_config( &home, near_chain_configs::GenesisValidationMode::UnsafeFast, diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index aa250431290..dbafc4ca2a1 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -4,8 +4,12 @@ use std::path::Path; pub(crate) struct RunMigrationsCommand {} impl RunMigrationsCommand { - pub(crate) fn run(&self, home_dir: &Path, near_config: &mut nearcore::NearConfig) -> anyhow::Result<()> { - let storage = nearcore::open_storage(home_dir, near_config)?; + pub(crate) fn run( + &self, + home_dir: &Path, + 
near_config: &mut nearcore::NearConfig, + ) -> anyhow::Result<()> { + nearcore::open_storage(home_dir, near_config)?; Ok(()) } } From bef509cdf8ec79cf7ddb69ce36dfec204329e263 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 18:59:28 +0200 Subject: [PATCH 47/50] fmt --- tools/database/src/commands.rs | 9 +-------- tools/database/src/run_migrations.rs | 11 ++++++----- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 28ed436725e..3704e7ccd90 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -45,14 +45,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } - SubCommand::RunMigrationsCommand(cmd) => { - let mut near_config = nearcore::config::load_config( - &home, - near_chain_configs::GenesisValidationMode::UnsafeFast, - ) - .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - cmd.run(home, &mut near_config) - } + SubCommand::RunMigrationsCommand(cmd) => cmd.run(home), SubCommand::StatePerf(cmd) => cmd.run(home), } } diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index dbafc4ca2a1..c79b706fd1b 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -4,11 +4,12 @@ use std::path::Path; pub(crate) struct RunMigrationsCommand {} impl RunMigrationsCommand { - pub(crate) fn run( - &self, - home_dir: &Path, - near_config: &mut nearcore::NearConfig, - ) -> anyhow::Result<()> { + pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> { + let mut near_config = nearcore::config::load_config( + &home, + near_chain_configs::GenesisValidationMode::UnsafeFast, + ) + .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); nearcore::open_storage(home_dir, near_config)?; Ok(()) } From dae314b941e062931953446f358c81884f87e8a0 Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 18:59:43 +0200 Subject: [PATCH 48/50] fmt --- tools/database/src/run_migrations.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index c79b706fd1b..5b285b7a880 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -10,7 +10,7 @@ impl RunMigrationsCommand { near_chain_configs::GenesisValidationMode::UnsafeFast, ) .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); - nearcore::open_storage(home_dir, near_config)?; + nearcore::open_storage(home_dir, &mut near_config)?; Ok(()) } } From 6834636b0c98fbd9e0ac99f5d131b3e85df41c5d Mon Sep 17 00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 19:04:09 +0200 Subject: [PATCH 49/50] fmt --- tools/database/src/run_migrations.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/database/src/run_migrations.rs b/tools/database/src/run_migrations.rs index 5b285b7a880..a301d39260b 100644 --- a/tools/database/src/run_migrations.rs +++ b/tools/database/src/run_migrations.rs @@ -6,7 +6,7 @@ pub(crate) struct RunMigrationsCommand {} impl RunMigrationsCommand { pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> { let mut near_config = nearcore::config::load_config( - &home, + &home_dir, near_chain_configs::GenesisValidationMode::UnsafeFast, ) .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); From 94ac6df94293ba7b24aed430b53c8f5ee30bb380 Mon Sep 17 
00:00:00 2001 From: Nikolay Kurtov Date: Mon, 24 Jul 2023 19:04:41 +0200 Subject: [PATCH 50/50] fmt --- tools/database/src/commands.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/database/src/commands.rs b/tools/database/src/commands.rs index 3704e7ccd90..eab6fb58e4d 100644 --- a/tools/database/src/commands.rs +++ b/tools/database/src/commands.rs @@ -45,7 +45,7 @@ impl DatabaseCommand { .unwrap_or_else(|e| panic!("Error loading config: {:#}", e)); cmd.run(home, near_config.config.archive, &near_config.config.store) } - SubCommand::RunMigrationsCommand(cmd) => cmd.run(home), + SubCommand::RunMigrations(cmd) => cmd.run(home), SubCommand::StatePerf(cmd) => cmd.run(home), } }
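
For reference, after applying patches 45-50 above, the new `run-migrations` subcommand consolidates to roughly the following. This is a reconstruction assembled from the hunks in this series, not an authoritative file listing: every identifier and signature is copied from the diffs above, while the comments are editorial and only restate what the surrounding README hunk and patches claim.

```rust
// tools/database/src/run_migrations.rs -- reconstructed end state of this series.
use std::path::Path;

#[derive(clap::Args)]
pub(crate) struct RunMigrationsCommand {}

impl RunMigrationsCommand {
    pub(crate) fn run(&self, home_dir: &Path) -> anyhow::Result<()> {
        // Load the node config; UnsafeFast skips full genesis validation,
        // which is not needed just to open the database.
        let mut near_config = nearcore::config::load_config(
            &home_dir,
            near_chain_configs::GenesisValidationMode::UnsafeFast,
        )
        .unwrap_or_else(|e| panic!("Error loading config: {:#}", e));
        // Opening the storage is what runs the pending DB migrations,
        // bringing the DB to the version expected by this neard binary.
        nearcore::open_storage(home_dir, &mut near_config)?;
        Ok(())
    }
}
```

The corresponding dispatch in `tools/database/src/commands.rs` is the one-line match arm from PATCH 50/50: `SubCommand::RunMigrations(cmd) => cmd.run(home),`.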