From f699747028ce9718972f3f540a677d29d0efc62d Mon Sep 17 00:00:00 2001 From: Razvan Barbascu Date: Thu, 20 Apr 2023 15:44:53 +0100 Subject: [PATCH] feat: Add per shard granularity for validator info (#8934) Count the number of chunks produced and expected to be produced so far for each individual shard. When one chunk producer is assigned to multiple shards, we can see the production / expected for each of the shards. Tested on an RPC node in mainnet. Paste from the metrics tab of a node in mainnet. [link](https://gist.github.com/VanBarbascu/26dc011291df7f128d609c8457d57469) --- CHANGELOG.md | 1 + chain/client/src/info.rs | 22 ++++++++++++++++++++++ chain/client/src/metrics.rs | 18 ++++++++++++++++++ chain/epoch-manager/src/lib.rs | 25 ++++++++++++++++++++++--- core/primitives/src/views.rs | 5 +++++ nearcore/src/runtime/mod.rs | 8 ++++++++ 6 files changed, 76 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1cdd9a9beb..19f37fbd287 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * Node can sync State from S3. [#8789](https://github.com/near/nearcore/pull/8789) * The contract runtime switched to using our fork of wasmer, with various improvements. * undo-block tool to reset the chain head from current head to its prev block. Use the tool by running: `./target/release/neard --home {path_to_config_directory} undo-block`. [#8681](https://github.com/near/nearcore/pull/8681) +* Add per shard granularity for chunks in validator info metric. 
[#8934](https://github.com/near/nearcore/pull/8934) ## 1.33.0 diff --git a/chain/client/src/info.rs b/chain/client/src/info.rs index bc9516a0aee..fe95aba2488 100644 --- a/chain/client/src/info.rs +++ b/chain/client/src/info.rs @@ -357,6 +357,19 @@ impl InfoHelper { (metrics::VALIDATORS_CHUNKS_EXPECTED .with_label_values(&[stats.account_id.as_str()]) .set(stats.num_expected_chunks as i64)); + for ((shard, expected), produced) in stats + .shards + .iter() + .zip(stats.num_expected_chunks_per_shard.iter()) + .zip(stats.num_produced_chunks_per_shard.iter()) + { + (metrics::VALIDATORS_CHUNKS_EXPECTED_BY_SHARD + .with_label_values(&[stats.account_id.as_str(), &shard.to_string()]) + .set(*expected as i64)); + (metrics::VALIDATORS_CHUNKS_PRODUCED_BY_SHARD + .with_label_values(&[stats.account_id.as_str(), &shard.to_string()]) + .set(*produced as i64)); + } } self.started = StaticClock::instant(); @@ -669,6 +682,9 @@ pub struct ValidatorProductionStats { pub num_expected_blocks: NumBlocks, pub num_produced_chunks: NumBlocks, pub num_expected_chunks: NumBlocks, + pub shards: Vec, + pub num_produced_chunks_per_shard: Vec, + pub num_expected_chunks_per_shard: Vec, } impl ValidatorProductionStats { @@ -679,6 +695,9 @@ impl ValidatorProductionStats { num_expected_blocks: 0, num_produced_chunks: 0, num_expected_chunks: 0, + shards: vec![], + num_produced_chunks_per_shard: vec![], + num_expected_chunks_per_shard: vec![], } } pub fn validator(info: CurrentEpochValidatorInfo) -> Self { @@ -688,6 +707,9 @@ impl ValidatorProductionStats { num_expected_blocks: info.num_expected_blocks, num_produced_chunks: info.num_produced_chunks, num_expected_chunks: info.num_expected_chunks, + shards: info.shards, + num_produced_chunks_per_shard: info.num_produced_chunks_per_shard, + num_expected_chunks_per_shard: info.num_expected_chunks_per_shard, } } } diff --git a/chain/client/src/metrics.rs b/chain/client/src/metrics.rs index 8de327457a1..27cb03a461d 100644 --- a/chain/client/src/metrics.rs +++ 
b/chain/client/src/metrics.rs @@ -105,6 +105,24 @@ pub(crate) static VALIDATORS_CHUNKS_EXPECTED: Lazy = Lazy::new(|| { .unwrap() }); +pub(crate) static VALIDATORS_CHUNKS_PRODUCED_BY_SHARD: Lazy = Lazy::new(|| { + try_create_int_gauge_vec( + "near_validators_chunks_produced_by_shard", + "Number of chunks produced by a validator", + &["account_id", "shard_id"], + ) + .unwrap() +}); + +pub(crate) static VALIDATORS_CHUNKS_EXPECTED_BY_SHARD: Lazy = Lazy::new(|| { + try_create_int_gauge_vec( + "near_validators_chunks_expected_by_shard", + "Number of chunks expected to be produced by a validator", + &["account_id", "shard_id"], + ) + .unwrap() +}); + pub(crate) static VALIDATORS_BLOCKS_PRODUCED: Lazy = Lazy::new(|| { try_create_int_gauge_vec( "near_validators_blocks_produced", diff --git a/chain/epoch-manager/src/lib.rs b/chain/epoch-manager/src/lib.rs index c094d5ef209..0aaabfd59f3 100644 --- a/chain/epoch-manager/src/lib.rs +++ b/chain/epoch-manager/src/lib.rs @@ -14,7 +14,7 @@ use near_primitives::shard_layout::ShardLayout; use near_primitives::types::validator_stake::ValidatorStake; use near_primitives::types::{ AccountId, ApprovalStake, Balance, BlockChunkValidatorStats, BlockHeight, EpochId, - EpochInfoProvider, NumSeats, ShardId, ValidatorId, ValidatorInfoIdentifier, + EpochInfoProvider, NumBlocks, NumSeats, ShardId, ValidatorId, ValidatorInfoIdentifier, ValidatorKickoutReason, ValidatorStats, }; use near_primitives::version::{ProtocolVersion, UPGRADABILITY_FIX_PROTOCOL_VERSION}; @@ -1237,6 +1237,9 @@ impl EpochManager { account_id, public_key, stake, + // TODO: Maybe fill in the per shard info about the chunk produced for requests coming from RPC. 
+ num_produced_chunks_per_shard: vec![0; shards.len()], + num_expected_chunks_per_shard: vec![0; shards.len()], shards, num_produced_blocks: validator_stats.block_stats.produced, num_expected_blocks: validator_stats.block_stats.expected, @@ -1265,11 +1268,19 @@ impl EpochManager { .unwrap_or_else(|| &ValidatorStats { produced: 0, expected: 0 }) .clone(); + let mut chunks_produced_by_shard: HashMap = + HashMap::new(); + let mut chunks_expected_by_shard: HashMap = + HashMap::new(); let mut chunk_stats = ValidatorStats { produced: 0, expected: 0 }; - for (_shard, tracker) in aggregator.shard_tracker.iter() { + for (shard, tracker) in aggregator.shard_tracker.iter() { if let Some(stats) = tracker.get(&(validator_id as u64)) { chunk_stats.produced += stats.produced; chunk_stats.expected += stats.expected; + *chunks_produced_by_shard.entry(*shard).or_insert(0) += + stats.produced; + *chunks_expected_by_shard.entry(*shard).or_insert(0) += + stats.expected; } } let mut shards = validator_to_shard[validator_id] @@ -1283,11 +1294,19 @@ impl EpochManager { account_id, public_key, stake, - shards, + shards: shards.clone(), num_produced_blocks: block_stats.produced, num_expected_blocks: block_stats.expected, num_produced_chunks: chunk_stats.produced, num_expected_chunks: chunk_stats.expected, + num_produced_chunks_per_shard: shards + .iter() + .map(|shard| *chunks_produced_by_shard.entry(*shard).or_default()) + .collect(), + num_expected_chunks_per_shard: shards + .iter() + .map(|shard| *chunks_expected_by_shard.entry(*shard).or_default()) + .collect(), }) }) .collect::, EpochError>>()?; diff --git a/core/primitives/src/views.rs b/core/primitives/src/views.rs index b424bdbce23..34ef0682a12 100644 --- a/core/primitives/src/views.rs +++ b/core/primitives/src/views.rs @@ -1936,6 +1936,11 @@ pub struct CurrentEpochValidatorInfo { pub num_produced_chunks: NumBlocks, #[serde(default)] pub num_expected_chunks: NumBlocks, + // The following two fields correspond to the shards in the 
shard array. + #[serde(default)] + pub num_produced_chunks_per_shard: Vec, + #[serde(default)] + pub num_expected_chunks_per_shard: Vec, } #[derive( diff --git a/nearcore/src/runtime/mod.rs b/nearcore/src/runtime/mod.rs index d7ea5b9211c..9f3108d4937 100644 --- a/nearcore/src/runtime/mod.rs +++ b/nearcore/src/runtime/mod.rs @@ -2414,6 +2414,8 @@ mod test { num_expected_blocks: expected_blocks[0], num_produced_chunks: expected_chunks[0], num_expected_chunks: expected_chunks[0], + num_produced_chunks_per_shard: vec![expected_chunks[0]], + num_expected_chunks_per_shard: vec![expected_chunks[0]], }, CurrentEpochValidatorInfo { account_id: "test2".parse().unwrap(), @@ -2425,6 +2427,8 @@ mod test { num_expected_blocks: expected_blocks[1], num_produced_chunks: expected_chunks[1], num_expected_chunks: expected_chunks[1], + num_produced_chunks_per_shard: vec![expected_chunks[1]], + num_expected_chunks_per_shard: vec![expected_chunks[1]], }, ]; let next_epoch_validator_info = vec![ @@ -2476,10 +2480,14 @@ mod test { current_epoch_validator_info[0].num_expected_blocks = expected_blocks[0]; current_epoch_validator_info[0].num_produced_chunks = expected_chunks[0]; current_epoch_validator_info[0].num_expected_chunks = expected_chunks[0]; + current_epoch_validator_info[0].num_produced_chunks_per_shard = vec![expected_chunks[0]]; + current_epoch_validator_info[0].num_expected_chunks_per_shard = vec![expected_chunks[0]]; current_epoch_validator_info[1].num_produced_blocks = expected_blocks[1]; current_epoch_validator_info[1].num_expected_blocks = expected_blocks[1]; current_epoch_validator_info[1].num_produced_chunks = expected_chunks[1]; current_epoch_validator_info[1].num_expected_chunks = expected_chunks[1]; + current_epoch_validator_info[1].num_produced_chunks_per_shard = vec![expected_chunks[1]]; + current_epoch_validator_info[1].num_expected_chunks_per_shard = vec![expected_chunks[1]]; assert_eq!(response.current_validators, current_epoch_validator_info); assert_eq!( 
response.next_validators,