Skip to content

Commit

Permalink
feat: Add per shard granularity for validator info (#8934)
Browse files Browse the repository at this point in the history
Count the number chunks produced and expected to be produced so far for each individual shard.
When one chunk producer is assigned to multiple shards, we can see the production / expected for each of the shard.

Tested on an RPC node in mainnet.
Paste from the metrics tab of a node in mainnet. [link](https://gist.github.com/VanBarbascu/26dc011291df7f128d609c8457d57469)
  • Loading branch information
VanBarbascu authored Apr 20, 2023
1 parent 8f8211f commit a808352
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* Node can sync State from S3. [#8789](https://github.com/near/nearcore/pull/8789)
* The contract runtime switched to using our fork of wasmer, with various improvements.
* undo-block tool to reset the chain head from current head to its prev block. Use the tool by running: `./target/release/neard --home {path_to_config_directory} undo-block`. [#8681](https://github.com/near/nearcore/pull/8681)
* Add per shard granularity for chunks in validator info metric. [#8934](https://github.com/near/nearcore/pull/8934)

## 1.33.0

Expand Down
22 changes: 22 additions & 0 deletions chain/client/src/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,19 @@ impl InfoHelper {
(metrics::VALIDATORS_CHUNKS_EXPECTED
.with_label_values(&[stats.account_id.as_str()])
.set(stats.num_expected_chunks as i64));
for ((shard, expected), produced) in stats
.shards
.iter()
.zip(stats.num_expected_chunks_per_shard.iter())
.zip(stats.num_produced_chunks_per_shard.iter())
{
(metrics::VALIDATORS_CHUNKS_EXPECTED_BY_SHARD
.with_label_values(&[stats.account_id.as_str(), &shard.to_string()])
.set(*expected as i64));
(metrics::VALIDATORS_CHUNKS_PRODUCED_BY_SHARD
.with_label_values(&[stats.account_id.as_str(), &shard.to_string()])
.set(*produced as i64));
}
}

self.started = StaticClock::instant();
Expand Down Expand Up @@ -669,6 +682,9 @@ pub struct ValidatorProductionStats {
pub num_expected_blocks: NumBlocks,
pub num_produced_chunks: NumBlocks,
pub num_expected_chunks: NumBlocks,
pub shards: Vec<ShardId>,
pub num_produced_chunks_per_shard: Vec<NumBlocks>,
pub num_expected_chunks_per_shard: Vec<NumBlocks>,
}

impl ValidatorProductionStats {
Expand All @@ -679,6 +695,9 @@ impl ValidatorProductionStats {
num_expected_blocks: 0,
num_produced_chunks: 0,
num_expected_chunks: 0,
shards: vec![],
num_produced_chunks_per_shard: vec![],
num_expected_chunks_per_shard: vec![],
}
}
pub fn validator(info: CurrentEpochValidatorInfo) -> Self {
Expand All @@ -688,6 +707,9 @@ impl ValidatorProductionStats {
num_expected_blocks: info.num_expected_blocks,
num_produced_chunks: info.num_produced_chunks,
num_expected_chunks: info.num_expected_chunks,
shards: info.shards,
num_produced_chunks_per_shard: info.num_produced_chunks_per_shard,
num_expected_chunks_per_shard: info.num_expected_chunks_per_shard,
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions chain/client/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,24 @@ pub(crate) static VALIDATORS_CHUNKS_EXPECTED: Lazy<IntGaugeVec> = Lazy::new(|| {
.unwrap()
});

pub(crate) static VALIDATORS_CHUNKS_PRODUCED_BY_SHARD: Lazy<IntGaugeVec> = Lazy::new(|| {
try_create_int_gauge_vec(
"near_validators_chunks_produced_by_shard",
"Number of chunks produced by a validator",
&["account_id", "shard_id"],
)
.unwrap()
});

pub(crate) static VALIDATORS_CHUNKS_EXPECTED_BY_SHARD: Lazy<IntGaugeVec> = Lazy::new(|| {
try_create_int_gauge_vec(
"near_validators_chunks_expected_by_shard",
"Number of chunks expected to be produced by a validator",
&["account_id", "shard_id"],
)
.unwrap()
});

pub(crate) static VALIDATORS_BLOCKS_PRODUCED: Lazy<IntGaugeVec> = Lazy::new(|| {
try_create_int_gauge_vec(
"near_validators_blocks_produced",
Expand Down
25 changes: 22 additions & 3 deletions chain/epoch-manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use near_primitives::shard_layout::ShardLayout;
use near_primitives::types::validator_stake::ValidatorStake;
use near_primitives::types::{
AccountId, ApprovalStake, Balance, BlockChunkValidatorStats, BlockHeight, EpochId,
EpochInfoProvider, NumSeats, ShardId, ValidatorId, ValidatorInfoIdentifier,
EpochInfoProvider, NumBlocks, NumSeats, ShardId, ValidatorId, ValidatorInfoIdentifier,
ValidatorKickoutReason, ValidatorStats,
};
use near_primitives::version::{ProtocolVersion, UPGRADABILITY_FIX_PROTOCOL_VERSION};
Expand Down Expand Up @@ -1237,6 +1237,9 @@ impl EpochManager {
account_id,
public_key,
stake,
// TODO: Maybe fill in the per shard info about the chunk produced for requests coming from RPC.
num_produced_chunks_per_shard: vec![0; shards.len()],
num_expected_chunks_per_shard: vec![0; shards.len()],
shards,
num_produced_blocks: validator_stats.block_stats.produced,
num_expected_blocks: validator_stats.block_stats.expected,
Expand Down Expand Up @@ -1265,11 +1268,19 @@ impl EpochManager {
.unwrap_or_else(|| &ValidatorStats { produced: 0, expected: 0 })
.clone();

let mut chunks_produced_by_shard: HashMap<ShardId, NumBlocks> =
HashMap::new();
let mut chunks_expected_by_shard: HashMap<ShardId, NumBlocks> =
HashMap::new();
let mut chunk_stats = ValidatorStats { produced: 0, expected: 0 };
for (_shard, tracker) in aggregator.shard_tracker.iter() {
for (shard, tracker) in aggregator.shard_tracker.iter() {
if let Some(stats) = tracker.get(&(validator_id as u64)) {
chunk_stats.produced += stats.produced;
chunk_stats.expected += stats.expected;
*chunks_produced_by_shard.entry(*shard).or_insert(0) +=
stats.produced;
*chunks_expected_by_shard.entry(*shard).or_insert(0) +=
stats.expected;
}
}
let mut shards = validator_to_shard[validator_id]
Expand All @@ -1283,11 +1294,19 @@ impl EpochManager {
account_id,
public_key,
stake,
shards,
shards: shards.clone(),
num_produced_blocks: block_stats.produced,
num_expected_blocks: block_stats.expected,
num_produced_chunks: chunk_stats.produced,
num_expected_chunks: chunk_stats.expected,
num_produced_chunks_per_shard: shards
.iter()
.map(|shard| *chunks_produced_by_shard.entry(*shard).or_default())
.collect(),
num_expected_chunks_per_shard: shards
.iter()
.map(|shard| *chunks_expected_by_shard.entry(*shard).or_default())
.collect(),
})
})
.collect::<Result<Vec<CurrentEpochValidatorInfo>, EpochError>>()?;
Expand Down
5 changes: 5 additions & 0 deletions core/primitives/src/views.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1936,6 +1936,11 @@ pub struct CurrentEpochValidatorInfo {
pub num_produced_chunks: NumBlocks,
#[serde(default)]
pub num_expected_chunks: NumBlocks,
// The following two fields correspond to the shards in the shard array.
#[serde(default)]
pub num_produced_chunks_per_shard: Vec<NumBlocks>,
#[serde(default)]
pub num_expected_chunks_per_shard: Vec<NumBlocks>,
}

#[derive(
Expand Down
8 changes: 8 additions & 0 deletions nearcore/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2414,6 +2414,8 @@ mod test {
num_expected_blocks: expected_blocks[0],
num_produced_chunks: expected_chunks[0],
num_expected_chunks: expected_chunks[0],
num_produced_chunks_per_shard: vec![expected_chunks[0]],
num_expected_chunks_per_shard: vec![expected_chunks[0]],
},
CurrentEpochValidatorInfo {
account_id: "test2".parse().unwrap(),
Expand All @@ -2425,6 +2427,8 @@ mod test {
num_expected_blocks: expected_blocks[1],
num_produced_chunks: expected_chunks[1],
num_expected_chunks: expected_chunks[1],
num_produced_chunks_per_shard: vec![expected_chunks[1]],
num_expected_chunks_per_shard: vec![expected_chunks[1]],
},
];
let next_epoch_validator_info = vec![
Expand Down Expand Up @@ -2476,10 +2480,14 @@ mod test {
current_epoch_validator_info[0].num_expected_blocks = expected_blocks[0];
current_epoch_validator_info[0].num_produced_chunks = expected_chunks[0];
current_epoch_validator_info[0].num_expected_chunks = expected_chunks[0];
current_epoch_validator_info[0].num_produced_chunks_per_shard = vec![expected_chunks[0]];
current_epoch_validator_info[0].num_expected_chunks_per_shard = vec![expected_chunks[0]];
current_epoch_validator_info[1].num_produced_blocks = expected_blocks[1];
current_epoch_validator_info[1].num_expected_blocks = expected_blocks[1];
current_epoch_validator_info[1].num_produced_chunks = expected_chunks[1];
current_epoch_validator_info[1].num_expected_chunks = expected_chunks[1];
current_epoch_validator_info[1].num_produced_chunks_per_shard = vec![expected_chunks[1]];
current_epoch_validator_info[1].num_expected_chunks_per_shard = vec![expected_chunks[1]];
assert_eq!(response.current_validators, current_epoch_validator_info);
assert_eq!(
response.next_validators,
Expand Down

0 comments on commit a808352

Please sign in to comment.