Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: properly handle genesis as part of stateless validation #10633

Merged
merged 4 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion chain/client/src/view_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ impl ViewClientActor {
let cps: Vec<AccountId> = shard_ids
.iter()
.map(|&shard_id| {
let cp = epoch_info.sample_chunk_producer(block_height, shard_id);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it safe to unwrap() here? The function returns an Error, so maybe it would be better to return an error.
(Also applies to other unwraps() on the new sample_chunk_producer)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shard id should be valid here let shard_ids = self.epoch_manager.shard_ids(&epoch_id)?
returning error requires a bit of refactoring here since it is part of map, so we cannot just add ?, so probably should be done in a separate PR

let cp = epoch_info.sample_chunk_producer(block_height, shard_id).unwrap();
let cp = epoch_info.get_validator(cp).account_id().clone();
cp
})
Expand Down
10 changes: 7 additions & 3 deletions chain/epoch-manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,7 @@ impl EpochManager {
shard_id: ShardId,
) -> Result<ValidatorStake, EpochError> {
let epoch_info = self.get_epoch_info(epoch_id)?;
let validator_id = Self::chunk_producer_from_info(&epoch_info, height, shard_id);
let validator_id = Self::chunk_producer_from_info(&epoch_info, height, shard_id)?;
Ok(epoch_info.get_validator(validator_id))
}

Expand Down Expand Up @@ -1556,8 +1556,12 @@ impl EpochManager {
epoch_info: &EpochInfo,
height: BlockHeight,
shard_id: ShardId,
) -> ValidatorId {
epoch_info.sample_chunk_producer(height, shard_id)
) -> Result<ValidatorId, EpochError> {
epoch_info.sample_chunk_producer(height, shard_id).ok_or_else(|| {
EpochError::ChunkProducerSelectionError(format!(
"Invalid shard {shard_id} for height {height}"
))
})
}

/// Returns true, if given current block info, next block supposed to be in the next epoch.
Expand Down
5 changes: 3 additions & 2 deletions chain/epoch-manager/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ fn test_expected_chunks_prev_block_not_produced() {
let prev_block_info = epoch_manager.get_block_info(&prev_block).unwrap();
let prev_height = prev_block_info.height();
let expected_chunk_producer =
EpochManager::chunk_producer_from_info(&epoch_info, prev_height + 1, 0);
EpochManager::chunk_producer_from_info(&epoch_info, prev_height + 1, 0).unwrap();
// test1 does not produce blocks during first epoch
if block_producer == 0 && epoch_id == initial_epoch_id {
expected += 1;
Expand Down Expand Up @@ -1473,7 +1473,8 @@ fn test_chunk_validator_kickout() {
&epoch_info,
height,
shard_id as u64,
);
)
.unwrap();
// test1 skips chunks
if chunk_producer == 0 {
expected += 1;
Expand Down
3 changes: 2 additions & 1 deletion chain/epoch-manager/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ impl EpochInfoAggregator {
epoch_info,
prev_block_height + 1,
i as ShardId,
);
)
.unwrap();
let tracker = self.shard_tracker.entry(i as ShardId).or_insert_with(HashMap::new);
tracker
.entry(chunk_validator_id)
Expand Down
4 changes: 2 additions & 2 deletions chain/epoch-manager/src/validator_selection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ mod tests {
let cp = epoch_info.sample_chunk_producer(h, shard_id);
// Don't read too much into this. The reason the ValidatorId always
// equals the ShardId is because the validators are assigned to shards in order.
assert_eq!(cp, shard_id)
assert_eq!(cp, Some(shard_id))
}
}

Expand All @@ -632,7 +632,7 @@ mod tests {
for shard_id in 0..num_shards {
let mut counts: [i32; 2] = [0, 0];
for h in 0..100_000 {
let cp = epoch_info.sample_chunk_producer(h, shard_id);
let cp = epoch_info.sample_chunk_producer(h, shard_id).unwrap();
// if ValidatorId is in the second half then it is the lower
// stake validator (because they are sorted by decreasing stake).
let index = if cp >= num_shards { 1 } else { 0 };
Expand Down
22 changes: 13 additions & 9 deletions core/primitives/src/epoch_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1081,33 +1081,37 @@ pub mod epoch_info {
}
}

pub fn sample_chunk_producer(&self, height: BlockHeight, shard_id: ShardId) -> ValidatorId {
pub fn sample_chunk_producer(
&self,
height: BlockHeight,
shard_id: ShardId,
) -> Option<ValidatorId> {
match &self {
Self::V1(v1) => {
let cp_settlement = &v1.chunk_producers_settlement;
let shard_cps = &cp_settlement[shard_id as usize];
shard_cps[(height as u64 % (shard_cps.len() as u64)) as usize]
let shard_cps = cp_settlement.get(shard_id as usize)?;
shard_cps.get((height as u64 % (shard_cps.len() as u64)) as usize).copied()
}
Self::V2(v2) => {
let cp_settlement = &v2.chunk_producers_settlement;
let shard_cps = &cp_settlement[shard_id as usize];
shard_cps[(height as u64 % (shard_cps.len() as u64)) as usize]
let shard_cps = cp_settlement.get(shard_id as usize)?;
shard_cps.get((height as u64 % (shard_cps.len() as u64)) as usize).copied()
}
Self::V3(v3) => {
let protocol_version = self.protocol_version();
let seed =
Self::chunk_produce_seed(protocol_version, &v3.rng_seed, height, shard_id);
let shard_id = shard_id as usize;
let sample = v3.chunk_producers_sampler[shard_id].sample(seed);
v3.chunk_producers_settlement[shard_id][sample]
let sample = v3.chunk_producers_sampler.get(shard_id)?.sample(seed);
v3.chunk_producers_settlement.get(shard_id)?.get(sample).copied()
}
Self::V4(v4) => {
let protocol_version = self.protocol_version();
let seed =
Self::chunk_produce_seed(protocol_version, &v4.rng_seed, height, shard_id);
let shard_id = shard_id as usize;
let sample = v4.chunk_producers_sampler[shard_id].sample(seed);
v4.chunk_producers_settlement[shard_id][sample]
let sample = v4.chunk_producers_sampler.get(shard_id)?.sample(seed);
v4.chunk_producers_settlement.get(shard_id)?.get(sample).copied()
}
}
}
Comment on lines +1113 to 1117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test_chunk_state_witness_bad_shard_id test started failing: this actually uncovered a real issue which could result in crashing chunk validator when state witness contains invalid shard id, fixed in c5b2c5e

So IIUC we are now validating the shard_id of ChunkStateWitness by trying to sample a chunk producer for this shard_id. That feels a bit hacky, maybe we should have some function that prevalidates the witness header (or just shard_id) before doing anything else. Sampling the producer could potentially work with invalid shard ids (e.g producers[(height + shard) % producers.len()), so it's not obvious that it can be done with untrusted data.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's another time that an invalid shard_id could cause a crash (#10621 ...), I think it would be good to go through the code and just remove all usages of [shard_id]. It causes problems now, and it's gonna be even worse with resharding :/ I'll make an issue.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #10648

Expand Down
8 changes: 8 additions & 0 deletions core/primitives/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,8 @@ pub enum EpochError {
},
/// Error selecting validators for a chunk.
ChunkValidatorSelectionError(String),
/// Error selecting chunk producer for a shard.
ChunkProducerSelectionError(String),
}

impl std::error::Error for EpochError {}
Expand Down Expand Up @@ -892,6 +894,9 @@ impl Display for EpochError {
EpochError::ChunkValidatorSelectionError(err) => {
write!(f, "Error selecting validators for a chunk: {}", err)
}
EpochError::ChunkProducerSelectionError(err) => {
write!(f, "Error selecting chunk producer: {}", err)
}
}
}
}
Expand All @@ -915,6 +920,9 @@ impl Debug for EpochError {
EpochError::ChunkValidatorSelectionError(err) => {
write!(f, "ChunkValidatorSelectionError({})", err)
}
EpochError::ChunkProducerSelectionError(err) => {
write!(f, "ChunkProducerSelectionError({})", err)
}
}
}
}
Expand Down
5 changes: 3 additions & 2 deletions tools/state-viewer/src/epoch_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ fn display_block_and_chunk_producers(
let cps: Vec<String> = shard_ids
.iter()
.map(|&shard_id| {
let cp = epoch_info.sample_chunk_producer(block_height, shard_id);
let cp = epoch_info.sample_chunk_producer(block_height, shard_id).unwrap();
let cp = epoch_info.get_validator(cp).account_id().clone();
cp.as_str().to_string()
})
Expand Down Expand Up @@ -280,7 +280,8 @@ fn display_validator_info(
.iter()
.map(|&shard_id| (block_height, shard_id))
.filter(|&(block_height, shard_id)| {
epoch_info.sample_chunk_producer(block_height, shard_id) == *validator_id
epoch_info.sample_chunk_producer(block_height, shard_id)
== Some(*validator_id)
})
.collect::<Vec<(BlockHeight, ShardId)>>()
})
Expand Down
Loading