Skip to content

Commit

Permalink
fix(resharding): catchup shard even if not tracked for 1 epoch (near#…
Browse files Browse the repository at this point in the history
…12756)

Follow up to [zulip
discussion](https://near.zulipchat.com/#narrow/channel/407288-core.2Fresharding/topic/forknet/near/494153325).

Hot fix to unblock resharding testing, until we have the proper fix in
place.
  • Loading branch information
staffik authored Jan 20, 2025
1 parent 515b5fa commit da97178
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 21 deletions.
31 changes: 16 additions & 15 deletions chain/chain/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2465,12 +2465,12 @@ impl Chain {
/// where we'll go from tracking it to not tracking it and back to tracking it in consecutive epochs,
/// then we can just continue to apply chunks as if we were tracking it in epoch T, and there's no need to state sync.
fn should_catch_up_shard(
epoch_manager: &dyn EpochManagerAdapter,
_epoch_manager: &dyn EpochManagerAdapter,
shard_tracker: &ShardTracker,
me: &Option<AccountId>,
epoch_id: &EpochId,
epoch_last_block: &CryptoHash,
epoch_last_block_prev: &CryptoHash,
_epoch_last_block_prev: &CryptoHash,
shard_id: ShardId,
) -> Result<bool, Error> {
// Won't care about it next epoch, no need to state sync it.
Expand All @@ -2488,19 +2488,20 @@ impl Chain {
return Ok(true);
}

let (_layout, parent_shard_id, _index) =
epoch_manager.get_prev_shard_id_from_prev_hash(epoch_last_block, shard_id)?;
// Note that here passing `epoch_last_block_prev` to care_about_shard() will have us check whether we were tracking it in
// the previous epoch, because it is the "parent_hash" of the last block of the previous epoch.
// TODO: consider refactoring these ShardTracker functions to accept an epoch_id
// to make this less tricky.
let tracked_before = shard_tracker.care_about_shard(
me.as_ref(),
epoch_last_block_prev,
parent_shard_id,
true,
);
Ok(!tracked_before)
// let (_layout, parent_shard_id, _index) =
// epoch_manager.get_prev_shard_id_from_prev_hash(epoch_last_block, shard_id)?;
// // Note that here passing `epoch_last_block_prev` to care_about_shard() will have us check whether we were tracking it in
// // the previous epoch, because it is the "parent_hash" of the last block of the previous epoch.
// // TODO: consider refactoring these ShardTracker functions to accept an epoch_id
// // to make this less tricky.
// let tracked_before = shard_tracker.care_about_shard(
// me.as_ref(),
// epoch_last_block_prev,
// parent_shard_id,
// true,
// );
// TODO(resharding) Uncomment or remove above, and accordingly `_epoch_manager` and `_epoch_last_block_prev`.
Ok(true)
}

/// Check if any block with missing chunk is ready to be processed and start processing these blocks
Expand Down
16 changes: 10 additions & 6 deletions integration-tests/src/test_loop/tests/resharding_v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,12 @@ const INCREASED_EPOCH_LENGTH: u64 = 10;
const GC_NUM_EPOCHS_TO_KEEP: u64 = 3;

/// Default number of epochs for resharding testloop to run.
// TODO(resharding) Fix nearcore and set it to 10.
const DEFAULT_TESTLOOP_NUM_EPOCHS_TO_WAIT: u64 = 8;

/// Increased number of epochs for resharding testloop to run.
/// To be used in tests with shard shuffling enabled, to cover more configurations of shard assignment.
const INCREASED_TESTLOOP_NUM_EPOCHS_TO_WAIT: u64 = 12;

/// Default shard layout version used in resharding tests.
const DEFAULT_SHARD_LAYOUT_VERSION: u64 = 2;

Expand Down Expand Up @@ -832,6 +835,7 @@ fn test_resharding_v3_double_sign_resharding_block() {
fn test_resharding_v3_shard_shuffling() {
let params = TestReshardingParametersBuilder::default()
.shuffle_shard_assignment_for_chunk_producers(true)
.num_epochs_to_wait(INCREASED_TESTLOOP_NUM_EPOCHS_TO_WAIT)
.build();
test_resharding_v3_base(params);
}
Expand All @@ -854,15 +858,14 @@ fn test_resharding_v3_shard_shuffling_untrack_then_track() {
let tracked_shard_sequence =
vec![parent_shard_id, parent_shard_id, unrelated_shard_id, child_shard_id];
let num_clients = 8;
let num_epochs_to_wait = DEFAULT_TESTLOOP_NUM_EPOCHS_TO_WAIT;
let num_epochs_to_wait = INCREASED_TESTLOOP_NUM_EPOCHS_TO_WAIT;
let tracked_shard_schedule = TrackedShardSchedule {
client_index: (num_clients - 1) as usize,
schedule: shard_sequence_to_schedule(tracked_shard_sequence, num_epochs_to_wait),
};
let params = TestReshardingParametersBuilder::default()
// TODO(resharding): uncomment after the bug in the comment above get_should_apply_chunk()
// in chain.rs is fixed
//.shuffle_shard_assignment_for_chunk_producers(true)
.shuffle_shard_assignment_for_chunk_producers(true)
.num_epochs_to_wait(num_epochs_to_wait)
.num_clients(num_clients)
.tracked_shard_schedule(Some(tracked_shard_schedule.clone()))
.add_loop_action(check_state_cleanup(tracked_shard_schedule, num_epochs_to_wait, true))
Expand All @@ -875,7 +878,7 @@ fn test_resharding_v3_shard_shuffling_intense() {
let chunk_ranges_to_drop = HashMap::from([(0, -1..2), (1, -3..0), (2, -3..3), (3, 0..1)]);
let params = TestReshardingParametersBuilder::default()
.num_accounts(8)
.epoch_length(INCREASED_EPOCH_LENGTH)
.epoch_length(INCREASED_TESTLOOP_NUM_EPOCHS_TO_WAIT)
.shuffle_shard_assignment_for_chunk_producers(true)
.chunk_ranges_to_drop(chunk_ranges_to_drop)
.add_loop_action(execute_money_transfers(
Expand Down Expand Up @@ -1103,6 +1106,7 @@ fn test_resharding_v3_slower_post_processing_tasks() {
fn test_resharding_v3_shard_shuffling_slower_post_processing_tasks() {
let params = TestReshardingParametersBuilder::default()
.shuffle_shard_assignment_for_chunk_producers(true)
.num_epochs_to_wait(INCREASED_TESTLOOP_NUM_EPOCHS_TO_WAIT)
.delay_flat_state_resharding(2)
.epoch_length(INCREASED_EPOCH_LENGTH)
.build();
Expand Down

0 comments on commit da97178

Please sign in to comment.