From 4cdf0f90404bff8725ca63cfb5038a53c476a991 Mon Sep 17 00:00:00 2001
From: Tobias Schottdorf
Date: Fri, 6 Sep 2019 11:21:02 +0200
Subject: [PATCH] storage: regression test applying split while non-member

Without the preceding commit, this fatals as seen in #39796:

> F190821 15:14:28.241623 312191 storage/store.go:2172
> [n2,s2,r21/3:/{Table/54-Max}] replica descriptor of local store not
> found in right hand side of split

Note that #40459 will break these tests since it will not allow the
replicaID to change without an intervening GC cycle, but we now rely on
catching up via the raft log across a removal, split, and then re-add.
At that point, the only way in which a split trigger could execute while
in non-member status is if a replica got removed but would still receive
a later split trigger. This could happen as follows: the log is
`[..., remove, split]`, and

a) the follower receives `split`,
b) it learns that `split` is committed, so that
c) it will apply both `remove` and then `split`.

a) seems possible; if the removed follower is trailing a bit, the leader
may append both entries at the same time before it (the leader) applies
the replication change that removes the follower from the leader's
config.

b) seems possible along the same lines, at least once we do async
command application, if said append carries a commit index encompassing
the append itself.

c) will then naturally happen in today's code.

There's no way we can reasonably enact this scenario in our testing at
this point, though. The suggestion for #40459 is to essentially revert
this commit, so that we at least retain the coverage related to the RHS
of this test.

Release note (bug fix): Avoid crashes with message "replica descriptor
of local store not found".
---
 pkg/storage/client_split_test.go    | 87 ++++++++++++++++++++++++++---
 pkg/storage/raft_snapshot_queue.go  |  3 +
 pkg/storage/replica_command.go      |  8 ++-
 pkg/storage/replica_learner_test.go |  2 +-
 pkg/storage/testing_knobs.go        |  7 ++-
 5 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/pkg/storage/client_split_test.go b/pkg/storage/client_split_test.go
index 39c251e7b3fe..6db3d819b5f9 100644
--- a/pkg/storage/client_split_test.go
+++ b/pkg/storage/client_split_test.go
@@ -3195,22 +3195,71 @@ func TestStoreSplitDisappearingReplicas(t *testing.T) {
 // Regression test for #21146. This verifies the behavior of when the
 // application of some split command (part of the lhs's log) is delayed on some
 // store and meanwhile the rhs has rebalanced away and back, ending up with a
-// larger ReplicaID than the split thinks it will have.
+// larger ReplicaID than the split thinks it will have. Additionally we remove
+// the LHS replica on the same store before the split and re-add it after, so
+// that when connectivity is restored the LHS will apply a split trigger while
+// it is not a part of the descriptor.
+//
+// Or, in pictures (s3 looks like s1 throughout and is omitted):
+//
+// s1: [----r1@all-------------]
+// s2: [----r1@all-------------]
+// Remove s2:
+// s1: [----r1@s1s3------------]
+// s2: [----r1@all-------------] (outdated)
+// Split r1:
+// s1: [-r1@s1s3-|--r2@s1s3----]
+// s2: [----r1@all-------------] (outdated)
+// Add s2:
+// s1: [-r1@all-|--r2@s1s3-----]
+// s2: [----r1@all-------------] (outdated)
+// Add learner to s2 on r2 (remains uninitialized due to LHS state blocking it):
+// s1: [-r1@all-|--r2@all------]
+// s2: [----r1@all-------------] (outdated), uninitialized replica r2/3
+// Remove and re-add learner multiple times: r2/3 becomes r2/100
+// (diagram looks the same except for replacing r2/3)
+//
+// When connectivity is restored, r1@s2 will start to catch up on the raft log
+// after it learns of its new replicaID. It first processes the replication
+// change that removes it and switches to a desc that doesn't contain itself as
+// a replica. Next it sees the split trigger that once caused a crash because
+// the store tried to look up itself and failed. This being handled correctly,
+// the split trigger next has to look up the right hand side, which surprisingly
+// has a higher replicaID than that seen in the split trigger. This too needs to
+// be tolerated.
 func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	ctx := context.Background()
 	blockPromoteCh := make(chan struct{})
+	var skipLearnerSnaps int32
+	withoutLearnerSnap := func(fn func()) {
+		atomic.StoreInt32(&skipLearnerSnaps, 1)
+		fn()
+		atomic.StoreInt32(&skipLearnerSnaps, 0)
+	}
 	knobs := base.TestingKnobs{Store: &storage.StoreTestingKnobs{
-		ReplicaAddStopAfterLearnerSnapshot: func() bool {
-			<-blockPromoteCh
+		ReplicaSkipLearnerSnapshot: func() bool {
+			return atomic.LoadInt32(&skipLearnerSnaps) != 0
+		},
+		ReplicaAddStopAfterLearnerSnapshot: func(targets []roachpb.ReplicationTarget) bool {
+			if atomic.LoadInt32(&skipLearnerSnaps) != 0 {
+				return false
+			}
+			if len(targets) > 0 && targets[0].StoreID == 2 {
+				<-blockPromoteCh
+			}
 			return false
 		},
 		ReplicaAddSkipLearnerRollback: func() bool { return true },
+		// We rely on replicas remaining where they are even when they are removed
+		// from the range, as this lets us set up a split trigger that will apply
+		// on a replica that is (at the time of the split trigger) not a member.
+		DisableReplicaGCQueue: true,
 	}}
-	tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{
+	tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
 		ServerArgs:      base.TestServerArgs{Knobs: knobs},
 		ReplicationMode: base.ReplicationManual,
 	})
@@ -3219,6 +3268,9 @@ func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
 	k := tc.ScratchRange(t)
 	desc := tc.LookupRangeOrFatal(t, k)
 
+	// Add a replica on n3, which we'll need to achieve quorum while we cut off n2 below.
+	tc.AddReplicasOrFatal(t, k, tc.Target(2))
+
 	// First construct a range with a learner replica on the second node (index 1)
 	// and split it, ending up with an orphaned learner on each side of the split.
 	// After the learner is created, but before the split, block all incoming raft
@@ -3237,23 +3289,40 @@ func TestSplitTriggerMeetsUnexpectedReplicaID(t *testing.T) {
 	})
 
 	_, kRHS := k, k.Next()
+	// Remove the LHS on the isolated store, split the range, and re-add it.
+	tc.RemoveReplicasOrFatal(t, k, tc.Target(1))
 	descLHS, descRHS := tc.SplitRangeOrFatal(t, kRHS)
+	withoutLearnerSnap(func() {
+		// NB: can't use AddReplicas since that waits for the target to be up
+		// to date, which it won't be in this case.
+		//
+		// We avoid sending a snapshot because that snapshot would include the
+		// split trigger and we want that to be processed via the log.
+		d, err := tc.Servers[0].DB().AdminChangeReplicas(
+			ctx, descLHS.StartKey.AsRawKey(), descLHS, roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, tc.Target(1)),
+		)
+		require.NoError(t, err)
+		descLHS = *d
+	})
 
 	close(blockPromoteCh)
 	if err := g.Wait(); !testutils.IsError(err, `descriptor changed`) {
 		t.Fatalf(`expected "descriptor changed" error got: %+v`, err)
 	}
 
-	// Now repeatedly remove and re-add the learner on the rhs, so it has a
+	// Now repeatedly re-add the learner on the rhs, so it has a
 	// different replicaID than the split trigger expects.
-	for i := 0; i < 5; i++ {
-		_, err := tc.RemoveReplicas(kRHS, tc.Target(1))
-		require.NoError(t, err)
-		_, err = tc.AddReplicas(kRHS, tc.Target(1))
+	add := func() {
+		_, err := tc.AddReplicas(kRHS, tc.Target(1))
 		if !testutils.IsError(err, `snapshot intersects existing range`) {
 			t.Fatalf(`expected snapshot intersects existing range" error got: %+v`, err)
 		}
 	}
+	for i := 0; i < 5; i++ {
+		add()
+		tc.RemoveReplicasOrFatal(t, kRHS, tc.Target(1))
+	}
+	add()
 
 	// Normally AddReplicas will return the latest version of the RangeDescriptor,
 	// but because we're getting snapshot errors and using the
diff --git a/pkg/storage/raft_snapshot_queue.go b/pkg/storage/raft_snapshot_queue.go
index 1993737e9fe2..0f9637032ac2 100644
--- a/pkg/storage/raft_snapshot_queue.go
+++ b/pkg/storage/raft_snapshot_queue.go
@@ -113,6 +113,9 @@ func (rq *raftSnapshotQueue) processRaftSnapshot(
 	// the replicate queue. Either way, no point in also sending it a snapshot of
 	// type RAFT.
 	if repDesc.GetType() == roachpb.LEARNER {
+		if fn := repl.store.cfg.TestingKnobs.ReplicaSkipLearnerSnapshot; fn != nil && fn() {
+			return nil
+		}
 		snapType = SnapshotRequest_LEARNER
 		if index := repl.getAndGCSnapshotLogTruncationConstraints(timeutil.Now(), repDesc.StoreID); index > 0 {
 			// There is a snapshot being transferred. It's probably a LEARNER snap, so
diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go
index 9f5b6c924e42..57641a063064 100644
--- a/pkg/storage/replica_command.go
+++ b/pkg/storage/replica_command.go
@@ -1211,6 +1211,10 @@ func (r *Replica) atomicReplicationChange(
 			return nil, errors.Errorf("programming error: cannot promote replica of type %s", rDesc.Type)
 		}
 
+		if fn := r.store.cfg.TestingKnobs.ReplicaSkipLearnerSnapshot; fn != nil && fn() {
+			continue
+		}
+
 		// Note that raft snapshot queue will refuse to send a snapshot to a learner
 		// replica if its store is already sending a snapshot to that replica. That
 		// would race with this snapshot, except that we've put a (best effort) lock
@@ -1231,8 +1235,8 @@ func (r *Replica) atomicReplicationChange(
 		}
 	}
 
-	if len(chgs.Additions()) > 0 {
-		if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil && fn() {
+	if adds := chgs.Additions(); len(adds) > 0 {
+		if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil && fn(adds) {
 			return desc, nil
 		}
 	}
diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go
index 6a9f067a84fd..6f2a5ff6825a 100644
--- a/pkg/storage/replica_learner_test.go
+++ b/pkg/storage/replica_learner_test.go
@@ -68,7 +68,7 @@ func (rtl *replicationTestKnobs) withStopAfterJointConfig(f func()) {
 
 func makeReplicationTestKnobs() (base.TestingKnobs, *replicationTestKnobs) {
 	var k replicationTestKnobs
-	k.storeKnobs.ReplicaAddStopAfterLearnerSnapshot = func() bool {
+	k.storeKnobs.ReplicaAddStopAfterLearnerSnapshot = func(_ []roachpb.ReplicationTarget) bool {
 		return atomic.LoadInt64(&k.replicaAddStopAfterLearnerAtomic) > 0
 	}
 	k.storeKnobs.ReplicaAddStopAfterJointConfig = func() bool {
diff --git a/pkg/storage/testing_knobs.go b/pkg/storage/testing_knobs.go
index c20d107923f8..7ee8dccf6953 100644
--- a/pkg/storage/testing_knobs.go
+++ b/pkg/storage/testing_knobs.go
@@ -195,7 +195,12 @@ type StoreTestingKnobs struct {
 	// and after the LEARNER type snapshot, but before promoting it to a voter.
 	// This ensures the `*Replica` will be materialized on the Store when it
 	// returns.
-	ReplicaAddStopAfterLearnerSnapshot func() bool
+	ReplicaAddStopAfterLearnerSnapshot func([]roachpb.ReplicationTarget) bool
+	// ReplicaSkipLearnerSnapshot causes snapshots to never be sent to learners
+	// if the func returns true. Adding replicas proceeds as usual, though if
+	// the added replica has no prior state that can be caught up from the raft
+	// log, the result will be a voter that is unable to participate in quorum.
+	ReplicaSkipLearnerSnapshot func() bool
 	// ReplicaAddStopAfterJointConfig causes replica addition to return early if
 	// the func returns true. This happens before transitioning out of a joint
 	// configuration, after the joint configuration has been entered by means
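
The non-test mechanism in this patch is small: two optional testing knobs, consulted through the usual `if fn := ...; fn != nil && fn(...)` guard, let a test suppress learner snapshots (ReplicaSkipLearnerSnapshot) or pause an upreplication for specific targets (the new ReplicaAddStopAfterLearnerSnapshot signature). Below is a minimal, self-contained Go sketch of that pattern. It is not CockroachDB code; storeKnobs, addReplica, and withoutLearnerSnap are invented stand-ins that only mirror the shape of StoreTestingKnobs, the replica-addition path, and the test's withoutLearnerSnap helper.

// knob_pattern_sketch.go: illustrative only, see the caveats above.
package main

import (
	"fmt"
	"sync/atomic"
)

// storeKnobs mirrors the shape of StoreTestingKnobs: optional hooks that the
// production path consults only when they are non-nil.
type storeKnobs struct {
	// skipLearnerSnapshot plays the role of ReplicaSkipLearnerSnapshot.
	skipLearnerSnapshot func() bool
	// stopAfterLearnerSnapshot plays the role of the new
	// ReplicaAddStopAfterLearnerSnapshot signature, which now receives the
	// replication targets (simplified here to store IDs).
	stopAfterLearnerSnapshot func(targets []int) bool
}

// addReplica stands in for the replica-addition path guarded by the knobs.
func addReplica(knobs *storeKnobs, targets []int) {
	if fn := knobs.skipLearnerSnapshot; fn != nil && fn() {
		fmt.Println("learner snapshot skipped for stores", targets)
	} else {
		fmt.Println("learner snapshot sent to stores", targets)
	}
	if fn := knobs.stopAfterLearnerSnapshot; fn != nil && fn(targets) {
		fmt.Println("stopping after learner snapshot")
		return
	}
	fmt.Println("learner promoted to voter on stores", targets)
}

func main() {
	var skipSnaps int32 // toggled atomically, as in the test

	knobs := &storeKnobs{
		skipLearnerSnapshot: func() bool {
			return atomic.LoadInt32(&skipSnaps) != 0
		},
		stopAfterLearnerSnapshot: func(targets []int) bool {
			// Only stop for a particular store, mirroring the StoreID == 2
			// check in TestSplitTriggerMeetsUnexpectedReplicaID.
			return len(targets) > 0 && targets[0] == 2
		},
	}

	// withoutLearnerSnap enables the skip knob only for the duration of fn,
	// the same trick the test uses so the re-added LHS replica has to catch
	// up from the raft log (and thus apply the split trigger) instead of
	// receiving a snapshot that already contains the split.
	withoutLearnerSnap := func(fn func()) {
		atomic.StoreInt32(&skipSnaps, 1)
		defer atomic.StoreInt32(&skipSnaps, 0)
		fn()
	}

	addReplica(knobs, []int{3})                                // snapshot sent, learner promoted
	withoutLearnerSnap(func() { addReplica(knobs, []int{1}) }) // snapshot skipped
	addReplica(knobs, []int{2})                                // stops after the snapshot step
}

Because an unset knob is just a nil function field, the production path pays only a nil check; the atomic flag keeps the temporary toggle safe to read from knob callbacks running on other goroutines, which is why the test uses the same construction rather than a plain bool.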