From ddf8e15b21d8ca6bfb691e96353909c27e3fcc8a Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 09:17:55 +0200 Subject: [PATCH 01/27] storage: rename a test Release note: None --- pkg/storage/replica_learner_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 7a19a5bb97c5..964dd44dacfe 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -344,7 +344,7 @@ func TestReplicateQueueSeesLearner(t *testing.T) { require.Len(t, desc.Replicas().Voters(), 3) } -func TestGCQueueSeesLearner(t *testing.T) { +func TestReplicaGCQueueSeesLearner(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() knobs, ltk := makeLearnerTestKnobs() From 63c49c59988406c7bc040eaf6d1610b1cff7f01c Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 09:26:05 +0200 Subject: [PATCH 02/27] storage: add ReplicaAddStopAfterJointConfig store knob This will be used to test the behavior of and interactions with joint configurations. Release note: None --- pkg/storage/replica_command.go | 11 ++++++--- pkg/storage/replica_learner_test.go | 38 ++++++++++++++++------------- pkg/storage/testing_knobs.go | 4 +++ 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index 77071b82c3ad..e49f44645ed3 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -1153,10 +1153,8 @@ func (r *Replica) atomicReplicationChange( } } - if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil { - if fn() { - return desc, nil - } + if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterLearnerSnapshot; fn != nil && fn() { + return desc, nil } for _, target := range chgs.Removals() { @@ -1168,6 +1166,11 @@ func (r *Replica) atomicReplicationChange( if err != nil { return nil, err } + + if fn := r.store.cfg.TestingKnobs.ReplicaAddStopAfterJointConfig; fn != nil && fn() { + return desc, nil + } + // Leave the joint config if we entered one. 
return r.maybeLeaveAtomicChangeReplicas(ctx, desc) } diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 964dd44dacfe..93db64c92974 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -38,16 +38,20 @@ import ( "github.com/stretchr/testify/require" ) -type learnerTestKnobs struct { +type replicationTestKnobs struct { storeKnobs storage.StoreTestingKnobs replicaAddStopAfterLearnerAtomic int64 + replicaAddStopAfterJointConfig int64 } -func makeLearnerTestKnobs() (base.TestingKnobs, *learnerTestKnobs) { - var k learnerTestKnobs +func makeReplicationTestKnobs() (base.TestingKnobs, *replicationTestKnobs) { + var k replicationTestKnobs k.storeKnobs.ReplicaAddStopAfterLearnerSnapshot = func() bool { return atomic.LoadInt64(&k.replicaAddStopAfterLearnerAtomic) > 0 } + k.storeKnobs.ReplicaAddStopAfterJointConfig = func() bool { + return atomic.LoadInt64(&k.replicaAddStopAfterJointConfig) > 0 + } return base.TestingKnobs{Store: &k.storeKnobs}, &k } @@ -106,7 +110,7 @@ func TestAddReplicaViaLearner(t *testing.T) { blockUntilSnapshotCh := make(chan struct{}) blockSnapshotsCh := make(chan struct{}) - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() ltk.storeKnobs.ReceiveSnapshot = func(h *storage.SnapshotRequest_Header) error { close(blockUntilSnapshotCh) select { @@ -189,7 +193,7 @@ func TestLearnerRaftConfState(t *testing.T) { dir, cleanup := testutils.TempDir(t) defer cleanup() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() ctx := context.Background() const numNodes = 2 serverArgsPerNode := make(map[int]base.TestServerArgs) @@ -246,7 +250,7 @@ func TestLearnerSnapshotFailsRollback(t *testing.T) { defer leaktest.AfterTest(t)() var rejectSnapshots int64 - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() ltk.storeKnobs.ReceiveSnapshot = func(h *storage.SnapshotRequest_Header) error { if atomic.LoadInt64(&rejectSnapshots) > 0 { return errors.New(`nope`) @@ -282,7 +286,7 @@ func TestSplitWithLearner(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -311,7 +315,7 @@ func TestReplicateQueueSeesLearner(t *testing.T) { // NB also see TestAllocatorRemoveLearner for a lower-level test. 
ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -347,7 +351,7 @@ func TestReplicateQueueSeesLearner(t *testing.T) { func TestReplicaGCQueueSeesLearner(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -380,7 +384,7 @@ func TestRaftSnapshotQueueSeesLearner(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() blockSnapshotsCh := make(chan struct{}) - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() ltk.storeKnobs.DisableRaftSnapshotQueue = true ltk.storeKnobs.ReceiveSnapshot = func(h *storage.SnapshotRequest_Header) error { select { @@ -444,7 +448,7 @@ func TestLearnerAdminChangeReplicasRace(t *testing.T) { blockUntilSnapshotCh := make(chan struct{}, 2) blockSnapshotsCh := make(chan struct{}) - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() ltk.storeKnobs.ReceiveSnapshot = func(h *storage.SnapshotRequest_Header) error { blockUntilSnapshotCh <- struct{}{} <-blockSnapshotsCh @@ -501,7 +505,7 @@ func TestLearnerReplicateQueueRace(t *testing.T) { var skipReceiveSnapshotKnobAtomic int64 = 1 blockUntilSnapshotCh := make(chan struct{}, 2) blockSnapshotsCh := make(chan struct{}) - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() ltk.storeKnobs.ReceiveSnapshot = func(h *storage.SnapshotRequest_Header) error { if atomic.LoadInt64(&skipReceiveSnapshotKnobAtomic) > 0 { return nil @@ -574,7 +578,7 @@ func TestLearnerReplicateQueueRace(t *testing.T) { func TestLearnerNoAcceptLease(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -607,7 +611,7 @@ func TestLearnerFollowerRead(t *testing.T) { } ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -656,7 +660,7 @@ func TestLearnerAdminRelocateRange(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 4, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -693,7 +697,7 @@ func TestLearnerAdminMerge(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, @@ -728,7 +732,7 @@ func TestLearnerAdminMerge(t *testing.T) { func TestMergeQueueSeesLearner(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - knobs, ltk := makeLearnerTestKnobs() + knobs, ltk := 
makeReplicationTestKnobs() tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{Knobs: knobs}, ReplicationMode: base.ReplicationManual, diff --git a/pkg/storage/testing_knobs.go b/pkg/storage/testing_knobs.go index 92e3d910856e..aff808389828 100644 --- a/pkg/storage/testing_knobs.go +++ b/pkg/storage/testing_knobs.go @@ -198,6 +198,10 @@ type StoreTestingKnobs struct { // This ensures the `*Replica` will be materialized on the Store when it // returns. ReplicaAddStopAfterLearnerSnapshot func() bool + // ReplicaAddStopAfterJointConfig causes replica addition to return early if + // the func returns true. This happens before transitioning out of a joint + // configuration. + ReplicaAddStopAfterJointConfig func() bool // BeforeSnapshotSSTIngestion is run just before the SSTs are ingested when // applying a snapshot. BeforeSnapshotSSTIngestion func(IncomingSnapshot, SnapshotRequest_Type, []string) error From 5df1dc57b6f6a29aaf18e8dd3e8b139a7edf874b Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 09:40:35 +0200 Subject: [PATCH 03/27] storage: add ReplicationAlwaysUseJointConfig testing knob Release note: None --- pkg/storage/replica_command.go | 3 +++ pkg/storage/replica_learner_test.go | 4 ++++ pkg/storage/testing_knobs.go | 3 +++ 3 files changed, 10 insertions(+) diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index e49f44645ed3..cc40313126ae 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -1322,6 +1322,9 @@ func execChangeReplicasTxn( } useJoint := len(chgs) > 1 + if fn := store.TestingKnobs().ReplicationAlwaysUseJointConfig; fn != nil && fn() { + useJoint = true + } for _, chg := range chgs { switch chg.typ { case internalChangeTypeAddVoterViaPreemptiveSnap: diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 93db64c92974..ba8baf235ccc 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -42,6 +42,7 @@ type replicationTestKnobs struct { storeKnobs storage.StoreTestingKnobs replicaAddStopAfterLearnerAtomic int64 replicaAddStopAfterJointConfig int64 + replicationAlwaysUseJointConfig int64 } func makeReplicationTestKnobs() (base.TestingKnobs, *replicationTestKnobs) { @@ -52,6 +53,9 @@ func makeReplicationTestKnobs() (base.TestingKnobs, *replicationTestKnobs) { k.storeKnobs.ReplicaAddStopAfterJointConfig = func() bool { return atomic.LoadInt64(&k.replicaAddStopAfterJointConfig) > 0 } + k.storeKnobs.ReplicationAlwaysUseJointConfig = func() bool { + return atomic.LoadInt64(&k.replicationAlwaysUseJointConfig) > 0 + } return base.TestingKnobs{Store: &k.storeKnobs}, &k } diff --git a/pkg/storage/testing_knobs.go b/pkg/storage/testing_knobs.go index aff808389828..8f2b349e14e0 100644 --- a/pkg/storage/testing_knobs.go +++ b/pkg/storage/testing_knobs.go @@ -202,6 +202,9 @@ type StoreTestingKnobs struct { // the func returns true. This happens before transitioning out of a joint // configuration. ReplicaAddStopAfterJointConfig func() bool + // ReplicationAlwaysUseJointConfig causes replica addition to always go + // through a joint configuration, even when this isn't necessary. + ReplicationAlwaysUseJointConfig func() bool // BeforeSnapshotSSTIngestion is run just before the SSTs are ingested when // applying a snapshot. 
BeforeSnapshotSSTIngestion func(IncomingSnapshot, SnapshotRequest_Type, []string) error From e604fb15b9f5355f866434f7358dcf46763d09b3 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 09:48:48 +0200 Subject: [PATCH 04/27] testcluster: add RemoveReplicasOrFatal Release note: None --- pkg/testutils/testcluster/testcluster.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pkg/testutils/testcluster/testcluster.go b/pkg/testutils/testcluster/testcluster.go index 23127ff92705..8f1bd531d7c0 100644 --- a/pkg/testutils/testcluster/testcluster.go +++ b/pkg/testutils/testcluster/testcluster.go @@ -506,6 +506,18 @@ func (tc *TestCluster) RemoveReplicas( return tc.changeReplicas(roachpb.REMOVE_REPLICA, keys.MustAddr(startKey), targets...) } +func (tc *TestCluster) RemoveReplicasOrFatal( + t testing.TB, startKey roachpb.Key, targets ...roachpb.ReplicationTarget, +) roachpb.RangeDescriptor { + t.Helper() + desc, err := tc.RemoveReplicas(startKey, targets...) + if err != nil { + t.Fatalf(`could not remove %v replicas from range containing %s: %+v`, + targets, startKey, err) + } + return desc +} + // TransferRangeLease is part of the TestServerInterface. func (tc *TestCluster) TransferRangeLease( rangeDesc roachpb.RangeDescriptor, dest roachpb.ReplicationTarget, From d3d8dd826084596e97ddc60722b57adaa2bc4b20 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 09:49:02 +0200 Subject: [PATCH 05/27] batcheval: explain ban on lease transfer to VOTER_OUTGOING Release note: None --- pkg/storage/batcheval/cmd_lease.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/storage/batcheval/cmd_lease.go b/pkg/storage/batcheval/cmd_lease.go index ba597922fda5..c01677e9e650 100644 --- a/pkg/storage/batcheval/cmd_lease.go +++ b/pkg/storage/batcheval/cmd_lease.go @@ -48,6 +48,13 @@ func checkCanReceiveLease(rec EvalContext) error { return errors.AssertionFailedf( `could not find replica for store %s in %s`, rec.StoreID(), rec.Desc()) } else if t := repDesc.GetType(); t != roachpb.VOTER_FULL { + // NB: there's no harm in transferring the lease to a VOTER_INCOMING, + // but we disallow it anyway. On the other hand, transferring to + // VOTER_OUTGOING would be a pretty bad idea since those voters are + // dropped when transitioning out of the joint config, which then + // amounts to removing the leaseholder without any safety precautions. + // This would either wedge the range or allow illegal reads to be + // served. return errors.Errorf(`cannot transfer lease to replica of type %s`, t) } return nil From 30df72309ba368c346f5a8c8fcce62c22f56a47b Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 09:50:11 +0200 Subject: [PATCH 06/27] storage: add TestJointConfigLease Release note: None --- pkg/storage/replica_learner_test.go | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index ba8baf235ccc..6ddf084ad42a 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -605,6 +605,37 @@ func TestLearnerNoAcceptLease(t *testing.T) { } } +// TestJointConfigLease verifies that incoming and outgoing voters can't have the +// lease transferred to them. 
+func TestJointConfigLease(t *testing.T) { + defer leaktest.AfterTest(t)() + ctx := context.Background() + knobs, ltk := makeReplicationTestKnobs() + tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{Knobs: knobs}, + ReplicationMode: base.ReplicationManual, + }) + defer tc.Stopper().Stop(ctx) + + k := tc.ScratchRange(t) + atomic.StoreInt64(&ltk.replicaAddStopAfterJointConfig, 1) + atomic.StoreInt64(&ltk.replicationAlwaysUseJointConfig, 1) + desc := tc.AddReplicasOrFatal(t, k, tc.Target(1)) + require.True(t, desc.Replicas().InAtomicReplicationChange(), desc) + + err := tc.TransferRangeLease(desc, tc.Target(1)) + exp := `cannot transfer lease to replica of type VOTER_INCOMING` + require.True(t, testutils.IsError(err, exp), err) + + // NB: we don't have to transition out of the joint config first because + // this is done automatically by ChangeReplicas before it does what it's + // asked to do. + desc = tc.RemoveReplicasOrFatal(t, k, tc.Target(1)) + err = tc.TransferRangeLease(desc, tc.Target(1)) + exp = `cannot transfer lease to replica of type VOTER_OUTGOING` + require.True(t, testutils.IsError(err, exp), err) +} + func TestLearnerFollowerRead(t *testing.T) { defer leaktest.AfterTest(t)() From 224441e1c6c486efe9824d84c9ed7527402d982d Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 11:00:13 +0200 Subject: [PATCH 07/27] storage: test that incoming/outgoing voters don't serve follower reads In adapting this test I also found a fat bug, which was that removing a learner while trying to make it a joint change would turn the learner into an outgoing voter. This is now fixed (and exercised by the test). Release note: None --- pkg/roachpb/metadata.go | 9 +-- pkg/roachpb/metadata_replicas.go | 63 +++++++++++--------- pkg/storage/replica_command.go | 25 ++++---- pkg/storage/replica_follower_read.go | 11 ++-- pkg/storage/replica_learner_test.go | 88 ++++++++++++++++++++-------- 5 files changed, 123 insertions(+), 73 deletions(-) diff --git a/pkg/roachpb/metadata.go b/pkg/roachpb/metadata.go index 63855311beb5..c9c63072bd6a 100644 --- a/pkg/roachpb/metadata.go +++ b/pkg/roachpb/metadata.go @@ -137,18 +137,19 @@ func (r *RangeDescriptor) SetReplicas(replicas ReplicaDescriptors) { // SetReplicaType changes the type of the replica with the given ID to the given // type. Returns zero values if the replica was not found and the updated -// descriptor and true otherwise. +// descriptor, the previous type, and true, otherwise. func (r *RangeDescriptor) SetReplicaType( nodeID NodeID, storeID StoreID, typ ReplicaType, -) (ReplicaDescriptor, bool) { +) (ReplicaDescriptor, ReplicaType, bool) { for i := range r.InternalReplicas { desc := &r.InternalReplicas[i] if desc.StoreID == storeID && desc.NodeID == nodeID { + prevTyp := desc.GetType() desc.Type = &typ - return *desc, true + return *desc, prevTyp, true } } - return ReplicaDescriptor{}, 0, false + return ReplicaDescriptor{}, 0, false } // AddReplica adds a replica on the given node and store with the supplied type.
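[Editorial illustration, not part of the patch series: the new three-value return of SetReplicaType above lets callers validate the transition they are asking for. A minimal Go sketch under that assumption — promoteLearner is a hypothetical helper, mirroring the LEARNER check that execChangeReplicasTxn performs in the replica_command.go hunk of this patch.]

    // promoteLearner promotes a LEARNER to VOTER_INCOMING, using the previous
    // type returned by SetReplicaType to reject any other transition.
    // Hypothetical helper; assumes the roachpb and cockroachdb/errors imports
    // used elsewhere in this series.
    func promoteLearner(
        desc *roachpb.RangeDescriptor, target roachpb.ReplicationTarget,
    ) (roachpb.ReplicaDescriptor, error) {
        rDesc, prevTyp, ok := desc.SetReplicaType(
            target.NodeID, target.StoreID, roachpb.VOTER_INCOMING)
        if !ok {
            return roachpb.ReplicaDescriptor{}, errors.Errorf(
                "replica for %v not found in %s", target, desc)
        }
        if prevTyp != roachpb.LEARNER {
            return roachpb.ReplicaDescriptor{}, errors.Errorf(
                "cannot promote replica of type %s", prevTyp)
        }
        return rDesc, nil
    }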
diff --git a/pkg/roachpb/metadata_replicas.go b/pkg/roachpb/metadata_replicas.go index 7d64c9151251..c5dcd0b37608 100644 --- a/pkg/roachpb/metadata_replicas.go +++ b/pkg/roachpb/metadata_replicas.go @@ -88,6 +88,19 @@ func (d ReplicaDescriptors) All() []ReplicaDescriptor { return d.wrapped } +func predVoterFullOrIncoming(rDesc ReplicaDescriptor) bool { + switch rDesc.GetType() { + case VOTER_FULL, VOTER_INCOMING: + return true + default: + } + return false +} + +func predLearner(rDesc ReplicaDescriptor) bool { + return rDesc.GetType() == LEARNER +} + // Voters returns the current and future voter replicas in the set. This means // that during an atomic replication change, only the replicas that will be // voters once the change completes will be returned; "outgoing" voters will not @@ -102,26 +115,7 @@ func (d ReplicaDescriptors) All() []ReplicaDescriptor { // different subset of voters. Consider renaming this method so that it's // more descriptive. func (d ReplicaDescriptors) Voters() []ReplicaDescriptor { - // Fastpath, most of the time, everything is a voter, so special case that and - // save the alloc. - fastpath := true - for i := range d.wrapped { - if d.wrapped[i].GetType() != VOTER_FULL { - fastpath = false - break - } - } - if fastpath { - return d.wrapped - } - voters := make([]ReplicaDescriptor, 0, len(d.wrapped)) - for i := range d.wrapped { - switch d.wrapped[i].GetType() { - case VOTER_FULL, VOTER_INCOMING: - voters = append(voters, d.wrapped[i]) - } - } - return voters + return d.Filter(predVoterFullOrIncoming) } // Learners returns the learner replicas in the set. This may allocate, but it @@ -211,18 +205,31 @@ func (d ReplicaDescriptors) Voters() []ReplicaDescriptor { // // For some related mega-comments, see Replica.sendSnapshot. func (d ReplicaDescriptors) Learners() []ReplicaDescriptor { - // Fastpath, most of the time, everything is a voter, so special case that and - // save the alloc. - var learners []ReplicaDescriptor + return d.Filter(predLearner) +} + +// Filter returns only the replica descriptors for which the supplied method +// returns true. The memory returned may be shared with the receiver. +func (d ReplicaDescriptors) Filter( + pred func(descriptor ReplicaDescriptor) bool, +) []ReplicaDescriptor { + // Fast path when all or none match to avoid allocations. + fastpath := true + out := d.wrapped for i := range d.wrapped { - if d.wrapped[i].GetType() == LEARNER { - if learners == nil { - learners = make([]ReplicaDescriptor, 0, len(d.wrapped)-i) + if pred(d.wrapped[i]) { + if !fastpath { + out = append(out, d.wrapped[i]) + } + } else { + if fastpath { + out = nil + out = append(out, d.wrapped[:i]...) 
+ fastpath = false } - learners = append(learners, d.wrapped[i]) } } - return learners + return out } // AsProto returns the protobuf representation of these replicas, suitable for diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index cc40313126ae..32f8e5291361 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -1339,21 +1339,26 @@ func execChangeReplicasTxn( if useJoint { typ = roachpb.VOTER_INCOMING } - rDesc, ok := updatedDesc.SetReplicaType(chg.target.NodeID, chg.target.StoreID, typ) - if !ok { + rDesc, prevTyp, ok := updatedDesc.SetReplicaType(chg.target.NodeID, chg.target.StoreID, typ) + if !ok || prevTyp != roachpb.LEARNER { return nil, errors.Errorf("cannot promote target %v which is missing as Learner", chg.target) } added = append(added, rDesc) case internalChangeTypeRemove: - var rDesc roachpb.ReplicaDescriptor - var ok bool - if !useJoint { - rDesc, ok = updatedDesc.RemoveReplica(chg.target.NodeID, chg.target.StoreID) - } else { - rDesc, ok = updatedDesc.SetReplicaType(chg.target.NodeID, chg.target.StoreID, roachpb.VOTER_OUTGOING) - } + rDesc, ok := updatedDesc.GetReplicaDescriptor(chg.target.StoreID) if !ok { - return nil, errors.Errorf("cannot remove nonexistent target %v", chg.target) + return nil, errors.Errorf("target %s not found", chg.target) + } + if typ := rDesc.GetType(); !useJoint || typ != roachpb.VOTER_FULL { + // NB: typ != VOTER_FULL means it's a LEARNER since we verified above that we + // did not start in a joint config. + rDesc, _ = updatedDesc.RemoveReplica(chg.target.NodeID, chg.target.StoreID) + } else { + var prevTyp roachpb.ReplicaType + rDesc, prevTyp, _ = updatedDesc.SetReplicaType(chg.target.NodeID, chg.target.StoreID, roachpb.VOTER_OUTGOING) + if prevTyp != roachpb.VOTER_FULL { + return nil, errors.Errorf("cannot transition from %s to VOTER_OUTGOING", prevTyp) + } } removed = append(removed, rDesc) default: diff --git a/pkg/storage/replica_follower_read.go b/pkg/storage/replica_follower_read.go index 807c85a383b0..5344470562e4 100644 --- a/pkg/storage/replica_follower_read.go +++ b/pkg/storage/replica_follower_read.go @@ -37,16 +37,17 @@ var FollowerReadsEnabled = settings.RegisterBoolSetting( func (r *Replica) canServeFollowerRead( ctx context.Context, ba *roachpb.BatchRequest, pErr *roachpb.Error, ) *roachpb.Error { - // There's no known reason that a learner replica couldn't serve follower - // reads (or RangeFeed), but as of the time of writing, learners are expected + // There's no known reason that a non-VOTER_FULL replica couldn't serve follower + // reads (or RangeFeed), but as of the time of writing, these are expected // to be short-lived, so it's not worth working out the edge-cases. Revisit if - // we add long-lived learners. + // we add long-lived learners or feel that incoming/outgoing voters also need + // to be able to serve follower reads. 
repDesc, err := r.GetReplicaDescriptor() if err != nil { return roachpb.NewError(err) } - if repDesc.GetType() == roachpb.LEARNER { - log.Event(ctx, "learner replicas cannot serve follower reads") + if typ := repDesc.GetType(); typ != roachpb.VOTER_FULL { + log.Eventf(ctx, "%s replicas cannot serve follower reads", typ) return pErr } diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 6ddf084ad42a..77bfef25edb4 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -38,6 +38,13 @@ import ( "github.com/stretchr/testify/require" ) +func predIncoming(rDesc roachpb.ReplicaDescriptor) bool { + return rDesc.GetType() == roachpb.VOTER_INCOMING +} +func predOutgoing(rDesc roachpb.ReplicaDescriptor) bool { + return rDesc.GetType() == roachpb.VOTER_OUTGOING +} + type replicationTestKnobs struct { storeKnobs storage.StoreTestingKnobs replicaAddStopAfterLearnerAtomic int64 @@ -636,7 +643,7 @@ func TestJointConfigLease(t *testing.T) { require.True(t, testutils.IsError(err, exp), err) } -func TestLearnerFollowerRead(t *testing.T) { +func TestLearnerAndJointConfigFollowerRead(t *testing.T) { defer leaktest.AfterTest(t)() if util.RaceEnabled { @@ -663,32 +670,61 @@ func TestLearnerFollowerRead(t *testing.T) { scratchDesc := tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) atomic.StoreInt64(<k.replicaAddStopAfterLearnerAtomic, 0) - req := roachpb.BatchRequest{Header: roachpb.Header{ - RangeID: scratchDesc.RangeID, - Timestamp: tc.Server(0).Clock().Now(), - }} - req.Add(&roachpb.ScanRequest{RequestHeader: roachpb.RequestHeader{ - Key: scratchDesc.StartKey.AsRawKey(), EndKey: scratchDesc.EndKey.AsRawKey(), - }}) + check := func() { + req := roachpb.BatchRequest{Header: roachpb.Header{ + RangeID: scratchDesc.RangeID, + Timestamp: tc.Server(0).Clock().Now(), + }} + req.Add(&roachpb.ScanRequest{RequestHeader: roachpb.RequestHeader{ + Key: scratchDesc.StartKey.AsRawKey(), EndKey: scratchDesc.EndKey.AsRawKey(), + }}) - _, repl := getFirstStoreReplica(t, tc.Server(1), scratchStartKey) - testutils.SucceedsSoon(t, func() error { - // Trace the Send call so we can verify that it hit the exact `learner - // replicas cannot serve follower reads` branch that we're trying to test. - sendCtx, collect, cancel := tracing.ContextWithRecordingSpan(ctx, "manual read request") - defer cancel() - _, pErr := repl.Send(sendCtx, req) - err := pErr.GoError() - if !testutils.IsError(err, `not lease holder`) { - return errors.Errorf(`expected "not lease holder" error got: %+v`, err) - } - const msg = `learner replicas cannot serve follower reads` - formattedTrace := tracing.FormatRecordedSpans(collect()) - if !strings.Contains(formattedTrace, msg) { - return errors.Errorf("expected a trace with `%s` got:\n%s", msg, formattedTrace) - } - return nil - }) + _, repl := getFirstStoreReplica(t, tc.Server(1), scratchStartKey) + testutils.SucceedsSoon(t, func() error { + // Trace the Send call so we can verify that it hit the exact `learner + // replicas cannot serve follower reads` branch that we're trying to test. 
+ sendCtx, collect, cancel := tracing.ContextWithRecordingSpan(ctx, "manual read request") + defer cancel() + _, pErr := repl.Send(sendCtx, req) + err := pErr.GoError() + if !testutils.IsError(err, `not lease holder`) { + return errors.Errorf(`expected "not lease holder" error got: %+v`, err) + } + const msg = `cannot serve follower reads` + formattedTrace := tracing.FormatRecordedSpans(collect()) + if !strings.Contains(formattedTrace, msg) { + return errors.Errorf("expected a trace with `%s` got:\n%s", msg, formattedTrace) + } + return nil + }) + } + + // Can't serve follower read from the LEARNER. + check() + + atomic.StoreInt64(&ltk.replicaAddStopAfterJointConfig, 1) + atomic.StoreInt64(&ltk.replicationAlwaysUseJointConfig, 1) + + scratchDesc = tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + // Removing a learner doesn't get you into a joint state (no voters changed). + require.False(t, scratchDesc.Replicas().InAtomicReplicationChange(), scratchDesc) + scratchDesc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + + // Re-adding the voter (and remaining in joint config) does. + require.True(t, scratchDesc.Replicas().InAtomicReplicationChange(), scratchDesc) + require.Len(t, scratchDesc.Replicas().Filter(predIncoming), 1) + + // Can't serve follower read from the VOTER_INCOMING. + check() + + // Removing the voter (and remaining in joint config) does. + + scratchDesc = tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + require.True(t, scratchDesc.Replicas().InAtomicReplicationChange(), scratchDesc) + require.Len(t, scratchDesc.Replicas().Filter(predOutgoing), 1) + + // Can't serve follower read from the VOTER_OUTGOING. + check() } func TestLearnerAdminRelocateRange(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() From c207c17cfe50dd65c48590215a44014d7ca60c62 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 11:31:10 +0200 Subject: [PATCH 08/27] storage: test AdminMerge in presence of joint config Release note: None --- pkg/storage/replica_command.go | 8 +++- pkg/storage/replica_learner_test.go | 71 ++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index 32f8e5291361..e4dbf0e5fe17 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -585,10 +585,14 @@ func (r *Replica) AdminMerge( // This behavior can be changed later if the complexity becomes worth // it, but it's not right now.
lReplicas, rReplicas := origLeftDesc.Replicas(), rightDesc.Replicas() - if len(lReplicas.Voters()) != len(lReplicas.All()) { + + predFullVoter := func(rDesc roachpb.ReplicaDescriptor) bool { + return rDesc.GetType() == roachpb.VOTER_FULL + } + if len(lReplicas.Filter(predFullVoter)) != len(lReplicas.All()) { return errors.Errorf("cannot merge range with non-voter replicas on lhs: %s", lReplicas) } - if len(rReplicas.Voters()) != len(rReplicas.All()) { + if len(rReplicas.Filter(predFullVoter)) != len(rReplicas.All()) { return errors.Errorf("cannot merge range with non-voter replicas on rhs: %s", rReplicas) } if !replicaSetsEqual(lReplicas.All(), rReplicas.All()) { diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 77bfef25edb4..c3d3203e4a4b 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -764,7 +764,7 @@ func TestLearnerAdminRelocateRange(t *testing.T) { require.Empty(t, desc.Replicas().Learners()) } -func TestLearnerAdminMerge(t *testing.T) { +func TestLearnerAndJointConfigAdminMerge(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() @@ -784,20 +784,67 @@ func TestLearnerAdminMerge(t *testing.T) { _, _ = tc.SplitRangeOrFatal(t, splitKey2) atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - _ = tc.AddReplicasOrFatal(t, splitKey2, tc.Target(1)) + // Three ranges (in that order): + // desc1: will have a learner (later joint voter) + // desc2 (unnamed): is always left vanilla + // desc3: like desc1 + // + // This allows testing merges that have a learner on the RHS (on desc2) and + // the LHS (on desc1). + desc1 := tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + desc3 := tc.AddReplicasOrFatal(t, splitKey2, tc.Target(1)) atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) - // Learner on the lhs should fail. - err := tc.Server(0).DB().AdminMerge(ctx, scratchStartKey) - if !testutils.IsError(err, `cannot merge range with non-voter replicas on lhs`) { - t.Fatalf(`expected "cannot merge range with non-voter replicas on lhs" error got: %+v`, err) - } - // Learner on the rhs should fail. - err = tc.Server(0).DB().AdminMerge(ctx, splitKey1) - if !testutils.IsError(err, `cannot merge range with non-voter replicas on rhs`) { - t.Fatalf(`expected "cannot merge range with non-voter replicas on rhs" error got: %+v`, err) + checkFails := func() { + err := tc.Server(0).DB().AdminMerge(ctx, scratchStartKey) + if exp := `cannot merge range with non-voter replicas on`; !testutils.IsError(err, exp) { + t.Fatalf(`expected "%s" error got: %+v`, exp, err) + } + err = tc.Server(0).DB().AdminMerge(ctx, splitKey1) + if exp := `cannot merge range with non-voter replicas on`; !testutils.IsError(err, exp) { + t.Fatalf(`expected "%s" error got: %+v`, exp, err) + } } + + // LEARNER on the lhs or rhs should fail. + checkFails() + + // Turn the learners on desc1 and desc3 into VOTER_INCOMINGs.
+ atomic.StoreInt64(&ltk.replicaAddStopAfterJointConfig, 1) + atomic.StoreInt64(&ltk.replicationAlwaysUseJointConfig, 1) + desc1 = tc.RemoveReplicasOrFatal(t, desc1.StartKey.AsRawKey(), tc.Target(1)) + desc1 = tc.AddReplicasOrFatal(t, desc1.StartKey.AsRawKey(), tc.Target(1)) + require.Len(t, desc1.Replicas().Filter(predIncoming), 1) + desc3 = tc.RemoveReplicasOrFatal(t, desc3.StartKey.AsRawKey(), tc.Target(1)) + desc3 = tc.AddReplicasOrFatal(t, desc3.StartKey.AsRawKey(), tc.Target(1)) + require.Len(t, desc3.Replicas().Filter(predIncoming), 1) + + // VOTER_INCOMING on the lhs or rhs should fail. + checkFails() + + // Turn the incoming voters on desc1 and desc3 into VOTER_OUTGOINGs. + desc1 = tc.RemoveReplicasOrFatal(t, desc1.StartKey.AsRawKey(), tc.Target(1)) + require.Len(t, desc1.Replicas().Filter(predOutgoing), 1) + desc3 = tc.RemoveReplicasOrFatal(t, desc3.StartKey.AsRawKey(), tc.Target(1)) + require.Len(t, desc3.Replicas().Filter(predOutgoing), 1) + + // VOTER_OUTGOING on the lhs or rhs should fail. + checkFails() + + // Add a VOTER_INCOMING to desc2 to make sure it actually excludes this type + // of replicas from merges (rather than really just checking whether the + // replica sets are equal). + + desc2 := tc.AddReplicasOrFatal(t, splitKey1, tc.Target(1)) + require.Len(t, desc2.Replicas().Filter(predIncoming), 1) + + checkFails() + + // Ditto VOTER_OUTGOING. + desc2 = tc.RemoveReplicasOrFatal(t, desc2.StartKey.AsRawKey(), tc.Target(1)) + require.Len(t, desc2.Replicas().Filter(predOutgoing), 1) + + checkFails() } From 1afd7b350242c071a7fa0b3cc9242437bcde737c Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 11:50:38 +0200 Subject: [PATCH 09/27] storage: test split when in joint configuration Release note: None --- pkg/storage/replica_learner_test.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index c3d3203e4a4b..8e4daf931330 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -293,7 +293,7 @@ func TestLearnerSnapshotFailsRollback(t *testing.T) { require.Empty(t, desc.Replicas().Learners()) } -func TestSplitWithLearner(t *testing.T) { +func TestSplitWithLearnerOrJointConfig(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() @@ -319,6 +319,20 @@ func TestSplitWithLearnerOrJointConfig(t *testing.T) { require.NoError(t, err) require.Len(t, left.Replicas().Learners(), 1) require.Len(t, right.Replicas().Learners(), 1) + + // Remove the learner on the RHS. + right = tc.RemoveReplicasOrFatal(t, right.StartKey.AsRawKey(), tc.Target(1)) + + // Put an incoming voter on the RHS and split again. This works because the + // split auto-transitions us out of the joint conf before doing work.
+ atomic.StoreInt64(&ltk.replicationAlwaysUseJointConfig, 1) + atomic.StoreInt64(&ltk.replicaAddStopAfterJointConfig, 1) + right = tc.AddReplicasOrFatal(t, right.StartKey.AsRawKey(), tc.Target(1)) + require.Len(t, right.Replicas().Filter(predIncoming), 1) + left, right, err = tc.SplitRange(right.StartKey.AsRawKey().Next()) + require.NoError(t, err) + require.False(t, left.Replicas().InAtomicReplicationChange(), left) + require.False(t, right.Replicas().InAtomicReplicationChange(), right) } From 85eacfcda8f80601933003e173a200e7f6731248 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 12:37:09 +0200 Subject: [PATCH 10/27] storage: test merge queue seeing joint configs It simply transitions them out before proceeding. Release note: None --- pkg/storage/merge_queue.go | 34 ++++-- pkg/storage/replica_command.go | 25 ++-- pkg/storage/replica_learner_test.go | 177 ++++++++++++++++++++-------- pkg/storage/replicate_queue.go | 2 +- 4 files changed, 166 insertions(+), 72 deletions(-) diff --git a/pkg/storage/merge_queue.go b/pkg/storage/merge_queue.go index 08557107cf2b..51a93870b6fa 100644 --- a/pkg/storage/merge_queue.go +++ b/pkg/storage/merge_queue.go @@ -188,22 +188,22 @@ var _ purgatoryError = rangeMergePurgatoryError{} func (mq *mergeQueue) requestRangeStats( ctx context.Context, key roachpb.Key, -) (roachpb.RangeDescriptor, enginepb.MVCCStats, float64, error) { +) (*roachpb.RangeDescriptor, enginepb.MVCCStats, float64, error) { res, pErr := client.SendWrappedWith(ctx, mq.db.NonTransactionalSender(), roachpb.Header{ ReturnRangeInfo: true, }, &roachpb.RangeStatsRequest{ RequestHeader: roachpb.RequestHeader{Key: key}, }) if pErr != nil { - return roachpb.RangeDescriptor{}, enginepb.MVCCStats{}, 0, pErr.GoError() + return nil, enginepb.MVCCStats{}, 0, pErr.GoError() } rangeInfos := res.Header().RangeInfos if len(rangeInfos) != 1 { - return roachpb.RangeDescriptor{}, enginepb.MVCCStats{}, 0, fmt.Errorf( + return nil, enginepb.MVCCStats{}, 0, fmt.Errorf( "mergeQueue.requestRangeStats: response had %d range infos but exactly one was expected", len(rangeInfos)) } - return rangeInfos[0].Desc, res.(*roachpb.RangeStatsResponse).MVCCStats, + return &rangeInfos[0].Desc, res.(*roachpb.RangeStatsResponse).MVCCStats, res.(*roachpb.RangeStatsResponse).QueriesPerSecond, nil } @@ -275,21 +275,33 @@ func (mq *mergeQueue) process( } { - // AdminMerge errors if there are learners on either side and - // AdminRelocateRange removes any on the range it operates on. For the sake - // of obviousness, just remove them all upfront. - newLHSDesc, err := removeLearners(ctx, lhsRepl.store.DB(), lhsDesc) + // AdminMerge errors if there is a learner or joint config on either + // side and AdminRelocateRange removes any on the range it operates on. + // For the sake of obviousness, just fix this all upfront.
+ var err error + lhsDesc, err = maybeLeaveAtomicChangeReplicas(ctx, lhsRepl.store, lhsDesc) if err != nil { log.VEventf(ctx, 2, `%v`, err) return err } - lhsDesc = newLHSDesc - newRHSDesc, err := removeLearners(ctx, lhsRepl.store.DB(), &rhsDesc) + + lhsDesc, err = removeLearners(ctx, lhsRepl.store.DB(), lhsDesc) + if err != nil { + log.VEventf(ctx, 2, `%v`, err) + return err + } + + rhsDesc, err = maybeLeaveAtomicChangeReplicas(ctx, lhsRepl.store, rhsDesc) + if err != nil { + log.VEventf(ctx, 2, `%v`, err) + return err + } + + rhsDesc, err = removeLearners(ctx, lhsRepl.store.DB(), rhsDesc) if err != nil { log.VEventf(ctx, 2, `%v`, err) return err } - rhsDesc = *newRHSDesc } lhsReplicas, rhsReplicas := lhsDesc.Replicas().All(), rhsDesc.Replicas().All() diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index e4dbf0e5fe17..9516d1dfecef 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -147,7 +147,7 @@ func (r *Replica) adminSplitWithDescriptor( // The split queue doesn't care about the set of replicas, so if we somehow // are being handed one that's in a joint state, finalize that before // continuing. - desc, err = r.maybeLeaveAtomicChangeReplicas(ctx, desc) + desc, err = maybeLeaveAtomicChangeReplicas(ctx, r.store, desc) if err != nil { return roachpb.AdminSplitResponse{}, err } @@ -584,6 +584,12 @@ func (r *Replica) AdminMerge( // are no non-voter replicas, in case some third type is later added). // This behavior can be changed later if the complexity becomes worth // it, but it's not right now. + // + // NB: the merge queue transitions out of any joint states and removes + // any learners it sees. It's sort of silly that we don't do that here + // instead; effectively any caller of AdminMerge that is not the merge + // queue won't be able to recover from these cases (though the replicate + // queues should fix things up quickly). lReplicas, rReplicas := origLeftDesc.Replicas(), rightDesc.Replicas() predFullVoter := func(rDesc roachpb.ReplicaDescriptor) bool { @@ -927,7 +933,7 @@ func (r *Replica) changeReplicasImpl( // If in a joint config, clean up. The assumption here is that the caller // of ChangeReplicas didn't even realize that they were holding on to a // joint descriptor and would rather not have to deal with that fact. - desc, err = r.maybeLeaveAtomicChangeReplicas(ctx, desc) + desc, err = maybeLeaveAtomicChangeReplicas(ctx, r.store, desc) if err != nil { return nil, err } @@ -974,7 +980,7 @@ func (r *Replica) changeReplicasImpl( if err != nil { // If the error occurred while transitioning out of an atomic replication change, // try again here with a fresh descriptor; this is a noop otherwise. - if _, err := r.maybeLeaveAtomicChangeReplicas(ctx, r.Desc()); err != nil { + if _, err := maybeLeaveAtomicChangeReplicas(ctx, r.store, r.Desc()); err != nil { return nil, err } if fn := r.store.cfg.TestingKnobs.ReplicaAddSkipLearnerRollback; fn != nil && fn() { @@ -995,8 +1001,8 @@ func (r *Replica) changeReplicasImpl( // maybeLeaveAtomicChangeReplicas transitions out of the joint configuration if // the descriptor indicates one. This involves running a distributed transaction // updating said descriptor, the result of which will be returned. 
-func (r *Replica) maybeLeaveAtomicChangeReplicas( - ctx context.Context, desc *roachpb.RangeDescriptor, +func maybeLeaveAtomicChangeReplicas( + ctx context.Context, store *Store, desc *roachpb.RangeDescriptor, ) (*roachpb.RangeDescriptor, error) { // We want execChangeReplicasTxn to be able to make sure it's only tasked // with leaving a joint state when it's in one, so make sure we don't call @@ -1005,12 +1011,15 @@ return desc, nil } + // NB: this is matched on in TestMergeQueueSeesLearnerOrJointConfig. + log.Eventf(ctx, "transitioning out of joint configuration %s", desc) + // NB: reason and detail won't be used because no range log event will be // emitted. // // TODO(tbg): reconsider this. return execChangeReplicasTxn( - ctx, r.store, desc, storagepb.ReasonUnknown /* unused */, "", nil, /* iChgs */ + ctx, store, desc, storagepb.ReasonUnknown /* unused */, "", nil, /* iChgs */ ) } @@ -1104,7 +1113,7 @@ func addLearnerReplicas( // two distributed transactions. On error, it is possible that the range is in // the intermediate ("joint") configuration in which a quorum of both the old // and new sets of voters is required. If a range is encountered in this state, -// r.maybeLeaveAtomicReplicationChange can fix this, but it is the caller's +// maybeLeaveAtomicChangeReplicas can fix this, but it is the caller's // job to do this when necessary. func (r *Replica) atomicReplicationChange( ctx context.Context, @@ -1176,7 +1185,7 @@ } // Leave the joint config if we entered one. - return r.maybeLeaveAtomicChangeReplicas(ctx, desc) + return maybeLeaveAtomicChangeReplicas(ctx, r.store, desc) } // tryRollbackLearnerReplica attempts to remove a learner specified by the diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 8e4daf931330..6f876578318a 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -52,6 +52,23 @@ type replicationTestKnobs struct { replicationAlwaysUseJointConfig int64 } +func (rtl *replicationTestKnobs) withLearnerStop(f func()) { + prev := atomic.LoadInt64(&rtl.replicaAddStopAfterLearnerAtomic) + defer atomic.StoreInt64(&rtl.replicaAddStopAfterLearnerAtomic, prev) + atomic.StoreInt64(&rtl.replicaAddStopAfterLearnerAtomic, 1) + f() +} + +func (rtl *replicationTestKnobs) withJointConfigAndStop(f func()) { + au := atomic.LoadInt64(&rtl.replicationAlwaysUseJointConfig) + sa := atomic.LoadInt64(&rtl.replicaAddStopAfterJointConfig) + defer atomic.StoreInt64(&rtl.replicationAlwaysUseJointConfig, au) + defer atomic.StoreInt64(&rtl.replicaAddStopAfterJointConfig, sa) + atomic.StoreInt64(&rtl.replicationAlwaysUseJointConfig, 1) + atomic.StoreInt64(&rtl.replicaAddStopAfterJointConfig, 1) + f() +} + func makeReplicationTestKnobs() (base.TestingKnobs, *replicationTestKnobs) { var k replicationTestKnobs k.storeKnobs.ReplicaAddStopAfterLearnerSnapshot = func() bool { @@ -230,9 +247,10 @@ func TestLearnerRaftConfState(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter.
scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - desc := tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + var desc roachpb.RangeDescriptor + ltk.withLearnerStop(func() { + desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) require.Len(t, desc.Replicas().Learners(), 1) learnerReplicaID := desc.Replicas().Learners()[0].ReplicaID @@ -309,9 +327,9 @@ func TestSplitWithLearnerOrJointConfig(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + ltk.withLearnerStop(func() { + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) // Splitting a learner is allowed. This orphans the two learners, but the // replication queue will eventually clean this up. @@ -352,9 +370,9 @@ func TestReplicateQueueSeesLearner(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + ltk.withLearnerStop(func() { + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) // Run the replicate queue. store, repl := getFirstStoreReplica(t, tc.Server(0), scratchStartKey) @@ -388,9 +406,9 @@ func TestReplicaGCQueueSeesLearner(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + ltk.withLearnerStop(func() { + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) // Run the replicaGC queue. @@ -615,9 +633,9 @@ func TestLearnerNoAcceptLease(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter.
scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + ltk.withLearnerStop(func() { + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) desc := tc.LookupRangeOrFatal(t, scratchStartKey) err := tc.TransferRangeLease(desc, tc.Target(1)) @@ -680,9 +698,10 @@ func TestLearnerAndJointConfigFollowerRead(t *testing.T) { db.Exec(t, `SET CLUSTER SETTING kv.closed_timestamp.follower_reads_enabled = true`) scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - scratchDesc := tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + var scratchDesc roachpb.RangeDescriptor + ltk.withLearnerStop(func() { + scratchDesc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) check := func() { req := roachpb.BatchRequest{Header: roachpb.Header{ @@ -755,10 +774,10 @@ func TestLearnerAdminRelocateRange(t *testing.T) { db.Exec(t, `SET CLUSTER SETTING kv.learner_replicas.enabled = true`) scratchStartKey := tc.ScratchRange(t) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(2)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + ltk.withLearnerStop(func() { + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(2)) + }) // Test AdminRelocateRange's treatment of learners by having one that it has // to remove and one that should stay and become a voter. @@ -797,7 +816,6 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { _, _ = tc.SplitRangeOrFatal(t, splitKey1) _, _ = tc.SplitRangeOrFatal(t, splitKey2) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) // Three ranges (in that order): // desc1: will have a learner (later joint voter) // desc2 (unnamed): is always left vanilla // desc3: like desc1 // // This allows testing merges that have a learner on the RHS (on desc2) and // the LHS (on desc1).
- desc1 := tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - desc3 := tc.AddReplicasOrFatal(t, splitKey2, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + var desc1, desc3 roachpb.RangeDescriptor + ltk.withLearnerStop(func() { + desc1 = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + desc3 = tc.AddReplicasOrFatal(t, splitKey2, tc.Target(1)) + }) checkFails := func() { err := tc.Server(0).DB().AdminMerge(ctx, scratchStartKey) @@ -861,7 +881,7 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { checkFails() } -func TestMergeQueueSeesLearner(t *testing.T) { +func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() knobs, ltk := makeReplicationTestKnobs() @@ -879,34 +899,87 @@ func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) { origDesc := tc.LookupRangeOrFatal(t, scratchStartKey) splitKey := scratchStartKey.Next() - _, _ = tc.SplitRangeOrFatal(t, splitKey) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 1) - _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) - atomic.StoreInt64(&ltk.replicaAddStopAfterLearnerAtomic, 0) + splitAndUnsplit := func() roachpb.RangeDescriptor { + desc, _ := tc.SplitRangeOrFatal(t, splitKey) + // Unsplit the range to clear the sticky bit. + require.NoError(t, tc.Server(0).DB().AdminUnsplit(ctx, splitKey)) + return desc + } - // Unsplit the range to clear the sticky bit. - require.NoError(t, tc.Server(0).DB().AdminUnsplit(ctx, splitKey)) + // Run the merge queue while there's a learner on the LHS. + { + splitAndUnsplit() - // Run the merge queue. - store, repl := getFirstStoreReplica(t, tc.Server(0), scratchStartKey) - trace, errMsg, err := store.ManuallyEnqueue(ctx, "merge", repl, true /* skipShouldQueue */) - require.NoError(t, err) - require.Equal(t, ``, errMsg) - formattedTrace := tracing.FormatRecordedSpans(trace) - expectedMessages := []string{ - `removing learner replicas \[n2,s2\]`, - `merging to produce range: /Table/Max-/Max`, - } - if err := testutils.MatchInOrder(formattedTrace, expectedMessages...); err != nil { - t.Fatal(err) + ltk.withLearnerStop(func() { + _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) + + store, repl := getFirstStoreReplica(t, tc.Server(0), scratchStartKey) + trace, errMsg, err := store.ManuallyEnqueue(ctx, "merge", repl, true /* skipShouldQueue */) + require.NoError(t, err) + require.Equal(t, ``, errMsg) + formattedTrace := tracing.FormatRecordedSpans(trace) + expectedMessages := []string{ + `removing learner replicas \[n2,s2\]`, + `merging to produce range: /Table/Max-/Max`, + } + if err := testutils.MatchInOrder(formattedTrace, expectedMessages...); err != nil { + t.Fatal(err) + } + + // Sanity check that the desc has the same bounds it did originally. + desc := tc.LookupRangeOrFatal(t, scratchStartKey) + require.Equal(t, origDesc.StartKey, desc.StartKey) + require.Equal(t, origDesc.EndKey, desc.EndKey) + // The merge removed the learner. + require.Len(t, desc.Replicas().Voters(), 1) + require.Empty(t, desc.Replicas().Learners()) } - // Sanity check that the desc has the same bounds it did originally. - desc := tc.LookupRangeOrFatal(t, scratchStartKey) - require.Equal(t, origDesc.StartKey, desc.StartKey) - require.Equal(t, origDesc.EndKey, desc.EndKey) - // The merge removed the learner.
- require.Len(t, desc.Replicas().Voters(), 1) - require.Empty(t, desc.Replicas().Learners()) + { + // Create the RHS again and repeat the same game, except this time the LHS + // gets a VOTER_INCOMING for s2. + desc := splitAndUnsplit() + + ltk.withJointConfigAndStop(func() { + desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + }) + require.Len(t, desc.Replicas().Filter(predIncoming), 1, desc) + + checkTransitioningOut := func() { + t.Helper() + store, repl := getFirstStoreReplica(t, tc.Server(0), scratchStartKey) + trace, errMsg, err := store.ManuallyEnqueue(ctx, "merge", repl, true /* skipShouldQueue */) + require.NoError(t, err) + require.Equal(t, ``, errMsg) + formattedTrace := tracing.FormatRecordedSpans(trace) + expectedMessages := []string{ + `transitioning out of joint configuration`, + `merging to produce range: /Table/Max-/Max`, + } + if err := testutils.MatchInOrder(formattedTrace, expectedMessages...); err != nil { + t.Fatal(err) + } + } + + checkTransitioningOut() + desc = tc.LookupRangeOrFatal(t, scratchStartKey) + require.Len(t, desc.Replicas().Voters(), 2) + + // Repeat the game, except now we start with two replicas and we're + // giving the RHS a VOTER_OUTGOING. + desc = splitAndUnsplit() + ltk.withJointConfigAndStop(func() { + descRight := tc.RemoveReplicasOrFatal(t, desc.EndKey.AsRawKey(), tc.Target(1)) + require.Len(t, descRight.Replicas().Filter(predOutgoing), 1, desc) + }) + + // This should transition out (i.e. remove the voter on s2 for the RHS) + // and then do its thing, which means in the end we have two voters again. + checkTransitioningOut() + desc = tc.LookupRangeOrFatal(t, scratchStartKey) + require.Len(t, desc.Replicas().Voters(), 2) + require.False(t, desc.Replicas().InAtomicReplicationChange(), desc) + } } diff --git a/pkg/storage/replicate_queue.go b/pkg/storage/replicate_queue.go index a1e2a40af580..75dc83eaf8b5 100644 --- a/pkg/storage/replicate_queue.go +++ b/pkg/storage/replicate_queue.go @@ -347,7 +347,7 @@ func (rq *replicateQueue) processOneChange( case AllocatorConsiderRebalance: return rq.considerRebalance(ctx, repl, voterReplicas, canTransferLease, dryRun) case AllocatorFinalizeAtomicReplicationChange: - _, err := repl.maybeLeaveAtomicChangeReplicas(ctx, repl.Desc()) + _, err := maybeLeaveAtomicChangeReplicas(ctx, repl.store, repl.Desc()) return false, err } return true, nil From 4b523c7f0efe492994d12f9d4e4499a5d30af90b Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 12:49:49 +0200 Subject: [PATCH 11/27] storage: test replicaGCQueue seeing joint config Release note: None --- pkg/storage/replica_learner_test.go | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 6f876578318a..785ec1fb9fe8 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -391,7 +391,7 @@ func TestReplicateQueueSeesLearner(t *testing.T) { require.Len(t, desc.Replicas().Voters(), 3) } -func TestReplicaGCQueueSeesLearner(t *testing.T) { +func TestReplicaGCQueueSeesLearnerOrJointConfig(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() knobs, ltk := makeReplicationTestKnobs() @@ -411,16 +411,29 @@ func TestReplicaGCQueueSeesLearner(t *testing.T) { }) // Run the replicaGC queue. 
- store, repl := getFirstStoreReplica(t, tc.Server(1), scratchStartKey) - trace, errMsg, err := store.ManuallyEnqueue(ctx, "replicaGC", repl, true /* skipShouldQueue */) - require.NoError(t, err) - require.Equal(t, ``, errMsg) - const msg = `not gc'able, replica is still in range descriptor: (n2,s2):2LEARNER` - require.Contains(t, tracing.FormatRecordedSpans(trace), msg) + checkNoGC := func() roachpb.RangeDescriptor { + store, repl := getFirstStoreReplica(t, tc.Server(1), scratchStartKey) + trace, errMsg, err := store.ManuallyEnqueue(ctx, "replicaGC", repl, true /* skipShouldQueue */) + require.NoError(t, err) + require.Equal(t, ``, errMsg) + const msg = `not gc'able, replica is still in range descriptor: (n2,s2):` + require.Contains(t, tracing.FormatRecordedSpans(trace), msg) + return tc.LookupRangeOrFatal(t, scratchStartKey) + } + desc := checkNoGC() // Make sure it didn't collect the learner. - desc := tc.LookupRangeOrFatal(t, scratchStartKey) require.NotEmpty(t, desc.Replicas().Learners()) + + // Now get the range into a joint config. + tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(1)) // remove learner + ltk.withJointConfigAndStop(func() { + desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) + require.Len(t, desc.Replicas().Filter(predIncoming), 1, desc) + }) + + postDesc := checkNoGC() + require.Equal(t, desc, postDesc) } func TestRaftSnapshotQueueSeesLearner(t *testing.T) { From b7a10e227b59865a4156b3048192b524dbb4496e Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 13:01:24 +0200 Subject: [PATCH 12/27] storage: test that replicate queue transitions out of joint configs Release note: None --- pkg/storage/replica_learner_test.go | 47 +++++++++++++++++++++-------- pkg/storage/replicate_queue.go | 2 +- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 785ec1fb9fe8..1098b59a205e 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -353,7 +353,7 @@ func TestSplitWithLearnerOrJointConfig(t *testing.T) { require.False(t, right.Replicas().InAtomicReplicationChange(), right) } -func TestReplicateQueueSeesLearner(t *testing.T) { +func TestReplicateQueueSeesLearnerOrJointConfig(t *testing.T) { defer leaktest.AfterTest(t)() // NB also see TestAllocatorRemoveLearner for a lower-level test. @@ -376,19 +376,42 @@ func TestReplicateQueueSeesLearner(t *testing.T) { // Run the replicate queue. store, repl := getFirstStoreReplica(t, tc.Server(0), scratchStartKey) - require.Equal(t, int64(0), getFirstStoreMetric(t, tc.Server(0), `queue.replicate.removelearnerreplica`)) - _, errMsg, err := store.ManuallyEnqueue(ctx, "replicate", repl, true /* skipShouldQueue */) - require.NoError(t, err) - require.Equal(t, ``, errMsg) - require.Equal(t, int64(1), getFirstStoreMetric(t, tc.Server(0), `queue.replicate.removelearnerreplica`)) + { + require.Equal(t, int64(0), getFirstStoreMetric(t, tc.Server(0), `queue.replicate.removelearnerreplica`)) + _, errMsg, err := store.ManuallyEnqueue(ctx, "replicate", repl, true /* skipShouldQueue */) + require.NoError(t, err) + require.Equal(t, ``, errMsg) + require.Equal(t, int64(1), getFirstStoreMetric(t, tc.Server(0), `queue.replicate.removelearnerreplica`)) - // Make sure it deleted the learner. - desc := tc.LookupRangeOrFatal(t, scratchStartKey) - require.Empty(t, desc.Replicas().Learners()) + // Make sure it deleted the learner. 
+ desc := tc.LookupRangeOrFatal(t, scratchStartKey) + require.Empty(t, desc.Replicas().Learners()) + + // Bonus points: the replicate queue keeps processing until there is nothing + // to do, so it should have upreplicated the range to 3. + require.Len(t, desc.Replicas().Voters(), 3) + } - // Bonus points: the replicate queue keeps processing until there is nothing - // to do, so it should have upreplicated the range to 3. - require.Len(t, desc.Replicas().Voters(), 3) + // Create a VOTER_OUTGOING, i.e. a joint configuration. + ltk.withJointConfigAndStop(func() { + desc := tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(2)) + require.True(t, desc.Replicas().InAtomicReplicationChange(), desc) + trace, errMsg, err := store.ManuallyEnqueue(ctx, "replicate", repl, true /* skipShouldQueue */) + require.NoError(t, err) + require.Equal(t, ``, errMsg) + formattedTrace := tracing.FormatRecordedSpans(trace) + expectedMessages := []string{ + `transitioning out of joint configuration`, + } + if err := testutils.MatchInOrder(formattedTrace, expectedMessages...); err != nil { + t.Fatal(err) + } + + desc = tc.LookupRangeOrFatal(t, scratchStartKey) + require.False(t, desc.Replicas().InAtomicReplicationChange(), desc) + // Queue processed again, so we're back to three replicas. + require.Len(t, desc.Replicas().Voters(), 3) + }) } func TestReplicaGCQueueSeesLearnerOrJointConfig(t *testing.T) { diff --git a/pkg/storage/replicate_queue.go b/pkg/storage/replicate_queue.go index 75dc83eaf8b5..91f039901d62 100644 --- a/pkg/storage/replicate_queue.go +++ b/pkg/storage/replicate_queue.go @@ -348,7 +348,7 @@ func (rq *replicateQueue) processOneChange( return rq.considerRebalance(ctx, repl, voterReplicas, canTransferLease, dryRun) case AllocatorFinalizeAtomicReplicationChange: _, err := maybeLeaveAtomicChangeReplicas(ctx, repl.store, repl.Desc()) - return false, err + return true, err } return true, nil } From 6d983ec292bc213aa6c8fcfac486b698bf328e04 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 20:19:37 +0200 Subject: [PATCH 13/27] fixup! storage: test that incoming/outcoming voters don't serve follower reads --- pkg/roachpb/metadata_replicas.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/roachpb/metadata_replicas.go b/pkg/roachpb/metadata_replicas.go index c5dcd0b37608..d13d8c4ef075 100644 --- a/pkg/roachpb/metadata_replicas.go +++ b/pkg/roachpb/metadata_replicas.go @@ -210,9 +210,7 @@ func (d ReplicaDescriptors) Learners() []ReplicaDescriptor { // Filter returns only the replica descriptors for which the supplied method // returns true. The memory returned may be shared with the receiver. -func (d ReplicaDescriptors) Filter( - pred func(descriptor ReplicaDescriptor) bool, -) []ReplicaDescriptor { +func (d ReplicaDescriptors) Filter(pred func(rDesc ReplicaDescriptor) bool) []ReplicaDescriptor { // Fast path when all or none match to avoid allocations. fastpath := true out := d.wrapped From 235df8229de073ddccc7cf34842c46bc4115b52b Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 20:21:12 +0200 Subject: [PATCH 14/27] fixup! 
storage: test merge queue seeing joint configs --- pkg/storage/merge_queue.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/storage/merge_queue.go b/pkg/storage/merge_queue.go index 51a93870b6fa..1d59b2300fa4 100644 --- a/pkg/storage/merge_queue.go +++ b/pkg/storage/merge_queue.go @@ -275,29 +275,30 @@ func (mq *mergeQueue) process( } { + store, db := lhsRepl.store, lhsRepl.store.DB() // AdminMerge errors if there is a learner or joint config on either // side and AdminRelocateRange removes any on the range it operates on. // For the sake of obviousness, just fix this all upfront. var err error - lhsDesc, err = maybeLeaveAtomicChangeReplicas(ctx, lhsRepl.store, lhsDesc) + lhsDesc, err = maybeLeaveAtomicChangeReplicas(ctx, store, lhsDesc) if err != nil { log.VEventf(ctx, 2, `%v`, err) return err } - lhsDesc, err = removeLearners(ctx, lhsRepl.store.DB(), lhsDesc) + lhsDesc, err = removeLearners(ctx, db, lhsDesc) if err != nil { log.VEventf(ctx, 2, `%v`, err) return err } - rhsDesc, err = maybeLeaveAtomicChangeReplicas(ctx, lhsRepl.store, rhsDesc) + rhsDesc, err = maybeLeaveAtomicChangeReplicas(ctx, store, rhsDesc) if err != nil { log.VEventf(ctx, 2, `%v`, err) return err } - rhsDesc, err = removeLearners(ctx, lhsRepl.store.DB(), rhsDesc) + rhsDesc, err = removeLearners(ctx, db, rhsDesc) if err != nil { log.VEventf(ctx, 2, `%v`, err) return err From 218476cb915c37ec1a5ca2e6c004c0f17cb22814 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:48:23 +0200 Subject: [PATCH 15/27] fixup! storage: test that replicate queue transitions out of joint configs --- pkg/storage/replicate_queue.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/storage/replicate_queue.go b/pkg/storage/replicate_queue.go index 91f039901d62..f2c1b41cca0f 100644 --- a/pkg/storage/replicate_queue.go +++ b/pkg/storage/replicate_queue.go @@ -348,6 +348,8 @@ func (rq *replicateQueue) processOneChange( return rq.considerRebalance(ctx, repl, voterReplicas, canTransferLease, dryRun) case AllocatorFinalizeAtomicReplicationChange: _, err := maybeLeaveAtomicChangeReplicas(ctx, repl.store, repl.Desc()) + // Requeue because either we failed to transition out of a joint state + // (bad) or we did and there might be more to do for that range. return true, err } return true, nil From 0698167cd00bf61c39be5c16cc84cd91b109a612 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:51:16 +0200 Subject: [PATCH 16/27] fixup! storage: add ReplicationAlwaysUseJointConfig testing knob --- pkg/storage/testing_knobs.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/storage/testing_knobs.go b/pkg/storage/testing_knobs.go index 8f2b349e14e0..16148b511816 100644 --- a/pkg/storage/testing_knobs.go +++ b/pkg/storage/testing_knobs.go @@ -203,7 +203,8 @@ type StoreTestingKnobs struct { // configuration. ReplicaAddStopAfterJointConfig func() bool // ReplicationAlwaysUseJointConfig causes replica addition to always go - // through a joint configuration, even when this isn't necessary. + // through a joint configuration, even when this isn't necessary (because + // the replication change affects only one replica). ReplicationAlwaysUseJointConfig func() bool // BeforeSnapshotSSTIngestion is run just before the SSTs are ingested when // applying a snapshot. From 837c16c0ab499844d213521a126facc418e57c3f Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:51:39 +0200 Subject: [PATCH 17/27] fixup! 
storage: add ReplicaAddStopAfterJointConfig store knob --- pkg/storage/testing_knobs.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/storage/testing_knobs.go b/pkg/storage/testing_knobs.go index 16148b511816..8a8548d5bac5 100644 --- a/pkg/storage/testing_knobs.go +++ b/pkg/storage/testing_knobs.go @@ -200,7 +200,10 @@ type StoreTestingKnobs struct { ReplicaAddStopAfterLearnerSnapshot func() bool // ReplicaAddStopAfterJointConfig causes replica addition to return early if // the func returns true. This happens before transitioning out of a joint - // configuration. + // configuration, after the joint configuration has been entered by means + // of a first ChangeReplicas transaction. If the replication change does + // not use joint consensus, this early return is identical to the regular + // return path. ReplicaAddStopAfterJointConfig func() bool // ReplicationAlwaysUseJointConfig causes replica addition to always go // through a joint configuration, even when this isn't necessary (because From 5e234b4255edefa969b96234a6513538a797960b Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:52:05 +0200 Subject: [PATCH 18/27] fixup! storage: add TestJointConfigLease --- pkg/storage/replica_learner_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 1098b59a205e..ec7fc7cd14fd 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -702,9 +702,9 @@ func TestJointConfigLease(t *testing.T) { exp := `cannot transfer lease to replica of type VOTER_INCOMING` require.True(t, testutils.IsError(err, exp), err) - // NB: we don't have to transition out of the joint config first because - // this is done automatically by ChangeReplicas before it does what it's - // asked to do. + // NB: we don't have to transition out of the previous joint config first + // because this is done automatically by ChangeReplicas before it does what + // it's asked to do. desc = tc.RemoveReplicasOrFatal(t, k, tc.Target(1)) err = tc.TransferRangeLease(desc, tc.Target(1)) exp = `cannot transfer lease to replica of type VOTER_OUTGOING` From cdd70ff2632344d92d50581af5996e42bfb4b1e7 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:52:29 +0200 Subject: [PATCH 19/27] fixup! storage: test merge queue seeing joint configs --- pkg/storage/replica_learner_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index ec7fc7cd14fd..79060f6458b0 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -973,9 +973,10 @@ func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) { require.Empty(t, desc.Replicas().Learners()) } + // Create the RHS again and repeat the same game, except this time the LHS + // gets a VOTER_INCOMING for s2, and then the merge queue runs into it. It + // will transition the LHS out of the joint config and then do the merge. { - // Create the RHS again and repeat the same game, except this time the LHS - // gets a VOTER_INCOMING for s2. 
 		desc := splitAndUnsplit()
 
 		ltk.withJointConfigAndStop(func() {
 			desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1))
 		})
 		require.Len(t, desc.Replicas().Filter(predIncoming), 1, desc)
@@ -1002,6 +1003,7 @@ func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) {
 		checkTransitioningOut()
 		desc = tc.LookupRangeOrFatal(t, scratchStartKey)
 		require.Len(t, desc.Replicas().Voters(), 2)
+		require.False(t, desc.Replicas().InAtomicReplicationChange(), desc)
 
 		// Repeat the game, except now we start with two replicas and we're
 		// giving the RHS a VOTER_OUTGOING.

From 43b5d31da894382b17c9fb400d51d7351ee360b1 Mon Sep 17 00:00:00 2001
From: Tobias Schottdorf
Date: Tue, 27 Aug 2019 21:52:45 +0200
Subject: [PATCH 20/27] fixup! storage: test merge queue seeing joint configs

---
 pkg/storage/replica_learner_test.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go
index 79060f6458b0..d4faf4d0350c 100644
--- a/pkg/storage/replica_learner_test.go
+++ b/pkg/storage/replica_learner_test.go
@@ -787,7 +787,6 @@ func TestLearnerAndJointConfigFollowerRead(t *testing.T) {
 	check()
 
 	// Removing the voter (and remaining in joint config) does.
-
 	scratchDesc = tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(1))
 	require.True(t, scratchDesc.Replicas().InAtomicReplicationChange(), scratchDesc)
 	require.Len(t, scratchDesc.Replicas().Filter(predOutgoing), 1)

From 732d3a72612607ae9e8997b4da963def277c8314 Mon Sep 17 00:00:00 2001
From: Tobias Schottdorf
Date: Tue, 27 Aug 2019 21:53:22 +0200
Subject: [PATCH 21/27] fixup! storage: test that incoming/outcoming voters don't serve follower reads

---
 pkg/storage/replica_command.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go
index 9516d1dfecef..59f3ee603925 100644
--- a/pkg/storage/replica_command.go
+++ b/pkg/storage/replica_command.go
@@ -1362,13 +1362,13 @@ func execChangeReplicasTxn(
 		if !ok {
 			return nil, errors.Errorf("target %s not found", chg.target)
 		}
-		if typ := rDesc.GetType(); !useJoint || typ != roachpb.VOTER_FULL {
-			// NB: typ != VOTER_FULL means it's a LEARNER since we verified above that we
-			// did not start in a joint config.
+		if typ := rDesc.GetType(); !useJoint || typ == roachpb.LEARNER {
 			rDesc, _ = updatedDesc.RemoveReplica(chg.target.NodeID, chg.target.StoreID)
 		} else {
+			// NB: typ is already known to be VOTER_FULL because of !InAtomicReplicationChange() above.
+			// We check it anyway.
 			var prevTyp roachpb.ReplicaType
 			rDesc, prevTyp, _ = updatedDesc.SetReplicaType(chg.target.NodeID, chg.target.StoreID, roachpb.VOTER_OUTGOING)
 			if prevTyp != roachpb.VOTER_FULL {
 				return nil, errors.Errorf("cannot transition from %s to VOTER_OUTGOING", prevTyp)
 			}

From 9b3247b15aebde08245863f52c7d16640d40d115 Mon Sep 17 00:00:00 2001
From: Tobias Schottdorf
Date: Tue, 27 Aug 2019 21:55:12 +0200
Subject: [PATCH 22/27] fixup! batcheval: explain ban on lease transfer to VOTER_OUTGOING

---
 pkg/storage/batcheval/cmd_lease.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pkg/storage/batcheval/cmd_lease.go b/pkg/storage/batcheval/cmd_lease.go
index c01677e9e650..87b12cb433aa 100644
--- a/pkg/storage/batcheval/cmd_lease.go
+++ b/pkg/storage/batcheval/cmd_lease.go
@@ -55,6 +55,9 @@ func checkCanReceiveLease(rec EvalContext) error {
 		// amounts to removing the leaseholder without any safety precautions.
 		// This would either wedge the range or allow illegal reads to be
 		// served.
+ // + // Since the leaseholder can't remove itself and is a VOTER_FULL, we + // also know that in any configuration there's at least one VOTER_FULL. return errors.Errorf(`cannot transfer lease to replica of type %s`, t) } return nil From 11eb483aa8a91a0c2188c5cff94efc6034c59181 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:55:59 +0200 Subject: [PATCH 23/27] fixup! storage: test merge queue seeing joint configs --- pkg/storage/replica_learner_test.go | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index d4faf4d0350c..cbbe909d0ae9 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -52,14 +52,14 @@ type replicationTestKnobs struct { replicationAlwaysUseJointConfig int64 } -func (rtl *replicationTestKnobs) withLearnerStop(f func()) { +func (rtl *replicationTestKnobs) withStopAfterLearnerAtomic(f func()) { prev := atomic.LoadInt64(&rtl.replicaAddStopAfterLearnerAtomic) defer atomic.StoreInt64(&rtl.replicaAddStopAfterLearnerAtomic, prev) atomic.StoreInt64(&rtl.replicaAddStopAfterLearnerAtomic, 1) f() } -func (rtl *replicationTestKnobs) withJointConfigAndStop(f func()) { +func (rtl *replicationTestKnobs) withStopAfterJointConfig(f func()) { au := atomic.LoadInt64(&rtl.replicationAlwaysUseJointConfig) sa := atomic.LoadInt64(&rtl.replicaAddStopAfterJointConfig) defer atomic.StoreInt64(&rtl.replicationAlwaysUseJointConfig, au) @@ -248,7 +248,7 @@ func TestLearnerRaftConfState(t *testing.T) { // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) var desc roachpb.RangeDescriptor - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) require.Len(t, desc.Replicas().Learners(), 1) @@ -327,7 +327,7 @@ func TestSplitWithLearnerOrJointConfig(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) @@ -370,7 +370,7 @@ func TestReplicateQueueSeesLearnerOrJointConfig(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) @@ -393,7 +393,7 @@ func TestReplicateQueueSeesLearnerOrJointConfig(t *testing.T) { } // Create a VOTER_OUTGOING, i.e. a joint configuration. - ltk.withJointConfigAndStop(func() { + ltk.withStopAfterJointConfig(func() { desc := tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(2)) require.True(t, desc.Replicas().InAtomicReplicationChange(), desc) trace, errMsg, err := store.ManuallyEnqueue(ctx, "replicate", repl, true /* skipShouldQueue */) @@ -429,7 +429,7 @@ func TestReplicaGCQueueSeesLearnerOrJointConfig(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. 
scratchStartKey := tc.ScratchRange(t) - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) @@ -450,7 +450,7 @@ func TestReplicaGCQueueSeesLearnerOrJointConfig(t *testing.T) { // Now get the range into a joint config. tc.RemoveReplicasOrFatal(t, scratchStartKey, tc.Target(1)) // remove learner - ltk.withJointConfigAndStop(func() { + ltk.withStopAfterJointConfig(func() { desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) require.Len(t, desc.Replicas().Filter(predIncoming), 1, desc) }) @@ -669,7 +669,7 @@ func TestLearnerNoAcceptLease(t *testing.T) { // Add a learner replica, send a snapshot so that it's materialized as a // Replica on the Store, but don't promote it to a voter. scratchStartKey := tc.ScratchRange(t) - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) @@ -735,7 +735,7 @@ func TestLearnerAndJointConfigFollowerRead(t *testing.T) { scratchStartKey := tc.ScratchRange(t) var scratchDesc roachpb.RangeDescriptor - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { scratchDesc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) @@ -809,7 +809,7 @@ func TestLearnerAdminRelocateRange(t *testing.T) { db.Exec(t, `SET CLUSTER SETTING kv.learner_replicas.enabled = true`) scratchStartKey := tc.ScratchRange(t) - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(2)) }) @@ -859,7 +859,7 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { // This allows testing merges that have a learner on the RHS (on desc2) and // the LHS (on desc1). var desc1, desc3 roachpb.RangeDescriptor - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { desc1 = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) desc3 = tc.AddReplicasOrFatal(t, splitKey2, tc.Target(1)) }) @@ -946,7 +946,7 @@ func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) { { splitAndUnsplit() - ltk.withLearnerStop(func() { + ltk.withStopAfterLearnerAtomic(func() { _ = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) @@ -978,7 +978,7 @@ func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) { { desc := splitAndUnsplit() - ltk.withJointConfigAndStop(func() { + ltk.withStopAfterJointConfig(func() { desc = tc.AddReplicasOrFatal(t, scratchStartKey, tc.Target(1)) }) require.Len(t, desc.Replicas().Filter(predIncoming), 1, desc) @@ -1007,7 +1007,7 @@ func TestMergeQueueSeesLearnerOrJointConfig(t *testing.T) { // Repeat the game, except now we start with two replicas and we're // giving the RHS a VOTER_OUTGOING. desc = splitAndUnsplit() - ltk.withJointConfigAndStop(func() { + ltk.withStopAfterJointConfig(func() { descRight := tc.RemoveReplicasOrFatal(t, desc.EndKey.AsRawKey(), tc.Target(1)) require.Len(t, descRight.Replicas().Filter(predOutgoing), 1, desc) }) From fb13fe78365de615f9b6ebad12ddc92abf6dd808 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:56:58 +0200 Subject: [PATCH 24/27] fixup! 
storage: test merge queue seeing joint configs --- pkg/storage/replica_learner_test.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index cbbe909d0ae9..6071e56a3c0a 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -53,19 +53,16 @@ type replicationTestKnobs struct { } func (rtl *replicationTestKnobs) withStopAfterLearnerAtomic(f func()) { - prev := atomic.LoadInt64(&rtl.replicaAddStopAfterLearnerAtomic) + prev := atomic.SwapInt64(&rtl.replicaAddStopAfterLearnerAtomic, 1) defer atomic.StoreInt64(&rtl.replicaAddStopAfterLearnerAtomic, prev) - atomic.StoreInt64(&rtl.replicaAddStopAfterLearnerAtomic, 1) f() } func (rtl *replicationTestKnobs) withStopAfterJointConfig(f func()) { - au := atomic.LoadInt64(&rtl.replicationAlwaysUseJointConfig) - sa := atomic.LoadInt64(&rtl.replicaAddStopAfterJointConfig) + au := atomic.SwapInt64(&rtl.replicationAlwaysUseJointConfig, 1) + sa := atomic.SwapInt64(&rtl.replicaAddStopAfterJointConfig, 1) defer atomic.StoreInt64(&rtl.replicationAlwaysUseJointConfig, au) defer atomic.StoreInt64(&rtl.replicaAddStopAfterJointConfig, sa) - atomic.StoreInt64(&rtl.replicationAlwaysUseJointConfig, 1) - atomic.StoreInt64(&rtl.replicaAddStopAfterJointConfig, 1) f() } From d69a54ebe8c945517f125486df075dce332017c8 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 21:59:36 +0200 Subject: [PATCH 25/27] fixup! storage: test AdminMerge in presence of joint config --- pkg/storage/replica_learner_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 6071e56a3c0a..5d4d3cadcda3 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -873,6 +873,7 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { } // LEARNER on the lhs or rhs should fail. + // desc{1,2,3} = (VOTER_FULL, LEARNER) (VOTER_FULL) (VOTER_FULL, LEARNER) checkFails() // Turn the learners on desc1 and desc3 into VOTER_INCOMINGs. @@ -886,9 +887,11 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { require.Len(t, desc1.Replicas().Filter(predIncoming), 1) // VOTER_INCOMING on the lhs or rhs should fail. + // desc{1,2,3} = (VOTER_FULL, VOTER_INCOMING) (VOTER_FULL) (VOTER_FULL, VOTER_INCOMING) checkFails() // Turn the incoming voters on desc1 and desc3 into VOTER_OUTGOINGs. + // desc{1,2,3} = (VOTER_FULL, VOTER_OUTGOING) (VOTER_FULL) (VOTER_FULL, VOTER_OUTGOING) desc1 = tc.RemoveReplicasOrFatal(t, desc1.StartKey.AsRawKey(), tc.Target(1)) require.Len(t, desc1.Replicas().Filter(predOutgoing), 1) desc3 = tc.RemoveReplicasOrFatal(t, desc3.StartKey.AsRawKey(), tc.Target(1)) @@ -900,13 +903,14 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { // Add a VOTER_INCOMING to desc2 to make sure it actually exludes this type // of replicas from merges (rather than really just checking whether the // replica sets are equal). - + // desc{1,2,3} = (VOTER_FULL, VOTER_OUTGOING) (VOTER_FULL, VOTER_INCOMING) (VOTER_FULL, VOTER_OUTGOING) desc2 := tc.AddReplicasOrFatal(t, splitKey1, tc.Target(1)) require.Len(t, desc2.Replicas().Filter(predIncoming), 1) checkFails() // Ditto VOTER_OUTGOING. 
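+	// As with VOTER_INCOMING above, a VOTER_OUTGOING is still part of the
+	// joint configuration, so it must block the merge as well.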
+ // desc{1,2,3} = (VOTER_FULL, VOTER_OUTGOING) (VOTER_FULL, VOTER_OUTGOING) (VOTER_FULL, VOTER_OUTGOING) desc2 = tc.RemoveReplicasOrFatal(t, desc2.StartKey.AsRawKey(), tc.Target(1)) require.Len(t, desc2.Replicas().Filter(predOutgoing), 1) From ae17a5ef93656c4f86eedcf946f9049d8c79e112 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 22:01:13 +0200 Subject: [PATCH 26/27] fixup! testcluster: add RemoveReplicasOrFatal --- pkg/testutils/serverutils/test_cluster_shim.go | 6 ++++++ pkg/testutils/testcluster/testcluster.go | 1 + 2 files changed, 7 insertions(+) diff --git a/pkg/testutils/serverutils/test_cluster_shim.go b/pkg/testutils/serverutils/test_cluster_shim.go index 37f9d925a2bf..efa54c9b2f10 100644 --- a/pkg/testutils/serverutils/test_cluster_shim.go +++ b/pkg/testutils/serverutils/test_cluster_shim.go @@ -68,6 +68,12 @@ type TestClusterInterface interface { startKey roachpb.Key, targets ...roachpb.ReplicationTarget, ) (roachpb.RangeDescriptor, error) + // RemoveReplicasOrFatal is the same as RemoveReplicas but will Fatal the test on + // error. + RemoveReplicasOrFatal( + t testing.TB, startKey roachpb.Key, targets ...roachpb.ReplicationTarget, + ) roachpb.RangeDescriptor + // FindRangeLeaseHolder returns the current lease holder for the given range. // In particular, it returns one particular node's (the hint, if specified) view // of the current lease. diff --git a/pkg/testutils/testcluster/testcluster.go b/pkg/testutils/testcluster/testcluster.go index 8f1bd531d7c0..666c7c679488 100644 --- a/pkg/testutils/testcluster/testcluster.go +++ b/pkg/testutils/testcluster/testcluster.go @@ -506,6 +506,7 @@ func (tc *TestCluster) RemoveReplicas( return tc.changeReplicas(roachpb.REMOVE_REPLICA, keys.MustAddr(startKey), targets...) } +// RemoveReplicasOrFatal is part of TestClusterInterface. func (tc *TestCluster) RemoveReplicasOrFatal( t testing.TB, startKey roachpb.Key, targets ...roachpb.ReplicationTarget, ) roachpb.RangeDescriptor { From 4c3d5a50aa256f045d69245de3bd97156f382964 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 27 Aug 2019 22:01:35 +0200 Subject: [PATCH 27/27] fixup! storage: test AdminMerge in presence of joint config --- pkg/storage/replica_learner_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/replica_learner_test.go b/pkg/storage/replica_learner_test.go index 5d4d3cadcda3..028481bb96ac 100644 --- a/pkg/storage/replica_learner_test.go +++ b/pkg/storage/replica_learner_test.go @@ -900,7 +900,7 @@ func TestLearnerAndJointConfigAdminMerge(t *testing.T) { // VOTER_OUTGOING on the lhs or rhs should fail. checkFails() - // Add a VOTER_INCOMING to desc2 to make sure it actually exludes this type + // Add a VOTER_INCOMING to desc2 to make sure it actually excludes this type // of replicas from merges (rather than really just checking whether the // replica sets are equal). // desc{1,2,3} = (VOTER_FULL, VOTER_OUTGOING) (VOTER_FULL, VOTER_INCOMING) (VOTER_FULL, VOTER_OUTGOING)