diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index bccedc802f32..f99a0ddf2596 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -211,6 +211,7 @@ ALL_TESTS = [ "//pkg/kv/kvserver/abortspan:abortspan_test", "//pkg/kv/kvserver/allocator/allocator2:allocator2_test", "//pkg/kv/kvserver/allocator/allocatorimpl:allocatorimpl_test", + "//pkg/kv/kvserver/allocator/plan:plan_test", "//pkg/kv/kvserver/allocator/storepool:storepool_test", "//pkg/kv/kvserver/apply:apply_test", "//pkg/kv/kvserver/asim/gossip:gossip_test", @@ -1324,6 +1325,7 @@ GO_TARGETS = [ "//pkg/kv/kvserver/allocator/allocatorimpl:allocatorimpl_test", "//pkg/kv/kvserver/allocator/load:load", "//pkg/kv/kvserver/allocator/plan:plan", + "//pkg/kv/kvserver/allocator/plan:plan_test", "//pkg/kv/kvserver/allocator/storepool:storepool", "//pkg/kv/kvserver/allocator/storepool:storepool_test", "//pkg/kv/kvserver/allocator:allocator", diff --git a/pkg/kv/kvserver/allocator/plan/BUILD.bazel b/pkg/kv/kvserver/allocator/plan/BUILD.bazel index 80cdaa0bddfa..75d563c59a7f 100644 --- a/pkg/kv/kvserver/allocator/plan/BUILD.bazel +++ b/pkg/kv/kvserver/allocator/plan/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "plan", @@ -26,3 +26,18 @@ go_library( "@io_etcd_go_raft_v3//:raft", ], ) + +go_test( + name = "plan_test", + srcs = ["util_test.go"], + args = ["-test.timeout=295s"], + embed = [":plan"], + deps = [ + "//pkg/kv/kvpb", + "//pkg/kv/kvserver/allocator/allocatorimpl", + "//pkg/roachpb", + "//pkg/util/leaktest", + "//pkg/util/log", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/kv/kvserver/allocator/plan/util.go b/pkg/kv/kvserver/allocator/plan/util.go index cbb8def379eb..0f5cf1cce2cc 100644 --- a/pkg/kv/kvserver/allocator/plan/util.go +++ b/pkg/kv/kvserver/allocator/plan/util.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" "go.etcd.io/raft/v3" ) @@ -37,51 +36,6 @@ func ReplicationChangesForRebalance( rebalanceTargetType allocatorimpl.TargetReplicaType, ) (chgs []kvpb.ReplicationChange, performingSwap bool, err error) { rdesc, found := desc.GetReplicaDescriptor(addTarget.StoreID) - if rebalanceTargetType == allocatorimpl.VoterTarget && numExistingVoters == 1 { - // If there's only one replica, the removal target is the - // leaseholder and this is unsupported and will fail. However, - // this is also the only way to rebalance in a single-replica - // range. If we try the atomic swap here, we'll fail doing - // nothing, and so we stay locked into the current distribution - // of replicas. (Note that maybeTransferLeaseAway above will not - // have found a target, and so will have returned (false, nil). - // - // Do the best thing we can, which is carry out the addition - // only, which should succeed, and the next time we touch this - // range, we will have one more replica and hopefully it will - // take the lease and remove the current leaseholder. - // - // It's possible that "rebalancing deadlock" can occur in other - // scenarios, it's really impossible to tell from the code given - // the constraints we support. 
However, the lease transfer often
-		// does not happen spuriously, and we can't enter dangerous
-		// configurations sporadically, so this code path is only hit
-		// when we know it's necessary, picking the smaller of two evils.
-		//
-		// See https://github.com/cockroachdb/cockroach/issues/40333.
-		log.KvDistribution.Infof(ctx, "can't swap replica due to lease; falling back to add")
-
-		// Even when there is only 1 existing voter, there may be other replica
-		// types in the range. Check if the add target already has a replica, if so
-		// it must be a non-voter or the rebalance is invalid.
-		if found && rdesc.Type == roachpb.NON_VOTER {
-			// The receiving store already has a non-voting replica. Instead of just
-			// adding a voter to the receiving store, we *must* promote the non-voting
-			// replica to a voter.
-			chgs = kvpb.ReplicationChangesForPromotion(addTarget)
-		} else if !found {
-			chgs = []kvpb.ReplicationChange{
-				{ChangeType: roachpb.ADD_VOTER, Target: addTarget},
-			}
-		} else {
-			return nil, false, errors.AssertionFailedf(
-				"invalid rebalancing decision: trying to"+
-					" move voter to a store that already has a replica %s for the range", rdesc,
-			)
-		}
-		return chgs, false, err
-	}
-
 	switch rebalanceTargetType {
 	case allocatorimpl.VoterTarget:
 		// Check if the target being added already has a non-voting replica.
diff --git a/pkg/kv/kvserver/allocator/plan/util_test.go b/pkg/kv/kvserver/allocator/plan/util_test.go
new file mode 100644
index 000000000000..e4a7077c87e3
--- /dev/null
+++ b/pkg/kv/kvserver/allocator/plan/util_test.go
@@ -0,0 +1,188 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package plan
+
+import (
+	"context"
+	"testing"
+
+	"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
+	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
+	"github.com/cockroachdb/cockroach/pkg/util/log"
+	"github.com/stretchr/testify/require"
+)
+
+// TestReplicationChangesForRebalance asserts that the replication changes for
+// rebalancing are correct, given a range descriptor and rebalance target.
+func TestReplicationChangesForRebalance(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + testCases := []struct { + name string + desc *roachpb.RangeDescriptor + addTarget, removeTarget roachpb.ReplicationTarget + rebalanceTargetType allocatorimpl.TargetReplicaType + expectedChanges []kvpb.ReplicationChange + expectedPerformingSwap bool + expectedErrStr string + }{ + { + name: "rf=1 rebalance voter 1->2", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 2, StoreID: 2}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 2, StoreID: 2}}, + {ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: false, + expectedErrStr: "", + }, + { + name: "rf=3 rebalance voter 1->4", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}}, + {ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: false, + expectedErrStr: "", + }, + { + name: "rf=3 rebalance voter 1->3 error: already has a voter", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: nil, + expectedPerformingSwap: false, + expectedErrStr: "programming error: store being rebalanced to(3) already has a voting replica", + }, + { + name: "rf=3 rebalance non-voter: 1->4", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.NonVoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}}, + {ChangeType: roachpb.REMOVE_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: false, + expectedErrStr: "", + }, + { + name: "rf=3 rebalance non-voter 1->3 error: already has a voter", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER}, + {NodeID: 2, StoreID: 2, Type: 
roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.NonVoterTarget, + expectedChanges: nil, + expectedPerformingSwap: false, + expectedErrStr: "invalid rebalancing decision: trying to move non-voter to a store that already has a replica", + }, + { + name: "rf=3 rebalance non-voter 1->3 error: already has a non-voter", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.NON_VOTER}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.NonVoterTarget, + expectedChanges: nil, + expectedPerformingSwap: false, + expectedErrStr: "invalid rebalancing decision: trying to move non-voter to a store that already has a replica", + }, + { + name: "rf=3 rebalance voter 1->3 swap", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.NON_VOTER}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}}, + {ChangeType: roachpb.REMOVE_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}}, + {ChangeType: roachpb.ADD_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + {ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: true, + expectedErrStr: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + chgs, performingSwap, err := ReplicationChangesForRebalance( + ctx, + tc.desc, + len(tc.desc.Replicas().VoterDescriptors()), + tc.addTarget, + tc.removeTarget, + tc.rebalanceTargetType, + ) + require.Equal(t, tc.expectedChanges, chgs) + require.Equal(t, tc.expectedPerformingSwap, performingSwap) + if tc.expectedErrStr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectedErrStr) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node b/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node index 97ee13f8f0a6..0abe0ecd8d21 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node @@ -40,9 +40,9 @@ plot stat=replicas sample=1 301 ┼──────────────────────────────────────╭──────────────────────────────────────── 281 ┤ ╭╭─╯ 261 ┤ ╭╭──╯ - 241 ┤ ╭╭─╯ - 221 ┤ ╭───╯ - 201 ┤ ╭╭─╯ + 241 ┤ ╭──╯ + 221 ┤ ╭──╯ + 201 ┤ ╭─╯╯ 181 ┤ ╭──╯ 161 ┤ ╭──╯ 140 ┤ ╭──╯╯ diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter b/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter index 8a1b9da7ce74..7f10bab1fe0b 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter @@ -14,17 +14,17 @@ plot stat=replicas ---- ---- - 
100.00 ┼──────╭────────────╮ - 93.33 ┤ ╭╯ ╰╮ ╰────╮ - 86.67 ┤ │ ╰╮ ╭╰╮───────╮ - 80.00 ┤ │ ╰╮ ╭─╯ ╰╮ ╰─╭╮ - 73.33 ┤ ╭╯ ╰╮ ╭╯ ╰╮ ╭╯╰─────────╮ - 66.67 ┤ │ │ ╭─╯ ╰─╮ │ ╰─╮ ╭──╰─╮╭╮──╭╮╮ - 60.00 ┤ ╭╯ ╰╮ │ │ ╭╯ ╰─╯ ╰╯╰──╯╰────────╮╭╮╭─╮╮ - 53.33 ┤ │ │ ╭─╯ ╰╮ ╭╯ ╰╯ ╰╯╰╯╰╯ ╰────────── - 46.67 ┤ ╭╯ ╰╮ │ ╰─╯ - 40.00 ┤ │ │╭╯ - 33.33 ┤ │ ╰╯ + 100.00 ┼╮ + 93.33 ┤│ + 86.67 ┤╰╮ + 80.00 ┤ │ + 73.33 ┤ ╰╮ + 66.67 ┤ │ + 60.00 ┤ │ + 53.33 ┤ ╰──────────────────────────────────────────────────────────────────────────── + 46.67 ┤ ╭──────────────────────────────────────────────────────────────────────────── + 40.00 ┤ │ + 33.33 ┤ │ 26.67 ┤ ╭╯ 20.00 ┤ │ 13.33 ┤╭╯ @@ -38,22 +38,22 @@ plot stat=leases ---- ---- - 100.00 ┼──────╮ ╭───╮ - 93.33 ┤ ╰╮ ╭╯ ╰────╮ - 86.67 ┤ │ │ │ - 80.00 ┤ ╰╮ ╭╯ ╰╮ ╭─────╮ - 73.33 ┤ │ │ │ ╭╯ ╰─╮ ╭───╮ - 66.67 ┤ ╰╮╭╯ ╰╮ ╭╯ ╰╮ ╭╯ ╰─╮ ╭──╮ - 60.00 ┤ ││ │ │ │ │ ╰╮╭╯ ╰──╮ ╭──╮ - 53.33 ┤ ╭╯ ╰╮╯ ╰╭╯ │╯ ╰╭╯ ╰──╮───────╭─────── - 46.67 ┤ │╮ ╭╰╮ ╭╯╮ ╰╮ ╭╯╮ ╭──╰───────╯─────── - 40.00 ┤ ││ │ │ │ │ ╭╯╰╮ ╭──╯ ╰──╯ - 33.33 ┤ ╭╯╰╮ ╭╯ ╰╮ ╭╯ ╰╮ ╭─╯ ╰──╯ - 26.67 ┤ │ │ │ ╰╮ ╭─╯ ╰───╯ - 20.00 ┤ ╭╯ ╰╮ ╭╯ ╰─────╯ - 13.33 ┤ │ │ │ - 6.67 ┤ ╭╯ ╰╮ ╭────╯ - 0.00 ┼──────╯ ╰───╯ + 100.00 ┼╮ + 93.33 ┤│ + 86.67 ┤╰╮ + 80.00 ┤ │ + 73.33 ┤ ╰╮ + 66.67 ┤ │ + 60.00 ┤ │ + 53.33 ┤ ╰──────────────────────────────────────────────────────────────────────────── + 46.67 ┤ ╭──────────────────────────────────────────────────────────────────────────── + 40.00 ┤ │ + 33.33 ┤ │ + 26.67 ┤ ╭╯ + 20.00 ┤ │ + 13.33 ┤╭╯ + 6.67 ┤│ + 0.00 ┼╯ leases ---- ---- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges index 783671ee4bf1..249f2396f55e 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges @@ -132,15 +132,7 @@ clear ---- # We expect ranges to be randomly allocated across stores with a replication -# factor of 1. Assertion failures on some samples are expected under this setup. -# When there is only one replica and the removal target in rebalancing is the -# leaseholder, stabilizing is hard. The system can't easily remove the replica, -# so it chose to fall back to adding a replica, hoping lease transfer and -# removal of original replica would happen next time this range is checked. In -# this set up, it is always possible to be over-replicated if rebalancing is -# occurring -- as we catch ranges in the middle of rebalancing. In addition, we -# expect all output details to be displayed upon test failure. Please see the -# comment in ReplicationChangesForRebalance for more details. +# factor of 1. 
rand_cluster cluster_gen_type=single_region ---- @@ -177,30 +169,7 @@ configurations generated using seed 2643318057788968173 randomized ranges with placement_type=random, ranges=944, key_space=150098, replication_factor=1, bytes=0 basic load with rw_ratio=0.00, rate=0.00, skewed_access=false, min_block_size=1, max_block_size=1, min_key=1, max_key=200000 number of mutation events=0, number of assertion events=0 -initial state at 2022-03-21 11:00:00: - stores(15)=[s1n1=(replicas(65)),s2n2=(replicas(64)),s3n3=(replicas(66)),s4n4=(replicas(63)),s5n5=(replicas(63)),s6n6=(replicas(63)),s7n7=(replicas(63)),s8n8=(replicas(62)),s9n9=(replicas(64)),s10n10=(replicas(64)),s11n11=(replicas(62)),s12n12=(replicas(63)),s13n13=(replicas(63)),s14n14=(replicas(64)),s15n15=(replicas(64))] -topology: -US - US_1 - └── [1 2 3 4 5] - US_2 - └── [6 7 8 9 10] - US_3 - └── [11 12 13 14 15] -no events were scheduled -sample2: failed assertion - conformance unavailable=0 under=0 over=0 violating=0 - actual unavailable=0 under=0, over=9 violating=0 -over replicated: - r120:000001{8921-9080} [(n8,s8):2, (n15,s15):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r133:000002{0988-1147} [(n3,s3):2, (n5,s5):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r138:0000021{783-942} [(n3,s3):2, (n12,s12):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r156:0000024{645-804} [(n3,s3):2, (n2,s2):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r243:0000038{478-637} [(n3,s3):2, (n12,s12):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r322:0000051{039-198} [(n1,s1):2, (n3,s3):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r567:00000{89994-90153} [(n3,s3):2, (n5,s5):3] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r605:0000096{036-195} [(n3,s3):3, (n8,s8):4] applying ttl_seconds=0 num_replicas=1 num_voters=1 - r875:000013{8966-9125} [(n3,s3):3, (n2,s2):4] applying ttl_seconds=0 num_replicas=1 num_voters=1 +sample2: pass ---------------------------------- sample3: start running configurations generated using seed 6972490225919430754 diff --git a/pkg/sql/scatter_test.go b/pkg/sql/scatter_test.go index adf0749b4eeb..dbe1b00827b4 100644 --- a/pkg/sql/scatter_test.go +++ b/pkg/sql/scatter_test.go @@ -17,7 +17,9 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/config/zonepb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils" "github.com/cockroachdb/cockroach/pkg/sql/randgen" "github.com/cockroachdb/cockroach/pkg/testutils" @@ -27,6 +29,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" ) func TestScatterRandomizeLeases(t *testing.T) { @@ -176,3 +180,97 @@ func TestScatterResponse(t *testing.T) { t.Fatalf("expected %d rows, but got %d", e, a) } } + +// TestScatterWithOneVoter tests that the scatter command works when the +// replication factor is set to 1. We expect that the total number of replicas +// remains unchanged and that the scattering store loses some replicas. Note we +// don't assert on the final distribution being even across all stores, scatter +// promises randomness, not necessarily uniformity. 
+func TestScatterWithOneVoter(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	skip.UnderRace(t) // Too slow under stressrace.
+	skip.UnderDeadlock(t)
+	skip.UnderShort(t)
+
+	zcfg := zonepb.DefaultZoneConfig()
+	zcfg.NumReplicas = proto.Int32(1)
+
+	tc := serverutils.StartCluster(t, 3, base.TestClusterArgs{
+		ReplicationMode: base.ReplicationManual,
+		ServerArgs: base.TestServerArgs{
+			Knobs: base.TestingKnobs{
+				Server: &server.TestingKnobs{
+					DefaultZoneConfigOverride: &zcfg,
+				},
+			},
+		},
+	})
+	defer tc.Stopper().Stop(context.Background())
+
+	sqlutils.CreateTable(
+		t, tc.ServerConn(0), "t",
+		"k INT PRIMARY KEY, v INT",
+		500, /* numRows */
+		sqlutils.ToRowFn(sqlutils.RowIdxFn, sqlutils.RowModuloFn(10)),
+	)
+
+	r := sqlutils.MakeSQLRunner(tc.ServerConn(0))
+
+	// Create 49 splits, for 50 ranges in the test table.
+	r.Exec(t, "ALTER TABLE test.t SPLIT AT (SELECT i*10 FROM generate_series(1, 49) AS g(i))")
+
+	getReplicaCounts := func() (map[int]int, int, error) {
+		rows := r.Query(t, `
+    WITH ranges_info AS (
+        SHOW RANGES FROM TABLE test.t
+    )
+    SELECT
+        store_id,
+        count(*) AS replica_count
+    FROM
+        (
+            SELECT
+                unnest(replicas) AS store_id
+            FROM
+                ranges_info
+        ) AS store_replicas
+    GROUP BY
+        store_id;`)
+		replicaCounts := make(map[int]int)
+		totalReplicas := 0
+		for rows.Next() {
+			var storeID, replicaCount int
+			if err := rows.Scan(&storeID, &replicaCount); err != nil {
+				return nil, 0, err
+			}
+			replicaCounts[storeID] = replicaCount
+			totalReplicas += replicaCount
+		}
+		if err := rows.Err(); err != nil {
+			return nil, 0, err
+		}
+		return replicaCounts, totalReplicas, nil
+	}
+
+	oldReplicaCounts, oldTotalReplicas, err := getReplicaCounts()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Expect the number of replicas on store 1 to have changed. We can't
+	// assert that the distribution will be even across all three stores, but s1
+	// (the initial leaseholder and replica) should have a different number of
+	// replicas than before. Rebalancing is otherwise disabled in this test, so
+	// the only replica movements are from the scatter.
+	r.Exec(t, "ALTER TABLE test.t SCATTER")
+	newReplicaCounts, newTotalReplicas, err := getReplicaCounts()
+	require.NoError(t, err)
+
+	require.Equal(t, oldTotalReplicas, newTotalReplicas,
+		"expected total replica count to remain the same post-scatter, "+
+			"old replica counts(%d): %v, new replica counts(%d): %v",
+		oldTotalReplicas, oldReplicaCounts, newTotalReplicas, newReplicaCounts)
+	require.NotEqual(t, oldReplicaCounts[1], newReplicaCounts[1])
+}