From 8bc831d57df246ef2665ce0a11e59e98cb91208c Mon Sep 17 00:00:00 2001
From: Austen McClernon
Date: Thu, 16 May 2024 15:28:55 +0000
Subject: [PATCH] kvserver: rebalance ranges with one voter using joint configurations

When rebalancing ranges with one replica, the allocator would only add a
voter, rather than both adding the new voter and removing the existing one.
Removing the leaseholder replica was not possible prior to #74077, so the
add-only fallback was necessary at the time. That restriction no longer
applies.

Allow rebalancing a one-voter range between stores using joint
configurations, where the lease is transferred from the outgoing demoting
voter to the incoming voter's store.

Scattering ranges with one voter now leaves the range with exactly one
voter, where previously both the leaseholder voter evaluating the scatter
and the new voter would be left behind.

Before this patch, scattering 1000 ranges with RF=1 on a 5-store cluster:

```
  store_id | replica_count | replica_distribution | lease_count | lease_distribution
-----------+---------------+----------------------+-------------+---------------------
         1 |          1001 | ##########           |         500 | ##########
         5 |           291 | ###                  |         147 | ###
         4 |           275 | ###                  |         137 | ###
         3 |           229 | ###                  |         118 | ###
         2 |           206 | ###                  |          99 | ##
```

After:

```
  store_id | replica_count | replica_distribution | lease_count | lease_distribution
-----------+---------------+----------------------+-------------+---------------------
         2 |           242 | ##########           |         241 | ##########
         4 |           227 | ##########           |         227 | ##########
         5 |           217 | #########            |         216 | #########
         3 |           209 | #########            |         208 | #########
         1 |           106 | #####                |         109 | #####
```

Fixes: #108420
Fixes: #124171

Release note (bug fix): Scattering a range with replication factor=1 no
longer erroneously up-replicates the range to two replicas. Leases also no
longer thrash between nodes when perturbed with replication factor=1.
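To make the new behavior concrete, here is a minimal sketch of the change
list the allocator now emits for a one-voter rebalance, mirroring the rf=1
case in the util_test.go added below. The types are illustrative stand-ins
for the real kvpb.ReplicationChange and roachpb.ReplicationTarget
definitions, not the actual package API:

```
package main

import "fmt"

// Stand-in types mirroring the shape of roachpb.ReplicationTarget and
// kvpb.ReplicationChange; illustrative only.
type ReplicationTarget struct{ NodeID, StoreID int }

type ChangeType string

const (
	AddVoter    ChangeType = "ADD_VOTER"
	RemoveVoter ChangeType = "REMOVE_VOTER"
)

type ReplicationChange struct {
	ChangeType ChangeType
	Target     ReplicationTarget
}

// oneVoterRebalanceChanges sketches the decision this patch enables: for a
// range with a single voter, emit the addition and the removal together so
// the replication layer can execute them as one joint configuration,
// transferring the lease from the demoting voter to the incoming one.
func oneVoterRebalanceChanges(add, remove ReplicationTarget) []ReplicationChange {
	return []ReplicationChange{
		{ChangeType: AddVoter, Target: add},
		{ChangeType: RemoveVoter, Target: remove},
	}
}

func main() {
	chgs := oneVoterRebalanceChanges(
		ReplicationTarget{NodeID: 2, StoreID: 2}, // incoming voter
		ReplicationTarget{NodeID: 1, StoreID: 1}, // outgoing leaseholder voter
	)
	for _, c := range chgs {
		fmt.Printf("%s n%d/s%d\n", c.ChangeType, c.Target.NodeID, c.Target.StoreID)
	}
	// Output:
	// ADD_VOTER n2/s2
	// REMOVE_VOTER n1/s1
}
```

Previously, only the ADD_VOTER change would have been emitted for this case;
emitting the pair is what allows the atomic swap.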
--- pkg/BUILD.bazel | 2 + pkg/kv/kvserver/allocator/plan/BUILD.bazel | 17 +- pkg/kv/kvserver/allocator/plan/util.go | 46 ----- pkg/kv/kvserver/allocator/plan/util_test.go | 188 ++++++++++++++++++ .../tests/testdata/non_rand/example_add_node | 6 +- .../asim/tests/testdata/non_rand/one_voter | 54 ++--- .../asim/tests/testdata/rand/rand_ranges | 35 +--- pkg/sql/scatter_test.go | 98 +++++++++ 8 files changed, 336 insertions(+), 110 deletions(-) create mode 100644 pkg/kv/kvserver/allocator/plan/util_test.go diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index bccedc802f32..f99a0ddf2596 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -211,6 +211,7 @@ ALL_TESTS = [ "//pkg/kv/kvserver/abortspan:abortspan_test", "//pkg/kv/kvserver/allocator/allocator2:allocator2_test", "//pkg/kv/kvserver/allocator/allocatorimpl:allocatorimpl_test", + "//pkg/kv/kvserver/allocator/plan:plan_test", "//pkg/kv/kvserver/allocator/storepool:storepool_test", "//pkg/kv/kvserver/apply:apply_test", "//pkg/kv/kvserver/asim/gossip:gossip_test", @@ -1324,6 +1325,7 @@ GO_TARGETS = [ "//pkg/kv/kvserver/allocator/allocatorimpl:allocatorimpl_test", "//pkg/kv/kvserver/allocator/load:load", "//pkg/kv/kvserver/allocator/plan:plan", + "//pkg/kv/kvserver/allocator/plan:plan_test", "//pkg/kv/kvserver/allocator/storepool:storepool", "//pkg/kv/kvserver/allocator/storepool:storepool_test", "//pkg/kv/kvserver/allocator:allocator", diff --git a/pkg/kv/kvserver/allocator/plan/BUILD.bazel b/pkg/kv/kvserver/allocator/plan/BUILD.bazel index 80cdaa0bddfa..75d563c59a7f 100644 --- a/pkg/kv/kvserver/allocator/plan/BUILD.bazel +++ b/pkg/kv/kvserver/allocator/plan/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "plan", @@ -26,3 +26,18 @@ go_library( "@io_etcd_go_raft_v3//:raft", ], ) + +go_test( + name = "plan_test", + srcs = ["util_test.go"], + args = ["-test.timeout=295s"], + embed = [":plan"], + deps = [ + "//pkg/kv/kvpb", + "//pkg/kv/kvserver/allocator/allocatorimpl", + "//pkg/roachpb", + "//pkg/util/leaktest", + "//pkg/util/log", + "@com_github_stretchr_testify//require", + ], +) diff --git a/pkg/kv/kvserver/allocator/plan/util.go b/pkg/kv/kvserver/allocator/plan/util.go index cbb8def379eb..0f5cf1cce2cc 100644 --- a/pkg/kv/kvserver/allocator/plan/util.go +++ b/pkg/kv/kvserver/allocator/plan/util.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" "go.etcd.io/raft/v3" ) @@ -37,51 +36,6 @@ func ReplicationChangesForRebalance( rebalanceTargetType allocatorimpl.TargetReplicaType, ) (chgs []kvpb.ReplicationChange, performingSwap bool, err error) { rdesc, found := desc.GetReplicaDescriptor(addTarget.StoreID) - if rebalanceTargetType == allocatorimpl.VoterTarget && numExistingVoters == 1 { - // If there's only one replica, the removal target is the - // leaseholder and this is unsupported and will fail. However, - // this is also the only way to rebalance in a single-replica - // range. If we try the atomic swap here, we'll fail doing - // nothing, and so we stay locked into the current distribution - // of replicas. (Note that maybeTransferLeaseAway above will not - // have found a target, and so will have returned (false, nil). 
-		//
-		// Do the best thing we can, which is carry out the addition
-		// only, which should succeed, and the next time we touch this
-		// range, we will have one more replica and hopefully it will
-		// take the lease and remove the current leaseholder.
-		//
-		// It's possible that "rebalancing deadlock" can occur in other
-		// scenarios, it's really impossible to tell from the code given
-		// the constraints we support. However, the lease transfer often
-		// does not happen spuriously, and we can't enter dangerous
-		// configurations sporadically, so this code path is only hit
-		// when we know it's necessary, picking the smaller of two evils.
-		//
-		// See https://github.com/cockroachdb/cockroach/issues/40333.
-		log.KvDistribution.Infof(ctx, "can't swap replica due to lease; falling back to add")
-
-		// Even when there is only 1 existing voter, there may be other replica
-		// types in the range. Check if the add target already has a replica, if so
-		// it must be a non-voter or the rebalance is invalid.
-		if found && rdesc.Type == roachpb.NON_VOTER {
-			// The receiving store already has a non-voting replica. Instead of just
-			// adding a voter to the receiving store, we *must* promote the non-voting
-			// replica to a voter.
-			chgs = kvpb.ReplicationChangesForPromotion(addTarget)
-		} else if !found {
-			chgs = []kvpb.ReplicationChange{
-				{ChangeType: roachpb.ADD_VOTER, Target: addTarget},
-			}
-		} else {
-			return nil, false, errors.AssertionFailedf(
-				"invalid rebalancing decision: trying to"+
-					" move voter to a store that already has a replica %s for the range", rdesc,
-			)
-		}
-		return chgs, false, err
-	}
-
 	switch rebalanceTargetType {
 	case allocatorimpl.VoterTarget:
 		// Check if the target being added already has a non-voting replica.
diff --git a/pkg/kv/kvserver/allocator/plan/util_test.go b/pkg/kv/kvserver/allocator/plan/util_test.go
new file mode 100644
index 000000000000..e4a7077c87e3
--- /dev/null
+++ b/pkg/kv/kvserver/allocator/plan/util_test.go
@@ -0,0 +1,188 @@
+// Copyright 2024 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package plan
+
+import (
+	"context"
+	"testing"
+
+	"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
+	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
+	"github.com/cockroachdb/cockroach/pkg/util/log"
+	"github.com/stretchr/testify/require"
+)
+
+// TestReplicationChangesForRebalance asserts that the replication changes for
+// rebalancing are correct, given a range descriptor and rebalance target.
+func TestReplicationChangesForRebalance(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + testCases := []struct { + name string + desc *roachpb.RangeDescriptor + addTarget, removeTarget roachpb.ReplicationTarget + rebalanceTargetType allocatorimpl.TargetReplicaType + expectedChanges []kvpb.ReplicationChange + expectedPerformingSwap bool + expectedErrStr string + }{ + { + name: "rf=1 rebalance voter 1->2", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 2, StoreID: 2}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 2, StoreID: 2}}, + {ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: false, + expectedErrStr: "", + }, + { + name: "rf=3 rebalance voter 1->4", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}}, + {ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: false, + expectedErrStr: "", + }, + { + name: "rf=3 rebalance voter 1->3 error: already has a voter", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: nil, + expectedPerformingSwap: false, + expectedErrStr: "programming error: store being rebalanced to(3) already has a voting replica", + }, + { + name: "rf=3 rebalance non-voter: 1->4", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.NonVoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}}, + {ChangeType: roachpb.REMOVE_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: false, + expectedErrStr: "", + }, + { + name: "rf=3 rebalance non-voter 1->3 error: already has a voter", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER}, + {NodeID: 2, StoreID: 2, Type: 
roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.NonVoterTarget, + expectedChanges: nil, + expectedPerformingSwap: false, + expectedErrStr: "invalid rebalancing decision: trying to move non-voter to a store that already has a replica", + }, + { + name: "rf=3 rebalance non-voter 1->3 error: already has a non-voter", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.NON_VOTER}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.NonVoterTarget, + expectedChanges: nil, + expectedPerformingSwap: false, + expectedErrStr: "invalid rebalancing decision: trying to move non-voter to a store that already has a replica", + }, + { + name: "rf=3 rebalance voter 1->3 swap", + desc: &roachpb.RangeDescriptor{ + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL}, + {NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL}, + {NodeID: 3, StoreID: 3, Type: roachpb.NON_VOTER}, + }, + }, + addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}, + removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}, + rebalanceTargetType: allocatorimpl.VoterTarget, + expectedChanges: []kvpb.ReplicationChange{ + {ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}}, + {ChangeType: roachpb.REMOVE_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}}, + {ChangeType: roachpb.ADD_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + {ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}}, + }, + expectedPerformingSwap: true, + expectedErrStr: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + chgs, performingSwap, err := ReplicationChangesForRebalance( + ctx, + tc.desc, + len(tc.desc.Replicas().VoterDescriptors()), + tc.addTarget, + tc.removeTarget, + tc.rebalanceTargetType, + ) + require.Equal(t, tc.expectedChanges, chgs) + require.Equal(t, tc.expectedPerformingSwap, performingSwap) + if tc.expectedErrStr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectedErrStr) + } else { + require.NoError(t, err) + } + }) + } +} diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node b/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node index 97ee13f8f0a6..0abe0ecd8d21 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node @@ -40,9 +40,9 @@ plot stat=replicas sample=1 301 ┼──────────────────────────────────────╭──────────────────────────────────────── 281 ┤ ╭╭─╯ 261 ┤ ╭╭──╯ - 241 ┤ ╭╭─╯ - 221 ┤ ╭───╯ - 201 ┤ ╭╭─╯ + 241 ┤ ╭──╯ + 221 ┤ ╭──╯ + 201 ┤ ╭─╯╯ 181 ┤ ╭──╯ 161 ┤ ╭──╯ 140 ┤ ╭──╯╯ diff --git a/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter b/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter index 8a1b9da7ce74..7f10bab1fe0b 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter +++ b/pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter @@ -14,17 +14,17 @@ plot stat=replicas ---- ---- - 
100.00 ┼──────╭────────────╮ - 93.33 ┤ ╭╯ ╰╮ ╰────╮ - 86.67 ┤ │ ╰╮ ╭╰╮───────╮ - 80.00 ┤ │ ╰╮ ╭─╯ ╰╮ ╰─╭╮ - 73.33 ┤ ╭╯ ╰╮ ╭╯ ╰╮ ╭╯╰─────────╮ - 66.67 ┤ │ │ ╭─╯ ╰─╮ │ ╰─╮ ╭──╰─╮╭╮──╭╮╮ - 60.00 ┤ ╭╯ ╰╮ │ │ ╭╯ ╰─╯ ╰╯╰──╯╰────────╮╭╮╭─╮╮ - 53.33 ┤ │ │ ╭─╯ ╰╮ ╭╯ ╰╯ ╰╯╰╯╰╯ ╰────────── - 46.67 ┤ ╭╯ ╰╮ │ ╰─╯ - 40.00 ┤ │ │╭╯ - 33.33 ┤ │ ╰╯ + 100.00 ┼╮ + 93.33 ┤│ + 86.67 ┤╰╮ + 80.00 ┤ │ + 73.33 ┤ ╰╮ + 66.67 ┤ │ + 60.00 ┤ │ + 53.33 ┤ ╰──────────────────────────────────────────────────────────────────────────── + 46.67 ┤ ╭──────────────────────────────────────────────────────────────────────────── + 40.00 ┤ │ + 33.33 ┤ │ 26.67 ┤ ╭╯ 20.00 ┤ │ 13.33 ┤╭╯ @@ -38,22 +38,22 @@ plot stat=leases ---- ---- - 100.00 ┼──────╮ ╭───╮ - 93.33 ┤ ╰╮ ╭╯ ╰────╮ - 86.67 ┤ │ │ │ - 80.00 ┤ ╰╮ ╭╯ ╰╮ ╭─────╮ - 73.33 ┤ │ │ │ ╭╯ ╰─╮ ╭───╮ - 66.67 ┤ ╰╮╭╯ ╰╮ ╭╯ ╰╮ ╭╯ ╰─╮ ╭──╮ - 60.00 ┤ ││ │ │ │ │ ╰╮╭╯ ╰──╮ ╭──╮ - 53.33 ┤ ╭╯ ╰╮╯ ╰╭╯ │╯ ╰╭╯ ╰──╮───────╭─────── - 46.67 ┤ │╮ ╭╰╮ ╭╯╮ ╰╮ ╭╯╮ ╭──╰───────╯─────── - 40.00 ┤ ││ │ │ │ │ ╭╯╰╮ ╭──╯ ╰──╯ - 33.33 ┤ ╭╯╰╮ ╭╯ ╰╮ ╭╯ ╰╮ ╭─╯ ╰──╯ - 26.67 ┤ │ │ │ ╰╮ ╭─╯ ╰───╯ - 20.00 ┤ ╭╯ ╰╮ ╭╯ ╰─────╯ - 13.33 ┤ │ │ │ - 6.67 ┤ ╭╯ ╰╮ ╭────╯ - 0.00 ┼──────╯ ╰───╯ + 100.00 ┼╮ + 93.33 ┤│ + 86.67 ┤╰╮ + 80.00 ┤ │ + 73.33 ┤ ╰╮ + 66.67 ┤ │ + 60.00 ┤ │ + 53.33 ┤ ╰──────────────────────────────────────────────────────────────────────────── + 46.67 ┤ ╭──────────────────────────────────────────────────────────────────────────── + 40.00 ┤ │ + 33.33 ┤ │ + 26.67 ┤ ╭╯ + 20.00 ┤ │ + 13.33 ┤╭╯ + 6.67 ┤│ + 0.00 ┼╯ leases ---- ---- diff --git a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges index 783671ee4bf1..249f2396f55e 100644 --- a/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges +++ b/pkg/kv/kvserver/asim/tests/testdata/rand/rand_ranges @@ -132,15 +132,7 @@ clear ---- # We expect ranges to be randomly allocated across stores with a replication -# factor of 1. Assertion failures on some samples are expected under this setup. -# When there is only one replica and the removal target in rebalancing is the -# leaseholder, stabilizing is hard. The system can't easily remove the replica, -# so it chose to fall back to adding a replica, hoping lease transfer and -# removal of original replica would happen next time this range is checked. In -# this set up, it is always possible to be over-replicated if rebalancing is -# occurring -- as we catch ranges in the middle of rebalancing. In addition, we -# expect all output details to be displayed upon test failure. Please see the -# comment in ReplicationChangesForRebalance for more details. +# factor of 1. 
 rand_cluster cluster_gen_type=single_region
 ----
@@ -177,30 +169,7 @@ configurations generated using seed 2643318057788968173
 randomized ranges with placement_type=random, ranges=944, key_space=150098, replication_factor=1, bytes=0
 basic load with rw_ratio=0.00, rate=0.00, skewed_access=false, min_block_size=1, max_block_size=1, min_key=1, max_key=200000
 number of mutation events=0, number of assertion events=0
-initial state at 2022-03-21 11:00:00:
-	stores(15)=[s1n1=(replicas(65)),s2n2=(replicas(64)),s3n3=(replicas(66)),s4n4=(replicas(63)),s5n5=(replicas(63)),s6n6=(replicas(63)),s7n7=(replicas(63)),s8n8=(replicas(62)),s9n9=(replicas(64)),s10n10=(replicas(64)),s11n11=(replicas(62)),s12n12=(replicas(63)),s13n13=(replicas(63)),s14n14=(replicas(64)),s15n15=(replicas(64))]
-topology:
-US
-  US_1
-  └── [1 2 3 4 5]
-  US_2
-  └── [6 7 8 9 10]
-  US_3
-  └── [11 12 13 14 15]
-no events were scheduled
-sample2: failed assertion
-  conformance unavailable=0 under=0 over=0 violating=0
-  actual unavailable=0 under=0, over=9 violating=0
-over replicated:
-  r120:000001{8921-9080} [(n8,s8):2, (n15,s15):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r133:000002{0988-1147} [(n3,s3):2, (n5,s5):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r138:0000021{783-942} [(n3,s3):2, (n12,s12):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r156:0000024{645-804} [(n3,s3):2, (n2,s2):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r243:0000038{478-637} [(n3,s3):2, (n12,s12):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r322:0000051{039-198} [(n1,s1):2, (n3,s3):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r567:00000{89994-90153} [(n3,s3):2, (n5,s5):3] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r605:0000096{036-195} [(n3,s3):3, (n8,s8):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
-  r875:000013{8966-9125} [(n3,s3):3, (n2,s2):4] applying ttl_seconds=0 num_replicas=1 num_voters=1
+sample2: pass
 ----------------------------------
 sample3: start running
 configurations generated using seed 6972490225919430754
diff --git a/pkg/sql/scatter_test.go b/pkg/sql/scatter_test.go
index adf0749b4eeb..dbe1b00827b4 100644
--- a/pkg/sql/scatter_test.go
+++ b/pkg/sql/scatter_test.go
@@ -17,7 +17,9 @@ import (
 	"testing"
 
 	"github.com/cockroachdb/cockroach/pkg/base"
+	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
 	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/server"
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/desctestutils"
 	"github.com/cockroachdb/cockroach/pkg/sql/randgen"
 	"github.com/cockroachdb/cockroach/pkg/testutils"
@@ -27,6 +29,8 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
 	"github.com/cockroachdb/errors"
+	"github.com/gogo/protobuf/proto"
+	"github.com/stretchr/testify/require"
 )
 
 func TestScatterRandomizeLeases(t *testing.T) {
@@ -176,3 +180,97 @@ func TestScatterResponse(t *testing.T) {
 		t.Fatalf("expected %d rows, but got %d", e, a)
 	}
 }
+
+// TestScatterWithOneVoter tests that the scatter command works when the
+// replication factor is set to 1. We expect that the total number of replicas
+// remains unchanged and that the scattering store loses some replicas. Note
+// that we don't assert that the final distribution is even across all stores;
+// scatter promises randomness, not necessarily uniformity.
+func TestScatterWithOneVoter(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	skip.UnderRace(t) // Too slow under stressrace.
+	skip.UnderDeadlock(t)
+	skip.UnderShort(t)
+
+	zcfg := zonepb.DefaultZoneConfig()
+	zcfg.NumReplicas = proto.Int32(1)
+
+	tc := serverutils.StartCluster(t, 3, base.TestClusterArgs{
+		ReplicationMode: base.ReplicationManual,
+		ServerArgs: base.TestServerArgs{
+			Knobs: base.TestingKnobs{
+				Server: &server.TestingKnobs{
+					DefaultZoneConfigOverride: &zcfg,
+				},
+			},
+		},
+	})
+	defer tc.Stopper().Stop(context.Background())
+
+	sqlutils.CreateTable(
+		t, tc.ServerConn(0), "t",
+		"k INT PRIMARY KEY, v INT",
+		500, /* numRows */
+		sqlutils.ToRowFn(sqlutils.RowIdxFn, sqlutils.RowModuloFn(10)),
+	)
+
+	r := sqlutils.MakeSQLRunner(tc.ServerConn(0))
+
+	// Create 49 splits, for 50 ranges in the test table.
+	r.Exec(t, "ALTER TABLE test.t SPLIT AT (SELECT i*10 FROM generate_series(1, 49) AS g(i))")
+
+	getReplicaCounts := func() (map[int]int, int, error) {
+		rows := r.Query(t, `
+      WITH ranges_info AS (
+          SHOW RANGES FROM TABLE test.t
+      )
+      SELECT
+          store_id,
+          count(*) AS replica_count
+      FROM
+          (
+              SELECT
+                  unnest(replicas) AS store_id
+              FROM
+                  ranges_info
+          ) AS store_replicas
+      GROUP BY
+          store_id;`)
+		replicaCounts := make(map[int]int)
+		totalReplicas := 0
+		for rows.Next() {
+			var storeID, replicaCount int
+			if err := rows.Scan(&storeID, &replicaCount); err != nil {
+				return nil, 0, err
+			}
+			replicaCounts[storeID] = replicaCount
+			totalReplicas += replicaCount
+		}
+		if err := rows.Err(); err != nil {
+			return nil, 0, err
+		}
+		return replicaCounts, totalReplicas, nil
+	}
+
+	oldReplicaCounts, oldTotalReplicas, err := getReplicaCounts()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Expect the number of replicas on store 1 to have changed. We can't
+	// assert that the distribution will be even across all three stores, but
+	// s1 (the initial leaseholder and replica) should have a different number
+	// of replicas than before. Rebalancing is otherwise disabled in this
+	// test, so the only replica movements are from the scatter.
+	r.Exec(t, "ALTER TABLE test.t SCATTER")
+	newReplicaCounts, newTotalReplicas, err := getReplicaCounts()
+	require.NoError(t, err)
+
+	require.Equal(t, oldTotalReplicas, newTotalReplicas,
+		"expected total replica count to remain the same post-scatter, "+
+			"old replica counts(%d): %v, new replica counts(%d): %v",
+		oldTotalReplicas, oldReplicaCounts, newTotalReplicas, newReplicaCounts)
+	require.NotEqual(t, oldReplicaCounts[1], newReplicaCounts[1])
+}
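
For readers unfamiliar with joint configurations, the following is a hedged
sketch (plain Go, outside the patch itself) of the replica-state progression
the commit message describes for a one-voter rebalance from s1 to s2. The
state names mirror roachpb replica types, but the sequence is simplified
(the learner snapshot phase is omitted); it is an illustration, not the
actual kvserver state machine:

```
package main

import "fmt"

// rangeState is a toy model of a range's replica set and leaseholder; it is
// illustrative only and not a kvserver type.
type rangeState struct {
	replicas map[string]string // store -> replica type
	lease    string            // store holding the lease
}

func main() {
	rs := rangeState{replicas: map[string]string{"s1": "VOTER_FULL"}, lease: "s1"}
	fmt.Printf("before: %v lease=%s\n", rs.replicas, rs.lease)

	// Enter the joint configuration: the incoming voter is added and the
	// outgoing voter begins demotion, as a single atomic change.
	rs.replicas["s2"] = "VOTER_INCOMING"
	rs.replicas["s1"] = "VOTER_DEMOTING_LEARNER"
	fmt.Printf("joint:  %v lease=%s\n", rs.replicas, rs.lease)

	// Transfer the lease to the incoming voter while still in the joint
	// configuration; removing a leaseholder this way is what #74077 enabled.
	rs.lease = "s2"

	// Leave the joint configuration: the demoting voter is removed and the
	// incoming voter becomes a full voter.
	delete(rs.replicas, "s1")
	rs.replicas["s2"] = "VOTER_FULL"
	fmt.Printf("after:  %v lease=%s\n", rs.replicas, rs.lease)
}
```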