Skip to content

Commit

Permalink
kvserver: rebalance ranges with one voter using joint configurations
Browse files Browse the repository at this point in the history
The allocator would add a voter, instead of both adding and removing the
existing voter when rebalancing ranges with one replica. Removing the
leaseholder replica was not possible prior to cockroachdb#74077, so the addition
only was necessary.

This restriction is no longer necessary. Allow rebalancing a one voter
range between stores using joint configurations, where the lease will be
transferred to the incoming voter store, from the outgoing demoting
voter.

Scattering ranges with one voter will now leave the range with exactly
one voter, where previously both the leaseholder voter evaluating the
scatter, and the new voter would be left.

Before this patch, scattering 1000 ranges with RF=1 on a 5 store
cluster:

```
  store_id | replica_count | replica_distribution | lease_count | lease_distribution
-----------+---------------+----------------------+-------------+---------------------
         1 |          1001 | ##########           |         500 | ##########
         5 |           291 | ###                  |         147 | ###
         4 |           275 | ###                  |         137 | ###
         3 |           229 | ###                  |         118 | ###
         2 |           206 | ###                  |          99 | ##
```

After:

```
  store_id | replica_count | replica_distribution | lease_count | lease_distribution
-----------+---------------+----------------------+-------------+---------------------
         2 |           242 | ##########           |         241 | ##########
         4 |           227 | ##########           |         227 | ##########
         5 |           217 | #########            |         216 | #########
         3 |           209 | #########            |         208 | #########
         1 |           106 | #####                |         109 | #####
```

Fixes: cockroachdb#108420
Fixes: cockroachdb#124171

Release note (bug fix): Scattering a range with replication factor=1, no
longer erroneously up-replicates the range to two replicas. Leases will
also no longer thrash between nodes when perturbed with replication
factor=1.
  • Loading branch information
kvoli committed May 28, 2024
1 parent a51444d commit 8bc831d
Show file tree
Hide file tree
Showing 8 changed files with 336 additions and 110 deletions.
2 changes: 2 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ ALL_TESTS = [
"//pkg/kv/kvserver/abortspan:abortspan_test",
"//pkg/kv/kvserver/allocator/allocator2:allocator2_test",
"//pkg/kv/kvserver/allocator/allocatorimpl:allocatorimpl_test",
"//pkg/kv/kvserver/allocator/plan:plan_test",
"//pkg/kv/kvserver/allocator/storepool:storepool_test",
"//pkg/kv/kvserver/apply:apply_test",
"//pkg/kv/kvserver/asim/gossip:gossip_test",
Expand Down Expand Up @@ -1324,6 +1325,7 @@ GO_TARGETS = [
"//pkg/kv/kvserver/allocator/allocatorimpl:allocatorimpl_test",
"//pkg/kv/kvserver/allocator/load:load",
"//pkg/kv/kvserver/allocator/plan:plan",
"//pkg/kv/kvserver/allocator/plan:plan_test",
"//pkg/kv/kvserver/allocator/storepool:storepool",
"//pkg/kv/kvserver/allocator/storepool:storepool_test",
"//pkg/kv/kvserver/allocator:allocator",
Expand Down
17 changes: 16 additions & 1 deletion pkg/kv/kvserver/allocator/plan/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "plan",
Expand Down Expand Up @@ -26,3 +26,18 @@ go_library(
"@io_etcd_go_raft_v3//:raft",
],
)

go_test(
name = "plan_test",
srcs = ["util_test.go"],
args = ["-test.timeout=295s"],
embed = [":plan"],
deps = [
"//pkg/kv/kvpb",
"//pkg/kv/kvserver/allocator/allocatorimpl",
"//pkg/roachpb",
"//pkg/util/leaktest",
"//pkg/util/log",
"@com_github_stretchr_testify//require",
],
)
46 changes: 0 additions & 46 deletions pkg/kv/kvserver/allocator/plan/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"go.etcd.io/raft/v3"
)
Expand All @@ -37,51 +36,6 @@ func ReplicationChangesForRebalance(
rebalanceTargetType allocatorimpl.TargetReplicaType,
) (chgs []kvpb.ReplicationChange, performingSwap bool, err error) {
rdesc, found := desc.GetReplicaDescriptor(addTarget.StoreID)
if rebalanceTargetType == allocatorimpl.VoterTarget && numExistingVoters == 1 {
// If there's only one replica, the removal target is the
// leaseholder and this is unsupported and will fail. However,
// this is also the only way to rebalance in a single-replica
// range. If we try the atomic swap here, we'll fail doing
// nothing, and so we stay locked into the current distribution
// of replicas. (Note that maybeTransferLeaseAway above will not
// have found a target, and so will have returned (false, nil).
//
// Do the best thing we can, which is carry out the addition
// only, which should succeed, and the next time we touch this
// range, we will have one more replica and hopefully it will
// take the lease and remove the current leaseholder.
//
// It's possible that "rebalancing deadlock" can occur in other
// scenarios, it's really impossible to tell from the code given
// the constraints we support. However, the lease transfer often
// does not happen spuriously, and we can't enter dangerous
// configurations sporadically, so this code path is only hit
// when we know it's necessary, picking the smaller of two evils.
//
// See https://github.com/cockroachdb/cockroach/issues/40333.
log.KvDistribution.Infof(ctx, "can't swap replica due to lease; falling back to add")

// Even when there is only 1 existing voter, there may be other replica
// types in the range. Check if the add target already has a replica, if so
// it must be a non-voter or the rebalance is invalid.
if found && rdesc.Type == roachpb.NON_VOTER {
// The receiving store already has a non-voting replica. Instead of just
// adding a voter to the receiving store, we *must* promote the non-voting
// replica to a voter.
chgs = kvpb.ReplicationChangesForPromotion(addTarget)
} else if !found {
chgs = []kvpb.ReplicationChange{
{ChangeType: roachpb.ADD_VOTER, Target: addTarget},
}
} else {
return nil, false, errors.AssertionFailedf(
"invalid rebalancing decision: trying to"+
" move voter to a store that already has a replica %s for the range", rdesc,
)
}
return chgs, false, err
}

switch rebalanceTargetType {
case allocatorimpl.VoterTarget:
// Check if the target being added already has a non-voting replica.
Expand Down
188 changes: 188 additions & 0 deletions pkg/kv/kvserver/allocator/plan/util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package plan

import (
"context"
"testing"

"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/stretchr/testify/require"
)

// TestReplicationChangesForRebalance asserts that the replication changes for
// rebalancing are correct, given a a range descriptor and rebalance target.
func TestReplicationChangesForRebalance(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()

testCases := []struct {
name string
desc *roachpb.RangeDescriptor
addTarget, removeTarget roachpb.ReplicationTarget
rebalanceTargetType allocatorimpl.TargetReplicaType
expectedChanges []kvpb.ReplicationChange
expectedPerformingSwap bool
expectedErrStr string
}{
{
name: "rf=1 rebalance voter 1->2",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 2, StoreID: 2},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.VoterTarget,
expectedChanges: []kvpb.ReplicationChange{
{ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 2, StoreID: 2}},
{ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}},
},
expectedPerformingSwap: false,
expectedErrStr: "",
},
{
name: "rf=3 rebalance voter 1->4",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL},
{NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL},
{NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.VoterTarget,
expectedChanges: []kvpb.ReplicationChange{
{ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}},
{ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}},
},
expectedPerformingSwap: false,
expectedErrStr: "",
},
{
name: "rf=3 rebalance voter 1->3 error: already has a voter",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL},
{NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL},
{NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.VoterTarget,
expectedChanges: nil,
expectedPerformingSwap: false,
expectedErrStr: "programming error: store being rebalanced to(3) already has a voting replica",
},
{
name: "rf=3 rebalance non-voter: 1->4",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER},
{NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL},
{NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.NonVoterTarget,
expectedChanges: []kvpb.ReplicationChange{
{ChangeType: roachpb.ADD_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 4, StoreID: 4}},
{ChangeType: roachpb.REMOVE_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}},
},
expectedPerformingSwap: false,
expectedErrStr: "",
},
{
name: "rf=3 rebalance non-voter 1->3 error: already has a voter",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER},
{NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL},
{NodeID: 3, StoreID: 3, Type: roachpb.VOTER_FULL},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.NonVoterTarget,
expectedChanges: nil,
expectedPerformingSwap: false,
expectedErrStr: "invalid rebalancing decision: trying to move non-voter to a store that already has a replica",
},
{
name: "rf=3 rebalance non-voter 1->3 error: already has a non-voter",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.NON_VOTER},
{NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL},
{NodeID: 3, StoreID: 3, Type: roachpb.NON_VOTER},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.NonVoterTarget,
expectedChanges: nil,
expectedPerformingSwap: false,
expectedErrStr: "invalid rebalancing decision: trying to move non-voter to a store that already has a replica",
},
{
name: "rf=3 rebalance voter 1->3 swap",
desc: &roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, Type: roachpb.VOTER_FULL},
{NodeID: 2, StoreID: 2, Type: roachpb.VOTER_FULL},
{NodeID: 3, StoreID: 3, Type: roachpb.NON_VOTER},
},
},
addTarget: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3},
removeTarget: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1},
rebalanceTargetType: allocatorimpl.VoterTarget,
expectedChanges: []kvpb.ReplicationChange{
{ChangeType: roachpb.ADD_VOTER, Target: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}},
{ChangeType: roachpb.REMOVE_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 3, StoreID: 3}},
{ChangeType: roachpb.ADD_NON_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}},
{ChangeType: roachpb.REMOVE_VOTER, Target: roachpb.ReplicationTarget{NodeID: 1, StoreID: 1}},
},
expectedPerformingSwap: true,
expectedErrStr: "",
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
chgs, performingSwap, err := ReplicationChangesForRebalance(
ctx,
tc.desc,
len(tc.desc.Replicas().VoterDescriptors()),
tc.addTarget,
tc.removeTarget,
tc.rebalanceTargetType,
)
require.Equal(t, tc.expectedChanges, chgs)
require.Equal(t, tc.expectedPerformingSwap, performingSwap)
if tc.expectedErrStr != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tc.expectedErrStr)
} else {
require.NoError(t, err)
}
})
}
}
6 changes: 3 additions & 3 deletions pkg/kv/kvserver/asim/tests/testdata/non_rand/example_add_node
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ plot stat=replicas sample=1
301 ┼──────────────────────────────────────╭────────────────────────────────────────
281 ┤ ╭╭─╯
261 ┤ ╭╭──╯
241 ┤ ╭─╯
221 ┤ ╭──
201 ┤ ╭╭─
241 ┤ ╭─╯
221 ┤ ╭──╯
201 ┤ ╭─╯
181 ┤ ╭──╯
161 ┤ ╭──╯
140 ┤ ╭──╯╯
Expand Down
54 changes: 27 additions & 27 deletions pkg/kv/kvserver/asim/tests/testdata/non_rand/one_voter
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ plot stat=replicas
----
----

100.00 ┼──────╭────────────
93.33 ┤ ╭╯ ╰╮ ╰────╮
86.67 ┤ │ ╰╮ ╭╰╮───────
80.00 ┤ │ ╰╮ ╭─╯ ╰╮ ╰─╭╮
73.33 ┤ ╭╯ ╰╮ ╭╯ ╰╮ ╭╯╰─────────
66.67 ┤ │ │ ╭─╯ ╰─╮ │ ╰─╮ ╭──╰─╮╭╮──╭╮╮
60.00 ┤ ╭╯ ╰╮ │ │ ╭╯ ╰─╯ ╰╯╰──╯╰────────╮╭╮╭─╮╮
53.33 ┤ │ │ ╭─╯ ╰╮ ╭╯ ╰╯ ╰╯╰╯╰╯ ╰──────────
46.67 ┤ ╭╯ ╰╮ │ ╰─╯
40.00 ┤ │ │╭╯
33.33 ┤ │ ╰╯
100.00 ┼╮
93.33 ┤
86.67 ┤
80.00 ┤
73.33 ┤
66.67 ┤
60.00 ┤
53.33 ┤ ╰────────────────────────────────────────────────────────────────────────────
46.67 ┤ ╭────────────────────────────────────────────────────────────────────────────
40.00 ┤ │
33.33 ┤ │
26.67 ┤ ╭╯
20.00 ┤ │
13.33 ┤╭╯
Expand All @@ -38,22 +38,22 @@ plot stat=leases
----
----

100.00 ┼──────╮ ╭───
93.33 ┤ ╰╮ ╭╯ ╰────╮
86.67 ┤ │ │ │
80.00 ┤ ╰╮ ╭╯ ╰╮ ╭─────╮
73.33 ┤ │ │ │ ╭╯ ╰─╮ ╭───
66.67 ┤ ╰╮╭╯ ╰╮ ╭╯ ╰╮ ╭╯ ╰─╮ ╭──╮
60.00 ┤ ││ │ │ │ │ ╰╮╭╯ ╰──╮ ╭──╮
53.33 ┤ ╭╯ ╰╮╯ ╰╭╯ │╯ ╰╭╯ ╰──╮──────────────
46.67 ┤ │╮ ╭╰╮ ╭╯╮ ╰╮ ╭╯╮ ╭──╰──────────────
40.00 ┤ ││ │ │ │ │ ╭╯╰╮ ╭──╯ ╰──╯
33.33 ┤ ╭╯╰╮ ╭╯ ╰╮ ╭╯ ╰╮ ╭─╯ ╰──╯
26.67 ┤ │ │ │ ╰╮ ╭─╯ ╰───
20.00 ┤ ╭╯ ╰╮ ╭╯ ╰─────╯
13.33 ┤ │ │ │
6.67 ┤ ╭╯ ╰╮ ╭────╯
0.00 ┼──────╯ ╰───
100.00 ┼╮
93.33 ┤
86.67 ┤╰╮
80.00 ┤
73.33 ┤
66.67 ┤
60.00 ┤
53.33 ┤ ╰────────────────────────────────────────────────────────────────────────────
46.67 ┤ ╭────────────────────────────────────────────────────────────────────────────
40.00 ┤
33.33 ┤
26.67 ┤
20.00 ┤
13.33 ┤╭╯
6.67 ┤
0.00 ┼╯
leases
----
----
Expand Down
Loading

0 comments on commit 8bc831d

Please sign in to comment.