Skip to content

Commit

Permalink
kvserver: modernize AdminRelocateRange
Browse files Browse the repository at this point in the history
Previously, `AdminRelocateRange`'s contract was to transfer the lease away to
the first voting replica in the input slice of targets. However, this was
mostly a result of the fact that leaseholder replicas weren't allowed to remove
themselves. Thus, the caller had to specifically tell `AdminRelocateRange`
where it wanted the lease to be, and the `AdminRelocateRange` logic would
ensure to transfer the lease away to this replica either before it removed the
leaseholder or after it was done performing all the replica relocations.

There are two internal callers of `AdminRelocateRange`, and this property of
the `AdminRelocateRange` API is unfortunate for both of them.

1. The merge queue takes extra effort to avoid a lease transfer when calling
into `AdminRelocateRange`.

2. The store rebalancer performs a lease transfer based on very naive logic
that is not optimal.

With #74077, leaseholder replicas will be able to remove themselves by
transferring the lease away to a suitable replica after they enter the joint
configuration of the replication change. This patch makes it such that, going
forward, `AdminRelocateRange` will only perform an explicit lease transfer if
the caller specifically asks for it. The patch also contains a migration path
for this functionality.

Once #74077 lands, a future patch will change the store rebalancer and the
merge queue behavior to leverage this new functionality.

Release note: None
  • Loading branch information
aayushshah15 committed Feb 1, 2022
1 parent 323988f commit 6525423
Show file tree
Hide file tree
Showing 11 changed files with 158 additions and 25 deletions.
10 changes: 7 additions & 3 deletions pkg/kv/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,9 @@ func (b *Batch) adminChangeReplicas(
// adminRelocateRange is only exported on DB. It is here for symmetry with the
// other operations.
func (b *Batch) adminRelocateRange(
key interface{}, voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
key interface{},
voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
transferLeaseToFirstVoter bool,
) {
k, err := marshalKey(key)
if err != nil {
Expand All @@ -753,8 +755,10 @@ func (b *Batch) adminRelocateRange(
RequestHeader: roachpb.RequestHeader{
Key: k,
},
VoterTargets: voterTargets,
NonVoterTargets: nonVoterTargets,
VoterTargets: voterTargets,
NonVoterTargets: nonVoterTargets,
TransferLeaseToFirstVoter: transferLeaseToFirstVoter,
TransferLeaseToFirstVoterAccurate: true,
}
b.appendReqs(req)
b.initResult(1, 0, notRaw, nil)
Expand Down
7 changes: 5 additions & 2 deletions pkg/kv/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -655,10 +655,13 @@ func (db *DB) AdminChangeReplicas(
// AdminRelocateRange relocates the replicas for a range onto the specified
// list of stores.
func (db *DB) AdminRelocateRange(
ctx context.Context, key interface{}, voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
ctx context.Context,
key interface{},
voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
transferLeaseToFirstVoter bool,
) error {
b := &Batch{}
b.adminRelocateRange(key, voterTargets, nonVoterTargets)
b.adminRelocateRange(key, voterTargets, nonVoterTargets, transferLeaseToFirstVoter)
return getOneErr(db.Run(ctx, b), b)
}

Expand Down
56 changes: 53 additions & 3 deletions pkg/kv/kvserver/client_relocate_range_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ func relocateAndCheck(
testutils.SucceedsSoon(t, func() error {
err := tc.Servers[0].DB().
AdminRelocateRange(
context.Background(), startKey.AsRawKey(), voterTargets, nonVoterTargets,
context.Background(),
startKey.AsRawKey(),
voterTargets,
nonVoterTargets,
true, /* transferLeaseToFirstVoter */
)
if err != nil {
if every.ShouldLog() {
Expand Down Expand Up @@ -77,7 +81,11 @@ func requireRelocationFailure(
) {
testutils.SucceedsSoon(t, func() error {
err := tc.Servers[0].DB().AdminRelocateRange(
ctx, startKey.AsRawKey(), voterTargets, nonVoterTargets,
ctx,
startKey.AsRawKey(),
voterTargets,
nonVoterTargets,
true, /* transferLeaseToFirstVoter */
)
if kv.IsExpectedRelocateError(err) {
return err
Expand Down Expand Up @@ -299,6 +307,44 @@ func TestAdminRelocateRange(t *testing.T) {
}
}

// TestAdminRelocateRangeWithoutLeaseTransfer tests that `AdminRelocateRange`
// only transfers the lease away to the first voting replica in the target slice
// if the callers asks it to.
func TestAdminRelocateRangeWithoutLeaseTransfer(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
args := base.TestClusterArgs{
ReplicationMode: base.ReplicationManual,
}

tc := testcluster.StartTestCluster(t, 5 /* numNodes */, args)
defer tc.Stopper().Stop(ctx)

k := keys.MustAddr(tc.ScratchRange(t))

// Add voters to the first three nodes.
relocateAndCheck(t, tc, k, tc.Targets(0, 1, 2), nil /* nonVoterTargets */)

// Move the last voter without asking for the lease to move.
err := tc.Servers[0].DB().AdminRelocateRange(
context.Background(),
k.AsRawKey(),
tc.Targets(3, 1, 0),
nil, /* nonVoterTargets */
false, /* transferLeaseToFirstVoter */
)
require.NoError(t, err)
leaseholder, err := tc.FindRangeLeaseHolder(tc.LookupRangeOrFatal(t, k.AsRawKey()), nil /* hint */)
require.NoError(t, err)
require.Equal(
t,
roachpb.ReplicationTarget{NodeID: leaseholder.NodeID, StoreID: leaseholder.StoreID},
tc.Target(0),
)
}

func TestAdminRelocateRangeFailsWithDuplicates(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down Expand Up @@ -339,7 +385,11 @@ func TestAdminRelocateRangeFailsWithDuplicates(t *testing.T) {
}
for _, subtest := range tests {
err := tc.Servers[0].DB().AdminRelocateRange(
context.Background(), k.AsRawKey(), tc.Targets(subtest.voterTargets...), tc.Targets(subtest.nonVoterTargets...),
context.Background(),
k.AsRawKey(),
tc.Targets(subtest.voterTargets...),
tc.Targets(subtest.nonVoterTargets...),
true, /* transferLeaseToFirstVoter */
)
require.Regexp(t, subtest.expectedErr, err)
}
Expand Down
26 changes: 20 additions & 6 deletions pkg/kv/kvserver/client_replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2461,9 +2461,14 @@ func TestRandomConcurrentAdminChangeReplicasRequests(t *testing.T) {
var wg sync.WaitGroup
key := roachpb.Key("a")
db := tc.Servers[0].DB()
require.Nil(t, db.AdminRelocateRange(
ctx, key, makeReplicationTargets(1, 2, 3), nil,
))
require.Nil(
t,
db.AdminRelocateRange(
ctx, key, makeReplicationTargets(1, 2, 3),
nil, /* nonVoterTargets */
true, /* transferLeaseToFirstVoter */
),
)
// Random targets consisting of a random number of nodes from the set of nodes
// in the cluster which currently do not have a replica.
pickTargets := func() []roachpb.ReplicationTarget {
Expand Down Expand Up @@ -3015,8 +3020,13 @@ func TestAdminRelocateRangeSafety(t *testing.T) {

key := roachpb.Key("a")
assert.Nil(t, db.AdminRelocateRange(
ctx, key, makeReplicationTargets(1, 2, 3), makeReplicationTargets(),
))
ctx,
key,
makeReplicationTargets(1, 2, 3),
makeReplicationTargets(),
true, /* transferLeaseToFirstVoter */
),
)
rangeInfo, err := getRangeInfo(ctx, db, key)
assert.Nil(t, err)
assert.Len(t, rangeInfo.Desc.InternalReplicas, 3)
Expand Down Expand Up @@ -3052,7 +3062,11 @@ func TestAdminRelocateRangeSafety(t *testing.T) {
}
relocate := func() {
relocateErr = db.AdminRelocateRange(
ctx, key, makeReplicationTargets(1, 2, 4), makeReplicationTargets(),
ctx,
key,
makeReplicationTargets(1, 2, 4),
makeReplicationTargets(),
true, /* transferLeaseToFirstVoter */
)
}
useSeenAdd.Store(true)
Expand Down
6 changes: 5 additions & 1 deletion pkg/kv/kvserver/merge_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,11 @@ func (mq *mergeQueue) process(
// The merge queue will only merge ranges that have the same zone config
// (see check inside mergeQueue.shouldQueue).
if err := mq.store.DB().AdminRelocateRange(
ctx, rhsDesc.StartKey, voterTargets, nonVoterTargets,
ctx,
rhsDesc.StartKey,
voterTargets,
nonVoterTargets,
true, /* transferLeaseToFirstVoter */
); err != nil {
return false, err
}
Expand Down
26 changes: 21 additions & 5 deletions pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -2657,6 +2657,7 @@ func (s *Store) AdminRelocateRange(
ctx context.Context,
rangeDesc roachpb.RangeDescriptor,
voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
transferLeaseToFirstVoter bool,
) error {
if containsDuplicates(voterTargets) {
return errors.AssertionFailedf(
Expand Down Expand Up @@ -2686,7 +2687,9 @@ func (s *Store) AdminRelocateRange(
}
rangeDesc = *newDesc

rangeDesc, err = s.relocateReplicas(ctx, rangeDesc, voterTargets, nonVoterTargets)
rangeDesc, err = s.relocateReplicas(
ctx, rangeDesc, voterTargets, nonVoterTargets, transferLeaseToFirstVoter,
)
if err != nil {
return err
}
Expand Down Expand Up @@ -2718,6 +2721,7 @@ func (s *Store) relocateReplicas(
ctx context.Context,
rangeDesc roachpb.RangeDescriptor,
voterTargets, nonVoterTargets []roachpb.ReplicationTarget,
transferLeaseToFirstVoter bool,
) (roachpb.RangeDescriptor, error) {
startKey := rangeDesc.StartKey.AsRawKey()
transferLease := func(target roachpb.ReplicationTarget) error {
Expand Down Expand Up @@ -2748,16 +2752,28 @@ func (s *Store) relocateReplicas(
if err != nil {
return rangeDesc, err
}

if leaseTarget != nil {
// NB: we may need to transfer even if there are no ops, to make
// sure the attempt is made to make the first target the final
// leaseholder.
// If there are no more ops remaining and we have a non-nil lease
// target, it means that we are transferring the lease away to the first
// voting replica in the target slice.
if len(ops) == 0 {
if leaseTarget.StoreID != voterTargets[0].StoreID {
log.Fatalf(ctx, "programming error: spurious lease transfer attempt to a store that"+
" isn't the first voter in the target slice")
}
if !transferLeaseToFirstVoter {
// Done.
return rangeDesc, ctx.Err()
}
}
if err := transferLease(*leaseTarget); err != nil {
return rangeDesc, err
}
}

if len(ops) == 0 {
// Done.
// We're done.
return rangeDesc, ctx.Err()
}

Expand Down
6 changes: 5 additions & 1 deletion pkg/kv/kvserver/replica_learner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,11 @@ func TestLearnerOrJointConfigAdminRelocateRange(t *testing.T) {

check := func(voterTargets []roachpb.ReplicationTarget) {
require.NoError(t, tc.Server(0).DB().AdminRelocateRange(
ctx, scratchStartKey, voterTargets, []roachpb.ReplicationTarget{},
ctx,
scratchStartKey,
voterTargets,
[]roachpb.ReplicationTarget{},
true, /* transferLeaseToFirstVoter */
))
desc := tc.LookupRangeOrFatal(t, scratchStartKey)
voters := desc.Replicas().VoterDescriptors()
Expand Down
11 changes: 10 additions & 1 deletion pkg/kv/kvserver/replica_send.go
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,16 @@ func (r *Replica) executeAdminBatch(
}

case *roachpb.AdminRelocateRangeRequest:
err := r.store.AdminRelocateRange(ctx, *r.Desc(), tArgs.VoterTargets, tArgs.NonVoterTargets)
// Transferring the lease to the first voting replica in the target slice is
// pre-22.1 behavior.
// We revert to that behavior if the request is coming
// from a 21.2 node that doesn't yet know about this change in contract.
transferLeaseToFirstVoter := !tArgs.TransferLeaseToFirstVoterAccurate
// We also revert to that behavior if the caller specifically asked for it.
transferLeaseToFirstVoter = transferLeaseToFirstVoter || tArgs.TransferLeaseToFirstVoter
err := r.store.AdminRelocateRange(
ctx, *r.Desc(), tArgs.VoterTargets, tArgs.NonVoterTargets, transferLeaseToFirstVoter,
)
pErr = roachpb.NewError(err)
resp = &roachpb.AdminRelocateRangeResponse{}

Expand Down
8 changes: 7 additions & 1 deletion pkg/kv/kvserver/store_rebalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,13 @@ func (sr *StoreRebalancer) rebalanceStore(

timeout := sr.rq.processTimeoutFunc(sr.st, replWithStats.repl)
if err := contextutil.RunWithTimeout(ctx, "relocate range", timeout, func(ctx context.Context) error {
return sr.rq.store.AdminRelocateRange(ctx, *descBeforeRebalance, voterTargets, nonVoterTargets)
return sr.rq.store.DB().AdminRelocateRange(
ctx,
descBeforeRebalance.StartKey.AsRawKey(),
voterTargets,
nonVoterTargets,
true, /* transferLeaseToFirstVoter */
)
}); err != nil {
log.Errorf(ctx, "unable to relocate range to %v: %+v", voterTargets, err)
continue
Expand Down
15 changes: 15 additions & 0 deletions pkg/roachpb/api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,21 @@ message AdminRelocateRangeRequest {
repeated ReplicationTarget voter_targets = 2 [(gogoproto.nullable) = false];
repeated ReplicationTarget non_voter_targets = 3 [(gogoproto.nullable) = false];
// TODO(a-robinson): Add "reason"/"details" string fields?

// As of 22.1 (specifically #74077), leaseholder replicas can remove
// themselves from the range. This means that now, in a joint state, the
// leaseholder that is removing itself chooses the best target replica to
// transfer the lease to, all inside of AdminChangeReplicas.
//
// This means that the pre-22.1 contract of `AdminRelocateRange` to transfer
// the lease to the first voter replica isn't required anymore. Only callers
// that rely on this contract should set this attribute.
bool transfer_lease_to_first_voter = 4;
// TODO(aayush): Migration path:
// 22.1: Send and consult the attribute.
// 22.2: Send but don't consult the attribute.
// 23.1: Stop sending or consulting the attribute. Remove this field.
bool transfer_lease_to_first_voter_accurate = 5;
}

message AdminRelocateRangeResponse {
Expand Down
12 changes: 10 additions & 2 deletions pkg/sql/relocate.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,21 @@ func (n *relocateNode) Next(params runParams) (bool, error) {
}
case tree.RelocateNonVoters:
if err := params.p.ExecCfg().DB.AdminRelocateRange(
params.ctx, rowKey, existingVoters, relocationTargets,
params.ctx,
rowKey,
existingVoters,
relocationTargets,
true, /* transferLeaseToFirstVoter */
); err != nil {
return false, err
}
case tree.RelocateVoters:
if err := params.p.ExecCfg().DB.AdminRelocateRange(
params.ctx, rowKey, relocationTargets, existingNonVoters,
params.ctx,
rowKey,
relocationTargets,
existingNonVoters,
true, /* transferLeaseToFirstVoter */
); err != nil {
return false, err
}
Expand Down

0 comments on commit 6525423

Please sign in to comment.