Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.1: kvserver: don't allow VOTER_DEMOTING to acquire lease after transfer #89611

Merged
merged 1 commit into from
Oct 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pkg/kv/kvclient/kvcoord/replica_slice.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,14 @@ func NewReplicaSlice(
}
}
canReceiveLease := func(rDesc roachpb.ReplicaDescriptor) bool {
if err := roachpb.CheckCanReceiveLease(rDesc, desc.Replicas(), true /* leaseHolderRemovalAllowed */); err != nil {
// NOTE: This logic is client-side and it’s trying to determine the set of
// all replicas that could potentially be leaseholders. We pass
// wasLastLeaseholder = true because we don't know who the
// leaseholder is, so it's possible that a VOTER_DEMOTING still holds on to
// the lease.
if err := roachpb.CheckCanReceiveLease(
rDesc, desc.Replicas(), true /* lhRemovalAllowed */, true, /* wasLastLeaseholder */
); err != nil {
return false
}
return true
Expand Down
45 changes: 23 additions & 22 deletions pkg/kv/kvserver/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1391,11 +1391,11 @@ func (a Allocator) rebalanceTarget(
//
// The return values are, in order:
//
// 1. The target on which to add a new replica,
// 2. An existing replica to remove,
// 3. a JSON string for use in the range log, and
// 4. a boolean indicationg whether 1-3 were populated (i.e. whether a rebalance
// opportunity was found).
// 1. The target on which to add a new replica,
// 2. An existing replica to remove,
// 3. a JSON string for use in the range log, and
// 4. a boolean indicationg whether 1-3 were populated (i.e. whether a rebalance
// opportunity was found).
func (a Allocator) RebalanceVoter(
ctx context.Context,
conf roachpb.SpanConfig,
Expand Down Expand Up @@ -1501,7 +1501,8 @@ func (a *Allocator) ValidLeaseTargets(
replDescs := roachpb.MakeReplicaSet(existing)
lhRemovalAllowed := a.storePool.st.Version.IsActive(ctx, clusterversion.EnableLeaseHolderRemoval)
for i := range existing {
if err := roachpb.CheckCanReceiveLease(existing[i], replDescs, lhRemovalAllowed); err != nil {
if err := roachpb.CheckCanReceiveLease(existing[i], replDescs, lhRemovalAllowed,
false /* wasLastLeaseholder */); err != nil {
continue
}
// If we're not allowed to include the current replica, remove it from
Expand Down Expand Up @@ -2055,22 +2056,22 @@ func (a Allocator) shouldTransferLeaseForAccessLocality(
// #13232 or the leaseholder_locality.md RFC for more details), but the general
// logic behind each part of the formula is as follows:
//
// * LeaseRebalancingAggressiveness: Allow the aggressiveness to be tuned via
// a cluster setting.
// * 0.1: Constant factor to reduce aggressiveness by default
// * math.Log10(remoteWeight/sourceWeight): Comparison of the remote replica's
// weight to the local replica's weight. Taking the log of the ratio instead
// of using the ratio directly makes things symmetric -- i.e. r1 comparing
// itself to r2 will come to the same conclusion as r2 comparing itself to r1.
// * math.Log1p(remoteLatencyMillis): This will be 0 if there's no latency,
// removing the weight/latency factor from consideration. Otherwise, it grows
// the aggressiveness for stores that are farther apart. Note that Log1p grows
// faster than Log10 as its argument gets larger, which is intentional to
// increase the importance of latency.
// * overfullScore and underfullScore: rebalanceThreshold helps us get an idea
// of the ideal number of leases on each store. We then calculate these to
// compare how close each node is to its ideal state and use the differences
// from the ideal state on each node to compute a final score.
// - LeaseRebalancingAggressiveness: Allow the aggressiveness to be tuned via
// a cluster setting.
// - 0.1: Constant factor to reduce aggressiveness by default
// - math.Log10(remoteWeight/sourceWeight): Comparison of the remote replica's
// weight to the local replica's weight. Taking the log of the ratio instead
// of using the ratio directly makes things symmetric -- i.e. r1 comparing
// itself to r2 will come to the same conclusion as r2 comparing itself to r1.
// - math.Log1p(remoteLatencyMillis): This will be 0 if there's no latency,
// removing the weight/latency factor from consideration. Otherwise, it grows
// the aggressiveness for stores that are farther apart. Note that Log1p grows
// faster than Log10 as its argument gets larger, which is intentional to
// increase the importance of latency.
// - overfullScore and underfullScore: rebalanceThreshold helps us get an idea
// of the ideal number of leases on each store. We then calculate these to
// compare how close each node is to its ideal state and use the differences
// from the ideal state on each node to compute a final score.
//
// Returns a total score for the replica that takes into account the number of
// leases already on each store. Also returns the raw "adjustment" value that's
Expand Down
18 changes: 10 additions & 8 deletions pkg/kv/kvserver/batcheval/cmd_lease_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,6 @@ func RequestLease(

lhRemovalAllowed :=
cArgs.EvalCtx.ClusterSettings().Version.IsActive(ctx, clusterversion.EnableLeaseHolderRemoval)
// If this check is removed at some point, the filtering of learners on the
// sending side would have to be removed as well.
if err := roachpb.CheckCanReceiveLease(args.Lease.Replica, cArgs.EvalCtx.Desc().Replicas(),
lhRemovalAllowed,
); err != nil {
rErr.Message = err.Error()
return newFailedLeaseTrigger(false /* isTransfer */), rErr
}

// MIGRATION(tschottdorf): needed to apply Raft commands which got proposed
// before the StartStasis field was introduced.
Expand All @@ -89,6 +81,16 @@ func RequestLease(
isExtension := prevLease.Replica.StoreID == newLease.Replica.StoreID
effectiveStart := newLease.Start

// If this check is removed at some point, the filtering of learners on the
// sending side would have to be removed as well.
wasLastLeaseholder := isExtension
if err := roachpb.CheckCanReceiveLease(
args.Lease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed, wasLastLeaseholder,
); err != nil {
rErr.Message = err.Error()
return newFailedLeaseTrigger(false /* isTransfer */), rErr
}

// Wind the start timestamp back as far towards the previous lease as we
// can. That'll make sure that when multiple leases are requested out of
// order at the same replica (after all, they use the request timestamp,
Expand Down
64 changes: 38 additions & 26 deletions pkg/kv/kvserver/batcheval/cmd_lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,35 +268,39 @@ func TestCheckCanReceiveLease(t *testing.T) {

const none = roachpb.ReplicaType(-1)

// CheckCanReceiveLease gets leaseHolderRemovalAllowed and wasLastLeaseholder
// booleans as parameters. Since they are used in a conjunction, this test
// distinguishes between two cases -- the expected result when both booleans
// are true, and the expected result when one of them is false.
for _, tc := range []struct {
leaseholderType roachpb.ReplicaType
anotherReplicaType roachpb.ReplicaType
eligibleLhRemovalEnabled bool
eligibleLhRemovalDisabled bool
leaseholderType roachpb.ReplicaType
anotherReplicaType roachpb.ReplicaType
expIfBothConditionsTrue bool
expIfOneConditionFalse bool
}{
{leaseholderType: roachpb.VOTER_FULL, anotherReplicaType: none, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: true},
{leaseholderType: roachpb.VOTER_INCOMING, anotherReplicaType: none, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: true},
{leaseholderType: roachpb.VOTER_FULL, anotherReplicaType: none, expIfBothConditionsTrue: true, expIfOneConditionFalse: true},
{leaseholderType: roachpb.VOTER_INCOMING, anotherReplicaType: none, expIfBothConditionsTrue: true, expIfOneConditionFalse: true},

// A VOTER_OUTGOING should only be able to get the lease if there's a VOTER_INCOMING.
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_INCOMING, expIfBothConditionsTrue: true, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_OUTGOING, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_FULL, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},

// A VOTER_DEMOTING_LEARNER should only be able to get the lease if there's a VOTER_INCOMING.
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_INCOMING, expIfBothConditionsTrue: true, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_FULL, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_OUTGOING, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},

// A VOTER_DEMOTING_NON_VOTER should only be able to get the lease if there's a VOTER_INCOMING.
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_INCOMING, expIfBothConditionsTrue: true, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_FULL, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_OUTGOING, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},

{leaseholderType: roachpb.LEARNER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.NON_VOTER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
{leaseholderType: roachpb.LEARNER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
{leaseholderType: roachpb.NON_VOTER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
} {
t.Run(tc.leaseholderType.String(), func(t *testing.T) {
repDesc := roachpb.ReplicaDescriptor{
Expand All @@ -313,18 +317,26 @@ func TestCheckCanReceiveLease(t *testing.T) {
}
rngDesc.InternalReplicas = append(rngDesc.InternalReplicas, anotherDesc)
}
err := roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(), true)
require.Equal(t, tc.eligibleLhRemovalEnabled, err == nil, "err: %v", err)

err = roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(), false)
require.Equal(t, tc.eligibleLhRemovalDisabled, err == nil, "err: %v", err)
for _, leaseHolderRemovalAllowed := range []bool{false, true} {
for _, wasLastLeaseholder := range []bool{false, true} {
expectedResult := tc.expIfOneConditionFalse
if leaseHolderRemovalAllowed && wasLastLeaseholder {
expectedResult = tc.expIfBothConditionsTrue
}
err := roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(),
leaseHolderRemovalAllowed, wasLastLeaseholder)
require.Equal(t, expectedResult, err == nil, "err: %v", err)
}
}
})
}

t.Run("replica not in range desc", func(t *testing.T) {
repDesc := roachpb.ReplicaDescriptor{ReplicaID: 1}
rngDesc := roachpb.RangeDescriptor{}
require.Regexp(t, "replica.*not found", roachpb.CheckCanReceiveLease(repDesc,
rngDesc.Replicas(), true))
rngDesc.Replicas(), true, true))
require.Regexp(t, "replica.*not found", roachpb.CheckCanReceiveLease(repDesc,
rngDesc.Replicas(), true, false))
})
}
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func TransferLease(
// If this check is removed at some point, the filtering of learners on the
// sending side would have to be removed as well.
if err := roachpb.CheckCanReceiveLease(
newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed,
newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed, false, /* wasLastLeaseholder */
); err != nil {
return newFailedLeaseTrigger(true /* isTransfer */), err
}
Expand Down
Loading