diff --git a/pkg/kv/kvclient/kvcoord/replica_slice.go b/pkg/kv/kvclient/kvcoord/replica_slice.go
index fc3e010ffdb1..1ddafa57040a 100644
--- a/pkg/kv/kvclient/kvcoord/replica_slice.go
+++ b/pkg/kv/kvclient/kvcoord/replica_slice.go
@@ -81,7 +81,14 @@ func NewReplicaSlice(
 		}
 	}
 	canReceiveLease := func(rDesc roachpb.ReplicaDescriptor) bool {
-		if err := roachpb.CheckCanReceiveLease(rDesc, desc.Replicas(), true /* leaseHolderRemovalAllowed */); err != nil {
+		// NOTE: This logic is client-side and it's trying to determine the set of
+		// all replicas that could potentially be leaseholders. We pass
+		// wasLastLeaseholder = true because we don't know who the
+		// leaseholder is, so it's possible that a VOTER_DEMOTING still holds on to
+		// the lease.
+		if err := roachpb.CheckCanReceiveLease(
+			rDesc, desc.Replicas(), true /* lhRemovalAllowed */, true, /* wasLastLeaseholder */
+		); err != nil {
 			return false
 		}
 		return true
diff --git a/pkg/kv/kvserver/allocator.go b/pkg/kv/kvserver/allocator.go
index 780a4f3171b3..f92ef20a5fb7 100644
--- a/pkg/kv/kvserver/allocator.go
+++ b/pkg/kv/kvserver/allocator.go
@@ -1391,11 +1391,11 @@ func (a Allocator) rebalanceTarget(
 //
 // The return values are, in order:
 //
-// 1. The target on which to add a new replica,
-// 2. An existing replica to remove,
-// 3. a JSON string for use in the range log, and
-// 4. a boolean indicationg whether 1-3 were populated (i.e. whether a rebalance
-// opportunity was found).
+//  1. The target on which to add a new replica,
+//  2. An existing replica to remove,
+//  3. a JSON string for use in the range log, and
+//  4. a boolean indicationg whether 1-3 were populated (i.e. whether a rebalance
+//     opportunity was found).
 func (a Allocator) RebalanceVoter(
 	ctx context.Context,
 	conf roachpb.SpanConfig,
@@ -1501,7 +1501,8 @@ func (a *Allocator) ValidLeaseTargets(
 	replDescs := roachpb.MakeReplicaSet(existing)
 	lhRemovalAllowed := a.storePool.st.Version.IsActive(ctx, clusterversion.EnableLeaseHolderRemoval)
 	for i := range existing {
-		if err := roachpb.CheckCanReceiveLease(existing[i], replDescs, lhRemovalAllowed); err != nil {
+		if err := roachpb.CheckCanReceiveLease(existing[i], replDescs, lhRemovalAllowed,
+			false /* wasLastLeaseholder */); err != nil {
 			continue
 		}
 		// If we're not allowed to include the current replica, remove it from
@@ -2055,22 +2056,22 @@ func (a Allocator) shouldTransferLeaseForAccessLocality(
 // #13232 or the leaseholder_locality.md RFC for more details), but the general
 // logic behind each part of the formula is as follows:
 //
-// * LeaseRebalancingAggressiveness: Allow the aggressiveness to be tuned via
-// a cluster setting.
-// * 0.1: Constant factor to reduce aggressiveness by default
-// * math.Log10(remoteWeight/sourceWeight): Comparison of the remote replica's
-// weight to the local replica's weight. Taking the log of the ratio instead
-// of using the ratio directly makes things symmetric -- i.e. r1 comparing
-// itself to r2 will come to the same conclusion as r2 comparing itself to r1.
-// * math.Log1p(remoteLatencyMillis): This will be 0 if there's no latency,
-// removing the weight/latency factor from consideration. Otherwise, it grows
-// the aggressiveness for stores that are farther apart. Note that Log1p grows
-// faster than Log10 as its argument gets larger, which is intentional to
-// increase the importance of latency.
-// * overfullScore and underfullScore: rebalanceThreshold helps us get an idea
-// of the ideal number of leases on each store. We then calculate these to
-// compare how close each node is to its ideal state and use the differences
-// from the ideal state on each node to compute a final score.
+//   - LeaseRebalancingAggressiveness: Allow the aggressiveness to be tuned via
+//     a cluster setting.
+//   - 0.1: Constant factor to reduce aggressiveness by default
+//   - math.Log10(remoteWeight/sourceWeight): Comparison of the remote replica's
+//     weight to the local replica's weight. Taking the log of the ratio instead
+//     of using the ratio directly makes things symmetric -- i.e. r1 comparing
+//     itself to r2 will come to the same conclusion as r2 comparing itself to r1.
+//   - math.Log1p(remoteLatencyMillis): This will be 0 if there's no latency,
+//     removing the weight/latency factor from consideration. Otherwise, it grows
+//     the aggressiveness for stores that are farther apart. Note that Log1p grows
+//     faster than Log10 as its argument gets larger, which is intentional to
+//     increase the importance of latency.
+//   - overfullScore and underfullScore: rebalanceThreshold helps us get an idea
+//     of the ideal number of leases on each store. We then calculate these to
+//     compare how close each node is to its ideal state and use the differences
+//     from the ideal state on each node to compute a final score.
 //
 // Returns a total score for the replica that takes into account the number of
 // leases already on each store. Also returns the raw "adjustment" value that's
diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_request.go b/pkg/kv/kvserver/batcheval/cmd_lease_request.go
index 2758a3861f1c..80ae0e4d36bc 100644
--- a/pkg/kv/kvserver/batcheval/cmd_lease_request.go
+++ b/pkg/kv/kvserver/batcheval/cmd_lease_request.go
@@ -70,14 +70,6 @@ func RequestLease(
 	lhRemovalAllowed := cArgs.EvalCtx.ClusterSettings().Version.IsActive(ctx,
 		clusterversion.EnableLeaseHolderRemoval)
 
-	// If this check is removed at some point, the filtering of learners on the
-	// sending side would have to be removed as well.
-	if err := roachpb.CheckCanReceiveLease(args.Lease.Replica, cArgs.EvalCtx.Desc().Replicas(),
-		lhRemovalAllowed,
-	); err != nil {
-		rErr.Message = err.Error()
-		return newFailedLeaseTrigger(false /* isTransfer */), rErr
-	}
 
 	// MIGRATION(tschottdorf): needed to apply Raft commands which got proposed
 	// before the StartStasis field was introduced.
@@ -89,6 +81,16 @@ func RequestLease(
 	isExtension := prevLease.Replica.StoreID == newLease.Replica.StoreID
 	effectiveStart := newLease.Start
 
+	// If this check is removed at some point, the filtering of learners on the
+	// sending side would have to be removed as well.
+	wasLastLeaseholder := isExtension
+	if err := roachpb.CheckCanReceiveLease(
+		args.Lease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed, wasLastLeaseholder,
+	); err != nil {
+		rErr.Message = err.Error()
+		return newFailedLeaseTrigger(false /* isTransfer */), rErr
+	}
+
 	// Wind the start timestamp back as far towards the previous lease as we
 	// can. That'll make sure that when multiple leases are requested out of
 	// order at the same replica (after all, they use the request timestamp,
diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_test.go b/pkg/kv/kvserver/batcheval/cmd_lease_test.go
index 7510990de46f..161deb0feddc 100644
--- a/pkg/kv/kvserver/batcheval/cmd_lease_test.go
+++ b/pkg/kv/kvserver/batcheval/cmd_lease_test.go
@@ -268,35 +268,39 @@ func TestCheckCanReceiveLease(t *testing.T) {
 
 	const none = roachpb.ReplicaType(-1)
 
+	// CheckCanReceiveLease gets leaseHolderRemovalAllowed and wasLastLeaseholder
+	// booleans as parameters. Since they are used in a conjunction, this test
+	// distinguishes between two cases -- the expected result when both booleans
+	// are true, and the expected result when one of them is false.
 	for _, tc := range []struct {
-		leaseholderType           roachpb.ReplicaType
-		anotherReplicaType        roachpb.ReplicaType
-		eligibleLhRemovalEnabled  bool
-		eligibleLhRemovalDisabled bool
+		leaseholderType         roachpb.ReplicaType
+		anotherReplicaType      roachpb.ReplicaType
+		expIfBothConditionsTrue bool
+		expIfOneConditionFalse  bool
 	}{
-		{leaseholderType: roachpb.VOTER_FULL, anotherReplicaType: none, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: true},
-		{leaseholderType: roachpb.VOTER_INCOMING, anotherReplicaType: none, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: true},
+		{leaseholderType: roachpb.VOTER_FULL, anotherReplicaType: none, expIfBothConditionsTrue: true, expIfOneConditionFalse: true},
+		{leaseholderType: roachpb.VOTER_INCOMING, anotherReplicaType: none, expIfBothConditionsTrue: true, expIfOneConditionFalse: true},
 		// A VOTER_OUTGOING should only be able to get the lease if there's a VOTER_INCOMING.
-		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_INCOMING, expIfBothConditionsTrue: true, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_OUTGOING, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_OUTGOING, anotherReplicaType: roachpb.VOTER_FULL, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
 		// A VOTER_DEMOTING_LEARNER should only be able to get the lease if there's a VOTER_INCOMING.
-		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_INCOMING, expIfBothConditionsTrue: true, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_FULL, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_LEARNER, anotherReplicaType: roachpb.VOTER_OUTGOING, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
 		// A VOTER_DEMOTING_NON_VOTER should only be able to get the lease if there's a VOTER_INCOMING.
-		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_INCOMING, eligibleLhRemovalEnabled: true, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_FULL, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_OUTGOING, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_INCOMING, expIfBothConditionsTrue: true, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_FULL, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.VOTER_DEMOTING_NON_VOTER, anotherReplicaType: roachpb.VOTER_OUTGOING, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
 
-		{leaseholderType: roachpb.LEARNER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
-		{leaseholderType: roachpb.NON_VOTER, anotherReplicaType: none, eligibleLhRemovalEnabled: false, eligibleLhRemovalDisabled: false},
+		{leaseholderType: roachpb.LEARNER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
+		{leaseholderType: roachpb.NON_VOTER, anotherReplicaType: none, expIfBothConditionsTrue: false, expIfOneConditionFalse: false},
 	} {
 		t.Run(tc.leaseholderType.String(), func(t *testing.T) {
 			repDesc := roachpb.ReplicaDescriptor{
@@ -313,11 +317,17 @@ func TestCheckCanReceiveLease(t *testing.T) {
 				}
 				rngDesc.InternalReplicas = append(rngDesc.InternalReplicas, anotherDesc)
 			}
-			err := roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(), true)
-			require.Equal(t, tc.eligibleLhRemovalEnabled, err == nil, "err: %v", err)
-
-			err = roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(), false)
-			require.Equal(t, tc.eligibleLhRemovalDisabled, err == nil, "err: %v", err)
+			for _, leaseHolderRemovalAllowed := range []bool{false, true} {
+				for _, wasLastLeaseholder := range []bool{false, true} {
+					expectedResult := tc.expIfOneConditionFalse
+					if leaseHolderRemovalAllowed && wasLastLeaseholder {
+						expectedResult = tc.expIfBothConditionsTrue
+					}
+					err := roachpb.CheckCanReceiveLease(rngDesc.InternalReplicas[0], rngDesc.Replicas(),
+						leaseHolderRemovalAllowed, wasLastLeaseholder)
+					require.Equal(t, expectedResult, err == nil, "err: %v", err)
+				}
+			}
 		})
 	}
@@ -325,6 +335,8 @@ func TestCheckCanReceiveLease(t *testing.T) {
 		repDesc := roachpb.ReplicaDescriptor{ReplicaID: 1}
 		rngDesc := roachpb.RangeDescriptor{}
 		require.Regexp(t, "replica.*not found", roachpb.CheckCanReceiveLease(repDesc,
-			rngDesc.Replicas(), true))
+			rngDesc.Replicas(), true, true))
+		require.Regexp(t, "replica.*not found", roachpb.CheckCanReceiveLease(repDesc,
+			rngDesc.Replicas(), true, false))
 	})
 }
diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go b/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
index 095b5fc65cc6..e960d570c38b 100644
--- a/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
+++ b/pkg/kv/kvserver/batcheval/cmd_lease_transfer.go
@@ -82,7 +82,7 @@ func TransferLease(
 	// If this check is removed at some point, the filtering of learners on the
 	// sending side would have to be removed as well.
 	if err := roachpb.CheckCanReceiveLease(
-		newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed,
+		newLease.Replica, cArgs.EvalCtx.Desc().Replicas(), lhRemovalAllowed, false, /* wasLastLeaseholder */
 	); err != nil {
 		return newFailedLeaseTrigger(true /* isTransfer */), err
 	}
diff --git a/pkg/kv/kvserver/client_lease_test.go b/pkg/kv/kvserver/client_lease_test.go
index 512d40a79de0..989f70de9a69 100644
--- a/pkg/kv/kvserver/client_lease_test.go
+++ b/pkg/kv/kvserver/client_lease_test.go
@@ -395,10 +395,10 @@ func TestCannotTransferLeaseToVoterDemoting(t *testing.T) {
 	})
 }
 
-// TestTransferLeaseToVoterOutgoingWithIncoming ensures that the evaluation of lease
-// requests for nodes which are already in the VOTER_DEMOTING_LEARNER state succeeds
-// when there is a VOTER_INCOMING node.
-func TestTransferLeaseToVoterDemotingWithIncoming(t *testing.T) {
+// TestTransferLeaseToVoterDemotingFails ensures that the evaluation of lease
+// requests for nodes which are already in the VOTER_DEMOTING_LEARNER state fails
+// if they weren't previously holding the lease, even if there is a VOTER_INCOMING.
+func TestTransferLeaseToVoterDemotingFails(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 	ctx := context.Background()
@@ -448,8 +448,8 @@ func TestTransferLeaseToVoterDemotingFails(t *testing.T) {
 	// - Send an AdminChangeReplicasRequest to remove n3 and add n4
 	// - Block the step that moves n3 to VOTER_DEMOTING_LEARNER on changeReplicasChan
 	// - Send an AdminLeaseTransfer to make n3 the leaseholder
-	// - Try really hard to make sure that the lease transfer at least gets to
-	//   latch acquisition before unblocking the ChangeReplicas.
+	// - Make sure that this request fails (since n3 is VOTER_DEMOTING_LEARNER and
+	//   wasn't previously leaseholder)
 	// - Unblock the ChangeReplicas.
 	// - Make sure the lease transfer succeeds.
@@ -476,15 +476,17 @@ func TestTransferLeaseToVoterDemotingWithIncoming(t *testing.T) {
 		require.NoError(t, err)
 		require.Equal(t, tc.Target(0), leaseHolder,
 			errors.Errorf("Leaseholder supposed to be on n1."))
-		// Move the lease to n3, the VOTER_DEMOTING_LEARNER.
+		// Try to move the lease to n3, the VOTER_DEMOTING_LEARNER.
+		// This should fail since the last leaseholder wasn't n3
+		// (wasLastLeaseholder = false in CheckCanReceiveLease).
 		err = tc.Server(0).DB().AdminTransferLease(context.Background(),
 			scratchStartKey, tc.Target(2).StoreID)
-		require.NoError(t, err)
-		// Make sure the lease moved to n3.
+		require.EqualError(t, err, `replica cannot hold lease`)
+		// Make sure the lease is still on n1.
 		leaseHolder, err = tc.FindRangeLeaseHolder(desc, nil)
 		require.NoError(t, err)
-		require.Equal(t, tc.Target(2), leaseHolder,
-			errors.Errorf("Leaseholder supposed to be on n3."))
+		require.Equal(t, tc.Target(0), leaseHolder,
+			errors.Errorf("Leaseholder supposed to be on n1."))
 	}()
 	// Try really hard to make sure that our request makes it past the
 	// sanity check error to the evaluation error.
@@ -497,26 +499,25 @@ func TestTransferLeaseToVoterDemotingWithIncoming(t *testing.T) {
 	})
 }
 
-// TestTransferLeaseFailureDuringJointConfig reproduces
+// internalTransferLeaseFailureDuringJointConfig reproduces
 // https://github.com/cockroachdb/cockroach/issues/83687
 // and makes sure that if lease transfer fails during a joint configuration
 // the previous leaseholder will successfully re-aquire the lease.
 // The test proceeds as follows:
-// - Creates a range with 3 replicas n1, n2, n3, and makes sure the lease is on n1
-// - Makes sure lease transfers on this range fail from now on
-// - Invokes AdminChangeReplicas to remove n1 and add n4
-// - This causes the range to go into a joint configuration. A lease transfer
-// is attempted to move the lease from n1 to n4 before exiting the joint config,
-// but that fails, causing us to remain in the joint configuration with the original
-// leaseholder having revoked its lease, but everyone else thinking it's still
-// the leaseholder. In this situation, only n1 can re-aquire the lease as long as it is live.
-// - We re-enable lease transfers on this range.
-// - n1 is able to re-aquire the lease, due to the fix in #83686 which enables a
-// VOTER_DEMOTING_LEARNER (n1) replica to get the lease if there's also a VOTER_INCOMING
-// which is the case here (n4).
-// - n1 transfers the lease away and the range leaves the joint configuration.
-func TestTransferLeaseFailureDuringJointConfig(t *testing.T) {
-	defer leaktest.AfterTest(t)()
+//   - Creates a range with 3 replicas n1, n2, n3, and makes sure the lease is on n1
+//   - Makes sure lease transfers on this range fail from now on
+//   - Invokes AdminChangeReplicas to remove n1 and add n4
+//   - This causes the range to go into a joint configuration. A lease transfer
+//     is attempted to move the lease from n1 to n4 before exiting the joint config,
+//     but that fails, causing us to remain in the joint configuration with the original
+//     leaseholder having revoked its lease, but everyone else thinking it's still
+//     the leaseholder. In this situation, only n1 can re-aquire the lease as long as it is live.
+//   - We re-enable lease transfers on this range.
+//   - n1 is able to re-aquire the lease, due to the fix in #83686 which enables a
+//     VOTER_DEMOTING_LEARNER (n1) replica to get the lease if there's also a VOTER_INCOMING
+//     which is the case here (n4) and since n1 was the last leaseholder.
+//   - n1 transfers the lease away and the range leaves the joint configuration.
+func internalTransferLeaseFailureDuringJointConfig(t *testing.T, isManual bool) {
 	defer log.Scope(t).Close(t)
 	ctx := context.Background()
@@ -567,7 +568,22 @@ func TestTransferLeaseFailureDuringJointConfig(t *testing.T) {
 	require.NoError(t, err)
 	require.True(t, desc.Replicas().InAtomicReplicationChange())
 
-	// Further lease transfers should succeed, allowing the atomic replication change to complete.
+	// Allow further lease transfers to succeed.
+	atomic.StoreInt64(&scratchRangeID, 0)
+
+	if isManual {
+		// Manually transfer the lease to n1 (VOTER_DEMOTING_LEARNER).
+		err = tc.Server(0).DB().AdminTransferLease(context.Background(),
+			scratchStartKey, tc.Target(0).StoreID)
+		require.NoError(t, err)
+		// Make sure n1 has the lease.
+		leaseHolder, err := tc.FindRangeLeaseHolder(desc, nil)
+		require.NoError(t, err)
+		require.Equal(t, tc.Target(0), leaseHolder,
+			errors.Errorf("Leaseholder supposed to be on n1."))
+	}
+
+	// Complete the replication change.
 	atomic.StoreInt64(&scratchRangeID, 0)
 	store := tc.GetFirstStoreFromServer(t, 0)
 	repl := store.LookupReplica(roachpb.RKey(scratchStartKey))
@@ -580,6 +596,24 @@ func TestTransferLeaseFailureDuringJointConfig(t *testing.T) {
 	require.False(t, desc.Replicas().InAtomicReplicationChange())
 }
 
+// TestTransferLeaseFailureDuringJointConfigAuto uses
+// internalTransferLeaseFailureDuringJointConfig and
+// completes the lease transfer to n1 "automatically"
+// by relying on the replicate queue.
+func TestTransferLeaseFailureDuringJointConfigAuto(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	internalTransferLeaseFailureDuringJointConfig(t, false)
+}
+
+// TestTransferLeaseFailureDuringJointConfigManual uses
+// internalTransferLeaseFailureDuringJointConfig and
+// completes the lease transfer to n1 "manually" using
+// AdminTransferLease.
+func TestTransferLeaseFailureDuringJointConfigManual(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	internalTransferLeaseFailureDuringJointConfig(t, true)
+}
+
 // TestStoreLeaseTransferTimestampCacheRead verifies that the timestamp cache on
 // the new leaseholder is properly updated after a lease transfer to prevent new
 // writes from invalidating previously served reads.
diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go
index e8499e94e145..5e3bdc4bfbc2 100644
--- a/pkg/kv/kvserver/replica_proposal_buf.go
+++ b/pkg/kv/kvserver/replica_proposal_buf.go
@@ -909,13 +909,13 @@ func (b *propBuf) TrackEvaluatingRequest(
 // ensuring that no future writes ever write below it.
 //
 // Returns false in the following cases:
-// 1) target is below the propBuf's closed timestamp. This ensures that the
-// side-transport (the caller) is prevented from publishing closed timestamp
-// regressions. In other words, for a given LAI, the side-transport only
-// publishes closed timestamps higher than what Raft published.
-// 2) There are requests evaluating at timestamps equal to or below target (as
-// tracked by the evalTracker). We can't close timestamps at or above these
-// requests' write timestamps.
+//  1. target is below the propBuf's closed timestamp. This ensures that the
+//     side-transport (the caller) is prevented from publishing closed timestamp
+//     regressions. In other words, for a given LAI, the side-transport only
+//     publishes closed timestamps higher than what Raft published.
+//  2. There are requests evaluating at timestamps equal to or below target (as
+//     tracked by the evalTracker). We can't close timestamps at or above these
+//     requests' write timestamps.
 func (b *propBuf) MaybeForwardClosedLocked(ctx context.Context, target hlc.Timestamp) bool {
 	if lb := b.evalTracker.LowerBound(ctx); !lb.IsEmpty() && lb.LessEq(target) {
 		return false
@@ -1065,9 +1065,19 @@ func (rp *replicaProposer) leaderStatusRLocked(
 		// lease again, and by then hopefully we will have caught up.
 		leaderEligibleForLease = true
 	} else {
+
 		lhRemovalAllowed := r.store.cfg.Settings.Version.IsActive(
 			ctx, clusterversion.EnableLeaseHolderRemoval)
-		err := roachpb.CheckCanReceiveLease(leaderRep, rangeDesc.Replicas(), lhRemovalAllowed)
+		// If the current leader is a VOTER_DEMOTING and it was the last one to
+		// hold the lease (according to our possibly stale applied lease state),
+		// CheckCanReceiveLease considers it eligible to continue holding the
+		// lease, so we don't allow our proposal through. Otherwise, if it was not
+		// the last one to hold the lease, it will never be allowed to acquire it
+		// again, so we don't consider it eligible.
+		lastLease, _ := r.getLeaseRLocked()
+		wasLastLeaseholder := leaderRep.ReplicaID == lastLease.Replica.ReplicaID
+		err := roachpb.CheckCanReceiveLease(
+			leaderRep, rangeDesc.Replicas(), lhRemovalAllowed, wasLastLeaseholder)
 		leaderEligibleForLease = err == nil
 	}
 }
diff --git a/pkg/kv/kvserver/replica_proposal_buf_test.go b/pkg/kv/kvserver/replica_proposal_buf_test.go
index e37dc43e15dc..815c919a1913 100644
--- a/pkg/kv/kvserver/replica_proposal_buf_test.go
+++ b/pkg/kv/kvserver/replica_proposal_buf_test.go
@@ -184,7 +184,7 @@ func (t *testProposer) leaderStatusRLocked(
 		rngDesc := roachpb.RangeDescriptor{
 			InternalReplicas: []roachpb.ReplicaDescriptor{repDesc},
 		}
-		err := roachpb.CheckCanReceiveLease(repDesc, rngDesc.Replicas(), true)
+		err := roachpb.CheckCanReceiveLease(repDesc, rngDesc.Replicas(), true, true)
 		leaderEligibleForLease = err == nil
 	} else {
 		// This matches replicaProposed.leaderStatusRLocked().
diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go
index 55aa432d211d..c52e9fc80be9 100644
--- a/pkg/kv/kvserver/replica_raft.go
+++ b/pkg/kv/kvserver/replica_raft.go
@@ -92,14 +92,14 @@ func makeIDKey() kvserverbase.CmdIDKey {
 // would violate the locking order specified for Store.mu.
 //
 // Return values:
-// - a channel which receives a response or error upon application
-// - a closure used to attempt to abandon the command. When called, it unbinds
-// the command's context from its Raft proposal. The client is then free to
-// terminate execution, although it is given no guarantee that the proposal
-// won't still go on to commit and apply at some later time.
-// - the proposal's ID.
-// - any error obtained during the creation or proposal of the command, in
-// which case the other returned values are zero.
+//   - a channel which receives a response or error upon application
+//   - a closure used to attempt to abandon the command. When called, it unbinds
+//     the command's context from its Raft proposal. The client is then free to
+//     terminate execution, although it is given no guarantee that the proposal
+//     won't still go on to commit and apply at some later time.
+//   - the proposal's ID.
+//   - any error obtained during the creation or proposal of the command, in
+//     which case the other returned values are zero.
 func (r *Replica) evalAndPropose(
 	ctx context.Context,
 	ba *roachpb.BatchRequest,
@@ -388,7 +388,7 @@ func (r *Replica) propose(
 	// transferred away. The previous leaseholder is a LEARNER in the target config,
 	// and therefore shouldn't continue holding the lease.
 	if err := roachpb.CheckCanReceiveLease(
-		lhDesc, proposedDesc.Replicas(), lhRemovalAllowed,
+		lhDesc, proposedDesc.Replicas(), lhRemovalAllowed, true, /* wasLastLeaseholder */
 	); err != nil {
 		e := errors.Mark(errors.Wrapf(err, "%v received invalid ChangeReplicasTrigger %s to "+
 			"remove self (leaseholder); lhRemovalAllowed: %v; current desc: %v; proposed desc: %v",
diff --git a/pkg/roachpb/metadata_replicas.go b/pkg/roachpb/metadata_replicas.go
index ceba5948fa3d..bf8b0909daa1 100644
--- a/pkg/roachpb/metadata_replicas.go
+++ b/pkg/roachpb/metadata_replicas.go
@@ -186,16 +186,16 @@ func (d ReplicaSet) containsVoterIncoming() bool {
 // For simplicity, CockroachDB treats learner replicas the same as voter
 // replicas as much as possible, but there are a few exceptions:
 //
-// - Learner replicas are not considered when calculating quorum size, and thus
-// do not affect the computation of which ranges are under-replicated for
-// upreplication/alerting/debug/etc purposes. Ditto for over-replicated.
-// - Learner replicas cannot become raft leaders, so we also don't allow them to
-// become leaseholders. As a result, DistSender and the various oracles don't
-// try to send them traffic.
-// - The raft snapshot queue tries to avoid sending snapshots to ephemeral
-// learners (but not to non-voting replicas, which are also etcd learners) for
-// reasons described below.
-// - Merges won't run while a learner replica is present.
+//   - Learner replicas are not considered when calculating quorum size, and thus
+//     do not affect the computation of which ranges are under-replicated for
+//     upreplication/alerting/debug/etc purposes. Ditto for over-replicated.
+//   - Learner replicas cannot become raft leaders, so we also don't allow them to
+//     become leaseholders. As a result, DistSender and the various oracles don't
+//     try to send them traffic.
+//   - The raft snapshot queue tries to avoid sending snapshots to ephemeral
+//     learners (but not to non-voting replicas, which are also etcd learners) for
+//     reasons described below.
+//   - Merges won't run while a learner replica is present.
 //
 // Replicas are now added in two ConfChange transactions. The first creates the
 // learner and the second promotes it to a voter. If the node that is
@@ -546,6 +546,17 @@ var errReplicaCannotHoldLease = errors.Errorf("replica cannot hold lease")
 // but not to exit it in this state, i.e., the leaseholder must be some
 // kind of voter in the next new config (potentially VOTER_DEMOTING).
 //
+// It is possible (and sometimes needed) that while in the joint configuration,
+// the replica being removed will receive the lease. This is allowed only if
+// a) there is a VOTER_INCOMING replica to which the lease will be transferred
+// when transitioning out of the joint config, and b) the replica being removed
+// was the last leaseholder (as indicated by wasLastLeaseholder). The
+// information we use for (b) is potentially stale, but if it is incorrect
+// the removed node either does not need to get the lease or will not be able
+// to get it. In particular, when we think we are the last leaseholder but we
+// aren't, the CAS call for extending the lease will fail (see
+// wasLastLeaseholder := isExtension in cmd_lease_request.go).
+//
 // An error is also returned is the replica is not part of `replDescs`.
 // leaseHolderRemovalAllowed is intended to check if the cluster version is
 // EnableLeaseHolderRemoval or higher.
@@ -554,14 +565,17 @@ var errReplicaCannotHoldLease = errors.Errorf("replica cannot hold lease")
 // will check voter constraint violations. When changing this method, you need
 // to update replica filter in report to keep it correct.
 func CheckCanReceiveLease(
-	wouldbeLeaseholder ReplicaDescriptor, replDescs ReplicaSet, leaseHolderRemovalAllowed bool,
+	wouldbeLeaseholder ReplicaDescriptor,
+	replDescs ReplicaSet,
+	leaseHolderRemovalAllowed bool,
+	wasLastLeaseholder bool,
 ) error {
 	repDesc, ok := replDescs.GetReplicaDescriptorByID(wouldbeLeaseholder.ReplicaID)
 	if !ok {
 		return errReplicaNotFound
 	}
 	if !(repDesc.IsVoterNewConfig() ||
-		(repDesc.IsVoterOldConfig() && replDescs.containsVoterIncoming() && leaseHolderRemovalAllowed)) {
+		(repDesc.IsVoterOldConfig() && replDescs.containsVoterIncoming() && leaseHolderRemovalAllowed && wasLastLeaseholder)) {
 		// We allow a demoting / incoming voter to receive the lease if there's an incoming voter.
 		// In this case, when exiting the joint config, we will transfer the lease to the incoming
 		// voter.
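
Outside the patch itself, the eligibility rule that the final hunk arrives at can be summarized in a small, self-contained Go sketch. The names below (replicaType, canReceiveLease, and so on) are illustrative stand-ins, not the real roachpb API: a replica may receive the lease if it is a voter in the new config, or if it is a voter in the old config (e.g. VOTER_DEMOTING_LEARNER) while there is a VOTER_INCOMING replica, the cluster version allows leaseholder removal, and it was the last leaseholder.

```go
package main

import "fmt"

// Illustrative stand-ins for roachpb.ReplicaType; not the real API.
type replicaType int

const (
	voterFull replicaType = iota
	voterIncoming
	voterDemotingLearner
	learner
)

// isVoterNewConfig / isVoterOldConfig mirror the spirit of the descriptor
// helpers consulted by CheckCanReceiveLease.
func isVoterNewConfig(t replicaType) bool {
	return t == voterFull || t == voterIncoming
}

func isVoterOldConfig(t replicaType) bool {
	return t == voterFull || t == voterDemotingLearner
}

// canReceiveLease models the predicate in metadata_replicas.go after this
// patch: the conjunction with wasLastLeaseholder only matters for replicas
// that are voters in the old config but not in the new one.
func canReceiveLease(
	candidate replicaType, others []replicaType, lhRemovalAllowed, wasLastLeaseholder bool,
) bool {
	containsVoterIncoming := false
	for _, t := range others {
		if t == voterIncoming {
			containsVoterIncoming = true
		}
	}
	return isVoterNewConfig(candidate) ||
		(isVoterOldConfig(candidate) && containsVoterIncoming &&
			lhRemovalAllowed && wasLastLeaseholder)
}

func main() {
	// A VOTER_DEMOTING_LEARNER next to a VOTER_INCOMING is eligible only if it
	// also held the lease last; otherwise it must not take the lease.
	fmt.Println(canReceiveLease(voterDemotingLearner, []replicaType{voterIncoming}, true, true))  // true
	fmt.Println(canReceiveLease(voterDemotingLearner, []replicaType{voterIncoming}, true, false)) // false
	fmt.Println(canReceiveLease(voterFull, nil, false, false))                                    // true
}
```

Running this prints true, false, true, which mirrors the VOTER_DEMOTING_LEARNER rows of the updated TestCheckCanReceiveLease table above.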
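The two server-side call sites derive wasLastLeaseholder differently: RequestLease uses isExtension (the new lease starts on the store that held the previous lease), while the proposal buffer compares the raft leader's replica ID against the last lease in its possibly stale applied state. Below is a hedged sketch of those two derivations, again with illustrative local types rather than the actual kvserver/roachpb structs.

```go
package main

import "fmt"

// lease is an illustrative stand-in for the lease metadata consulted by the
// two call sites; it is not the real roachpb.Lease type.
type lease struct {
	replicaID int32
	storeID   int32
}

// wasLastLeaseholderForRequest mirrors cmd_lease_request.go: a lease request
// is treated as coming from the last leaseholder when it extends the previous
// lease on the same store. If that belief is stale, the CAS on the previous
// lease fails anyway, so acting on stale information is safe.
func wasLastLeaseholderForRequest(prev, next lease) bool {
	return prev.storeID == next.storeID // isExtension
}

// wasLastLeaseholderForProposal mirrors replica_proposal_buf.go: when deciding
// whether the current raft leader could acquire the lease, compare it against
// the last lease in the (possibly stale) applied lease state.
func wasLastLeaseholderForProposal(leaderReplicaID int32, lastApplied lease) bool {
	return leaderReplicaID == lastApplied.replicaID
}

func main() {
	prev := lease{replicaID: 3, storeID: 3}
	fmt.Println(wasLastLeaseholderForRequest(prev, lease{replicaID: 3, storeID: 3})) // true: extension
	fmt.Println(wasLastLeaseholderForRequest(prev, lease{replicaID: 4, storeID: 4})) // false: new holder
	fmt.Println(wasLastLeaseholderForProposal(3, prev))                              // true
}
```

Acting on a stale answer is safe for the reason the new comment in metadata_replicas.go gives: if a removed replica wrongly believes it was the last leaseholder, it either does not need the lease or its extension attempt fails.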