From fc78a8a81481725b8424f3539cd6e17e5bca41ae Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 5 Oct 2022 12:22:53 -0400 Subject: [PATCH 1/3] kv: improve logging around maybeTransferLeaseDuringLeaveJoint Also simplify a bit of logic. --- pkg/kv/kvserver/replica_command.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go index d3536f9e803e..0d239b12ad48 100644 --- a/pkg/kv/kvserver/replica_command.go +++ b/pkg/kv/kvserver/replica_command.go @@ -1196,20 +1196,22 @@ func (r *Replica) maybeTransferLeaseDuringLeaveJoint( // the set of full or incoming voters that will remain after the joint configuration is // complete. If we don't find the current leaseholder there this means it's being removed, // and we're going to transfer the lease to another voter below, before exiting the JOINT config. - beingRemoved := true voterIncomingTarget := roachpb.ReplicaDescriptor{} for _, v := range voters { - if beingRemoved && v.ReplicaID == r.ReplicaID() { - beingRemoved = false + if v.ReplicaID == r.ReplicaID() { + // We are still a voter. + return nil } if voterIncomingTarget == (roachpb.ReplicaDescriptor{}) && v.Type == roachpb.VOTER_INCOMING { voterIncomingTarget = v } } - if !beingRemoved { - return nil - } + // We are being removed as a voter. + voterDemotingTarget, err := r.GetReplicaDescriptor() + if err != nil { + return err + } if voterIncomingTarget == (roachpb.ReplicaDescriptor{}) { // Couldn't find a VOTER_INCOMING target. When the leaseholder is being // removed, we only enter a JOINT config if there is a VOTER_INCOMING @@ -1221,18 +1223,18 @@ func (r *Replica) maybeTransferLeaseDuringLeaveJoint( // to continue trying to leave the JOINT config. If this is the case, // our replica will not be able to leave the JOINT config, but the new // leaseholder will be able to do so. - log.Infof(ctx, "no VOTER_INCOMING to transfer lease to. This replica probably lost the lease,"+ - " but still thinks its the leaseholder. In this case the new leaseholder is expected to "+ + log.Warningf(ctx, "no VOTER_INCOMING to transfer lease to. This replica probably lost the "+ + "lease, but still thinks its the leaseholder. In this case the new leaseholder is expected to "+ "complete LEAVE_JOINT. Range descriptor: %v", desc) return nil } - log.VEventf(ctx, 5, "current leaseholder %v is being removed through an"+ - " atomic replication change. Transferring lease to %v", r.String(), voterIncomingTarget) - err := r.store.DB().AdminTransferLease(ctx, r.startKey, voterIncomingTarget.StoreID) + log.VEventf(ctx, 2, "leaseholder %v is being removed through an atomic "+ + "replication change, transferring lease to %v", voterDemotingTarget, voterIncomingTarget) + err = r.store.DB().AdminTransferLease(ctx, r.startKey, voterIncomingTarget.StoreID) if err != nil { return err } - log.VEventf(ctx, 5, "leaseholder transfer to %v complete", voterIncomingTarget) + log.VEventf(ctx, 2, "lease transfer to %v complete", voterIncomingTarget) return nil } @@ -1251,7 +1253,7 @@ func (r *Replica) maybeLeaveAtomicChangeReplicas( return desc, nil } // NB: this is matched on in TestMergeQueueSeesLearner. - log.Eventf(ctx, "transitioning out of joint configuration %s", desc) + log.VEventf(ctx, 2, "transitioning out of joint configuration %s", desc) // If the leaseholder is being demoted, leaving the joint config is only // possible if we first transfer the lease. A range not being able to exit From f4990e9ae8a59b06396cbf6cb17f919334dc79df Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Tue, 4 Oct 2022 17:58:38 -0400 Subject: [PATCH 2/3] kv: bypass lease transfer safety checks during joint consensus Fixes #88667. This commit adds logic to bypass lease transfer safety checks (added in 034611b) when in a joint configuration and transferring the lease from a VOTER_DEMOTING to a VOTER_INCOMING. We do so because we could get stuck without a path to exit the joint configuration if we rejected this lease transfer while waiting to confirm that the target is up-to-date on its log. That confirmation may never arrive if the target is dead or partitioned away, and while we'd rather not transfer the lease to a dead node, at least we have a mechanism to recovery from that state. We also just sent the VOTER_INCOMING a snapshot (as a LEARNER, before promotion), so it is unlikely that the replica is actually dead or behind on its log. A better alternative here would be to introduce a mechanism to choose an alternate lease transfer target after some amount of time, if the lease transfer to the VOTER_INCOMING cannot be confirmed to be safe. We may do this in the future, but given the proximity to the release and given that this matches the behavior in v22.1, we choose this approach for now. Release note: None Release justification: Needed to resolve release blocker. --- pkg/kv/batch.go | 7 ++++-- pkg/kv/db.go | 13 +++++++++++- pkg/kv/kvserver/client_lease_test.go | 9 +++++--- pkg/kv/kvserver/client_replica_test.go | 23 ++++++++++---------- pkg/kv/kvserver/replica_command.go | 16 ++++++++++++-- pkg/kv/kvserver/replica_init.go | 1 - pkg/kv/kvserver/replica_proposal_buf.go | 6 +----- pkg/kv/kvserver/replica_range_lease.go | 26 +++++++++++++++++------ pkg/kv/kvserver/replica_rangefeed_test.go | 2 +- pkg/kv/kvserver/replica_send.go | 2 +- pkg/kv/kvserver/replica_test.go | 7 +++--- pkg/kv/kvserver/replicate_queue.go | 4 ++-- pkg/kv/kvserver/testing_knobs.go | 4 ---- pkg/roachpb/api.proto | 16 ++++++++++++++ 14 files changed, 92 insertions(+), 44 deletions(-) diff --git a/pkg/kv/batch.go b/pkg/kv/batch.go index 06c21ca2a1d5..449a4a342ce1 100644 --- a/pkg/kv/batch.go +++ b/pkg/kv/batch.go @@ -735,7 +735,9 @@ func (b *Batch) adminUnsplit(splitKeyIn interface{}) { // adminTransferLease is only exported on DB. It is here for symmetry with the // other operations. -func (b *Batch) adminTransferLease(key interface{}, target roachpb.StoreID) { +func (b *Batch) adminTransferLease( + key interface{}, target roachpb.StoreID, bypassSafetyChecks bool, +) { k, err := marshalKey(key) if err != nil { b.initResult(0, 0, notRaw, err) @@ -745,7 +747,8 @@ func (b *Batch) adminTransferLease(key interface{}, target roachpb.StoreID) { RequestHeader: roachpb.RequestHeader{ Key: k, }, - Target: target, + Target: target, + BypassSafetyChecks: bypassSafetyChecks, } b.appendReqs(req) b.initResult(1, 0, notRaw, nil) diff --git a/pkg/kv/db.go b/pkg/kv/db.go index c45d71fa99bd..a0bce7f5f1be 100644 --- a/pkg/kv/db.go +++ b/pkg/kv/db.go @@ -649,7 +649,18 @@ func (db *DB) AdminTransferLease( ctx context.Context, key interface{}, target roachpb.StoreID, ) error { b := &Batch{} - b.adminTransferLease(key, target) + b.adminTransferLease(key, target, false /* bypassSafetyChecks */) + return getOneErr(db.Run(ctx, b), b) +} + +// AdminTransferLeaseBypassingSafetyChecks is like AdminTransferLease, but +// configures the lease transfer to bypass safety checks. See the comment on +// AdminTransferLeaseRequest.BypassSafetyChecks for details. +func (db *DB) AdminTransferLeaseBypassingSafetyChecks( + ctx context.Context, key interface{}, target roachpb.StoreID, +) error { + b := &Batch{} + b.adminTransferLease(key, target, true /* bypassSafetyChecks */) return getOneErr(db.Run(ctx, b), b) } diff --git a/pkg/kv/kvserver/client_lease_test.go b/pkg/kv/kvserver/client_lease_test.go index 44f428a6030d..72f47ac77314 100644 --- a/pkg/kv/kvserver/client_lease_test.go +++ b/pkg/kv/kvserver/client_lease_test.go @@ -398,13 +398,16 @@ func internalTransferLeaseFailureDuringJointConfig(t *testing.T, isManual bool) // range when they are being proposed. var scratchRangeID int64 shouldFailProposal := func(args kvserverbase.ProposalFilterArgs) bool { - // Block if a ChangeReplicas command is removing a node from our range. return args.Req.RangeID == roachpb.RangeID(atomic.LoadInt64(&scratchRangeID)) && args.Req.IsSingleTransferLeaseRequest() } + const failureMsg = "injected lease transfer" knobs.Store.(*kvserver.StoreTestingKnobs).TestingProposalFilter = func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { if shouldFailProposal(args) { - return roachpb.NewErrorf("Injecting lease transfer failure") + // The lease transfer should be configured to bypass safety checks. + // See maybeTransferLeaseDuringLeaveJoint for an explanation. + require.True(t, args.Req.Requests[0].GetTransferLease().BypassSafetyChecks) + return roachpb.NewErrorf(failureMsg) } return nil } @@ -430,7 +433,7 @@ func internalTransferLeaseFailureDuringJointConfig(t *testing.T, isManual bool) {ChangeType: roachpb.ADD_VOTER, Target: tc.Target(3)}, }) require.Error(t, err) - require.Regexp(t, "Injecting lease transfer failure", err) + require.Regexp(t, failureMsg, err) // We're now in a joint configuration, n1 already revoked its lease but all // other replicas think n1 is the leaseholder. As long as n1 is alive, it is diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index 64e702d94149..4b50b3e397b7 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -1589,7 +1589,7 @@ func TestLeaseExpirationBasedRangeTransfer(t *testing.T) { origLease, _ := l.replica0.GetLease() { // Transferring the lease to ourself should be a no-op. - if err := l.replica0.AdminTransferLease(ctx, l.replica0Desc.StoreID); err != nil { + if err := l.replica0.AdminTransferLease(ctx, l.replica0Desc.StoreID, false /* bypassSafetyChecks */); err != nil { t.Fatal(err) } newLease, _ := l.replica0.GetLease() @@ -1601,12 +1601,12 @@ func TestLeaseExpirationBasedRangeTransfer(t *testing.T) { { // An invalid target should result in an error. const expected = "unable to find store .* in range" - if err := l.replica0.AdminTransferLease(ctx, 1000); !testutils.IsError(err, expected) { + if err := l.replica0.AdminTransferLease(ctx, 1000, false /* bypassSafetyChecks */); !testutils.IsError(err, expected) { t.Fatalf("expected %s, but found %v", expected, err) } } - if err := l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID); err != nil { + if err := l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID, false /* bypassSafetyChecks */); err != nil { t.Fatal(err) } @@ -1651,7 +1651,7 @@ func TestLeaseExpirationBasedRangeTransferWithExtension(t *testing.T) { l := setupLeaseTransferTest(t) defer l.tc.Stopper().Stop(ctx) // Ensure that replica1 has the lease. - if err := l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID); err != nil { + if err := l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID, false /* bypassSafetyChecks */); err != nil { t.Fatal(err) } l.checkHasLease(t, 1) @@ -1673,7 +1673,7 @@ func TestLeaseExpirationBasedRangeTransferWithExtension(t *testing.T) { transferErrCh := make(chan error) go func() { // Transfer back from replica1 to replica0. - err := l.replica1.AdminTransferLease(context.Background(), l.replica0Desc.StoreID) + err := l.replica1.AdminTransferLease(context.Background(), l.replica0Desc.StoreID, false /* bypassSafetyChecks */) // Ignore not leaseholder errors which can arise due to re-proposals. if errors.HasType(err, (*roachpb.NotLeaseHolderError)(nil)) { err = nil @@ -1738,7 +1738,7 @@ func TestLeaseExpirationBasedDrainTransferWithExtension(t *testing.T) { l := setupLeaseTransferTest(t) defer l.tc.Stopper().Stop(ctx) // Ensure that replica1 has the lease. - if err := l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID); err != nil { + if err := l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID, false /* bypassSafetyChecks */); err != nil { t.Fatal(err) } l.checkHasLease(t, 1) @@ -1791,7 +1791,7 @@ func TestLeaseExpirationBelowFutureTimeRequest(t *testing.T) { // Ensure that replica1 has the lease, and that replica0 has also picked up // on the lease transfer. - require.NoError(t, l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID)) + require.NoError(t, l.replica0.AdminTransferLease(ctx, l.replica1Desc.StoreID, false /* bypassSafetyChecks */)) l.checkHasLease(t, 1) preLease, _ := l.replica1.GetLease() require.Eventually(t, func() bool { @@ -2780,9 +2780,6 @@ func TestLeaseTransferInSnapshotUpdatesTimestampCache(t *testing.T) { ReplicationMode: base.ReplicationManual, ServerArgs: base.TestServerArgs{ Knobs: base.TestingKnobs{ - Store: &kvserver.StoreTestingKnobs{ - AllowLeaseTransfersWhenTargetMayNeedSnapshot: true, - }, Server: &server.TestingKnobs{ WallClock: manualClock, }, @@ -2872,7 +2869,11 @@ func TestLeaseTransferInSnapshotUpdatesTimestampCache(t *testing.T) { // Finally, transfer the lease to node 2 while it is still unavailable and // behind. We try to avoid this case when picking new leaseholders in practice, // but we're never 100% successful. - tc.TransferRangeLeaseOrFatal(t, *repl0.Desc(), tc.Target(2)) + // NOTE: we bypass safety checks because the target node is behind on its log, + // so the lease transfer would be rejected otherwise. + err = tc.Servers[0].DB().AdminTransferLeaseBypassingSafetyChecks(ctx, + repl0.Desc().StartKey.AsRawKey(), tc.Target(2).StoreID) + require.Nil(t, err) // Remove the partition. A snapshot to node 2 should follow. This snapshot // will inform node 2 that it is the new leaseholder for the range. Node 2 diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go index 0d239b12ad48..7d4b57575afd 100644 --- a/pkg/kv/kvserver/replica_command.go +++ b/pkg/kv/kvserver/replica_command.go @@ -1230,7 +1230,19 @@ func (r *Replica) maybeTransferLeaseDuringLeaveJoint( } log.VEventf(ctx, 2, "leaseholder %v is being removed through an atomic "+ "replication change, transferring lease to %v", voterDemotingTarget, voterIncomingTarget) - err = r.store.DB().AdminTransferLease(ctx, r.startKey, voterIncomingTarget.StoreID) + // We bypass safety checks when transferring the lease to the VOTER_INCOMING. + // We do so because we could get stuck without a path to exit the joint + // configuration if we rejected this lease transfer while waiting to confirm + // that the target is up-to-date on its log. That confirmation may never + // arrive if the target is dead or partitioned away, and while we'd rather not + // transfer the lease to a dead node, at least we have a mechanism to recovery + // from that state. We also just sent the VOTER_INCOMING a snapshot (as a + // LEARNER, before promotion), so it is unlikely that the replica is actually + // dead or behind on its log. + // TODO(nvanbenschoten): this isn't great. Instead of bypassing safety checks, + // we should build a mechanism to choose an alternate lease transfer target + // after some amount of time. + err = r.store.DB().AdminTransferLeaseBypassingSafetyChecks(ctx, r.startKey, voterIncomingTarget.StoreID) if err != nil { return err } @@ -3637,7 +3649,7 @@ func (r *Replica) adminScatter( targetStoreID := potentialLeaseTargets[newLeaseholderIdx].StoreID if targetStoreID != r.store.StoreID() { log.VEventf(ctx, 2, "randomly transferring lease to s%d", targetStoreID) - if err := r.AdminTransferLease(ctx, targetStoreID); err != nil { + if err := r.AdminTransferLease(ctx, targetStoreID, false /* bypassSafetyChecks */); err != nil { log.Warningf(ctx, "failed to scatter lease to s%d: %+v", targetStoreID, err) } } diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index c25f4fde2817..d60bf9600cfb 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -103,7 +103,6 @@ func newUnloadedReplica( r.mu.checksums = map[uuid.UUID]*replicaChecksum{} r.mu.proposalBuf.Init((*replicaProposer)(r), tracker.NewLockfreeTracker(), r.Clock(), r.ClusterSettings()) r.mu.proposalBuf.testing.allowLeaseProposalWhenNotLeader = store.cfg.TestingKnobs.AllowLeaseRequestProposalsWhenNotLeader - r.mu.proposalBuf.testing.allowLeaseTransfersWhenTargetMayNeedSnapshot = store.cfg.TestingKnobs.AllowLeaseTransfersWhenTargetMayNeedSnapshot r.mu.proposalBuf.testing.dontCloseTimestamps = store.cfg.TestingKnobs.DontCloseTimestamps r.mu.proposalBuf.testing.submitProposalFilter = store.cfg.TestingKnobs.TestingProposalSubmitFilter diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go index 5eb0e402e6f5..3f6d56d0e9f8 100644 --- a/pkg/kv/kvserver/replica_proposal_buf.go +++ b/pkg/kv/kvserver/replica_proposal_buf.go @@ -111,10 +111,6 @@ type propBuf struct { // heartbeats and then expect other replicas to take the lease without // worrying about Raft). allowLeaseProposalWhenNotLeader bool - // allowLeaseTransfersWhenTargetMayNeedSnapshot, if set, makes the proposal - // buffer allow lease request proposals even when the proposer cannot prove - // that the lease transfer target does not need a Raft snapshot. - allowLeaseTransfersWhenTargetMayNeedSnapshot bool // dontCloseTimestamps inhibits the closing of timestamps. dontCloseTimestamps bool } @@ -699,7 +695,7 @@ func (b *propBuf) maybeRejectUnsafeProposalLocked( newLease := p.command.ReplicatedEvalResult.State.Lease newLeaseTarget := newLease.Replica.ReplicaID snapStatus := raftutil.ReplicaMayNeedSnapshot(&status, firstIndex, newLeaseTarget) - if snapStatus != raftutil.NoSnapshotNeeded && !b.testing.allowLeaseTransfersWhenTargetMayNeedSnapshot { + if snapStatus != raftutil.NoSnapshotNeeded && !p.Request.Requests[0].GetTransferLease().BypassSafetyChecks { b.p.rejectProposalWithLeaseTransferRejectedLocked(ctx, p, newLease, snapStatus) return true } diff --git a/pkg/kv/kvserver/replica_range_lease.go b/pkg/kv/kvserver/replica_range_lease.go index e24c8afb2822..d924631b8e8f 100644 --- a/pkg/kv/kvserver/replica_range_lease.go +++ b/pkg/kv/kvserver/replica_range_lease.go @@ -198,6 +198,7 @@ func (p *pendingLeaseRequest) InitOrJoinRequest( status kvserverpb.LeaseStatus, startKey roachpb.Key, transfer bool, + bypassSafetyChecks bool, ) *leaseRequestHandle { if nextLease, ok := p.RequestPending(); ok { if nextLease.Replica.ReplicaID == nextLeaseHolder.ReplicaID { @@ -292,11 +293,19 @@ func (p *pendingLeaseRequest) InitOrJoinRequest( var leaseReq roachpb.Request if transfer { leaseReq = &roachpb.TransferLeaseRequest{ - RequestHeader: reqHeader, - Lease: reqLease, - PrevLease: status.Lease, + RequestHeader: reqHeader, + Lease: reqLease, + PrevLease: status.Lease, + BypassSafetyChecks: bypassSafetyChecks, } } else { + if bypassSafetyChecks { + // TODO(nvanbenschoten): we could support a similar bypassSafetyChecks + // flag for RequestLeaseRequest, which would disable the protection in + // propBuf.maybeRejectUnsafeProposalLocked. For now, we use a testing + // knob. + log.Fatal(ctx, "bypassSafetyChecks not supported for RequestLeaseRequest") + } minProposedTS := p.repl.mu.minLeaseProposedTS leaseReq = &roachpb.RequestLeaseRequest{ RequestHeader: reqHeader, @@ -842,7 +851,8 @@ func (r *Replica) requestLeaseLocked( return r.mu.pendingLeaseRequest.newResolvedHandle(roachpb.NewError(err)) } return r.mu.pendingLeaseRequest.InitOrJoinRequest( - ctx, repDesc, status, r.mu.state.Desc.StartKey.AsRawKey(), false /* transfer */) + ctx, repDesc, status, r.mu.state.Desc.StartKey.AsRawKey(), + false /* transfer */, false /* bypassSafetyChecks */) } // AdminTransferLease transfers the LeaderLease to another replica. Only the @@ -867,7 +877,9 @@ func (r *Replica) requestLeaseLocked( // replica. Otherwise, a NotLeaseHolderError is returned. // // AdminTransferLease implements the ReplicaLeaseMover interface. -func (r *Replica) AdminTransferLease(ctx context.Context, target roachpb.StoreID) error { +func (r *Replica) AdminTransferLease( + ctx context.Context, target roachpb.StoreID, bypassSafetyChecks bool, +) error { // initTransferHelper inits a transfer if no extension is in progress. // It returns a channel for waiting for the result of a pending // extension (if any is in progress) and a channel for waiting for the @@ -921,7 +933,7 @@ func (r *Replica) AdminTransferLease(ctx context.Context, target roachpb.StoreID raftStatus := r.raftStatusRLocked() raftFirstIndex := r.raftFirstIndexRLocked() snapStatus := raftutil.ReplicaMayNeedSnapshot(raftStatus, raftFirstIndex, nextLeaseHolder.ReplicaID) - if snapStatus != raftutil.NoSnapshotNeeded && !r.store.TestingKnobs().AllowLeaseTransfersWhenTargetMayNeedSnapshot { + if snapStatus != raftutil.NoSnapshotNeeded && !bypassSafetyChecks { r.store.metrics.LeaseTransferErrorCount.Inc(1) log.VEventf(ctx, 2, "not initiating lease transfer because the target %s may "+ "need a snapshot: %s", nextLeaseHolder, snapStatus) @@ -930,7 +942,7 @@ func (r *Replica) AdminTransferLease(ctx context.Context, target roachpb.StoreID } transfer = r.mu.pendingLeaseRequest.InitOrJoinRequest( - ctx, nextLeaseHolder, status, desc.StartKey.AsRawKey(), true, /* transfer */ + ctx, nextLeaseHolder, status, desc.StartKey.AsRawKey(), true /* transfer */, bypassSafetyChecks, ) return nil, transfer, nil } diff --git a/pkg/kv/kvserver/replica_rangefeed_test.go b/pkg/kv/kvserver/replica_rangefeed_test.go index 680c4890012d..55f0a2e7a06d 100644 --- a/pkg/kv/kvserver/replica_rangefeed_test.go +++ b/pkg/kv/kvserver/replica_rangefeed_test.go @@ -830,7 +830,7 @@ func TestReplicaRangefeedRetryErrors(t *testing.T) { if raftStatus != nil && raftStatus.RaftState == raft.StateFollower { return nil } - err = repl.AdminTransferLease(ctx, roachpb.StoreID(1)) + err = repl.AdminTransferLease(ctx, roachpb.StoreID(1), false /* bypassSafetyChecks */) // NB: errors.Wrapf(nil, ...) returns nil. // nolint:errwrap return errors.Errorf("not raft follower: %+v, transferred lease: %v", raftStatus, err) diff --git a/pkg/kv/kvserver/replica_send.go b/pkg/kv/kvserver/replica_send.go index 395c2e8a1a94..89c05635011d 100644 --- a/pkg/kv/kvserver/replica_send.go +++ b/pkg/kv/kvserver/replica_send.go @@ -934,7 +934,7 @@ func (r *Replica) executeAdminBatch( resp = &reply case *roachpb.AdminTransferLeaseRequest: - pErr = roachpb.NewError(r.AdminTransferLease(ctx, tArgs.Target)) + pErr = roachpb.NewError(r.AdminTransferLease(ctx, tArgs.Target, tArgs.BypassSafetyChecks)) resp = &roachpb.AdminTransferLeaseResponse{} case *roachpb.AdminChangeReplicasRequest: diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index a434d55ea422..e250338698ff 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -616,9 +616,6 @@ func TestBehaviorDuringLeaseTransfer(t *testing.T) { tsc := TestStoreConfig(clock) var leaseAcquisitionTrap atomic.Value tsc.TestingKnobs.DisableAutomaticLeaseRenewal = true - // We're transferring the lease to a bogus replica, so disable protection - // which would otherwise notice this and reject the lease transfer. - tsc.TestingKnobs.AllowLeaseTransfersWhenTargetMayNeedSnapshot = true tsc.TestingKnobs.LeaseRequestEvent = func(ts hlc.Timestamp, _ roachpb.StoreID, _ roachpb.RangeID) *roachpb.Error { val := leaseAcquisitionTrap.Load() if val == nil { @@ -669,7 +666,9 @@ func TestBehaviorDuringLeaseTransfer(t *testing.T) { // Initiate a transfer (async) and wait for it to be blocked. transferResChan := make(chan error) go func() { - err := tc.repl.AdminTransferLease(ctx, secondReplica.StoreID) + // We're transferring the lease to a bogus replica, so disable protection + // which would otherwise notice this and reject the lease transfer. + err := tc.repl.AdminTransferLease(ctx, secondReplica.StoreID, true /* bypassSafetyChecks */) if !testutils.IsError(err, "injected") { transferResChan <- err } else { diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index de99b16a494b..201c6b59dda8 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -1827,7 +1827,7 @@ func (rq *replicateQueue) shedLease( // ReplicaLeaseMover handles lease transfers for a single range. type ReplicaLeaseMover interface { // AdminTransferLease moves the lease to the requested store. - AdminTransferLease(ctx context.Context, target roachpb.StoreID) error + AdminTransferLease(ctx context.Context, target roachpb.StoreID, bypassSafetyChecks bool) error // String returns info about the replica. String() string @@ -1860,7 +1860,7 @@ func (rq *replicateQueue) TransferLease( ) error { rq.metrics.TransferLeaseCount.Inc(1) log.KvDistribution.Infof(ctx, "transferring lease to s%d", target) - if err := rlm.AdminTransferLease(ctx, target); err != nil { + if err := rlm.AdminTransferLease(ctx, target, false /* bypassSafetyChecks */); err != nil { return errors.Wrapf(err, "%s: unable to transfer lease to s%d", rlm, target) } rq.lastLeaseTransfer.Store(timeutil.Now()) diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 3b38d0f655ef..ea7f2d28a644 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -340,10 +340,6 @@ type StoreTestingKnobs struct { // heartbeats and then expect other replicas to take the lease without // worrying about Raft). AllowLeaseRequestProposalsWhenNotLeader bool - // AllowLeaseTransfersWhenTargetMayNeedSnapshot, if set, makes the Replica - // and proposal buffer allow lease request proposals even when the proposer - // cannot prove that the lease transfer target does not need a Raft snapshot. - AllowLeaseTransfersWhenTargetMayNeedSnapshot bool // LeaseTransferRejectedRetryLoopCount, if set, configures the maximum number // of retries for the retry loop used during lease transfers. This retry loop // retries after transfer attempts are rejected because the transfer is deemed diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index a0d6b29b9abc..7aa217014cc8 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -880,6 +880,14 @@ message AdminMergeResponse { message AdminTransferLeaseRequest { RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true]; int32 target = 2 [(gogoproto.casttype) = "StoreID"]; + // When set to true, bypass_safety_checks configures the lease transfer to + // skip safety checks that ensure that the transfer target is known to be + // (according to the outgoing leaseholder) alive and sufficiently caught up on + // its log. This option should be used sparingly — typically only by outgoing + // leaseholders who both have some other reason to believe that the target is + // alive and caught up on its log (e.g. they just sent it a snapshot) and also + // can't tolerate rejected lease transfers. + bool bypass_safety_checks = 3; } message AdminTransferLeaseResponse { @@ -1380,6 +1388,14 @@ message TransferLeaseRequest { // The previous lease is specified by the caller to verify // it has not changed when executing this command. Lease prev_lease = 3 [(gogoproto.nullable) = false]; + // When set to true, bypass_safety_checks configures the lease transfer to + // skip safety checks that ensure that the transfer target is known to be + // (according to the outgoing leaseholder) alive and sufficiently caught up on + // its log. This option should be used sparingly — typically only by outgoing + // leaseholders who both have some other reason to believe that the target is + // alive and caught up on its log (e.g. they just sent it a snapshot) and also + // can't tolerate rejected lease transfers. + bool bypass_safety_checks = 4; } // LeaseInfoRequest is the argument to the LeaseInfo() method, for getting From e6b99fdb600e9e1f26eeaa4443ae3f68f4d6e821 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 5 Oct 2022 15:07:52 -0400 Subject: [PATCH 3/3] kv: add TestTransferLeaseDuringJointConfigWithDeadIncomingVoter This commit adds a new test which ensures that the lease transfer performed during a joint config replication change that is replacing the existing leaseholder does not get stuck even if the existing leaseholder cannot prove that the incoming leaseholder is caught up on its log. It does so by killing the incoming leaseholder before it receives the lease and ensuring that the range is able to exit the joint configuration. Currently, the range exits by bypassing safety checks during the lease transfer, sending the lease to the dead incoming voter, letting the lease expire, acquiring the lease on one of the non-demoting voters, and exiting. The details here may change in the future, but the goal of this test will not. --- pkg/kv/kvserver/client_lease_test.go | 96 ++++++++++++++++++++++++ pkg/kv/kvserver/closed_timestamp_test.go | 4 + pkg/kv/kvserver/helpers_test.go | 7 ++ 3 files changed, 107 insertions(+) diff --git a/pkg/kv/kvserver/client_lease_test.go b/pkg/kv/kvserver/client_lease_test.go index 72f47ac77314..8283cd9de7d8 100644 --- a/pkg/kv/kvserver/client_lease_test.go +++ b/pkg/kv/kvserver/client_lease_test.go @@ -45,6 +45,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/errors" "github.com/stretchr/testify/require" + "go.etcd.io/etcd/raft/v3" + "go.etcd.io/etcd/raft/v3/tracker" ) // TestStoreRangeLease verifies that regular ranges (not some special ones at @@ -371,6 +373,100 @@ func TestTransferLeaseToVoterDemotingFails(t *testing.T) { }) } +// TestTransferLeaseDuringJointConfigWithDeadIncomingVoter ensures that the +// lease transfer performed during a joint config replication change that is +// replacing the existing leaseholder does not get stuck even if the existing +// leaseholder cannot prove that the incoming leaseholder is caught up on its +// log. It does so by killing the incoming leaseholder before it receives the +// lease and ensuring that the range is able to exit the joint configuration. +// +// Currently, the range exits by bypassing safety checks during the lease +// transfer, sending the lease to the dead incoming voter, letting the lease +// expire, acquiring the lease on one of the non-demoting voters, and exiting. +// The details here may change in the future, but the goal of this test will +// not. +func TestTransferLeaseDuringJointConfigWithDeadIncomingVoter(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + // The lease request timeout depends on the Raft election timeout, so we set + // it low to get faster lease expiration (800 ms) and speed up the test. + var raftCfg base.RaftConfig + raftCfg.SetDefaults() + raftCfg.RaftHeartbeatIntervalTicks = 1 + raftCfg.RaftElectionTimeoutTicks = 2 + + knobs, ltk := makeReplicationTestKnobs() + tc := testcluster.StartTestCluster(t, 4, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + RaftConfig: raftCfg, + Knobs: knobs, + }, + ReplicationMode: base.ReplicationManual, + }) + defer tc.Stopper().Stop(ctx) + + key := tc.ScratchRange(t) + desc := tc.AddVotersOrFatal(t, key, tc.Targets(1, 2)...) + // Make sure n1 has the lease to start with. + err := tc.Server(0).DB().AdminTransferLease(ctx, key, tc.Target(0).StoreID) + require.NoError(t, err) + store0, repl0 := getFirstStoreReplica(t, tc.Server(0), key) + + // The test proceeds as follows: + // + // - Send an AdminChangeReplicasRequest to remove n1 (leaseholder) and add n4 + // - Stop the replication change after entering the joint configuration + // - Kill n4 and wait until n1 notices + // - Complete the replication change + + // Enter joint config. + ltk.withStopAfterJointConfig(func() { + tc.RebalanceVoterOrFatal(ctx, t, key, tc.Target(0), tc.Target(3)) + }) + desc = tc.LookupRangeOrFatal(t, key) + require.Len(t, desc.Replicas().Descriptors(), 4) + require.True(t, desc.Replicas().InAtomicReplicationChange(), desc) + + // Kill n4. + tc.StopServer(3) + + // Wait for n1 to notice. + testutils.SucceedsSoon(t, func() error { + // Manually report n4 as unreachable to speed up the test. + require.NoError(t, repl0.RaftReportUnreachable(4)) + // Check the Raft progress. + s := repl0.RaftStatus() + require.Equal(t, raft.StateLeader, s.RaftState) + p := s.Progress + require.Len(t, p, 4) + require.Contains(t, p, uint64(4)) + if p[4].State != tracker.StateProbe { + return errors.Errorf("dead replica not state probe") + } + return nil + }) + + // Run the range through the replicate queue on n1. + trace, processErr, err := store0.Enqueue( + ctx, "replicate", repl0, true /* skipShouldQueue */, false /* async */) + require.NoError(t, err) + require.NoError(t, processErr) + formattedTrace := trace.String() + expectedMessages := []string{ + `transitioning out of joint configuration`, + `leaseholder .* is being removed through an atomic replication change, transferring lease to`, + `lease transfer to .* complete`, + } + require.NoError(t, testutils.MatchInOrder(formattedTrace, expectedMessages...)) + + // Verify that the joint configuration has completed. + desc = tc.LookupRangeOrFatal(t, key) + require.Len(t, desc.Replicas().VoterDescriptors(), 3) + require.False(t, desc.Replicas().InAtomicReplicationChange(), desc) +} + // internalTransferLeaseFailureDuringJointConfig reproduces // https://github.com/cockroachdb/cockroach/issues/83687 // and makes sure that if lease transfer fails during a joint configuration diff --git a/pkg/kv/kvserver/closed_timestamp_test.go b/pkg/kv/kvserver/closed_timestamp_test.go index 66859cfd9121..0d42c8e90c80 100644 --- a/pkg/kv/kvserver/closed_timestamp_test.go +++ b/pkg/kv/kvserver/closed_timestamp_test.go @@ -1102,6 +1102,10 @@ func countNotLeaseHolderErrors(ba roachpb.BatchRequest, repls []*kvserver.Replic const testingTargetDuration = 300 * time.Millisecond const testingSideTransportInterval = 100 * time.Millisecond + +// TODO(nvanbenschoten): this is a pretty bad variable name to leak into the +// global scope of the kvserver_test package. At least one test was using it +// unintentionally. Remove it. const numNodes = 3 func replsForRange( diff --git a/pkg/kv/kvserver/helpers_test.go b/pkg/kv/kvserver/helpers_test.go index 8eda8f267882..1d4032700165 100644 --- a/pkg/kv/kvserver/helpers_test.go +++ b/pkg/kv/kvserver/helpers_test.go @@ -248,6 +248,13 @@ func (r *Replica) RaftUnlock() { r.raftMu.Unlock() } +func (r *Replica) RaftReportUnreachable(id roachpb.ReplicaID) error { + return r.withRaftGroup(true, func(raftGroup *raft.RawNode) (bool, error) { + raftGroup.ReportUnreachable(uint64(id)) + return false /* unquiesceAndWakeLeader */, nil + }) +} + // LastAssignedLeaseIndexRLocked returns the last assigned lease index. func (r *Replica) LastAssignedLeaseIndex() uint64 { r.mu.RLock()