diff --git a/pkg/kv/kvclient/kvcoord/dist_sender.go b/pkg/kv/kvclient/kvcoord/dist_sender.go index fbd80fc812a8..1e1f220af59f 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender.go @@ -2188,7 +2188,7 @@ func (ds *DistSender) sendToReplicas( ds.metrics.NotLeaseHolderErrCount.Inc(1) // If we got some lease information, we use it. If not, we loop around // and try the next replica. - if tErr.Lease != nil || tErr.LeaseHolder != nil { + if tErr.Lease != nil || tErr.DeprecatedLeaseHolder != nil { // Update the leaseholder in the range cache. Naively this would also // happen when the next RPC comes back, but we don't want to wait out // the additional RPC latency. @@ -2196,8 +2196,10 @@ func (ds *DistSender) sendToReplicas( var updatedLeaseholder bool if tErr.Lease != nil { updatedLeaseholder = routing.SyncTokenAndMaybeUpdateCache(ctx, tErr.Lease, &tErr.RangeDesc) - } else if tErr.LeaseHolder != nil { - updatedLeaseholder = routing.SyncTokenAndMaybeUpdateCacheWithSpeculativeLease(ctx, *tErr.LeaseHolder, &tErr.RangeDesc) + } else if tErr.DeprecatedLeaseHolder != nil { + updatedLeaseholder = routing.SyncTokenAndMaybeUpdateCacheWithSpeculativeLease( + ctx, *tErr.DeprecatedLeaseHolder, &tErr.RangeDesc, + ) } // Move the new leaseholder to the head of the queue for the next // retry. Note that the leaseholder might not be the one indicated by diff --git a/pkg/kv/kvclient/kvcoord/dist_sender_test.go b/pkg/kv/kvclient/kvcoord/dist_sender_test.go index 089f9bc9111c..f8795ba63cce 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender_test.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender_test.go @@ -601,11 +601,13 @@ func TestRetryOnNotLeaseHolderError(t *testing.T) { expLease: true, }, { + // TODO(arul): This is only possible in 22.{1,2} mixed version clusters; + // remove once we get rid of the LeaseHolder field in 23.1. name: "leaseholder in desc, no lease", nlhe: roachpb.NotLeaseHolderError{ - RangeID: testUserRangeDescriptor3Replicas.RangeID, - LeaseHolder: &recognizedLeaseHolder, - RangeDesc: testUserRangeDescriptor3Replicas, + RangeID: testUserRangeDescriptor3Replicas.RangeID, + DeprecatedLeaseHolder: &recognizedLeaseHolder, + RangeDesc: testUserRangeDescriptor3Replicas, }, expLeaseholder: &recognizedLeaseHolder, expLease: false, @@ -757,9 +759,8 @@ func TestBackoffOnNotLeaseHolderErrorDuringTransfer(t *testing.T) { } reply.Error = roachpb.NewError( &roachpb.NotLeaseHolderError{ - Replica: repls[int(seq)%2], - LeaseHolder: &repls[(int(seq)+1)%2], - Lease: lease, + Replica: repls[int(seq)%2], + Lease: lease, }) return reply, nil } @@ -840,9 +841,8 @@ func TestNoBackoffOnNotLeaseHolderErrorFromFollowerRead(t *testing.T) { br := ba.CreateReply() if ba.Replica != lease.Replica { br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{ - Replica: ba.Replica, - LeaseHolder: &lease.Replica, - Lease: &lease, + Replica: ba.Replica, + Lease: &lease, }) } return br, nil @@ -1123,8 +1123,11 @@ func TestDistSenderIgnoresNLHEBasedOnOldRangeGeneration(t *testing.T) { Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4}, } } else { - // Speculative lease -- the NLHE only carries LeaseHolder information. - nlhe.LeaseHolder = &roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4} + // Speculative lease -- the NLHE only carries LeaseHolder information + // and the sequence number is unset. + nlhe.Lease = &roachpb.Lease{ + Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4}, + } } cachedLease := roachpb.Lease{ @@ -1703,7 +1706,10 @@ func TestEvictCacheOnUnknownLeaseHolder(t *testing.T) { var err error switch count { case 0, 1: - err = &roachpb.NotLeaseHolderError{LeaseHolder: &roachpb.ReplicaDescriptor{NodeID: 99, StoreID: 999}} + err = &roachpb.NotLeaseHolderError{ + Lease: &roachpb.Lease{ + Replica: roachpb.ReplicaDescriptor{NodeID: 99, StoreID: 999}}, + } case 2: err = roachpb.NewRangeNotFoundError(0, 0) default: @@ -5173,8 +5179,9 @@ func TestDistSenderNLHEFromUninitializedReplicaDoesNotCauseUnboundedBackoff(t *t // will include a speculative lease that points to a replica that isn't // part of the client's range descriptor. This is only possible in // versions <= 22.1 as NLHE errors from uninitialized replicas no longer - // return speculative leases. Effectively, this acts as a mixed - // (22.1, 22.2) version test. + // return speculative leases by populating the (Deprecated)LeaseHolder + // field. Effectively, this acts as a mixed (22.1, 22.2) version test. + // TODO(arul): remove the speculative lease version of this test in 23.1. clock := hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */) rpcContext := rpc.NewInsecureTestingContext(ctx, clock, stopper) @@ -5217,7 +5224,7 @@ func TestDistSenderNLHEFromUninitializedReplicaDoesNotCauseUnboundedBackoff(t *t RangeDesc: roachpb.RangeDescriptor{}, } if returnSpeculativeLease { - nlhe.LeaseHolder = &roachpb.ReplicaDescriptor{NodeID: 5, StoreID: 5, ReplicaID: 5} + nlhe.DeprecatedLeaseHolder = &roachpb.ReplicaDescriptor{NodeID: 5, StoreID: 5, ReplicaID: 5} } br.Error = roachpb.NewError(nlhe) case 1: diff --git a/pkg/kv/kvserver/client_raft_test.go b/pkg/kv/kvserver/client_raft_test.go index 431115c9a183..4a77ab618cdf 100644 --- a/pkg/kv/kvserver/client_raft_test.go +++ b/pkg/kv/kvserver/client_raft_test.go @@ -1282,8 +1282,9 @@ func TestRequestsOnLaggingReplica(t *testing.T) { require.NotNil(t, pErr, "unexpected success") nlhe := pErr.GetDetail().(*roachpb.NotLeaseHolderError) require.NotNil(t, nlhe, "expected NotLeaseholderError, got: %s", pErr) - require.NotNil(t, nlhe.LeaseHolder, "expected NotLeaseholderError with a known leaseholder, got: %s", pErr) - require.Equal(t, leaderReplicaID, nlhe.LeaseHolder.ReplicaID) + require.False(t, nlhe.Lease.Empty()) + require.NotNil(t, nlhe.Lease.Replica, "expected NotLeaseholderError with a known leaseholder, got: %s", pErr) + require.Equal(t, leaderReplicaID, nlhe.Lease.Replica.ReplicaID) } type fakeSnapshotStream struct { diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index e17bd407638f..49bb8d260482 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -1617,9 +1617,9 @@ func TestLeaseExpirationBasedRangeTransfer(t *testing.T) { if !ok { t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr) } - if !nlhe.LeaseHolder.Equal(&l.replica1Desc) { + if !nlhe.Lease.Replica.Equal(&l.replica1Desc) { t.Fatalf("expected lease holder %+v, got %+v", - l.replica1Desc, nlhe.LeaseHolder) + l.replica1Desc, nlhe.Lease.Replica) } // Check that replica1 now has the lease. @@ -1717,9 +1717,9 @@ func TestLeaseExpirationBasedDrainTransfer(t *testing.T) { if !ok { t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr) } - if nlhe.LeaseHolder == nil || !nlhe.LeaseHolder.Equal(&l.replica1Desc) { + if nlhe.Lease.Empty() || !nlhe.Lease.Replica.Equal(&l.replica1Desc) { t.Fatalf("expected lease holder %+v, got %+v", - l.replica1Desc, nlhe.LeaseHolder) + l.replica1Desc, nlhe.Lease.Replica) } // Check that replica1 now has the lease. diff --git a/pkg/kv/kvserver/replica_gossip.go b/pkg/kv/kvserver/replica_gossip.go index 96c40a425b30..b42af3cde957 100644 --- a/pkg/kv/kvserver/replica_gossip.go +++ b/pkg/kv/kvserver/replica_gossip.go @@ -144,8 +144,8 @@ func (r *Replica) getLeaseForGossip(ctx context.Context) (bool, *roachpb.Error) switch e := pErr.GetDetail().(type) { case *roachpb.NotLeaseHolderError: // NotLeaseHolderError means there is an active lease, but only if - // the lease holder is set; otherwise, it's likely a timeout. - if e.LeaseHolder != nil { + // the lease is non-empty; otherwise, it's likely a timeout. + if !e.Lease.Empty() { pErr = nil } default: diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go index b68a30f2a287..fa42407e13e8 100644 --- a/pkg/kv/kvserver/replica_proposal_buf.go +++ b/pkg/kv/kvserver/replica_proposal_buf.go @@ -1235,12 +1235,14 @@ func (rp *replicaProposer) rejectProposalWithRedirectLocked( storeID := r.store.StoreID() r.store.metrics.LeaseRequestErrorCount.Inc(1) redirectRep, _ /* ok */ := rangeDesc.GetReplicaDescriptorByID(redirectTo) - speculativeLease := roachpb.Lease{ - Replica: redirectRep, - } log.VEventf(ctx, 2, "redirecting proposal to node %s; request: %s", redirectRep.NodeID, prop.Request) - rp.rejectProposalWithErrLocked(ctx, prop, roachpb.NewError(newNotLeaseHolderError( - speculativeLease, storeID, rangeDesc, "refusing to acquire lease on follower"))) + rp.rejectProposalWithErrLocked(ctx, prop, roachpb.NewError( + newNotLeaseHolderErrorWithSpeculativeLease( + redirectRep, + storeID, + rangeDesc, + "refusing to acquire lease on follower"), + )) } func (rp *replicaProposer) rejectProposalWithLeaseTransferRejectedLocked( diff --git a/pkg/kv/kvserver/replica_range_lease.go b/pkg/kv/kvserver/replica_range_lease.go index 6fef0e76b1b0..69cd6c256b94 100644 --- a/pkg/kv/kvserver/replica_range_lease.go +++ b/pkg/kv/kvserver/replica_range_lease.go @@ -1023,12 +1023,28 @@ func newNotLeaseHolderError( if stillMember { err.Lease = new(roachpb.Lease) *err.Lease = l - err.LeaseHolder = &err.Lease.Replica } } return err } +// newNotLeaseHolderErrorWithSpeculativeLease returns a NotLeaseHolderError +// initialized with a speculative lease pointing to the supplied replica. +// A NotLeaseHolderError may be constructed with a speculative lease if the +// current lease is not known, but the error is being created by guessing who +// the leaseholder may be. +func newNotLeaseHolderErrorWithSpeculativeLease( + leaseHolder roachpb.ReplicaDescriptor, + proposerStoreID roachpb.StoreID, + rangeDesc *roachpb.RangeDescriptor, + msg string, +) *roachpb.NotLeaseHolderError { + speculativeLease := roachpb.Lease{ + Replica: leaseHolder, + } + return newNotLeaseHolderError(speculativeLease, proposerStoreID, rangeDesc, msg) +} + // newLeaseTransferRejectedBecauseTargetMayNeedSnapshotError return an error // indicating that a lease transfer failed because the current leaseholder could // not prove that the lease transfer target did not need a Raft snapshot. diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 574db8d309fb..0b6cdfb47201 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -739,7 +739,7 @@ func TestBehaviorDuringLeaseTransfer(t *testing.T) { require.Error(t, err) var lErr *roachpb.NotLeaseHolderError require.True(t, errors.As(err, &lErr)) - require.Equal(t, secondReplica.StoreID, lErr.LeaseHolder.StoreID) + require.Equal(t, secondReplica.StoreID, lErr.Lease.Replica.StoreID) } else { // Check that the replica doesn't use its lease, even though there's // no longer a transfer in progress. This is because, even though diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 2ab292632729..ce473fc30341 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -491,14 +491,13 @@ func (p *PinnedLeasesKnob) rejectLeaseIfPinnedElsewhere(r *Replica) *roachpb.Err if err != nil { return roachpb.NewError(err) } - var pinned *roachpb.ReplicaDescriptor - if pinnedRep, ok := r.descRLocked().GetReplicaDescriptor(pinnedStore); ok { - pinned = &pinnedRep - } + pinned, _ := r.descRLocked().GetReplicaDescriptor(pinnedStore) return roachpb.NewError(&roachpb.NotLeaseHolderError{ - Replica: repDesc, - LeaseHolder: pinned, - RangeID: r.RangeID, - CustomMsg: "injected: lease pinned to another store", + Replica: repDesc, + Lease: &roachpb.Lease{ + Replica: pinned, + }, + RangeID: r.RangeID, + CustomMsg: "injected: lease pinned to another store", }) } diff --git a/pkg/roachpb/errors.go b/pkg/roachpb/errors.go index 640e91221fd4..544b3a0a1bb4 100644 --- a/pkg/roachpb/errors.go +++ b/pkg/roachpb/errors.go @@ -488,12 +488,12 @@ func (e *NotLeaseHolderError) message(_ *Error) string { } else { fmt.Fprint(&buf, "replica not lease holder; ") } - if e.LeaseHolder == nil { + if e.DeprecatedLeaseHolder == nil { fmt.Fprint(&buf, "lease holder unknown") } else if e.Lease != nil { fmt.Fprintf(&buf, "current lease is %s", e.Lease) } else { - fmt.Fprintf(&buf, "replica %s is", *e.LeaseHolder) + fmt.Fprintf(&buf, "replica %s is", *e.DeprecatedLeaseHolder) } return buf.String() } diff --git a/pkg/roachpb/errors.proto b/pkg/roachpb/errors.proto index 617c1b7ea4b2..55f9fee61eb2 100644 --- a/pkg/roachpb/errors.proto +++ b/pkg/roachpb/errors.proto @@ -45,10 +45,15 @@ message NotLeaseHolderError { // representation, if known. optional roachpb.ReplicaDescriptor replica = 1 [(gogoproto.nullable) = false]; // The lease holder, if known. - optional roachpb.ReplicaDescriptor lease_holder = 2; - // The current lease, if known. This might be nil even when lease_holder is - // set, as sometimes one can create this error without actually knowing the - // current lease, but having a guess about who the leader is. + // + // This field was only ever meaningful if the full lease was not known, but + // when constructing this error there was a guess about who the leaseholder + // may be. The same idea applied to speculative leases (which have unset + // sequence numbers). In a bid to unify these two cases, from v22.2, we stop + // making use of this field. + // TODO(arul): remove this field in 23.1. + optional roachpb.ReplicaDescriptor deprecated_lease_holder = 2; + // The current lease, if known. // // It's possible for leases returned here to represent speculative leases, not // actual committed leases. In this case, the lease will not have its Sequence @@ -196,7 +201,7 @@ enum TransactionAbortedReason { // TODO(andrei): We should be able to identify the range merge case by saving // a bit more info in the timestamp cache. ABORT_REASON_TIMESTAMP_CACHE_REJECTED = 7; - + reserved 2; } @@ -720,15 +725,15 @@ message InsufficientSpaceError { optional int64 store_id = 1 [(gogoproto.nullable) = false, (gogoproto.customname) = "StoreID", (gogoproto.casttype) = "StoreID"]; - // Op is the operaton that was unable to be performed. + // Op is the operaton that was unable to be performed. optional string op = 2 [(gogoproto.nullable) = false]; // Available is remaining capacity. optional int64 available = 3 [(gogoproto.nullable) = false]; - // Capacity is total capacity. + // Capacity is total capacity. optional int64 capacity = 4 [(gogoproto.nullable) = false]; // RequiredFraction is the required remaining capacity fraction. optional double required = 5 [(gogoproto.nullable) = false]; -} \ No newline at end of file +}