Skip to content

Commit

Permalink
kvclient: ignore stale lease information from lagging replicas
Browse files Browse the repository at this point in the history
This commit makes it such that the DistSender ignores the lease information
returned in `NotLeaseHolderError`s coming from replicas that have stale view of
the range descriptor (characterized by an older `DescriptorGeneration` on the
replica).

Not doing so before was hazardous because, if we received an NLHE that pointed
to a replica that did not belong in the cached descriptor, we'd trigger a cache
evicion. This assumed that the replica returning the error had a fresher view
of the range than what we had in the cache, which is not always true. This
meant that we'd keep doing range lookups and subsequent evictions until this
lagging replica caught up to the current state of the range.

Release note (bug fix): A bug that caused high SQL tail latencies during
background rebalancing in the cluster has been fixed.
  • Loading branch information
aayushshah15 committed Dec 12, 2021
1 parent 884088d commit d31a069
Show file tree
Hide file tree
Showing 5 changed files with 351 additions and 208 deletions.
13 changes: 12 additions & 1 deletion pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -2080,7 +2080,18 @@ func (ds *DistSender) sendToReplicas(
ds.metrics.NotLeaseHolderErrCount.Inc(1)
// If we got some lease information, we use it. If not, we loop around
// and try the next replica.
if tErr.Lease != nil || tErr.LeaseHolder != nil {

// We cannot trust the lease information in the error if it is coming
// from a replica that has a stale view of the range (compared to what
// we've got in the cache).
withStaleRangeDesc := tErr.DescriptorGeneration > 0 && tErr.DescriptorGeneration < routing.Desc().Generation
if withStaleRangeDesc {
log.VErrEventf(
ctx, 1, "NotLeaseHolderError originated from a replica with a "+
"stale range descriptor generation; ignoring",
)
}
if (tErr.Lease != nil || tErr.LeaseHolder != nil) && !withStaleRangeDesc {
// Update the leaseholder in the range cache. Naively this would also
// happen when the next RPC comes back, but we don't want to wait out
// the additional RPC latency.
Expand Down
96 changes: 96 additions & 0 deletions pkg/kv/kvclient/kvcoord/dist_sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,102 @@ func TestDistSenderMovesOnFromReplicaWithStaleLease(t *testing.T) {
require.LessOrEqual(t, callsToNode2, 11)
}

// TestDistSenderIgnodesNLHEBasedOnOldRangeGeneration tests that a
// NotLeaseHolderError received from a replica that has a stale range descriptor
// version is ignored, and the next replica is attempted.
func TestDistSenderIgnoresNLHEBasedOnOldRangeGeneration(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
tracer := tracing.NewTracer()
ctx, finishAndGetRecording := tracing.ContextWithRecordingSpan(
context.Background(), tracer, "test",
)
stopper := stop.NewStopper()
defer stopper.Stop(ctx)

clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
g := makeGossip(t, stopper, rpcContext)
for _, n := range testUserRangeDescriptor3Replicas.Replicas().VoterDescriptors() {
require.NoError(t, g.AddInfoProto(
gossip.MakeNodeIDKey(n.NodeID),
newNodeDesc(n.NodeID),
gossip.NodeDescriptorTTL,
))
}

oldGeneration := roachpb.RangeGeneration(1)
newGeneration := roachpb.RangeGeneration(2)
desc := roachpb.RangeDescriptor{
RangeID: 1,
Generation: newGeneration,
StartKey: roachpb.RKeyMin,
EndKey: roachpb.RKeyMax,
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, ReplicaID: 1},
{NodeID: 2, StoreID: 2, ReplicaID: 2},
{NodeID: 3, StoreID: 3, ReplicaID: 3},
},
}
// Ambiguous lease refers to a replica that is incompatible with the cached
// range descriptor.
ambiguousLease := roachpb.Lease{
Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4},
}
cachedLease := roachpb.Lease{
Replica: desc.InternalReplicas[1],
}

// The cache starts with a lease on node 2, so the first request will be
// routed there. That replica will reply with an NLHE with an old descriptor
// generation value, which should make the DistSender try the next replica.
var callsToNode2 int
sendFn := func(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) {
if ba.Replica.NodeID == 2 {
callsToNode2++
reply := &roachpb.BatchResponse{}
err := &roachpb.NotLeaseHolderError{Lease: &ambiguousLease, DescriptorGeneration: oldGeneration}
reply.Error = roachpb.NewError(err)
return reply, nil
}
require.Equal(t, ba.Replica.NodeID, roachpb.NodeID(1))
return ba.CreateReply(), nil
}

cfg := DistSenderConfig{
AmbientCtx: log.AmbientContext{Tracer: tracer},
Clock: clock,
NodeDescs: g,
RPCContext: rpcContext,
TestingKnobs: ClientTestingKnobs{
TransportFactory: adaptSimpleTransport(sendFn),
},
RangeDescriptorDB: threeReplicaMockRangeDescriptorDB,
NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
Settings: cluster.MakeTestingClusterSettings(),
}
ds := NewDistSender(cfg)

ds.rangeCache.Insert(ctx, roachpb.RangeInfo{
Desc: desc,
Lease: cachedLease,
})

get := roachpb.NewGet(roachpb.Key("a"), false /* forUpdate */)
_, pErr := kv.SendWrapped(ctx, ds, get)
require.Nil(t, pErr)

require.Equal(t, ds.Metrics().RangeLookups.Count(), int64(0))
require.Equal(t, ds.Metrics().NextReplicaErrCount.Count(), int64(1))
require.Equal(t, ds.Metrics().NotLeaseHolderErrCount.Count(), int64(1))
require.Equal(t, callsToNode2, 1)
require.Regexp(
t,
"originated from a replica with a stale range descriptor generation",
finishAndGetRecording().String(),
)
}

func TestDistSenderRetryOnTransportErrors(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down
5 changes: 3 additions & 2 deletions pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -903,8 +903,9 @@ func newNotLeaseHolderError(
l roachpb.Lease, proposerStoreID roachpb.StoreID, rangeDesc *roachpb.RangeDescriptor, msg string,
) *roachpb.NotLeaseHolderError {
err := &roachpb.NotLeaseHolderError{
RangeID: rangeDesc.RangeID,
CustomMsg: msg,
RangeID: rangeDesc.RangeID,
CustomMsg: msg,
DescriptorGeneration: rangeDesc.Generation,
}
if proposerStoreID != 0 {
err.Replica, _ = rangeDesc.GetReplicaDescriptor(proposerStoreID)
Expand Down
Loading

0 comments on commit d31a069

Please sign in to comment.