From e1c24edea342a83dbd250732e8d1de770cc47ae8 Mon Sep 17 00:00:00 2001 From: Arul Ajmani Date: Fri, 12 Aug 2022 10:51:29 -0400 Subject: [PATCH] kvserver: stop returning speculative leases from uninitialized replicas Previously, if a request was routed to an uninitialized replica, the returned NLHE would contain a speculative lease pointing to the creator of the uninitialized replica. The prior commit makes it such that lease/leaseholder information in NLHEs is only considered if accompanied by a non-stale range descriptor. As uninitialized replicas always return empty range descriptors, which are considered stale, the speculative lease they returned would never be acted upon. Given this, we omit speculative leases in NLHEs returned by uninitialized replicas. The idea being the client will simply move on to the next replica in its transport which will very likely have more useful leaseholder information for the client. Release note: None Release justification: Low risk, high benefit change --- pkg/kv/kvclient/kvcoord/dist_sender_test.go | 207 +++++++++++--------- pkg/kv/kvserver/replica.go | 4 - pkg/kv/kvserver/store_create_replica.go | 1 - pkg/kv/kvserver/store_send.go | 13 +- 4 files changed, 116 insertions(+), 109 deletions(-) diff --git a/pkg/kv/kvclient/kvcoord/dist_sender_test.go b/pkg/kv/kvclient/kvcoord/dist_sender_test.go index 876efd74f21c..089f9bc9111c 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender_test.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender_test.go @@ -5161,108 +5161,121 @@ func TestDistSenderNLHEFromUninitializedReplicaDoesNotCauseUnboundedBackoff(t *t stopper := stop.NewStopper() defer stopper.Stop(ctx) - // We'll set things up such that the first replica on the range descriptor of - // the client has an uninitialized replica. We'll mimic this by returning an - // empty range descriptor as part of the NotLeaseHolderError it returns. - // Moreover, the error will point to a replica that isn't part of the client's - // range descriptor. We expect the client to disregard this NLHE and simply - // reroute to the next replica. + testutils.RunTrueAndFalse(t, "uninitialized-replica-returns-speculative-lease", + func(t *testing.T, returnSpeculativeLease bool) { - clock := hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */) - rpcContext := rpc.NewInsecureTestingContext(ctx, clock, stopper) - ns := &mockNodeStore{nodes: []roachpb.NodeDescriptor{ - {NodeID: 1, Address: util.UnresolvedAddr{}}, - {NodeID: 2, Address: util.UnresolvedAddr{}}, - {NodeID: 3, Address: util.UnresolvedAddr{}}, - {NodeID: 4, Address: util.UnresolvedAddr{}}, - }} + // We'll set things up such that the first replica on the range descriptor of + // the client has an uninitialized replica. We'll mimic this by returning an + // empty range descriptor as part of the NotLeaseHolderError it returns. + // We expect the client to simply reroute to the next replica. + // + // For the returnsSpeculativeLease=true version of the test the NLHE error + // will include a speculative lease that points to a replica that isn't + // part of the client's range descriptor. This is only possible in + // versions <= 22.1 as NLHE errors from uninitialized replicas no longer + // return speculative leases. Effectively, this acts as a mixed + // (22.1, 22.2) version test. - // Actual view of the range (descriptor + lease). The client doesn't have - // any knowledge about the lease, so it routes its request to the first - // replica on the range descriptor. - var desc = roachpb.RangeDescriptor{ - RangeID: roachpb.RangeID(1), - Generation: 1, - StartKey: roachpb.RKeyMin, - EndKey: roachpb.RKeyMax, - InternalReplicas: []roachpb.ReplicaDescriptor{ - {NodeID: 1, StoreID: 1, ReplicaID: 1}, - {NodeID: 2, StoreID: 2, ReplicaID: 2}, - {NodeID: 3, StoreID: 3, ReplicaID: 3}, - {NodeID: 4, StoreID: 4, ReplicaID: 4}, - }, - } - leaseResp := roachpb.Lease{ - Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4}, - } + clock := hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */) + rpcContext := rpc.NewInsecureTestingContext(ctx, clock, stopper) + ns := &mockNodeStore{nodes: []roachpb.NodeDescriptor{ + {NodeID: 1, Address: util.UnresolvedAddr{}}, + {NodeID: 2, Address: util.UnresolvedAddr{}}, + {NodeID: 3, Address: util.UnresolvedAddr{}}, + {NodeID: 4, Address: util.UnresolvedAddr{}}, + }} - call := 0 - var transportFn = func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) { - br := &roachpb.BatchResponse{} - switch call { - // First we return the replica on store 3 as the leaseholder. This ensures - // the range cache is seeded with some lease. - case 0: - expRepl := desc.Replicas().Descriptors()[0] - require.Equal(t, expRepl, ba.Replica) - br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{ - RangeDesc: roachpb.RangeDescriptor{}, - LeaseHolder: &roachpb.ReplicaDescriptor{NodeID: 5, StoreID: 5, ReplicaID: 5}, - }) - case 1: - // We expect the client to discard information from the NLHE above and - // instead just try the next replica. - expRepl := desc.Replicas().Descriptors()[1] - require.Equal(t, expRepl, ba.Replica) - br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{ - RangeDesc: desc, - Lease: &leaseResp, - }) - case 2: - // We expect the client to route to the leaseholder given it's known now. - expRepl := desc.Replicas().Descriptors()[3] - require.Equal(t, expRepl, ba.Replica) - br = ba.CreateReply() - default: - t.Fatal("unexpected") - } - call++ - return br, nil - } + // Actual view of the range (descriptor + lease). The client doesn't have + // any knowledge about the lease, so it routes its request to the first + // replica on the range descriptor. + var desc = roachpb.RangeDescriptor{ + RangeID: roachpb.RangeID(1), + Generation: 1, + StartKey: roachpb.RKeyMin, + EndKey: roachpb.RKeyMax, + InternalReplicas: []roachpb.ReplicaDescriptor{ + {NodeID: 1, StoreID: 1, ReplicaID: 1}, + {NodeID: 2, StoreID: 2, ReplicaID: 2}, + {NodeID: 3, StoreID: 3, ReplicaID: 3}, + {NodeID: 4, StoreID: 4, ReplicaID: 4}, + }, + } + leaseResp := roachpb.Lease{ + Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4}, + } - rangeLookups := 0 - cfg := DistSenderConfig{ - AmbientCtx: log.MakeTestingAmbientCtxWithNewTracer(), - Clock: clock, - NodeDescs: ns, - RPCContext: rpcContext, - RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, reverse bool) ( - []roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error, - ) { - switch rangeLookups { - case 0: - rangeLookups++ - return []roachpb.RangeDescriptor{desc}, nil, nil - default: - // This doesn't run on the test's goroutine. - panic("unexpected") + call := 0 + var transportFn = func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) { + br := &roachpb.BatchResponse{} + switch call { + case 0: + // We return an empty range descriptor in the NLHE like an + // uninitialized replica would. + expRepl := desc.Replicas().Descriptors()[0] + require.Equal(t, expRepl, ba.Replica) + nlhe := &roachpb.NotLeaseHolderError{ + RangeDesc: roachpb.RangeDescriptor{}, + } + if returnSpeculativeLease { + nlhe.LeaseHolder = &roachpb.ReplicaDescriptor{NodeID: 5, StoreID: 5, ReplicaID: 5} + } + br.Error = roachpb.NewError(nlhe) + case 1: + // We expect the client to discard information from the NLHE above and + // instead just try the next replica. + expRepl := desc.Replicas().Descriptors()[1] + require.Equal(t, expRepl, ba.Replica) + br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{ + RangeDesc: desc, + Lease: &leaseResp, + }) + case 2: + // We expect the client to route to the leaseholder given it's now + // known. + expRepl := desc.Replicas().Descriptors()[3] + require.Equal(t, expRepl, ba.Replica) + br = ba.CreateReply() + default: + t.Fatal("unexpected") + } + call++ + return br, nil } - }), - TestingKnobs: ClientTestingKnobs{ - TransportFactory: adaptSimpleTransport(transportFn), - DontReorderReplicas: true, - }, - Settings: cluster.MakeTestingClusterSettings(), - } - ds := NewDistSender(cfg) - var ba roachpb.BatchRequest - get := &roachpb.GetRequest{} - get.Key = roachpb.Key("a") - ba.Add(get) + rangeLookups := 0 + cfg := DistSenderConfig{ + AmbientCtx: log.MakeTestingAmbientCtxWithNewTracer(), + Clock: clock, + NodeDescs: ns, + RPCContext: rpcContext, + RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, reverse bool) ( + []roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error, + ) { + switch rangeLookups { + case 0: + rangeLookups++ + return []roachpb.RangeDescriptor{desc}, nil, nil + default: + // This doesn't run on the test's goroutine. + panic("unexpected") + } + }), + TestingKnobs: ClientTestingKnobs{ + TransportFactory: adaptSimpleTransport(transportFn), + DontReorderReplicas: true, + }, + Settings: cluster.MakeTestingClusterSettings(), + } - _, err := ds.Send(ctx, ba) - require.NoError(t, err.GoError()) - require.Equal(t, 3, call) - require.Equal(t, 1, rangeLookups) + ds := NewDistSender(cfg) + var ba roachpb.BatchRequest + get := &roachpb.GetRequest{} + get.Key = roachpb.Key("a") + ba.Add(get) + + _, err := ds.Send(ctx, ba) + require.NoError(t, err.GoError()) + require.Equal(t, 3, call) + require.Equal(t, 1, rangeLookups) + }) } diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index 1ce438c56de5..c510504fcc27 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -233,10 +233,6 @@ type Replica struct { // inform load based lease and replica rebalancing decisions. loadStats *ReplicaLoad - // creatingReplica is set when a replica is created as uninitialized - // via a raft message. - creatingReplica *roachpb.ReplicaDescriptor - // Held in read mode during read-only commands. Held in exclusive mode to // prevent read-only commands from executing. Acquired before the embedded // RWMutex. diff --git a/pkg/kv/kvserver/store_create_replica.go b/pkg/kv/kvserver/store_create_replica.go index 60a46f361c08..4a2f59d93820 100644 --- a/pkg/kv/kvserver/store_create_replica.go +++ b/pkg/kv/kvserver/store_create_replica.go @@ -154,7 +154,6 @@ func (s *Store) tryGetOrCreateReplica( // snapshot. } repl := newUnloadedReplica(ctx, uninitializedDesc, s, replicaID) - repl.creatingReplica = creatingReplica repl.raftMu.Lock() // not unlocked // Take out read-only lock. Not strictly necessary here, but follows the diff --git a/pkg/kv/kvserver/store_send.go b/pkg/kv/kvserver/store_send.go index 21618f188f70..c823e23b55b9 100644 --- a/pkg/kv/kvserver/store_send.go +++ b/pkg/kv/kvserver/store_send.go @@ -184,14 +184,13 @@ func (s *Store) SendWithWriteBytes( if !repl.IsInitialized() { // If we have an uninitialized copy of the range, then we are // probably a valid member of the range, we're just in the - // process of getting our snapshot. If we returned - // RangeNotFoundError, the client would invalidate its cache, - // but we can be smarter: the replica that caused our - // uninitialized replica to be created is most likely the - // leader. + // RangeNotFoundError, the client would invalidate its cache. Instead, + // we return a NLHE error to indicate to the client that it should move + // on and try the next replica. Very likely, the next replica the client + // tries will be initialized and will have useful leaseholder information + // for the client. return nil, nil, roachpb.NewError(&roachpb.NotLeaseHolderError{ - RangeID: ba.RangeID, - LeaseHolder: repl.creatingReplica, + RangeID: ba.RangeID, // The replica doesn't have a range descriptor yet, so we have to build // a ReplicaDescriptor manually. Replica: roachpb.ReplicaDescriptor{