Skip to content

Commit

Permalink
kvserver: stop returning speculative leases from uninitialized replicas
Browse files Browse the repository at this point in the history
Previously, if a request was routed to an uninitialized replica, the
returned NLHE would contain a speculative lease  pointing to the creator
of the uninitialized replica.

The prior commit makes it such that lease/leaseholder information in
NLHEs is only considered if accompanied by a non-stale range descriptor.
As uninitialized replicas always return empty range descriptors, which
are considered stale, the speculative lease they returned would never
be acted upon. Given this, we omit speculative leases in NLHEs returned
by uninitialized replicas. The idea being the client will simply move
on to the next replica in its transport which will very likely have
more useful leaseholder information for the client.

Release note: None
Release justification: Low risk, high benefit change
  • Loading branch information
arulajmani committed Aug 15, 2022
1 parent 5fb48d6 commit e1c24ed
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 109 deletions.
207 changes: 110 additions & 97 deletions pkg/kv/kvclient/kvcoord/dist_sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5161,108 +5161,121 @@ func TestDistSenderNLHEFromUninitializedReplicaDoesNotCauseUnboundedBackoff(t *t
stopper := stop.NewStopper()
defer stopper.Stop(ctx)

// We'll set things up such that the first replica on the range descriptor of
// the client has an uninitialized replica. We'll mimic this by returning an
// empty range descriptor as part of the NotLeaseHolderError it returns.
// Moreover, the error will point to a replica that isn't part of the client's
// range descriptor. We expect the client to disregard this NLHE and simply
// reroute to the next replica.
testutils.RunTrueAndFalse(t, "uninitialized-replica-returns-speculative-lease",
func(t *testing.T, returnSpeculativeLease bool) {

clock := hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */)
rpcContext := rpc.NewInsecureTestingContext(ctx, clock, stopper)
ns := &mockNodeStore{nodes: []roachpb.NodeDescriptor{
{NodeID: 1, Address: util.UnresolvedAddr{}},
{NodeID: 2, Address: util.UnresolvedAddr{}},
{NodeID: 3, Address: util.UnresolvedAddr{}},
{NodeID: 4, Address: util.UnresolvedAddr{}},
}}
// We'll set things up such that the first replica on the range descriptor of
// the client has an uninitialized replica. We'll mimic this by returning an
// empty range descriptor as part of the NotLeaseHolderError it returns.
// We expect the client to simply reroute to the next replica.
//
// For the returnsSpeculativeLease=true version of the test the NLHE error
// will include a speculative lease that points to a replica that isn't
// part of the client's range descriptor. This is only possible in
// versions <= 22.1 as NLHE errors from uninitialized replicas no longer
// return speculative leases. Effectively, this acts as a mixed
// (22.1, 22.2) version test.

// Actual view of the range (descriptor + lease). The client doesn't have
// any knowledge about the lease, so it routes its request to the first
// replica on the range descriptor.
var desc = roachpb.RangeDescriptor{
RangeID: roachpb.RangeID(1),
Generation: 1,
StartKey: roachpb.RKeyMin,
EndKey: roachpb.RKeyMax,
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, ReplicaID: 1},
{NodeID: 2, StoreID: 2, ReplicaID: 2},
{NodeID: 3, StoreID: 3, ReplicaID: 3},
{NodeID: 4, StoreID: 4, ReplicaID: 4},
},
}
leaseResp := roachpb.Lease{
Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4},
}
clock := hlc.NewClockWithSystemTimeSource(time.Nanosecond /* maxOffset */)
rpcContext := rpc.NewInsecureTestingContext(ctx, clock, stopper)
ns := &mockNodeStore{nodes: []roachpb.NodeDescriptor{
{NodeID: 1, Address: util.UnresolvedAddr{}},
{NodeID: 2, Address: util.UnresolvedAddr{}},
{NodeID: 3, Address: util.UnresolvedAddr{}},
{NodeID: 4, Address: util.UnresolvedAddr{}},
}}

call := 0
var transportFn = func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) {
br := &roachpb.BatchResponse{}
switch call {
// First we return the replica on store 3 as the leaseholder. This ensures
// the range cache is seeded with some lease.
case 0:
expRepl := desc.Replicas().Descriptors()[0]
require.Equal(t, expRepl, ba.Replica)
br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{
RangeDesc: roachpb.RangeDescriptor{},
LeaseHolder: &roachpb.ReplicaDescriptor{NodeID: 5, StoreID: 5, ReplicaID: 5},
})
case 1:
// We expect the client to discard information from the NLHE above and
// instead just try the next replica.
expRepl := desc.Replicas().Descriptors()[1]
require.Equal(t, expRepl, ba.Replica)
br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{
RangeDesc: desc,
Lease: &leaseResp,
})
case 2:
// We expect the client to route to the leaseholder given it's known now.
expRepl := desc.Replicas().Descriptors()[3]
require.Equal(t, expRepl, ba.Replica)
br = ba.CreateReply()
default:
t.Fatal("unexpected")
}
call++
return br, nil
}
// Actual view of the range (descriptor + lease). The client doesn't have
// any knowledge about the lease, so it routes its request to the first
// replica on the range descriptor.
var desc = roachpb.RangeDescriptor{
RangeID: roachpb.RangeID(1),
Generation: 1,
StartKey: roachpb.RKeyMin,
EndKey: roachpb.RKeyMax,
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1, ReplicaID: 1},
{NodeID: 2, StoreID: 2, ReplicaID: 2},
{NodeID: 3, StoreID: 3, ReplicaID: 3},
{NodeID: 4, StoreID: 4, ReplicaID: 4},
},
}
leaseResp := roachpb.Lease{
Replica: roachpb.ReplicaDescriptor{NodeID: 4, StoreID: 4, ReplicaID: 4},
}

rangeLookups := 0
cfg := DistSenderConfig{
AmbientCtx: log.MakeTestingAmbientCtxWithNewTracer(),
Clock: clock,
NodeDescs: ns,
RPCContext: rpcContext,
RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, reverse bool) (
[]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error,
) {
switch rangeLookups {
case 0:
rangeLookups++
return []roachpb.RangeDescriptor{desc}, nil, nil
default:
// This doesn't run on the test's goroutine.
panic("unexpected")
call := 0
var transportFn = func(_ context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) {
br := &roachpb.BatchResponse{}
switch call {
case 0:
// We return an empty range descriptor in the NLHE like an
// uninitialized replica would.
expRepl := desc.Replicas().Descriptors()[0]
require.Equal(t, expRepl, ba.Replica)
nlhe := &roachpb.NotLeaseHolderError{
RangeDesc: roachpb.RangeDescriptor{},
}
if returnSpeculativeLease {
nlhe.LeaseHolder = &roachpb.ReplicaDescriptor{NodeID: 5, StoreID: 5, ReplicaID: 5}
}
br.Error = roachpb.NewError(nlhe)
case 1:
// We expect the client to discard information from the NLHE above and
// instead just try the next replica.
expRepl := desc.Replicas().Descriptors()[1]
require.Equal(t, expRepl, ba.Replica)
br.Error = roachpb.NewError(&roachpb.NotLeaseHolderError{
RangeDesc: desc,
Lease: &leaseResp,
})
case 2:
// We expect the client to route to the leaseholder given it's now
// known.
expRepl := desc.Replicas().Descriptors()[3]
require.Equal(t, expRepl, ba.Replica)
br = ba.CreateReply()
default:
t.Fatal("unexpected")
}
call++
return br, nil
}
}),
TestingKnobs: ClientTestingKnobs{
TransportFactory: adaptSimpleTransport(transportFn),
DontReorderReplicas: true,
},
Settings: cluster.MakeTestingClusterSettings(),
}

ds := NewDistSender(cfg)
var ba roachpb.BatchRequest
get := &roachpb.GetRequest{}
get.Key = roachpb.Key("a")
ba.Add(get)
rangeLookups := 0
cfg := DistSenderConfig{
AmbientCtx: log.MakeTestingAmbientCtxWithNewTracer(),
Clock: clock,
NodeDescs: ns,
RPCContext: rpcContext,
RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, reverse bool) (
[]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error,
) {
switch rangeLookups {
case 0:
rangeLookups++
return []roachpb.RangeDescriptor{desc}, nil, nil
default:
// This doesn't run on the test's goroutine.
panic("unexpected")
}
}),
TestingKnobs: ClientTestingKnobs{
TransportFactory: adaptSimpleTransport(transportFn),
DontReorderReplicas: true,
},
Settings: cluster.MakeTestingClusterSettings(),
}

_, err := ds.Send(ctx, ba)
require.NoError(t, err.GoError())
require.Equal(t, 3, call)
require.Equal(t, 1, rangeLookups)
ds := NewDistSender(cfg)
var ba roachpb.BatchRequest
get := &roachpb.GetRequest{}
get.Key = roachpb.Key("a")
ba.Add(get)

_, err := ds.Send(ctx, ba)
require.NoError(t, err.GoError())
require.Equal(t, 3, call)
require.Equal(t, 1, rangeLookups)
})
}
4 changes: 0 additions & 4 deletions pkg/kv/kvserver/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,6 @@ type Replica struct {
// inform load based lease and replica rebalancing decisions.
loadStats *ReplicaLoad

// creatingReplica is set when a replica is created as uninitialized
// via a raft message.
creatingReplica *roachpb.ReplicaDescriptor

// Held in read mode during read-only commands. Held in exclusive mode to
// prevent read-only commands from executing. Acquired before the embedded
// RWMutex.
Expand Down
1 change: 0 additions & 1 deletion pkg/kv/kvserver/store_create_replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ func (s *Store) tryGetOrCreateReplica(
// snapshot.
}
repl := newUnloadedReplica(ctx, uninitializedDesc, s, replicaID)
repl.creatingReplica = creatingReplica
repl.raftMu.Lock() // not unlocked

// Take out read-only lock. Not strictly necessary here, but follows the
Expand Down
13 changes: 6 additions & 7 deletions pkg/kv/kvserver/store_send.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,13 @@ func (s *Store) SendWithWriteBytes(
if !repl.IsInitialized() {
// If we have an uninitialized copy of the range, then we are
// probably a valid member of the range, we're just in the
// process of getting our snapshot. If we returned
// RangeNotFoundError, the client would invalidate its cache,
// but we can be smarter: the replica that caused our
// uninitialized replica to be created is most likely the
// leader.
// RangeNotFoundError, the client would invalidate its cache. Instead,
// we return a NLHE error to indicate to the client that it should move
// on and try the next replica. Very likely, the next replica the client
// tries will be initialized and will have useful leaseholder information
// for the client.
return nil, nil, roachpb.NewError(&roachpb.NotLeaseHolderError{
RangeID: ba.RangeID,
LeaseHolder: repl.creatingReplica,
RangeID: ba.RangeID,
// The replica doesn't have a range descriptor yet, so we have to build
// a ReplicaDescriptor manually.
Replica: roachpb.ReplicaDescriptor{
Expand Down

0 comments on commit e1c24ed

Please sign in to comment.