Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kv: Don't evict from leaseholder cache on context cancellations #30163

Merged
merged 2 commits into from
Sep 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 26 additions & 22 deletions pkg/kv/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,23 +433,26 @@ func (ds *DistSender) sendSingleRange(
// Try to send the call.
replicas := NewReplicaSlice(ds.gossip, desc)

// Rearrange the replicas so that they're ordered in expectation of
// request latency.
var latencyFn LatencyFunc
if ds.rpcContext != nil {
latencyFn = ds.rpcContext.RemoteClocks.Latency
}
replicas.OptimizeReplicaOrder(ds.getNodeDescriptor(), latencyFn)

// If this request needs to go to a lease holder and we know who that is, move
// it to the front.
var knowLeaseholder bool
if !ba.IsReadOnly() || ba.ReadConsistency.RequiresReadLease() {
if storeID, ok := ds.leaseHolderCache.Lookup(ctx, desc.RangeID); ok {
if i := replicas.FindReplica(storeID); i >= 0 {
replicas.MoveToFront(i)
knowLeaseholder = true
}
}
}
if !knowLeaseholder {
// Rearrange the replicas so that they're ordered in expectation of
// request latency.
var latencyFn LatencyFunc
if ds.rpcContext != nil {
latencyFn = ds.rpcContext.RemoteClocks.Latency
}
replicas.OptimizeReplicaOrder(ds.getNodeDescriptor(), latencyFn)
}

br, err := ds.sendRPC(ctx, desc.RangeID, replicas, ba)
if err != nil {
Expand Down Expand Up @@ -1321,20 +1324,21 @@ func (ds *DistSender) sendToReplicas(
ambiguousError = err
}
log.VErrEventf(ctx, 2, "RPC error: %s", err)
if storeID, ok := ds.leaseHolderCache.Lookup(ctx, rangeID); ok && curReplica.StoreID == storeID {
// If the down replica is cached as the lease holder, evict
// it. The only other eviction happens below on
// NotLeaseHolderError, but if the next replica is the
// actual lease holder, we're never going to receive one of
// those and will thus pay the price of trying the down node
// first forever.
//
// NB: we could consider instead adding a successful reply
// from the next replica into the cache, but without a
// leaseholder (and taking into account that the local
// node can't be down) it won't take long until we talk
// to a replica that tells us who the leaseholder is.
ds.leaseHolderCache.Update(ctx, rangeID, 0 /* evict */)

// If the error wasn't just a context cancellation and the down replica
// is cached as the lease holder, evict it. The only other eviction
// happens below on NotLeaseHolderError, but if the next replica is the
// actual lease holder, we're never going to receive one of those and
// will thus pay the price of trying the down node first forever.
//
// NB: we should consider instead adding a successful reply from the next
// replica into the cache, but without a leaseholder (and taking into
// account that the local node can't be down) it won't take long until we
// talk to a replica that tells us who the leaseholder is.
if ctx.Err() == nil {
if storeID, ok := ds.leaseHolderCache.Lookup(ctx, rangeID); ok && curReplica.StoreID == storeID {
ds.leaseHolderCache.Update(ctx, rangeID, 0 /* evict */)
}
}
} else {
propagateError := false
Expand Down
5 changes: 3 additions & 2 deletions pkg/kv/dist_sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,9 +269,9 @@ func TestSendRPCOrder(t *testing.T) {
{
args: &roachpb.PutRequest{},
tiers: append(nodeTiers[5], roachpb.Tier{Key: "irrelevant", Value: ""}),
// Compare only the first resulting addresses as we have a lease holder
// Compare only the first resulting address as we have a lease holder
// and that means we're only trying to send there.
expReplica: []roachpb.NodeID{2, 5, 4, 0, 0},
expReplica: []roachpb.NodeID{2, 0, 0, 0, 0},
leaseHolder: 2,
},
// Inconsistent Get without matching attributes but lease holder (node 3). Should just
Expand Down Expand Up @@ -335,6 +335,7 @@ func TestSendRPCOrder(t *testing.T) {
ds := NewDistSender(cfg, g)

for n, tc := range testCases {
log.Infof(context.TODO(), "testcase %d", n)
verifyCall = makeVerifier(tc.expReplica)

{
Expand Down