From 2ee8a77d0f1a6aa875f54dd9d17c21f451727f52 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Mon, 13 Sep 2021 17:16:32 -0400 Subject: [PATCH] kvclient: switch error in ctx cancel edge case This patch changes the error returned by the DistSender on a cancelled ctx. Depending on exactly who detects the ctx as cancelled, there are a number of different possibilities - too many to enumerate here. Generally, the client is supposed to get a context.Canceled error, wrapped in different layers. The code path that this patch changes is about the cancellation being detected by sendPartialBatch() without it having been previously detected by sendToReplicas(). This path is unusual (sendToReplicas() generally detects the ctx cancelled). It's also hard to test. Depending on the exact cancellation timing, it's possible, though, for sendPartialBatch() to detect it instead. In this case, this patch makes it so that, if sendToReplicas() returned a sendError (indicating that a suitable replica could not be reached and that the higher layer is expected to continue trying other replicas), the error returned to the client is a cancellation error instead of the sendError. Touches #69419 Release note: None --- pkg/kv/kvclient/kvcoord/dist_sender.go | 8 ++++---- pkg/kv/kvserver/client_merge_test.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/kv/kvclient/kvcoord/dist_sender.go b/pkg/kv/kvclient/kvcoord/dist_sender.go index fcda80c5bd34..5643043868fe 100644 --- a/pkg/kv/kvclient/kvcoord/dist_sender.go +++ b/pkg/kv/kvclient/kvcoord/dist_sender.go @@ -1590,9 +1590,6 @@ func (ds *DistSender) sendPartialBatch( } if err != nil { - // Set pErr so that, if we don't perform any more retries, the - // deduceRetryEarlyExitError() call below the loop is inhibited. - pErr = roachpb.NewError(err) switch { case errors.HasType(err, sendError{}): // We've tried all the replicas without success. 
Either they're all @@ -1614,6 +1611,9 @@ func (ds *DistSender) sendPartialBatch( routingTok.Evict(ctx) continue } + // Set pErr so that the deduceRetryEarlyExitError() call below the loop is + // inhibited. + pErr = roachpb.NewError(err) break } @@ -1677,7 +1677,7 @@ func (ds *DistSender) sendPartialBatch( // channels were closed. if pErr == nil { if err := ds.deduceRetryEarlyExitError(ctx); err == nil { - log.Fatal(ctx, "exited retry loop without an error") + pErr = roachpb.NewError(errors.AssertionFailedf("exited retry loop without an error")) } else { pErr = roachpb.NewError(err) } diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go index fa49a05475cb..16103aeed38e 100644 --- a/pkg/kv/kvserver/client_merge_test.go +++ b/pkg/kv/kvserver/client_merge_test.go @@ -4127,7 +4127,7 @@ func TestStoreRangeMergeDuringShutdown(t *testing.T) { // Send a dummy get request on the RHS to force a lease acquisition. We expect // this to fail, as quiescing stores cannot acquire leases. _, err = store.DB().Get(ctx, key.Next()) - if exp := "not lease holder"; !testutils.IsError(err, exp) { + if exp := "node unavailable"; !testutils.IsError(err, exp) { t.Fatalf("expected %q error, but got %v", exp, err) } }