From 8ad62e6d2e72004f4f09b8839e222318ccc33fbd Mon Sep 17 00:00:00 2001
From: Andrei Matei
Date: Mon, 13 Sep 2021 17:16:32 -0400
Subject: [PATCH] kvclient: switch error in ctx cancel edge case

This patch changes the error returned by the DistSender on a cancelled
ctx. Depending on exactly who detects the ctx as cancelled, there are a
number of different possibilities - too many to enumerate here.
Generally, the client is supposed to get a context.Canceled error,
wrapped in different layers.

The code path that this patch changes is the one where the cancellation
is detected by sendPartialBatch() without having been previously
detected by sendToReplicas(). This path is unusual (sendToReplicas()
generally detects the cancellation) and hard to test, but depending on
the exact cancellation timing it is possible for sendPartialBatch() to
detect it instead. In that case, this patch makes it so that, if
sendToReplicas() returned a sendError (indicating that a suitable
replica could not be reached and that the higher layer is expected to
continue trying other replicas), the error returned to the client is a
cancellation error instead of the sendError.

Touches #69419

Release note: None
---
 pkg/kv/kvclient/kvcoord/dist_sender.go | 8 ++++----
 pkg/kv/kvserver/client_merge_test.go   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pkg/kv/kvclient/kvcoord/dist_sender.go b/pkg/kv/kvclient/kvcoord/dist_sender.go
index c51e77fbe2c0..b5d15c649f5b 100644
--- a/pkg/kv/kvclient/kvcoord/dist_sender.go
+++ b/pkg/kv/kvclient/kvcoord/dist_sender.go
@@ -1590,9 +1590,6 @@ func (ds *DistSender) sendPartialBatch(
 		}
 
 		if err != nil {
-			// Set pErr so that, if we don't perform any more retries, the
-			// deduceRetryEarlyExitError() call below the loop is inhibited.
-			pErr = roachpb.NewError(err)
 			switch {
 			case errors.HasType(err, sendError{}):
 				// We've tried all the replicas without success. Either they're all
@@ -1614,6 +1611,9 @@
 				routingTok.Evict(ctx)
 				continue
 			}
+			// Set pErr so that the deduceRetryEarlyExitError() call below the loop is
+			// inhibited.
+			pErr = roachpb.NewError(err)
 			break
 		}
 
@@ -1677,7 +1677,7 @@
 	// channels were closed.
 	if pErr == nil {
 		if err := ds.deduceRetryEarlyExitError(ctx); err == nil {
-			log.Fatal(ctx, "exited retry loop without an error")
+			pErr = roachpb.NewError(errors.AssertionFailedf("exited retry loop without an error"))
 		} else {
 			pErr = roachpb.NewError(err)
 		}
diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go
index fa49a05475cb..16103aeed38e 100644
--- a/pkg/kv/kvserver/client_merge_test.go
+++ b/pkg/kv/kvserver/client_merge_test.go
@@ -4127,7 +4127,7 @@ func TestStoreRangeMergeDuringShutdown(t *testing.T) {
 	// Send a dummy get request on the RHS to force a lease acquisition. We expect
 	// this to fail, as quiescing stores cannot acquire leases.
 	_, err = store.DB().Get(ctx, key.Next())
-	if exp := "not lease holder"; !testutils.IsError(err, exp) {
+	if exp := "node unavailable"; !testutils.IsError(err, exp) {
 		t.Fatalf("expected %q error, but got %v", exp, err)
 	}
 }
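
A minimal, self-contained Go sketch of the error-preference logic described in the commit message. It is not part of the patch: the sendError type, sendToReplicas, and sendPartialBatch below are simplified stand-ins for the DistSender internals, shown only to illustrate "prefer the ctx cancellation error over a sendError when the ctx turns out to be cancelled":

package main

import (
	"context"
	"errors"
	"fmt"
)

// sendError is a simplified stand-in for the DistSender's sendError: it means
// "no suitable replica could be reached; the caller may try other replicas".
type sendError struct{ cause error }

func (s sendError) Error() string { return "failed to send RPC: " + s.cause.Error() }

// sendToReplicas is a stand-in that always fails with a sendError, as if no
// replica could be reached before the client gave up.
func sendToReplicas(ctx context.Context) error {
	return sendError{cause: errors.New("no replica reachable")}
}

// sendPartialBatch mimics the edge case from the commit message: if the send
// failed with a sendError and the ctx is detected as cancelled here, surface
// the cancellation error to the client instead of the sendError.
func sendPartialBatch(ctx context.Context) error {
	err := sendToReplicas(ctx)
	if err == nil {
		return nil
	}
	var sErr sendError
	if errors.As(err, &sErr) && ctx.Err() != nil {
		// The ctx was cancelled while (or before) we were trying replicas;
		// return the cancellation rather than the less informative sendError.
		return ctx.Err()
	}
	return err
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // simulate the client giving up before the send completes

	err := sendPartialBatch(ctx)
	fmt.Println(err)                              // context canceled
	fmt.Println(errors.Is(err, context.Canceled)) // true
}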