From 5515e201943ba0e65eadf9df85c2c8f34cfc1ca0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 Apr 2024 13:47:00 -0700 Subject: [PATCH] testing(bigquery): improve retry testing (#10006) This PR augments retry testing to exercise the two main predicates: * job insertion RPCs (jobs.insert, jobs.query) * everything else In practice we have a wider predicate for job insertion, as the current API style makes it difficult to disambiguate RPC failure from the job execution errors, but we still want to be more resilient to transient failures when enqueuing jobs/queries. Fixes: https://togithub.com/googleapis/google-cloud-go/issues/9751 --- bigquery/bigquery.go | 8 ++- bigquery/bigquery_test.go | 147 ++++++++++++++++++++++++++------------ 2 files changed, 108 insertions(+), 47 deletions(-) diff --git a/bigquery/bigquery.go b/bigquery/bigquery.go index 0916ec17a4e3..15088624f3c3 100644 --- a/bigquery/bigquery.go +++ b/bigquery/bigquery.go @@ -249,8 +249,12 @@ func runWithRetryExplicit(ctx context.Context, call func() error, allowedReasons var ( defaultRetryReasons = []string{"backendError", "rateLimitExceeded"} - jobRetryReasons = []string{"backendError", "rateLimitExceeded", "jobRateLimitExceeded", "internalError"} - retry5xxCodes = []int{ + + // These reasons are used exclusively for enqueuing jobs (jobs.insert and jobs.query). + // Using them for polling may cause unwanted retries until context deadline/cancellation/etc. + jobRetryReasons = []string{"backendError", "rateLimitExceeded", "jobRateLimitExceeded", "internalError"} + + retry5xxCodes = []int{ http.StatusInternalServerError, http.StatusBadGateway, http.StatusServiceUnavailable, diff --git a/bigquery/bigquery_test.go b/bigquery/bigquery_test.go index f1fd24745ac3..6d88b4fdb978 100644 --- a/bigquery/bigquery_test.go +++ b/bigquery/bigquery_test.go @@ -26,102 +26,159 @@ import ( ) func TestRetryableErrors(t *testing.T) { - for _, tc := range []struct { - description string - in error - want bool + testCases := []struct { + description string + in error + useJobRetryReasons bool + wantRetry bool }{ { - "nil error", - nil, - false, + description: "nil error", + in: nil, + wantRetry: false, }, { - "http stream closed", - errors.New("http2: stream closed"), - true, + description: "http stream closed", + in: errors.New("http2: stream closed"), + wantRetry: true, }, { - "io ErrUnexpectedEOF", - io.ErrUnexpectedEOF, - true, + description: "io ErrUnexpectedEOF", + in: io.ErrUnexpectedEOF, + wantRetry: true, }, { - "unavailable", - &googleapi.Error{ + description: "unavailable", + in: &googleapi.Error{ Code: http.StatusServiceUnavailable, Message: "foo", }, - true, + wantRetry: true, }, { - "url connection error", - &url.Error{Op: "blah", URL: "blah", Err: errors.New("connection refused")}, - true, + description: "url connection error", + in: &url.Error{Op: "blah", URL: "blah", Err: errors.New("connection refused")}, + wantRetry: true, }, { - "url other error", - &url.Error{Op: "blah", URL: "blah", Err: errors.New("blah")}, - false, + description: "url other error", + in: &url.Error{Op: "blah", URL: "blah", Err: errors.New("blah")}, + wantRetry: false, }, { - "wrapped retryable", - xerrors.Errorf("test of wrapped retryable: %w", &googleapi.Error{ + description: "wrapped retryable", + in: xerrors.Errorf("test of wrapped retryable: %w", &googleapi.Error{ Code: http.StatusServiceUnavailable, Message: "foo", Errors: []googleapi.ErrorItem{ {Reason: "backendError", Message: "foo"}, }, }), - true, + wantRetry: true, }, { - "wrapped non-retryable", - xerrors.Errorf("test of wrapped retryable: %w", errors.New("blah")), - false, + description: "wrapped non-retryable", + in: xerrors.Errorf("test of wrapped retryable: %w", errors.New("blah")), + wantRetry: false, }, { // not retried per https://google.aip.dev/194 - "internal error", - &googleapi.Error{ + description: "internal error", + in: &googleapi.Error{ Code: http.StatusInternalServerError, }, - true, + wantRetry: true, }, { - "internal w/backend reason", - &googleapi.Error{ + description: "internal w/backend reason", + in: &googleapi.Error{ Code: http.StatusServiceUnavailable, Message: "foo", Errors: []googleapi.ErrorItem{ {Reason: "backendError", Message: "foo"}, }, }, - true, + wantRetry: true, }, { - "internal w/rateLimitExceeded reason", - &googleapi.Error{ + description: "internal w/rateLimitExceeded reason", + in: &googleapi.Error{ Code: http.StatusServiceUnavailable, Message: "foo", Errors: []googleapi.ErrorItem{ {Reason: "rateLimitExceeded", Message: "foo"}, }, }, - true, + wantRetry: true, }, { - "bad gateway error", - &googleapi.Error{ + description: "bad gateway error", + in: &googleapi.Error{ Code: http.StatusBadGateway, Message: "foo", }, - true, + wantRetry: true, }, - } { - got := retryableError(tc.in, defaultRetryReasons) - if got != tc.want { - t.Errorf("case (%s) mismatch: got %t want %t", tc.description, got, tc.want) - } + { + description: "jobRateLimitExceeded default", + in: &googleapi.Error{ + Code: http.StatusOK, // ensure we're testing the reason + Message: "foo", + Errors: []googleapi.ErrorItem{ + {Reason: "jobRateLimitExceeded", Message: "foo"}, + }, + }, + wantRetry: false, + }, + { + description: "jobRateLimitExceeded job", + in: &googleapi.Error{ + Code: http.StatusOK, // ensure we're testing the reason + Message: "foo", + Errors: []googleapi.ErrorItem{ + {Reason: "jobRateLimitExceeded", Message: "foo"}, + }, + }, + useJobRetryReasons: true, + wantRetry: true, + }, + { + description: "structured internal error default", + in: &googleapi.Error{ + Code: http.StatusOK, // ensure we're testing the reason + Message: "foo", + Errors: []googleapi.ErrorItem{ + {Reason: "internalError", Message: "foo"}, + }, + }, + wantRetry: false, + }, + { + description: "structured internal error default", + in: &googleapi.Error{ + Code: http.StatusOK, // ensure we're testing the reason + Message: "foo", + Errors: []googleapi.ErrorItem{ + {Reason: "internalError", Message: "foo"}, + }, + }, + useJobRetryReasons: true, + wantRetry: true, + }, + } + + for _, testcase := range testCases { + tc := testcase + t.Run(tc.description, func(t *testing.T) { + t.Parallel() + reasons := defaultRetryReasons + if tc.useJobRetryReasons { + reasons = jobRetryReasons + } + got := retryableError(tc.in, reasons) + if got != tc.wantRetry { + t.Errorf("case (%s) mismatch: got %t wantRetry %t", tc.description, got, tc.wantRetry) + } + }) } }