From 72919930d6551358f23506b54bb2552be0c5bb08 Mon Sep 17 00:00:00 2001 From: Modular Magician Date: Thu, 13 Feb 2020 20:29:23 +0000 Subject: [PATCH] Add more generic error predicates (#3116) * add connection reset by peer predicate * lint * separate error preds, fix temp pred * Fix error retries, comments * remove removed func Signed-off-by: Modular Magician --- .changelog/3116.txt | 3 ++ google/error_retry_predicates.go | 62 ++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 6 deletions(-) create mode 100644 .changelog/3116.txt diff --git a/.changelog/3116.txt b/.changelog/3116.txt new file mode 100644 index 00000000000..a29a7243a33 --- /dev/null +++ b/.changelog/3116.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +Added retries for common network errors we've encountered. +``` diff --git a/google/error_retry_predicates.go b/google/error_retry_predicates.go index 67ddf32dc7b..ba20ec23dfe 100644 --- a/google/error_retry_predicates.go +++ b/google/error_retry_predicates.go @@ -2,7 +2,9 @@ package google import ( "fmt" + "io" "log" + "net" "net/url" "strings" @@ -14,21 +16,69 @@ type RetryErrorPredicateFunc func(error) (bool, string) /** ADD GLOBAL ERROR RETRY PREDICATES HERE **/ // Retry predicates that shoud apply to all requests should be added here. var defaultErrorRetryPredicates = []RetryErrorPredicateFunc{ - isUrlTimeoutError, + // Common network errors (usually wrapped by URL error) + isNetworkTemporaryError, + isNetworkTimeoutError, + isIoEOFError, + isConnectionResetNetworkError, + + // Common GCP error codes isCommonRetryableErrorCode, //While this might apply only to Cloud SQL, historically, - // we had this in our global default error retries, so it is a default - // for now. + // we had this in our global default error retries. + // Keeping it as a default for now. is409OperationInProgressError, } /** END GLOBAL ERROR RETRY PREDICATES HERE **/ -func isUrlTimeoutError(err error) (bool, string) { +func isNetworkTemporaryError(err error) (bool, string) { + if netErr, ok := err.(*net.OpError); ok && netErr.Temporary() { + return true, "marked as timeout" + } + if urlerr, ok := err.(*url.Error); ok && urlerr.Temporary() { + return true, "marked as timeout" + } + return false, "" +} + +func isNetworkTimeoutError(err error) (bool, string) { + if netErr, ok := err.(*net.OpError); ok && netErr.Timeout() { + return true, "marked as timeout" + } if urlerr, ok := err.(*url.Error); ok && urlerr.Timeout() { - log.Printf("[DEBUG] Dismissed an error as retryable based on googleapis.com target: %s", err) - return true, "Got URL timeout error" + return true, "marked as timeout" + } + return false, "" +} + +func isIoEOFError(err error) (bool, string) { + if err == io.ErrUnexpectedEOF { + return true, "Got unexpected EOF" + } + + if urlerr, urlok := err.(*url.Error); urlok { + wrappedErr := urlerr.Unwrap() + if wrappedErr == io.ErrUnexpectedEOF { + return true, "Got unexpected EOF" + } + } + return false, "" +} + +const connectionResetByPeerErr = "connection reset by peer" + +func isConnectionResetNetworkError(err error) (bool, string) { + neterr, ok := err.(*net.OpError) + if !ok { + if urlerr, urlok := err.(*url.Error); urlok { + wrappedErr := urlerr.Unwrap() + neterr, ok = wrappedErr.(*net.OpError) + } + } + if ok && neterr.Err.Error() == connectionResetByPeerErr { + return true, fmt.Sprintf("Connection reset by peer") } return false, "" }