diff --git a/pkg/jobs/joberror/BUILD.bazel b/pkg/jobs/joberror/BUILD.bazel index 4a0cc63b885b..eb754a23ed39 100644 --- a/pkg/jobs/joberror/BUILD.bazel +++ b/pkg/jobs/joberror/BUILD.bazel @@ -8,6 +8,7 @@ go_library( deps = [ "//pkg/kv/kvclient/kvcoord", "//pkg/sql/flowinfra", + "//pkg/sql/sqlerrors", "//pkg/util/circuit", "//pkg/util/grpcutil", "//pkg/util/sysutil", diff --git a/pkg/jobs/joberror/errors.go b/pkg/jobs/joberror/errors.go index 96fb3858a3de..73fe2e5daacc 100644 --- a/pkg/jobs/joberror/errors.go +++ b/pkg/jobs/joberror/errors.go @@ -11,36 +11,16 @@ package joberror import ( - "strings" - circuitbreaker "github.com/cockroachdb/circuitbreaker" "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord" "github.com/cockroachdb/cockroach/pkg/sql/flowinfra" + "github.com/cockroachdb/cockroach/pkg/sql/sqlerrors" "github.com/cockroachdb/cockroach/pkg/util/circuit" "github.com/cockroachdb/cockroach/pkg/util/grpcutil" "github.com/cockroachdb/cockroach/pkg/util/sysutil" "github.com/cockroachdb/errors" ) -// IsDistSQLRetryableError returns true if the supplied error, or any of its parent -// causes is an rpc error. -// This is an unfortunate implementation that should be looking for a more -// specific error. -func IsDistSQLRetryableError(err error) bool { - if err == nil { - return false - } - - // TODO(knz): this is a bad implementation. Make it go away - // by avoiding string comparisons. - - errStr := err.Error() - // When a crdb node dies, any DistSQL flows with processors scheduled on - // it get an error with "rpc error" in the message from the call to - // `(*DistSQLPlanner).Run`. - return strings.Contains(errStr, `rpc error`) -} - // isBreakerOpenError returns true if err is a circuit.ErrBreakerOpen. // // NB: Two packages have ErrBreakerOpen error types. The cicruitbreaker package @@ -57,7 +37,7 @@ func IsPermanentBulkJobError(err error) bool { if err == nil { return false } - return !IsDistSQLRetryableError(err) && + return !sqlerrors.IsDistSQLRetryableError(err) && !grpcutil.IsClosedConnection(err) && !flowinfra.IsFlowRetryableError(err) && !flowinfra.IsNoInboundStreamConnectionError(err) && diff --git a/pkg/sql/distsql_running.go b/pkg/sql/distsql_running.go index ddefb975fadf..50cecc9f3426 100644 --- a/pkg/sql/distsql_running.go +++ b/pkg/sql/distsql_running.go @@ -44,6 +44,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/rowexec" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" + "github.com/cockroachdb/cockroach/pkg/sql/sqlerrors" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/buildutil" @@ -1998,7 +1999,8 @@ func (dsp *DistSQLPlanner) PlanAndRun( // cancellation has already occurred. return } - if !pgerror.IsSQLRetryableError(distributedErr) && + if !sqlerrors.IsDistSQLRetryableError(distributedErr) && + !pgerror.IsSQLRetryableError(distributedErr) && !flowinfra.IsFlowRetryableError(distributedErr) && !isDialErr(distributedErr) { // Only re-run the query if we think there is a high chance of a diff --git a/pkg/sql/sqlerrors/errors.go b/pkg/sql/sqlerrors/errors.go index 21b98bdb1b17..f9ad4ab17c37 100644 --- a/pkg/sql/sqlerrors/errors.go +++ b/pkg/sql/sqlerrors/errors.go @@ -12,6 +12,8 @@ package sqlerrors import ( + "strings" + "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" @@ -411,3 +413,22 @@ func errHasCode(err error, code ...pgcode.Code) bool { } return false } + +// IsDistSQLRetryableError returns true if the supplied error, or any of its parent +// causes is an rpc error. +// This is an unfortunate implementation that should be looking for a more +// specific error. +func IsDistSQLRetryableError(err error) bool { + if err == nil { + return false + } + + // TODO(knz): this is a bad implementation. Make it go away + // by avoiding string comparisons. + + errStr := err.Error() + // When a crdb node dies, any DistSQL flows with processors scheduled on + // it get an error with "rpc error" in the message from the call to + // `(*DistSQLPlanner).Run`. + return strings.Contains(errStr, `rpc error`) +}