Skip to content

Commit

Permalink
Fix not retryable error codes from: Unavailable -> FailedPrecondition
Browse files Browse the repository at this point in the history
 - ErrGRPCNotCapable("etcdserver: not capable") -> codes.FailedPrecondition  (it will not autofix, it requires new version of server)
 - ErrGPRCNotSupportedForLearner("etcdserver: rpc not supported for learner") -> codes.FailedPrecondition (as long as its learner, the call will not work)
 - ErrGRPCClusterVersionUnavailable("etcdserver: cluster version not found during downgrade") -> codes.FailedPrecondition (backend does not contain the version (old etcd?) so retry will not help)

https://github.com/etcd-io/etcd/runs/2599598633?check_suite_focus=true

```
{"level":"warn","ts":"2021-05-17T09:55:30.246Z","logger":"etcd-client","caller":"v3/retry_interceptor.go:62","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc000539880/#initially=[unix://localhost:m30]","attempt":0,"error":"rpc error: code = Unavailable desc = etcdserver: rpc not supported for learner"}
{"level":"warn","ts":"2021-05-17T09:55:30.270Z","logger":"etcd-client","caller":"v3/retry_interceptor.go:62","msg":"retrying
of unary invoker
failed","target":"etcd-endpoints://0xc000539880/#initially=[unix://localhost:m30]","attempt":1,"error":"rpc
error: code = Unavailable desc = etcdserver: rpc not supported for
learner"}`
```
  • Loading branch information
ptabor authored and sheyt0 committed Mar 25, 2024
1 parent 38f3eb3 commit 1c4e63c
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 7 deletions.
6 changes: 3 additions & 3 deletions api/v3rpc/rpctypes/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,20 +71,20 @@ var (
ErrGRPCNoLeader = status.New(codes.Unavailable, "etcdserver: no leader").Err()
ErrGRPCNotLeader = status.New(codes.FailedPrecondition, "etcdserver: not leader").Err()
ErrGRPCLeaderChanged = status.New(codes.Unavailable, "etcdserver: leader changed").Err()
ErrGRPCNotCapable = status.New(codes.Unavailable, "etcdserver: not capable").Err()
ErrGRPCNotCapable = status.New(codes.FailedPrecondition, "etcdserver: not capable").Err()
ErrGRPCStopped = status.New(codes.Unavailable, "etcdserver: server stopped").Err()
ErrGRPCTimeout = status.New(codes.Unavailable, "etcdserver: request timed out").Err()
ErrGRPCTimeoutDueToLeaderFail = status.New(codes.Unavailable, "etcdserver: request timed out, possibly due to previous leader failure").Err()
ErrGRPCTimeoutDueToConnectionLost = status.New(codes.Unavailable, "etcdserver: request timed out, possibly due to connection lost").Err()
ErrGRPCTimeoutWaitAppliedIndex = status.New(codes.Unavailable, "etcdserver: request timed out, waiting for the applied index took too long").Err()
ErrGRPCUnhealthy = status.New(codes.Unavailable, "etcdserver: unhealthy cluster").Err()
ErrGRPCCorrupt = status.New(codes.DataLoss, "etcdserver: corrupt cluster").Err()
ErrGPRCNotSupportedForLearner = status.New(codes.Unavailable, "etcdserver: rpc not supported for learner").Err()
ErrGPRCNotSupportedForLearner = status.New(codes.FailedPrecondition, "etcdserver: rpc not supported for learner").Err()
ErrGRPCBadLeaderTransferee = status.New(codes.FailedPrecondition, "etcdserver: bad leader transferee").Err()

ErrGRPCClusterVersionUnavailable = status.New(codes.Unavailable, "etcdserver: cluster version not found during downgrade").Err()
ErrGRPCWrongDowngradeVersionFormat = status.New(codes.InvalidArgument, "etcdserver: wrong downgrade target version format").Err()
ErrGRPCInvalidDowngradeTargetVersion = status.New(codes.InvalidArgument, "etcdserver: invalid downgrade target version").Err()
ErrGRPCClusterVersionUnavailable = status.New(codes.FailedPrecondition, "etcdserver: cluster version not found during downgrade").Err()
ErrGRPCDowngradeInProcess = status.New(codes.FailedPrecondition, "etcdserver: cluster has a downgrade job in progress").Err()
ErrGRPCNoInflightDowngrade = status.New(codes.FailedPrecondition, "etcdserver: no inflight downgrade job").Err()

Expand Down
20 changes: 16 additions & 4 deletions client/v3/retry_interceptor.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package clientv3

import (
"context"
"errors"
"io"
"sync"
"time"
Expand Down Expand Up @@ -85,7 +86,7 @@ func (c *Client) unaryClientInterceptor(optFuncs ...retryOption) grpc.UnaryClien
}
continue
}
if !isSafeRetry(c.lg, lastErr, callOpts) {
if !isSafeRetry(c, lastErr, callOpts) {
return lastErr
}
}
Expand Down Expand Up @@ -279,7 +280,7 @@ func (s *serverStreamingRetryingStream) receiveMsgAndIndicateRetry(m interface{}
return true, err

}
return isSafeRetry(s.client.lg, err, s.callOpts), err
return isSafeRetry(s.client, err, s.callOpts), err
}

func (s *serverStreamingRetryingStream) reestablishStreamAndResendBuffer(callCtx context.Context) (grpc.ClientStream, error) {
Expand Down Expand Up @@ -319,17 +320,28 @@ func waitRetryBackoff(ctx context.Context, attempt uint, callOpts *options) erro
}

// isSafeRetry returns "true", if request is safe for retry with the given error.
func isSafeRetry(lg *zap.Logger, err error, callOpts *options) bool {
func isSafeRetry(c *Client, err error, callOpts *options) bool {
if isContextError(err) {
return false
}

// Situation when learner refuses RPC it is supposed to not serve is from the server
// perspective not retryable.
// But for backward-compatibility reasons we need to support situation that
// customer provides mix of learners (not yet voters) and voters with an
// expectation to pick voter in the next attempt.
// TODO: Ideally client should be 'aware' which endpoint represents: leader/voter/learner with high probability.
if errors.Is(err, rpctypes.ErrGPRCNotSupportedForLearner) && len(c.Endpoints()) > 1 {
return true
}

switch callOpts.retryPolicy {
case repeatable:
return isSafeRetryImmutableRPC(err)
case nonRepeatable:
return isSafeRetryMutableRPC(err)
default:
lg.Warn("unrecognized retry policy", zap.String("retryPolicy", callOpts.retryPolicy.String()))
c.lg.Warn("unrecognized retry policy", zap.String("retryPolicy", callOpts.retryPolicy.String()))
return false
}
}
Expand Down

0 comments on commit 1c4e63c

Please sign in to comment.