Skip to content

Commit

Permalink
rpc: improve detection of onlyOnceDialer redials
Browse files Browse the repository at this point in the history
While looking into #96543, I wasn't 100% sure we weren't accidentally
redialing a connection internally. This improved logging and the test
makes it more obvious that things are working as intended.

Touches #96543.

Epic: none
Release note: None
  • Loading branch information
tbg committed Feb 8, 2023
1 parent cc99062 commit 3285d25
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 7 deletions.
15 changes: 8 additions & 7 deletions pkg/rpc/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -1798,7 +1798,8 @@ func init() {
type onlyOnceDialer struct {
mu struct {
syncutil.Mutex
err error
err error
redialed bool
}
}

Expand All @@ -1815,13 +1816,13 @@ func (ood *onlyOnceDialer) dial(ctx context.Context, addr string) (net.Conn, err
defer ood.mu.Unlock()

if err := ood.mu.err; err != nil {
// Not first dial.
if !errors.Is(err, grpcutil.ErrConnectionInterrupted) {
// Hitting this path would indicate that gRPC retried even though it
// received an error not marked as retriable. At the time of writing, at
// least in simple experiments, as expected we don't see this error.
err = errors.Wrap(err, "previous dial failed")
if ood.mu.redialed {
// We set up onlyOnceDialer to avoid returning any errors that could look
// temporary to gRPC, and so we don't expect it to re-dial a connection
// twice (the first re-dial is supposed to surface the permanent error).
return nil, errors.NewAssertionErrorWithWrappedErrf(err, "gRPC connection unexpectedly re-dialed")
}
ood.mu.redialed = true
return nil, err
}

Expand Down
24 changes: 24 additions & 0 deletions pkg/rpc/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2533,3 +2533,27 @@ func TestRejectDialOnQuiesce(t *testing.T) {
require.Error(t, err)
require.Equal(t, codes.Canceled, status.Code(err))
}

// TestOnlyOnceDialer verifies that onlyOnceDialer prevents gRPC from re-dialing
// on an existing connection. (Instead, gRPC will have to make a new dialer).
func TestOnlyOnceDialer(t *testing.T) {
defer leaktest.AfterTest(t)()
ctx := context.Background()
ln, err := net.Listen(util.TestAddr.Network(), util.TestAddr.String())
require.NoError(t, err)
defer func() { _ = ln.Close() }()

ood := &onlyOnceDialer{}
conn, err := ood.dial(ctx, ln.Addr().String())
require.NoError(t, err)
_ = conn.Close()

for i := 0; i < 5; i++ {
_, err := ood.dial(ctx, ln.Addr().String())
if i == 0 {
require.True(t, errors.Is(err, grpcutil.ErrConnectionInterrupted), "i=%d: %+v", i, err)
} else {
require.ErrorContains(t, err, `gRPC connection unexpectedly re-dialed`)
}
}
}

0 comments on commit 3285d25

Please sign in to comment.