Skip to content

Commit

Permalink
client: Clean up RPC client on failed connect
Browse files Browse the repository at this point in the history
This issue was caught while debugging reconnect failures.
We attempt to reconnect as soon as the server disconnects, which allows
us to open a socket, but there is no reply at this point (listdbs fails).
When we then retry, the rpcClient is not nil, so we assume we're
already connected and don't return an error.

This commit ensures that the rpcClient is set to nil if we fail to
fully connect and adjusts our tests to perform a container restart
instead of simply disconnecting the client to test the reconnect
behaviour.

Signed-off-by: Dave Tucker <[email protected]>
  • Loading branch information
dave-tucker committed Aug 20, 2021
1 parent fe88f92 commit 2d41bfc
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
7 changes: 7 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,15 @@ func (o *ovsdbClient) connect(ctx context.Context, reconnect bool) error {
// FIXME: This only emits the error from the last attempted connection
return fmt.Errorf("failed to connect to endpoints %q: %v", o.options.endpoints, err)
}

if err := o.createRPC2Client(c); err != nil {
return err
}

dbs, err := o.listDbs(ctx)
if err != nil {
o.rpcClient.Close()
o.rpcClient = nil
return err
}

Expand All @@ -164,6 +166,7 @@ func (o *ovsdbClient) connect(ctx context.Context, reconnect bool) error {
}
if !found {
o.rpcClient.Close()
o.rpcClient = nil
return fmt.Errorf("target database not found")
}

Expand All @@ -180,8 +183,10 @@ func (o *ovsdbClient) connect(ctx context.Context, reconnect bool) error {

if err != nil {
o.rpcClient.Close()
o.rpcClient = nil
return err
}

o.schemaMutex.Lock()
o.schema = schema
o.schemaMutex.Unlock()
Expand All @@ -192,6 +197,7 @@ func (o *ovsdbClient) connect(ctx context.Context, reconnect bool) error {
o.api = newAPI(o.cache)
} else {
o.rpcClient.Close()
o.rpcClient = nil
return err
}
o.cacheMutex.Unlock()
Expand All @@ -208,6 +214,7 @@ func (o *ovsdbClient) connect(ctx context.Context, reconnect bool) error {
err = o.monitor(ctx, id, reconnect, request...)
if err != nil {
o.rpcClient.Close()
o.rpcClient = nil
return err
}
}
Expand Down
19 changes: 11 additions & 8 deletions test/ovs/ovs_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package ovs

import (
"context"
"fmt"
"os"
"reflect"
"strings"
Expand Down Expand Up @@ -43,7 +42,10 @@ func (suite *OVSIntegrationSuite) SetupSuite() {
Repository: "libovsdb/ovs",
Tag: tag,
ExposedPorts: []string{"6640/tcp"},
Tty: true,
PortBindings: map[docker.Port][]docker.PortBinding{
"6640/tcp": {{HostPort: "56640"}},
},
Tty: true,
}
hostConfig := func(config *docker.HostConfig) {
// set AutoRemove to true so that stopped container goes away by itself
Expand All @@ -56,8 +58,8 @@ func (suite *OVSIntegrationSuite) SetupSuite() {
suite.resource, err = suite.pool.RunWithOptions(options, hostConfig)
require.NoError(suite.T(), err)

// set expiry to 60 seconds so containers are cleaned up on test panic
err = suite.resource.Expire(60)
// set expiry to 90 seconds so containers are cleaned up on test panic
err = suite.resource.Expire(90)
require.NoError(suite.T(), err)

// let the container start before we attempt connection
Expand All @@ -66,7 +68,7 @@ func (suite *OVSIntegrationSuite) SetupSuite() {
err = suite.pool.Retry(func() error {
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
endpoint := fmt.Sprintf("tcp::%s", suite.resource.GetPort("6640/tcp"))
endpoint := "tcp::56640"
ovs, err := client.NewOVSDBClient(
defDB,
client.WithEndpoint(endpoint),
Expand Down Expand Up @@ -230,7 +232,7 @@ func (suite *OVSIntegrationSuite) TestWithReconnect() {

// Reconfigure
err = suite.client.SetOption(
client.WithReconnect(2*time.Second, backoff.NewExponentialBackOff()),
client.WithReconnect(500*time.Millisecond, &backoff.ZeroBackOff{}),
)
require.NoError(suite.T(), err)

Expand Down Expand Up @@ -278,12 +280,13 @@ func (suite *OVSIntegrationSuite) TestWithReconnect() {
require.Equal(suite.T(), bridgeName, br.Name)

// trigger reconnect
suite.client.Disconnect()
err = suite.pool.Client.RestartContainer(suite.resource.Container.ID, 0)
require.NoError(suite.T(), err)

// check that we are automatically reconnected
require.Eventually(suite.T(), func() bool {
return suite.client.Connected()
}, 2*time.Second, 500*time.Millisecond)
}, 20*time.Second, 1*time.Second)

err = suite.client.Echo(context.TODO())
require.NoError(suite.T(), err)
Expand Down

0 comments on commit 2d41bfc

Please sign in to comment.