Skip to content

Commit

Permalink
Merge #98851
Browse files Browse the repository at this point in the history
98851: roachtest: added connection timeout to loq recovery ops r=tbg a=aliher1911

When working with cluster that failed recovery, db connections could get stuck indefinitely preventing test completion. This is caused by driver ignoring context cancellation when attempting to establish connection.
This commit adds a connect timeout option to the driver to prevent this behaviour.

Epic: none

Release note: None

Fixes #98639

Co-authored-by: Oleg Afanasyev <[email protected]>
  • Loading branch information
craig[bot] and aliher1911 committed Mar 23, 2023
2 parents 4913247 + bba0750 commit bbc5ec1
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ ALL_TESTS = [
"//pkg/cmd/release:release_test",
"//pkg/cmd/roachprod-microbench:roachprod-microbench_test",
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/option:option_test",
"//pkg/cmd/roachtest/roachtestutil/mixedversion:mixedversion_test",
"//pkg/cmd/roachtest/roachtestutil:roachtestutil_test",
"//pkg/cmd/roachtest/tests:tests_test",
Expand Down Expand Up @@ -1041,6 +1042,7 @@ GO_TARGETS = [
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/grafana:grafana",
"//pkg/cmd/roachtest/option:option",
"//pkg/cmd/roachtest/option:option_test",
"//pkg/cmd/roachtest/registry:registry",
"//pkg/cmd/roachtest/roachtestutil/clusterupgrade:clusterupgrade",
"//pkg/cmd/roachtest/roachtestutil/mixedversion:mixedversion",
Expand Down
7 changes: 7 additions & 0 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2365,6 +2365,13 @@ func (c *clusterImpl) ConnE(
u.User = url.User(connOptions.User)
dataSourceName = u.String()
}
if len(connOptions.Options) > 0 {
vals := make(url.Values)
for k, v := range connOptions.Options {
vals.Add(k, v)
}
dataSourceName = dataSourceName + "&" + vals.Encode()
}
db, err := gosql.Open("postgres", dataSourceName)
if err != nil {
return nil, err
Expand Down
10 changes: 9 additions & 1 deletion pkg/cmd/roachtest/option/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "option",
Expand All @@ -18,4 +18,12 @@ go_library(
],
)

go_test(
name = "option_test",
srcs = ["connection_options_test.go"],
args = ["-test.timeout=295s"],
embed = [":option"],
deps = ["@com_github_stretchr_testify//require"],
)

get_x_data(name = "get_x_data")
23 changes: 23 additions & 0 deletions pkg/cmd/roachtest/option/connection_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@

package option

import (
"fmt"
"time"
)

type ConnOption struct {
User string
TenantName string
Options map[string]string
}

func User(user string) func(*ConnOption) {
Expand All @@ -26,3 +32,20 @@ func TenantName(tenantName string) func(*ConnOption) {
option.TenantName = tenantName
}
}

func ConnectionOption(key, value string) func(*ConnOption) {
return func(option *ConnOption) {
if len(option.Options) == 0 {
option.Options = make(map[string]string)
}
option.Options[key] = value
}
}

func ConnectTimeout(t time.Duration) func(*ConnOption) {
sec := int64(t.Seconds())
if sec < 1 {
sec = 1
}
return ConnectionOption("connect_timeout", fmt.Sprintf("%d", sec))
}
52 changes: 52 additions & 0 deletions pkg/cmd/roachtest/option/connection_options_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package option

import (
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestFirstOptionCreatesMap(t *testing.T) {
var opts ConnOption
o := ConnectionOption("a", "b")
o(&opts)
require.NotNil(t, opts.Options)
}

func TestTimeoutCalculation(t *testing.T) {
var opts ConnOption
for _, d := range []struct {
t time.Duration
o string
}{
{
t: time.Second,
o: "1",
},
{
t: time.Millisecond,
o: "1",
},
{
t: time.Minute,
o: "60",
},
} {
t.Run(d.t.String(), func(t *testing.T) {
o := ConnectTimeout(d.t)
o(&opts)
require.Equal(t, d.o, opts.Options["connect_timeout"])
})
}
}
7 changes: 5 additions & 2 deletions pkg/cmd/roachtest/tests/loss_of_quorum_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,10 @@ func runRecoverLossOfQuorum(ctx context.Context, t test.Test, c cluster.Cluster,
if ctx.Err() != nil {
return &recoveryImpossibleError{testOutcome: restartFailed}
}
db, err = c.ConnE(ctx, t.L(), 1)
// Note that conn doesn't actually connect, it just creates driver
// and prepares URL. Actual connection is done when statement is
// being executed.
db, err = c.ConnE(ctx, t.L(), 1, option.ConnectTimeout(15*time.Second))
if err == nil {
break
}
Expand Down Expand Up @@ -485,7 +488,7 @@ func runHalfOnlineRecoverLossOfQuorum(
if ctx.Err() != nil {
return &recoveryImpossibleError{testOutcome: restartFailed}
}
db, err = c.ConnE(ctx, t.L(), remaining[len(remaining)-1])
db, err = c.ConnE(ctx, t.L(), remaining[len(remaining)-1], option.ConnectTimeout(15*time.Second))
if err == nil {
break
}
Expand Down

0 comments on commit bbc5ec1

Please sign in to comment.