Skip to content

Commit

Permalink
Merge #79960
Browse files Browse the repository at this point in the history
79960: roachtest: add minimal secure support to multitenant r=cucaroach a=cucaroach

The existing multitenant roachtests operated in insecure mode which
means to the kvserver they looked like any other CRDB node, the tenant
id is transfered in the certificate so secure mode must be used to
really test multitenant support.

A more ambitious task will be to take this further and move all the
multitenant stuff into roachprod.

Fixes: #79844

Release note: None

Co-authored-by: Tommy Reilly <[email protected]>
  • Loading branch information
craig[bot] and cucaroach committed May 2, 2022
2 parents 9c77040 + ed4c6b4 commit 3672d03
Show file tree
Hide file tree
Showing 7 changed files with 278 additions and 141 deletions.
15 changes: 14 additions & 1 deletion pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1193,8 +1193,16 @@ func (c *clusterImpl) FetchTimeseriesData(ctx context.Context, t test.Test) erro
if node > c.spec.NodeCount {
return errors.New("no node responds to SQL, cannot fetch tsdata")
}
sec := "--insecure"
if c.IsSecure() {
certs := "certs"
if c.IsLocal() {
certs = c.localCertsDir
}
sec = fmt.Sprintf("--certs-dir=%s", certs)
}
if err := c.RunE(
ctx, c.Node(node), "./cockroach debug tsdump --insecure --format=raw > tsdump.gob",
ctx, c.Node(node), fmt.Sprintf("./cockroach debug tsdump %s --format=raw > tsdump.gob", sec),
); err != nil {
return err
}
Expand Down Expand Up @@ -2352,9 +2360,14 @@ func (c *clusterImpl) MakeNodes(opts ...option.Option) string {
}

func (c *clusterImpl) IsLocal() bool {
// FIXME: I think radu made local more flexible and local is a prefix?
return c.name == "local"
}

func (c *clusterImpl) IsSecure() bool {
return c.localCertsDir != ""
}

// Extend extends the cluster's expiration by d.
func (c *clusterImpl) Extend(ctx context.Context, d time.Duration, l *logger.Logger) error {
if ctx.Err() != nil {
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/cluster/cluster_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type Cluster interface {
Spec() spec.ClusterSpec
Name() string
IsLocal() bool
IsSecure() bool

// Deleting CockroachDB data and logs on nodes.

Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ go_library(
"mixed_version_schemachange.go",
"multitenant.go",
"multitenant_upgrade.go",
"multitenant_utils.go",
"network.go",
"nodejs_postgres.go",
"orm_helpers.go",
Expand Down Expand Up @@ -165,6 +166,7 @@ go_library(
"//pkg/jobs/jobspb",
"//pkg/kv",
"//pkg/roachpb",
"//pkg/roachprod",
"//pkg/roachprod/install",
"//pkg/roachprod/logger",
"//pkg/server",
Expand Down
7 changes: 2 additions & 5 deletions pkg/cmd/roachtest/tests/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,20 @@ import (
func runAcceptanceMultitenant(ctx context.Context, t test.Test, c cluster.Cluster) {
c.Put(ctx, t.Cockroach(), "./cockroach")

c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(install.SecureOption(true)), c.All())

const tenantID = 123
{
_, err := c.Conn(ctx, t.L(), 1).Exec(`SELECT crdb_internal.create_tenant($1)`, tenantID)
require.NoError(t, err)
}

kvAddrs, err := c.ExternalAddr(ctx, t.L(), c.All())
require.NoError(t, err)

const (
tenantHTTPPort = 8081
tenantSQLPort = 30258
)
const tenantNode = 1
tenant := createTenantNode(kvAddrs, tenantID, tenantNode, tenantHTTPPort, tenantSQLPort)
tenant := createTenantNode(ctx, t, c, c.All(), tenantID, tenantNode, tenantHTTPPort, tenantSQLPort)
tenant.start(ctx, t, c, "./cockroach")

t.Status("checking that a client can connect to the tenant server")
Expand Down
140 changes: 5 additions & 135 deletions pkg/cmd/roachtest/tests/multitenant_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ package tests
import (
"context"
gosql "database/sql"
"fmt"
"net/url"
"strconv"
"strings"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
Expand All @@ -25,7 +21,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/cockroach/pkg/util/version"
"github.com/stretchr/testify/require"
)
Expand All @@ -42,95 +37,6 @@ func registerMultiTenantUpgrade(r registry.Registry) {
})
}

// tenantNode corresponds to a running tenant.
type tenantNode struct {
tenantID int
httpPort, sqlPort int
kvAddrs []string
pgURL string

binary string // the binary last passed to start()
errCh chan error
node int
}

func createTenantNode(kvAddrs []string, tenantID, node, httpPort, sqlPort int) *tenantNode {
tn := &tenantNode{
tenantID: tenantID,
httpPort: httpPort,
kvAddrs: kvAddrs,
node: node,
sqlPort: sqlPort,
}
return tn
}

func (tn *tenantNode) stop(ctx context.Context, t test.Test, c cluster.Cluster) {
if tn.errCh == nil {
return
}
// Must use pkill because the context cancellation doesn't wait for the
// process to exit.
c.Run(ctx, c.Node(tn.node),
fmt.Sprintf("pkill -o -f '^%s mt start.*tenant-id=%d'", tn.binary, tn.tenantID))
t.L().Printf("mt cluster exited: %v", <-tn.errCh)
tn.errCh = nil
}

func (tn *tenantNode) logDir() string {
return fmt.Sprintf("logs/mt-%d", tn.tenantID)
}

func (tn *tenantNode) storeDir() string {
return fmt.Sprintf("cockroach-data-mt-%d", tn.tenantID)
}

func (tn *tenantNode) start(ctx context.Context, t test.Test, c cluster.Cluster, binary string) {
tn.binary = binary
extraArgs := []string{"--log-dir=" + tn.logDir(), "--store=" + tn.storeDir()}
tn.errCh = startTenantServer(
ctx, c, c.Node(tn.node), binary, tn.kvAddrs, tn.tenantID,
tn.httpPort, tn.sqlPort,
extraArgs...,
)
externalUrls, err := c.ExternalPGUrl(ctx, t.L(), c.Node(tn.node))
require.NoError(t, err)
u, err := url.Parse(externalUrls[0])
require.NoError(t, err)
internalUrls, err := c.ExternalIP(ctx, t.L(), c.Node(tn.node))
require.NoError(t, err)
u.Host = internalUrls[0] + ":" + strconv.Itoa(tn.sqlPort)
tn.pgURL = u.String()

// The tenant is usually responsive ~right away, but it
// has on occasions taken more than 3s for it to connect
// to the KV layer, and it won't open the SQL port until
// it has.
if err := retry.ForDuration(45*time.Second, func() error {
select {
case <-ctx.Done():
t.Fatal(ctx.Err())
case err := <-tn.errCh:
t.Fatal(err)
default:
}

db, err := gosql.Open("postgres", tn.pgURL)
if err != nil {
return err
}
defer db.Close()
ctx, cancel := context.WithTimeout(context.Background(), 45*time.Second)
defer cancel()
_, err = db.ExecContext(ctx, `SELECT 1`)
return err
}); err != nil {
t.Fatal(err)
}

t.L().Printf("sql server for tenant %d running at %s", tn.tenantID, tn.pgURL)
}

// runMultiTenantUpgrade exercises upgrading tenants and their host cluster.
//
// Sketch of the test:
Expand Down Expand Up @@ -163,12 +69,9 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster,

kvNodes := c.Node(1)

settings := install.MakeClusterSettings(install.BinaryOption(predecessorBinary))
settings := install.MakeClusterSettings(install.BinaryOption(predecessorBinary), install.SecureOption(true))
c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, kvNodes)

kvAddrs, err := c.ExternalAddr(ctx, t.L(), kvNodes)
require.NoError(t, err)

const tenant11HTTPPort, tenant11SQLPort = 8011, 20011
const tenant11ID = 11
runner := sqlutils.MakeSQLRunner(c.Conn(ctx, t.L(), 1))
Expand All @@ -182,7 +85,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster,
runner.QueryRow(t, "SHOW CLUSTER SETTING version").Scan(&initialVersion)

const tenantNode = 2
tenant11 := createTenantNode(kvAddrs, tenant11ID, tenantNode, tenant11HTTPPort, tenant11SQLPort)
tenant11 := createTenantNode(ctx, t, c, kvNodes, tenant11ID, tenantNode, tenant11HTTPPort, tenant11SQLPort)
tenant11.start(ctx, t, c, predecessorBinary)
defer tenant11.stop(ctx, t, c)

Expand Down Expand Up @@ -226,7 +129,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster,
runner.Exec(t, `SELECT crdb_internal.create_tenant($1)`, tenant12ID)

t.Status("starting tenant 12 server with older binary")
tenant12 := createTenantNode(kvAddrs, tenant12ID, tenantNode, tenant12HTTPPort, tenant12SQLPort)
tenant12 := createTenantNode(ctx, t, c, kvNodes, tenant12ID, tenantNode, tenant12HTTPPort, tenant12SQLPort)
tenant12.start(ctx, t, c, predecessorBinary)
defer tenant12.stop(ctx, t, c)

Expand All @@ -248,7 +151,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster,
runner.Exec(t, `SELECT crdb_internal.create_tenant($1)`, tenant13ID)

t.Status("starting tenant 13 server with new binary")
tenant13 := createTenantNode(kvAddrs, tenant13ID, tenantNode, tenant13HTTPPort, tenant13SQLPort)
tenant13 := createTenantNode(ctx, t, c, kvNodes, tenant13ID, tenantNode, tenant13HTTPPort, tenant13SQLPort)
tenant13.start(ctx, t, c, currentBinary)
defer tenant13.stop(ctx, t, c)

Expand Down Expand Up @@ -358,7 +261,7 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster,
runner.Exec(t, `SELECT crdb_internal.create_tenant($1)`, tenant14ID)

t.Status("verifying the tenant 14 works and has the proper version")
tenant14 := createTenantNode(kvAddrs, tenant14ID, tenantNode, tenant14HTTPPort, tenant14SQLPort)
tenant14 := createTenantNode(ctx, t, c, kvNodes, tenant14ID, tenantNode, tenant14HTTPPort, tenant14SQLPort)
tenant14.start(ctx, t, c, currentBinary)
defer tenant14.stop(ctx, t, c)
verifySQL(t, tenant14.pgURL,
Expand All @@ -381,39 +284,6 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster,
withResults([][]string{{"true"}}))
}

func startTenantServer(
tenantCtx context.Context,
c cluster.Cluster,
node option.NodeListOption,
binary string,
kvAddrs []string,
tenantID int,
httpPort int,
sqlPort int,
extraFlags ...string,
) chan error {

args := []string{
// TODO(tbg): make this test secure.
// "--certs-dir", "certs",
"--insecure",
"--tenant-id=" + strconv.Itoa(tenantID),
"--http-addr", ifLocal(c, "127.0.0.1", "0.0.0.0") + ":" + strconv.Itoa(httpPort),
"--kv-addrs", strings.Join(kvAddrs, ","),
// Don't bind to external interfaces when running locally.
"--sql-addr", ifLocal(c, "127.0.0.1", "0.0.0.0") + ":" + strconv.Itoa(sqlPort),
}
args = append(args, extraFlags...)
errCh := make(chan error, 1)
go func() {
errCh <- c.RunE(tenantCtx, node,
append([]string{binary, "mt", "start-sql"}, args...)...,
)
close(errCh)
}()
return errCh
}

type sqlVerificationStmt struct {
stmt string
args []interface{}
Expand Down
Loading

0 comments on commit 3672d03

Please sign in to comment.