From f11f2315a08b745c1132edd93f8421eadae552d6 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 30 Dec 2022 18:35:32 +0100 Subject: [PATCH 1/3] cli/demo: enable reuse of web sessions across restarts Prior to this change, every time the `cockroach demo` command would be stopped, all the web sessions already open would be lost. As a result, any web browser currently open on the `demo` cluster would switch over to the login page again. This was inconvenient because often folk use `cockroach demo` to iterate on new feature development. They want the ability to keep the web browser open (and preserve whatever they were looking at) across restarts of the `cockroach demo` command. This UX inconvenience was excessively pushing users to use the `--insecure` flag, which we want to discourage. So this patch resolves the inconvenience by preserving open web sessions across restarts of the `demo` session. Release note (cli change): `cockroach demo` is now able to preserve open web sessions across restarts of the `cockroach demo` command. The sessions are saved in the directory `~/.cockroach-demo` alongside the TLS certificates and keys. --- pkg/cli/demo.go | 3 + pkg/cli/democluster/BUILD.bazel | 2 + pkg/cli/democluster/demo_cluster.go | 15 ++ pkg/cli/democluster/session_persistence.go | 220 ++++++++++++++++++ .../test_demo_cli_integration.tcl | 50 ++++ 5 files changed, 290 insertions(+) create mode 100644 pkg/cli/democluster/session_persistence.go diff --git a/pkg/cli/demo.go b/pkg/cli/demo.go index d8ee072ce7d7..54b471ddc29c 100644 --- a/pkg/cli/demo.go +++ b/pkg/cli/demo.go @@ -375,5 +375,8 @@ func runDemoInternal( defer c.SetSimulatedLatency(false /* on */) } + // Ensure the last few entries in the log files are flushed at the end. + defer log.Flush() + return sqlCtx.Run(ctx, conn) } diff --git a/pkg/cli/democluster/BUILD.bazel b/pkg/cli/democluster/BUILD.bazel index bac0bfc7a1ab..dddaff9f9b3e 100644 --- a/pkg/cli/democluster/BUILD.bazel +++ b/pkg/cli/democluster/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "demo_cluster.go", "demo_locality_list.go", "doc.go", + "session_persistence.go", "socket_unix.go", "socket_windows.go", ], @@ -53,6 +54,7 @@ go_library( "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_errors//oserror", "@com_github_cockroachdb_logtags//:logtags", + "@com_github_cockroachdb_redact//:redact", "@com_github_nightlyone_lockfile//:lockfile", "@org_golang_x_time//rate", ], diff --git a/pkg/cli/democluster/demo_cluster.go b/pkg/cli/democluster/demo_cluster.go index a30345fe0e13..125215dd1103 100644 --- a/pkg/cli/democluster/demo_cluster.go +++ b/pkg/cli/democluster/demo_cluster.go @@ -525,6 +525,18 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { }(phaseCtx); err != nil { return err } + + // Step 10: restore web sessions. + phaseCtx = logtags.AddTag(ctx, "phase", 10) + if err := func(ctx context.Context) error { + if err := c.restoreWebSessions(ctx); err != nil { + c.warnLog(ctx, "unable to restore web sessions: %v", err) + } + return nil + }(phaseCtx); err != nil { + return err + } + return nil } @@ -852,6 +864,9 @@ func TestingForceRandomizeDemoPorts() func() { } func (c *transientCluster) Close(ctx context.Context) { + if err := c.saveWebSessions(ctx); err != nil { + c.warnLog(ctx, "unable to save web sessions: %v", err) + } if c.stopper != nil { if r := recover(); r != nil { // A panic here means some of the async tasks may still be diff --git a/pkg/cli/democluster/session_persistence.go b/pkg/cli/democluster/session_persistence.go new file mode 100644 index 000000000000..ff259242a301 --- /dev/null +++ b/pkg/cli/democluster/session_persistence.go @@ -0,0 +1,220 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package democluster + +import ( + "bufio" + "context" + gosql "database/sql" + "encoding/json" + "io" + "os" + "path/filepath" + + "github.com/cockroachdb/cockroach/pkg/security/certnames" + "github.com/cockroachdb/cockroach/pkg/security/username" + "github.com/cockroachdb/cockroach/pkg/server/pgurl" + "github.com/cockroachdb/cockroach/pkg/sql/sem/catconstants" + "github.com/cockroachdb/cockroach/pkg/util/netutil/addr" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/redact" +) + +// saveWebSessions persists any currently active web session to disk, +// so they can be restored when the demo shell starts again. +func (c *transientCluster) saveWebSessions(ctx context.Context) error { + return c.doPersistence(ctx, "saving", c.saveWebSessionsInternal) +} + +// restoreWebSessions restores any currently active web session from disk. +func (c *transientCluster) restoreWebSessions(ctx context.Context) error { + return c.doPersistence(ctx, "restoring", c.restoreWebSessionsInternal) +} + +func (c *transientCluster) doPersistence( + ctx context.Context, + word redact.SafeString, + fn func(ctx context.Context, filename string, db *gosql.DB) error, +) error { + if c.demoDir == "" { + // No directory to save to. Bail. + return nil + } + + // Lock the output dir to prevent concurrent demo sessions + // from stamping each other over. + cleanup, err := c.lockDir(ctx, c.demoDir, "sessions") + if err != nil { + return err + } + defer cleanup() + + // We compose the connection URL anew because + // getNetworkURLForServer() uses the 'demo' account and we want a + // root connection, using client certs. + // + // This will bypass any blockage caused by a mistaken HBA + // configuration by the user. + caCert := filepath.Join(c.demoDir, certnames.CACertFilename()) + rootCert := filepath.Join(c.demoDir, certnames.ClientCertFilename(username.RootUserName())) + rootKey := filepath.Join(c.demoDir, certnames.ClientKeyFilename(username.RootUserName())) + u := pgurl.New(). + WithDatabase(catconstants.SystemDatabaseName). + WithUsername(username.RootUser). + WithAuthn(pgurl.AuthnClientCert(rootCert, rootKey)). + WithTransport(pgurl.TransportTLS(pgurl.TLSRequire, caCert)) + + apply := func(filename string, u *pgurl.URL) error { + db, err := gosql.Open("postgres", u.ToPQ().String()) + if err != nil { + return err + } + defer db.Close() + return fn(ctx, filename, db) + } + + if c.demoCtx.Multitenant && len(c.tenantServers) > 0 && c.tenantServers[0] != nil { + sqlAddr := c.tenantServers[0].SQLAddr() + host, port, _ := addr.SplitHostPort(sqlAddr, "") + u.WithNet(pgurl.NetTCP(host, port)) + if err := apply("sessions.app.txt", u); err != nil { + return errors.Wrapf(err, "%s for application tenant", word) + } + } + + if c.servers[0].TestServer != nil { + sqlAddr := c.servers[0].ServingSQLAddr() + host, port, _ := addr.SplitHostPort(sqlAddr, "") + u.WithNet(pgurl.NetTCP(host, port)) + return errors.Wrapf( + apply("sessions.system.txt", u), + "%s for for system tenant", word) + } + return nil +} + +type webSessionRow struct { + ID int64 + HashedSecret []byte + Username string + ExpiresAt string +} + +// saveWebSessionsInternal saves the sessions for just one tenant to +// the provided filename. +func (c *transientCluster) saveWebSessionsInternal( + ctx context.Context, filename string, db *gosql.DB, +) error { + c.infoLog(ctx, "saving sessions") + rows, err := db.QueryContext(ctx, ` +SELECT id, "hashedSecret", username, "expiresAt" +FROM system.web_sessions +WHERE "expiresAt" > now() +AND "revokedAt" IS NULL`) + if err != nil { + return err + } + defer rows.Close() + + outputFile, err := os.Create(filepath.Join(c.demoDir, filename)) + if err != nil { + return err + } + defer func() { + if err := outputFile.Close(); err != nil { + c.warnLog(ctx, "%v", err) + } + }() + buf := bufio.NewWriter(outputFile) + defer func() { + if err := buf.Flush(); err != nil { + c.warnLog(ctx, "%v", err) + } + }() + + numSessions := 0 + for rows.Next() { + var row webSessionRow + if err := rows.Scan(&row.ID, &row.HashedSecret, &row.Username, &row.ExpiresAt); err != nil { + return err + } + j, err := json.Marshal(row) + if err != nil { + return err + } + j = append(j, '\n') + if _, err := buf.Write(j); err != nil { + return err + } + numSessions++ + } + + c.infoLog(ctx, "saved %d sessions to %q", numSessions, filename) + + return nil +} + +// restoreWebSessionsInternal restores the sessions for just one +// tenant from the provided filename. +func (c *transientCluster) restoreWebSessionsInternal( + ctx context.Context, filename string, db *gosql.DB, +) error { + c.infoLog(ctx, "restoring sessions") + + inputFile, err := os.Open(filepath.Join(c.demoDir, filename)) + if err != nil { + if oserror.IsNotExist(err) { + // No file yet. That's OK. + return nil + } + return err + } + defer func() { + if err := inputFile.Close(); err != nil { + c.warnLog(ctx, "%v", err) + } + }() + + buf := bufio.NewReader(inputFile) + numSessions := 0 + for { + j, err := buf.ReadBytes('\n') + if err != nil { + if errors.Is(err, io.EOF) { + // Done reading. Nothing more to do. + break + } + return err + } + + var row webSessionRow + if err := json.Unmarshal(j, &row); err != nil { + return err + } + + if _, err := db.ExecContext(ctx, ` +INSERT INTO system.web_sessions(id, "hashedSecret", username, "expiresAt") +VALUES ($1, $2, $3, $4)`, + row.ID, + row.HashedSecret, + row.Username, + row.ExpiresAt, + ); err != nil { + return err + } + numSessions++ + } + + c.infoLog(ctx, "restored %d sessions from %q", numSessions, filename) + + return nil +} diff --git a/pkg/cli/interactive_tests/test_demo_cli_integration.tcl b/pkg/cli/interactive_tests/test_demo_cli_integration.tcl index 35856aa68416..d4021f9bdfd1 100644 --- a/pkg/cli/interactive_tests/test_demo_cli_integration.tcl +++ b/pkg/cli/interactive_tests/test_demo_cli_integration.tcl @@ -3,6 +3,7 @@ source [file join [file dirname $argv0] common.tcl] set ::env(COCKROACH_INSECURE) "false" +set python "python2.7" spawn $argv demo --empty --no-line-editor --multitenant=true eexpect "Welcome" @@ -72,6 +73,55 @@ send "\\q\r" eexpect eof end_test +spawn /bin/bash +set shell_spawn_id $spawn_id +send "PS1=':''/# '\r" +eexpect ":/# " + +start_test "Check that an auth cookie can be extracted for a demo session" +# From the system tenant. +send "$argv auth-session login root --certs-dir=\$HOME/.cockroach-demo -p 26258 --only-cookie >cookie_system.txt\r" +eexpect ":/# " +# From the app tenant. +send "$argv auth-session login root --certs-dir=\$HOME/.cockroach-demo --only-cookie >cookie_app.txt\r" +eexpect ":/# " + +# Check that the cookies work. +set pyfile [file join [file dirname $argv0] test_auth_cookie.py] + +send "$python $pyfile cookie_system.txt 'http://localhost:8081/_admin/v1/users'\r" +eexpect "username" +eexpect "demo" +send "$python $pyfile cookie_app.txt 'http://localhost:8080/_admin/v1/users'\r" +eexpect "username" +eexpect "demo" +end_test + + +start_test "Check that login sessions are preserved across demo restarts." + +set spawn_id $demo_spawn_id +send "\\q\r" +eexpect eof + +spawn $argv demo --empty --no-line-editor --multitenant=true +set demo_spawn_id $spawn_id +eexpect "Welcome" +eexpect "defaultdb>" + +set spawn_id $shell_spawn_id + +send "$python $pyfile cookie_system.txt 'http://localhost:8081/_admin/v1/users'\r" +eexpect "username" +eexpect "demo" +send "$python $pyfile cookie_app.txt 'http://localhost:8080/_admin/v1/users'\r" +eexpect "username" +eexpect "demo" +end_test + +send "exit\r" +eexpect eof + set spawn_id $demo_spawn_id send "\\q\r" eexpect eof From fa71be312132c0b337cff3ef9e0f21f3fc58e13c Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 6 Jan 2023 14:01:37 +0100 Subject: [PATCH 2/3] server,demo: sanitize tenant server cfg for demo Prior to this patch, the port numbering for the servers started by the server controller inside `cockroach demo` was difficult to grok. This patch fixes it, and restores the "default" port numbers for the application tenant. It also fixes the display of the UI connection strings. For example: ``` ... - Connection parameters: (webui) http://127.0.0.1:8080/demologin?password=demo60684&tenant_name=demo-tenant&username=demo ... ``` Release note: None --- pkg/base/test_server_args.go | 9 ++ pkg/cli/democluster/demo_cluster.go | 89 ++++++++++++------ pkg/cli/democluster/demo_cluster_test.go | 47 +++++----- pkg/cli/interactive_tests/test_demo.tcl | 10 +- .../test_demo_cli_integration.tcl | 8 +- pkg/server/config.go | 4 + pkg/server/server_controller.go | 41 +++++---- pkg/server/testserver.go | 91 ++++++++++--------- pkg/server/testserver_http.go | 12 ++- 9 files changed, 184 insertions(+), 127 deletions(-) diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index 0d3ff49e6252..2f528b278d66 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -59,6 +59,15 @@ type TestServerArgs struct { // DisableTLSForHTTP if set, disables TLS for the HTTP interface. DisableTLSForHTTP bool + // SecondaryTenantPortOffset if non-zero forces the network addresses + // generated for servers started by the serverController to be offset + // from the base addressed by the specified amount. + SecondaryTenantPortOffset int + + // SecondaryTenantKnobs contains the testing knobs to use + // for tenant servers started by the serverController. + SecondaryTenantKnobs TestingKnobs + // JoinAddr is the address of a node we are joining. // // If left empty and the TestServer is being added to a nonempty cluster, this diff --git a/pkg/cli/democluster/demo_cluster.go b/pkg/cli/democluster/demo_cluster.go index 125215dd1103..35c3c1ae1307 100644 --- a/pkg/cli/democluster/demo_cluster.go +++ b/pkg/cli/democluster/demo_cluster.go @@ -218,7 +218,9 @@ func NewDemoCluster( // tenant. // Note: this logic can be removed once we use a single // listener for HTTP and SQL. - c.httpFirstPort += c.demoCtx.NumNodes + if !c.demoCtx.InProcessTenant { + c.httpFirstPort += c.demoCtx.NumNodes + } c.sqlFirstPort += c.demoCtx.NumNodes c.rpcFirstPort += c.demoCtx.NumNodes } @@ -419,23 +421,16 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { latencyMap := c.servers[i].Cfg.TestingKnobs.Server.(*server.TestingKnobs). ContextTestingKnobs.InjectedLatencyOracle c.infoLog(ctx, "starting tenant node %d", i) - tenantStopper := stop.NewStopper() - ts, err := c.servers[i].StartTenant(ctx, base.TestTenantArgs{ + + args := base.TestTenantArgs{ // We set the tenant ID to i+2, since tenant 0 is not a tenant, and // tenant 1 is the system tenant. We also subtract 2 for the "starting" // SQL/HTTP ports so the first tenant ends up with the desired default // ports. - DisableCreateTenant: !createTenant, - TenantName: roachpb.TenantName(fmt.Sprintf("demo-tenant-%d", secondaryTenantID)), - TenantID: roachpb.MustMakeTenantID(secondaryTenantID), - Stopper: tenantStopper, - ForceInsecure: c.demoCtx.Insecure, - SSLCertsDir: c.demoDir, - DisableTLSForHTTP: true, - EnableDemoLoginEndpoint: true, - StartingRPCAndSQLPort: c.demoCtx.SQLPort - secondaryTenantID + i, - StartingHTTPPort: c.demoCtx.HTTPPort - secondaryTenantID + i, - Locality: c.demoCtx.Localities[i], + DisableCreateTenant: !createTenant, + TenantName: roachpb.TenantName("demo-tenant"), + TenantID: roachpb.MustMakeTenantID(secondaryTenantID), + InProcessTenant: c.demoCtx.InProcessTenant, TestingKnobs: base.TestingKnobs{ Server: &server.TestingKnobs{ ContextTestingKnobs: rpc.ContextTestingKnobs{ @@ -444,15 +439,33 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { }, }, }, - InProcessTenant: c.demoCtx.InProcessTenant, - }) - c.stopper.AddCloser(stop.CloserFn(func() { - stopCtx := context.Background() - if ts != nil { - stopCtx = ts.AnnotateCtx(stopCtx) - } - tenantStopper.Stop(stopCtx) - })) + } + + var tenantStopper *stop.Stopper + if !c.demoCtx.InProcessTenant { + tenantStopper = stop.NewStopper() + args.Stopper = tenantStopper + args.ForceInsecure = c.demoCtx.Insecure + args.SSLCertsDir = c.demoDir + args.DisableTLSForHTTP = true + args.EnableDemoLoginEndpoint = true + args.StartingRPCAndSQLPort = c.demoCtx.SQLPort - secondaryTenantID + i + args.StartingHTTPPort = c.demoCtx.HTTPPort - secondaryTenantID + i + args.Locality = c.demoCtx.Localities[i] + } + + ts, err := c.servers[i].StartTenant(ctx, args) + if !c.demoCtx.InProcessTenant { + // InProcessTenant means that the server controller is + // already taking care of shutdown. + c.stopper.AddCloser(stop.CloserFn(func() { + stopCtx := context.Background() + if ts != nil { + stopCtx = ts.AnnotateCtx(stopCtx) + } + tenantStopper.Stop(stopCtx) + })) + } if err != nil { return err } @@ -601,10 +614,14 @@ func (c *transientCluster) createAndAddNode( // The latency map will be populated after all servers have // started listening on RPC, and before they proceed with their // startup routine. - serverKnobs.ContextTestingKnobs = rpc.ContextTestingKnobs{ + rpcKnobs := rpc.ContextTestingKnobs{ InjectedLatencyOracle: regionlatency.MakeAddrMap(), InjectedLatencyEnabled: c.latencyEnabled.Get, } + serverKnobs.ContextTestingKnobs = rpcKnobs + args.SecondaryTenantKnobs.Server = &server.TestingKnobs{ + ContextTestingKnobs: rpcKnobs, + } } // Create the server instance. This also registers the in-memory store @@ -806,6 +823,7 @@ func (demoCtx *Context) testServerArgsForTransientCluster( // Demo clusters by default will create their own tenants, so we // don't need to create them here. DisableDefaultTestTenant: true, + Knobs: base.TestingKnobs{ Server: &server.TestingKnobs{ StickyEngineRegistry: stickyEngineRegistry, @@ -839,6 +857,15 @@ func (demoCtx *Context) testServerArgsForTransientCluster( args.Addr = fmt.Sprintf("127.0.0.1:%d", rpcPort) args.SQLAddr = fmt.Sprintf("127.0.0.1:%d", sqlPort) args.HTTPAddr = fmt.Sprintf("127.0.0.1:%d", httpPort) + + if demoCtx.InProcessTenant { + // The code in NewDemoCluster put the KV ports higher + // so we need to subtract the number of nodes to get + // back to the "good" ports. + // We reduce NumNodes by 1 because the server controller + // uses 1-based indexing for servers. + args.SecondaryTenantPortOffset = -(demoCtx.NumNodes + 1) + } } if demoCtx.Localities != nil { @@ -1793,6 +1820,11 @@ func (c *transientCluster) ListDemoNodes(w, ew io.Writer, justOne, verbose bool) // Connection parameters for the system tenant follow. uiURL := s.Cfg.AdminURL() + if q := uiURL.Query(); c.demoCtx.Multitenant && c.demoCtx.InProcessTenant && !q.Has(server.TenantNameParamInQueryURL) { + q.Add(server.TenantNameParamInQueryURL, catconstants.SystemTenantName) + uiURL.RawQuery = q.Encode() + } + sqlURL, err := c.getNetworkURLForServer(context.Background(), i, false /* includeAppName */, false /* forSecondaryTenant */) if err != nil { @@ -1824,12 +1856,11 @@ func (c *transientCluster) printURLs( // Print node ID and web UI URL. Embed the autologin feature inside the URL. // We avoid printing those when insecure, as the autologin path is not available // in that case. - pwauth := url.Values{ - "username": []string{c.adminUser.Normalized()}, - "password": []string{c.adminPassword}, - } + q := uiURL.Query() + q.Add("username", c.adminUser.Normalized()) + q.Add("password", c.adminPassword) uiURL.Path = server.DemoLoginPath - uiURL.RawQuery = pwauth.Encode() + uiURL.RawQuery = q.Encode() } fmt.Fprintln(w, " (webui) ", uiURL) diff --git a/pkg/cli/democluster/demo_cluster_test.go b/pkg/cli/democluster/demo_cluster_test.go index cb60a048de7d..91070e9057d3 100644 --- a/pkg/cli/democluster/demo_cluster_test.go +++ b/pkg/cli/democluster/demo_cluster_test.go @@ -67,17 +67,18 @@ func TestTestServerArgsForTransientCluster(t *testing.T) { sqlPoolMemorySize: 2 << 10, cacheSize: 1 << 10, expected: base.TestServerArgs{ - DisableDefaultTestTenant: true, - PartOfCluster: true, - JoinAddr: "127.0.0.1", - DisableTLSForHTTP: true, - Addr: "127.0.0.1:7890", - SQLAddr: "127.0.0.1:1234", - HTTPAddr: "127.0.0.1:4567", - SQLMemoryPoolSize: 2 << 10, - CacheSize: 1 << 10, - NoAutoInitializeCluster: true, - EnableDemoLoginEndpoint: true, + DisableDefaultTestTenant: true, + PartOfCluster: true, + JoinAddr: "127.0.0.1", + DisableTLSForHTTP: true, + Addr: "127.0.0.1:7890", + SQLAddr: "127.0.0.1:1234", + HTTPAddr: "127.0.0.1:4567", + SecondaryTenantPortOffset: -2, + SQLMemoryPoolSize: 2 << 10, + CacheSize: 1 << 10, + NoAutoInitializeCluster: true, + EnableDemoLoginEndpoint: true, Knobs: base.TestingKnobs{ Server: &server.TestingKnobs{ StickyEngineRegistry: stickyEnginesRegistry, @@ -91,17 +92,18 @@ func TestTestServerArgsForTransientCluster(t *testing.T) { sqlPoolMemorySize: 4 << 10, cacheSize: 4 << 10, expected: base.TestServerArgs{ - DisableDefaultTestTenant: true, - PartOfCluster: true, - JoinAddr: "127.0.0.1", - Addr: "127.0.0.1:7892", - SQLAddr: "127.0.0.1:1236", - HTTPAddr: "127.0.0.1:4569", - DisableTLSForHTTP: true, - SQLMemoryPoolSize: 4 << 10, - CacheSize: 4 << 10, - NoAutoInitializeCluster: true, - EnableDemoLoginEndpoint: true, + DisableDefaultTestTenant: true, + PartOfCluster: true, + JoinAddr: "127.0.0.1", + Addr: "127.0.0.1:7892", + SQLAddr: "127.0.0.1:1236", + HTTPAddr: "127.0.0.1:4569", + SecondaryTenantPortOffset: -2, + DisableTLSForHTTP: true, + SQLMemoryPoolSize: 4 << 10, + CacheSize: 4 << 10, + NoAutoInitializeCluster: true, + EnableDemoLoginEndpoint: true, Knobs: base.TestingKnobs{ Server: &server.TestingKnobs{ StickyEngineRegistry: stickyEnginesRegistry, @@ -257,6 +259,7 @@ func TestTransientClusterMultitenant(t *testing.T) { // This test is too slow to complete under the race detector, sometimes. skip.UnderRace(t) + skip.UnderStress(t) demoCtx := newDemoCtx() // Set up an empty 3-node cluster with tenants on each node. diff --git a/pkg/cli/interactive_tests/test_demo.tcl b/pkg/cli/interactive_tests/test_demo.tcl index e257be258e79..60c4ca2a4aa4 100644 --- a/pkg/cli/interactive_tests/test_demo.tcl +++ b/pkg/cli/interactive_tests/test_demo.tcl @@ -65,7 +65,7 @@ eexpect "sslmode=disable" eexpect "System tenant" eexpect "(webui)" eexpect "http://" -eexpect ":8081" +eexpect ":8080" eexpect "(sql)" eexpect "root@" eexpect ":26258" @@ -133,7 +133,7 @@ eexpect "sslrootcert=" eexpect "System tenant" eexpect "(webui)" -eexpect "http://127.0.0.1:8081/demologin" +eexpect "http://127.0.0.1:8080/demologin" eexpect "(sql)" eexpect "postgresql://demo:" eexpect ":26258" @@ -261,11 +261,11 @@ eexpect "defaultdb>" # Show the URLs. send "\\demo ls\r" eexpect "http://" -eexpect ":8003" +eexpect ":8000" eexpect "http://" -eexpect ":8004" +eexpect ":8001" eexpect "http://" -eexpect ":8005" +eexpect ":8002" eexpect "defaultdb>" send_eof diff --git a/pkg/cli/interactive_tests/test_demo_cli_integration.tcl b/pkg/cli/interactive_tests/test_demo_cli_integration.tcl index d4021f9bdfd1..7e12d35a979e 100644 --- a/pkg/cli/interactive_tests/test_demo_cli_integration.tcl +++ b/pkg/cli/interactive_tests/test_demo_cli_integration.tcl @@ -89,10 +89,10 @@ eexpect ":/# " # Check that the cookies work. set pyfile [file join [file dirname $argv0] test_auth_cookie.py] -send "$python $pyfile cookie_system.txt 'http://localhost:8081/_admin/v1/users'\r" +send "$python $pyfile cookie_system.txt 'http://localhost:8080/_admin/v1/users?tenant_name=system'\r" eexpect "username" eexpect "demo" -send "$python $pyfile cookie_app.txt 'http://localhost:8080/_admin/v1/users'\r" +send "$python $pyfile cookie_app.txt 'http://localhost:8080/_admin/v1/users?tenant_name=demo-tenant'\r" eexpect "username" eexpect "demo" end_test @@ -111,10 +111,10 @@ eexpect "defaultdb>" set spawn_id $shell_spawn_id -send "$python $pyfile cookie_system.txt 'http://localhost:8081/_admin/v1/users'\r" +send "$python $pyfile cookie_system.txt 'http://localhost:8080/_admin/v1/users?tenant_name=system'\r" eexpect "username" eexpect "demo" -send "$python $pyfile cookie_app.txt 'http://localhost:8080/_admin/v1/users'\r" +send "$python $pyfile cookie_app.txt 'http://localhost:8080/_admin/v1/users?tenant_name=demo-tenant'\r" eexpect "username" eexpect "demo" end_test diff --git a/pkg/server/config.go b/pkg/server/config.go index 3bad5ce56970..c0e9d485c9fc 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -183,6 +183,10 @@ type BaseConfig struct { // TestingKnobs is used for internal test controls only. TestingKnobs base.TestingKnobs + // SecondaryTenantKnobs contains the testing knobs to use + // for tenant servers started by the serverController. + SecondaryTenantKnobs base.TestingKnobs + // TestingInsecureWebAccess enables uses of the HTTP and UI // endpoints without a valid authentication token. This should be // used only in tests what want a secure cluster with RPC diff --git a/pkg/server/server_controller.go b/pkg/server/server_controller.go index 1c745a602792..715a552fc9fb 100644 --- a/pkg/server/server_controller.go +++ b/pkg/server/server_controller.go @@ -93,7 +93,6 @@ type newServerFn func( tenantName roachpb.TenantName, index int, deregister func(), - opts *BaseConfig, ) (onDemandServer, error) // serverController manages a fleet of multiple servers side-by-side. @@ -107,10 +106,6 @@ type serverController struct { // stopper is the parent stopper. stopper *stop.Stopper - // tenantBaseCfg allows overriding of the baseCfg for all new tenants. - // Used for testing. - tenantBaseCfg *BaseConfig - mu struct { syncutil.Mutex @@ -169,7 +164,7 @@ func (c *serverController) getOrCreateServer( // Server does not exist yet: instantiate and start it. c.mu.nextServerIdx++ idx := c.mu.nextServerIdx - s, err := c.newServerFn(ctx, tenantName, idx, deregisterFn, c.tenantBaseCfg) + s, err := c.newServerFn(ctx, tenantName, idx, deregisterFn) if err != nil { return nil, err } @@ -207,6 +202,9 @@ func (c *serverController) getServers() (res []onDemandServer) { // TenantSelectHeader is the HTTP header used to select a particular tenant. const TenantSelectHeader = `X-Cockroach-Tenant` +// TenantNameParamInQueryURL is the HTTP query URL parameter used to select a particular tenant. +const TenantNameParamInQueryURL = "tenant_name" + // TenantSelectCookieName is the name of the HTTP cookie used to select a particular tenant, // if the custom header is not specified. const TenantSelectCookieName = `tenant` @@ -239,8 +237,7 @@ func (c *serverController) httpMux(w http.ResponseWriter, r *http.Request) { func getTenantNameFromHTTPRequest(r *http.Request) (roachpb.TenantName, bool) { // Highest priority is manual override on the URL query parameters. - const tenantNameParamInQueryURL = "tenant_name" - if tenantName := r.URL.Query().Get(tenantNameParamInQueryURL); tenantName != "" { + if tenantName := r.URL.Query().Get(TenantNameParamInQueryURL); tenantName != "" { return roachpb.TenantName(tenantName), true } @@ -470,11 +467,7 @@ var ErrInvalidTenant error = errInvalidTenantMarker{} // is not active), the returned error will contain the // ErrInvalidTenant mark, which can be checked with errors.Is. func (s *Server) newServerForTenant( - ctx context.Context, - tenantName roachpb.TenantName, - index int, - deregister func(), - baseCfg *BaseConfig, + ctx context.Context, tenantName roachpb.TenantName, index int, deregister func(), ) (onDemandServer, error) { // Look up the ID of the requested tenant. // @@ -502,7 +495,7 @@ func (s *Server) newServerForTenant( } // Start the tenant server. - tenantStopper, tenantServer, err := s.startInMemoryTenantServerInternal(ctx, tenantID, index, baseCfg) + tenantStopper, tenantServer, err := s.startInMemoryTenantServerInternal(ctx, tenantID, index) if err != nil { // Abandon any work done so far. tenantStopper.Stop(ctx) @@ -570,7 +563,7 @@ func (t *systemServerWrapper) testingGetSQLAddr() string { // simultaneously running server. This can be used to allocate // distinct but predictable network listeners. func (s *Server) startInMemoryTenantServerInternal( - ctx context.Context, tenantID roachpb.TenantID, index int, baseCfgOverride *BaseConfig, + ctx context.Context, tenantID roachpb.TenantID, index int, ) (stopper *stop.Stopper, tenantServer *SQLServerWrapper, err error) { stopper = stop.NewStopper() @@ -582,9 +575,6 @@ func (s *Server) startInMemoryTenantServerInternal( if err != nil { return stopper, nil, err } - if baseCfgOverride != nil { - baseCfg = *baseCfgOverride - } // Create a child stopper for this tenant's server. ambientCtx := baseCfg.AmbientCtx @@ -694,6 +684,7 @@ func makeInMemoryTenantServerConfig( baseCfg.Locality = kvServerCfg.BaseConfig.Locality baseCfg.SpanConfigsDisabled = kvServerCfg.BaseConfig.SpanConfigsDisabled baseCfg.EnableDemoLoginEndpoint = kvServerCfg.BaseConfig.EnableDemoLoginEndpoint + baseCfg.TestingKnobs = kvServerCfg.BaseConfig.SecondaryTenantKnobs // TODO(knz): use a single network interface for all tenant servers. // See: https://github.com/cockroachdb/cockroach/issues/84585 @@ -709,6 +700,18 @@ func makeInMemoryTenantServerConfig( return baseCfg, sqlCfg, err } + // This will change when we can use a single SQL listener. + const splitSQL = false + if splitSQL { + baseCfg.SplitListenSQL = true + } else { + baseCfg.SplitListenSQL = false + baseCfg.Addr, baseCfg.SQLAddr = baseCfg.SQLAddr, baseCfg.Addr + baseCfg.AdvertiseAddr, baseCfg.SQLAdvertiseAddr = baseCfg.SQLAdvertiseAddr, baseCfg.AdvertiseAddr + baseCfg.SQLAddr = "" + baseCfg.SQLAdvertiseAddr = "" + } + // The parent server will route HTTP requests to us. baseCfg.DisableHTTPListener = true // Nevertheless, we like to know our own HTTP address. @@ -719,8 +722,6 @@ func makeInMemoryTenantServerConfig( // See: https://github.com/cockroachdb/cockroach/issues/84585 baseCfg.SocketFile = "" - baseCfg.SplitListenSQL = false - // TODO(knz): Make the TLS config separate per tenant. // See https://cockroachlabs.atlassian.net/browse/CRDB-14539. baseCfg.SSLCertsDir = kvServerCfg.BaseConfig.SSLCertsDir diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 023bd512669f..d98db7c66cab 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -215,6 +215,9 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { cfg.SQLAdvertiseAddr = util.IsolatedTestAddr.String() cfg.HTTPAddr = util.IsolatedTestAddr.String() } + if params.SecondaryTenantPortOffset != 0 { + cfg.SecondaryTenantPortOffset = params.SecondaryTenantPortOffset + } if params.Addr != "" { cfg.Addr = params.Addr cfg.AdvertiseAddr = params.Addr @@ -828,6 +831,26 @@ func (ts *TestServer) StartTenant( } } + if params.InProcessTenant { + onDemandServer, err := ts.serverController.getOrCreateServer(ctx, params.TenantName) + if err != nil { + return nil, err + } + sw := onDemandServer.(*tenantServerWrapper) + + hts := &httpTestServer{} + hts.t.authentication = sw.server.authentication + hts.t.sqlServer = sw.server.sqlServer + hts.t.tenantName = params.TenantName + + return &TestTenant{ + SQLServer: sw.server.sqlServer, + Cfg: sw.server.sqlServer.cfg, + httpTestServer: hts, + drain: sw.server.drainServer, + }, err + } + st := params.Settings if st == nil { st = cluster.MakeTestingClusterSettings() @@ -928,61 +951,39 @@ func (ts *TestServer) StartTenant( baseCfg.HTTPAdvertiseAddr = newAddr } - if !params.InProcessTenant { - sw, err := NewTenantServer( - ctx, - stopper, - baseCfg, - sqlCfg, - ) - if err != nil { - return nil, err - } - go func() { - // If the server requests a shutdown, do that simply by stopping the - // tenant's stopper. - select { - case <-sw.ShutdownRequested(): - stopper.Stop(sw.AnnotateCtx(context.Background())) - case <-stopper.ShouldQuiesce(): - } - }() - - if err := sw.Start(ctx); err != nil { - return nil, err - } - - hts := &httpTestServer{} - hts.t.authentication = sw.authentication - hts.t.sqlServer = sw.sqlServer - - return &TestTenant{ - SQLServer: sw.sqlServer, - Cfg: &baseCfg, - httpTestServer: hts, - drain: sw.drainServer, - }, err + sw, err := NewTenantServer( + ctx, + stopper, + baseCfg, + sqlCfg, + ) + if err != nil { + return nil, err } + go func() { + // If the server requests a shutdown, do that simply by stopping the + // tenant's stopper. + select { + case <-sw.ShutdownRequested(): + stopper.Stop(sw.AnnotateCtx(context.Background())) + case <-stopper.ShouldQuiesce(): + } + }() - ts.serverController.tenantBaseCfg = &baseCfg - onDemandServer, err := ts.serverController.getOrCreateServer(ctx, params.TenantName) - if err != nil { + if err := sw.Start(ctx); err != nil { return nil, err } - sw := onDemandServer.(*tenantServerWrapper) hts := &httpTestServer{} - hts.t.authentication = sw.server.authentication - hts.t.sqlServer = sw.server.sqlServer - hts.t.tenantName = params.TenantName + hts.t.authentication = sw.authentication + hts.t.sqlServer = sw.sqlServer return &TestTenant{ - SQLServer: sw.server.sqlServer, - Cfg: sw.server.sqlServer.cfg, + SQLServer: sw.sqlServer, + Cfg: &baseCfg, httpTestServer: hts, - drain: sw.server.drainServer, + drain: sw.drainServer, }, err - } // ExpectedInitialRangeCount returns the expected number of ranges that should diff --git a/pkg/server/testserver_http.go b/pkg/server/testserver_http.go index 6edd7ebeeebe..273a723d34ec 100644 --- a/pkg/server/testserver_http.go +++ b/pkg/server/testserver_http.go @@ -54,7 +54,9 @@ type tenantHeaderDecorator struct { } func (t tenantHeaderDecorator) RoundTrip(req *http.Request) (*http.Response, error) { - req.Header.Add(TenantSelectHeader, string(t.tenantName)) + if t.tenantName != "" { + req.Header.Add(TenantSelectHeader, string(t.tenantName)) + } return t.RoundTripper.RoundTrip(req) } @@ -62,7 +64,13 @@ var _ http.RoundTripper = &tenantHeaderDecorator{} // AdminURL implements TestServerInterface. func (ts *httpTestServer) AdminURL() string { - return ts.t.sqlServer.execCfg.RPCContext.Config.AdminURL().String() + u := ts.t.sqlServer.execCfg.RPCContext.Config.AdminURL() + if ts.t.tenantName != "" { + q := u.Query() + q.Add(TenantNameParamInQueryURL, string(ts.t.tenantName)) + u.RawQuery = q.Encode() + } + return u.String() } // GetUnauthenticatedHTTPClient implements TestServerInterface. From 3c2bff17c507635af83cb83ee9a988b2772b7a11 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Fri, 6 Jan 2023 14:21:20 +0100 Subject: [PATCH 3/3] cli,server: rename "InProcessTenant" to "UseServerController" In `cockroach demo` and unit tests, all tenant servers regardless of origin are "in process". The flag was really pointing to the server controller, not the process-ness. So this patch renames it accordingly. Consequently, the corresponding `cockroach demo` flag becomes `--disable-server-controller`. Release note: None --- pkg/base/test_server_args.go | 6 +++--- pkg/cli/cliflags/flags.go | 12 +++++------- pkg/cli/context.go | 2 +- pkg/cli/democluster/context.go | 5 +++-- pkg/cli/democluster/demo_cluster.go | 16 ++++++++-------- pkg/cli/flags.go | 4 ++-- pkg/server/testserver.go | 2 +- 7 files changed, 23 insertions(+), 24 deletions(-) diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index 2f528b278d66..10c86dfd1d0a 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -340,7 +340,7 @@ type TestTenantArgs struct { // Cockroach Labs. Should remain disabled during unit testing. StartDiagnosticsReporting bool - // InProcessTenant is used to initialize tenants through the - // kv server's `serverController` in the same process. - InProcessTenant bool + // UseServerController tells testserver.StartTenant() to use + // its serverController to start the secondary tenant. + UseServerController bool } diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index 8cf2c75bc75d..a84bac95810f 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -1343,16 +1343,14 @@ More information about the geo-partitioned replicas topology can be found at: Description: ` If set, cockroach demo will start separate in-memory KV and SQL servers in multi-tenancy mode. The SQL shell will be connected to the first tenant, and can be switched between tenants -and the system tenant using the \connect command. By default the tenant will be initialized -in-process. Use --simulate-separate-tenant-process to emulate a separate tenant process. -`, +and the system tenant using the \connect command.`, } - DemoInProcessTenant = FlagInfo{ - Name: "in-process-tenant", + DemoDisableServerController = FlagInfo{ + Name: "disable-server-controller", Description: ` -If set, and --multitenant is set to true, the tenant process will be started -in-process.`, +If set, the server controller will not be used to start secondary +tenant servers.`, } DemoNoLicense = FlagInfo{ diff --git a/pkg/cli/context.go b/pkg/cli/context.go index d2747f30d2ef..b97de80a087d 100644 --- a/pkg/cli/context.go +++ b/pkg/cli/context.go @@ -618,7 +618,7 @@ func setDemoContextDefaults() { demoCtx.HTTPPort, _ = strconv.Atoi(base.DefaultHTTPPort) demoCtx.WorkloadMaxQPS = 25 demoCtx.Multitenant = true - demoCtx.InProcessTenant = true + demoCtx.DisableServerController = false demoCtx.DefaultEnableRangefeeds = true demoCtx.pidFile = "" diff --git a/pkg/cli/democluster/context.go b/pkg/cli/democluster/context.go index 1371edc728a1..9fe8a420709f 100644 --- a/pkg/cli/democluster/context.go +++ b/pkg/cli/democluster/context.go @@ -107,8 +107,9 @@ type Context struct { // out enabled. DefaultEnableRangefeeds bool - // InProcessTenant is true if we want to emulate an in-process tenant. - InProcessTenant bool + // DisableServerController is true if we want to avoid the server + // controller to instantiate tenant secondary servers. + DisableServerController bool } // IsInteractive returns true if the demo cluster configuration diff --git a/pkg/cli/democluster/demo_cluster.go b/pkg/cli/democluster/demo_cluster.go index 35c3c1ae1307..72cda2058562 100644 --- a/pkg/cli/democluster/demo_cluster.go +++ b/pkg/cli/democluster/demo_cluster.go @@ -218,7 +218,7 @@ func NewDemoCluster( // tenant. // Note: this logic can be removed once we use a single // listener for HTTP and SQL. - if !c.demoCtx.InProcessTenant { + if c.demoCtx.DisableServerController { c.httpFirstPort += c.demoCtx.NumNodes } c.sqlFirstPort += c.demoCtx.NumNodes @@ -430,7 +430,7 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { DisableCreateTenant: !createTenant, TenantName: roachpb.TenantName("demo-tenant"), TenantID: roachpb.MustMakeTenantID(secondaryTenantID), - InProcessTenant: c.demoCtx.InProcessTenant, + UseServerController: !c.demoCtx.DisableServerController, TestingKnobs: base.TestingKnobs{ Server: &server.TestingKnobs{ ContextTestingKnobs: rpc.ContextTestingKnobs{ @@ -442,7 +442,7 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { } var tenantStopper *stop.Stopper - if !c.demoCtx.InProcessTenant { + if c.demoCtx.DisableServerController { tenantStopper = stop.NewStopper() args.Stopper = tenantStopper args.ForceInsecure = c.demoCtx.Insecure @@ -455,9 +455,9 @@ func (c *transientCluster) Start(ctx context.Context) (err error) { } ts, err := c.servers[i].StartTenant(ctx, args) - if !c.demoCtx.InProcessTenant { - // InProcessTenant means that the server controller is - // already taking care of shutdown. + if c.demoCtx.DisableServerController { + // If we use the server controller, it is already taking + // care of shutdown. c.stopper.AddCloser(stop.CloserFn(func() { stopCtx := context.Background() if ts != nil { @@ -858,7 +858,7 @@ func (demoCtx *Context) testServerArgsForTransientCluster( args.SQLAddr = fmt.Sprintf("127.0.0.1:%d", sqlPort) args.HTTPAddr = fmt.Sprintf("127.0.0.1:%d", httpPort) - if demoCtx.InProcessTenant { + if !demoCtx.DisableServerController { // The code in NewDemoCluster put the KV ports higher // so we need to subtract the number of nodes to get // back to the "good" ports. @@ -1820,7 +1820,7 @@ func (c *transientCluster) ListDemoNodes(w, ew io.Writer, justOne, verbose bool) // Connection parameters for the system tenant follow. uiURL := s.Cfg.AdminURL() - if q := uiURL.Query(); c.demoCtx.Multitenant && c.demoCtx.InProcessTenant && !q.Has(server.TenantNameParamInQueryURL) { + if q := uiURL.Query(); c.demoCtx.Multitenant && !c.demoCtx.DisableServerController && !q.Has(server.TenantNameParamInQueryURL) { q.Add(server.TenantNameParamInQueryURL, catconstants.SystemTenantName) uiURL.RawQuery = q.Encode() } diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 3b6db50d6572..88c38f34fa59 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -795,11 +795,11 @@ func init() { cliflagcfg.BoolFlag(f, &demoCtx.DefaultEnableRangefeeds, cliflags.DemoEnableRangefeeds) cliflagcfg.BoolFlag(f, &demoCtx.Multitenant, cliflags.DemoMultitenant) - cliflagcfg.BoolFlag(f, &demoCtx.InProcessTenant, cliflags.DemoInProcessTenant) + cliflagcfg.BoolFlag(f, &demoCtx.DisableServerController, cliflags.DemoDisableServerController) // TODO(knz): Currently the multitenant UX for 'demo' is not // satisfying for end-users. Let's not advertise it too much. _ = f.MarkHidden(cliflags.DemoMultitenant.Name) - _ = f.MarkHidden(cliflags.DemoInProcessTenant.Name) + _ = f.MarkHidden(cliflags.DemoDisableServerController.Name) cliflagcfg.BoolFlag(f, &demoCtx.SimulateLatency, cliflags.Global) // We also support overriding the GEOS library path for 'demo'. diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index d98db7c66cab..52c525ffb1c8 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -831,7 +831,7 @@ func (ts *TestServer) StartTenant( } } - if params.InProcessTenant { + if params.UseServerController { onDemandServer, err := ts.serverController.getOrCreateServer(ctx, params.TenantName) if err != nil { return nil, err