diff --git a/pkg/acceptance/decommission_test.go b/pkg/acceptance/decommission_test.go deleted file mode 100644 index 4605f3ec09dc..000000000000 --- a/pkg/acceptance/decommission_test.go +++ /dev/null @@ -1,528 +0,0 @@ -// Copyright 2015 The Cockroach Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -// implied. See the License for the specific language governing -// permissions and limitations under the License. - -package acceptance - -import ( - "context" - "reflect" - "regexp" - "strconv" - "strings" - "testing" - "time" - - gosql "database/sql" - - "github.com/cockroachdb/cockroach/pkg/acceptance/cluster" - "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/server/serverpb" - "github.com/cockroachdb/cockroach/pkg/sql" - "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" - "github.com/cockroachdb/cockroach/pkg/util/encoding/csv" - "github.com/cockroachdb/cockroach/pkg/util/httputil" - "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/retry" - "github.com/kr/pretty" - "github.com/pkg/errors" -) - -// TestDecommission starts up an >3 node cluster and decomissions and -// recommissions nodes in various ways. -func TestDecommission(t *testing.T) { - RunLocal(t, func(t *testing.T) { - s := log.Scope(t) - defer s.Close(t) - - runTestWithCluster(t, testDecommissionInner) - }) -} - -func decommission( - ctx context.Context, - c cluster.Cluster, - runNode int, - targetNodes []roachpb.NodeID, - verbs ...string, -) (string, string, error) { - args := append([]string{"node"}, verbs...) - for _, target := range targetNodes { - args = append(args, strconv.Itoa(int(target))) - } - o, e, err := c.ExecCLI(ctx, runNode, args) - return o, e, err -} - -func matchCSV(csvStr string, matchColRow [][]string) (err error) { - defer func() { - if err != nil { - err = errors.Errorf("csv input:\n%v\nexpected:\n%s\nerrors:%s", csvStr, pretty.Sprint(matchColRow), err) - } - }() - - reader := csv.NewReader(strings.NewReader(csvStr)) - reader.FieldsPerRecord = -1 - records, err := reader.ReadAll() - if err != nil { - return err - } - - lr, lm := len(records), len(matchColRow) - if lr < lm { - return errors.Errorf("csv has %d rows, but expected at least %d", lr, lm) - } - - // Compare only the last len(matchColRow) records. That is, if we want to - // match 4 rows and we have 100 records, we only really compare - // records[96:], that is, the last four rows. 
- records = records[lr-lm:] - - for i := range records { - if lr, lm := len(records[i]), len(matchColRow[i]); lr != lm { - return errors.Errorf("row #%d: csv has %d columns, but expected %d", i+1, lr, lm) - } - for j := range records[i] { - pat, str := matchColRow[i][j], records[i][j] - re := regexp.MustCompile(pat) - if !re.MatchString(str) { - err = errors.Errorf("%v\nrow #%d, col #%d: found %q which does not match %q", err, i+1, j+1, str, pat) - } - } - } - return err -} - -func testDecommissionInner( - ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig, -) { - if c.NumNodes() < 4 { - // TODO(tschottdorf): or we invent a way to change the ZoneConfig in - // this test and test less ambitiously (or split up the tests). - t.Skip("need at least four nodes") - } - - withDB := func(n int, stmt string) { - db, err := gosql.Open("postgres", c.PGUrl(ctx, n)) - if err != nil { - t.Fatal(err) - } - defer func() { - if err := db.Close(); err != nil { - t.Error(err) - } - }() - if _, err := db.ExecContext(ctx, stmt); err != nil { - t.Fatal(err) - } - } - - withDB(1, "SET CLUSTER SETTING server.remote_debugging.mode = 'any';") - - // Get the ids for each node. - idMap := make(map[int]roachpb.NodeID) - for i := 0; i < c.NumNodes(); i++ { - var details serverpb.DetailsResponse - if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, i)+"/_status/details/local", &details); err != nil { - t.Fatal(err) - } - idMap[i] = details.NodeID - } - - decommissionHeader := []string{"id", "is_live", "replicas", "is_decommissioning", "is_draining"} - decommissionFooter := []string{"No more data reported on target nodes. Please verify cluster health before removing the nodes."} - waitLiveDeprecated := "--wait=live is deprecated and is treated as --wait=all" - - statusHeader := []string{"id", "address", "build", "started_at", "updated_at", "is_live"} - - log.Info(ctx, "decommissioning first node from the second, polling the status manually") - retryOpts := retry.Options{ - InitialBackoff: time.Second, - MaxBackoff: 5 * time.Second, - Multiplier: 1, - MaxRetries: 20, - } - for r := retry.Start(retryOpts); r.Next(); { - o, _, err := decommission(ctx, c, 1, []roachpb.NodeID{idMap[0]}, "decommission", "--wait", "none", "--format", "csv") - if err != nil { - t.Fatal(err) - } - - exp := [][]string{ - decommissionHeader, - {strconv.Itoa(int(idMap[0])), "true", "0", "true", "false"}, - decommissionFooter, - } - log.Infof(ctx, o) - - if err := matchCSV(o, exp); err != nil { - continue - } - break - } - - // Check that even though the node is decommissioned, we still see it (since - // it remains live) in `node ls`. - { - o, _, err := c.ExecCLI(ctx, 2, []string{"node", "ls", "--format", "csv"}) - if err != nil { - t.Fatal(err) - } - exp := [][]string{ - {"id"}, - {"1"}, - {"2"}, - {"3"}, - {"4"}, - } - if err := matchCSV(o, exp); err != nil { - t.Fatal(err) - } - } - // Ditto `node status`. 
- { - o, _, err := c.ExecCLI(ctx, 2, []string{"node", "status", "--format", "csv"}) - if err != nil { - t.Fatal(err) - } - exp := [][]string{ - statusHeader, - {`1`, `.*`, `.*`, `.*`, `.*`, `.*`}, - {`2`, `.*`, `.*`, `.*`, `.*`, `.*`}, - {`3`, `.*`, `.*`, `.*`, `.*`, `.*`}, - {`4`, `.*`, `.*`, `.*`, `.*`, `.*`}, - } - if err := matchCSV(o, exp); err != nil { - t.Fatal(err) - } - } - - log.Info(ctx, "recommissioning first node (from third node)") - { - o, _, err := decommission(ctx, c, 2, []roachpb.NodeID{idMap[0]}, "recommission") - if err != nil { - t.Fatal(err) - } - log.Infof(ctx, o) - } - - log.Info(ctx, "decommissioning second node from third, using --wait=all") - { - target := idMap[1] - o, _, err := decommission(ctx, c, 2, []roachpb.NodeID{target}, "decommission", "--wait", "all", "--format", "csv") - if err != nil { - t.Fatal(err) - } - log.Infof(ctx, o) - - exp := [][]string{ - decommissionHeader, - {strconv.Itoa(int(target)), "true", "0", "true", "false"}, - decommissionFooter, - } - if err := matchCSV(o, exp); err != nil { - t.Fatal(err) - } - } - - log.Info(ctx, "recommissioning second node from itself") - { - o, _, err := decommission(ctx, c, 1, []roachpb.NodeID{idMap[1]}, "recommission") - if err != nil { - t.Fatalf("could no recommission: %s\n%s", err, o) - } - log.Infof(ctx, o) - } - - log.Info(ctx, "decommissioning third node via `quit --decommission`") - { - // This should not take longer than five minutes, and if it does, it's - // likely stuck forever and we want to see the output. - timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) - defer cancel() - o, e, err := c.ExecCLI(timeoutCtx, 2, []string{"quit", "--decommission"}) - if err != nil { - if timeoutCtx.Err() != nil { - t.Fatalf("quit --decommission failed: %s\nstdout:\n%s\nstderr:\n%s", err, o, e) - } - // TODO(tschottdorf): grep the process output for the string announcing success? - log.Warningf(ctx, "ignoring error on quit --decommission: %s", err) - } else { - log.Infof(ctx, o, e) - } - - // Kill the node to generate the expected event (Kill is idempotent, so this works). - if err := c.Kill(ctx, 2); err != nil { - log.Warning(ctx, err) - } - } - - // Now that the third node is down and decommissioned, decommissioning it - // again should be a no-op. We do it from node one but as always it doesn't - // matter. - log.Info(ctx, "checking that other nodes see node three as successfully decommissioned") - { - target := idMap[2] - o, _, err := decommission(ctx, c, 1, []roachpb.NodeID{target}, "decommission", "--format", "csv") // wait=all is implied - if err != nil { - t.Fatal(err) - } - log.Infof(ctx, o) - - exp := [][]string{ - decommissionHeader, - // Expect the same as usual, except this time the node should be draining - // because it shut down cleanly (thanks to `quit --decommission`). - {strconv.Itoa(int(target)), "true", "0", "true", "true"}, - decommissionFooter, - } - if err := matchCSV(o, exp); err != nil { - t.Fatal(err) - } - - // Bring the node back up. It's still decommissioned, so it won't be of much use. - if err := c.Restart(ctx, 2); err != nil { - t.Fatal(err) - } - - // Recommission. Welcome back! - o, _, err = decommission(ctx, c, 1, []roachpb.NodeID{target}, "recommission") - if err != nil { - t.Fatal(err) - } - log.Infof(ctx, o) - } - - // Kill the first node and verify that we can decommission it while it's down, - // bringing it back up to verify that its replicas still get removed. 
- log.Info(ctx, "intentionally killing first node") - if err := c.Kill(ctx, 0); err != nil { - t.Fatal(err) - } - log.Info(ctx, "decommission first node, starting with it down but restarting it for verification") - { - target := idMap[0] - o, e, err := decommission(ctx, c, 2, []roachpb.NodeID{target}, "decommission", "--wait", "live") - if err != nil { - t.Fatal(err) - } - log.Infof(ctx, o) - if strings.Split(e, "\n")[1] != waitLiveDeprecated { - t.Fatal("missing deprecate message for --wait=live") - } - if err := c.Restart(ctx, 0); err != nil { - t.Fatal(err) - } - // Run a second time to wait until the replicas have all been GC'ed. - // Note that we specify "all" because even though the first node is - // now running, it may not be live by the time the command runs. - o, _, err = decommission(ctx, c, 2, []roachpb.NodeID{target}, "decommission", "--wait", "all", "--format", "csv") - if err != nil { - t.Fatal(err) - } - - log.Info(ctx, o) - - exp := [][]string{ - decommissionHeader, - {strconv.Itoa(int(target)), "true", "0", "true", "false"}, - decommissionFooter, - } - if err := matchCSV(o, exp); err != nil { - t.Fatal(err) - } - } - - // Now we want to test decommissioning a truly dead node. Make sure we don't - // waste too much time waiting for the node to be recognized as dead. Note that - // we don't want to set this number too low or everything will seem dead to the - // allocator at all times, so nothing will ever happen. - withDB(1, "SET CLUSTER SETTING server.time_until_store_dead = '1m15s'") - - log.Info(ctx, "intentionally killing first node") - if err := c.Kill(ctx, 0); err != nil { - t.Fatal(err) - } - // It is being decommissioned in absentia, meaning that its replicas are - // being removed due to deadness. We can't see that reflected in the output - // since the current mechanism gets its replica counts from what the node - // reports about itself, so our assertion here is somewhat weak. - log.Info(ctx, "decommission first node in absentia using --wait=live") - { - target := idMap[0] - o, e, err := decommission(ctx, c, 2, []roachpb.NodeID{target}, "decommission", "--wait", "live", "--format", "csv") - if err != nil { - t.Fatal(err) - } - - log.Infof(ctx, o) - - // Note we don't check precisely zero replicas (which the node would write - // itself, but it's dead). We do check that the node isn't live, though, which - // is essentially what `--wait=live` waits for. - // Note that the target node may still be "live" when it's marked as - // decommissioned, as its replica count may drop to zero faster than - // liveness times out. - exp := [][]string{ - decommissionHeader, - {strconv.Itoa(int(target)), `true|false`, "0", `true`, `false`}, - decommissionFooter, - } - if err := matchCSV(o, exp); err != nil { - t.Fatal(err) - } - if strings.Split(e, "\n")[1] != waitLiveDeprecated { - t.Fatal("missing deprecate message for --wait=live") - } - } - - // Check that (at least after a bit) the node disappears from `node ls` - // because it is decommissioned and not live. 
- for { - o, _, err := c.ExecCLI(ctx, 2, []string{"node", "ls", "--format", "csv"}) - if err != nil { - t.Fatal(err) - } - - log.Info(ctx, o) - - exp := [][]string{ - {"id"}, - {"2"}, - {"3"}, - {"4"}, - } - - if err := matchCSV(o, exp); err != nil { - time.Sleep(time.Second) - continue - } - break - } - for { - o, _, err := c.ExecCLI(ctx, 2, []string{"node", "status", "--format", "csv"}) - if err != nil { - t.Fatal(err) - } - - log.Info(ctx, o) - - exp := [][]string{ - statusHeader, - {`2`, `.*`, `.*`, `.*`, `.*`, `.*`}, - {`3`, `.*`, `.*`, `.*`, `.*`, `.*`}, - {`4`, `.*`, `.*`, `.*`, `.*`, `.*`}, - } - if err := matchCSV(o, exp); err != nil { - time.Sleep(time.Second) - continue - } - break - } - - var rows *gosql.Rows - if err := retry.ForDuration(time.Minute, func() error { - // Verify the event log has recorded exactly one decommissioned or - // recommissioned event for each commissioning operation. - // - // Spurious errors appear to be possible since we might be trying to - // send RPCs to the (relatively recently) down node: - // - // pq: rpc error: code = Unavailable desc = grpc: the connection is - // unavailable - // - // Seen in https://teamcity.cockroachdb.com/viewLog.html?buildId=344802. - db, err := gosql.Open("postgres", c.PGUrl(ctx, 1)) - if err != nil { - t.Fatal(err) - } - defer func() { - if err := db.Close(); err != nil { - t.Error(err) - } - }() - - rows, err = db.Query(` - SELECT "eventType", "targetID" FROM system.eventlog - WHERE "eventType" IN ($1, $2) ORDER BY timestamp`, - sql.EventLogNodeDecommissioned, sql.EventLogNodeRecommissioned, - ) - if err != nil { - log.Warning(ctx, errors.Wrap(err, "retrying after")) - return err - } - return nil - }); err != nil { - t.Fatal(err) - } - - matrix, err := sqlutils.RowsToStrMatrix(rows) - if err != nil { - t.Fatal(err) - } - expMatrix := [][]string{ - {string(sql.EventLogNodeDecommissioned), idMap[0].String()}, - {string(sql.EventLogNodeRecommissioned), idMap[0].String()}, - {string(sql.EventLogNodeDecommissioned), idMap[1].String()}, - {string(sql.EventLogNodeRecommissioned), idMap[1].String()}, - {string(sql.EventLogNodeDecommissioned), idMap[2].String()}, - {string(sql.EventLogNodeRecommissioned), idMap[2].String()}, - {string(sql.EventLogNodeDecommissioned), idMap[0].String()}, - } - - if !reflect.DeepEqual(matrix, expMatrix) { - t.Fatalf("unexpected diff(matrix, expMatrix):\n%s", pretty.Diff(matrix, expMatrix)) - } - - // Last, verify that the operator can't shoot themselves in the foot by - // accidentally decommissioning all nodes. - var allNodeIDs []roachpb.NodeID - for _, nodeID := range idMap { - allNodeIDs = append(allNodeIDs, nodeID) - } - - // Specify wait=none because the command would block forever (the replicas have - // nowhere to go). - if _, _, err := decommission( - ctx, c, 1, allNodeIDs, "decommission", "--wait", "none", - ); err != nil { - t.Fatal(err) - } - - // Check that we can still do stuff. Creating a database should be good enough. - db, err := gosql.Open("postgres", c.PGUrl(ctx, 1)) - if err != nil { - t.Fatal(err) - } - defer func() { _ = db.Close() }() - - if _, err := db.Exec(`CREATE DATABASE still_working;`); err != nil { - t.Fatal(err) - } - - // Recommission all nodes. - if _, _, err := decommission( - ctx, c, 1, allNodeIDs, "recommission", - ); err != nil { - t.Fatal(err) - } - - // To verify that all nodes are actually accepting replicas again, decommission - // the first nodes (blocking until it's done). This proves that the other nodes - // absorb the first one's replicas. 
- if _, _, err := decommission( - ctx, c, 1, []roachpb.NodeID{idMap[0]}, "decommission", - ); err != nil { - t.Fatal(err) - } -} diff --git a/pkg/acceptance/init_test.go b/pkg/acceptance/init_test.go index 9896354a1541..c5f9bc3999cf 100644 --- a/pkg/acceptance/init_test.go +++ b/pkg/acceptance/init_test.go @@ -39,8 +39,7 @@ func TestInitModeBootstrapNodeZero(t *testing.T) { s := log.Scope(t) defer s.Close(t) - // TODO(tschottdorf): give LocalCluster support for the init modes and we should be able - // to switch this to RunLocal. Ditto below. + // TODO(peter): Move this test to a roachtest. RunDocker(t, func(t *testing.T) { runTestWithCluster(t, testInitModeInner, useInitMode(cluster.INIT_BOOTSTRAP_NODE_ZERO)) }) @@ -50,7 +49,7 @@ func TestInitModeCommand(t *testing.T) { s := log.Scope(t) defer s.Close(t) - // TODO(tschottdorf): see above. + // TODO(peter): Move this test to a roachtest. RunDocker(t, func(t *testing.T) { runTestWithCluster(t, testInitModeInner, useInitMode(cluster.INIT_COMMAND)) }) @@ -75,7 +74,7 @@ func TestInitModeNone(t *testing.T) { s := log.Scope(t) defer s.Close(t) - // TODO(tschottdorf): see above. + // TODO(peter): Move this test to a roachtest. RunDocker(t, func(t *testing.T) { runTestWithCluster(t, testInitModeNoneInner, useInitMode(cluster.INIT_NONE)) }) diff --git a/pkg/acceptance/util_cluster.go b/pkg/acceptance/util_cluster.go index 49b2f2d8a1d1..62898bfd88b8 100644 --- a/pkg/acceptance/util_cluster.go +++ b/pkg/acceptance/util_cluster.go @@ -16,8 +16,6 @@ package acceptance import ( "context" - "io/ioutil" - "os" "path/filepath" "regexp" "strings" @@ -25,23 +23,15 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/acceptance/cluster" - "github.com/cockroachdb/cockroach/pkg/acceptance/localcluster" "github.com/cockroachdb/cockroach/pkg/testutils" - "github.com/cockroachdb/cockroach/pkg/util/binfetcher" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/pkg/errors" ) const ( - localTest = "runMode=local" dockerTest = "runMode=docker" ) -// RunLocal runs the given acceptance test using a bare cluster. -func RunLocal(t *testing.T, testee func(t *testing.T)) { - t.Run(localTest, testee) -} - // RunDocker runs the given acceptance test using a Docker cluster. func RunDocker(t *testing.T, testee func(t *testing.T)) { t.Run(dockerTest, testee) @@ -87,15 +77,13 @@ func StartCluster(ctx context.Context, t *testing.T, cfg cluster.TestConfig) (c parts := strings.Split(t.Name(), "/") if len(parts) < 2 { - t.Fatal("must invoke RunLocal or RunDocker") + t.Fatal("must invoke RunDocker") } var runMode string for _, part := range parts[1:] { part = reStripTestEnumeration.ReplaceAllLiteralString(part, "") switch part { - case localTest: - fallthrough case dockerTest: if runMode != "" { t.Fatalf("test has more than one run mode: %s and %s", runMode, part) @@ -105,44 +93,6 @@ func StartCluster(ctx context.Context, t *testing.T, cfg cluster.TestConfig) (c } switch runMode { - case localTest: - pwd, err := os.Getwd() - if err != nil { - t.Fatal(err) - } - dataDir, err := ioutil.TempDir(pwd, ".localcluster") - if err != nil { - t.Fatal(err) - } - - logDir := *flagLogDir - if logDir != "" { - logDir = filepath.Join(logDir, filepath.Clean(t.Name())) - } - - perNodeCfg := localcluster.MakePerNodeFixedPortsCfg(len(cfg.Nodes)) - for i := 0; i < len(cfg.Nodes); i++ { - // TODO(tschottdorf): handle Nodes[i].Stores properly. 
- if cfg.Nodes[i].Version != "" { - nCfg := perNodeCfg[i] - nCfg.Binary = GetBinary(ctx, t, cfg.Nodes[i].Version) - perNodeCfg[i] = nCfg - } - } - clusterCfg := localcluster.ClusterConfig{ - Ephemeral: true, - DataDir: dataDir, - LogDir: logDir, - NumNodes: len(cfg.Nodes), - PerNodeCfg: perNodeCfg, - NoWait: cfg.NoWait, - } - - l := localcluster.New(clusterCfg) - - l.Start(ctx) - c = &localcluster.LocalCluster{Cluster: l} - case dockerTest: logDir := *flagLogDir if logDir != "" { @@ -153,7 +103,7 @@ func StartCluster(ctx context.Context, t *testing.T, cfg cluster.TestConfig) (c c = l default: - t.Fatalf("unable to run in mode %q, use either RunLocal or RunDocker", runMode) + t.Fatalf("unable to run in mode %q, use RunDocker", runMode) } // Don't wait for replication unless requested (usually it is). @@ -229,17 +179,3 @@ func StartCluster(ctx context.Context, t *testing.T, cfg cluster.TestConfig) (c completed = true return c } - -// GetBinary retrieves a binary for the specified version and returns it. -func GetBinary(ctx context.Context, t *testing.T, version string) string { - t.Helper() - bin, err := binfetcher.Download(ctx, binfetcher.Options{ - Binary: "cockroach", - Dir: ".localcluster_cache", - Version: version, - }) - if err != nil { - t.Fatalf("unable to set up binary for v%s: %s", version, err) - } - return bin -} diff --git a/pkg/cmd/roachtest/acceptance.go b/pkg/cmd/roachtest/acceptance.go index f53188fb0c52..bd7966ab9045 100644 --- a/pkg/cmd/roachtest/acceptance.go +++ b/pkg/cmd/roachtest/acceptance.go @@ -38,6 +38,7 @@ func registerAcceptance(r *registry) { {"bank/node-restart", runBankNodeRestart}, {"build-info", runBuildInfo}, {"cli/node-status", runCLINodeStatus}, + {"decommission", runDecommissionAcceptance}, {"event-log", runEventLog}, {"gossip/peerings", runGossipPeerings}, {"gossip/restart", runGossipRestart}, diff --git a/pkg/cmd/roachtest/decommission.go b/pkg/cmd/roachtest/decommission.go index 1c2484ca4090..f626c451226f 100644 --- a/pkg/cmd/roachtest/decommission.go +++ b/pkg/cmd/roachtest/decommission.go @@ -16,13 +16,25 @@ package main import ( + "bytes" "context" + "encoding/csv" "fmt" + "io" + "os/exec" + "reflect" + "regexp" "strconv" + "strings" "time" + "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + + "github.com/kr/pretty" _ "github.com/lib/pq" + "github.com/pkg/errors" "golang.org/x/sync/errgroup" ) @@ -120,7 +132,7 @@ func runDecommission(t *test, c *cluster, nodes int, duration time.Duration) { if err != nil { t.Fatal(err) } - c.l.Printf(fmt.Sprintf("run: %s\n", stmt)) + c.l.Printf("run: %s\n", stmt) } var m *errgroup.Group // see comment in version.go @@ -229,9 +241,434 @@ func registerDecommission(r *registry) { Run: func(ctx context.Context, t *test, c *cluster) { if local { duration = 3 * time.Minute - fmt.Printf("running with duration=%s in local mode\n", duration) + c.l.Printf("running with duration=%s in local mode\n", duration) } runDecommission(t, c, numNodes, duration) }, }) } + +func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) { + args := startArgs("--sequential", "--env=COCKROACH_SCAN_MAX_IDLE_TIME=5ms") + c.Put(ctx, cockroach, "./cockroach") + c.Start(ctx, args) + + execCLI := func( + ctx context.Context, + runNode int, + extraArgs ...string, + ) (string, error) { + args := []string{"ssh", c.makeNodes(c.Node(runNode)), "--", cockroach} + args = append(args, extraArgs...) 
+ args = append(args, "--insecure") + args = append(args, fmt.Sprintf("--port={pgport:%d}", runNode)) + c.l.Printf("> roachprod %s\n", strings.Join(args, " ")) + var buf bytes.Buffer + cmd := exec.CommandContext(ctx, roachprod, args...) + cmd.Stdout = io.MultiWriter(c.l.stdout, &buf) + cmd.Stderr = cmd.Stdout + err := cmd.Run() + return buf.String(), err + } + + decommission := func( + ctx context.Context, + runNode int, + targetNodes nodeListOption, + verbs ...string, + ) (string, error) { + args := []string{"node"} + args = append(args, verbs...) + for _, target := range targetNodes { + args = append(args, strconv.Itoa(target)) + } + return execCLI(ctx, runNode, args...) + } + + matchCSV := func(csvStr string, matchColRow [][]string) (err error) { + defer func() { + if err != nil { + err = errors.Errorf("csv input:\n%v\nexpected:\n%s\nerrors:%s", + csvStr, pretty.Sprint(matchColRow), err) + } + }() + + reader := csv.NewReader(strings.NewReader(csvStr)) + reader.FieldsPerRecord = -1 + records, err := reader.ReadAll() + if err != nil { + return err + } + + lr, lm := len(records), len(matchColRow) + if lr < lm { + return errors.Errorf("csv has %d rows, but expected at least %d", lr, lm) + } + + // Compare only the last len(matchColRow) records. That is, if we want to + // match 4 rows and we have 100 records, we only really compare + // records[96:], that is, the last four rows. + records = records[lr-lm:] + + for i := range records { + if lr, lm := len(records[i]), len(matchColRow[i]); lr != lm { + return errors.Errorf("row #%d: csv has %d columns, but expected %d", i+1, lr, lm) + } + for j := range records[i] { + pat, str := matchColRow[i][j], records[i][j] + re := regexp.MustCompile(pat) + if !re.MatchString(str) { + err = errors.Errorf("%v\nrow #%d, col #%d: found %q which does not match %q", + err, i+1, j+1, str, pat) + } + } + } + return err + } + + decommissionHeader := []string{ + "id", "is_live", "replicas", "is_decommissioning", "is_draining", + } + decommissionFooter := []string{ + "No more data reported on target nodes. " + + "Please verify cluster health before removing the nodes.", + } + statusHeader := []string{ + "id", "address", "build", "started_at", "updated_at", "is_live", + } + waitLiveDeprecated := "--wait=live is deprecated and is treated as --wait=all" + + c.l.Printf("decommissioning first node from the second, polling the status manually\n") + retryOpts := retry.Options{ + InitialBackoff: time.Second, + MaxBackoff: 5 * time.Second, + Multiplier: 1, + MaxRetries: 20, + } + for r := retry.Start(retryOpts); r.Next(); { + o, err := decommission(ctx, 2, c.Node(1), + "decommission", "--wait", "none", "--format", "csv") + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + + exp := [][]string{ + decommissionHeader, + {"1", "true", "0", "true", "false"}, + decommissionFooter, + } + + if err := matchCSV(o, exp); err != nil { + continue + } + break + } + + // Check that even though the node is decommissioned, we still see it (since + // it remains live) in `node ls`. + { + o, err := execCLI(ctx, 2, "node", "ls", "--format", "csv") + if err != nil { + t.Fatalf("node-ls failed: %v", err) + } + exp := [][]string{ + {"id"}, + {"1"}, + {"2"}, + {"3"}, + {"4"}, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) + } + } + // Ditto `node status`. 
+ { + o, err := execCLI(ctx, 2, "node", "status", "--format", "csv") + if err != nil { + t.Fatalf("node-status failed: %v", err) + } + exp := [][]string{ + statusHeader, + {`1`, `.*`, `.*`, `.*`, `.*`, `.*`}, + {`2`, `.*`, `.*`, `.*`, `.*`, `.*`}, + {`3`, `.*`, `.*`, `.*`, `.*`, `.*`}, + {`4`, `.*`, `.*`, `.*`, `.*`, `.*`}, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) + } + } + + c.l.Printf("recommissioning first node (from third node)\n") + if _, err := decommission(ctx, 3, c.Node(1), "recommission"); err != nil { + t.Fatalf("recommission failed: %v", err) + } + + c.l.Printf("decommissioning second node from third, using --wait=all\n") + { + o, err := decommission(ctx, 3, c.Node(2), + "decommission", "--wait", "all", "--format", "csv") + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + + exp := [][]string{ + decommissionHeader, + {"2", "true", "0", "true", "false"}, + decommissionFooter, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) + } + } + + c.l.Printf("recommissioning second node from itself\n") + if _, err := decommission(ctx, 2, c.Node(2), "recommission"); err != nil { + t.Fatalf("recommission failed: %v", err) + } + + c.l.Printf("decommissioning third node via `quit --decommission`\n") + func() { + // This should not take longer than five minutes, and if it does, it's + // likely stuck forever and we want to see the output. + timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + if _, err := execCLI(timeoutCtx, 3, "quit", "--decommission"); err != nil { + if timeoutCtx.Err() != nil { + t.Fatalf("quit --decommission failed: %s", err) + } + // TODO(tschottdorf): grep the process output for the string announcing success? + c.l.Errorf("WARNING: ignoring error on quit --decommission: %s\n", err) + } + }() + + // Now that the third node is down and decommissioned, decommissioning it + // again should be a no-op. We do it from node one but as always it doesn't + // matter. + c.l.Printf("checking that other nodes see node three as successfully decommissioned\n") + { + o, err := decommission(ctx, 2, c.Node(3), + "decommission", "--format", "csv") // wait=all is implied + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + + exp := [][]string{ + decommissionHeader, + // Expect the same as usual, except this time the node should be draining + // because it shut down cleanly (thanks to `quit --decommission`). + {"3", "true", "0", "true", "true"}, + decommissionFooter, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) + } + + // Bring the node back up. It's still decommissioned, so it won't be of much use. + c.Stop(ctx, c.Node(3)) + c.Start(ctx, c.Node(3), args) + + // Recommission. Welcome back! + if _, err = decommission(ctx, 2, c.Node(3), "recommission"); err != nil { + t.Fatalf("recommission failed: %v", err) + } + } + + // Kill the first node and verify that we can decommission it while it's down, + // bringing it back up to verify that its replicas still get removed. 
+ c.l.Printf("intentionally killing first node\n") + c.Stop(ctx, c.Node(1)) + c.l.Printf("decommission first node, starting with it down but restarting it for verification\n") + { + o, err := decommission(ctx, 2, c.Node(1), + "decommission", "--wait", "live") + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + if strings.Split(o, "\n")[1] != waitLiveDeprecated { + t.Fatal("missing deprecate message for --wait=live") + } + c.Start(ctx, c.Node(1), args) + // Run a second time to wait until the replicas have all been GC'ed. + // Note that we specify "all" because even though the first node is + // now running, it may not be live by the time the command runs. + o, err = decommission(ctx, 2, c.Node(1), + "decommission", "--wait", "all", "--format", "csv") + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + + exp := [][]string{ + decommissionHeader, + {"1", "true|false", "0", "true", "false"}, + decommissionFooter, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) + } + } + + // Now we want to test decommissioning a truly dead node. Make sure we don't + // waste too much time waiting for the node to be recognized as dead. Note that + // we don't want to set this number too low or everything will seem dead to the + // allocator at all times, so nothing will ever happen. + func() { + db := c.Conn(ctx, 2) + defer db.Close() + const stmt = "SET CLUSTER SETTING server.time_until_store_dead = '1m15s'" + if _, err := db.ExecContext(ctx, stmt); err != nil { + t.Fatal(err) + } + }() + + c.l.Printf("intentionally killing first node\n") + c.Stop(ctx, c.Node(1)) + // It is being decommissioned in absentia, meaning that its replicas are + // being removed due to deadness. We can't see that reflected in the output + // since the current mechanism gets its replica counts from what the node + // reports about itself, so our assertion here is somewhat weak. + c.l.Printf("decommission first node in absentia using --wait=live\n") + { + o, err := decommission(ctx, 3, c.Node(1), + "decommission", "--wait", "live", "--format", "csv") + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + + // Note we don't check precisely zero replicas (which the node would write + // itself, but it's dead). We do check that the node isn't live, though, which + // is essentially what `--wait=live` waits for. + // Note that the target node may still be "live" when it's marked as + // decommissioned, as its replica count may drop to zero faster than + // liveness times out. + exp := [][]string{ + decommissionHeader, + {"1", `true|false`, "0", `true`, `false`}, + decommissionFooter, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) + } + if strings.Split(o, "\n")[1] != waitLiveDeprecated { + t.Fatal("missing deprecate message for --wait=live") + } + } + + // Check that (at least after a bit) the node disappears from `node ls` + // because it is decommissioned and not live. 
+ for { + o, err := execCLI(ctx, 2, "node", "ls", "--format", "csv") + if err != nil { + t.Fatalf("node-ls failed: %v", err) + } + + exp := [][]string{ + {"id"}, + {"2"}, + {"3"}, + {"4"}, + } + + if err := matchCSV(o, exp); err != nil { + time.Sleep(time.Second) + continue + } + break + } + for { + o, err := execCLI(ctx, 2, "node", "status", "--format", "csv") + if err != nil { + t.Fatalf("node-status failed: %v", err) + } + + exp := [][]string{ + statusHeader, + {`2`, `.*`, `.*`, `.*`, `.*`, `.*`}, + {`3`, `.*`, `.*`, `.*`, `.*`, `.*`}, + {`4`, `.*`, `.*`, `.*`, `.*`, `.*`}, + } + if err := matchCSV(o, exp); err != nil { + time.Sleep(time.Second) + continue + } + break + } + + if err := retry.ForDuration(time.Minute, func() error { + // Verify the event log has recorded exactly one decommissioned or + // recommissioned event for each commissioning operation. + // + // Spurious errors appear to be possible since we might be trying to + // send RPCs to the (relatively recently) down node: + // + // pq: rpc error: code = Unavailable desc = grpc: the connection is + // unavailable + // + // Seen in https://teamcity.cockroachdb.com/viewLog.html?buildId=344802. + db := c.Conn(ctx, 2) + defer db.Close() + + rows, err := db.Query(` +SELECT "eventType", "targetID" FROM system.eventlog +WHERE "eventType" IN ($1, $2) ORDER BY timestamp`, + "node_decommissioned", "node_recommissioned", + ) + if err != nil { + c.l.Printf("retrying: %v\n", err) + return err + } + defer rows.Close() + + matrix, err := sqlutils.RowsToStrMatrix(rows) + if err != nil { + return err + } + + expMatrix := [][]string{ + {"node_decommissioned", "1"}, + {"node_recommissioned", "1"}, + {"node_decommissioned", "2"}, + {"node_recommissioned", "2"}, + {"node_decommissioned", "3"}, + {"node_recommissioned", "3"}, + {"node_decommissioned", "1"}, + } + + if !reflect.DeepEqual(matrix, expMatrix) { + t.Fatalf("unexpected diff(matrix, expMatrix):\n%s", pretty.Diff(matrix, expMatrix)) + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Last, verify that the operator can't shoot themselves in the foot by + // accidentally decommissioning all nodes. + // + // Specify wait=none because the command would block forever (the replicas have + // nowhere to go). + if _, err := decommission(ctx, 2, c.All(), "decommission", "--wait", "none"); err != nil { + t.Fatalf("decommission failed: %v", err) + } + + // Check that we can still do stuff. Creating a database should be good enough. + db := c.Conn(ctx, 2) + defer db.Close() + + if _, err := db.Exec(`CREATE DATABASE still_working;`); err != nil { + t.Fatal(err) + } + + // Recommission all nodes. + if _, err := decommission(ctx, 2, c.All(), "recommission"); err != nil { + t.Fatalf("recommission failed: %v", err) + } + + // To verify that all nodes are actually accepting replicas again, decommission + // the first nodes (blocking until it's done). This proves that the other nodes + // absorb the first one's replicas. + if _, err := decommission(ctx, 2, c.Node(1), "decommission"); err != nil { + t.Fatalf("decommission failed: %v", err) + } +}
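Editor's note: the heart of both the deleted acceptance test and the new roachtest is the regex-per-cell CSV matcher used to assert on `cockroach node decommission/ls/status --format csv` output. The following is a minimal, standalone sketch of that approach for readers skimming the diff. It is simplified relative to the helper in the patch: it returns on the first mismatch instead of accumulating per-cell errors, and the sample output in main() is illustrative only, not captured from a real cluster.

package main

import (
	"encoding/csv"
	"fmt"
	"regexp"
	"strings"
)

// matchCSV compares the trailing len(expected) records of csvStr against
// per-cell regular expressions, mirroring the matchCSV helper in the test.
func matchCSV(csvStr string, expected [][]string) error {
	reader := csv.NewReader(strings.NewReader(csvStr))
	reader.FieldsPerRecord = -1 // rows may have differing column counts
	records, err := reader.ReadAll()
	if err != nil {
		return err
	}
	if len(records) < len(expected) {
		return fmt.Errorf("csv has %d rows, expected at least %d", len(records), len(expected))
	}
	// Only the last len(expected) rows are compared, so earlier output
	// (e.g. progress rows printed while polling with --wait=none) is ignored.
	records = records[len(records)-len(expected):]
	for i, row := range records {
		if len(row) != len(expected[i]) {
			return fmt.Errorf("row #%d: csv has %d columns, expected %d", i+1, len(row), len(expected[i]))
		}
		for j, cell := range row {
			if !regexp.MustCompile(expected[i][j]).MatchString(cell) {
				return fmt.Errorf("row #%d, col #%d: %q does not match %q", i+1, j+1, cell, expected[i][j])
			}
		}
	}
	return nil
}

func main() {
	// Hypothetical decommission output: header plus one status row.
	out := "id,is_live,replicas,is_decommissioning,is_draining\n" +
		"1,true,0,true,false\n"
	exp := [][]string{
		{"id", "is_live", "replicas", "is_decommissioning", "is_draining"},
		{"1", "true|false", "0", "true", "false"},
	}
	fmt.Println(matchCSV(out, exp)) // prints <nil> on a match
}

Because every expected cell is a regular expression, assertions such as `true|false` tolerate liveness flapping while the poll loop (retry.Options with a 1s initial backoff, 5s cap, and 20 retries in the patch) waits for the decommissioning state to settle.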