From a51410a670dd0a768b249b0919d204c9c7f4efc2 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Fri, 27 Oct 2023 10:10:22 +0000 Subject: [PATCH] demo: wait for stopper to stop on shutdown Previously, attempting to quickly shutdown and restart a node using the demo `\demo shutdown` and `\demo restart` command would result in an error: ERROR: internal server error: failed to create engines: resource temporarily unavailable This was revealed in the multi-tenant tests but is not multi-tenant specific and happens in non-mt demo clusters as well. The `\demo shutdown` command sends a Drain request with `Shutdown: true`. However, as part of the shutdown, the gRPC server is shut down, which will return an error to the client (which is ignored) before the entire server is shutdown. There is a TODO in the code from 2019 asking why we don't shut down gRPC later in the process. This is a demo cluster specific fix in which we wait on the stopper client side since we happen to have a reference to it. This is intended for backport so the disabled test can be re-enabled on the release branches. It's likely that we can do a larger fix in which we leave the drain server running for as long as possible. But, the blast radius of such a change would not be suitable for backport given the low-impact of the problem. Fixes: #110748 Release note: None --- pkg/acceptance/generated_cli_test.go | 7 +++++++ pkg/cli/democluster/demo_cluster.go | 7 +++++++ ..._multitenant.tcl.disabled => test_demo_multitenant.tcl} | 4 ---- 3 files changed, 14 insertions(+), 4 deletions(-) rename pkg/cli/interactive_tests/{test_demo_multitenant.tcl.disabled => test_demo_multitenant.tcl} (91%) diff --git a/pkg/acceptance/generated_cli_test.go b/pkg/acceptance/generated_cli_test.go index 0386fe57eaad..12ac1f8adb6a 100644 --- a/pkg/acceptance/generated_cli_test.go +++ b/pkg/acceptance/generated_cli_test.go @@ -137,6 +137,13 @@ func TestDockerCLI_test_demo_memory_warning(t *testing.T) { runTestDockerCLI(t, "test_demo_memory_warning", "../cli/interactive_tests/test_demo_memory_warning.tcl") } +func TestDockerCLI_test_demo_multitenant(t *testing.T) { + s := log.Scope(t) + defer s.Close(t) + + runTestDockerCLI(t, "test_demo_multitenant", "../cli/interactive_tests/test_demo_multitenant.tcl") +} + func TestDockerCLI_test_demo_networking(t *testing.T) { s := log.Scope(t) defer s.Close(t) diff --git a/pkg/cli/democluster/demo_cluster.go b/pkg/cli/democluster/demo_cluster.go index 380277d389ad..1dcab49d63b4 100644 --- a/pkg/cli/democluster/demo_cluster.go +++ b/pkg/cli/democluster/demo_cluster.go @@ -1060,6 +1060,13 @@ func (c *transientCluster) DrainAndShutdown(ctx context.Context, nodeID int32) e if err := c.drainAndShutdown(ctx, c.servers[serverIdx].adminClient); err != nil { return err } + + select { + case <-c.servers[serverIdx].Stopper().IsStopped(): + case <-time.After(10 * time.Second): + return errors.Errorf("server stopper not stopped after 10 seconds") + } + c.servers[serverIdx].TestServerInterface = nil c.servers[serverIdx].adminClient = nil if c.demoCtx.Multitenant { diff --git a/pkg/cli/interactive_tests/test_demo_multitenant.tcl.disabled b/pkg/cli/interactive_tests/test_demo_multitenant.tcl similarity index 91% rename from pkg/cli/interactive_tests/test_demo_multitenant.tcl.disabled rename to pkg/cli/interactive_tests/test_demo_multitenant.tcl index 3d277787d79f..fa5b5dc015c8 100644 --- a/pkg/cli/interactive_tests/test_demo_multitenant.tcl.disabled +++ b/pkg/cli/interactive_tests/test_demo_multitenant.tcl @@ -1,9 +1,5 @@ #! /usr/bin/env expect -f -# This test is skipped -- its filename lets it hide from the selector in -# TestDockerCLI. Unskip it by renaming after fixing -# https://github.com/cockroachdb/cockroach/issues/110748. - source [file join [file dirname $argv0] common.tcl] spawn $argv demo --no-line-editor --empty --nodes 3 --multitenant --log-dir=logs