From 01ca48ce4977bafc819c9aa7ed7de2e16e220cda Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Wed, 22 May 2024 17:40:49 +0000 Subject: [PATCH] sqlccl: de-flake TestActiveCancelSession This commit fixes a possible race in `TestActiveCancelSession`. In particular, that test uses two connections in the following manner: - conn1 runs a `pg_sleep` query that is intended to run forever - conn2 executes `CANCEL SESSION` for conn1. Since the `pg_sleep` query is executed in a separate goroutine, previously it was possible for the `CANCEL SESSION` statement to be issued before either the separate goroutine was spun up or it began executing the `pg_sleep` query, and in both cases this would result in a `connection reset by peer` error, which is not what the test expects. This race is now fixed by blocking conn2 until it sees the `pg_sleep` query in the `SHOW CLUSTER QUERIES` output. Release note: None --- pkg/ccl/testccl/sqlccl/run_control_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pkg/ccl/testccl/sqlccl/run_control_test.go b/pkg/ccl/testccl/sqlccl/run_control_test.go index 700a1707bea1..522c58f9fec3 100644 --- a/pkg/ccl/testccl/sqlccl/run_control_test.go +++ b/pkg/ccl/testccl/sqlccl/run_control_test.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/sql/sqltestutils" + "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -205,6 +206,27 @@ func testCancelSession(t *testing.T, hasActiveSession bool) { _, err = conn1.ExecContext(ctx, "SELECT pg_sleep(1000000)") errChan <- err }() + // Block until the query goroutine was spun up and began + // executing the query - this is needed to avoid a race between + // canceling the session before vs after 'pg_sleep' query begins + // (the former would result in an unexpected error 
message). + testutils.SucceedsSoon(t, func() error { + row := conn2.QueryRowContext(ctx, ` +SELECT count(*) FROM [SHOW CLUSTER QUERIES] WHERE query LIKE '%pg_sleep%' + AND query NOT LIKE '%SHOW CLUSTER QUERIES%' +;`) + var count int + if err = row.Scan(&count); err != nil { + t.Fatal(err) + } + if count == 1 { + return nil + } + if count > 1 { + t.Fatalf("unexpectedly found %d pg_sleep queries", count) + } + return errors.New("pg_sleep query hasn't started yet") + }) } // Cancel the session on node 1.