diff --git a/pkg/ccl/testccl/sqlccl/BUILD.bazel b/pkg/ccl/testccl/sqlccl/BUILD.bazel index 661f35623c73..f28c0aed8962 100644 --- a/pkg/ccl/testccl/sqlccl/BUILD.bazel +++ b/pkg/ccl/testccl/sqlccl/BUILD.bazel @@ -4,6 +4,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_test") go_test( name = "sqlccl_test", srcs = [ + "explain_test.go", "gc_job_test.go", "main_test.go", "run_control_test.go", @@ -42,6 +43,7 @@ go_test( "//pkg/sql/catalog/desctestutils", "//pkg/sql/gcjob", "//pkg/sql/isql", + "//pkg/sql/lexbase", "//pkg/sql/sessiondatapb", "//pkg/sql/sqlliveness/slinstance", "//pkg/sql/sqltestutils", diff --git a/pkg/ccl/testccl/sqlccl/explain_test.go b/pkg/ccl/testccl/sqlccl/explain_test.go new file mode 100644 index 000000000000..da3081cd0605 --- /dev/null +++ b/pkg/ccl/testccl/sqlccl/explain_test.go @@ -0,0 +1,125 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt + +package sqlccl + +import ( + "context" + gosql "database/sql" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" + "github.com/cockroachdb/cockroach/pkg/sql/lexbase" + "github.com/cockroachdb/cockroach/pkg/sql/tests" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/randutil" + "github.com/cockroachdb/errors" +) + +// TestExplainRedactDDL tests that variants of EXPLAIN (REDACT) do not leak +// PII. This is very similar to sql.TestExplainRedact but includes CREATE TABLE +// and ALTER TABLE statements, which could include partitioning (hence this is +// in CCL). +func TestExplainRedactDDL(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + const numStatements = 10 + + ctx := context.Background() + rng, seed := randutil.NewTestRand() + t.Log("seed:", seed) + + params, _ := tests.CreateTestServerParams() + s, sqlDB, _ := serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + defer sqlDB.Close() + + query := func(sql string) (*gosql.Rows, error) { + return sqlDB.QueryContext(ctx, sql) + } + + // To check for PII leaks, we inject a single unlikely string into some of the + // query constants produced by SQLSmith, and then search the redacted EXPLAIN + // output for this string. + pii := "pachycephalosaurus" + containsPII := func(sql, output string) error { + lowerOutput := strings.ToLower(output) + if strings.Contains(lowerOutput, pii) { + return errors.Newf( + "output contained PII (%q):\n%s\noutput:\n%s\n", pii, sql, output, + ) + } + return nil + } + + // Perform a few random initial CREATE TABLEs. + setup := sqlsmith.RandTablesPrefixStringConsts(rng, pii) + setup = append(setup, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled = off;") + setup = append(setup, "SET statement_timeout = '5s';") + for _, stmt := range setup { + t.Log(stmt) + if _, err := sqlDB.ExecContext(ctx, stmt); err != nil { + // Ignore errors. + t.Log("-- ignoring error:", err) + continue + } + } + + // Check EXPLAIN (OPT, CATALOG, REDACT) for each table. + rows, err := query("SELECT table_name FROM [SHOW TABLES]") + if err != nil { + t.Fatal(err) + } + var tables []string + for rows.Next() { + var table string + if err = rows.Scan(&table); err != nil { + t.Fatal(err) + } + tables = append(tables, table) + } + for _, table := range tables { + explain := "EXPLAIN (OPT, CATALOG, REDACT) SELECT * FROM " + lexbase.EscapeSQLIdent(table) + t.Log(explain) + rows, err = query(explain) + if err != nil { + // This explain should always succeed. + t.Fatal(err) + } + var output strings.Builder + for rows.Next() { + var out string + if err = rows.Scan(&out); err != nil { + t.Fatal(err) + } + output.WriteString(out) + output.WriteRune('\n') + } + if err = containsPII(explain, output.String()); err != nil { + t.Error(err) + continue + } + + } + + // Set up smither to generate random DDL and DML statements. + smith, err := sqlsmith.NewSmither(sqlDB, rng, + sqlsmith.PrefixStringConsts(pii), + sqlsmith.EnableAlters(), + ) + if err != nil { + t.Fatal(err) + } + defer smith.Close() + + tests.GenerateAndCheckRedactedExplainsForPII(t, smith, numStatements, query, containsPII) +} diff --git a/pkg/sql/BUILD.bazel b/pkg/sql/BUILD.bazel index 2ad5ce869ac6..a4174d9abd1d 100644 --- a/pkg/sql/BUILD.bazel +++ b/pkg/sql/BUILD.bazel @@ -868,8 +868,6 @@ go_test( "@in_gopkg_yaml_v2//:yaml_v2", "@org_golang_google_protobuf//proto", "@org_golang_x_sync//errgroup", - "@org_golang_x_text//cases", - "@org_golang_x_text//language", ], ) diff --git a/pkg/sql/explain_test.go b/pkg/sql/explain_test.go index 92b85edcd848..59d23f908597 100644 --- a/pkg/sql/explain_test.go +++ b/pkg/sql/explain_test.go @@ -13,6 +13,7 @@ package sql_test import ( "bytes" "context" + gosql "database/sql" "fmt" "regexp" "strconv" @@ -22,7 +23,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" - "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/tests" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" @@ -34,8 +34,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" - "golang.org/x/text/cases" - "golang.org/x/text/language" ) func TestStatementReuses(t *testing.T) { @@ -481,6 +479,8 @@ func TestExplainRedact(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + const numStatements = 10 + ctx := context.Background() rng, seed := randutil.NewTestRand() t.Log("seed:", seed) @@ -490,18 +490,25 @@ func TestExplainRedact(t *testing.T) { defer s.Stopper().Stop(ctx) defer sqlDB.Close() + query := func(sql string) (*gosql.Rows, error) { + return sqlDB.QueryContext(ctx, sql) + } + // To check for PII leaks, we inject a single unlikely string into some of the - // query constants produced by SQLSmith, and then search the EXPLAIN output - // for this string. + // query constants produced by SQLSmith, and then search the redacted EXPLAIN + // output for this string. pii := "pterodactyl" - containsPII := func(explain, contents string) error { - lowerContents := strings.ToLower(contents) - if strings.Contains(lowerContents, pii) { - return errors.Newf("EXPLAIN output contained PII (%q):\n%s\noutput:\n%s\n", pii, explain, contents) + containsPII := func(explain, output string) error { + lowerOutput := strings.ToLower(output) + if strings.Contains(lowerOutput, pii) { + return errors.Newf( + "EXPLAIN output contained PII (%q):\n%s\noutput:\n%s\n", pii, explain, output, + ) } return nil } + // Set up smither to generate random DML statements. setup := sqlsmith.Setups["seed"](rng) setup = append(setup, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled = off;") setup = append(setup, "ANALYZE seed;") @@ -511,99 +518,13 @@ func TestExplainRedact(t *testing.T) { db.ExecMultiple(t, setup...) smith, err := sqlsmith.NewSmither(sqlDB, rng, - sqlsmith.EnableAlters(), sqlsmith.PrefixStringConsts(pii), + sqlsmith.DisableDDLs(), ) if err != nil { t.Fatal(err) } defer smith.Close() - // Generate a few random statements. - var statements [5]string - for i := range statements { - statements[i] = smith.Generate() - } - - // Gather EXPLAIN variants to test. - commands := []string{"EXPLAIN", "EXPLAIN ANALYZE"} - - modes := []string{"NO MODE"} - for modeStr, mode := range tree.ExplainModes() { - switch mode { - case tree.ExplainDebug: - // EXPLAIN ANALYZE (DEBUG, REDACT) is checked by TestExplainAnalyzeDebug/redact. - continue - } - modes = append(modes, modeStr) - } - - flags := []string{"NO FLAG"} - for flagStr, flag := range tree.ExplainFlags() { - switch flag { - case tree.ExplainFlagRedact: - // We add REDACT to each EXPLAIN below. - continue - } - flags = append(flags, flagStr) - } - - testName := func(s string) string { - return strings.ReplaceAll(cases.Title(language.English).String(s), " ", "") - } - - // Execute each EXPLAIN variant on each random statement, and look for PII. - for _, cmd := range commands { - t.Run(testName(cmd), func(t *testing.T) { - for _, mode := range modes { - t.Run(testName(mode), func(t *testing.T) { - if mode == "NO MODE" { - mode = "" - } else { - mode += ", " - } - for _, flag := range flags { - t.Run(testName(flag), func(t *testing.T) { - if flag == "NO FLAG" { - flag = "" - } else { - flag += ", " - } - for _, stmt := range statements { - explain := cmd + " (" + mode + flag + "REDACT) " + stmt - rows, err := sqlDB.QueryContext(ctx, explain) - if err != nil { - // There are many legitimate errors that could be returned - // that don't indicate a PII leak or a test failure. For - // example, EXPLAIN (OPT, JSON) is always a syntax error, or - // EXPLAIN ANALYZE of a random query might timeout. To avoid - // these false positives, we only fail on internal errors. - msg := err.Error() - if strings.Contains(msg, "internal error") { - t.Error(err) - } - continue - } - var output strings.Builder - for rows.Next() { - var out string - if err := rows.Scan(&out); err != nil { - t.Fatal(err) - } - output.WriteString(out) - output.WriteRune('\n') - } - if err := containsPII(explain, output.String()); err != nil { - t.Error(err) - continue - } - // TODO(michae2): When they are supported, also check HTML returned by - // EXPLAIN (DISTSQL, REDACT) and EXPLAIN (OPT, ENV, REDACT). - } - }) - } - }) - } - }) - } + tests.GenerateAndCheckRedactedExplainsForPII(t, smith, numStatements, query, containsPII) } diff --git a/pkg/sql/tests/BUILD.bazel b/pkg/sql/tests/BUILD.bazel index 5d7542da402c..7e486c1d29b7 100644 --- a/pkg/sql/tests/BUILD.bazel +++ b/pkg/sql/tests/BUILD.bazel @@ -6,21 +6,26 @@ go_library( srcs = [ "command_filters.go", "data.go", + "explain_test_util.go", "server_params.go", ], importpath = "github.com/cockroachdb/cockroach/pkg/sql/tests", visibility = ["//visibility:public"], deps = [ "//pkg/base", + "//pkg/internal/sqlsmith", "//pkg/kv", "//pkg/kv/kvpb", "//pkg/kv/kvserver", "//pkg/kv/kvserver/kvserverbase", "//pkg/roachpb", + "//pkg/sql/sem/tree", "//pkg/sql/sqlstats", "//pkg/testutils/storageutils", "//pkg/util/syncutil", "@com_github_cockroachdb_errors//:errors", + "@org_golang_x_text//cases", + "@org_golang_x_text//language", ], ) diff --git a/pkg/sql/tests/explain_test_util.go b/pkg/sql/tests/explain_test_util.go new file mode 100644 index 000000000000..2f2ced41f7f2 --- /dev/null +++ b/pkg/sql/tests/explain_test_util.go @@ -0,0 +1,123 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tests + +import ( + gosql "database/sql" + "strings" + "testing" + + "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "golang.org/x/text/cases" + "golang.org/x/text/language" +) + +// GenerateAndCheckRedactedExplainsForPII generates num random statements and +// checks that the output of all variants of EXPLAIN (REDACT) on each random +// statement does not contain injected PII. +func GenerateAndCheckRedactedExplainsForPII( + t *testing.T, + smith *sqlsmith.Smither, + num int, + query func(sql string) (*gosql.Rows, error), + containsPII func(explain, output string) error, +) { + // Generate a few random statements. + statements := make([]string, num) + t.Log("generated statements:") + for i := range statements { + statements[i] = smith.Generate() + t.Log(statements[i]) + } + + // Gather EXPLAIN variants to test. + commands := []string{"EXPLAIN", "EXPLAIN ANALYZE"} + + modes := []string{"NO MODE"} + for modeStr, mode := range tree.ExplainModes() { + switch mode { + case tree.ExplainDebug: + // EXPLAIN ANALYZE (DEBUG, REDACT) is checked by TestExplainAnalyzeDebug/redact. + continue + } + modes = append(modes, modeStr) + } + + flags := []string{"NO FLAG"} + for flagStr, flag := range tree.ExplainFlags() { + switch flag { + case tree.ExplainFlagRedact: + // We add REDACT to each EXPLAIN below. + continue + } + flags = append(flags, flagStr) + } + + testName := func(s string) string { + return strings.ReplaceAll(cases.Title(language.English).String(s), " ", "") + } + + // Execute each EXPLAIN variant on each random statement, and look for PII. + for _, cmd := range commands { + t.Run(testName(cmd), func(t *testing.T) { + for _, mode := range modes { + t.Run(testName(mode), func(t *testing.T) { + if mode == "NO MODE" { + mode = "" + } else { + mode += ", " + } + for _, flag := range flags { + t.Run(testName(flag), func(t *testing.T) { + if flag == "NO FLAG" { + flag = "" + } else { + flag += ", " + } + for _, stmt := range statements { + explain := cmd + " (" + mode + flag + "REDACT) " + stmt + rows, err := query(explain) + if err != nil { + // There are many legitimate errors that could be returned + // that don't indicate a PII leak or a test failure. For + // example, EXPLAIN (OPT, JSON) is always a syntax error, or + // EXPLAIN ANALYZE of a random query might timeout. To avoid + // these false positives, we only fail on internal errors. + msg := err.Error() + if strings.Contains(msg, "internal error") { + t.Error(err) + } + continue + } + var output strings.Builder + for rows.Next() { + var out string + if err := rows.Scan(&out); err != nil { + t.Fatal(err) + } + output.WriteString(out) + output.WriteRune('\n') + } + if err := containsPII(explain, output.String()); err != nil { + t.Error(err) + continue + } + // TODO(michae2): When they are supported, also check HTML returned by + // EXPLAIN (DISTSQL, REDACT) and EXPLAIN (OPT, ENV, REDACT). + } + }) + } + }) + } + }) + } +}