diff --git a/pkg/cli/clisqlcfg/context.go b/pkg/cli/clisqlcfg/context.go
index 5c23ca9df834..72a3a05dcfcc 100644
--- a/pkg/cli/clisqlcfg/context.go
+++ b/pkg/cli/clisqlcfg/context.go
@@ -200,6 +200,9 @@ func (c *Context) Run(conn clisqlclient.Conn) error {
 	if err := c.maybeSetReadOnly(conn); err != nil {
 		return err
 	}
+	if err := c.maybeSetTroubleshootingMode(conn); err != nil {
+		return err
+	}
 
 	if c.ConnCtx.DebugMode {
 		fmt.Fprintln(c.CmdOut,
@@ -240,3 +243,13 @@ func (c *Context) maybeSetReadOnly(conn clisqlclient.Conn) error {
 	return conn.Exec(context.Background(),
 		"SET default_transaction_read_only = TRUE")
 }
+
+// maybeSetTroubleshootingMode turns on the troubleshooting_mode session
+// variable on conn when c.ConnCtx.DebugMode is set, and does nothing
+// otherwise. Any error from executing the SET statement is returned.
+func (c *Context) maybeSetTroubleshootingMode(conn clisqlclient.Conn) error {
+	if c.ConnCtx.DebugMode {
+		return conn.Exec(context.Background(), "SET troubleshooting_mode = on")
+	}
+	return nil
+}
diff --git a/pkg/cli/interactive_tests/test_client_side_checking.tcl b/pkg/cli/interactive_tests/test_client_side_checking.tcl
index 3f6556de09a4..72d635597c53 100644
--- a/pkg/cli/interactive_tests/test_client_side_checking.tcl
+++ b/pkg/cli/interactive_tests/test_client_side_checking.tcl
@@ -87,6 +87,11 @@ eexpect "\"\".crdb_internal.node_build_info"
 eexpect "you might want to set the current database"
 eexpect "to the empty string"
 
+# Check that troubleshooting mode is enabled in debug mode.
+eexpect "root@"
+send "show troubleshooting_mode;\r"
+eexpect "on"
+
 eexpect "root@"
 send "\\set display_format csv\r\\set\r"
 eexpect "check_syntax,false"
diff --git a/pkg/sql/exec_log.go b/pkg/sql/exec_log.go
index 0fd49a8b3794..ed865cac06f9 100644
--- a/pkg/sql/exec_log.go
+++ b/pkg/sql/exec_log.go
@@ -375,7 +375,7 @@ func (p *planner) maybeLogStatementInternal(
 		p.logEventsOnlyExternally(ctx, eventLogEntry{event: &eventpb.AdminQuery{CommonSQLExecDetails: execDetails}})
 	}
 
-	if telemetryLoggingEnabled {
+	if telemetryLoggingEnabled && !p.SessionData().TroubleshootingMode {
 		// We only log to the telemetry channel if enough time has elapsed from
 		// the last event emission.
 		requiredTimeElapsed := 1.0 / float64(maxEventFrequency)
diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go
index 172c28f86035..179cf6475be8 100644
--- a/pkg/sql/exec_util.go
+++ b/pkg/sql/exec_util.go
@@ -3228,6 +3228,12 @@ func (m *sessionDataMutator) SetShowPrimaryKeyConstraintOnNotVisibleColumns(val
 	m.data.ShowPrimaryKeyConstraintOnNotVisibleColumns = val
 }
 
+// SetTroubleshootingModeEnabled sets the TroubleshootingMode field of the
+// session data to val.
+func (m *sessionDataMutator) SetTroubleshootingModeEnabled(val bool) {
+	m.data.TroubleshootingMode = val
+}
+
 // Utility functions related to scrubbing sensitive information on SQL Stats.
 
 // quantizeCounts ensures that the Count field in the
diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema
index 8ba9cf4051c8..bbaf2cd5ad70 100644
--- a/pkg/sql/logictest/testdata/logic_test/information_schema
+++ b/pkg/sql/logictest/testdata/logic_test/information_schema
@@ -4808,6 +4808,7 @@ transaction_rows_read_log 0
 transaction_rows_written_err 0
 transaction_rows_written_log 0
 transaction_status NoTxn
+troubleshooting_mode off
 xmloption content
 
 # information_schema can be used with the anonymous database.
diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog
index fb2922c621fc..3e39a9387258 100644
--- a/pkg/sql/logictest/testdata/logic_test/pg_catalog
+++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog
@@ -4231,6 +4231,7 @@ transaction_rows_read_log 0 NULL
 transaction_rows_written_err 0 NULL NULL NULL string
 transaction_rows_written_log 0 NULL NULL NULL string
 transaction_status NoTxn NULL NULL NULL string
+troubleshooting_mode off NULL NULL NULL string
 use_declarative_schema_changer on NULL NULL NULL string
 vectorize on NULL NULL NULL string
 xmloption content NULL NULL NULL string
@@ -4353,6 +4354,7 @@ transaction_rows_read_log 0 NULL
 transaction_rows_written_err 0 NULL user NULL 0 0
 transaction_rows_written_log 0 NULL user NULL 0 0
 transaction_status NoTxn NULL user NULL NoTxn NoTxn
+troubleshooting_mode off NULL user NULL off off
 use_declarative_schema_changer on NULL user NULL on on
 vectorize on NULL user NULL on on
 xmloption content NULL user NULL content content
@@ -4472,6 +4474,7 @@ transaction_rows_read_log NULL NULL NULL
 transaction_rows_written_err NULL NULL NULL NULL NULL
 transaction_rows_written_log NULL NULL NULL NULL NULL
 transaction_status NULL NULL NULL NULL NULL
+troubleshooting_mode NULL NULL NULL NULL NULL
 use_declarative_schema_changer NULL NULL NULL NULL NULL
 vectorize NULL NULL NULL NULL NULL
 xmloption NULL NULL NULL NULL NULL
diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source
index fed6ed690080..b2d18a18a48f 100644
--- a/pkg/sql/logictest/testdata/logic_test/show_source
+++ b/pkg/sql/logictest/testdata/logic_test/show_source
@@ -134,6 +134,7 @@ transaction_rows_read_log 0
 transaction_rows_written_err 0
 transaction_rows_written_log 0
 transaction_status NoTxn
+troubleshooting_mode off
 use_declarative_schema_changer on
 vectorize on
 xmloption content
diff --git a/pkg/sql/sessiondatapb/session_data.proto b/pkg/sql/sessiondatapb/session_data.proto
index 8e4529c034ea..3a73ce57a89b 100644
--- a/pkg/sql/sessiondatapb/session_data.proto
+++ b/pkg/sql/sessiondatapb/session_data.proto
@@ -89,6 +89,11 @@ message SessionData {
   // increase the speed of lookup joins when each input row might get multiple
   // looked up rows at the cost of increased memory usage.
   bool parallelize_multi_key_lookup_joins_enabled = 19;
+
+  // Troubleshooting mode determines whether we refuse to do additional work with
+  // the query (i.e. collect & emit telemetry data). Troubleshooting mode is
+  // disabled by default.
+  bool troubleshooting_mode = 21;
 }
 
 // DataConversionConfig contains the parameters that influence the output
diff --git a/pkg/sql/telemetry_logging_test.go b/pkg/sql/telemetry_logging_test.go
index 349c953330d5..810752c4c10e 100644
--- a/pkg/sql/telemetry_logging_test.go
+++ b/pkg/sql/telemetry_logging_test.go
@@ -284,3 +284,115 @@ func TestTelemetryLogging(t *testing.T) {
 		}
 	}
 }
+
+// TestNoTelemetryLogOnTroubleshootMode checks that a statement executed while
+// troubleshooting_mode is on produces no sampled_query telemetry event, and
+// that one executed after the mode is switched off does.
+func TestNoTelemetryLogOnTroubleshootMode(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	sc := log.ScopeWithoutShowLogs(t)
+	defer sc.Close(t)
+
+	cleanup := installTelemetryLogFileSink(sc, t)
+	defer cleanup()
+
+	st := stubTime{}
+
+	s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{
+		Knobs: base.TestingKnobs{
+			TelemetryLoggingKnobs: &TelemetryLoggingTestingKnobs{
+				getTimeNow: st.TimeNow,
+			},
+		},
+	})
+	db := sqlutils.MakeSQLRunner(sqlDB)
+	defer s.Stopper().Stop(context.Background())
+
+	db.Exec(t, `SET CLUSTER SETTING sql.telemetry.query_sampling.enabled = true;`)
+	db.Exec(t, "CREATE TABLE t();")
+
+	stubMaxEventFrequency := int64(1)
+	telemetryMaxEventFrequency.Override(context.Background(), &s.ClusterSettings().SV, stubMaxEventFrequency)
+
+	/*
+		Testing Cases:
+			- run query when troubleshoot mode is enabled
+				- ensure no log appears
+			- run another query when troubleshoot mode is disabled
+				- ensure log appears
+	*/
+	testData := []struct {
+		name                      string
+		query                     string
+		expectedLogStatement      string
+		enableTroubleshootingMode bool
+		expectedNumLogs           int
+	}{
+		{
+			"select-troubleshooting-enabled",
+			"SELECT * FROM t LIMIT 1;",
+			`SELECT * FROM \"\".\"\".t LIMIT ‹1›`,
+			true,
+			0,
+		},
+		{
+			"select-troubleshooting-disabled",
+			"SELECT * FROM t LIMIT 2;",
+			`SELECT * FROM \"\".\"\".t LIMIT ‹2›`,
+			false,
+			1,
+		},
+	}
+
+	for idx, tc := range testData {
+		// Set the time for when we issue a query to enable/disable
+		// troubleshooting mode.
+		setTroubleshootModeTime := timeutil.FromUnixMicros(int64(idx * 1e6))
+		st.setTime(setTroubleshootModeTime)
+		if tc.enableTroubleshootingMode {
+			db.Exec(t, `SET troubleshooting_mode = true;`)
+		} else {
+			db.Exec(t, `SET troubleshooting_mode = false;`)
+		}
+		// Advance time 1 second from previous query. Ensure enough time has passed
+		// from when we set troubleshooting mode for this query to be sampled.
+		setQueryTime := timeutil.FromUnixMicros(int64((idx + 1) * 1e6))
+		st.setTime(setQueryTime)
+		db.Exec(t, tc.query)
+	}
+
+	log.Flush()
+
+	entries, err := log.FetchEntriesFromFiles(
+		0,
+		math.MaxInt64,
+		10000,
+		regexp.MustCompile(`"EventType":"sampled_query"`),
+		log.WithMarkedSensitiveData,
+	)
+
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(entries) == 0 {
+		t.Fatal(errors.Newf("no entries found"))
+	}
+
+	for _, tc := range testData {
+		numLogsFound := 0
+		for i := len(entries) - 1; i >= 0; i-- {
+			e := entries[i]
+			if strings.Contains(e.Message, tc.expectedLogStatement) {
+				if tc.enableTroubleshootingMode {
+					t.Errorf("%s: unexpected log entry when troubleshooting mode enabled:\n%s", tc.name, e.Message)
+				} else {
+					numLogsFound++
+				}
+			}
+		}
+		if numLogsFound != tc.expectedNumLogs {
+			t.Errorf("%s: expected %d log entries, found %d", tc.name, tc.expectedNumLogs, numLogsFound)
+		}
+	}
+}
diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go
index 99df1f8b2b59..d6cbf9025c72 100644
--- a/pkg/sql/vars.go
+++ b/pkg/sql/vars.go
@@ -1230,6 +1230,24 @@ var varGen = map[string]sessionVar{
 	// See https://www.postgresql.org/docs/10/static/runtime-config-preset.html#GUC-SERVER-VERSION-NUM
 	`server_version_num`: makeReadOnlyVar(PgServerVersionNum),
 
+	// CockroachDB extension. While enabled, statements executed in the session
+	// are not logged to the telemetry channel.
+	`troubleshooting_mode`: {
+		GetStringVal: makePostgresBoolGetStringValFn(`troubleshooting_mode`),
+		Set: func(_ context.Context, m sessionDataMutator, s string) error {
+			b, err := paramparse.ParseBoolVar("troubleshooting_mode", s)
+			if err != nil {
+				return err
+			}
+			m.SetTroubleshootingModeEnabled(b)
+			return nil
+		},
+		Get: func(evalCtx *extendedEvalContext) (string, error) {
+			return formatBoolAsPostgresSetting(evalCtx.SessionData().TroubleshootingMode), nil
+		},
+		GlobalDefault: globalFalse,
+	},
+
 	// This is read-only in Postgres also.
 	// See https://www.postgresql.org/docs/14/sql-show.html and
 	// https://www.postgresql.org/docs/14/locale.html