Skip to content

Commit

Permalink
sql: add troubleshooting mode session variable
Browse files Browse the repository at this point in the history
Resolves: cockroachdb#84429

This change introduces a `troubleshooting_mode` session variable. When
enabled, this session variable is intended to be used as a way to avoid
performing additional work on queries, particularly when the cluster is
experiencing issues/unavailability/failure. By default, this session
variable is disabled. Currently, this session variable is only used to
avoid collecting/emitting telemetry data.

Release note (sql change): Introduce new `troubleshooting_mode` session
variable, to avoid doing additional work on queries when possible (e.g.
collecting telemetry data). By default, this session variable is
disabled.
  • Loading branch information
Thomas Hardy committed Jul 18, 2022
1 parent 6ea03b9 commit eab31a2
Show file tree
Hide file tree
Showing 9 changed files with 153 additions and 1 deletion.
12 changes: 12 additions & 0 deletions pkg/cli/clisqlshell/sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -2104,6 +2104,18 @@ func (c *cliState) configurePreShellDefaults(
}
}

if c.sqlConnCtx.DebugMode {
	// If we are in debug mode, enable "troubleshooting mode" for this
	// session before any user statements run.
	c.exitErr = c.conn.Exec(
		context.Background(),
		"SET troubleshooting_mode = on")
	// A single check suffices: on failure, hand back the cleanup function
	// together with the error so the caller can still release resources.
	if c.exitErr != nil {
		return cleanupFn, c.exitErr
	}
}

// If any --set flags were set through the command line,
// synthetize '-e set=xxx' statements for them at the beginning.
c.iCtx.quitAfterExecStmts = len(c.sqlCtx.ExecStmts) > 0
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/exec_log.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func (p *planner) maybeLogStatementInternal(
p.logEventsOnlyExternally(ctx, eventLogEntry{event: &eventpb.AdminQuery{CommonSQLExecDetails: execDetails}})
}

if telemetryLoggingEnabled {
if telemetryLoggingEnabled && !p.SessionData().TroubleshootingMode {
// We only log to the telemetry channel if enough time has elapsed from
// the last event emission.
requiredTimeElapsed := 1.0 / float64(maxEventFrequency)
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3251,6 +3251,10 @@ func (m *sessionDataMutator) SetUnconstrainedNonCoveringIndexScanEnabled(val boo
m.data.UnconstrainedNonCoveringIndexScanEnabled = val
}

// SetTroubleshootingModeEnabled sets the TroubleshootingMode field of the
// session data to val.
func (m *sessionDataMutator) SetTroubleshootingModeEnabled(val bool) {
m.data.TroubleshootingMode = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4718,6 +4718,7 @@ transaction_rows_read_log 0
transaction_rows_written_err 0
transaction_rows_written_log 0
transaction_status NoTxn
troubleshooting_mode off
unconstrained_non_covering_index_scan_enabled off
xmloption content

Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -4243,6 +4243,7 @@ transaction_rows_read_log 0 NULL
transaction_rows_written_err 0 NULL NULL NULL string
transaction_rows_written_log 0 NULL NULL NULL string
transaction_status NoTxn NULL NULL NULL string
troubleshooting_mode off NULL NULL NULL string
unconstrained_non_covering_index_scan_enabled off NULL NULL NULL string
use_declarative_schema_changer on NULL NULL NULL string
vectorize on NULL NULL NULL string
Expand Down Expand Up @@ -4367,6 +4368,7 @@ transaction_rows_read_log 0 NULL
transaction_rows_written_err 0 NULL user NULL 0 0
transaction_rows_written_log 0 NULL user NULL 0 0
transaction_status NoTxn NULL user NULL NoTxn NoTxn
troubleshooting_mode off NULL user NULL off off
unconstrained_non_covering_index_scan_enabled off NULL user NULL off off
use_declarative_schema_changer on NULL user NULL on on
vectorize on NULL user NULL on on
Expand Down Expand Up @@ -4489,6 +4491,7 @@ transaction_rows_read_log NULL NULL NULL
transaction_rows_written_err NULL NULL NULL NULL NULL
transaction_rows_written_log NULL NULL NULL NULL NULL
transaction_status NULL NULL NULL NULL NULL
troubleshooting_mode NULL NULL NULL NULL NULL
unconstrained_non_covering_index_scan_enabled NULL NULL NULL NULL NULL
use_declarative_schema_changer NULL NULL NULL NULL NULL
vectorize NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ transaction_rows_read_log 0
transaction_rows_written_err 0
transaction_rows_written_log 0
transaction_status NoTxn
troubleshooting_mode off
unconstrained_non_covering_index_scan_enabled off
use_declarative_schema_changer on
vectorize on
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/sessiondatapb/session_data.proto
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ message SessionData {
// TrigramSimilarityThreshold configures the value that's used to compare
// trigram similarities to in order to evaluate the string % string overload.
double trigram_similarity_threshold = 20;

// TroubleshootingMode determines whether we refuse to do additional work
// with the query (e.g. collecting and emitting telemetry data).
// Troubleshooting mode is disabled by default.
bool troubleshooting_mode = 21;
}

// DataConversionConfig contains the parameters that influence the output
Expand Down
109 changes: 109 additions & 0 deletions pkg/sql/telemetry_logging_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,112 @@ func TestTelemetryLogging(t *testing.T) {
}
}
}

// TestNoTelemetryLogOnTroubleshootMode verifies that a statement executed
// while the troubleshooting_mode session variable is enabled produces no
// sampled_query telemetry entry, and that a statement executed after the
// variable is disabled does produce one.
func TestNoTelemetryLogOnTroubleshootMode(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sc := log.ScopeWithoutShowLogs(t)
	defer sc.Close(t)

	cleanup := installTelemetryLogFileSink(sc, t)
	defer cleanup()

	// The stubbed clock lets the test control how much time appears to have
	// elapsed between statements.
	st := stubTime{}

	s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{
		Knobs: base.TestingKnobs{
			TelemetryLoggingKnobs: &TelemetryLoggingTestingKnobs{
				getTimeNow: st.TimeNow,
			},
		},
	})
	db := sqlutils.MakeSQLRunner(sqlDB)
	defer s.Stopper().Stop(context.Background())

	db.Exec(t, `SET CLUSTER SETTING sql.telemetry.query_sampling.enabled = true;`)
	db.Exec(t, "CREATE TABLE t();")

	stubMaxEventFrequency := int64(1)
	telemetryMaxEventFrequency.Override(context.Background(), &s.ClusterSettings().SV, stubMaxEventFrequency)

	/*
		Testing Cases:
			- run query when troubleshoot mode is enabled
				- ensure no log appears
			- run another query when troubleshoot mode is disabled
				- ensure log appears
	*/
	testData := []struct {
		name                      string
		query                     string
		expectedLogStatement      string
		enableTroubleshootingMode bool
		expectedNumLogs           int
	}{
		{
			"select-troubleshooting-enabled",
			"SELECT * FROM t LIMIT 1;",
			`SELECT * FROM \"\".\"\".t LIMIT ‹1›`,
			true,
			0,
		},
		{
			"select-troubleshooting-disabled",
			"SELECT * FROM t LIMIT 2;",
			`SELECT * FROM \"\".\"\".t LIMIT ‹2›`,
			false,
			1,
		},
	}

	for idx, tc := range testData {
		// Set the time for when we issue a query to enable/disable
		// troubleshooting mode.
		setTroubleshootModeTime := timeutil.FromUnixMicros(int64(idx * 1e6))
		st.setTime(setTroubleshootModeTime)
		if tc.enableTroubleshootingMode {
			db.Exec(t, `SET troubleshooting_mode = true;`)
		} else {
			db.Exec(t, `SET troubleshooting_mode = false;`)
		}
		// Advance time 1 second from previous query. Ensure enough time has passed
		// from when we set troubleshooting mode for this query to be sampled.
		setQueryTime := timeutil.FromUnixMicros(int64((idx + 1) * 1e6))
		st.setTime(setQueryTime)
		db.Exec(t, tc.query)
	}

	log.Flush()

	entries, err := log.FetchEntriesFromFiles(
		0,
		math.MaxInt64,
		10000,
		regexp.MustCompile(`"EventType":"sampled_query"`),
		log.WithMarkedSensitiveData,
	)

	if err != nil {
		t.Fatal(err)
	}

	if len(entries) == 0 {
		t.Fatal(errors.Newf("no entries found"))
	}

	for _, tc := range testData {
		numLogsFound := 0
		for i := len(entries) - 1; i >= 0; i-- {
			e := entries[i]
			if strings.Contains(e.Message, tc.expectedLogStatement) {
				if tc.enableTroubleshootingMode {
					// Report the entry that actually matched, not an
					// arbitrary one, so the failure output is actionable.
					t.Errorf("%s: unexpected log entry when troubleshooting mode enabled:\n%s", tc.name, e.Message)
				} else {
					numLogsFound++
				}
			}
		}
		if numLogsFound != tc.expectedNumLogs {
			t.Errorf("%s: expected %d log entries, found %d", tc.name, tc.expectedNumLogs, numLogsFound)
		}
	}
}
17 changes: 17 additions & 0 deletions pkg/sql/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,23 @@ var varGen = map[string]sessionVar{
},
},

// CockroachDB extension.
`troubleshooting_mode`: {
	GetStringVal: makePostgresBoolGetStringValFn(`troubleshooting_mode`),
	// Set parses the supplied string as a boolean and stores the result
	// through the session data mutator.
	Set: func(_ context.Context, m sessionDataMutator, s string) error {
		enabled, parseErr := paramparse.ParseBoolVar("troubleshooting_mode", s)
		if parseErr != nil {
			return parseErr
		}
		m.SetTroubleshootingModeEnabled(enabled)
		return nil
	},
	// Get renders the current session value as a Postgres-style setting.
	Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
		enabled := evalCtx.SessionData().TroubleshootingMode
		return formatBoolAsPostgresSetting(enabled), nil
	},
	GlobalDefault: globalFalse,
},

// This is read-only in Postgres also.
// See https://www.postgresql.org/docs/14/sql-show.html and
// https://www.postgresql.org/docs/14/locale.html
Expand Down

0 comments on commit eab31a2

Please sign in to comment.