Skip to content

Commit

Permalink
sql: keep track of statement error code and error message in crdb_internal.execution_insights
Browse files Browse the repository at this point in the history

Part of: cockroachdb#87785, cockroachdb#94381.

Previously, the insights subsystem did not keep track of error codes and
error messages for failed executions; it only recorded a
`FailedExecution` value in the `problem` field. This commit adds
the `error_code` and `error_msg` columns to the
`crdb_internal.cluster_execution_insights` virtual table. The next PR
will focus on displaying these values in the UI.

Release note (sql change): Added `error_code` and `error_msg`
columns to the `crdb_internal.cluster_execution_insights` virtual table,
which contain the error code and error message, respectively, of a
failed statement execution.
  • Loading branch information
gtr committed Jan 31, 2023
1 parent 91bdcdd commit ca94fcb
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 11 deletions.
10 changes: 10 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,16 @@ SELECT * FROM crdb_internal.node_inflight_trace_spans WHERE span_id < 0
----
trace_id parent_span_id span_id goroutine_id finished start_time duration operation

query TTTBTTTTTIITITTTTTTTTTTTTTTT colnames
SELECT * FROM crdb_internal.cluster_execution_insights WHERE query = ''
----
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention contention_events index_recommendations implicit_txn error_code error_msg

query TTTBTTTTTIITITTTTTTTTTTTTTTT colnames
SELECT * FROM crdb_internal.node_execution_insights WHERE query = ''
----
session_id txn_id txn_fingerprint_id stmt_id stmt_fingerprint_id problem causes query status start_time end_time full_scan user_name app_name database_name plan_gist rows_read rows_written priority retries last_retry_reason exec_node_ids contention contention_events index_recommendations implicit_txn error_code error_msg

query TTTBTTTTTIITITTTTT colnames
SELECT * FROM crdb_internal.cluster_txn_execution_insights WHERE query = ''
----
Expand Down
6 changes: 5 additions & 1 deletion pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -7119,7 +7119,9 @@ CREATE TABLE crdb_internal.%s (
contention INTERVAL,
contention_events JSONB,
index_recommendations STRING[] NOT NULL,
implicit_txn BOOL NOT NULL
implicit_txn BOOL NOT NULL,
error_code STRING,
error_msg STRING
)`

var crdbInternalClusterExecutionInsightsTable = virtualSchemaTable{
Expand Down Expand Up @@ -7249,6 +7251,8 @@ func populateStmtInsights(
contentionEvents,
indexRecommendations,
tree.MakeDBool(tree.DBool(insight.Transaction.ImplicitTxn)),
tree.NewDString(s.ErrorCode),
tree.NewDString(s.ErrorMsg),
))
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pkg/sql/sqlstats/insights/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,7 @@ func (d *latencyThresholdDetector) enabled() bool {
// isSlow reports whether the detector is enabled and the statement's
// LatencyInSeconds is at least the LatencyThreshold setting (read from
// d.st.SV and converted to seconds).
func (d *latencyThresholdDetector) isSlow(s *Statement) bool {
	// A disabled detector never flags a statement; bail out before
	// reading the threshold setting.
	if !d.enabled() {
		return false
	}
	threshold := LatencyThreshold.Get(&d.st.SV).Seconds()
	return s.LatencyInSeconds >= threshold
}

// isFailed reports whether the statement's Status is Statement_Failed.
func isFailed(s *Statement) bool {
return s.Status == Statement_Failed
}
2 changes: 2 additions & 0 deletions pkg/sql/sqlstats/insights/insights.proto
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ message Statement {
repeated cockroach.roachpb.ContentionEvent contention_events = 20 [(gogoproto.nullable) = false];
Problem problem = 21;
repeated Cause causes = 22;
string error_code = 23;
string error_msg = 24;
}


Expand Down
15 changes: 7 additions & 8 deletions pkg/sql/sqlstats/insights/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,11 @@ func (r *lockingRegistry) ObserveTransaction(sessionID clusterunique.ID, transac
delete(r.statements, sessionID)
defer statements.release()

var slowStatements intsets.Fast
// Mark statements which are detected as slow or have a failed status.
var slowOrFailedStatements intsets.Fast
for i, s := range *statements {
if r.detector.isSlow(s) {
slowStatements.Add(i)
if r.detector.isSlow(s) || isFailed(s) {
slowOrFailedStatements.Add(i)
}
}

Expand All @@ -112,8 +113,8 @@ func (r *lockingRegistry) ObserveTransaction(sessionID clusterunique.ID, transac
highContention = transaction.Contention.Seconds() >= LatencyThreshold.Get(&r.causes.st.SV).Seconds()
}

if slowStatements.Empty() && !highContention {
// We only record an insight if we have slow statements or high txn contention.
if slowOrFailedStatements.Empty() && !highContention {
// We only record an insight if we have slow or failed statements or high txn contention.
return
}

Expand All @@ -127,14 +128,12 @@ func (r *lockingRegistry) ObserveTransaction(sessionID clusterunique.ID, transac
}

for i, s := range *statements {
if slowStatements.Contains(i) {
if slowOrFailedStatements.Contains(i) {
switch s.Status {
case Statement_Completed:
s.Problem = Problem_SlowExecution
s.Causes = r.causes.examine(s.Causes, s)
case Statement_Failed:
// Note that we'll be building better failure support for 23.1.
// For now, we only mark failed statements that were also slow.
s.Problem = Problem_FailedExecution
}

Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sqlstats/ssmemstorage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ go_library(
"//pkg/settings",
"//pkg/settings/cluster",
"//pkg/sql/execstats",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/sqlstats",
"//pkg/sql/sqlstats/insights",
"//pkg/util",
Expand Down
13 changes: 13 additions & 0 deletions pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/execstats"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sqlstats"
"github.com/cockroachdb/cockroach/pkg/sql/sqlstats/insights"
"github.com/cockroachdb/cockroach/pkg/util"
Expand Down Expand Up @@ -185,6 +186,16 @@ func (s *Container) RecordStatement(
contentionEvents = value.ExecStats.ContentionEvents
}

var errorCode string
if value.StatementError != nil {
errorCode = pgerror.GetPGCode(value.StatementError).String()
}

var errorMsg string
if value.StatementError != nil {
errorMsg = value.StatementError.Error()
}

s.insights.ObserveStatement(value.SessionID, &insights.Statement{
ID: value.StatementID,
FingerprintID: stmtFingerprintID,
Expand All @@ -204,6 +215,8 @@ func (s *Container) RecordStatement(
ContentionEvents: contentionEvents,
IndexRecommendations: value.IndexRecommendations,
Database: value.Database,
ErrorCode: errorCode,
ErrorMsg: errorMsg,
})

return stats.ID, nil
Expand Down

0 comments on commit ca94fcb

Please sign in to comment.