diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index 8ff3d806a88d..2a3f00122a3d 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -3138,6 +3138,9 @@ func TestStatusAPIContentionEvents(t *testing.T) { server1Conn := sqlutils.MakeSQLRunner(testCluster.ServerConn(0)) server2Conn := sqlutils.MakeSQLRunner(testCluster.ServerConn(1)) + contentionCountBefore := testCluster.Server(1).SQLServer().(*sql.Server). + Metrics.EngineMetrics.SQLContendedTxns.Count() + sqlutils.CreateTable( t, testCluster.ServerConn(0), @@ -3214,6 +3217,13 @@ SET TRACING=off; (statistics -> 'execution_statistics' -> 'contentionTime' ->> 'mean')::FLOAT > 0 AND app_name = 'contentionTest' `, [][]string{{"1"}}) + + contentionCountNow := testCluster.Server(1).SQLServer().(*sql.Server). + Metrics.EngineMetrics.SQLContendedTxns.Count() + + require.Greaterf(t, contentionCountNow, contentionCountBefore, + "expected txn contention count to be more than %d, but it is %d", + contentionCountBefore, contentionCountNow) } func TestStatusCancelSessionGatewayMetadataPropagation(t *testing.T) { diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index ec35a584cd3b..a1ecc187b315 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -429,6 +429,7 @@ func makeMetrics(internal bool) Metrics { 6*metricsSampleInterval), SQLTxnsOpen: metric.NewGauge(getMetricMeta(MetaSQLTxnsOpen, internal)), SQLActiveStatements: metric.NewGauge(getMetricMeta(MetaSQLActiveQueries, internal)), + SQLContendedTxns: metric.NewCounter(getMetricMeta(MetaSQLTxnContended, internal)), TxnAbortCount: metric.NewCounter(getMetricMeta(MetaTxnAbort, internal)), FailureCount: metric.NewCounter(getMetricMeta(MetaFailure, internal)), diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index 8796b00c19f3..448d23eac92d 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -2216,6 +2216,10 @@ func (ex *connExecutor) 
recordTransactionFinish( } ex.metrics.EngineMetrics.SQLTxnLatency.RecordValue(txnTime.Nanoseconds()) + if contentionDuration := ex.extraTxnState.accumulatedStats.ContentionTime.Nanoseconds(); contentionDuration > 0 { + ex.metrics.EngineMetrics.SQLContendedTxns.Inc(1) + } + ex.txnIDCacheWriter.Record(contentionpb.ResolvedTxnID{ TxnID: ev.txnID, TxnFingerprintID: transactionFingerprintID, diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index e961d5e98bc0..c0167eb823f4 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -850,6 +850,12 @@ var ( Measurement: "SQL Statements", Unit: metric.Unit_COUNT, } + MetaSQLTxnContended = metric.Metadata{ + Name: "sql.txn.contended.count", + Help: "Number of SQL transactions that experienced contention", + Measurement: "Contention", + Unit: metric.Unit_COUNT, + } MetaSelectStarted = metric.Metadata{ Name: "sql.select.started.count", Help: "Number of SQL SELECT statements started", diff --git a/pkg/sql/executor_statement_metrics.go b/pkg/sql/executor_statement_metrics.go index 26733b37a79b..56aa93a0f153 100644 --- a/pkg/sql/executor_statement_metrics.go +++ b/pkg/sql/executor_statement_metrics.go @@ -40,6 +40,7 @@ type EngineMetrics struct { SQLTxnLatency *metric.Histogram SQLTxnsOpen *metric.Gauge SQLActiveStatements *metric.Gauge + SQLContendedTxns *metric.Counter // TxnAbortCount counts transactions that were aborted, either due // to non-retriable errors, or retriable errors when the client-side diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go index 4e0cf0a08d60..253ef883ab35 100644 --- a/pkg/ts/catalog/chart_catalog.go +++ b/pkg/ts/catalog/chart_catalog.go @@ -2363,6 +2363,14 @@ var charts = []sectionDescription{ }, AxisLabel: "Count", }, + { + Title: "SQL Transaction Contention", + Metrics: []string{ + "sql.txn.contended.count", + "sql.txn.contended.count.internal", + }, + AxisLabel: "Transactions", + }, }, }, {