From 7ef9f24075269f0d60b7b814f0fa835d6f39461a Mon Sep 17 00:00:00 2001 From: Andrew Baptist Date: Thu, 12 Dec 2024 14:57:57 -0500 Subject: [PATCH] kvclient: add metric for txns over the count limit after a response This commit adds a new metric, txn.count_limit_on_response, which tracks the number of transactions whose count of writes and locking reads exceeded the limit set by kv.transaction.max_intents_and_locks when a response was processed. Epic: none Release note (ops change): Add metric txn.count_limit_on_response. --- docs/generated/metrics/metrics.html | 1 + pkg/kv/kvclient/kvcoord/txn_interceptor_pipeliner.go | 1 + pkg/kv/kvclient/kvcoord/txn_metrics.go | 8 ++++++++ 3 files changed, 10 insertions(+) diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html index c2b8d112d356..8d5e03e4e623 100644 --- a/docs/generated/metrics/metrics.html +++ b/docs/generated/metrics/metrics.html @@ -1872,6 +1872,7 @@ APPLICATIONtxn.condensed_intent_spansKV transactions that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans_gauge for a gauge of such transactions currently running.KV TransactionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONtxn.condensed_intent_spans_gaugeKV transactions currently running that have exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). See also txn.condensed_intent_spans for a perpetual counter/rate.KV TransactionsGAUGECOUNTAVGNONE APPLICATIONtxn.condensed_intent_spans_rejectedKV transactions that have been aborted because they exceeded their intent tracking memory budget (kv.transaction.max_intents_bytes). 
Rejection is caused by kv.transaction.reject_over_max_intents_budget.KV TransactionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONtxn.count_limit_on_responseKV transactions that have exceeded the count limit on a responseKV TransactionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONtxn.count_limit_rejectedKV transactions that have been aborted because they exceeded the max number of writes and locking reads allowedKV TransactionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONtxn.durationsKV transaction durationsKV Txn DurationHISTOGRAMNANOSECONDSAVGNONE APPLICATIONtxn.inflight_locks_over_tracking_budgetKV transactions whose in-flight writes and locking reads have exceeded the intent tracking memory budget (kv.transaction.max_intents_bytes).KV TransactionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE diff --git a/pkg/kv/kvclient/kvcoord/txn_interceptor_pipeliner.go b/pkg/kv/kvclient/kvcoord/txn_interceptor_pipeliner.go index 634cb2233379..c514e8f4c9bb 100644 --- a/pkg/kv/kvclient/kvcoord/txn_interceptor_pipeliner.go +++ b/pkg/kv/kvclient/kvcoord/txn_interceptor_pipeliner.go @@ -740,6 +740,7 @@ func (tp *txnPipeliner) updateLockTracking( "allowed by kv.transaction.max_intents_and_locks: "+ "count: %d, txn: %s, ba: %s", tp.writeCount, ba.Txn, ba.Summary()) } + tp.txnMetrics.TxnsResponseOverCountLimit.Inc(1) } // Deal with compacting the lock spans. diff --git a/pkg/kv/kvclient/kvcoord/txn_metrics.go b/pkg/kv/kvclient/kvcoord/txn_metrics.go index f7132cd76fe8..ec2a1bb3fa95 100644 --- a/pkg/kv/kvclient/kvcoord/txn_metrics.go +++ b/pkg/kv/kvclient/kvcoord/txn_metrics.go @@ -35,6 +35,7 @@ type TxnMetrics struct { TxnsWithCondensedIntentsGauge *metric.Gauge TxnsRejectedByLockSpanBudget *metric.Counter TxnsRejectedByCountLimit *metric.Counter + TxnsResponseOverCountLimit *metric.Counter TxnsInFlightLocksOverTrackingBudget *metric.Counter // Restarts is the number of times we had to restart the transaction. 
@@ -181,6 +182,12 @@ var ( Measurement: "KV Transactions", Unit: metric.Unit_COUNT, } + metaTxnsResponseOverCountLimit = metric.Metadata{ + Name: "txn.count_limit_on_response", + Help: "KV transactions that have exceeded the count limit on a response", + Measurement: "KV Transactions", + Unit: metric.Unit_COUNT, + } metaTxnsInflightLocksOverTrackingBudget = metric.Metadata{ Name: "txn.inflight_locks_over_tracking_budget", Help: "KV transactions whose in-flight writes and locking reads have exceeded " + @@ -297,6 +304,7 @@ func MakeTxnMetrics(histogramWindow time.Duration) TxnMetrics { TxnsWithCondensedIntentsGauge: metric.NewGauge(metaTxnsWithCondensedIntentSpansGauge), TxnsRejectedByLockSpanBudget: metric.NewCounter(metaTxnsRejectedByLockSpanBudget), TxnsRejectedByCountLimit: metric.NewCounter(metaTxnsRejectedByCountLimit), + TxnsResponseOverCountLimit: metric.NewCounter(metaTxnsResponseOverCountLimit), TxnsInFlightLocksOverTrackingBudget: metric.NewCounter(metaTxnsInflightLocksOverTrackingBudget), Restarts: metric.NewHistogram(metric.HistogramOptions{ Metadata: metaRestartsHistogram,