Skip to content

Commit

Permalink
metrics: add connection and fail metrics by resource group name (#4…
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Feb 19, 2024
1 parent 3b7f150 commit 24a9caa
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 74 deletions.
10 changes: 5 additions & 5 deletions pkg/executor/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@ func (c *Compiler) Compile(ctx context.Context, stmtNode ast.StmtNode) (_ *ExecS
})

if preparedObj != nil {
CountStmtNode(preparedObj.PreparedAst.Stmt, sessVars.InRestrictedSQL)
CountStmtNode(preparedObj.PreparedAst.Stmt, sessVars.InRestrictedSQL, sessVars.ResourceGroupName)
} else {
CountStmtNode(stmtNode, sessVars.InRestrictedSQL)
CountStmtNode(stmtNode, sessVars.InRestrictedSQL, sessVars.ResourceGroupName)
}
var lowerPriority bool
if c.Ctx.GetSessionVars().StmtCtx.Priority == mysql.NoPriority {
Expand Down Expand Up @@ -187,7 +187,7 @@ func isPhysicalPlanNeedLowerPriority(p plannercore.PhysicalPlan) bool {
}

// CountStmtNode records the number of statements with the same type.
func CountStmtNode(stmtNode ast.StmtNode, inRestrictedSQL bool) {
func CountStmtNode(stmtNode ast.StmtNode, inRestrictedSQL bool, resourceGroup string) {
if inRestrictedSQL {
return
}
Expand All @@ -203,11 +203,11 @@ func CountStmtNode(stmtNode ast.StmtNode, inRestrictedSQL bool) {
}
case config.GetGlobalConfig().Status.RecordDBLabel:
for dbLabel := range dbLabels {
metrics.StmtNodeCounter.WithLabelValues(typeLabel, dbLabel).Inc()
metrics.StmtNodeCounter.WithLabelValues(typeLabel, dbLabel, resourceGroup).Inc()
}
}
} else {
metrics.StmtNodeCounter.WithLabelValues(typeLabel, "").Inc()
metrics.StmtNodeCounter.WithLabelValues(typeLabel, "", resourceGroup).Inc()
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/metrics/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func InitExecutorMetrics() {
Subsystem: "executor",
Name: "statement_total",
Help: "Counter of StmtNode.",
}, []string{LblType, LblDb})
}, []string{LblType, LblDb, LblResourceGroup})

DbStmtNodeCounter = NewCounterVec(
prometheus.CounterOpts{
Expand Down
10 changes: 5 additions & 5 deletions pkg/metrics/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ var (
QueryDurationHistogram *prometheus.HistogramVec
QueryTotalCounter *prometheus.CounterVec
AffectedRowsCounter *prometheus.CounterVec
ConnGauge prometheus.Gauge
ConnGauge *prometheus.GaugeVec
DisconnectionCounter *prometheus.CounterVec
PreparedStmtGauge prometheus.Gauge
ExecuteErrorCounter *prometheus.CounterVec
Expand Down Expand Up @@ -97,7 +97,7 @@ func InitServerMetrics() {
Subsystem: "server",
Name: "query_total",
Help: "Counter of queries.",
}, []string{LblType, LblResult})
}, []string{LblType, LblResult, LblResourceGroup})

AffectedRowsCounter = NewCounterVec(
prometheus.CounterOpts{
Expand All @@ -107,13 +107,13 @@ func InitServerMetrics() {
Help: "Counters of server affected rows.",
}, []string{LblSQLType})

ConnGauge = NewGauge(
ConnGauge = NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "tidb",
Subsystem: "server",
Name: "connections",
Help: "Number of connections.",
})
}, []string{LblResourceGroup})

DisconnectionCounter = NewCounterVec(
prometheus.CounterOpts{
Expand All @@ -136,7 +136,7 @@ func InitServerMetrics() {
Subsystem: "server",
Name: "execute_error_total",
Help: "Counter of execute errors.",
}, []string{LblType, LblDb})
}, []string{LblType, LblDb, LblResourceGroup})

CriticalErrorCounter = NewCounter(
prometheus.CounterOpts{
Expand Down
2 changes: 1 addition & 1 deletion pkg/metrics/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func InitSessionMetrics() {
Subsystem: "session",
Name: "resource_group_query_total",
Help: "Counter of the total number of queries for the resource group",
}, []string{LblName})
}, []string{LblName, LblResourceGroup})

FairLockingUsageCount = NewCounterVec(
prometheus.CounterOpts{
Expand Down
2 changes: 1 addition & 1 deletion pkg/metrics/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ func GetNonTransactionalStmtCounter() NonTransactionalStmtCounter {

// GetSavepointStmtCounter gets the savepoint statement executed counter.
func GetSavepointStmtCounter() int64 {
return readCounter(StmtNodeCounter.With(prometheus.Labels{LblType: "Savepoint", LblDb: ""}))
return readCounter(StmtNodeCounter.WithLabelValues("Savepoint", "", "default"))
}

// GetLazyPessimisticUniqueCheckSetCounter returns the counter of setting tidb_constraint_check_in_place_pessimistic to false.
Expand Down
1 change: 0 additions & 1 deletion pkg/parser/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ require (
require (
github.com/benbjohnson/clock v1.3.5 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/klauspost/compress v1.17.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
go.uber.org/atomic v1.11.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions pkg/parser/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
github.com/klauspost/compress v1.17.1 h1:NE3C767s2ak2bweCZo3+rdP4U/HoyVXLv/X9f2gPS5g=
github.com/klauspost/compress v1.17.1/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
Expand Down
1 change: 1 addition & 0 deletions pkg/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ go_library(
"//pkg/config",
"//pkg/domain",
"//pkg/domain/infosync",
"//pkg/domain/resourcegroup",
"//pkg/errno",
"//pkg/executor",
"//pkg/executor/mppcoordmanager",
Expand Down
51 changes: 37 additions & 14 deletions pkg/server/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import (
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/domain/infosync"
"github.com/pingcap/tidb/pkg/domain/resourcegroup"
"github.com/pingcap/tidb/pkg/errno"
"github.com/pingcap/tidb/pkg/executor"
"github.com/pingcap/tidb/pkg/extension"
Expand Down Expand Up @@ -338,17 +339,26 @@ func (cc *clientConn) handshake(ctx context.Context) error {
func (cc *clientConn) Close() error {
cc.server.rwlock.Lock()
delete(cc.server.clients, cc.connectionID)
connections := len(cc.server.clients)
resourceGroupName, count := "", 0
if ctx := cc.getCtx(); ctx != nil {
resourceGroupName = ctx.GetSessionVars().ResourceGroupName
count = cc.server.ConnNumByResourceGroup[resourceGroupName]
if count <= 1 {
delete(cc.server.ConnNumByResourceGroup, resourceGroupName)
} else {
cc.server.ConnNumByResourceGroup[resourceGroupName]--
}
}
cc.server.rwlock.Unlock()
return closeConn(cc, connections)
return closeConn(cc, resourceGroupName, count)
}

// closeConn is idempotent and thread-safe.
// It will be called on the same `clientConn` more than once to avoid connection leak.
func closeConn(cc *clientConn, connections int) error {
func closeConn(cc *clientConn, resourceGroupName string, connections int) error {
var err error
cc.closeOnce.Do(func() {
metrics.ConnGauge.Set(float64(connections))
metrics.ConnGauge.WithLabelValues(resourceGroupName).Set(float64(connections))
if cc.connectionID > 0 {
cc.server.dom.ReleaseConnID(cc.connectionID)
cc.connectionID = 0
Expand All @@ -372,7 +382,14 @@ func closeConn(cc *clientConn, connections int) error {

func (cc *clientConn) closeWithoutLock() error {
delete(cc.server.clients, cc.connectionID)
return closeConn(cc, len(cc.server.clients))
name := cc.getCtx().GetSessionVars().ResourceGroupName
count := cc.server.ConnNumByResourceGroup[name]
if count <= 1 {
delete(cc.server.ConnNumByResourceGroup, name)
} else {
cc.server.ConnNumByResourceGroup[name]--
}
return closeConn(cc, name, count-1)
}

// writeInitialHandshake sends server version, connection ID, server capability, collation, server status
Expand Down Expand Up @@ -1092,9 +1109,11 @@ func (cc *clientConn) Run(ctx context.Context) {
if ctx := cc.getCtx(); ctx != nil {
txnMode = ctx.GetSessionVars().GetReadableTxnMode()
}
for _, dbName := range session.GetDBNames(cc.getCtx().GetSessionVars()) {
metrics.ExecuteErrorCounter.WithLabelValues(metrics.ExecuteErrorToLabel(err), dbName).Inc()
vars := cc.getCtx().GetSessionVars()
for _, dbName := range session.GetDBNames(vars) {
metrics.ExecuteErrorCounter.WithLabelValues(metrics.ExecuteErrorToLabel(err), dbName, vars.ResourceGroupName).Inc()
}

if storeerr.ErrLockAcquireFailAndNoWaitSet.Equal(err) {
logutil.Logger(ctx).Debug("Expected error for FOR UPDATE NOWAIT", zap.Error(err))
} else {
Expand Down Expand Up @@ -1143,20 +1162,25 @@ func (cc *clientConn) addMetrics(cmd byte, startTime time.Time, err error) {
return
}

vars := cc.getCtx().GetSessionVars()
resourceGroupName := vars.ResourceGroupName
var counter prometheus.Counter
if err != nil && int(cmd) < len(server_metrics.QueryTotalCountErr) {
counter = server_metrics.QueryTotalCountErr[cmd]
} else if err == nil && int(cmd) < len(server_metrics.QueryTotalCountOk) {
counter = server_metrics.QueryTotalCountOk[cmd]
if len(resourceGroupName) == 0 || resourceGroupName == resourcegroup.DefaultResourceGroupName {
if err != nil && int(cmd) < len(server_metrics.QueryTotalCountErr) {
counter = server_metrics.QueryTotalCountErr[cmd]
} else if err == nil && int(cmd) < len(server_metrics.QueryTotalCountOk) {
counter = server_metrics.QueryTotalCountOk[cmd]
}
}

if counter != nil {
counter.Inc()
} else {
label := strconv.Itoa(int(cmd))
if err != nil {
metrics.QueryTotalCounter.WithLabelValues(label, "Error").Inc()
metrics.QueryTotalCounter.WithLabelValues(label, "Error", resourceGroupName).Inc()
} else {
metrics.QueryTotalCounter.WithLabelValues(label, "OK").Inc()
metrics.QueryTotalCounter.WithLabelValues(label, "OK", resourceGroupName).Inc()
}
}

Expand All @@ -1182,7 +1206,6 @@ func (cc *clientConn) addMetrics(cmd byte, startTime time.Time, err error) {
server_metrics.AffectedRowsCounterUpdate.Add(float64(affectedRows))
}

vars := cc.getCtx().GetSessionVars()
for _, dbName := range session.GetDBNames(vars) {
metrics.QueryDurationHistogram.WithLabelValues(sqlType, dbName, vars.StmtCtx.ResourceGroupName).Observe(cost.Seconds())
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/server/conn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2033,7 +2033,7 @@ func TestCloseConn(t *testing.T) {
for i := 0; i < numGoroutines; i++ {
go func() {
defer wg.Done()
err := closeConn(cc, 1)
err := closeConn(cc, "default", 1)
require.NoError(t, err)
}()
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/server/internal/testserverclient/server_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2265,7 +2265,7 @@ func (cli *TestServerClient) getMetrics(t *testing.T) []byte {

func getStmtCnt(content string) (stmtCnt map[string]int) {
stmtCnt = make(map[string]int)
r := regexp.MustCompile("tidb_executor_statement_total{db=\"\",type=\"([A-Z|a-z|-]+)\"} (\\d+)")
r := regexp.MustCompile("tidb_executor_statement_total{db=\"\",resource_group=\".*\",type=\"([A-Z|a-z|-]+)\"} (\\d+)")
matchResult := r.FindAllStringSubmatch(content, -1)
for _, v := range matchResult {
cnt, _ := strconv.Atoi(v[2])
Expand All @@ -2276,7 +2276,7 @@ func getStmtCnt(content string) (stmtCnt map[string]int) {

func getDBStmtCnt(content, dbName string) (stmtCnt map[string]int) {
stmtCnt = make(map[string]int)
r := regexp.MustCompile(fmt.Sprintf("tidb_executor_statement_total{db=\"%s\",type=\"([A-Z|a-z|-]+)\"} (\\d+)", dbName))
r := regexp.MustCompile(fmt.Sprintf("tidb_executor_statement_total{db=\"%s\",resource_group=\".*\",type=\"([A-Z|a-z|-]+)\"} (\\d+)", dbName))
matchResult := r.FindAllStringSubmatch(content, -1)
for _, v := range matchResult {
cnt, _ := strconv.Atoi(v[2])
Expand Down
1 change: 1 addition & 0 deletions pkg/server/metrics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
importpath = "github.com/pingcap/tidb/pkg/server/metrics",
visibility = ["//visibility:public"],
deps = [
"//pkg/domain/resourcegroup",
"//pkg/metrics",
"//pkg/parser/mysql",
"@com_github_prometheus_client_golang//prometheus",
Expand Down
53 changes: 27 additions & 26 deletions pkg/server/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package metrics

import (
"github.com/pingcap/tidb/pkg/domain/resourcegroup"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -48,34 +49,34 @@ func init() {
// InitMetricsVars init server metrics vars.
func InitMetricsVars() {
QueryTotalCountOk = []prometheus.Counter{
mysql.ComSleep: metrics.QueryTotalCounter.WithLabelValues("Sleep", "OK"),
mysql.ComQuit: metrics.QueryTotalCounter.WithLabelValues("Quit", "OK"),
mysql.ComInitDB: metrics.QueryTotalCounter.WithLabelValues("InitDB", "OK"),
mysql.ComQuery: metrics.QueryTotalCounter.WithLabelValues("Query", "OK"),
mysql.ComPing: metrics.QueryTotalCounter.WithLabelValues("Ping", "OK"),
mysql.ComFieldList: metrics.QueryTotalCounter.WithLabelValues("FieldList", "OK"),
mysql.ComStmtPrepare: metrics.QueryTotalCounter.WithLabelValues("StmtPrepare", "OK"),
mysql.ComStmtExecute: metrics.QueryTotalCounter.WithLabelValues("StmtExecute", "OK"),
mysql.ComStmtFetch: metrics.QueryTotalCounter.WithLabelValues("StmtFetch", "OK"),
mysql.ComStmtClose: metrics.QueryTotalCounter.WithLabelValues("StmtClose", "OK"),
mysql.ComStmtSendLongData: metrics.QueryTotalCounter.WithLabelValues("StmtSendLongData", "OK"),
mysql.ComStmtReset: metrics.QueryTotalCounter.WithLabelValues("StmtReset", "OK"),
mysql.ComSetOption: metrics.QueryTotalCounter.WithLabelValues("SetOption", "OK"),
mysql.ComSleep: metrics.QueryTotalCounter.WithLabelValues("Sleep", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComQuit: metrics.QueryTotalCounter.WithLabelValues("Quit", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComInitDB: metrics.QueryTotalCounter.WithLabelValues("InitDB", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComQuery: metrics.QueryTotalCounter.WithLabelValues("Query", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComPing: metrics.QueryTotalCounter.WithLabelValues("Ping", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComFieldList: metrics.QueryTotalCounter.WithLabelValues("FieldList", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtPrepare: metrics.QueryTotalCounter.WithLabelValues("StmtPrepare", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtExecute: metrics.QueryTotalCounter.WithLabelValues("StmtExecute", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtFetch: metrics.QueryTotalCounter.WithLabelValues("StmtFetch", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtClose: metrics.QueryTotalCounter.WithLabelValues("StmtClose", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtSendLongData: metrics.QueryTotalCounter.WithLabelValues("StmtSendLongData", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtReset: metrics.QueryTotalCounter.WithLabelValues("StmtReset", "OK", resourcegroup.DefaultResourceGroupName),
mysql.ComSetOption: metrics.QueryTotalCounter.WithLabelValues("SetOption", "OK", resourcegroup.DefaultResourceGroupName),
}
QueryTotalCountErr = []prometheus.Counter{
mysql.ComSleep: metrics.QueryTotalCounter.WithLabelValues("Sleep", "Error"),
mysql.ComQuit: metrics.QueryTotalCounter.WithLabelValues("Quit", "Error"),
mysql.ComInitDB: metrics.QueryTotalCounter.WithLabelValues("InitDB", "Error"),
mysql.ComQuery: metrics.QueryTotalCounter.WithLabelValues("Query", "Error"),
mysql.ComPing: metrics.QueryTotalCounter.WithLabelValues("Ping", "Error"),
mysql.ComFieldList: metrics.QueryTotalCounter.WithLabelValues("FieldList", "Error"),
mysql.ComStmtPrepare: metrics.QueryTotalCounter.WithLabelValues("StmtPrepare", "Error"),
mysql.ComStmtExecute: metrics.QueryTotalCounter.WithLabelValues("StmtExecute", "Error"),
mysql.ComStmtFetch: metrics.QueryTotalCounter.WithLabelValues("StmtFetch", "Error"),
mysql.ComStmtClose: metrics.QueryTotalCounter.WithLabelValues("StmtClose", "Error"),
mysql.ComStmtSendLongData: metrics.QueryTotalCounter.WithLabelValues("StmtSendLongData", "Error"),
mysql.ComStmtReset: metrics.QueryTotalCounter.WithLabelValues("StmtReset", "Error"),
mysql.ComSetOption: metrics.QueryTotalCounter.WithLabelValues("SetOption", "Error"),
mysql.ComSleep: metrics.QueryTotalCounter.WithLabelValues("Sleep", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComQuit: metrics.QueryTotalCounter.WithLabelValues("Quit", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComInitDB: metrics.QueryTotalCounter.WithLabelValues("InitDB", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComQuery: metrics.QueryTotalCounter.WithLabelValues("Query", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComPing: metrics.QueryTotalCounter.WithLabelValues("Ping", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComFieldList: metrics.QueryTotalCounter.WithLabelValues("FieldList", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtPrepare: metrics.QueryTotalCounter.WithLabelValues("StmtPrepare", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtExecute: metrics.QueryTotalCounter.WithLabelValues("StmtExecute", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtFetch: metrics.QueryTotalCounter.WithLabelValues("StmtFetch", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtClose: metrics.QueryTotalCounter.WithLabelValues("StmtClose", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtSendLongData: metrics.QueryTotalCounter.WithLabelValues("StmtSendLongData", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComStmtReset: metrics.QueryTotalCounter.WithLabelValues("StmtReset", "Error", resourcegroup.DefaultResourceGroupName),
mysql.ComSetOption: metrics.QueryTotalCounter.WithLabelValues("SetOption", "Error", resourcegroup.DefaultResourceGroupName),
}

DisconnectNormal = metrics.DisconnectionCounter.WithLabelValues(metrics.LblOK)
Expand Down
Loading

0 comments on commit 24a9caa

Please sign in to comment.