Skip to content

Commit

Permalink
Merge #64442
Browse files Browse the repository at this point in the history
64442: sql: exclude Bulk IO operations from service latency metrics r=maryliag a=Azhng

Previously, we included Bulk IO operations such as BACKUP
in our service latency metrics. This caused spikes in our time
series graph for service latency, which can lead to confusion.

Closes #42174

Release note (sql change): Bulk IO operations will no longer be included
 in service latency metrics.

Co-authored-by: Azhng <[email protected]>
  • Loading branch information
craig[bot] and Azhng committed May 4, 2021
2 parents e5b8503 + 559f205 commit 3f872bd
Showing 1 changed file with 34 additions and 4 deletions.
38 changes: 34 additions & 4 deletions pkg/sql/executor_statement_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ func (ex *connExecutor) recordStatementSummary(
execOverhead := svcLat - processingLat

stmt := &planner.stmt
shouldIncludeInLatencyMetrics := shouldIncludeStmtInLatencyMetrics(stmt)
flags := planner.curPlan.flags
if automaticRetryCount == 0 {
ex.updateOptCounters(flags)
Expand All @@ -192,11 +193,15 @@ func (ex *connExecutor) recordStatementSummary(
if _, ok := stmt.AST.(*tree.Select); ok {
m.DistSQLSelectCount.Inc(1)
}
m.DistSQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
m.DistSQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
if shouldIncludeInLatencyMetrics {
m.DistSQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
m.DistSQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
}
}
if shouldIncludeInLatencyMetrics {
m.SQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
m.SQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
}
m.SQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
m.SQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
}

stmtID := ex.statsCollector.recordStatement(
Expand Down Expand Up @@ -257,3 +262,28 @@ func (ex *connExecutor) updateOptCounters(planFlags planFlags) {
m.SQLOptPlanCacheMisses.Inc(1)
}
}

// Bulk IO operations cause spikes in our time series chart for service latency. We exclude them from the service
// latency metrics to avoid confusions.
func shouldIncludeStmtInLatencyMetrics(stmt *Statement) bool {
switch stmt.AST.(type) {
case *tree.Backup:
return false
case *tree.ShowBackup:
return false
case *tree.Restore:
return false
case *tree.Import:
return false
case *tree.Export:
return false
case *tree.ScheduledBackup:
return false
case *tree.StreamIngestion:
return false
case *tree.ReplicationStream:
return false
}

return true
}

0 comments on commit 3f872bd

Please sign in to comment.