Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: track CPU time on gateway node #89122

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pkg/roachpb/app_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func (s *StatementStatistics) Add(other *StatementStatistics) {
s.Nodes = util.CombineUniqueInt64(s.Nodes, other.Nodes)
s.PlanGists = util.CombineUniqueString(s.PlanGists, other.PlanGists)
s.IndexRecommendations = other.IndexRecommendations
s.CpuTime.Add(other.CpuTime, s.Count, other.Count)

s.ExecStats.Add(other.ExecStats)

Expand Down Expand Up @@ -189,7 +190,8 @@ func (s *StatementStatistics) AlmostEqual(other *StatementStatistics, eps float6
s.SensitiveInfo.Equal(other.SensitiveInfo) &&
s.BytesRead.AlmostEqual(other.BytesRead, eps) &&
s.RowsRead.AlmostEqual(other.RowsRead, eps) &&
s.RowsWritten.AlmostEqual(other.RowsWritten, eps)
s.RowsWritten.AlmostEqual(other.RowsWritten, eps) &&
s.CpuTime.AlmostEqual(other.CpuTime, eps)
// s.ExecStats are deliberately ignored since they are subject to sampling
// probability and are not fully deterministic (e.g. the number of network
// messages depends on the range cache state).
Expand Down
3 changes: 3 additions & 0 deletions pkg/roachpb/app_stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ message StatementStatistics {
// index_recommendations is the list of index recommendations generated for the statement fingerprint.
repeated string index_recommendations = 27;

// cpu_time is the amount of CPU time spent (in seconds) by this query.
optional NumericStat cpu_time = 28 [(gogoproto.nullable) = false];

// Note: be sure to update `sql/app_stats.go` when adding/removing fields here!

reserved 13, 14, 17, 18, 19, 20;
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ go_library(
"//pkg/util/errorutil/unimplemented",
"//pkg/util/fsm",
"//pkg/util/grpcutil",
"//pkg/util/grunning",
"//pkg/util/hlc",
"//pkg/util/humanizeutil",
"//pkg/util/interval",
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/conn_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/fsm"
"github.com/cockroachdb/cockroach/pkg/util/grunning"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logcrash"
Expand Down Expand Up @@ -1878,12 +1879,16 @@ func (ex *connExecutor) execCmd() error {
ex.machine.CurState(), pos, cmd)
}

nanos := grunning.Time().Nanoseconds()
receivedCPUTime := time.Unix(0, nanos)

var ev fsm.Event
var payload fsm.EventPayload
var res ResultBase
switch tcmd := cmd.(type) {
case ExecStmt:
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionQueryReceived, tcmd.TimeReceived)
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionQueryReceivedCPUTime, receivedCPUTime)
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionStartParse, tcmd.ParseStart)
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionEndParse, tcmd.ParseEnd)

Expand Down Expand Up @@ -1937,6 +1942,7 @@ func (ex *connExecutor) execCmd() error {
// ExecPortal is handled like ExecStmt, except that the placeholder info
// is taken from the portal.
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionQueryReceived, tcmd.TimeReceived)
ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionQueryReceivedCPUTime, receivedCPUTime)
// When parsing has been done earlier, via a separate parse
// message, it is not any more part of the statistics collected
// for this execution. In that case, we simply report that
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/conn_executor_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/contextutil"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/fsm"
"github.com/cockroachdb/cockroach/pkg/util/grunning"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
Expand Down Expand Up @@ -1196,6 +1197,9 @@ func (ex *connExecutor) dispatchToExecutionEngine(
}
ex.sessionTracing.TraceExecEnd(ctx, res.Err(), res.RowsAffected())
ex.statsCollector.PhaseTimes().SetSessionPhaseTime(sessionphase.PlannerEndExecStmt, timeutil.Now())
cpuNanos := grunning.Time().Nanoseconds()
fmt.Println("planner end exec stmt nanos", cpuNanos)
ex.statsCollector.PhaseTimes().SetSessionPhaseTime(sessionphase.PlannerEndExecStmtCpuTime, time.Unix(0, cpuNanos))

ex.extraTxnState.rowsRead += stats.rowsRead
ex.extraTxnState.bytesRead += stats.bytesRead
Expand Down Expand Up @@ -1955,6 +1959,7 @@ var showQueryStatsFns = map[tree.Name]func(*sessionphase.Times) time.Duration{
// Since the last query has already finished, it is safe to retrieve its
// total service latency.
"service_latency": func(phaseTimes *sessionphase.Times) time.Duration { return phaseTimes.GetServiceLatencyTotal() },
"cpu_time": func(phaseTimes *sessionphase.Times) time.Duration { return phaseTimes.GetServiceCPUTimeTotal() },
"post_commit_jobs_latency": func(phaseTimes *sessionphase.Times) time.Duration { return phaseTimes.GetPostCommitJobsLatency() },
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/crdb_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1094,6 +1094,8 @@ CREATE TABLE crdb_internal.node_statement_statistics (
service_lat_var FLOAT NOT NULL,
overhead_lat_avg FLOAT NOT NULL,
overhead_lat_var FLOAT NOT NULL,
cpu_time_avg FLOAT NOT NULL,
cpu_time_var FLOAT NOT NULL,
bytes_read_avg FLOAT NOT NULL,
bytes_read_var FLOAT NOT NULL,
rows_read_avg FLOAT NOT NULL,
Expand Down Expand Up @@ -1197,6 +1199,8 @@ CREATE TABLE crdb_internal.node_statement_statistics (
tree.NewDFloat(tree.DFloat(stats.Stats.ServiceLat.GetVariance(stats.Stats.Count))), // service_lat_var
tree.NewDFloat(tree.DFloat(stats.Stats.OverheadLat.Mean)), // overhead_lat_avg
tree.NewDFloat(tree.DFloat(stats.Stats.OverheadLat.GetVariance(stats.Stats.Count))), // overhead_lat_var
tree.NewDFloat(tree.DFloat(stats.Stats.CpuTime.Mean)), // cpu_time_avg
tree.NewDFloat(tree.DFloat(stats.Stats.CpuTime.GetVariance(stats.Stats.Count))), // cpu_time_var
tree.NewDFloat(tree.DFloat(stats.Stats.BytesRead.Mean)), // bytes_read_avg
tree.NewDFloat(tree.DFloat(stats.Stats.BytesRead.GetVariance(stats.Stats.Count))), // bytes_read_var
tree.NewDFloat(tree.DFloat(stats.Stats.RowsRead.Mean)), // rows_read_avg
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/executor_statement_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ func (ex *connExecutor) recordStatementSummary(
// We want to exclude any overhead to reduce possible confusion.
svcLatRaw := phaseTimes.GetServiceLatencyNoOverhead()
svcLat := svcLatRaw.Seconds()
cpuTime := phaseTimes.GetServiceCPUTimeTotal().Seconds()

// processing latency: contributing towards SQL results.
processingLat := parseLat + planLat + runLat
Expand Down Expand Up @@ -192,6 +193,7 @@ func (ex *connExecutor) recordStatementSummary(
RunLatency: runLat,
ServiceLatency: svcLat,
OverheadLatency: execOverhead,
ServiceCPUTime: cpuTime,
BytesRead: stats.bytesRead,
RowsRead: stats.rowsRead,
RowsWritten: stats.rowsWritten,
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/pgwire/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ go_library(
"//pkg/util/duration",
"//pkg/util/envutil",
"//pkg/util/errorutil/unimplemented",
"//pkg/util/grunning",
"//pkg/util/humanizeutil",
"//pkg/util/ipaddr",
"//pkg/util/json",
Expand Down
11 changes: 11 additions & 0 deletions pkg/sql/sessionphase/session_phase.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ const (

// Executor phases.

SessionQueryReceivedCPUTime

// SessionQueryReceived is the SessionPhase when a query is received.
SessionQueryReceived

Expand All @@ -54,6 +56,8 @@ const (
// PlannerEndExecStmt is the SessionPhase when execution ends.
PlannerEndExecStmt

PlannerEndExecStmtCpuTime

// SessionQueryServiced is the SessionPhase when a query is serviced.
// Note: we compute this even for empty queries or "special" statements that
// have no execution, like SHOW TRANSACTION STATUS.
Expand Down Expand Up @@ -166,6 +170,13 @@ func (t *Times) GetServiceLatencyTotal() time.Duration {
return t.times[SessionQueryServiced].Sub(t.times[SessionQueryReceived])
}

// GetServiceCPUTimeTotal returns the total CPU time of serving a query
// including any overhead like internal retries.
// NOTE: both the PlannerEndExecStmtCpuTime and SessionQueryReceivedCPUTime
// phases must have been set, or the result is meaningless (the times map's
// zero values would be subtracted).
// NOTE(review): the stored values are CPU-time nanosecond counts smuggled
// into time.Time via time.Unix(0, nanos), not wall-clock instants — the Sub
// below therefore yields a CPU duration, not elapsed time. Verify both call
// sites keep using the same encoding.
func (t *Times) GetServiceCPUTimeTotal() time.Duration {
return t.times[PlannerEndExecStmtCpuTime].Sub(t.times[SessionQueryReceivedCPUTime])
}

// GetRunLatency returns the time between a query execution starting and
// ending.
func (t *Times) GetRunLatency() time.Duration {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ func (s *innerStmtStats) jsonFields() jsonFields {
{"runLat", (*numericStats)(&s.RunLat)},
{"svcLat", (*numericStats)(&s.ServiceLat)},
{"ovhLat", (*numericStats)(&s.OverheadLat)},
{"cpuTime", (*numericStats)(&s.CpuTime)},
{"bytesRead", (*numericStats)(&s.BytesRead)},
{"rowsRead", (*numericStats)(&s.RowsRead)},
{"rowsWritten", (*numericStats)(&s.RowsWritten)},
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func (s *Container) RecordStatement(
stats.mu.data.RunLat.Record(stats.mu.data.Count, value.RunLatency)
stats.mu.data.ServiceLat.Record(stats.mu.data.Count, value.ServiceLatency)
stats.mu.data.OverheadLat.Record(stats.mu.data.Count, value.OverheadLatency)
stats.mu.data.CpuTime.Record(stats.mu.data.Count, value.ServiceCPUTime)
stats.mu.data.BytesRead.Record(stats.mu.data.Count, float64(value.BytesRead))
stats.mu.data.RowsRead.Record(stats.mu.data.Count, float64(value.RowsRead))
stats.mu.data.RowsWritten.Record(stats.mu.data.Count, float64(value.RowsWritten))
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/sqlstats/ssprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ type RecordedStmtStats struct {
RunLatency float64
ServiceLatency float64
OverheadLatency float64
ServiceCPUTime float64
BytesRead int64
RowsRead int64
RowsWritten int64
Expand Down