From 540a6ed426548fa7debc40f47636a00a272aed3a Mon Sep 17 00:00:00 2001 From: Barry He Date: Tue, 23 Mar 2021 16:47:48 +0000 Subject: [PATCH] sql: add times series metrics for disk spilling Add time series metrics for disk spilling to track queries spilled to disk, bytes written, and bytes read. Release note (ui change): User can see time series metrics for disk spilling in advanced debug console --- pkg/sql/colcontainer/BUILD.bazel | 2 ++ pkg/sql/colcontainer/diskqueue.go | 29 +++++++++++++++++++++++++++++ pkg/sql/colflow/vectorized_flow.go | 5 +++-- pkg/sql/execinfra/metrics.go | 24 ++++++++++++++++++++++++ pkg/ts/catalog/chart_catalog.go | 12 ++++++++++++ 5 files changed, 70 insertions(+), 2 deletions(-) diff --git a/pkg/sql/colcontainer/BUILD.bazel b/pkg/sql/colcontainer/BUILD.bazel index 83b5f0408dde..45c92a16bcf4 100644 --- a/pkg/sql/colcontainer/BUILD.bazel +++ b/pkg/sql/colcontainer/BUILD.bazel @@ -12,9 +12,11 @@ go_library( "//pkg/col/coldata", "//pkg/col/colserde", "//pkg/sql/colexecerror", + "//pkg/sql/execinfra", "//pkg/sql/types", "//pkg/storage/fs", "//pkg/util/mon", + "//pkg/util/syncutil", "//pkg/util/uuid", "@com_github_cockroachdb_errors//:errors", "@com_github_golang_snappy//:snappy", diff --git a/pkg/sql/colcontainer/diskqueue.go b/pkg/sql/colcontainer/diskqueue.go index ad76fcdd51ad..51e86fd18a4f 100644 --- a/pkg/sql/colcontainer/diskqueue.go +++ b/pkg/sql/colcontainer/diskqueue.go @@ -19,9 +19,11 @@ import ( "github.com/cockroachdb/cockroach/pkg/col/coldata" "github.com/cockroachdb/cockroach/pkg/col/colserde" + "github.com/cockroachdb/cockroach/pkg/sql/execinfra" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/storage/fs" "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/golang/snappy" @@ -191,6 +193,14 @@ type diskQueue struct { scratchDecompressedReadBytes []byte diskAcc *mon.BoundAccount + + // diskSpillingMetricsHelper keeps track of various disk spilling metrics. + diskSpillingMetricsHelper struct { + mu struct { + syncutil.Mutex + querySpilled bool + } + } } var _ RewindableQueue = &diskQueue{} @@ -296,6 +306,9 @@ func GetPatherFunc(f func(ctx context.Context) string) GetPather { type DiskQueueCfg struct { // FS is the filesystem interface to use. FS fs.FS + // DistSQLMetrics contains metrics for monitoring DistSQL processing. This + // can be nil if these metrics are not needed. + DistSQLMetrics *execinfra.DistSQLMetrics // GetPather returns where the temporary directory that will contain this // DiskQueue's files has been created. The directory name will be a UUID. // Note that the directory is created lazily on the first call to GetPath. @@ -363,6 +376,15 @@ func NewRewindableDiskQueue( return d, nil } +func (d *diskQueue) querySpilled() { + d.diskSpillingMetricsHelper.mu.Lock() + defer d.diskSpillingMetricsHelper.mu.Unlock() + if d.cfg.DistSQLMetrics != nil && !d.diskSpillingMetricsHelper.mu.querySpilled { + d.diskSpillingMetricsHelper.mu.querySpilled = true + d.cfg.DistSQLMetrics.QueriesSpilled.Inc(1) + } +} + func newDiskQueue( ctx context.Context, typs []*types.T, cfg DiskQueueCfg, diskAcc *mon.BoundAccount, ) (*diskQueue, error) { @@ -385,6 +407,7 @@ func newDiskQueue( if err := cfg.FS.MkdirAll(filepath.Join(cfg.GetPather.GetPath(ctx), d.dirName)); err != nil { return nil, err } + d.querySpilled() // rotateFile will create a new file to write to. return d, d.rotateFile(ctx) } @@ -520,6 +543,9 @@ func (d *diskQueue) writeFooterAndFlush(ctx context.Context) (err error) { } d.numBufferedBatches = 0 d.files[d.writeFileIdx].totalSize += written + if d.cfg.DistSQLMetrics != nil { + d.cfg.DistSQLMetrics.SpilledBytesWritten.Inc(int64(written)) + } if err := d.diskAcc.Grow(ctx, int64(written)); err != nil { return err } @@ -646,6 +672,9 @@ func (d *diskQueue) maybeInitDeserializer(ctx context.Context) (bool, error) { if err != nil && err != io.EOF { return false, err } + if d.cfg.DistSQLMetrics != nil { + d.cfg.DistSQLMetrics.SpilledBytesRead.Inc(int64(n)) + } if n != len(d.writer.scratch.compressedBuf) { return false, errors.Errorf("expected to read %d bytes but read %d", len(d.writer.scratch.compressedBuf), n) } diff --git a/pkg/sql/colflow/vectorized_flow.go b/pkg/sql/colflow/vectorized_flow.go index 3724ab6d9209..36665fc9df7d 100644 --- a/pkg/sql/colflow/vectorized_flow.go +++ b/pkg/sql/colflow/vectorized_flow.go @@ -182,8 +182,9 @@ func (f *vectorizedFlow) Setup( helper := newVectorizedFlowCreatorHelper(f.FlowBase) diskQueueCfg := colcontainer.DiskQueueCfg{ - FS: f.Cfg.TempFS, - GetPather: f, + FS: f.Cfg.TempFS, + DistSQLMetrics: f.Cfg.Metrics, + GetPather: f, } if err := diskQueueCfg.EnsureDefaults(); err != nil { return ctx, err diff --git a/pkg/sql/execinfra/metrics.go b/pkg/sql/execinfra/metrics.go index b3c7dd694b3c..b0c81fc7926e 100644 --- a/pkg/sql/execinfra/metrics.go +++ b/pkg/sql/execinfra/metrics.go @@ -31,6 +31,9 @@ type DistSQLMetrics struct { VecOpenFDs *metric.Gauge CurDiskBytesCount *metric.Gauge MaxDiskBytesHist *metric.Histogram + QueriesSpilled *metric.Counter + SpilledBytesWritten *metric.Counter + SpilledBytesRead *metric.Counter } // MetricStruct implements the metrics.Struct interface. @@ -111,6 +114,24 @@ var ( Measurement: "Disk", Unit: metric.Unit_BYTES, } + metaQueriesSpilled = metric.Metadata{ + Name: "sql.distsql.queries.spilled", + Help: "Number of queries that have spilled to disk", + Measurement: "Queries", + Unit: metric.Unit_COUNT, + } + metaSpilledBytesWritten = metric.Metadata{ + Name: "sql.disk.distsql.spilled.bytes.written", + Help: "Number of bytes written to temporary disk storage as a result of spilling", + Measurement: "Disk", + Unit: metric.Unit_BYTES, + } + metaSpilledBytesRead = metric.Metadata{ + Name: "sql.disk.distsql.spilled.bytes.read", + Help: "Number of bytes read from temporary disk storage as a result of spilling", + Measurement: "Disk", + Unit: metric.Unit_BYTES, + } ) // See pkg/sql/mem_metrics.go @@ -132,6 +153,9 @@ func MakeDistSQLMetrics(histogramWindow time.Duration) DistSQLMetrics { VecOpenFDs: metric.NewGauge(metaVecOpenFDs), CurDiskBytesCount: metric.NewGauge(metaDiskCurBytes), MaxDiskBytesHist: metric.NewHistogram(metaDiskMaxBytes, histogramWindow, log10int64times1000, 3), + QueriesSpilled: metric.NewCounter(metaQueriesSpilled), + SpilledBytesWritten: metric.NewCounter(metaSpilledBytesWritten), + SpilledBytesRead: metric.NewCounter(metaSpilledBytesRead), } } diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go index e6b72f1cf67d..375a46e689aa 100644 --- a/pkg/ts/catalog/chart_catalog.go +++ b/pkg/ts/catalog/chart_catalog.go @@ -1710,6 +1710,18 @@ var charts = []sectionDescription{ Title: "Disk Usage per Statement", Metrics: []string{"sql.disk.distsql.max"}, }, + { + Title: "Number of Queries Spilled To Disk", + Metrics: []string{"sql.distsql.queries.spilled"}, + }, + { + Title: "Number of Bytes Written Due to Disk Spilling", + Metrics: []string{"sql.disk.distsql.spilled.bytes.written"}, + }, + { + Title: "Number of Bytes Read Due to Disk Spilling", + Metrics: []string{"sql.disk.distsql.spilled.bytes.read"}, + }, }, }, {