From 86f51efb26c709192f0aa4c839930334270ffa42 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Thu, 24 Feb 2022 16:57:06 +0100 Subject: [PATCH 1/6] kvserver: disable assertion in SetPriorityID Merge-to-master is currently pretty red, and this has popped up once or twice. Disable the assertion (which is fine for tests, for now) while we investigate. Touches https://github.com/cockroachdb/cockroach/issues/75939 Release note: None --- pkg/kv/kvserver/scheduler.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/kv/kvserver/scheduler.go b/pkg/kv/kvserver/scheduler.go index 9a2712c0e6c5..d658c72a1428 100644 --- a/pkg/kv/kvserver/scheduler.go +++ b/pkg/kv/kvserver/scheduler.go @@ -121,7 +121,10 @@ func (q *rangeIDQueue) Len() int { } func (q *rangeIDQueue) SetPriorityID(id roachpb.RangeID) { - if q.priorityID != 0 && q.priorityID != id { + if q.priorityID != 0 && q.priorityID != id && + // This assertion is temporarily disabled, see: + // https://github.com/cockroachdb/cockroach/issues/75939 + false { panic(fmt.Sprintf( "priority range ID already set: old=%d, new=%d", q.priorityID, id)) From 6cf5b12a079abcb3766f93d2eeceb00c4ccbb799 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Tue, 1 Mar 2022 11:42:24 +1100 Subject: [PATCH 2/6] ttljob: add row statistics Release note (sql change): This commit adds the ability for the TTL job to generate statistics on number of rows and number of expired rows on the table. This is off by default, controllable by the `ttl_row_stats_poll_interval` storage parameter syntax. Release justification: high benefit changes to new functionality --- pkg/sql/catalog/catpb/catalog.proto | 3 + pkg/sql/catalog/tabledesc/structured.go | 3 + pkg/sql/catalog/tabledesc/ttl.go | 20 ++++ pkg/sql/exec_util.go | 3 + .../testdata/logic_test/row_level_ttl | 33 +++--- pkg/sql/paramparse/paramobserver.go | 20 ++++ pkg/sql/paramparse/paramparse.go | 39 +++++++ .../sessiondatapb/local_only_session_data.go | 4 + pkg/sql/ttl/ttljob/BUILD.bazel | 5 + pkg/sql/ttl/ttljob/ttljob.go | 106 ++++++++++++++++++ pkg/sql/ttl/ttljob/ttljob_test.go | 12 ++ pkg/ts/catalog/chart_catalog.go | 8 ++ 12 files changed, 241 insertions(+), 15 deletions(-) diff --git a/pkg/sql/catalog/catpb/catalog.proto b/pkg/sql/catalog/catpb/catalog.proto index a313ac966cbd..cdf495308b78 100644 --- a/pkg/sql/catalog/catpb/catalog.proto +++ b/pkg/sql/catalog/catpb/catalog.proto @@ -210,4 +210,7 @@ message RowLevelTTL { optional int64 delete_rate_limit = 7 [(gogoproto.nullable)=false]; // Pause is set if the TTL job should not run. optional bool pause = 8 [(gogoproto.nullable)=false]; + // RowStatsPollInterval is the interval to report row statistics (number of rows on table, number of expired + // rows on table) during row level TTL. If zero, no statistics are reported. + optional int64 row_stats_poll_interval = 9 [(gogoproto.nullable)=false, (gogoproto.casttype)="time.Duration"]; } diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index 0d7f8b6788c1..3b1f786d856d 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -2555,6 +2555,9 @@ func (desc *wrapper) GetStorageParams(spaceBetweenEqual bool) []string { if pause := ttl.Pause; pause { appendStorageParam(`ttl_pause`, fmt.Sprintf(`%t`, pause)) } + if p := ttl.RowStatsPollInterval; p != 0 { + appendStorageParam(`ttl_row_stats_poll_interval`, fmt.Sprintf(`'%s'`, p.String())) + } } if exclude := desc.GetExcludeDataFromBackup(); exclude { appendStorageParam(`exclude_data_from_backup`, `true`) diff --git a/pkg/sql/catalog/tabledesc/ttl.go b/pkg/sql/catalog/tabledesc/ttl.go index 1592585056e0..affced824e92 100644 --- a/pkg/sql/catalog/tabledesc/ttl.go +++ b/pkg/sql/catalog/tabledesc/ttl.go @@ -11,6 +11,8 @@ package tabledesc import ( + "time" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" @@ -53,6 +55,11 @@ func ValidateRowLevelTTL(ttl *catpb.RowLevelTTL) error { return err } } + if ttl.RowStatsPollInterval != 0 { + if err := ValidateTTLRowStatsPollInterval("ttl_row_stats_poll_interval", ttl.RowStatsPollInterval); err != nil { + return err + } + } return nil } @@ -93,6 +100,19 @@ func ValidateTTLCronExpr(key string, str string) error { return nil } +// ValidateTTLRowStatsPollInterval validates the automatic statistics field +// of TTL. +func ValidateTTLRowStatsPollInterval(key string, val time.Duration) error { + if val <= 0 { + return pgerror.Newf( + pgcode.InvalidParameterValue, + `"%s" must be at least 1`, + key, + ) + } + return nil +} + // ValidateTTLRateLimit validates the rate limit parameters of TTL. func ValidateTTLRateLimit(key string, val int64) error { if val <= 0 { diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index ec06ce5c2dba..4f77c997a572 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -1467,6 +1467,9 @@ type TTLTestingKnobs struct { // AOSTDuration changes the AOST timestamp duration to add to the // current time. AOSTDuration *time.Duration + // OnStatisticsError is a hook that takes in an error if gathering statistics + // generates an error. + OnStatisticsError func(err error) // MockDescriptorVersionDuringDelete is a version to mock the delete descriptor // as during delete. MockDescriptorVersionDuringDelete *descpb.DescriptorVersion diff --git a/pkg/sql/logictest/testdata/logic_test/row_level_ttl b/pkg/sql/logictest/testdata/logic_test/row_level_ttl index f73159bb03b3..08c91073f779 100644 --- a/pkg/sql/logictest/testdata/logic_test/row_level_ttl +++ b/pkg/sql/logictest/testdata/logic_test/row_level_ttl @@ -346,23 +346,23 @@ CREATE TABLE tbl ( id INT PRIMARY KEY, text TEXT, FAMILY (id, text) -) WITH (ttl_expire_after = '10 minutes', ttl_select_batch_size = 50, ttl_range_concurrency = 2, ttl_delete_rate_limit = 100, ttl_pause = true) +) WITH (ttl_expire_after = '10 minutes', ttl_select_batch_size = 50, ttl_range_concurrency = 2, ttl_delete_rate_limit = 100, ttl_pause = true, ttl_row_stats_poll_interval = '1 minute') query T SELECT reloptions FROM pg_class WHERE relname = 'tbl' ---- -{ttl='on',ttl_automatic_column='on',ttl_expire_after='00:10:00':::INTERVAL,ttl_select_batch_size=50,ttl_range_concurrency=2,ttl_delete_rate_limit=100,ttl_pause=true} +{ttl='on',ttl_automatic_column='on',ttl_expire_after='00:10:00':::INTERVAL,ttl_select_batch_size=50,ttl_range_concurrency=2,ttl_delete_rate_limit=100,ttl_pause=true,ttl_row_stats_poll_interval='1m0s'} query T SELECT create_statement FROM [SHOW CREATE TABLE tbl] ---- CREATE TABLE public.tbl ( - id INT8 NOT NULL, - text STRING NULL, - crdb_internal_expiration TIMESTAMPTZ NOT VISIBLE NOT NULL DEFAULT current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL ON UPDATE current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL, - CONSTRAINT tbl_pkey PRIMARY KEY (id ASC), - FAMILY fam_0_id_text_crdb_internal_expiration (id, text, crdb_internal_expiration) -) WITH (ttl = 'on', ttl_automatic_column = 'on', ttl_expire_after = '00:10:00':::INTERVAL, ttl_select_batch_size = 50, ttl_range_concurrency = 2, ttl_delete_rate_limit = 100, ttl_pause = true) + id INT8 NOT NULL, + text STRING NULL, + crdb_internal_expiration TIMESTAMPTZ NOT VISIBLE NOT NULL DEFAULT current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL ON UPDATE current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL, + CONSTRAINT tbl_pkey PRIMARY KEY (id ASC), + FAMILY fam_0_id_text_crdb_internal_expiration (id, text, crdb_internal_expiration) +) WITH (ttl = 'on', ttl_automatic_column = 'on', ttl_expire_after = '00:10:00':::INTERVAL, ttl_select_batch_size = 50, ttl_range_concurrency = 2, ttl_delete_rate_limit = 100, ttl_pause = true, ttl_row_stats_poll_interval = '1m0s') statement ok ALTER TABLE tbl SET (ttl_delete_batch_size = 100) @@ -371,12 +371,12 @@ query T SELECT create_statement FROM [SHOW CREATE TABLE tbl] ---- CREATE TABLE public.tbl ( - id INT8 NOT NULL, - text STRING NULL, - crdb_internal_expiration TIMESTAMPTZ NOT VISIBLE NOT NULL DEFAULT current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL ON UPDATE current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL, - CONSTRAINT tbl_pkey PRIMARY KEY (id ASC), - FAMILY fam_0_id_text_crdb_internal_expiration (id, text, crdb_internal_expiration) -) WITH (ttl = 'on', ttl_automatic_column = 'on', ttl_expire_after = '00:10:00':::INTERVAL, ttl_select_batch_size = 50, ttl_delete_batch_size = 100, ttl_range_concurrency = 2, ttl_delete_rate_limit = 100, ttl_pause = true) + id INT8 NOT NULL, + text STRING NULL, + crdb_internal_expiration TIMESTAMPTZ NOT VISIBLE NOT NULL DEFAULT current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL ON UPDATE current_timestamp():::TIMESTAMPTZ + '00:10:00':::INTERVAL, + CONSTRAINT tbl_pkey PRIMARY KEY (id ASC), + FAMILY fam_0_id_text_crdb_internal_expiration (id, text, crdb_internal_expiration) +) WITH (ttl = 'on', ttl_automatic_column = 'on', ttl_expire_after = '00:10:00':::INTERVAL, ttl_select_batch_size = 50, ttl_delete_batch_size = 100, ttl_range_concurrency = 2, ttl_delete_rate_limit = 100, ttl_pause = true, ttl_row_stats_poll_interval = '1m0s') statement error "ttl_select_batch_size" must be at least 1 ALTER TABLE tbl SET (ttl_select_batch_size = -1) @@ -390,8 +390,11 @@ ALTER TABLE tbl SET (ttl_range_concurrency = -1) statement error "ttl_delete_rate_limit" must be at least 1 ALTER TABLE tbl SET (ttl_delete_rate_limit = -1) +statement error "ttl_row_stats_poll_interval" must be at least 1 +ALTER TABLE tbl SET (ttl_row_stats_poll_interval = '-1 second') + statement ok -ALTER TABLE tbl RESET (ttl_delete_batch_size, ttl_select_batch_size, ttl_range_concurrency, ttl_delete_rate_limit, ttl_pause) +ALTER TABLE tbl RESET (ttl_delete_batch_size, ttl_select_batch_size, ttl_range_concurrency, ttl_delete_rate_limit, ttl_pause, ttl_row_stats_poll_interval) query T SELECT create_statement FROM [SHOW CREATE TABLE tbl] diff --git a/pkg/sql/paramparse/paramobserver.go b/pkg/sql/paramparse/paramobserver.go index 47ddfdb543f6..167378590b92 100644 --- a/pkg/sql/paramparse/paramobserver.go +++ b/pkg/sql/paramparse/paramobserver.go @@ -389,6 +389,26 @@ var tableParams = map[string]tableParam{ return nil }, }, + `ttl_row_stats_poll_interval`: { + onSet: func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, evalCtx *tree.EvalContext, key string, datum tree.Datum) error { + d, err := DatumAsDuration(evalCtx, key, datum) + if err != nil { + return err + } + if po.tableDesc.RowLevelTTL == nil { + po.tableDesc.RowLevelTTL = &catpb.RowLevelTTL{} + } + if err := tabledesc.ValidateTTLRowStatsPollInterval(key, d); err != nil { + return err + } + po.tableDesc.RowLevelTTL.RowStatsPollInterval = d + return nil + }, + onReset: func(po *TableStorageParamObserver, evalCtx *tree.EvalContext, key string) error { + po.tableDesc.RowLevelTTL.RowStatsPollInterval = 0 + return nil + }, + }, `exclude_data_from_backup`: { onSet: func(ctx context.Context, po *TableStorageParamObserver, semaCtx *tree.SemaContext, evalCtx *tree.EvalContext, key string, datum tree.Datum) error { diff --git a/pkg/sql/paramparse/paramparse.go b/pkg/sql/paramparse/paramparse.go index 26ad534ca4cf..7f6680afab16 100644 --- a/pkg/sql/paramparse/paramparse.go +++ b/pkg/sql/paramparse/paramparse.go @@ -15,10 +15,12 @@ package paramparse import ( "strconv" "strings" + "time" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/util/duration" "github.com/cockroachdb/errors" ) @@ -55,6 +57,43 @@ func DatumAsFloat(evalCtx *tree.EvalContext, name string, value tree.TypedExpr) return 0, err } +// DatumAsDuration transforms a tree.TypedExpr containing a Datum into a +// time.Duration. +func DatumAsDuration( + evalCtx *tree.EvalContext, name string, value tree.TypedExpr, +) (time.Duration, error) { + val, err := value.Eval(evalCtx) + if err != nil { + return 0, err + } + var d duration.Duration + switch v := tree.UnwrapDatum(evalCtx, val).(type) { + case *tree.DString: + datum, err := tree.ParseDInterval(evalCtx.SessionData().GetIntervalStyle(), string(*v)) + if err != nil { + return 0, err + } + d = datum.Duration + case *tree.DInterval: + d = v.Duration + default: + err = pgerror.Newf(pgcode.InvalidParameterValue, + "parameter %q requires a duration value", name) + err = errors.WithDetailf(err, + "%s is a %s", value, errors.Safe(val.ResolvedType())) + return 0, err + } + + secs, ok := d.AsInt64() + if !ok { + return 0, pgerror.Newf( + pgcode.InvalidParameterValue, + "invalid duration", + ) + } + return time.Duration(secs) * time.Second, nil +} + // DatumAsInt transforms a tree.TypedExpr containing a Datum into an int. func DatumAsInt(evalCtx *tree.EvalContext, name string, value tree.TypedExpr) (int64, error) { val, err := value.Eval(evalCtx) diff --git a/pkg/sql/sessiondatapb/local_only_session_data.go b/pkg/sql/sessiondatapb/local_only_session_data.go index fe535fee4735..ea8e1a497154 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.go +++ b/pkg/sql/sessiondatapb/local_only_session_data.go @@ -237,6 +237,10 @@ const ( // session default_transaction_quality_of_service value. SystemLow = QoSLevel(admission.LowPri) + // TTLStatsLow denotes a QoS level used internally by the TTL feature, which + // is not settable as a session default_transaction_quality_of_service value. + TTLStatsLow = QoSLevel(admission.TTLLowPri) + // TTLLow denotes a QoS level used internally by the TTL feature, which is not // settable as a session default_transaction_quality_of_service value. TTLLow = QoSLevel(admission.TTLLowPri) diff --git a/pkg/sql/ttl/ttljob/BUILD.bazel b/pkg/sql/ttl/ttljob/BUILD.bazel index 06c0d0aef871..e925c39e9ad7 100644 --- a/pkg/sql/ttl/ttljob/BUILD.bazel +++ b/pkg/sql/ttl/ttljob/BUILD.bazel @@ -14,17 +14,22 @@ go_library( "//pkg/keys", "//pkg/kv", "//pkg/roachpb", + "//pkg/security", "//pkg/settings", "//pkg/settings/cluster", "//pkg/sql", "//pkg/sql/catalog/catpb", + "//pkg/sql/catalog/colinfo", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/descs", "//pkg/sql/lexbase", "//pkg/sql/rowenc", "//pkg/sql/sem/tree", + "//pkg/sql/sessiondata", + "//pkg/sql/sessiondatapb", "//pkg/sql/types", "//pkg/util/ctxgroup", + "//pkg/util/log", "//pkg/util/metric", "//pkg/util/metric/aggmetric", "//pkg/util/quotapool", diff --git a/pkg/sql/ttl/ttljob/ttljob.go b/pkg/sql/ttl/ttljob/ttljob.go index 6ac4d8b8be0f..3d08d9c79f27 100644 --- a/pkg/sql/ttl/ttljob/ttljob.go +++ b/pkg/sql/ttl/ttljob/ttljob.go @@ -12,6 +12,7 @@ package ttljob import ( "context" + "fmt" "math" "regexp" "time" @@ -21,16 +22,21 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/security" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catpb" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/rowenc" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" + "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric" "github.com/cockroachdb/cockroach/pkg/util/quotapool" @@ -98,6 +104,8 @@ type RowLevelTTLAggMetrics struct { RowSelections *aggmetric.AggCounter RowDeletions *aggmetric.AggCounter NumActiveRanges *aggmetric.AggGauge + TotalRows *aggmetric.AggGauge + TotalExpiredRows *aggmetric.AggGauge mu struct { syncutil.Mutex @@ -112,6 +120,8 @@ type rowLevelTTLMetrics struct { RowSelections *aggmetric.Counter RowDeletions *aggmetric.Counter NumActiveRanges *aggmetric.Gauge + TotalRows *aggmetric.Gauge + TotalExpiredRows *aggmetric.Gauge } // MetricStruct implements the metric.Struct interface. @@ -134,6 +144,8 @@ func (m *RowLevelTTLAggMetrics) loadMetrics(relation string) rowLevelTTLMetrics RowSelections: m.RowSelections.AddChild(relation), RowDeletions: m.RowDeletions.AddChild(relation), NumActiveRanges: m.NumActiveRanges.AddChild(relation), + TotalRows: m.TotalRows.AddChild(relation), + TotalExpiredRows: m.TotalExpiredRows.AddChild(relation), } m.mu.m[relation] = ret return ret @@ -205,6 +217,22 @@ func makeRowLevelTTLAggMetrics(histogramWindowInterval time.Duration) metric.Str Unit: metric.Unit_COUNT, }, ), + TotalRows: b.Gauge( + metric.Metadata{ + Name: "jobs.row_level_ttl.total_rows", + Help: "Approximate number of rows on the TTL table.", + Measurement: "total_rows", + Unit: metric.Unit_COUNT, + }, + ), + TotalExpiredRows: b.Gauge( + metric.Metadata{ + Name: "jobs.row_level_ttl.total_expired_rows", + Help: "Approximate number of rows that have expired the TTL on the TTL table.", + Measurement: "total_expired_rows", + Unit: metric.Unit_COUNT, + }, + ), } ret.mu.m = make(map[string]rowLevelTTLMetrics) return ret @@ -344,6 +372,7 @@ func (t rowLevelTTLResumer) Resume(ctx context.Context, execCtx interface{}) err deleteRateLimit, ) + statsCloseCh := make(chan struct{}) ch := make(chan rangeToProcess, rangeConcurrency) for i := 0; i < rangeConcurrency; i++ { g.GoCtx(func(ctx context.Context) error { @@ -377,9 +406,27 @@ func (t rowLevelTTLResumer) Resume(ctx context.Context, execCtx interface{}) err }) } + if ttlSettings.RowStatsPollInterval != 0 { + g.GoCtx(func(ctx context.Context) error { + // Do once initially to ensure we have some base statistics. + fetchStatistics(ctx, p.ExecCfg(), details, metrics, aostDuration) + // Wait until poll interval is reached, or early exit when we are done + // with the TTL job. + for { + select { + case <-statsCloseCh: + return nil + case <-time.After(ttlSettings.RowStatsPollInterval): + fetchStatistics(ctx, p.ExecCfg(), details, metrics, aostDuration) + } + } + }) + } + if err := func() (retErr error) { defer func() { close(ch) + close(statsCloseCh) retErr = errors.CombineErrors(retErr, g.Wait()) }() done := false @@ -475,6 +522,65 @@ func getDeleteRateLimit(sv *settings.Values, ttl catpb.RowLevelTTL) int64 { return val } +func fetchStatistics( + ctx context.Context, + execCfg *sql.ExecutorConfig, + details jobspb.RowLevelTTLDetails, + metrics rowLevelTTLMetrics, + aostDuration time.Duration, +) { + if err := func() error { + aost, err := tree.MakeDTimestampTZ(timeutil.Now().Add(aostDuration), time.Microsecond) + if err != nil { + return err + } + for _, c := range []struct { + opName string + query string + args []interface{} + gauge *aggmetric.Gauge + }{ + { + opName: "ttl_num_rows", + query: `SELECT count(1) FROM [%d AS t] AS OF SYSTEM TIME %s`, + gauge: metrics.TotalRows, + }, + { + opName: "ttl_num_expired_rows", + query: `SELECT count(1) FROM [%d AS t] AS OF SYSTEM TIME %s WHERE ` + colinfo.TTLDefaultExpirationColumnName + ` < $1`, + args: []interface{}{details.Cutoff}, + gauge: metrics.TotalExpiredRows, + }, + } { + // User a super low quality of service (lower than TTL low), as we don't + // really care if statistics gets left behind and prefer the TTL job to + // have priority. + qosLevel := sessiondatapb.SystemLow + datums, err := execCfg.InternalExecutor.QueryRowEx( + ctx, + c.opName, + nil, + sessiondata.InternalExecutorOverride{ + User: security.RootUserName(), + QualityOfService: &qosLevel, + }, + fmt.Sprintf(c.query, details.TableID, aost.String()), + c.args..., + ) + if err != nil { + return err + } + c.gauge.Update(int64(tree.MustBeDInt(datums[0]))) + } + return nil + }(); err != nil { + if onStatisticsError := execCfg.TTLTestingKnobs.OnStatisticsError; onStatisticsError != nil { + onStatisticsError(err) + } + log.Warningf(ctx, "failed to get statistics for table id %d: %s", details.TableID, err) + } +} + func runTTLOnRange( ctx context.Context, execCfg *sql.ExecutorConfig, diff --git a/pkg/sql/ttl/ttljob/ttljob_test.go b/pkg/sql/ttl/ttljob/ttljob_test.go index 050f90e4445e..e96928c182e6 100644 --- a/pkg/sql/ttl/ttljob/ttljob_test.go +++ b/pkg/sql/ttl/ttljob/ttljob_test.go @@ -301,6 +301,15 @@ func TestRowLevelTTLJobRandomEntries(t *testing.T) { numExpiredRows: 1001, numNonExpiredRows: 5, }, + { + desc: "one column pk with statistics", + createTable: `CREATE TABLE tbl ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + text TEXT +) WITH (ttl_expire_after = '30 days', ttl_row_stats_poll_interval = '1 minute')`, + numExpiredRows: 1001, + numNonExpiredRows: 5, + }, { desc: "one column pk, concurrentSchemaChange", createTable: `CREATE TABLE tbl ( @@ -398,6 +407,9 @@ func TestRowLevelTTLJobRandomEntries(t *testing.T) { var zeroDuration time.Duration th, cleanupFunc := newRowLevelTTLTestJobTestHelper(t, sql.TTLTestingKnobs{ AOSTDuration: &zeroDuration, + OnStatisticsError: func(err error) { + require.NoError(t, err, "error gathering statistics") + }, }) defer cleanupFunc() diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go index d082936e106c..a7654e7b7fb6 100644 --- a/pkg/ts/catalog/chart_catalog.go +++ b/pkg/ts/catalog/chart_catalog.go @@ -2430,6 +2430,14 @@ var charts = []sectionDescription{ }, AxisLabel: "Latency (nanoseconds)", }, + { + Title: "Row Statistics", + Metrics: []string{ + "jobs.row_level_ttl.total_rows", + "jobs.row_level_ttl.total_expired_rows", + }, + AxisLabel: "Number of Rows", + }, }, }, { From c248ac64fc3bafb61f6ab8ed40186d680f6dc262 Mon Sep 17 00:00:00 2001 From: Andrii Vorobiov Date: Sun, 2 Jan 2022 21:56:56 +0200 Subject: [PATCH 3/6] server: hot ranges api This change implements second version of hot ranges api that's required for UI to represent enhanced Hot Ranges page. Before, Hot ranges (under Advanced debugging page) used former version of HotRanges api that provided information about hot ranges with internal/ sensitive data (see: https://github.com/cockroachdb/cockroach/issues/53212) that should not be exposed to users. Now, in addition to existing endpoint, additional one is implemented that is based on a former version and provides hot ranges information that is only needed for Hot Ranges page (range id, qps, table, db, and index names for particular range). The list of hot ranges and their QPS is provided by `HotRanges` service and then information like DB, table and index names are retrieved from range's `StartKey` that might include this info or not (in case if it's meta range, or range that stores index itself for instance). `HotRange` and `HotRangeV2` services expect the same request type as an argument but return different responses. `HotRangeV2` service returns a flat list of hot ranges instead of grouped ranges per node/store. Release note: None server: add leaseholder node id to hot ranges api Current change extends `statuspb.HotRangesResponse` to include `LeaseholderNodeID` field to indicate the node id that contains leaseholder replica for current hot range. This change was made in `localHotRanges` function (that is used by `HotRanges` that in turn used by `HotRangeV2` service) to reuse existing logic of iteration over the stores and querying hot ranges. It extends its response by `LeaseholderNodeID` field. Otherwise, the same logic should be implemented in `HotRangeV2` service by calling `VisitStores` iterator. Release note: None Release justification: bug fixes and low-risk updates to new functionality --- docs/generated/http/full.md | 66 +++++++++++++ docs/generated/http/hotranges-other.md | 1 + pkg/server/BUILD.bazel | 1 + pkg/server/serverpb/status.proto | 52 +++++++++++ pkg/server/status.go | 92 +++++++++++++++++++ pkg/server/status_test.go | 25 +++++ .../views/reports/containers/debug/index.tsx | 8 +- 7 files changed, 241 insertions(+), 4 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 3059c4f009ec..9e410296d420 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3209,6 +3209,72 @@ target node(s) selected in a HotRangesRequest. | ----- | ---- | ----- | ----------- | -------------- | | desc | [cockroach.roachpb.RangeDescriptor](#cockroach.server.serverpb.HotRangesResponse-cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.

TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) | | queries_per_second | [double](#cockroach.server.serverpb.HotRangesResponse-double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) | +| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponse-int32) | | LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder | [reserved](#support-status) | + + + + + + +## HotRangesV2 + +`GET /_status/v2/hotranges` + + + +Support status: [reserved](#support-status) + +#### Request Parameters + + + + +HotRangesRequest queries one or more cluster nodes for a list +of ranges currently considered “hot” by the node(s). + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| node_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.

If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) | + + + + + + + +#### Response Parameters + + + + + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | ranges contain list of hot ranges info that has highest number of QPS | [reserved](#support-status) | + + + + + + + +#### HotRangesResponseV2.HotRange + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| range_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | range_id indicates Range ID that's identified as hot range | [reserved](#support-status) | +| node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | node_id indicates on node that contains current hot range | [reserved](#support-status) | +| qps | [double](#cockroach.server.serverpb.HotRangesResponseV2-double) | | qps (queries per second) shows the amount of queries that interact with current range | [reserved](#support-status) | +| table_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | table_name indicates table which data is stored in this hot range | [reserved](#support-status) | +| database_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | database_name indicates on database that has current hot range | [reserved](#support-status) | +| index_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | index_name indicates the index name for current range | [reserved](#support-status) | +| replica_node_ids | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | repeated | replica_node_ids specifies the list of node ids that contain replicas with current hot range | [reserved](#support-status) | +| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder | [reserved](#support-status) | diff --git a/docs/generated/http/hotranges-other.md b/docs/generated/http/hotranges-other.md index 55397521a649..7d5f0b096b97 100644 --- a/docs/generated/http/hotranges-other.md +++ b/docs/generated/http/hotranges-other.md @@ -62,5 +62,6 @@ Support status: [alpha](#support-status) | ----- | ---- | ----- | ----------- | -------------- | | desc | [cockroach.roachpb.RangeDescriptor](#cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.

TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) | | queries_per_second | [double](#double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) | +| leaseholder_node_id | [int32](#int32) | | LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder | [reserved](#support-status) | diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index f58762595581..e970f1f1bc24 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -142,6 +142,7 @@ go_library( "//pkg/spanconfig/spanconfigsqltranslator", "//pkg/spanconfig/spanconfigsqlwatcher", "//pkg/sql", + "//pkg/sql/catalog", "//pkg/sql/catalog/bootstrap", "//pkg/sql/catalog/catalogkeys", "//pkg/sql/catalog/catconstants", diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index a45a6d50d078..57c40d4d3d96 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1140,6 +1140,12 @@ message HotRangesResponse { // on this range. // API: PUBLIC ALPHA double queries_per_second = 2; + // LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder + int32 leaseholder_node_id = 3 [ + (gogoproto.customname) = "LeaseholderNodeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; } // StoreResponse contains the part of a hot ranges report that @@ -1188,6 +1194,45 @@ message HotRangesResponse { ]; } +message HotRangesResponseV2 { + message HotRange { + // range_id indicates Range ID that's identified as hot range + int32 range_id = 1 [ + (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID", + (gogoproto.customname) = "RangeID" + ]; + // node_id indicates on node that contains current hot range + int32 node_id = 2 [ + (gogoproto.customname) = "NodeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; + // qps (queries per second) shows the amount of queries that interact with current range + double qps = 3 [ + (gogoproto.customname) = "QPS" + ]; + // table_name indicates table which data is stored in this hot range + string table_name = 4; + // database_name indicates on database that has current hot range + string database_name = 5; + // index_name indicates the index name for current range + string index_name = 6; + // replica_node_ids specifies the list of node ids that contain replicas with current hot range + repeated int32 replica_node_ids = 7 [ + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; + // leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder + int32 leaseholder_node_id = 8 [ + (gogoproto.customname) = "LeaseholderNodeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; + } + // ranges contain list of hot ranges info that has highest number of QPS + repeated HotRange ranges = 1; +} + message RangeRequest { int64 range_id = 1; } @@ -1874,6 +1919,13 @@ service Status { get : "/_status/hotranges" }; } + + rpc HotRangesV2(HotRangesRequest) returns (HotRangesResponseV2) { + option (google.api.http) = { + get : "/_status/v2/hotranges" + }; + } + rpc Range(RangeRequest) returns (RangeResponse) { option (google.api.http) = { get : "/_status/range/{range_id}" diff --git a/pkg/server/status.go b/pkg/server/status.go index 1f8d7c9b01cb..40e89f5c72ec 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -49,7 +49,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql" + "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/contention" "github.com/cockroachdb/cockroach/pkg/sql/contentionpb" "github.com/cockroachdb/cockroach/pkg/sql/flowinfra" @@ -2105,6 +2107,92 @@ func (s *statusServer) HotRanges( return response, nil } +func (s *statusServer) HotRangesV2( + ctx context.Context, req *serverpb.HotRangesRequest, +) (*serverpb.HotRangesResponseV2, error) { + resp, err := s.HotRanges(ctx, req) + if err != nil { + return nil, err + } + + dbNames := make(map[uint32]string) + tableNames := make(map[uint32]string) + indexNames := make(map[uint32]map[uint32]string) + parents := make(map[uint32]uint32) + + var descrs []catalog.Descriptor + if err = s.sqlServer.distSQLServer.CollectionFactory.Txn( + ctx, s.sqlServer.internalExecutor, s.db, + func(ctx context.Context, txn *kv.Txn, descriptors *descs.Collection) error { + all, err := descriptors.GetAllDescriptors(ctx, txn) + if err != nil { + return err + } + descrs = all.OrderedDescriptors() + return nil + }); err != nil { + return nil, err + } + + for _, desc := range descrs { + id := uint32(desc.GetID()) + switch desc := desc.(type) { + case catalog.TableDescriptor: + parents[id] = uint32(desc.GetParentID()) + tableNames[id] = desc.GetName() + indexNames[id] = make(map[uint32]string) + for _, idx := range desc.AllIndexes() { + indexNames[id][uint32(idx.GetID())] = idx.GetName() + } + case catalog.DatabaseDescriptor: + dbNames[id] = desc.GetName() + } + } + + var ranges []*serverpb.HotRangesResponseV2_HotRange + // TODO (koorosh): how to flatten triple nested loop? + for nodeID, hr := range resp.HotRangesByNodeID { + for _, store := range hr.Stores { + for _, r := range store.HotRanges { + var ( + dbName, tableName, indexName string + replicaNodeIDs []roachpb.NodeID + ) + _, tableID, err := s.sqlServer.execCfg.Codec.DecodeTablePrefix(r.Desc.StartKey.AsRawKey()) + if err != nil { + continue + } + parent := parents[tableID] + if parent != 0 { + tableName = tableNames[tableID] + dbName = dbNames[parent] + } else { + dbName = dbNames[tableID] + } + _, _, idxID, err := s.sqlServer.execCfg.Codec.DecodeIndexPrefix(r.Desc.StartKey.AsRawKey()) + if err == nil { + indexName = indexNames[tableID][idxID] + } + for _, repl := range r.Desc.Replicas().Descriptors() { + replicaNodeIDs = append(replicaNodeIDs, repl.NodeID) + } + ranges = append(ranges, &serverpb.HotRangesResponseV2_HotRange{ + RangeID: r.Desc.RangeID, + NodeID: nodeID, + QPS: r.QueriesPerSecond, + TableName: tableName, + DatabaseName: dbName, + IndexName: indexName, + ReplicaNodeIds: replicaNodeIDs, + LeaseholderNodeID: r.LeaseholderNodeID, + }) + } + } + } + + return &serverpb.HotRangesResponseV2{Ranges: ranges}, nil +} + func (s *statusServer) localHotRanges(ctx context.Context) serverpb.HotRangesResponse_NodeResponse { var resp serverpb.HotRangesResponse_NodeResponse err := s.stores.VisitStores(func(store *kvserver.Store) error { @@ -2114,6 +2202,10 @@ func (s *statusServer) localHotRanges(ctx context.Context) serverpb.HotRangesRes HotRanges: make([]serverpb.HotRangesResponse_HotRange, len(ranges)), } for i, r := range ranges { + replica, err := store.GetReplica(r.Desc.GetRangeID()) + if err == nil { + storeResp.HotRanges[i].LeaseholderNodeID = replica.State(ctx).Lease.Replica.NodeID + } storeResp.HotRanges[i].Desc = *r.Desc storeResp.HotRanges[i].QueriesPerSecond = r.QPS } diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index 0e3dd78617f5..ae14b903ea4b 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -1102,6 +1102,31 @@ func TestHotRangesResponse(t *testing.T) { } } +func TestHotRanges2Response(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ts := startServer(t) + defer ts.Stopper().Stop(context.Background()) + + var hotRangesResp serverpb.HotRangesResponseV2 + if err := getStatusJSONProto(ts, "v2/hotranges", &hotRangesResp); err != nil { + t.Fatal(err) + } + if len(hotRangesResp.Ranges) == 0 { + t.Fatalf("didn't get hot range responses from any nodes") + } + lastQPS := math.MaxFloat64 + for _, r := range hotRangesResp.Ranges { + if r.RangeID == 0 { + t.Errorf("unexpected empty range id: %d", r.RangeID) + } + if r.QPS > lastQPS { + t.Errorf("unexpected increase in qps between ranges; prev=%.2f, current=%.2f", lastQPS, r.QPS) + } + lastQPS = r.QPS + } +} + func TestRangesResponse(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx index 2b57bd4616d3..c6d2d4bf8442 100644 --- a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx +++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx @@ -505,13 +505,13 @@ export default function Debug() { From af84b8a67d0e28a4b982004fa4743e7c42416e67 Mon Sep 17 00:00:00 2001 From: Andrii Vorobiov Date: Mon, 10 Jan 2022 12:14:13 +0200 Subject: [PATCH 4/6] server: update hot ranges api/v2 version Before, `listHotRanges` request handler of `apiV2Server` relied on `HotRanges` service that is now should be replaced by new `HotRangesV2` implementation. Current change reuses HotRangeV2 service in `api/v2/ranges/hot` api. It allows to share the same logic between REST and gRPC endpoints and gradually migrate to new version of API. Release note: None Release justification: bug fixes and low-risk updates to new functionality --- docs/generated/http/full.md | 5 ++- docs/generated/swagger/spec.json | 64 ++++++++++++++++++++++++++++---- pkg/server/api_v2_ranges.go | 60 +++++++++++++++++------------- pkg/server/api_v2_ranges_test.go | 17 ++------- pkg/server/serverpb/status.proto | 4 ++ pkg/server/status.go | 51 ++++++++++++++++--------- 6 files changed, 135 insertions(+), 66 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 9e410296d420..92f8a0c6ad36 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3248,7 +3248,7 @@ of ranges currently considered “hot” by the node(s). - +HotRangesResponseV2 is a response payload returned by `HotRangesV2` service. | Field | Type | Label | Description | Support status | @@ -3263,7 +3263,7 @@ of ranges currently considered “hot” by the node(s). #### HotRangesResponseV2.HotRange - +HotRange message describes a single hot range, ie its QPS, node ID it belongs to, etc. | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | @@ -3275,6 +3275,7 @@ of ranges currently considered “hot” by the node(s). | index_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | index_name indicates the index name for current range | [reserved](#support-status) | | replica_node_ids | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | repeated | replica_node_ids specifies the list of node ids that contain replicas with current hot range | [reserved](#support-status) | | leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder | [reserved](#support-status) | +| schema_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | schema_name provides the name of schema (if exists) for table in current range | [reserved](#support-status) | diff --git a/docs/generated/swagger/spec.json b/docs/generated/swagger/spec.json index 5047ea7fe162..9383b9d9d176 100644 --- a/docs/generated/swagger/spec.json +++ b/docs/generated/swagger/spec.json @@ -1023,6 +1023,12 @@ }, "x-go-package": "github.com/cockroachdb/cockroach/pkg/util/metric" }, + "RangeID": { + "type": "integer", + "format": "int64", + "title": "A RangeID is a unique ID associated to a Raft consensus group.", + "x-go-package": "github.com/cockroachdb/cockroach/pkg/roachpb" + }, "RangeProblems": { "type": "object", "title": "RangeProblems describes issues reported by a range. For internal use only.", @@ -1788,6 +1794,51 @@ }, "x-go-package": "github.com/cockroachdb/cockroach/pkg/server" }, + "hotRangeInfo": { + "description": "(ie its range ID, QPS, table name, etc.).", + "type": "object", + "title": "Hot range details struct describes common information about hot range,", + "properties": { + "database_name": { + "type": "string", + "x-go-name": "DatabaseName" + }, + "index_name": { + "type": "string", + "x-go-name": "IndexName" + }, + "leaseholder_node_id": { + "$ref": "#/definitions/NodeID" + }, + "node_id": { + "$ref": "#/definitions/NodeID" + }, + "qps": { + "type": "number", + "format": "double", + "x-go-name": "QPS" + }, + "range_id": { + "$ref": "#/definitions/RangeID" + }, + "replica_node_ids": { + "type": "array", + "items": { + "$ref": "#/definitions/NodeID" + }, + "x-go-name": "ReplicaNodeIDs" + }, + "schema_name": { + "type": "string", + "x-go-name": "SchemaName" + }, + "table_name": { + "type": "string", + "x-go-name": "TableName" + } + }, + "x-go-package": "github.com/cockroachdb/cockroach/pkg/server" + }, "hotRangesResponse": { "type": "object", "title": "Response struct for listHotRanges.", @@ -1797,15 +1848,12 @@ "type": "string", "x-go-name": "Next" }, - "ranges_by_node_id": { - "type": "object", - "additionalProperties": { - "type": "array", - "items": { - "$ref": "#/definitions/rangeDescriptorInfo" - } + "ranges": { + "type": "array", + "items": { + "$ref": "#/definitions/hotRangeInfo" }, - "x-go-name": "RangesByNodeID" + "x-go-name": "Ranges" }, "response_error": { "type": "array", diff --git a/pkg/server/api_v2_ranges.go b/pkg/server/api_v2_ranges.go index 9794efe0f846..206aaaec9dd6 100644 --- a/pkg/server/api_v2_ranges.go +++ b/pkg/server/api_v2_ranges.go @@ -14,7 +14,6 @@ import ( "context" "fmt" "net/http" - "sort" "strconv" "strings" @@ -415,13 +414,29 @@ type responseError struct { // // swagger:model hotRangesResponse type hotRangesResponse struct { - RangesByNodeID map[string][]rangeDescriptorInfo `json:"ranges_by_node_id"` - Errors []responseError `json:"response_error,omitempty"` + Ranges []hotRangeInfo `json:"ranges"` + Errors []responseError `json:"response_error,omitempty"` // Continuation token for the next paginated call. Use as the `start` // parameter. Next string `json:"next,omitempty"` } +// Hot range details struct describes common information about hot range, +// (ie its range ID, QPS, table name, etc.). +// +// swagger:model hotRangeInfo +type hotRangeInfo struct { + RangeID roachpb.RangeID `json:"range_id"` + NodeID roachpb.NodeID `json:"node_id"` + QPS float64 `json:"qps"` + LeaseholderNodeID roachpb.NodeID `json:"leaseholder_node_id"` + TableName string `json:"table_name"` + DatabaseName string `json:"database_name"` + IndexName string `json:"index_name"` + SchemaName string `json:"schema_name"` + ReplicaNodeIDs []roachpb.NodeID `json:"replica_node_ids"` +} + // swagger:operation GET /ranges/hot/ listHotRanges // // List hot ranges @@ -464,9 +479,7 @@ func (a *apiV2Server) listHotRanges(w http.ResponseWriter, r *http.Request) { nodeIDStr := r.URL.Query().Get("node_id") limit, start := getRPCPaginationValues(r) - response := &hotRangesResponse{ - RangesByNodeID: make(map[string][]rangeDescriptorInfo), - } + response := &hotRangesResponse{} var requestedNodes []roachpb.NodeID if len(nodeIDStr) > 0 { requestedNodeID, _, err := a.status.parseNodeID(nodeIDStr) @@ -484,32 +497,29 @@ func (a *apiV2Server) listHotRanges(w http.ResponseWriter, r *http.Request) { remoteRequest := serverpb.HotRangesRequest{NodeID: "local"} nodeFn := func(ctx context.Context, client interface{}, nodeID roachpb.NodeID) (interface{}, error) { status := client.(serverpb.StatusClient) - resp, err := status.HotRanges(ctx, &remoteRequest) + resp, err := status.HotRangesV2(ctx, &remoteRequest) if err != nil || resp == nil { return nil, err } - rangeDescriptorInfos := make([]rangeDescriptorInfo, 0) - for _, store := range resp.HotRangesByNodeID[nodeID].Stores { - for _, hotRange := range store.HotRanges { - var r rangeDescriptorInfo - r.init(&hotRange.Desc) - r.StoreID = int32(store.StoreID) - r.QueriesPerSecond = hotRange.QueriesPerSecond - rangeDescriptorInfos = append(rangeDescriptorInfos, r) + + var hotRangeInfos = make([]hotRangeInfo, len(resp.Ranges)) + for i, r := range resp.Ranges { + hotRangeInfos[i] = hotRangeInfo{ + RangeID: r.RangeID, + NodeID: r.NodeID, + QPS: r.QPS, + LeaseholderNodeID: r.LeaseholderNodeID, + TableName: r.TableName, + DatabaseName: r.DatabaseName, + IndexName: r.IndexName, + ReplicaNodeIDs: r.ReplicaNodeIds, + SchemaName: r.SchemaName, } } - sort.Slice(rangeDescriptorInfos, func(i, j int) bool { - if rangeDescriptorInfos[i].StoreID == rangeDescriptorInfos[j].StoreID { - return rangeDescriptorInfos[i].RangeID < rangeDescriptorInfos[j].RangeID - } - return rangeDescriptorInfos[i].StoreID < rangeDescriptorInfos[j].StoreID - }) - return rangeDescriptorInfos, nil + return hotRangeInfos, nil } responseFn := func(nodeID roachpb.NodeID, resp interface{}) { - if hotRangesResp, ok := resp.([]rangeDescriptorInfo); ok { - response.RangesByNodeID[nodeID.String()] = hotRangesResp - } + response.Ranges = append(response.Ranges, resp.([]hotRangeInfo)...) } errorFn := func(nodeID roachpb.NodeID, err error) { response.Errors = append(response.Errors, responseError{ diff --git a/pkg/server/api_v2_ranges_test.go b/pkg/server/api_v2_ranges_test.go index bfffae1c4d4f..74af7708c466 100644 --- a/pkg/server/api_v2_ranges_test.go +++ b/pkg/server/api_v2_ranges_test.go @@ -46,25 +46,16 @@ func TestHotRangesV2(t *testing.T) { require.NoError(t, json.NewDecoder(resp.Body).Decode(&hotRangesResp)) require.NoError(t, resp.Body.Close()) - if len(hotRangesResp.RangesByNodeID) == 0 { + if len(hotRangesResp.Ranges) == 0 { t.Fatalf("didn't get hot range responses from any nodes") } if len(hotRangesResp.Errors) > 0 { t.Errorf("got an error in hot range response from n%d: %v", hotRangesResp.Errors[0].NodeID, hotRangesResp.Errors[0].ErrorMessage) } - - for nodeID, nodeResp := range hotRangesResp.RangesByNodeID { - if len(nodeResp) == 0 { - t.Fatalf("didn't get hot range response from node n%s", nodeID) - } - // We don't check for ranges being sorted by QPS, as this hot ranges - // report does not use that as its sort key (for stability across multiple - // pagination calls). - for _, r := range nodeResp { - if r.RangeID == 0 || (len(r.StartKey) == 0 && len(r.EndKey) == 0) { - t.Errorf("unexpected empty/unpopulated range descriptor: %+v", r) - } + for _, r := range hotRangesResp.Ranges { + if r.RangeID == 0 || r.NodeID == 0 { + t.Errorf("unexpected empty/unpopulated range descriptor: %+v", r) } } } diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 57c40d4d3d96..046ee58ee962 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1194,7 +1194,9 @@ message HotRangesResponse { ]; } +// HotRangesResponseV2 is a response payload returned by `HotRangesV2` service. message HotRangesResponseV2 { + // HotRange message describes a single hot range, ie its QPS, node ID it belongs to, etc. message HotRange { // range_id indicates Range ID that's identified as hot range int32 range_id = 1 [ @@ -1228,6 +1230,8 @@ message HotRangesResponseV2 { (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" ]; + // schema_name provides the name of schema (if exists) for table in current range + string schema_name = 9; } // ranges contain list of hot ranges info that has highest number of QPS repeated HotRange ranges = 1; diff --git a/pkg/server/status.go b/pkg/server/status.go index 40e89f5c72ec..bc8dfcd67dcf 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -2107,6 +2107,17 @@ func (s *statusServer) HotRanges( return response, nil } +type hotRangeReportMeta struct { + dbName string + tableName string + schemaName string + indexNames map[uint32]string + parentID uint32 + schemaParentID uint32 +} + +// HotRangesV2 returns hot ranges from all stores on requested node or all nodes in case +// request message doesn't include specific node ID. func (s *statusServer) HotRangesV2( ctx context.Context, req *serverpb.HotRangesRequest, ) (*serverpb.HotRangesResponseV2, error) { @@ -2115,11 +2126,7 @@ func (s *statusServer) HotRangesV2( return nil, err } - dbNames := make(map[uint32]string) - tableNames := make(map[uint32]string) - indexNames := make(map[uint32]map[uint32]string) - parents := make(map[uint32]uint32) - + rangeReportMetas := make(map[uint32]hotRangeReportMeta) var descrs []catalog.Descriptor if err = s.sqlServer.distSQLServer.CollectionFactory.Txn( ctx, s.sqlServer.internalExecutor, s.db, @@ -2136,42 +2143,49 @@ func (s *statusServer) HotRangesV2( for _, desc := range descrs { id := uint32(desc.GetID()) + meta := hotRangeReportMeta{ + indexNames: map[uint32]string{}, + } switch desc := desc.(type) { case catalog.TableDescriptor: - parents[id] = uint32(desc.GetParentID()) - tableNames[id] = desc.GetName() - indexNames[id] = make(map[uint32]string) + meta.tableName = desc.GetName() + meta.parentID = uint32(desc.GetParentID()) + meta.schemaParentID = uint32(desc.GetParentSchemaID()) for _, idx := range desc.AllIndexes() { - indexNames[id][uint32(idx.GetID())] = idx.GetName() + meta.indexNames[uint32(idx.GetID())] = idx.GetName() } + case catalog.SchemaDescriptor: + meta.schemaName = desc.GetName() case catalog.DatabaseDescriptor: - dbNames[id] = desc.GetName() + meta.dbName = desc.GetName() } + rangeReportMetas[id] = meta } var ranges []*serverpb.HotRangesResponseV2_HotRange - // TODO (koorosh): how to flatten triple nested loop? for nodeID, hr := range resp.HotRangesByNodeID { for _, store := range hr.Stores { for _, r := range store.HotRanges { var ( - dbName, tableName, indexName string - replicaNodeIDs []roachpb.NodeID + dbName, tableName, indexName, schemaName string + replicaNodeIDs []roachpb.NodeID ) _, tableID, err := s.sqlServer.execCfg.Codec.DecodeTablePrefix(r.Desc.StartKey.AsRawKey()) if err != nil { continue } - parent := parents[tableID] + parent := rangeReportMetas[tableID].parentID if parent != 0 { - tableName = tableNames[tableID] - dbName = dbNames[parent] + tableName = rangeReportMetas[tableID].tableName + dbName = rangeReportMetas[parent].dbName } else { - dbName = dbNames[tableID] + dbName = rangeReportMetas[tableID].dbName } + schemaParent := rangeReportMetas[tableID].schemaParentID + schemaName = rangeReportMetas[schemaParent].schemaName _, _, idxID, err := s.sqlServer.execCfg.Codec.DecodeIndexPrefix(r.Desc.StartKey.AsRawKey()) if err == nil { - indexName = indexNames[tableID][idxID] + indexName = rangeReportMetas[tableID].indexNames[idxID] } for _, repl := range r.Desc.Replicas().Descriptors() { replicaNodeIDs = append(replicaNodeIDs, repl.NodeID) @@ -2181,6 +2195,7 @@ func (s *statusServer) HotRangesV2( NodeID: nodeID, QPS: r.QueriesPerSecond, TableName: tableName, + SchemaName: schemaName, DatabaseName: dbName, IndexName: indexName, ReplicaNodeIds: replicaNodeIDs, From 35d2b786c8315bb8cdb3aeb711c1c413e379cfba Mon Sep 17 00:00:00 2001 From: Andrii Vorobiov Date: Fri, 14 Jan 2022 17:09:17 +0200 Subject: [PATCH 5/6] server, ui: use POST request method for HotRangesV2 service Initially, `HotRangesV2` service in status server was defined to use GET method to handle HTTP request. It was convenient way to display response data in Advanced debugging page since it allowed to render response body right on to page. But now, hot ranges will be used on user facing page and request dispatching for hot ranges api should follow generic workflow: initialize `HotRangesRequest` protobuf message and dispatch request with `src/util/api` service. This restriction forces to use POST method (since GET method doesn't allow to provide request body). In addition, Hot Ranges debugging page is refactored to use `api` service. Release note: None Release justification: bug fixes and low-risk updates to new functionality --- docs/generated/http/full.md | 24 ++++---- docs/generated/http/hotranges-other.md | 2 +- pkg/server/serverpb/status.proto | 25 ++++---- pkg/server/status.go | 1 + pkg/server/status_test.go | 2 +- pkg/ui/workspaces/db-console/src/app.tsx | 7 +++ pkg/ui/workspaces/db-console/src/util/api.ts | 15 +++++ .../views/reports/containers/debug/index.tsx | 20 +++++-- .../reports/containers/hotranges/index.tsx | 57 +++++++++++++++++++ 9 files changed, 123 insertions(+), 30 deletions(-) create mode 100644 pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 92f8a0c6ad36..5438796a7431 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3209,7 +3209,7 @@ target node(s) selected in a HotRangesRequest. | ----- | ---- | ----- | ----------- | -------------- | | desc | [cockroach.roachpb.RangeDescriptor](#cockroach.server.serverpb.HotRangesResponse-cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.

TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) | | queries_per_second | [double](#cockroach.server.serverpb.HotRangesResponse-double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) | -| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponse-int32) | | LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder | [reserved](#support-status) | +| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponse-int32) | | LeaseholderNodeID indicates the Node ID that is the current leaseholder for the given range. | [reserved](#support-status) | @@ -3218,7 +3218,7 @@ target node(s) selected in a HotRangesRequest. ## HotRangesV2 -`GET /_status/v2/hotranges` +`POST /_status/v2/hotranges` @@ -3253,7 +3253,7 @@ HotRangesResponseV2 is a response payload returned by `HotRangesV2` service. | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | -| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | ranges contain list of hot ranges info that has highest number of QPS | [reserved](#support-status) | +| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | ranges contain list of hot ranges info that has highest number of QPS. | [reserved](#support-status) | @@ -3267,15 +3267,15 @@ HotRange message describes a single hot range, ie its QPS, node ID it belongs to | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | -| range_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | range_id indicates Range ID that's identified as hot range | [reserved](#support-status) | -| node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | node_id indicates on node that contains current hot range | [reserved](#support-status) | -| qps | [double](#cockroach.server.serverpb.HotRangesResponseV2-double) | | qps (queries per second) shows the amount of queries that interact with current range | [reserved](#support-status) | -| table_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | table_name indicates table which data is stored in this hot range | [reserved](#support-status) | -| database_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | database_name indicates on database that has current hot range | [reserved](#support-status) | -| index_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | index_name indicates the index name for current range | [reserved](#support-status) | -| replica_node_ids | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | repeated | replica_node_ids specifies the list of node ids that contain replicas with current hot range | [reserved](#support-status) | -| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder | [reserved](#support-status) | -| schema_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | schema_name provides the name of schema (if exists) for table in current range | [reserved](#support-status) | +| range_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | range_id indicates Range ID that's identified as hot range. | [reserved](#support-status) | +| node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | node_id indicates the node that contains the current hot range. | [reserved](#support-status) | +| qps | [double](#cockroach.server.serverpb.HotRangesResponseV2-double) | | qps (queries per second) shows the amount of queries that interact with current range. | [reserved](#support-status) | +| table_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | table_name indicates the SQL table that the range belongs to. | [reserved](#support-status) | +| database_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | database_name indicates on database that has current hot range. | [reserved](#support-status) | +| index_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | index_name indicates the index name for current range. | [reserved](#support-status) | +| replica_node_ids | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | repeated | replica_node_ids specifies the list of node ids that contain replicas with current hot range. | [reserved](#support-status) | +| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | leaseholder_node_id indicates the Node ID that is the current leaseholder for the given range. | [reserved](#support-status) | +| schema_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | schema_name provides the name of schema (if exists) for table in current range. | [reserved](#support-status) | diff --git a/docs/generated/http/hotranges-other.md b/docs/generated/http/hotranges-other.md index 7d5f0b096b97..fa43e6f7b957 100644 --- a/docs/generated/http/hotranges-other.md +++ b/docs/generated/http/hotranges-other.md @@ -62,6 +62,6 @@ Support status: [alpha](#support-status) | ----- | ---- | ----- | ----------- | -------------- | | desc | [cockroach.roachpb.RangeDescriptor](#cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.

TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) | | queries_per_second | [double](#double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) | -| leaseholder_node_id | [int32](#int32) | | LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder | [reserved](#support-status) | +| leaseholder_node_id | [int32](#int32) | | LeaseholderNodeID indicates the Node ID that is the current leaseholder for the given range. | [reserved](#support-status) | diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 046ee58ee962..49d381736922 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1140,7 +1140,7 @@ message HotRangesResponse { // on this range. // API: PUBLIC ALPHA double queries_per_second = 2; - // LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder + // LeaseholderNodeID indicates the Node ID that is the current leaseholder for the given range. int32 leaseholder_node_id = 3 [ (gogoproto.customname) = "LeaseholderNodeID", (gogoproto.casttype) = @@ -1198,42 +1198,42 @@ message HotRangesResponse { message HotRangesResponseV2 { // HotRange message describes a single hot range, ie its QPS, node ID it belongs to, etc. message HotRange { - // range_id indicates Range ID that's identified as hot range + // range_id indicates Range ID that's identified as hot range. int32 range_id = 1 [ (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID", (gogoproto.customname) = "RangeID" ]; - // node_id indicates on node that contains current hot range + // node_id indicates the node that contains the current hot range. int32 node_id = 2 [ (gogoproto.customname) = "NodeID", (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" ]; - // qps (queries per second) shows the amount of queries that interact with current range + // qps (queries per second) shows the amount of queries that interact with current range. double qps = 3 [ (gogoproto.customname) = "QPS" ]; - // table_name indicates table which data is stored in this hot range + // table_name indicates the SQL table that the range belongs to. string table_name = 4; - // database_name indicates on database that has current hot range + // database_name indicates on database that has current hot range. string database_name = 5; - // index_name indicates the index name for current range + // index_name indicates the index name for current range. string index_name = 6; - // replica_node_ids specifies the list of node ids that contain replicas with current hot range + // replica_node_ids specifies the list of node ids that contain replicas with current hot range. repeated int32 replica_node_ids = 7 [ (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" ]; - // leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder + // leaseholder_node_id indicates the Node ID that is the current leaseholder for the given range. int32 leaseholder_node_id = 8 [ (gogoproto.customname) = "LeaseholderNodeID", (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" ]; - // schema_name provides the name of schema (if exists) for table in current range + // schema_name provides the name of schema (if exists) for table in current range. string schema_name = 9; } - // ranges contain list of hot ranges info that has highest number of QPS + // ranges contain list of hot ranges info that has highest number of QPS. repeated HotRange ranges = 1; } @@ -1926,7 +1926,8 @@ service Status { rpc HotRangesV2(HotRangesRequest) returns (HotRangesResponseV2) { option (google.api.http) = { - get : "/_status/v2/hotranges" + post : "/_status/v2/hotranges" + body : "*" }; } diff --git a/pkg/server/status.go b/pkg/server/status.go index bc8dfcd67dcf..44a0accec471 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -2172,6 +2172,7 @@ func (s *statusServer) HotRangesV2( ) _, tableID, err := s.sqlServer.execCfg.Codec.DecodeTablePrefix(r.Desc.StartKey.AsRawKey()) if err != nil { + log.Warningf(ctx, "cannot decode tableID for range descriptor: %s. %s", r.Desc.String(), err.Error()) continue } parent := rangeReportMetas[tableID].parentID diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index ae14b903ea4b..7f0fc868e7eb 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -1109,7 +1109,7 @@ func TestHotRanges2Response(t *testing.T) { defer ts.Stopper().Stop(context.Background()) var hotRangesResp serverpb.HotRangesResponseV2 - if err := getStatusJSONProto(ts, "v2/hotranges", &hotRangesResp); err != nil { + if err := postStatusJSONProto(ts, "v2/hotranges", &serverpb.HotRangesRequest{}, &hotRangesResp); err != nil { t.Fatal(err) } if len(hotRangesResp.Ranges) == 0 { diff --git a/pkg/ui/workspaces/db-console/src/app.tsx b/pkg/ui/workspaces/db-console/src/app.tsx index ad99c7282586..b970265c2c4d 100644 --- a/pkg/ui/workspaces/db-console/src/app.tsx +++ b/pkg/ui/workspaces/db-console/src/app.tsx @@ -62,6 +62,7 @@ import Nodes from "src/views/reports/containers/nodes"; import ProblemRanges from "src/views/reports/containers/problemRanges"; import Range from "src/views/reports/containers/range"; import ReduxDebug from "src/views/reports/containers/redux"; +import HotRanges from "src/views/reports/containers/hotranges"; import Settings from "src/views/reports/containers/settings"; import Stores from "src/views/reports/containers/stores"; import SQLActivityPage from "src/views/sqlActivity/sqlActivityPage"; @@ -274,6 +275,12 @@ export const App: React.FC = (props: AppProps) => { path="/debug/enqueue_range" component={EnqueueRange} /> + + diff --git a/pkg/ui/workspaces/db-console/src/util/api.ts b/pkg/ui/workspaces/db-console/src/util/api.ts index 1be50156e7be..3d05634ce699 100644 --- a/pkg/ui/workspaces/db-console/src/util/api.ts +++ b/pkg/ui/workspaces/db-console/src/util/api.ts @@ -148,6 +148,9 @@ export type ResetIndexUsageStatsResponseMessage = protos.cockroach.server.server export type UserSQLRolesRequestMessage = protos.cockroach.server.serverpb.UserSQLRolesRequest; export type UserSQLRolesResponseMessage = protos.cockroach.server.serverpb.UserSQLRolesResponse; +export type HotRangesRequestMessage = protos.cockroach.server.serverpb.HotRangesRequest; +export type HotRangesV2ResponseMessage = protos.cockroach.server.serverpb.HotRangesResponseV2; + // API constants export const API_PREFIX = "_admin/v1"; @@ -821,3 +824,15 @@ export function getUserSQLRoles( timeout, ); } + +export function getHotRanges( + req: HotRangesRequestMessage, + timeout?: moment.Duration, +): Promise { + return timeoutFetch( + serverpb.HotRangesResponseV2, + `${STATUS_PREFIX}/v2/hotranges`, + req as any, + timeout, + ); +} diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx index c6d2d4bf8442..c0d32177e397 100644 --- a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx +++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx @@ -505,13 +505,25 @@ export default function Debug() { + + + + diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx new file mode 100644 index 000000000000..934d6862010e --- /dev/null +++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx @@ -0,0 +1,57 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +import React, { useCallback, useEffect, useState } from "react"; +import { RouteComponentProps, withRouter } from "react-router-dom"; +import moment from "moment"; +import { Button } from "@cockroachlabs/ui-components"; +import { cockroach } from "src/js/protos"; +import { getHotRanges } from "src/util/api"; + +type HotRangesProps = RouteComponentProps<{ node_id: string }>; + +const HotRanges = (props: HotRangesProps) => { + const nodeIdParam = props.match.params["node_id"]; + const [nodeId, setNodeId] = useState(nodeIdParam); + const [time, setTime] = useState(moment()); + const [hotRanges, setHotRanges] = useState< + cockroach.server.serverpb.HotRangesResponseV2["ranges"] + >([]); + const requestHotRanges = useCallback(() => { + const request = cockroach.server.serverpb.HotRangesRequest.create({ + node_id: nodeId, + }); + getHotRanges(request).then(response => { + setHotRanges(response.ranges); + setTime(moment()); + }); + }, [nodeId]); + useEffect(requestHotRanges, [requestHotRanges, nodeId]); + useEffect(() => { + setNodeId(nodeIdParam); + }, [nodeIdParam]); + return ( +
+ {`Node ID: ${nodeId ?? "All nodes"}`} + {`Time: ${time.toISOString()}`} + +
{JSON.stringify(hotRanges, null, 2)}
+
+ ); +}; + +export default withRouter(HotRanges); From af62e80448d9f9855c23111d76dcb09091727eb2 Mon Sep 17 00:00:00 2001 From: Andrii Vorobiov Date: Wed, 16 Feb 2022 10:21:26 +0200 Subject: [PATCH 6/6] server: add pagination to hot ranges API This change extends hot ranges API to support pagination of responses. It is also possible to avoid pagination when page size is set to 0. Release note: None Release justification: bug fixes and low-risk updates to new functionality --- docs/generated/http/full.md | 22 ++- docs/generated/http/hotranges-request.md | 2 + pkg/server/serverpb/status.proto | 12 +- pkg/server/status.go | 143 ++++++++++++------ .../reports/containers/hotranges/index.tsx | 27 +++- 5 files changed, 156 insertions(+), 50 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 5438796a7431..977ff4d45f99 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3130,6 +3130,8 @@ of ranges currently considered “hot” by the node(s). | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | | node_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.

If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) | +| page_size | [int32](#cockroach.server.serverpb.HotRangesRequest-int32) | | | [reserved](#support-status) | +| page_token | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | | [reserved](#support-status) | @@ -3236,6 +3238,8 @@ of ranges currently considered “hot” by the node(s). | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | | node_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.

If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) | +| page_size | [int32](#cockroach.server.serverpb.HotRangesRequest-int32) | | | [reserved](#support-status) | +| page_token | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | | [reserved](#support-status) | @@ -3253,7 +3257,9 @@ HotRangesResponseV2 is a response payload returned by `HotRangesV2` service. | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | -| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | ranges contain list of hot ranges info that has highest number of QPS. | [reserved](#support-status) | +| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | Ranges contain list of hot ranges info that has highest number of QPS. | [reserved](#support-status) | +| errors_by_node_id | [HotRangesResponseV2.ErrorsByNodeIdEntry](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.ErrorsByNodeIdEntry) | repeated | errors contains any errors that occurred during fan-out calls to other nodes. | [reserved](#support-status) | +| next_page_token | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | NextPageToken represents next pagination token to request next slice of data. | [reserved](#support-status) | @@ -3281,6 +3287,20 @@ HotRange message describes a single hot range, ie its QPS, node ID it belongs to + +#### HotRangesResponseV2.ErrorsByNodeIdEntry + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| key | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | | | +| value | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | | | + + + + + ## Range diff --git a/docs/generated/http/hotranges-request.md b/docs/generated/http/hotranges-request.md index 6c542161d115..c7ccfd28d4e7 100644 --- a/docs/generated/http/hotranges-request.md +++ b/docs/generated/http/hotranges-request.md @@ -12,5 +12,7 @@ Support status: [alpha](#support-status) | Field | Type | Label | Description | Support status | | ----- | ---- | ----- | ----------- | -------------- | | node_id | [string](#string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.

If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) | +| page_size | [int32](#int32) | | | [reserved](#support-status) | +| page_token | [string](#string) | | | [reserved](#support-status) | diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 49d381736922..aa73cd4f9b46 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1110,6 +1110,8 @@ message HotRangesRequest { // in the cluster. // API: PUBLIC ALPHA string node_id = 1 [(gogoproto.customname) = "NodeID"]; + int32 page_size = 2 [(gogoproto.nullable) = true]; + string page_token = 3 [(gogoproto.nullable) = true]; } // HotRangesResponse is the payload produced in response @@ -1233,8 +1235,16 @@ message HotRangesResponseV2 { // schema_name provides the name of schema (if exists) for table in current range. string schema_name = 9; } - // ranges contain list of hot ranges info that has highest number of QPS. + // Ranges contain list of hot ranges info that has highest number of QPS. repeated HotRange ranges = 1; + // errors contains any errors that occurred during fan-out calls to other nodes. + map errors_by_node_id = 2 [ + (gogoproto.castkey) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID", + (gogoproto.customname) = "ErrorsByNodeID", + (gogoproto.nullable) = false + ]; + // NextPageToken represents next pagination token to request next slice of data. + string next_page_token = 3 [(gogoproto.nullable) = true]; } message RangeRequest { diff --git a/pkg/server/status.go b/pkg/server/status.go index 44a0accec471..917d8140d0ff 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -2121,14 +2121,23 @@ type hotRangeReportMeta struct { func (s *statusServer) HotRangesV2( ctx context.Context, req *serverpb.HotRangesRequest, ) (*serverpb.HotRangesResponseV2, error) { - resp, err := s.HotRanges(ctx, req) - if err != nil { + if _, err := s.privilegeChecker.requireAdminUser(ctx); err != nil { return nil, err } + size := int(req.PageSize) + start := paginationState{} + + if len(req.PageToken) > 0 { + if err := start.UnmarshalText([]byte(req.PageToken)); err != nil { + return nil, err + } + } + rangeReportMetas := make(map[uint32]hotRangeReportMeta) var descrs []catalog.Descriptor - if err = s.sqlServer.distSQLServer.CollectionFactory.Txn( + var err error + if err := s.sqlServer.distSQLServer.CollectionFactory.Txn( ctx, s.sqlServer.internalExecutor, s.db, func(ctx context.Context, txn *kv.Txn, descriptors *descs.Collection) error { all, err := descriptors.GetAllDescriptors(ctx, txn) @@ -2162,51 +2171,99 @@ func (s *statusServer) HotRangesV2( rangeReportMetas[id] = meta } - var ranges []*serverpb.HotRangesResponseV2_HotRange - for nodeID, hr := range resp.HotRangesByNodeID { - for _, store := range hr.Stores { - for _, r := range store.HotRanges { - var ( - dbName, tableName, indexName, schemaName string - replicaNodeIDs []roachpb.NodeID - ) - _, tableID, err := s.sqlServer.execCfg.Codec.DecodeTablePrefix(r.Desc.StartKey.AsRawKey()) - if err != nil { - log.Warningf(ctx, "cannot decode tableID for range descriptor: %s. %s", r.Desc.String(), err.Error()) - continue - } - parent := rangeReportMetas[tableID].parentID - if parent != 0 { - tableName = rangeReportMetas[tableID].tableName - dbName = rangeReportMetas[parent].dbName - } else { - dbName = rangeReportMetas[tableID].dbName - } - schemaParent := rangeReportMetas[tableID].schemaParentID - schemaName = rangeReportMetas[schemaParent].schemaName - _, _, idxID, err := s.sqlServer.execCfg.Codec.DecodeIndexPrefix(r.Desc.StartKey.AsRawKey()) - if err == nil { - indexName = rangeReportMetas[tableID].indexNames[idxID] - } - for _, repl := range r.Desc.Replicas().Descriptors() { - replicaNodeIDs = append(replicaNodeIDs, repl.NodeID) + response := &serverpb.HotRangesResponseV2{ + ErrorsByNodeID: make(map[roachpb.NodeID]string), + } + + var requestedNodes []roachpb.NodeID + if len(req.NodeID) > 0 { + requestedNodeID, _, err := s.parseNodeID(req.NodeID) + if err != nil { + return nil, err + } + requestedNodes = []roachpb.NodeID{requestedNodeID} + } + + dialFn := func(ctx context.Context, nodeID roachpb.NodeID) (interface{}, error) { + client, err := s.dialNode(ctx, nodeID) + return client, err + } + remoteRequest := serverpb.HotRangesRequest{NodeID: "local"} + nodeFn := func(ctx context.Context, client interface{}, nodeID roachpb.NodeID) (interface{}, error) { + status := client.(serverpb.StatusClient) + resp, err := status.HotRanges(ctx, &remoteRequest) + if err != nil || resp == nil { + return nil, err + } + var ranges []*serverpb.HotRangesResponseV2_HotRange + for nodeID, hr := range resp.HotRangesByNodeID { + for _, store := range hr.Stores { + for _, r := range store.HotRanges { + var ( + dbName, tableName, indexName, schemaName string + replicaNodeIDs []roachpb.NodeID + ) + _, tableID, err := s.sqlServer.execCfg.Codec.DecodeTablePrefix(r.Desc.StartKey.AsRawKey()) + if err != nil { + log.Warningf(ctx, "cannot decode tableID for range descriptor: %s. %s", r.Desc.String(), err.Error()) + continue + } + parent := rangeReportMetas[tableID].parentID + if parent != 0 { + tableName = rangeReportMetas[tableID].tableName + dbName = rangeReportMetas[parent].dbName + } else { + dbName = rangeReportMetas[tableID].dbName + } + schemaParent := rangeReportMetas[tableID].schemaParentID + schemaName = rangeReportMetas[schemaParent].schemaName + _, _, idxID, err := s.sqlServer.execCfg.Codec.DecodeIndexPrefix(r.Desc.StartKey.AsRawKey()) + if err == nil { + indexName = rangeReportMetas[tableID].indexNames[idxID] + } + for _, repl := range r.Desc.Replicas().Descriptors() { + replicaNodeIDs = append(replicaNodeIDs, repl.NodeID) + } + ranges = append(ranges, &serverpb.HotRangesResponseV2_HotRange{ + RangeID: r.Desc.RangeID, + NodeID: nodeID, + QPS: r.QueriesPerSecond, + TableName: tableName, + SchemaName: schemaName, + DatabaseName: dbName, + IndexName: indexName, + ReplicaNodeIds: replicaNodeIDs, + LeaseholderNodeID: r.LeaseholderNodeID, + }) } - ranges = append(ranges, &serverpb.HotRangesResponseV2_HotRange{ - RangeID: r.Desc.RangeID, - NodeID: nodeID, - QPS: r.QueriesPerSecond, - TableName: tableName, - SchemaName: schemaName, - DatabaseName: dbName, - IndexName: indexName, - ReplicaNodeIds: replicaNodeIDs, - LeaseholderNodeID: r.LeaseholderNodeID, - }) } } + return ranges, nil + } + responseFn := func(nodeID roachpb.NodeID, resp interface{}) { + if resp == nil { + return + } + hotRanges := resp.([]*serverpb.HotRangesResponseV2_HotRange) + response.Ranges = append(response.Ranges, hotRanges...) } + errorFn := func(nodeID roachpb.NodeID, err error) { + response.ErrorsByNodeID[nodeID] = err.Error() + } + + next, err := s.paginatedIterateNodes( + ctx, "hotRanges", size, start, requestedNodes, dialFn, + nodeFn, responseFn, errorFn) - return &serverpb.HotRangesResponseV2{Ranges: ranges}, nil + if err != nil { + return nil, err + } + var nextBytes []byte + if nextBytes, err = next.MarshalText(); err != nil { + return nil, err + } + response.NextPageToken = string(nextBytes) + return response, nil } func (s *statusServer) localHotRanges(ctx context.Context) serverpb.HotRangesResponse_NodeResponse { diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx index 934d6862010e..4533a36b31c8 100644 --- a/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx +++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/hotranges/index.tsx @@ -24,16 +24,33 @@ const HotRanges = (props: HotRangesProps) => { const [hotRanges, setHotRanges] = useState< cockroach.server.serverpb.HotRangesResponseV2["ranges"] >([]); - const requestHotRanges = useCallback(() => { + const [pageToken, setPageToken] = useState(""); + const pageSize = 50; + + const refreshHotRanges = useCallback(() => { + setHotRanges([]); + setPageToken(""); + }, []); + + useEffect(() => { const request = cockroach.server.serverpb.HotRangesRequest.create({ node_id: nodeId, + page_size: pageSize, + page_token: pageToken, }); getHotRanges(request).then(response => { - setHotRanges(response.ranges); + if (response.ranges.length == 0) { + return; + } + setPageToken(response.next_page_token); + setHotRanges([...hotRanges, ...response.ranges]); setTime(moment()); }); - }, [nodeId]); - useEffect(requestHotRanges, [requestHotRanges, nodeId]); + // Avoid dispatching request when `hotRanges` list is updated. + // This effect should be triggered only when pageToken is changed. + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [pageToken]); + useEffect(() => { setNodeId(nodeIdParam); }, [nodeIdParam]); @@ -46,7 +63,7 @@ const HotRanges = (props: HotRangesProps) => { > {`Node ID: ${nodeId ?? "All nodes"}`} {`Time: ${time.toISOString()}`} -
{JSON.stringify(hotRanges, null, 2)}