From 967b163fc56e77f64de04f2893b81fa081a28f15 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 31 Aug 2022 17:10:47 +0200 Subject: [PATCH 1/3] builtins: stream consistency checker output Also makes it resilient to per-Range errors, which now no longer tank the entire operation. ```sql -- avoid buffering in cli \set display_format=csv; -- avoid rows getting buffered at server set avoid_buffering=true; -- compute as fast as possible SET CLUSTER SETTING server.consistency_check.max_rate = '1tb'; SELECT * FROM crdb_internal.check_consistency(false, '', ''); ``` Release justification: improvement for a debugging-related feature Release note: None --- .../logic_test/builtin_function_notenant | 22 +++--- pkg/sql/sem/builtins/generator_builtins.go | 69 ++++++++++++++++--- 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant b/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant index 696bab428d46..f6c01d180ddb 100644 --- a/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant +++ b/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant @@ -4,20 +4,20 @@ subtest check_consistency # Sanity-check crdb_internal.check_consistency. -statement error start key must be >= "\\x02" -SELECT crdb_internal.check_consistency(true, '\x01', '\xffff') +statement error start key must be > "\\x02" +SELECT crdb_internal.check_consistency(true, '\x02', '\xffff') statement error end key must be < "\\xff\\xff" -SELECT crdb_internal.check_consistency(true, '\x02', '\xffff00') +SELECT crdb_internal.check_consistency(true, '\x0200', '\xffff00') statement error start key must be less than end key -SELECT crdb_internal.check_consistency(true, '\x02', '\x02') +SELECT crdb_internal.check_consistency(true, '\x03', '\x03') statement error start key must be less than end key -SELECT crdb_internal.check_consistency(true, '\x03', '\x02') +SELECT crdb_internal.check_consistency(true, '\x04', '\x03') query ITT -SELECT range_id, status, regexp_replace(detail, '[0-9]+', '', 'g') FROM crdb_internal.check_consistency(true, '\x02', '\xffff') WHERE range_id = 1 +SELECT range_id, status, regexp_replace(detail, '[0-9]+', '', 'g') FROM crdb_internal.check_consistency(true, '\x03', '\xffff') WHERE range_id = 1 ---- 1 RANGE_CONSISTENT stats: {ContainsEstimates: LastUpdateNanos: IntentAge: GCBytesAge: LiveBytes: LiveCount: KeyBytes: KeyCount: ValBytes: ValCount: IntentBytes: IntentCount: SeparatedIntentCount: RangeKeyCount: RangeKeyBytes: RangeValCount: RangeValBytes: SysBytes: SysCount: AbortSpanBytes:} @@ -32,14 +32,18 @@ SELECT count(*) > 5 FROM crdb_internal.check_consistency(true, '', '') true # Query that should touch only a single range. +# +# NB: the use of ScanMetaKVs causes issues here. Bounds [`k`, k.Next()]` don't work, +# with errors such as (here k=\xff): +# pq: failed to verify keys for Scan: end key /Meta2/"\xff\x00" must be greater than start /Meta2/"\xff\x00" query B -SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '\x03', '\x0300') +SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '\xff', '\xffff') ---- true -# Ditto, but implicit start key \x02 +# Ditto, but implicit start key \x03 query B -SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '', '\x0200') +SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '', '\x04') ---- true diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go index 6cb368df72f0..39e46869f314 100644 --- a/pkg/sql/sem/builtins/generator_builtins.go +++ b/pkg/sql/sem/builtins/generator_builtins.go @@ -1827,11 +1827,15 @@ func (j *jsonRecordSetGenerator) Next(ctx context.Context) (bool, error) { } type checkConsistencyGenerator struct { + txn *kv.Txn // to load range descriptors consistencyChecker eval.ConsistencyCheckRunner from, to roachpb.Key mode roachpb.ChecksumMode + + descs []roachpb.RangeDescriptor // remainingRows is populated by Start(). Each Next() call peels of the first - // row and moves it to curRow. + // row and moves it to curRow. When empty, consumes from 'descs' to produce + // more rows. remainingRows []roachpb.CheckConsistencyResponse_Result curRow roachpb.CheckConsistencyResponse_Result } @@ -1850,14 +1854,18 @@ func makeCheckConsistencyGenerator( keyTo := roachpb.Key(*args[2].(*tree.DBytes)) if len(keyFrom) == 0 { - keyFrom = keys.LocalMax + // NB: you'd expect LocalMax here but when we go and call ScanMetaKVs, it + // would interpret LocalMax as Meta1Prefix and translate that to KeyMin, + // then fail on the scan. That method should really handle this better + // but also we should use IterateRangeDescriptors instead. + keyFrom = keys.Meta2Prefix } if len(keyTo) == 0 { keyTo = roachpb.KeyMax } - if bytes.Compare(keyFrom, keys.LocalMax) < 0 { - return nil, errors.Errorf("start key must be >= %q", []byte(keys.LocalMax)) + if bytes.Compare(keyFrom, keys.LocalMax) <= 0 { + return nil, errors.Errorf("start key must be > %q", []byte(keys.LocalMax)) } if bytes.Compare(keyTo, roachpb.KeyMax) > 0 { return nil, errors.Errorf("end key must be < %q", []byte(roachpb.KeyMax)) @@ -1872,6 +1880,7 @@ func makeCheckConsistencyGenerator( } return &checkConsistencyGenerator{ + txn: ctx.Txn, consistencyChecker: ctx.ConsistencyChecker, from: keyFrom, to: keyTo, @@ -1891,18 +1900,62 @@ func (*checkConsistencyGenerator) ResolvedType() *types.T { // Start is part of the tree.ValueGenerator interface. func (c *checkConsistencyGenerator) Start(ctx context.Context, _ *kv.Txn) error { - resp, err := c.consistencyChecker.CheckConsistency(ctx, c.from, c.to, c.mode) + span := roachpb.Span{Key: c.from, EndKey: c.to} + // NB: should use IterateRangeDescriptors here which is in the 'upgrade' + // package to avoid pulling all into memory. That needs a refactor, though. + // kvprober also has some code to iterate in batches. + descs, err := kvclient.ScanMetaKVs(ctx, c.txn, span) if err != nil { return err } - c.remainingRows = resp.Result + for _, v := range descs { + var desc roachpb.RangeDescriptor + if err := v.ValueProto(&desc); err != nil { + return err + } + if len(desc.StartKey) == 0 { + desc.StartKey = keys.MustAddr(keys.LocalMax) + // Elide potential second copy we might be getting for r1 + // if meta1 and meta2 haven't split. + // This too should no longer be necessary with IterateRangeDescriptors. + if len(c.descs) == 1 { + continue + } + } + c.descs = append(c.descs, desc) + } return nil } // Next is part of the tree.ValueGenerator interface. -func (c *checkConsistencyGenerator) Next(_ context.Context) (bool, error) { +func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) { if len(c.remainingRows) == 0 { - return false, nil + if len(c.descs) == 0 { + return false, nil + } + // NB: peeling off the spans one by one allows this generator to produce + // rows in a streaming manner. If we called CheckConsistency(c.from, c.to) + // we would only get the result once all checks have completed and it will + // generally be a lot more brittle since an error will completely wipe out + // the result set. + desc := c.descs[0] + c.descs = c.descs[1:] + resp, err := c.consistencyChecker.CheckConsistency( + ctx, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), c.mode, + ) + if err != nil { + // Emit result as a row, and keep going. + c.remainingRows = []roachpb.CheckConsistencyResponse_Result{ + { + RangeID: desc.RangeID, + StartKey: desc.StartKey, + Status: roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE, + Detail: err.Error(), + }, + } + } else { + c.remainingRows = resp.Result + } } c.curRow = c.remainingRows[0] c.remainingRows = c.remainingRows[1:] From b52735c922baee9cbfb7efad7438d4d766c93e57 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 31 Aug 2022 23:46:59 +0200 Subject: [PATCH 2/3] builtins: add `duration` column to check_consistency Noticed that some checks were taking a very long time but it's hard to tell which ones unless setting up and watching streaming output. Add a duration column. This can also help us reason about the general speed of the consistency checker; it does do very expensive hashing at the moment (sha256) when something much cheaper and weaker would do. Release justification: debug improvement for an internal builtin Release note: None --- docs/generated/sql/functions.md | 2 +- pkg/sql/sem/builtins/fixed_oids.go | 2 +- pkg/sql/sem/builtins/generator_builtins.go | 17 +++++++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md index 17664eac7b70..5cbedb9784a6 100644 --- a/docs/generated/sql/functions.md +++ b/docs/generated/sql/functions.md @@ -3018,7 +3018,7 @@ may increase either contention or retry errors, or both.

Immutable crdb_internal.assignment_cast(val: anyelement, type: anyelement) → anyelement

This function is used internally to perform assignment casts during mutations.

Stable -crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) → tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail}

Runs a consistency check on ranges touching the specified key range. an empty start or end key is treated as the minimum and maximum possible, respectively. stats_only should only be set to false when targeting a small number of ranges to avoid overloading the cluster. Each returned row contains the range ID, the status (a roachpb.CheckConsistencyResponse_Status), and verbose detail.

+crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) → tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail, interval AS duration}

Runs a consistency check on ranges touching the specified key range. an empty start or end key is treated as the minimum and maximum possible, respectively. stats_only should only be set to false when targeting a small number of ranges to avoid overloading the cluster. Each returned row contains the range ID, the status (a roachpb.CheckConsistencyResponse_Status), and verbose detail.

Example usage: SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)

Volatile diff --git a/pkg/sql/sem/builtins/fixed_oids.go b/pkg/sql/sem/builtins/fixed_oids.go index 84c7a2bac85d..b07f58e27771 100644 --- a/pkg/sql/sem/builtins/fixed_oids.go +++ b/pkg/sql/sem/builtins/fixed_oids.go @@ -370,7 +370,7 @@ var builtinOidsBySignature = map[string]oid.Oid{ `crdb_internal.active_version() -> jsonb`: 1296, `crdb_internal.approximate_timestamp(timestamp: decimal) -> timestamp`: 1298, `crdb_internal.assignment_cast(val: anyelement, type: anyelement) -> anyelement`: 1341, - `crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) -> tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail}`: 347, + `crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) -> tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail, interval AS duration}`: 347, `crdb_internal.check_password_hash_format(password: bytes) -> string`: 1376, `crdb_internal.cluster_id() -> uuid`: 1299, `crdb_internal.cluster_name() -> string`: 1301, diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go index 39e46869f314..6ecb00c66b3e 100644 --- a/pkg/sql/sem/builtins/generator_builtins.go +++ b/pkg/sql/sem/builtins/generator_builtins.go @@ -35,6 +35,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/errorutil" "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/mon" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/tracing" "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" "github.com/cockroachdb/errors" @@ -1837,6 +1838,7 @@ type checkConsistencyGenerator struct { // row and moves it to curRow. When empty, consumes from 'descs' to produce // more rows. remainingRows []roachpb.CheckConsistencyResponse_Result + curDuration time.Duration curRow roachpb.CheckConsistencyResponse_Result } @@ -1889,8 +1891,8 @@ func makeCheckConsistencyGenerator( } var checkConsistencyGeneratorType = types.MakeLabeledTuple( - []*types.T{types.Int, types.Bytes, types.String, types.String, types.String}, - []string{"range_id", "start_key", "start_key_pretty", "status", "detail"}, + []*types.T{types.Int, types.Bytes, types.String, types.String, types.String, types.Interval}, + []string{"range_id", "start_key", "start_key_pretty", "status", "detail", "duration"}, ) // ResolvedType is part of the tree.ValueGenerator interface. @@ -1929,6 +1931,7 @@ func (c *checkConsistencyGenerator) Start(ctx context.Context, _ *kv.Txn) error // Next is part of the tree.ValueGenerator interface. func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) { + tBegin := timeutil.Now() if len(c.remainingRows) == 0 { if len(c.descs) == 0 { return false, nil @@ -1954,9 +1957,13 @@ func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) { }, } } else { + // NB: this could have more than one entry, if a range split in the + // meantime. c.remainingRows = resp.Result } } + + c.curDuration = timeutil.Since(tBegin) c.curRow = c.remainingRows[0] c.remainingRows = c.remainingRows[1:] return true, nil @@ -1964,12 +1971,18 @@ func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) { // Values is part of the tree.ValueGenerator interface. func (c *checkConsistencyGenerator) Values() (tree.Datums, error) { + intervalMeta := types.IntervalTypeMetadata{ + DurationField: types.IntervalDurationField{ + DurationType: types.IntervalDurationType_MILLISECOND, + }, + } return tree.Datums{ tree.NewDInt(tree.DInt(c.curRow.RangeID)), tree.NewDBytes(tree.DBytes(c.curRow.StartKey)), tree.NewDString(roachpb.Key(c.curRow.StartKey).String()), tree.NewDString(c.curRow.Status.String()), tree.NewDString(c.curRow.Detail), + tree.NewDInterval(duration.MakeDuration(c.curDuration.Nanoseconds(), 0 /* days */, 0 /* months */), intervalMeta), }, nil } From 19c95242e94595ec68ce6b2dbbf54572e1b9cfb7 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Thu, 8 Sep 2022 14:50:51 +0200 Subject: [PATCH 3/3] builtins: refactor check_consistency for clarity This implements a number of suggestions made by Pavel[^1]. [^1]: https://github.com/cockroachdb/cockroach/pull/87378#discussion_r965081891 Release justification: None needed; 22.2 has been cut Release note: None --- pkg/sql/sem/builtins/generator_builtins.go | 107 ++++++++++++--------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go index 6ecb00c66b3e..7dcdaf18300f 100644 --- a/pkg/sql/sem/builtins/generator_builtins.go +++ b/pkg/sql/sem/builtins/generator_builtins.go @@ -1833,13 +1833,21 @@ type checkConsistencyGenerator struct { from, to roachpb.Key mode roachpb.ChecksumMode + // The descriptors for which we haven't yet emitted rows. Rows are consumed + // from this field and produce one (or more, in the case of splits not reflected + // in the descriptor) rows in `next`. descs []roachpb.RangeDescriptor - // remainingRows is populated by Start(). Each Next() call peels of the first - // row and moves it to curRow. When empty, consumes from 'descs' to produce - // more rows. - remainingRows []roachpb.CheckConsistencyResponse_Result - curDuration time.Duration - curRow roachpb.CheckConsistencyResponse_Result + // The current row, emitted by Values(). + cur roachpb.CheckConsistencyResponse_Result + // The time it took to produce the current row, i.e. how long it took to run + // the consistency check that produced the row. When a consistency check + // produces more than one row (i.e. after a split), all of the duration will + // be attributed to the first row. + dur time.Duration + // next are the potentially prefetched subsequent rows. This is usually empty + // (as one consistency check produces one result which immediately moves to + // `cur`) except when a descriptor we use doesn't reflect subsequent splits. + next []roachpb.CheckConsistencyResponse_Result } var _ eval.ValueGenerator = &checkConsistencyGenerator{} @@ -1929,60 +1937,67 @@ func (c *checkConsistencyGenerator) Start(ctx context.Context, _ *kv.Txn) error return nil } -// Next is part of the tree.ValueGenerator interface. -func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) { +// maybeRefillRows checks whether c.next is empty and if so, consumes the first +// element of c.descs for a consistency check. This populates c.next with at +// least one result (even on error). Returns the duration of the consistency +// check, if any, and zero otherwise. +func (c *checkConsistencyGenerator) maybeRefillRows(ctx context.Context) time.Duration { + if len(c.next) > 0 || len(c.descs) == 0 { + // We have a row to produce or no more ranges to check, so we're done + // for now or for good, respectively. + return 0 + } tBegin := timeutil.Now() - if len(c.remainingRows) == 0 { - if len(c.descs) == 0 { - return false, nil - } - // NB: peeling off the spans one by one allows this generator to produce - // rows in a streaming manner. If we called CheckConsistency(c.from, c.to) - // we would only get the result once all checks have completed and it will - // generally be a lot more brittle since an error will completely wipe out - // the result set. - desc := c.descs[0] - c.descs = c.descs[1:] - resp, err := c.consistencyChecker.CheckConsistency( - ctx, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), c.mode, - ) - if err != nil { - // Emit result as a row, and keep going. - c.remainingRows = []roachpb.CheckConsistencyResponse_Result{ - { - RangeID: desc.RangeID, - StartKey: desc.StartKey, - Status: roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE, - Detail: err.Error(), - }, - } - } else { - // NB: this could have more than one entry, if a range split in the - // meantime. - c.remainingRows = resp.Result - } + // NB: peeling off the spans one by one allows this generator to produce + // rows in a streaming manner. If we called CheckConsistency(c.from, c.to) + // we would only get the result once all checks have completed and it will + // generally be a lot more brittle since an error will completely wipe out + // the result set. + desc := c.descs[0] + c.descs = c.descs[1:] + resp, err := c.consistencyChecker.CheckConsistency( + ctx, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), c.mode, + ) + if err != nil { + resp = &roachpb.CheckConsistencyResponse{Result: []roachpb.CheckConsistencyResponse_Result{{ + RangeID: desc.RangeID, + StartKey: desc.StartKey, + Status: roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE, + Detail: err.Error(), + }}} } - c.curDuration = timeutil.Since(tBegin) - c.curRow = c.remainingRows[0] - c.remainingRows = c.remainingRows[1:] + // NB: this could have more than one entry, if a range split in the + // meantime. + c.next = resp.Result + return timeutil.Since(tBegin) +} + +// Next is part of the tree.ValueGenerator interface. +func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) { + dur := c.maybeRefillRows(ctx) + if len(c.next) == 0 { + return false, nil + } + c.dur, c.cur, c.next = dur, c.next[0], c.next[1:] return true, nil } // Values is part of the tree.ValueGenerator interface. func (c *checkConsistencyGenerator) Values() (tree.Datums, error) { + row := c.cur intervalMeta := types.IntervalTypeMetadata{ DurationField: types.IntervalDurationField{ DurationType: types.IntervalDurationType_MILLISECOND, }, } return tree.Datums{ - tree.NewDInt(tree.DInt(c.curRow.RangeID)), - tree.NewDBytes(tree.DBytes(c.curRow.StartKey)), - tree.NewDString(roachpb.Key(c.curRow.StartKey).String()), - tree.NewDString(c.curRow.Status.String()), - tree.NewDString(c.curRow.Detail), - tree.NewDInterval(duration.MakeDuration(c.curDuration.Nanoseconds(), 0 /* days */, 0 /* months */), intervalMeta), + tree.NewDInt(tree.DInt(row.RangeID)), + tree.NewDBytes(tree.DBytes(row.StartKey)), + tree.NewDString(roachpb.Key(row.StartKey).String()), + tree.NewDString(row.Status.String()), + tree.NewDString(row.Detail), + tree.NewDInterval(duration.MakeDuration(c.dur.Nanoseconds(), 0 /* days */, 0 /* months */), intervalMeta), }, nil }