From 967b163fc56e77f64de04f2893b81fa081a28f15 Mon Sep 17 00:00:00 2001
From: Tobias Grieger
Date: Wed, 31 Aug 2022 17:10:47 +0200
Subject: [PATCH 1/3] builtins: stream consistency checker output
Also makes it resilient to per-Range errors, which now no longer
tank the entire operation.
```sql
-- avoid buffering in cli
\set display_format=csv;
-- avoid rows getting buffered at server
set avoid_buffering=true;
-- compute as fast as possible
SET CLUSTER SETTING server.consistency_check.max_rate = '1tb';
SELECT * FROM crdb_internal.check_consistency(false, '', '');
```
Release justification: improvement for a debugging-related feature
Release note: None
---
.../logic_test/builtin_function_notenant | 22 +++---
pkg/sql/sem/builtins/generator_builtins.go | 69 ++++++++++++++++---
2 files changed, 74 insertions(+), 17 deletions(-)
diff --git a/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant b/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant
index 696bab428d46..f6c01d180ddb 100644
--- a/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant
+++ b/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant
@@ -4,20 +4,20 @@ subtest check_consistency
# Sanity-check crdb_internal.check_consistency.
-statement error start key must be >= "\\x02"
-SELECT crdb_internal.check_consistency(true, '\x01', '\xffff')
+statement error start key must be > "\\x02"
+SELECT crdb_internal.check_consistency(true, '\x02', '\xffff')
statement error end key must be < "\\xff\\xff"
-SELECT crdb_internal.check_consistency(true, '\x02', '\xffff00')
+SELECT crdb_internal.check_consistency(true, '\x0200', '\xffff00')
statement error start key must be less than end key
-SELECT crdb_internal.check_consistency(true, '\x02', '\x02')
+SELECT crdb_internal.check_consistency(true, '\x03', '\x03')
statement error start key must be less than end key
-SELECT crdb_internal.check_consistency(true, '\x03', '\x02')
+SELECT crdb_internal.check_consistency(true, '\x04', '\x03')
query ITT
-SELECT range_id, status, regexp_replace(detail, '[0-9]+', '', 'g') FROM crdb_internal.check_consistency(true, '\x02', '\xffff') WHERE range_id = 1
+SELECT range_id, status, regexp_replace(detail, '[0-9]+', '', 'g') FROM crdb_internal.check_consistency(true, '\x03', '\xffff') WHERE range_id = 1
----
1 RANGE_CONSISTENT stats: {ContainsEstimates: LastUpdateNanos: IntentAge: GCBytesAge: LiveBytes: LiveCount: KeyBytes: KeyCount: ValBytes: ValCount: IntentBytes: IntentCount: SeparatedIntentCount: RangeKeyCount: RangeKeyBytes: RangeValCount: RangeValBytes: SysBytes: SysCount: AbortSpanBytes:}
@@ -32,14 +32,18 @@ SELECT count(*) > 5 FROM crdb_internal.check_consistency(true, '', '')
true
# Query that should touch only a single range.
+#
+# NB: the use of ScanMetaKVs causes issues here. Bounds [`k`, k.Next()]` don't work,
+# with errors such as (here k=\xff):
+# pq: failed to verify keys for Scan: end key /Meta2/"\xff\x00" must be greater than start /Meta2/"\xff\x00"
query B
-SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '\x03', '\x0300')
+SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '\xff', '\xffff')
----
true
-# Ditto, but implicit start key \x02
+# Ditto, but implicit start key \x03
query B
-SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '', '\x0200')
+SELECT count(*) = 1 FROM crdb_internal.check_consistency(true, '', '\x04')
----
true
diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go
index 6cb368df72f0..39e46869f314 100644
--- a/pkg/sql/sem/builtins/generator_builtins.go
+++ b/pkg/sql/sem/builtins/generator_builtins.go
@@ -1827,11 +1827,15 @@ func (j *jsonRecordSetGenerator) Next(ctx context.Context) (bool, error) {
}
type checkConsistencyGenerator struct {
+ txn *kv.Txn // to load range descriptors
consistencyChecker eval.ConsistencyCheckRunner
from, to roachpb.Key
mode roachpb.ChecksumMode
+
+ descs []roachpb.RangeDescriptor
// remainingRows is populated by Start(). Each Next() call peels of the first
- // row and moves it to curRow.
+ // row and moves it to curRow. When empty, consumes from 'descs' to produce
+ // more rows.
remainingRows []roachpb.CheckConsistencyResponse_Result
curRow roachpb.CheckConsistencyResponse_Result
}
@@ -1850,14 +1854,18 @@ func makeCheckConsistencyGenerator(
keyTo := roachpb.Key(*args[2].(*tree.DBytes))
if len(keyFrom) == 0 {
- keyFrom = keys.LocalMax
+ // NB: you'd expect LocalMax here but when we go and call ScanMetaKVs, it
+ // would interpret LocalMax as Meta1Prefix and translate that to KeyMin,
+ // then fail on the scan. That method should really handle this better
+ // but also we should use IterateRangeDescriptors instead.
+ keyFrom = keys.Meta2Prefix
}
if len(keyTo) == 0 {
keyTo = roachpb.KeyMax
}
- if bytes.Compare(keyFrom, keys.LocalMax) < 0 {
- return nil, errors.Errorf("start key must be >= %q", []byte(keys.LocalMax))
+ if bytes.Compare(keyFrom, keys.LocalMax) <= 0 {
+ return nil, errors.Errorf("start key must be > %q", []byte(keys.LocalMax))
}
if bytes.Compare(keyTo, roachpb.KeyMax) > 0 {
return nil, errors.Errorf("end key must be < %q", []byte(roachpb.KeyMax))
@@ -1872,6 +1880,7 @@ func makeCheckConsistencyGenerator(
}
return &checkConsistencyGenerator{
+ txn: ctx.Txn,
consistencyChecker: ctx.ConsistencyChecker,
from: keyFrom,
to: keyTo,
@@ -1891,18 +1900,62 @@ func (*checkConsistencyGenerator) ResolvedType() *types.T {
// Start is part of the tree.ValueGenerator interface.
func (c *checkConsistencyGenerator) Start(ctx context.Context, _ *kv.Txn) error {
- resp, err := c.consistencyChecker.CheckConsistency(ctx, c.from, c.to, c.mode)
+ span := roachpb.Span{Key: c.from, EndKey: c.to}
+ // NB: should use IterateRangeDescriptors here which is in the 'upgrade'
+ // package to avoid pulling all into memory. That needs a refactor, though.
+ // kvprober also has some code to iterate in batches.
+ descs, err := kvclient.ScanMetaKVs(ctx, c.txn, span)
if err != nil {
return err
}
- c.remainingRows = resp.Result
+ for _, v := range descs {
+ var desc roachpb.RangeDescriptor
+ if err := v.ValueProto(&desc); err != nil {
+ return err
+ }
+ if len(desc.StartKey) == 0 {
+ desc.StartKey = keys.MustAddr(keys.LocalMax)
+ // Elide potential second copy we might be getting for r1
+ // if meta1 and meta2 haven't split.
+ // This too should no longer be necessary with IterateRangeDescriptors.
+ if len(c.descs) == 1 {
+ continue
+ }
+ }
+ c.descs = append(c.descs, desc)
+ }
return nil
}
// Next is part of the tree.ValueGenerator interface.
-func (c *checkConsistencyGenerator) Next(_ context.Context) (bool, error) {
+func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) {
if len(c.remainingRows) == 0 {
- return false, nil
+ if len(c.descs) == 0 {
+ return false, nil
+ }
+ // NB: peeling off the spans one by one allows this generator to produce
+ // rows in a streaming manner. If we called CheckConsistency(c.from, c.to)
+ // we would only get the result once all checks have completed and it will
+ // generally be a lot more brittle since an error will completely wipe out
+ // the result set.
+ desc := c.descs[0]
+ c.descs = c.descs[1:]
+ resp, err := c.consistencyChecker.CheckConsistency(
+ ctx, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), c.mode,
+ )
+ if err != nil {
+ // Emit result as a row, and keep going.
+ c.remainingRows = []roachpb.CheckConsistencyResponse_Result{
+ {
+ RangeID: desc.RangeID,
+ StartKey: desc.StartKey,
+ Status: roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE,
+ Detail: err.Error(),
+ },
+ }
+ } else {
+ c.remainingRows = resp.Result
+ }
}
c.curRow = c.remainingRows[0]
c.remainingRows = c.remainingRows[1:]
From b52735c922baee9cbfb7efad7438d4d766c93e57 Mon Sep 17 00:00:00 2001
From: Tobias Grieger
Date: Wed, 31 Aug 2022 23:46:59 +0200
Subject: [PATCH 2/3] builtins: add `duration` column to check_consistency
Noticed that some checks were taking a very long time but it's hard to
tell which ones unless setting up and watching streaming output.
Add a duration column. This can also help us reason about the general
speed of the consistency checker; it does do very expensive hashing
at the moment (sha256) when something much cheaper and weaker would
do.
Release justification: debug improvement for an internal builtin
Release note: None
---
docs/generated/sql/functions.md | 2 +-
pkg/sql/sem/builtins/fixed_oids.go | 2 +-
pkg/sql/sem/builtins/generator_builtins.go | 17 +++++++++++++++--
3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md
index 17664eac7b70..5cbedb9784a6 100644
--- a/docs/generated/sql/functions.md
+++ b/docs/generated/sql/functions.md
@@ -3018,7 +3018,7 @@ may increase either contention or retry errors, or both.
Immutable |
crdb_internal.assignment_cast(val: anyelement, type: anyelement) → anyelement | This function is used internally to perform assignment casts during mutations.
| Stable |
-crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) → tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail} | Runs a consistency check on ranges touching the specified key range. an empty start or end key is treated as the minimum and maximum possible, respectively. stats_only should only be set to false when targeting a small number of ranges to avoid overloading the cluster. Each returned row contains the range ID, the status (a roachpb.CheckConsistencyResponse_Status), and verbose detail.
+crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) → tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail, interval AS duration} | Runs a consistency check on ranges touching the specified key range. an empty start or end key is treated as the minimum and maximum possible, respectively. stats_only should only be set to false when targeting a small number of ranges to avoid overloading the cluster. Each returned row contains the range ID, the status (a roachpb.CheckConsistencyResponse_Status), and verbose detail.
Example usage:
SELECT * FROM crdb_internal.check_consistency(true, ‘\x02’, ‘\x04’)
| Volatile |
diff --git a/pkg/sql/sem/builtins/fixed_oids.go b/pkg/sql/sem/builtins/fixed_oids.go
index 84c7a2bac85d..b07f58e27771 100644
--- a/pkg/sql/sem/builtins/fixed_oids.go
+++ b/pkg/sql/sem/builtins/fixed_oids.go
@@ -370,7 +370,7 @@ var builtinOidsBySignature = map[string]oid.Oid{
`crdb_internal.active_version() -> jsonb`: 1296,
`crdb_internal.approximate_timestamp(timestamp: decimal) -> timestamp`: 1298,
`crdb_internal.assignment_cast(val: anyelement, type: anyelement) -> anyelement`: 1341,
- `crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) -> tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail}`: 347,
+ `crdb_internal.check_consistency(stats_only: bool, start_key: bytes, end_key: bytes) -> tuple{int AS range_id, bytes AS start_key, string AS start_key_pretty, string AS status, string AS detail, interval AS duration}`: 347,
`crdb_internal.check_password_hash_format(password: bytes) -> string`: 1376,
`crdb_internal.cluster_id() -> uuid`: 1299,
`crdb_internal.cluster_name() -> string`: 1301,
diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go
index 39e46869f314..6ecb00c66b3e 100644
--- a/pkg/sql/sem/builtins/generator_builtins.go
+++ b/pkg/sql/sem/builtins/generator_builtins.go
@@ -35,6 +35,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/mon"
+ "github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
"github.com/cockroachdb/errors"
@@ -1837,6 +1838,7 @@ type checkConsistencyGenerator struct {
// row and moves it to curRow. When empty, consumes from 'descs' to produce
// more rows.
remainingRows []roachpb.CheckConsistencyResponse_Result
+ curDuration time.Duration
curRow roachpb.CheckConsistencyResponse_Result
}
@@ -1889,8 +1891,8 @@ func makeCheckConsistencyGenerator(
}
var checkConsistencyGeneratorType = types.MakeLabeledTuple(
- []*types.T{types.Int, types.Bytes, types.String, types.String, types.String},
- []string{"range_id", "start_key", "start_key_pretty", "status", "detail"},
+ []*types.T{types.Int, types.Bytes, types.String, types.String, types.String, types.Interval},
+ []string{"range_id", "start_key", "start_key_pretty", "status", "detail", "duration"},
)
// ResolvedType is part of the tree.ValueGenerator interface.
@@ -1929,6 +1931,7 @@ func (c *checkConsistencyGenerator) Start(ctx context.Context, _ *kv.Txn) error
// Next is part of the tree.ValueGenerator interface.
func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) {
+ tBegin := timeutil.Now()
if len(c.remainingRows) == 0 {
if len(c.descs) == 0 {
return false, nil
@@ -1954,9 +1957,13 @@ func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) {
},
}
} else {
+ // NB: this could have more than one entry, if a range split in the
+ // meantime.
c.remainingRows = resp.Result
}
}
+
+ c.curDuration = timeutil.Since(tBegin)
c.curRow = c.remainingRows[0]
c.remainingRows = c.remainingRows[1:]
return true, nil
@@ -1964,12 +1971,18 @@ func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) {
// Values is part of the tree.ValueGenerator interface.
func (c *checkConsistencyGenerator) Values() (tree.Datums, error) {
+ intervalMeta := types.IntervalTypeMetadata{
+ DurationField: types.IntervalDurationField{
+ DurationType: types.IntervalDurationType_MILLISECOND,
+ },
+ }
return tree.Datums{
tree.NewDInt(tree.DInt(c.curRow.RangeID)),
tree.NewDBytes(tree.DBytes(c.curRow.StartKey)),
tree.NewDString(roachpb.Key(c.curRow.StartKey).String()),
tree.NewDString(c.curRow.Status.String()),
tree.NewDString(c.curRow.Detail),
+ tree.NewDInterval(duration.MakeDuration(c.curDuration.Nanoseconds(), 0 /* days */, 0 /* months */), intervalMeta),
}, nil
}
From 19c95242e94595ec68ce6b2dbbf54572e1b9cfb7 Mon Sep 17 00:00:00 2001
From: Tobias Grieger
Date: Thu, 8 Sep 2022 14:50:51 +0200
Subject: [PATCH 3/3] builtins: refactor check_consistency for clarity
This implements a number of suggestions made by Pavel[^1].
[^1]: https://github.com/cockroachdb/cockroach/pull/87378#discussion_r965081891
Release justification: None needed; 22.2 has been cut
Release note: None
---
pkg/sql/sem/builtins/generator_builtins.go | 107 ++++++++++++---------
1 file changed, 61 insertions(+), 46 deletions(-)
diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go
index 6ecb00c66b3e..7dcdaf18300f 100644
--- a/pkg/sql/sem/builtins/generator_builtins.go
+++ b/pkg/sql/sem/builtins/generator_builtins.go
@@ -1833,13 +1833,21 @@ type checkConsistencyGenerator struct {
from, to roachpb.Key
mode roachpb.ChecksumMode
+ // The descriptors for which we haven't yet emitted rows. Rows are consumed
+ // from this field and produce one (or more, in the case of splits not reflected
+ // in the descriptor) rows in `next`.
descs []roachpb.RangeDescriptor
- // remainingRows is populated by Start(). Each Next() call peels of the first
- // row and moves it to curRow. When empty, consumes from 'descs' to produce
- // more rows.
- remainingRows []roachpb.CheckConsistencyResponse_Result
- curDuration time.Duration
- curRow roachpb.CheckConsistencyResponse_Result
+ // The current row, emitted by Values().
+ cur roachpb.CheckConsistencyResponse_Result
+ // The time it took to produce the current row, i.e. how long it took to run
+ // the consistency check that produced the row. When a consistency check
+ // produces more than one row (i.e. after a split), all of the duration will
+ // be attributed to the first row.
+ dur time.Duration
+ // next are the potentially prefetched subsequent rows. This is usually empty
+ // (as one consistency check produces one result which immediately moves to
+ // `cur`) except when a descriptor we use doesn't reflect subsequent splits.
+ next []roachpb.CheckConsistencyResponse_Result
}
var _ eval.ValueGenerator = &checkConsistencyGenerator{}
@@ -1929,60 +1937,67 @@ func (c *checkConsistencyGenerator) Start(ctx context.Context, _ *kv.Txn) error
return nil
}
-// Next is part of the tree.ValueGenerator interface.
-func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) {
+// maybeRefillRows checks whether c.next is empty and if so, consumes the first
+// element of c.descs for a consistency check. This populates c.next with at
+// least one result (even on error). Returns the duration of the consistency
+// check, if any, and zero otherwise.
+func (c *checkConsistencyGenerator) maybeRefillRows(ctx context.Context) time.Duration {
+ if len(c.next) > 0 || len(c.descs) == 0 {
+ // We have a row to produce or no more ranges to check, so we're done
+ // for now or for good, respectively.
+ return 0
+ }
tBegin := timeutil.Now()
- if len(c.remainingRows) == 0 {
- if len(c.descs) == 0 {
- return false, nil
- }
- // NB: peeling off the spans one by one allows this generator to produce
- // rows in a streaming manner. If we called CheckConsistency(c.from, c.to)
- // we would only get the result once all checks have completed and it will
- // generally be a lot more brittle since an error will completely wipe out
- // the result set.
- desc := c.descs[0]
- c.descs = c.descs[1:]
- resp, err := c.consistencyChecker.CheckConsistency(
- ctx, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), c.mode,
- )
- if err != nil {
- // Emit result as a row, and keep going.
- c.remainingRows = []roachpb.CheckConsistencyResponse_Result{
- {
- RangeID: desc.RangeID,
- StartKey: desc.StartKey,
- Status: roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE,
- Detail: err.Error(),
- },
- }
- } else {
- // NB: this could have more than one entry, if a range split in the
- // meantime.
- c.remainingRows = resp.Result
- }
+ // NB: peeling off the spans one by one allows this generator to produce
+ // rows in a streaming manner. If we called CheckConsistency(c.from, c.to)
+ // we would only get the result once all checks have completed and it will
+ // generally be a lot more brittle since an error will completely wipe out
+ // the result set.
+ desc := c.descs[0]
+ c.descs = c.descs[1:]
+ resp, err := c.consistencyChecker.CheckConsistency(
+ ctx, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey(), c.mode,
+ )
+ if err != nil {
+ resp = &roachpb.CheckConsistencyResponse{Result: []roachpb.CheckConsistencyResponse_Result{{
+ RangeID: desc.RangeID,
+ StartKey: desc.StartKey,
+ Status: roachpb.CheckConsistencyResponse_RANGE_INDETERMINATE,
+ Detail: err.Error(),
+ }}}
}
- c.curDuration = timeutil.Since(tBegin)
- c.curRow = c.remainingRows[0]
- c.remainingRows = c.remainingRows[1:]
+ // NB: this could have more than one entry, if a range split in the
+ // meantime.
+ c.next = resp.Result
+ return timeutil.Since(tBegin)
+}
+
+// Next is part of the tree.ValueGenerator interface.
+func (c *checkConsistencyGenerator) Next(ctx context.Context) (bool, error) {
+ dur := c.maybeRefillRows(ctx)
+ if len(c.next) == 0 {
+ return false, nil
+ }
+ c.dur, c.cur, c.next = dur, c.next[0], c.next[1:]
return true, nil
}
// Values is part of the tree.ValueGenerator interface.
func (c *checkConsistencyGenerator) Values() (tree.Datums, error) {
+ row := c.cur
intervalMeta := types.IntervalTypeMetadata{
DurationField: types.IntervalDurationField{
DurationType: types.IntervalDurationType_MILLISECOND,
},
}
return tree.Datums{
- tree.NewDInt(tree.DInt(c.curRow.RangeID)),
- tree.NewDBytes(tree.DBytes(c.curRow.StartKey)),
- tree.NewDString(roachpb.Key(c.curRow.StartKey).String()),
- tree.NewDString(c.curRow.Status.String()),
- tree.NewDString(c.curRow.Detail),
- tree.NewDInterval(duration.MakeDuration(c.curDuration.Nanoseconds(), 0 /* days */, 0 /* months */), intervalMeta),
+ tree.NewDInt(tree.DInt(row.RangeID)),
+ tree.NewDBytes(tree.DBytes(row.StartKey)),
+ tree.NewDString(roachpb.Key(row.StartKey).String()),
+ tree.NewDString(row.Status.String()),
+ tree.NewDString(row.Detail),
+ tree.NewDInterval(duration.MakeDuration(c.dur.Nanoseconds(), 0 /* days */, 0 /* months */), intervalMeta),
}, nil
}
|