From a4f34e779d039357f8ecd1bbdfc1c39f48954bd5 Mon Sep 17 00:00:00 2001 From: Faizan Qazi Date: Fri, 21 Jul 2023 10:28:24 -0400 Subject: [PATCH 1/3] sql: add internal function to detect keys in system tables Previously, we had no easy way of knowing if a key belonged to a system table. This was inadequate because we can improve our redacted debug zips by including contention information involving these tables. This patch will add a new builtin crdb_internal.is_system_table_key which can be used for conditional redaction in SQL. Release note: None --- pkg/sql/faketreeeval/evalctx.go | 5 +++++ pkg/sql/privileged_accessor.go | 10 ++++++++++ pkg/sql/sem/builtins/builtins.go | 32 ++++++++++++++++++++++++++++++ pkg/sql/sem/builtins/fixed_oids.go | 1 + pkg/sql/sem/eval/deps.go | 3 +++ 5 files changed, 51 insertions(+) diff --git a/pkg/sql/faketreeeval/evalctx.go b/pkg/sql/faketreeeval/evalctx.go index 6a3e1dd25216..7274368b78f7 100644 --- a/pkg/sql/faketreeeval/evalctx.go +++ b/pkg/sql/faketreeeval/evalctx.go @@ -535,6 +535,11 @@ func (ep *DummyPrivilegedAccessor) LookupZoneConfigByNamespaceID( return "", false, errors.WithStack(errEvalPrivileged) } +// IsSystemTable is part of the tree.PrivilegedAccessor interface. +func (ep *DummyPrivilegedAccessor) IsSystemTable(ctx context.Context, id int64) (bool, error) { + return false, errors.WithStack(errEvalPrivileged) +} + // DummySessionAccessor implements the eval.SessionAccessor interface by returning errors. type DummySessionAccessor struct{} diff --git a/pkg/sql/privileged_accessor.go b/pkg/sql/privileged_accessor.go index 8f8dc9648942..109c95adaf91 100644 --- a/pkg/sql/privileged_accessor.go +++ b/pkg/sql/privileged_accessor.go @@ -19,6 +19,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" + "github.com/cockroachdb/cockroach/pkg/sql/sem/catid" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/errors" @@ -76,6 +77,15 @@ func (p *planner) LookupZoneConfigByNamespaceID( return tree.DBytes(zc.GetRawBytesInStorage()), true, nil } +// IsSystemTable implements tree.PrivilegedAccessor. +func (p *planner) IsSystemTable(ctx context.Context, id int64) (bool, error) { + tbl, err := p.Descriptors().ByID(p.Txn()).Get().Table(ctx, catid.DescID(id)) + if err != nil { + return false, err + } + return catalog.IsSystemDescriptor(tbl), nil +} + // checkDescriptorPermissions returns nil if the executing user has permissions // to check the permissions of a descriptor given its ID, or the id given // is not a descriptor of a table or database. diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 667d0f125016..bff0204d8db1 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -5728,6 +5728,38 @@ SELECT Volatility: volatility.Immutable, }, ), + // Return if a key belongs to a system table, which should make it to print + // within redacted output. + "crdb_internal.is_system_table_key": makeBuiltin( + tree.FunctionProperties{ + Category: builtinconstants.CategorySystemInfo, + Undocumented: true, + }, + tree.Overload{ + Types: tree.ParamTypes{ + {Name: "raw_key", Typ: types.Bytes}, + }, + ReturnType: tree.FixedReturnType(types.Bool), + Fn: func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (tree.Datum, error) { + _, tableID, err := evalCtx.Codec.DecodeTablePrefix(roachpb.Key(tree.MustBeDBytes(args[0]))) + if err != nil { + // If a key isn't prefixed with a table ID ignore. + //nolint:returnerrcheck + return tree.DBoolFalse, nil + } + isSystemTable, err := evalCtx.PrivilegedAccessor.IsSystemTable(ctx, int64(tableID)) + if err != nil { + // If we can't find the descriptor or its not the right type then its + // not a system table. + //nolint:returnerrcheck + return tree.DBoolFalse, nil + } + return tree.MakeDBool(tree.DBool(isSystemTable)), nil + }, + Info: "This function is used only by CockroachDB's developers for testing purposes.", + Volatility: volatility.Stable, + }, + ), // Return a pretty string for a given span, skipping the specified number of // fields. diff --git a/pkg/sql/sem/builtins/fixed_oids.go b/pkg/sql/sem/builtins/fixed_oids.go index 26486974f306..47d98a8c2a7d 100644 --- a/pkg/sql/sem/builtins/fixed_oids.go +++ b/pkg/sql/sem/builtins/fixed_oids.go @@ -2439,6 +2439,7 @@ var builtinOidsArray = []string{ 2466: `crdb_internal.setup_span_configs_stream(tenant_name: string) -> bytes`, 2467: `crdb_internal.request_statement_bundle(stmtFingerprint: string, planGist: string, samplingProbability: float, minExecutionLatency: interval, expiresAfter: interval) -> bool`, 2468: `crdb_internal.request_statement_bundle(stmtFingerprint: string, planGist: string, antiPlanGist: bool, samplingProbability: float, minExecutionLatency: interval, expiresAfter: interval) -> bool`, + 2469: `crdb_internal.is_system_table_key(raw_key: bytes) -> bool`, } var builtinOidsBySignature map[string]oid.Oid diff --git a/pkg/sql/sem/eval/deps.go b/pkg/sql/sem/eval/deps.go index 21afbc763537..e5ab63c2bd91 100644 --- a/pkg/sql/sem/eval/deps.go +++ b/pkg/sql/sem/eval/deps.go @@ -534,6 +534,9 @@ type PrivilegedAccessor interface { // Returns the config byte array, a bool representing whether the namespace exists, // and an error if there is one. LookupZoneConfigByNamespaceID(ctx context.Context, id int64) (tree.DBytes, bool, error) + + // IsSystemTable returns if a given descriptor ID is a system table.s + IsSystemTable(ctx context.Context, id int64) (bool, error) } // RegionOperator gives access to the current region, validation for all From 2d824738f366d99e466903d6e4b81b93fbb6974b Mon Sep 17 00:00:00 2001 From: Faizan Qazi Date: Fri, 21 Jul 2023 11:03:15 -0400 Subject: [PATCH 2/3] cli: include system table contention information in redacted debug zips Previously, when generating a redacted debug zip we would omit information about retries and contended keys, which could make issues involving contention leasing / descriptors / jobs hard to diagnose. For example, it's easily possible for users to collect redacted zips when these situations are occurring and we will not have sufficient information to diagnose them. This patch removes redaction from relevant columns if we know that the keys involve system tables or just harmless stuff like retries/retry reasons. Fixes: #104593 Release note: None --- pkg/cli/zip_table_registry.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg/cli/zip_table_registry.go b/pkg/cli/zip_table_registry.go index b852920ddc9e..ce1bbc999338 100644 --- a/pkg/cli/zip_table_registry.go +++ b/pkg/cli/zip_table_registry.go @@ -107,10 +107,12 @@ func (r DebugZipTableRegistry) GetTables() []string { var zipInternalTablesPerCluster = DebugZipTableRegistry{ "crdb_internal.cluster_contention_events": { // `key` column contains the contended key, which may contain sensitive - // row-level data. + // row-level data. So, we will only fetch if the table is under the system + // schema. nonSensitiveCols: NonSensitiveColumns{ "table_id", "index_id", + "IF(crdb_internal.is_system_table_key(key), crdb_internal.pretty_key(key, 0) ,'redacted') as pretty_key", "num_contention_events", "cumulative_contention_time", "txn_id", @@ -158,6 +160,8 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{ "exec_node_ids", "contention", "index_recommendations", + "retries", + "last_retry_reason", }, }, "crdb_internal.cluster_locks": { @@ -531,7 +535,8 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{ }, "crdb_internal.transaction_contention_events": { // `contending_key` column contains the contended key, which may - // contain sensitive row-level data. + // contain sensitive row-level data. So, we will only fetch if the + // table is under the system schema. nonSensitiveCols: NonSensitiveColumns{ "collection_ts", "blocking_txn_id", @@ -539,6 +544,7 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{ "waiting_txn_id", "waiting_txn_fingerprint_id", "contention_duration", + "IF(crdb_internal.is_system_table_key(contending_key), crdb_internal.pretty_key(contending_key, 0) ,'redacted') as contending_pretty_key", }, }, "crdb_internal.zones": { From 13749086bf7adb11b8aa59907c02ec2a7b7e7076 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 9 Aug 2023 11:57:16 -0400 Subject: [PATCH 3/3] Revert "kvprober: metamorphically enable / configure kvprober" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts (most of) commit 769ba1c46d5bfbdcfc14069f545809926f9f35c5. That commit metamorphically enabled kvprober. This has been observed to be destabliziing to unit tests. When the metamorphic constant is enabled (50% of the time) and when kvprober is fast enough, random ranges will see extra requests that they aren’t expecting. This adds nondeterminism which can trip up tests in any number of different ways. All of the following flakes have been tracked back to kvprober: Fixes #107864. Fixes #108242. Fixes #108441. Fixes #108349. Fixes #108124. Closes #108366. Release note: None --- pkg/kv/kvprober/BUILD.bazel | 1 - pkg/kv/kvprober/settings.go | 12 +++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pkg/kv/kvprober/BUILD.bazel b/pkg/kv/kvprober/BUILD.bazel index 735724bbb7a7..83c099d35cc6 100644 --- a/pkg/kv/kvprober/BUILD.bazel +++ b/pkg/kv/kvprober/BUILD.bazel @@ -17,7 +17,6 @@ go_library( "//pkg/roachpb", "//pkg/settings", "//pkg/settings/cluster", - "//pkg/util", "//pkg/util/log", "//pkg/util/log/logcrash", "//pkg/util/metric", diff --git a/pkg/kv/kvprober/settings.go b/pkg/kv/kvprober/settings.go index af111f24dab4..1ddc18d1cced 100644 --- a/pkg/kv/kvprober/settings.go +++ b/pkg/kv/kvprober/settings.go @@ -14,12 +14,9 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/settings" - "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/errors" ) -var defaultEnabled = util.ConstantWithMetamorphicTestBool("kv.prober.*.enabled", false) - // kv.prober.bypass_admission_control controls whether kvprober's requests // should bypass kv layer's admission control. Setting this value to true // ensures that kvprober will not be significantly affected if the cluster is @@ -30,13 +27,14 @@ var bypassAdmissionControl = settings.RegisterBoolSetting( "set to bypass admission control queue for kvprober requests; "+ "note that dedicated clusters should have this set as users own capacity planning "+ "but serverless clusters should not have this set as SREs own capacity planning", - util.ConstantWithMetamorphicTestBool("kv.prober.bypass_admission_control.enabled", true)) + true, +) var readEnabled = settings.RegisterBoolSetting( settings.TenantWritable, "kv.prober.read.enabled", "whether the KV read prober is enabled", - defaultEnabled) + false) // TODO(josh): Another option is for the cluster setting to be a QPS target // for the cluster as a whole. @@ -72,7 +70,7 @@ var writeEnabled = settings.RegisterBoolSetting( settings.TenantWritable, "kv.prober.write.enabled", "whether the KV write prober is enabled", - defaultEnabled) + false) var writeInterval = settings.RegisterDurationSetting( settings.TenantWritable, @@ -150,7 +148,7 @@ var quarantineWriteEnabled = settings.RegisterBoolSetting( "quarantine pool holds a separate group of ranges that have previously failed "+ "a probe which are continually probed. This helps determine outages for ranges "+ " with a high level of confidence", - defaultEnabled) + false) var quarantineWriteInterval = settings.RegisterDurationSetting( settings.TenantWritable,