From edc105f11abb625a00cda8a9b640fbea52574b79 Mon Sep 17 00:00:00 2001 From: Tommy Reilly Date: Fri, 18 Feb 2022 19:20:13 -0500 Subject: [PATCH 1/3] sql: fix cached gists panic'ing after schema changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously we didn't guard against out of bounds column ids in gists, if a gist is created on a table and then that table drops columns we would hit a runtime index out of bounds panic. By design gist decoding is best effort and should silently ignore these columns when this happens. The columns are used for equality conditions, ie: └── • lookup join │ table: broker@broker_pkey │ equality: (tr_s_symb) = (b_id) So they are a nice to have. Doing anything more sophisticated would dramatically increase the gist size so isn't worth it. Fixes: #76800 Release note: None --- .../exec/execbuilder/testdata/explain_gist | 22 +++++++++++++++++++ pkg/sql/opt/exec/explain/result_columns.go | 14 +++++++----- pkg/sql/plan_opt.go | 3 +-- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/explain_gist b/pkg/sql/opt/exec/execbuilder/testdata/explain_gist index 1b6f5422d8b8..7cf69d0e3145 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/explain_gist +++ b/pkg/sql/opt/exec/execbuilder/testdata/explain_gist @@ -62,3 +62,25 @@ query T SELECT crdb_internal.decode_plan_gist('$gist') ---- • + +# Regression test for #76800 +statement ok +CREATE TABLE t2 (a int, b int, c int, d int, e int) + +let $gist +EXPLAIN (GIST) SELECT * FROM t2 + +# To hit bug requires deleting lots of columns because of hidden columns. +statement ok +ALTER TABLE t2 DROP COLUMN b; +ALTER TABLE t2 DROP COLUMN c; +ALTER TABLE t2 DROP COLUMN a; +ALTER TABLE t2 DROP COLUMN d; +ALTER TABLE t2 DROP COLUMN e + +query T +SELECT crdb_internal.decode_plan_gist('$gist') +---- +• scan + table: t2@t2_pkey + spans: FULL SCAN diff --git a/pkg/sql/opt/exec/explain/result_columns.go b/pkg/sql/opt/exec/explain/result_columns.go index 5c2a15104d3a..8840e0e59969 100644 --- a/pkg/sql/opt/exec/explain/result_columns.go +++ b/pkg/sql/opt/exec/explain/result_columns.go @@ -201,11 +201,15 @@ func getResultColumns( func tableColumns(table cat.Table, ordinals exec.TableColumnOrdinalSet) colinfo.ResultColumns { cols := make(colinfo.ResultColumns, 0, ordinals.Len()) for i, ok := ordinals.Next(0); ok; i, ok = ordinals.Next(i + 1) { - col := table.Column(i) - cols = append(cols, colinfo.ResultColumn{ - Name: string(col.ColName()), - Typ: col.DatumType(), - }) + // Be defensive about bitset values because they may come from cached + // gists and the columns they refer to could have been removed. + if i < table.ColumnCount() { + col := table.Column(i) + cols = append(cols, colinfo.ResultColumn{ + Name: string(col.ColName()), + Typ: col.DatumType(), + }) + } } return cols } diff --git a/pkg/sql/plan_opt.go b/pkg/sql/plan_opt.go index f4f92af94671..a6afc4da3c4b 100644 --- a/pkg/sql/plan_opt.go +++ b/pkg/sql/plan_opt.go @@ -655,8 +655,7 @@ func (opc *optPlanningCtx) runExecBuilder( return nil } -// DecodeGist Avoid an import cycle by keeping the cat out of the tree, RFC: is -// there a better way? +// DecodeGist Avoid an import cycle by keeping the cat out of the tree. func (p *planner) DecodeGist(gist string) ([]string, error) { return explain.DecodePlanGistToRows(gist, &p.optPlanningCtx.catalog) } From 160fb1906e7e4349c62fce31d96d759576ac855c Mon Sep 17 00:00:00 2001 From: Aditya Maru Date: Tue, 15 Feb 2022 14:04:11 -0500 Subject: [PATCH 2/3] spanconfigsqltranslator: emit all SystemTarget span configs when required This change teaches the SQLTranslator to emit SpanConfigurations corresponding to `spanconfig.SystemTargets`. Today, these SpanConfigurations only contain ProtectionPolicies, corresponding to protected timestamp records that may be written by the host tenant to protect its cluster, a secondary tenant to protect its cluster, or the host tenant to protect a secondary tenant. The SystemTarget span configurations will be applied to a SystemSpanConfig store that will be introduced in a follow up PR. Informs: #73727 Release note: None --- .../datadriven_test.go | 29 +++++--- .../testdata/protectedts | 64 ++++++++++++++++-- .../testdata/tenant/protectedts | 66 ++++++++++++++----- pkg/roachpb/span_config.go | 6 ++ pkg/spanconfig/spanconfig.go | 13 ++-- .../spanconfigreconciler/reconciler.go | 14 ++-- .../spanconfigsqltranslator/sqltranslator.go | 58 +++++++++++++++- pkg/spanconfig/spanconfigtestutils/utils.go | 25 +++++++ 8 files changed, 235 insertions(+), 40 deletions(-) diff --git a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go index 84d106f5ca13..30f155e04dac 100644 --- a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go +++ b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/datadriven_test.go @@ -119,6 +119,8 @@ func TestDataDriven(t *testing.T) { } datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string { + var generateSystemSpanConfigs bool + var descIDs []descpb.ID switch d.Cmd { case "exec-sql": tenant.Exec(d.Input) @@ -130,36 +132,37 @@ func TestDataDriven(t *testing.T) { return output case "translate": - // Parse the args to get the object ID we're looking to + // Parse the args to get the descriptor ID we're looking to // translate. - var objID descpb.ID switch { case d.HasArg("named-zone"): var zone string d.ScanArgs(t, "named-zone", &zone) namedZoneID, found := zonepb.NamedZones[zonepb.NamedZone(zone)] require.Truef(t, found, "unknown named zone: %s", zone) - objID = descpb.ID(namedZoneID) + descIDs = []descpb.ID{descpb.ID(namedZoneID)} case d.HasArg("id"): var scanID int d.ScanArgs(t, "id", &scanID) - objID = descpb.ID(scanID) + descIDs = []descpb.ID{descpb.ID(scanID)} case d.HasArg("database"): var dbName string d.ScanArgs(t, "database", &dbName) if d.HasArg("table") { var tbName string d.ScanArgs(t, "table", &tbName) - objID = tenant.LookupTableByName(ctx, dbName, tbName).GetID() + descIDs = []descpb.ID{tenant.LookupTableByName(ctx, dbName, tbName).GetID()} } else { - objID = tenant.LookupDatabaseByName(ctx, dbName).GetID() + descIDs = []descpb.ID{tenant.LookupDatabaseByName(ctx, dbName).GetID()} } + case d.HasArg("system-span-configurations"): + generateSystemSpanConfigs = true default: d.Fatalf(t, "insufficient/improper args (%v) provided to translate", d.CmdArgs) } sqlTranslator := tenant.SpanConfigSQLTranslator().(spanconfig.SQLTranslator) - records, _, err := sqlTranslator.Translate(ctx, descpb.IDs{objID}) + records, _, err := sqlTranslator.Translate(ctx, descIDs, generateSystemSpanConfigs) require.NoError(t, err) sort.Slice(records, func(i, j int) bool { return records[i].Target.Less(records[j].Target) @@ -167,8 +170,16 @@ func TestDataDriven(t *testing.T) { var output strings.Builder for _, record := range records { - output.WriteString(fmt.Sprintf("%-42s %s\n", record.Target.GetSpan(), - spanconfigtestutils.PrintSpanConfigDiffedAgainstDefaults(record.Config))) + switch { + case record.Target.IsSpanTarget(): + output.WriteString(fmt.Sprintf("%-42s %s\n", record.Target.GetSpan(), + spanconfigtestutils.PrintSpanConfigDiffedAgainstDefaults(record.Config))) + case record.Target.IsSystemTarget(): + output.WriteString(fmt.Sprintf("%-42s %s\n", record.Target.GetSystemTarget(), + spanconfigtestutils.PrintSystemSpanConfigDiffedAgainstDefault(record.Config))) + default: + panic("unsupported target type") + } } return output.String() diff --git a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/protectedts b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/protectedts index 5e6401befaf9..07164558e80a 100644 --- a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/protectedts +++ b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/protectedts @@ -1,7 +1,5 @@ # Create a database with some tables and write protected timestamps on the # tables and database. Check that span configurations are as we expect. -# TODO(adityamaru): Add tests with Cluster, Tenant target once the translator -# has been taught to generate SystemSpanConfigs. exec-sql CREATE DATABASE db; @@ -41,10 +39,44 @@ translate database=db /Table/10{6-7} num_replicas=7 num_voters=5 pts=[1 2] /Table/10{7-8} num_replicas=7 pts=[2] +# Write a protected timestamp on the cluster. +protect record-id=3 ts=3 +cluster +---- + +# Write a protected timestamp on some tenants. +protect record-id=4 ts=4 +tenants 111,112,113 +---- + +# Write another protected timestamp on a subset of the tenants. +protect record-id=5 ts=3 +tenants 111,112 +---- + +translate system-span-configurations +---- +{entire-keyspace} pts=[3] +{source=1,target=111} pts=[3 4] +{source=1,target=112} pts=[3 4] +{source=1,target=113} pts=[4] + +translate database=db +---- +/Table/10{6-7} num_replicas=7 num_voters=5 pts=[1 2] +/Table/10{7-8} num_replicas=7 pts=[2] + # Release the protected timestamp on table t1 release record-id=1 ---- +translate system-span-configurations +---- +{entire-keyspace} pts=[3] +{source=1,target=111} pts=[3 4] +{source=1,target=112} pts=[3 4] +{source=1,target=113} pts=[4] + translate database=db ---- /Table/10{6-7} num_replicas=7 num_voters=5 pts=[2] @@ -54,24 +86,44 @@ translate database=db release record-id=2 ---- +translate system-span-configurations +---- +{entire-keyspace} pts=[3] +{source=1,target=111} pts=[3 4] +{source=1,target=112} pts=[3 4] +{source=1,target=113} pts=[4] + translate database=db ---- /Table/10{6-7} num_replicas=7 num_voters=5 /Table/10{7-8} num_replicas=7 + +# Release the protected timestamp on the cluster and tenants. +release record-id=3 +---- + +release record-id=4 +---- + # Create an index on t1 to ensure that subzones also see protected timestamps. exec-sql CREATE INDEX idx ON db.t1(id); ALTER INDEX db.t1@idx CONFIGURE ZONE USING gc.ttlseconds = 1; ---- -protect record-id=3 ts=3 +protect record-id=6 ts=6 descs 106 ---- +translate system-span-configurations +---- +{source=1,target=111} pts=[3] +{source=1,target=112} pts=[3] + translate database=db ---- -/Table/106{-/2} num_replicas=7 num_voters=5 pts=[3] -/Table/106/{2-3} ttl_seconds=1 num_replicas=7 num_voters=5 pts=[3] -/Table/10{6/3-7} num_replicas=7 num_voters=5 pts=[3] +/Table/106{-/2} num_replicas=7 num_voters=5 pts=[6] +/Table/106/{2-3} ttl_seconds=1 num_replicas=7 num_voters=5 pts=[6] +/Table/10{6/3-7} num_replicas=7 num_voters=5 pts=[6] /Table/10{7-8} num_replicas=7 diff --git a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/tenant/protectedts b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/tenant/protectedts index 7c08607d98b4..825a08eda4a8 100644 --- a/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/tenant/protectedts +++ b/pkg/ccl/spanconfigccl/spanconfigsqltranslatorccl/testdata/tenant/protectedts @@ -8,9 +8,9 @@ CREATE TABLE db.t2(); ---- # Schema object IDs -# db: 54 -# t1: 56 -# t2: 57 +# db: 104 +# t1: 106 +# t2: 107 # Alter zone config fields on the database and one of the tables to ensure # things are cascading. @@ -21,37 +21,65 @@ ALTER TABLE db.t1 CONFIGURE ZONE USING num_voters=5; # Write a protected timestamp on t1. protect record-id=1 ts=1 -descs 56 +descs 106 ---- translate database=db ---- -/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 +/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 pts=[1] /Tenant/10/Table/10{7-8} num_replicas=7 # Write a protected timestamp on db, so we should see it on both t1 and t2. protect record-id=2 ts=2 -descs 54 +descs 104 ---- translate database=db ---- -/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 -/Tenant/10/Table/10{7-8} num_replicas=7 +/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 pts=[1 2] +/Tenant/10/Table/10{7-8} num_replicas=7 pts=[2] + + +# Write a protected timestamp on the tenant cluster. +protect record-id=3 ts=3 +cluster +---- + +# Write another protected timestamp on the tenant cluster. +protect record-id=4 ts=3 +cluster +---- + +translate system-span-configurations +---- +{source=10,target=10} pts=[3 3] + +translate database=db +---- +/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 pts=[1 2] +/Tenant/10/Table/10{7-8} num_replicas=7 pts=[2] # Release the protected timestamp on table t1 release record-id=1 ---- +translate system-span-configurations +---- +{source=10,target=10} pts=[3 3] + translate database=db ---- -/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 -/Tenant/10/Table/10{7-8} num_replicas=7 +/Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 pts=[2] +/Tenant/10/Table/10{7-8} num_replicas=7 pts=[2] # Release the protected timestamp on database db release record-id=2 ---- +translate system-span-configurations +---- +{source=10,target=10} pts=[3 3] + translate database=db ---- /Tenant/10/Table/10{6-7} num_replicas=7 num_voters=5 @@ -63,13 +91,21 @@ CREATE INDEX idx ON db.t1(id); ALTER INDEX db.t1@idx CONFIGURE ZONE USING gc.ttlseconds = 1; ---- -protect record-id=3 ts=3 -descs 56 +protect record-id=5 ts=5 +descs 106 +---- + +# Release the protected timestamp on the tenant cluster. +release record-id=3 +---- + +translate system-span-configurations ---- +{source=10,target=10} pts=[3] translate database=db ---- -/Tenant/10/Table/106{-/2} num_replicas=7 num_voters=5 -/Tenant/10/Table/106/{2-3} ttl_seconds=1 num_replicas=7 num_voters=5 -/Tenant/10/Table/10{6/3-7} num_replicas=7 num_voters=5 +/Tenant/10/Table/106{-/2} num_replicas=7 num_voters=5 pts=[5] +/Tenant/10/Table/106/{2-3} ttl_seconds=1 num_replicas=7 num_voters=5 pts=[5] +/Tenant/10/Table/10{6/3-7} num_replicas=7 num_voters=5 pts=[5] /Tenant/10/Table/10{7-8} num_replicas=7 diff --git a/pkg/roachpb/span_config.go b/pkg/roachpb/span_config.go index 9c3cc253da90..27288fae6506 100644 --- a/pkg/roachpb/span_config.go +++ b/pkg/roachpb/span_config.go @@ -108,6 +108,12 @@ func TestingDefaultSpanConfig() SpanConfig { } } +// TestingDefaultSystemSpanConfiguration exports the default span config that +// applies to spanconfig.SystemTargets for testing purposes. +func TestingDefaultSystemSpanConfiguration() SpanConfig { + return SpanConfig{} +} + // TestingSystemSpanConfig exports the system span config for testing purposes. func TestingSystemSpanConfig() SpanConfig { config := TestingDefaultSpanConfig() diff --git a/pkg/spanconfig/spanconfig.go b/pkg/spanconfig/spanconfig.go index 749521f894f9..1cb74c1ad913 100644 --- a/pkg/spanconfig/spanconfig.go +++ b/pkg/spanconfig/spanconfig.go @@ -98,8 +98,11 @@ type KVSubscriber interface { type SQLTranslator interface { // Translate generates the span configuration state given a list of // {descriptor, named zone} IDs. Entries are unique, and are omitted for IDs - // that don't exist. The timestamp at which the translation is valid is also - // returned. + // that don't exist. + // Additionally, if `generateSystemSpanConfigurations` is set to true, + // Translate will generate all the span configurations that apply to + // `spanconfig.SystemTargets`. The timestamp at which the translation is valid + // is also returned. // // For every ID we first descend the zone configuration hierarchy with the // ID as the root to accumulate IDs of all leaf objects. Leaf objects are @@ -109,7 +112,8 @@ type SQLTranslator interface { // for each one of these accumulated IDs, we generate tuples // by following up the inheritance chain to fully hydrate the span // configuration. Translate also accounts for and negotiates subzone spans. - Translate(ctx context.Context, ids descpb.IDs) ([]Record, hlc.Timestamp, error) + Translate(ctx context.Context, ids descpb.IDs, + generateSystemSpanConfigurations bool) ([]Record, hlc.Timestamp, error) } // FullTranslate translates the entire SQL zone configuration state to the span @@ -119,7 +123,8 @@ func FullTranslate(ctx context.Context, s SQLTranslator) ([]Record, hlc.Timestam // As RANGE DEFAULT is the root of all zone configurations (including other // named zones for the system tenant), we can construct the entire span // configuration state by starting from RANGE DEFAULT. - return s.Translate(ctx, descpb.IDs{keys.RootNamespaceID}) + return s.Translate(ctx, descpb.IDs{keys.RootNamespaceID}, + true /* generateSystemSpanConfigurations */) } // SQLWatcherHandler is the signature of a handler that can be passed into diff --git a/pkg/spanconfig/spanconfigreconciler/reconciler.go b/pkg/spanconfig/spanconfigreconciler/reconciler.go index ac53c2975924..79444fc5148b 100644 --- a/pkg/spanconfig/spanconfigreconciler/reconciler.go +++ b/pkg/spanconfig/spanconfigreconciler/reconciler.go @@ -369,15 +369,19 @@ func (r *incrementalReconciler) reconcile( return callback(checkpoint) // nothing to do; propagate the checkpoint } - // Process the DescriptorUpdates and identify all descriptor IDs that - // require translation. + // Process the SQLUpdates and identify all descriptor IDs that require + // translation. If the SQLUpdates includes ProtectedTimestampUpdates then + // instruct the translator to generate all records that apply to + // spanconfig.SystemTargets as well. + // + // TODO(adityamaru): Conditionally set the bool to true to generate system + // span configurations. + var generateSystemSpanConfigurations bool var allIDs descpb.IDs for _, update := range sqlUpdates { if update.IsDescriptorUpdate() { allIDs = append(allIDs, update.GetDescriptorUpdate().ID) } - // TODO(adityamaru): Set the bool that tells the translator to emit - // SystemSpanConfigs. } // TODO(irfansharif): Would it be easier to just have the translator @@ -390,7 +394,7 @@ func (r *incrementalReconciler) reconcile( return err } - entries, _, err := r.sqlTranslator.Translate(ctx, allIDs) + entries, _, err := r.sqlTranslator.Translate(ctx, allIDs, generateSystemSpanConfigurations) if err != nil { return err } diff --git a/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go b/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go index 2918547edb92..825afb71486c 100644 --- a/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go +++ b/pkg/spanconfig/spanconfigsqltranslator/sqltranslator.go @@ -56,7 +56,7 @@ func New( // Translate is part of the spanconfig.SQLTranslator interface. func (s *SQLTranslator) Translate( - ctx context.Context, ids descpb.IDs, + ctx context.Context, ids descpb.IDs, generateSystemSpanConfigurations bool, ) ([]spanconfig.Record, hlc.Timestamp, error) { var records []spanconfig.Record // txn used to translate the IDs, so that we can get its commit timestamp @@ -84,6 +84,13 @@ func (s *SQLTranslator) Translate( } ptsStateReader := newProtectedTimestampStateReader(ctx, ptsState) + if generateSystemSpanConfigurations { + records, err = s.generateSystemSpanConfigRecords(ptsStateReader) + if err != nil { + return errors.Wrap(err, "failed to generate SystemTarget records") + } + } + // For every ID we want to translate, first expand it to descendant leaf // IDs that have span configurations associated for them. We also // de-duplicate leaf IDs to not generate redundant entries. @@ -145,6 +152,55 @@ var descLookupFlags = tree.CommonLookupFlags{ AvoidLeased: true, } +// generateSystemSpanConfigRecords is responsible for generating all the SpanConfigs +// that apply to spanconfig.SystemTargets. +func (s *SQLTranslator) generateSystemSpanConfigRecords( + ptsStateReader *protectedTimestampStateReader, +) ([]spanconfig.Record, error) { + tenantPrefix := s.codec.TenantPrefix() + _, sourceTenantID, err := keys.DecodeTenantPrefix(tenantPrefix) + if err != nil { + return nil, err + } + records := make([]spanconfig.Record, 0) + + // Aggregate cluster target protections for the tenant. + clusterProtections := ptsStateReader.getProtectionPoliciesForCluster() + if len(clusterProtections) != 0 { + var systemTarget spanconfig.SystemTarget + var err error + if sourceTenantID == roachpb.SystemTenantID { + systemTarget = spanconfig.MakeEntireKeyspaceTarget() + } else { + systemTarget, err = spanconfig.MakeTenantKeyspaceTarget(sourceTenantID, sourceTenantID) + if err != nil { + return nil, err + } + } + clusterSystemRecord := spanconfig.Record{ + Target: spanconfig.MakeTargetFromSystemTarget(systemTarget), + Config: roachpb.SpanConfig{GCPolicy: roachpb.GCPolicy{ProtectionPolicies: clusterProtections}}} + records = append(records, clusterSystemRecord) + } + + // Aggregate tenant target protections. + tenantProtections := ptsStateReader.getProtectionPoliciesForTenants() + for _, protection := range tenantProtections { + tenantProtection := protection + systemTarget, err := spanconfig.MakeTenantKeyspaceTarget(sourceTenantID, tenantProtection.tenantID) + if err != nil { + return nil, err + } + tenantSystemRecord := spanconfig.Record{ + Target: spanconfig.MakeTargetFromSystemTarget(systemTarget), + Config: roachpb.SpanConfig{GCPolicy: roachpb.GCPolicy{ + ProtectionPolicies: tenantProtection.protections}}, + } + records = append(records, tenantSystemRecord) + } + return records, nil +} + // generateSpanConfigurations generates the span configurations for the given // ID. The ID must belong to an object that has a span configuration associated // with it, i.e, it should either belong to a table or a named zone. diff --git a/pkg/spanconfig/spanconfigtestutils/utils.go b/pkg/spanconfig/spanconfigtestutils/utils.go index a8f485baf75d..98b26794358b 100644 --- a/pkg/spanconfig/spanconfigtestutils/utils.go +++ b/pkg/spanconfig/spanconfigtestutils/utils.go @@ -276,6 +276,31 @@ func PrintSpanConfigRecord(t *testing.T, record spanconfig.Record) string { return fmt.Sprintf("%s:%s", PrintTarget(t, record.Target), PrintSpanConfig(record.Config)) } +// PrintSystemSpanConfigDiffedAgainstDefault is a helper function that diffs the +// given config against the default system span config that applies to +// spanconfig.SystemTargets, and returns a string for the mismatched fields. +func PrintSystemSpanConfigDiffedAgainstDefault(conf roachpb.SpanConfig) string { + if conf.Equal(roachpb.TestingDefaultSystemSpanConfiguration()) { + return "default system span config" + } + + var diffs []string + defaultSystemTargetConf := roachpb.TestingDefaultSystemSpanConfiguration() + if !reflect.DeepEqual(conf.GCPolicy.ProtectionPolicies, defaultSystemTargetConf.GCPolicy.ProtectionPolicies) { + sort.Slice(conf.GCPolicy.ProtectionPolicies, func(i, j int) bool { + lhs := conf.GCPolicy.ProtectionPolicies[i].ProtectedTimestamp + rhs := conf.GCPolicy.ProtectionPolicies[j].ProtectedTimestamp + return lhs.Less(rhs) + }) + timestamps := make([]string, 0, len(conf.GCPolicy.ProtectionPolicies)) + for _, pts := range conf.GCPolicy.ProtectionPolicies { + timestamps = append(timestamps, strconv.Itoa(int(pts.ProtectedTimestamp.WallTime))) + } + diffs = append(diffs, fmt.Sprintf("pts=[%s]", strings.Join(timestamps, " "))) + } + return strings.Join(diffs, " ") +} + // PrintSpanConfigDiffedAgainstDefaults is a helper function that diffs the given // config against RANGE {DEFAULT, SYSTEM} and the config for the system database // (as expected on both kinds of tenants), and returns a string for the From 472e7401214d2fa1918507ca5c8ee6fbb7a7856b Mon Sep 17 00:00:00 2001 From: Oleg Afanasyev Date: Tue, 22 Feb 2022 19:00:06 +0000 Subject: [PATCH 3/3] mon: when creating inherited BytesMonitor don't copy metrics Previously monitor inherited parameters like name, resource, limits etc from parent and also included metrics. This is not good as metrics were updated twice when borrowing resources, one time by child monitor and again by parent monitor which lead to misreporting. This patch removes metrics from inheritance. It is safe to do as metrics could be safely set to nil. It could lead to some memory that is reserved by child pool reported as allocated, but it is better than doing x2. Release note: None --- pkg/util/mon/BUILD.bazel | 1 + pkg/util/mon/bytes_usage.go | 12 ++++++++++-- pkg/util/mon/bytes_usage_test.go | 19 +++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/pkg/util/mon/BUILD.bazel b/pkg/util/mon/BUILD.bazel index 893a9a50536d..3fdbb2b51eca 100644 --- a/pkg/util/mon/BUILD.bazel +++ b/pkg/util/mon/BUILD.bazel @@ -33,6 +33,7 @@ go_test( "//pkg/settings/cluster", "//pkg/util/leaktest", "//pkg/util/log", + "//pkg/util/metric", "//pkg/util/randutil", "@com_github_stretchr_testify//require", ], diff --git a/pkg/util/mon/bytes_usage.go b/pkg/util/mon/bytes_usage.go index e333a07622d6..3164d1c186b6 100644 --- a/pkg/util/mon/bytes_usage.go +++ b/pkg/util/mon/bytes_usage.go @@ -309,6 +309,14 @@ func NewMonitorWithLimit( // NewMonitorInheritWithLimit creates a new monitor with a limit local to this // monitor with all other attributes inherited from the passed in monitor. +// Note on metrics and inherited monitors. +// When using pool to share resource, downstream monitors must not use the +// same metric objects as pool monitor to avoid reporting the same allocation +// multiple times. Downstream monitors should use their own metrics as needed +// by using BytesMonitor.SetMetrics function. +// Also note that because monitors pre-allocate resources from pool in chunks, +// those chunks would be reported as used by pool while downstream monitors will +// not. func NewMonitorInheritWithLimit(name string, limit int64, m *BytesMonitor) *BytesMonitor { m.mu.Lock() defer m.mu.Unlock() @@ -316,8 +324,8 @@ func NewMonitorInheritWithLimit(name string, limit int64, m *BytesMonitor) *Byte name, m.resource, limit, - m.mu.curBytesCount, - m.mu.maxBytesHist, + nil, // curCount is not inherited as we don't want to double count allocations + nil, // maxHist is not inherited as we don't want to double count allocations m.poolAllocationSize, m.noteworthyUsageBytes, m.settings, diff --git a/pkg/util/mon/bytes_usage_test.go b/pkg/util/mon/bytes_usage_test.go index f35b39609d91..19d6fa5b408b 100644 --- a/pkg/util/mon/bytes_usage_test.go +++ b/pkg/util/mon/bytes_usage_test.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/stretchr/testify/require" ) @@ -366,6 +367,24 @@ func TestMemoryAllocationEdgeCases(t *testing.T) { m.Stop(ctx) } +func TestMultiSharedGauge(t *testing.T) { + ctx := context.Background() + resourceGauge := metric.NewGauge(metric.Metadata{}) + minAllocation := int64(1000) + + parent := NewMonitor("root", MemoryResource, resourceGauge, nil, minAllocation, 0, + cluster.MakeTestingClusterSettings()) + parent.Start(ctx, nil, MakeStandaloneBudget(100000)) + + child := NewMonitorInheritWithLimit("child", 20000, parent) + child.Start(ctx, parent, BoundAccount{}) + + acc := child.MakeBoundAccount() + require.NoError(t, acc.Grow(ctx, 100)) + + require.Equal(t, minAllocation, resourceGauge.Value(), "Metric") +} + func BenchmarkBoundAccountGrow(b *testing.B) { ctx := context.Background() m := NewMonitor("test", MemoryResource,