From 44ee9b3d071ba953a77ed8926aee35c7c4a2af94 Mon Sep 17 00:00:00 2001
From: Andrei Matei
Date: Thu, 17 Oct 2019 17:50:51 -0400
Subject: [PATCH] storage/report: don't deserialize zone configs over and over

This patch is expected to speed up report generation considerably by
not unmarshalling zone config protos for every range. Instead, the
report-generating visitors now keep state about the zone that a visited
range is in and reuse that state if they're told that the following
range is in the same zone.

The range iteration infrastructure was enhanced to figure out when zones
change from range to range and tell that to the visitors. visitRanges()
now detects runs of consecutive ranges that belong to the same zone.
This is done through a new zoneResolver struct that's optimized for the
case where it's asked to resolve ranges in key order.

Without this patch, generating the reports was pinning a core for
minutes on a cluster with partitioning and 200k ranges. The profiles
showed that it was all zone config unmarshalling.

Fixes #41609

Release note (performance improvement): The performance of generating
the system.replication_* reports was greatly improved for large
clusters.
---
 pkg/base/zone.go                              |   6 +
 .../reports/constraint_stats_report.go        |  42 +++-
 .../reports/constraint_stats_report_test.go   | 167 ++++++++------
 .../reports/critical_localities_report.go     |  74 ++++---
 .../reports/replication_stats_report.go       |  52 +++--
 .../reports/replication_stats_report_test.go  |   7 +-
 pkg/storage/reports/reporter.go               | 155 ++++++++++++-
 pkg/storage/reports/reporter_test.go          | 209 ++++++++++++++++++
 pkg/testutils/keysutils/pretty_scanner.go     |   2 +-
 9 files changed, 580 insertions(+), 134 deletions(-)

diff --git a/pkg/base/zone.go b/pkg/base/zone.go
index c4fcf75d2827..669a7a9c53d8 100644
--- a/pkg/base/zone.go
+++ b/pkg/base/zone.go
@@ -15,6 +15,12 @@ package base
 // sentinel.
 type SubzoneID uint32
 
+// ToSubzoneIndex turns a SubzoneID into the index corresponding to the correct
+// Subzone within the parent zone's Subzones slice.
+func (id SubzoneID) ToSubzoneIndex() int32 {
+	return int32(id) - 1
+}
+
 // SubzoneIDFromIndex turns a subzone's index within its parent zone into its
 // SubzoneID.
 func SubzoneIDFromIndex(idx int) SubzoneID {
diff --git a/pkg/storage/reports/constraint_stats_report.go b/pkg/storage/reports/constraint_stats_report.go
index a99a0ae6c005..4c1f8f09554d 100644
--- a/pkg/storage/reports/constraint_stats_report.go
+++ b/pkg/storage/reports/constraint_stats_report.go
@@ -379,6 +379,12 @@ type constraintConformanceVisitor struct {
 	report *replicationConstraintStatsReportSaver
 
 	visitErr bool
+
+	// prevZoneKey and prevConstraints maintain state from one range to the next.
+	// This state can be reused when a range is covered by the same zone config as
+	// the previous one. Reusing it speeds up the report generation.
+	prevZoneKey     ZoneKey
+	prevConstraints []config.Constraints
 }
 
 var _ rangeVisitor = &constraintConformanceVisitor{}
@@ -406,6 +412,8 @@ func (v *constraintConformanceVisitor) failed() bool {
 // reset is part of the rangeVisitor interface.
 func (v *constraintConformanceVisitor) reset(ctx context.Context) {
 	v.visitErr = false
+	v.prevZoneKey = ZoneKey{}
+	v.prevConstraints = nil
 	v.report.resetReport()
 
 	// Iterate through all the zone configs to create report entries for all the
@@ -428,23 +436,19 @@ func (v *constraintConformanceVisitor) reset(ctx context.Context) {
 	}
 }
 
-// constraintConformanceVisitor is part of the rangeVisitor interface.
-func (v *constraintConformanceVisitor) visit( - ctx context.Context, r roachpb.RangeDescriptor, +// visitNewZone is part of the rangeVisitor interface. +func (v *constraintConformanceVisitor) visitNewZone( + ctx context.Context, r *roachpb.RangeDescriptor, ) (retErr error) { defer func() { - if retErr != nil { - v.visitErr = true - } + v.visitErr = retErr != nil }() - storeDescs := v.storeResolver(r) - // Find the applicable constraints, which may be inherited. var constraints []config.Constraints var zKey ZoneKey - _, err := visitZones(ctx, r, v.cfg, + _, err := visitZones(ctx, r, v.cfg, ignoreSubzonePlaceholders, func(_ context.Context, zone *config.ZoneConfig, key ZoneKey) bool { if zone.Constraints == nil { return false @@ -456,10 +460,26 @@ func (v *constraintConformanceVisitor) visit( if err != nil { return errors.Errorf("unexpected error visiting zones: %s", err) } + v.prevZoneKey = zKey + v.prevConstraints = constraints + v.countRange(ctx, r, zKey, constraints) + return nil +} +// visitSameZone is part of the rangeVisitor interface. +func (v *constraintConformanceVisitor) visitSameZone( + ctx context.Context, r *roachpb.RangeDescriptor, +) (retErr error) { + v.countRange(ctx, r, v.prevZoneKey, v.prevConstraints) + return nil +} + +func (v *constraintConformanceVisitor) countRange( + ctx context.Context, r *roachpb.RangeDescriptor, key ZoneKey, constraints []config.Constraints, +) { + storeDescs := v.storeResolver(r) violated := processRange(ctx, storeDescs, constraints) for _, c := range violated { - v.report.AddViolation(zKey, Constraint, c) + v.report.AddViolation(key, Constraint, c) } - return nil } diff --git a/pkg/storage/reports/constraint_stats_report_test.go b/pkg/storage/reports/constraint_stats_report_test.go index e2e395016bc5..777860f7d461 100644 --- a/pkg/storage/reports/constraint_stats_report_test.go +++ b/pkg/storage/reports/constraint_stats_report_test.go @@ -32,6 +32,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/keysutil" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" @@ -44,8 +45,8 @@ func TestConformanceReport(t *testing.T) { defer leaktest.AfterTest(t)() tests := []conformanceConstraintTestCase{ { + name: "simple no violations", baseReportTestCase: baseReportTestCase{ - name: "simple no violations", defaultZone: zone{replicas: 3}, schema: []database{ { @@ -90,9 +91,9 @@ func TestConformanceReport(t *testing.T) { }}, }, { + name: "violations at multiple levels", // Test zone constraints inheritance at all levels. baseReportTestCase: baseReportTestCase{ - name: "violations at multiple levels", defaultZone: zone{replicas: 3, constraints: "[+default]"}, schema: []database{ { @@ -363,6 +364,19 @@ func (t table) validate() error { return t.partitions.validate() } +func (t *table) addPKIdx() { + if len(t.indexes) > 0 && t.indexes[0].name == "PK" { + return + } + // Add the PK index if missing. + pkIdx := index{ + name: "PK", + zone: nil, + partitions: t.partitions, + } + t.indexes = append(append([]index{pkIdx}), t.indexes...) 
+} + type database struct { name string tables []table @@ -451,11 +465,11 @@ func (n node) toDescriptors() (roachpb.NodeDescriptor, []roachpb.StoreDescriptor type conformanceConstraintTestCase struct { baseReportTestCase - exp []constraintEntry + name string + exp []constraintEntry } type baseReportTestCase struct { - name string schema []database splits []split nodes []node @@ -698,79 +712,35 @@ func compileTestCase(tc baseReportTestCase) (compiledTestCase, error) { objectCounter++ tableToID[table.name] = tableID - pkIdx := index{ - name: "PK", - zone: nil, - partitions: table.partitions, - } - table.indexes = append(append([]index(nil), pkIdx), table.indexes...) - // Create a table descriptor to be used for creating the zone config. + table.addPKIdx() tableDesc, err := makeTableDesc(table, tableID, dbID) if err != nil { return compiledTestCase{}, errors.Wrap(err, "error creating table descriptor") } sysCfgBuilder.addTableDesc(tableID, tableDesc) - // Create the table's zone config. - var tableZone *config.ZoneConfig - if table.zone != nil { - tableZone = new(config.ZoneConfig) - *tableZone = table.zone.toZoneConfig() - } - // Add subzones for the PK partitions. - tableZone = addIndexSubzones( - table.indexes[0], tableZone, tableDesc, 1 /* id of PK */) - // Add subzones for all the indexes. - for i, idx := range table.indexes { - idxID := i + 1 // index 1 is the PK - idxToID[fmt.Sprintf("%s.%s", table.name, idx.name)] = idxID - tableZone = addIndexSubzones(idx, tableZone, tableDesc, idxID) + tableZone, err := generateTableZone(table, tableDesc) + if err != nil { + return compiledTestCase{}, err } - // Fill in the SubzoneSpans. if tableZone != nil { - var err error - tableZone.SubzoneSpans, err = sql.GenerateSubzoneSpans( - nil, uuid.UUID{} /* clusterID */, &tableDesc, tableZone.Subzones, - false /* hasNewSubzones */) - if err != nil { - return compiledTestCase{}, errors.Wrap(err, "error generating subzone spans") - } - if err := sysCfgBuilder.addTableZone(tableDesc, tableID, *tableZone); err != nil { + if err := sysCfgBuilder.addTableZone(tableDesc, *tableZone); err != nil { return compiledTestCase{}, err } } + // Add the indexes to idxToID. + for i, idx := range table.indexes { + idxID := i + 1 // index 1 is the PK + idxToID[fmt.Sprintf("%s.%s", table.name, idx.name)] = idxID + } } } keyScanner := keysutils.MakePrettyScannerForNamedTables(tableToID, idxToID) - ranges := make([]roachpb.RangeDescriptor, len(tc.splits)) - for i, split := range tc.splits { - prettyKey := tc.splits[i].key - startKey, err := keyScanner.Scan(split.key) - if err != nil { - return compiledTestCase{}, errors.Wrapf(err, "failed to parse key: %s", prettyKey) - } - var endKey roachpb.Key - if i < len(tc.splits)-1 { - prettyKey := tc.splits[i+1].key - endKey, err = keyScanner.Scan(prettyKey) - if err != nil { - return compiledTestCase{}, errors.Wrapf(err, "failed to parse key: %s", prettyKey) - } - } else { - endKey = roachpb.KeyMax - } - - rd := roachpb.RangeDescriptor{ - RangeID: roachpb.RangeID(i + 1), // IDs start at 1 - StartKey: keys.MustAddr(startKey), - EndKey: keys.MustAddr(endKey), - } - for _, storeID := range split.stores { - rd.AddReplica(roachpb.NodeID(storeID), roachpb.StoreID(storeID), roachpb.VOTER_FULL) - } - ranges[i] = rd + ranges, err := processSplits(keyScanner, tc.splits) + if err != nil { + return compiledTestCase{}, err } var storeDescs []roachpb.StoreDescriptor @@ -781,7 +751,7 @@ func compileTestCase(tc baseReportTestCase) (compiledTestCase, error) { } storeDescs = append(storeDescs, sds...) 
} - storeResolver := func(r roachpb.RangeDescriptor) []roachpb.StoreDescriptor { + storeResolver := func(r *roachpb.RangeDescriptor) []roachpb.StoreDescriptor { stores := make([]roachpb.StoreDescriptor, len(r.Replicas().Voters())) for i, rep := range r.Replicas().Voters() { for _, desc := range storeDescs { @@ -817,6 +787,67 @@ func compileTestCase(tc baseReportTestCase) (compiledTestCase, error) { }, nil } +func generateTableZone(t table, tableDesc sqlbase.TableDescriptor) (*config.ZoneConfig, error) { + // Create the table's zone config. + var tableZone *config.ZoneConfig + if t.zone != nil { + tableZone = new(config.ZoneConfig) + *tableZone = t.zone.toZoneConfig() + } + // Add subzones for the PK partitions. + tableZone = addIndexSubzones(t.indexes[0], tableZone, 1 /* id of PK */) + // Add subzones for all the indexes. + for i, idx := range t.indexes { + idxID := i + 1 // index 1 is the PK + tableZone = addIndexSubzones(idx, tableZone, idxID) + } + // Fill in the SubzoneSpans. + if tableZone != nil { + var err error + tableZone.SubzoneSpans, err = sql.GenerateSubzoneSpans( + nil, uuid.UUID{} /* clusterID */, &tableDesc, tableZone.Subzones, + false /* hasNewSubzones */) + if err != nil { + return nil, errors.Wrap(err, "error generating subzone spans") + } + } + return tableZone, nil +} + +func processSplits( + keyScanner keysutil.PrettyScanner, splits []split, +) ([]roachpb.RangeDescriptor, error) { + ranges := make([]roachpb.RangeDescriptor, len(splits)) + for i, split := range splits { + prettyKey := splits[i].key + startKey, err := keyScanner.Scan(split.key) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse key: %s", prettyKey) + } + var endKey roachpb.Key + if i < len(splits)-1 { + prettyKey := splits[i+1].key + endKey, err = keyScanner.Scan(prettyKey) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse key: %s", prettyKey) + } + } else { + endKey = roachpb.KeyMax + } + + rd := roachpb.RangeDescriptor{ + RangeID: roachpb.RangeID(i + 1), // IDs start at 1 + StartKey: keys.MustAddr(startKey), + EndKey: keys.MustAddr(endKey), + } + for _, storeID := range split.stores { + rd.AddReplica(roachpb.NodeID(storeID), roachpb.StoreID(storeID), roachpb.VOTER_FULL) + } + ranges[i] = rd + } + return ranges, nil +} + func makeTableDesc(t table, tableID int, dbID int) (sqlbase.TableDescriptor, error) { if err := t.validate(); err != nil { return sqlbase.TableDescriptor{}, err @@ -857,9 +888,7 @@ func makeTableDesc(t table, tableID int, dbID int) (sqlbase.TableDescriptor, err // // parent: Can be nil if the parent table doesn't have a zone of its own. In that // case, if any subzones are created, a placeholder zone will also be created and returned. -func addIndexSubzones( - idx index, parent *config.ZoneConfig, tableDesc sql.TableDescriptor, idxID int, -) *config.ZoneConfig { +func addIndexSubzones(idx index, parent *config.ZoneConfig, idxID int) *config.ZoneConfig { res := parent ensureParent := func() { @@ -937,10 +966,8 @@ func (b *systemConfigBuilder) addDatabaseZone(name string, id int, cfg config.Zo return b.addZoneInner(name, id, cfg) } -func (b *systemConfigBuilder) addTableZone( - t sqlbase.TableDescriptor, id int, cfg config.ZoneConfig, -) error { - if err := b.addZoneInner(t.Name, id, cfg); err != nil { +func (b *systemConfigBuilder) addTableZone(t sqlbase.TableDescriptor, cfg config.ZoneConfig) error { + if err := b.addZoneInner(t.Name, int(t.ID), cfg); err != nil { return err } // Figure out the mapping from all the partition names to zone keys. 
@@ -959,7 +986,7 @@ func (b *systemConfigBuilder) addTableZone( object = fmt.Sprintf("%s.%s", idx, subzone.PartitionName) } if err := b.addZoneToObjectMapping( - MakeZoneKey(uint32(id), base.SubzoneIDFromIndex(i)), object, + MakeZoneKey(uint32(t.ID), base.SubzoneIDFromIndex(i)), object, ); err != nil { return err } diff --git a/pkg/storage/reports/critical_localities_report.go b/pkg/storage/reports/critical_localities_report.go index 31775fe1ccc3..a8ab8604a60b 100644 --- a/pkg/storage/reports/critical_localities_report.go +++ b/pkg/storage/reports/critical_localities_report.go @@ -271,6 +271,11 @@ type criticalLocalitiesVisitor struct { report *replicationCriticalLocalitiesReportSaver visitErr bool + + // prevZoneKey maintains state from one range to the next. This state can be + // reused when a range is covered by the same zone config as the previous one. + // Reusing it speeds up the report generation. + prevZoneKey ZoneKey } var _ rangeVisitor = &criticalLocalitiesVisitor{} @@ -304,21 +309,55 @@ func (v *criticalLocalitiesVisitor) reset(ctx context.Context) { v.report.resetReport() } -// visit is part of the rangeVisitor interface. -func (v *criticalLocalitiesVisitor) visit( - ctx context.Context, r roachpb.RangeDescriptor, +// visitNewZone is part of the rangeVisitor interface. +func (v *criticalLocalitiesVisitor) visitNewZone( + ctx context.Context, r *roachpb.RangeDescriptor, ) (retErr error) { + defer func() { + v.visitErr = retErr != nil + }() + + // Get the zone. + var zKey ZoneKey + found, err := visitZones(ctx, r, v.cfg, ignoreSubzonePlaceholders, + func(_ context.Context, zone *config.ZoneConfig, key ZoneKey) bool { + if !zoneChangesReplication(zone) { + return false + } + zKey = key + return true + }) + if err != nil { + return errors.AssertionFailedf("unexpected error visiting zones: %s", err) + } + if !found { + return errors.AssertionFailedf("no suitable zone config found for range: %s", r) + } + v.prevZoneKey = zKey + + return v.countRange(ctx, zKey, r) +} + +// visitSameZone is part of the rangeVisitor interface. +func (v *criticalLocalitiesVisitor) visitSameZone( + ctx context.Context, r *roachpb.RangeDescriptor, +) (retErr error) { defer func() { if retErr != nil { v.visitErr = true } }() + return v.countRange(ctx, v.prevZoneKey, r) +} +func (v *criticalLocalitiesVisitor) countRange( + ctx context.Context, zoneKey ZoneKey, r *roachpb.RangeDescriptor, +) error { stores := v.storeResolver(r) for _, c := range v.localityConstraints { if err := processLocalityForRange( - ctx, r, v.report, &c, v.cfg, v.nodeChecker, stores, + ctx, r, zoneKey, v.report, &c, v.cfg, v.nodeChecker, stores, ); err != nil { return err } @@ -330,32 +369,17 @@ func (v *criticalLocalitiesVisitor) visit( // range with replicas in each of the stores given, contributing to rep. func processLocalityForRange( ctx context.Context, - r roachpb.RangeDescriptor, + r *roachpb.RangeDescriptor, + zoneKey ZoneKey, rep *replicationCriticalLocalitiesReportSaver, c *config.Constraints, cfg *config.SystemConfig, nodeChecker nodeChecker, storeDescs []roachpb.StoreDescriptor, ) error { - // Get the zone. 
- var zKey ZoneKey - found, err := visitZones(ctx, r, cfg, - func(_ context.Context, zone *config.ZoneConfig, key ZoneKey) bool { - if !zoneChangesReplication(zone) { - return false - } - zKey = key - return true - }) - if err != nil { - return errors.AssertionFailedf("unexpected error visiting zones: %s", err) - } - if !found { - return errors.AssertionFailedf("no suitable zone config found for range: %s", r) - } - - // Compute the required quorum and the number of live nodes. If the number of live nodes gets lower - // than the required quorum then the range is already unavailable. + // Compute the required quorum and the number of live nodes. If the number of + // live nodes gets lower than the required quorum then the range is already + // unavailable. quorumCount := len(r.Replicas().Voters())/2 + 1 liveNodeCount := len(storeDescs) for _, storeDesc := range storeDescs { @@ -395,7 +419,7 @@ func processLocalityForRange( // If the live nodes outside of the given locality are not enough to // form quorum then this locality is critical. if quorumCount > liveNodeCount-passCount { - rep.AddCriticalLocality(zKey, loc) + rep.AddCriticalLocality(zoneKey, loc) } return nil } diff --git a/pkg/storage/reports/replication_stats_report.go b/pkg/storage/reports/replication_stats_report.go index 52f9f8b63aec..0db53c6070ea 100644 --- a/pkg/storage/reports/replication_stats_report.go +++ b/pkg/storage/reports/replication_stats_report.go @@ -277,6 +277,12 @@ type replicationStatsVisitor struct { report *replicationStatsReportSaver visitErr bool + + // prevZoneKey and prevNumReplicas maintain state from one range to the next. + // This state can be reused when a range is covered by the same zone config as + // the previous one. Reusing it speeds up the report generation. + prevZoneKey ZoneKey + prevNumReplicas int } var _ rangeVisitor = &replicationStatsVisitor{} @@ -305,6 +311,8 @@ func (v *replicationStatsVisitor) failed() bool { func (v *replicationStatsVisitor) reset(ctx context.Context) { v.visitErr = false v.report.resetReport() + v.prevZoneKey = ZoneKey{} + v.prevNumReplicas = -1 // Iterate through all the zone configs to create report entries for all the // zones that have constraints. Otherwise, just iterating through the ranges @@ -334,22 +342,24 @@ func (v *replicationStatsVisitor) ensureEntries(key ZoneKey, zone *config.ZoneCo } } -// visit is part of the rangeVisitor interface. -func (v *replicationStatsVisitor) visit( - ctx context.Context, r roachpb.RangeDescriptor, +// visitNewZone is part of the rangeVisitor interface. +func (v *replicationStatsVisitor) visitNewZone( + ctx context.Context, r *roachpb.RangeDescriptor, ) (retErr error) { defer func() { - if retErr != nil { - v.visitErr = true - } + v.visitErr = retErr != nil }() - - // Get the zone var zKey ZoneKey var zConfig *config.ZoneConfig var numReplicas int - found, err := visitZones(ctx, r, v.cfg, + + // Figure out the zone config for whose report the current range is to be + // counted. This is the lowest-level zone config covering the range that + // changes replication settings. We also need to figure out the replication + // factor this zone is configured with; the replication factor might be + // inherited from a higher-level zone config. 
+ found, err := visitZones(ctx, r, v.cfg, ignoreSubzonePlaceholders, func(_ context.Context, zone *config.ZoneConfig, key ZoneKey) bool { if zConfig == nil { if !zoneChangesReplication(zone) { @@ -375,14 +385,31 @@ func (v *replicationStatsVisitor) visit( if err != nil { return errors.AssertionFailedf("unexpected error visiting zones for range %s: %s", r, err) } + v.prevZoneKey = zKey + v.prevNumReplicas = numReplicas if !found { return errors.AssertionFailedf( "no zone config with replication attributes found for range: %s", r) } + v.countRange(zKey, numReplicas, r) + return nil +} + +// visitSameZone is part of the rangeVisitor interface. +func (v *replicationStatsVisitor) visitSameZone( + ctx context.Context, r *roachpb.RangeDescriptor, +) error { + v.countRange(v.prevZoneKey, v.prevNumReplicas, r) + return nil +} + +func (v *replicationStatsVisitor) countRange( + key ZoneKey, replicationFactor int, r *roachpb.RangeDescriptor, +) { voters := len(r.Replicas().Voters()) - underReplicated := numReplicas > voters - overReplicated := numReplicas < voters + underReplicated := replicationFactor > voters + overReplicated := replicationFactor < voters var liveNodeCount int for _, rep := range r.Replicas().Voters() { if v.nodeChecker(rep.NodeID) { @@ -391,8 +418,7 @@ func (v *replicationStatsVisitor) visit( } unavailable := liveNodeCount < (len(r.Replicas().Voters())/2 + 1) - v.report.AddZoneRangeStatus(zKey, unavailable, underReplicated, overReplicated) - return nil + v.report.AddZoneRangeStatus(key, unavailable, underReplicated, overReplicated) } // zoneChangesReplication determines whether a given zone config changes diff --git a/pkg/storage/reports/replication_stats_report_test.go b/pkg/storage/reports/replication_stats_report_test.go index b806d1a34d51..8ecc4baa9de6 100644 --- a/pkg/storage/reports/replication_stats_report_test.go +++ b/pkg/storage/reports/replication_stats_report_test.go @@ -142,7 +142,8 @@ type replicationStatsEntry struct { type replicationStatsTestCase struct { baseReportTestCase - exp []replicationStatsEntry + name string + exp []replicationStatsEntry } // runReplicationStatsTest runs one test case. It processes the input schema, @@ -182,8 +183,8 @@ func TestReplicationStatsReport(t *testing.T) { defer leaktest.AfterTest(t)() tests := []replicationStatsTestCase{ { + name: "simple no violations", baseReportTestCase: baseReportTestCase{ - name: "simple no violations", defaultZone: zone{replicas: 3}, schema: []database{ { @@ -263,8 +264,8 @@ func TestReplicationStatsReport(t *testing.T) { }, }, { + name: "simple violations", baseReportTestCase: baseReportTestCase{ - name: "simple violations", defaultZone: zone{replicas: 3}, schema: []database{ { diff --git a/pkg/storage/reports/reporter.go b/pkg/storage/reports/reporter.go index 49ccb34e3a85..913b92b93baa 100644 --- a/pkg/storage/reports/reporter.go +++ b/pkg/storage/reports/reporter.go @@ -183,7 +183,9 @@ func (stats *Reporter) update( } allStores := stats.storePool.GetStores() - var getStoresFromGossip StoreResolver = func(r roachpb.RangeDescriptor) []roachpb.StoreDescriptor { + var getStoresFromGossip StoreResolver = func( + r *roachpb.RangeDescriptor, + ) []roachpb.StoreDescriptor { storeDescs := make([]roachpb.StoreDescriptor, len(r.Replicas().Voters())) // We'll return empty descriptors for stores that gossip doesn't have a // descriptor for. 
These stores will be considered to satisfy all
@@ -308,6 +310,104 @@ func (stats *Reporter) isNodeLive(nodeID roachpb.NodeID) bool {
 	}
 }
 
+// zoneResolver resolves ranges to their zone configs. It is optimized for the
+// case where a range falls in the same zone as the previously-resolved range
+// (which is the common case when asked to resolve ranges in key order).
+type zoneResolver struct {
+	init bool
+	// curObjectID is the ID of the object (i.e. usually a table) containing the
+	// configured range.
+	curObjectID uint32
+	// curRootZone is the lowest zone covering the previously resolved range
+	// that's not a subzone.
+	// This is used to compute the subzone for a range.
+	curRootZone *config.ZoneConfig
+	// curZoneKey is the zone key for the previously resolved range.
+	curZoneKey ZoneKey
+}
+
+// resolveRange resolves a range to its zone.
+func (c *zoneResolver) resolveRange(
+	ctx context.Context, rng *roachpb.RangeDescriptor, cfg *config.SystemConfig,
+) (ZoneKey, error) {
+	if c.checkSameZone(ctx, rng) {
+		return c.curZoneKey, nil
+	}
+	return c.updateZone(ctx, rng, cfg)
+}
+
+// setZone remembers the passed-in info as the reference for further
+// checkSameZone() calls.
+// Clients should generally use the higher-level updateZone().
+func (c *zoneResolver) setZone(objectID uint32, key ZoneKey, rootZone *config.ZoneConfig) {
+	c.init = true
+	c.curObjectID = objectID
+	c.curRootZone = rootZone
+	c.curZoneKey = key
+}
+
+// updateZone updates the state of the zoneResolver to the zone of the passed-in
+// range descriptor.
+func (c *zoneResolver) updateZone(
+	ctx context.Context, rd *roachpb.RangeDescriptor, cfg *config.SystemConfig,
+) (ZoneKey, error) {
+	objectID, _ := config.DecodeKeyIntoZoneIDAndSuffix(rd.StartKey)
+	first := true
+	var zoneKey ZoneKey
+	var rootZone *config.ZoneConfig
+	// We're going to walk the zone hierarchy looking for two things:
+	// 1) The lowest zone containing rd. We'll use the subzone ID for it.
+	// 2) The lowest zone containing rd that's not a subzone.
+	found, err := visitZones(
+		ctx, rd, cfg, includeSubzonePlaceholders,
+		func(_ context.Context, zone *config.ZoneConfig, key ZoneKey) bool {
+			if first {
+				first = false
+				zoneKey = key
+			}
+			if key.SubzoneID == NoSubzone {
+				rootZone = zone
+				return true
+			}
+			return false
+		})
+	if err != nil {
+		return ZoneKey{}, err
+	}
+	if !found {
+		return ZoneKey{}, errors.AssertionFailedf("failed to resolve zone for range: %s", rd)
+	}
+	c.setZone(objectID, zoneKey, rootZone)
+	return zoneKey, nil
+}
+
+// checkSameZone returns true if the most specific zone that contains rng is the
+// one previously passed to setZone().
+//
+// NB: This method allows for false negatives (but no false positives). For
+// example, if the zoneResolver was previously configured for a range starting at
+// /Table/51 and is now queried for /Table/52, it will say that the zones don't
+// match even if in fact they do (because neither table defines its own zone
+// and they're both inheriting a higher zone).
+func (c *zoneResolver) checkSameZone(ctx context.Context, rng *roachpb.RangeDescriptor) bool { + if !c.init { + return false + } + + objectID, keySuffix := config.DecodeKeyIntoZoneIDAndSuffix(rng.StartKey) + if objectID != c.curObjectID { + return false + } + _, subzoneIdx := c.curRootZone.GetSubzoneForKeySuffix(keySuffix) + return subzoneIdx == c.curZoneKey.SubzoneID.ToSubzoneIndex() +} + +type visitOpt bool + +const ( + ignoreSubzonePlaceholders visitOpt = false + includeSubzonePlaceholders visitOpt = true +) + // visitZones applies a visitor to the hierarchy of zone configs that apply to // the given range, starting from the most specific to the default zone config. // @@ -319,11 +419,12 @@ func (stats *Reporter) isNodeLive(nodeID roachpb.NodeID) bool { // zone hierarchy was exhausted. func visitZones( ctx context.Context, - r roachpb.RangeDescriptor, + rng *roachpb.RangeDescriptor, cfg *config.SystemConfig, + opt visitOpt, visitor func(context.Context, *config.ZoneConfig, ZoneKey) bool, ) (bool, error) { - id, keySuffix := config.DecodeKeyIntoZoneIDAndSuffix(r.StartKey) + id, keySuffix := config.DecodeKeyIntoZoneIDAndSuffix(rng.StartKey) zone, err := getZoneByID(id, cfg) if err != nil { return false, err @@ -344,7 +445,7 @@ func visitZones( } } // Try the zone for our object. - if !zone.IsSubzonePlaceholder() { + if (opt == includeSubzonePlaceholders) || !zone.IsSubzonePlaceholder() { if visitor(ctx, zone, MakeZoneKey(id, 0)) { return true, nil } @@ -477,21 +578,32 @@ func constraintSatisfied( // StoreResolver is a function resolving a range to a store descriptor for each // of the replicas. Empty store descriptors are to be returned when there's no // information available for the store. -type StoreResolver func(roachpb.RangeDescriptor) []roachpb.StoreDescriptor +type StoreResolver func(*roachpb.RangeDescriptor) []roachpb.StoreDescriptor // rangeVisitor abstracts the interface for range iteration implemented by all // report generators. type rangeVisitor interface { - // visit is called by visitRanges() for each range, in order. The visitor will - // update its report with the range's info. If an error is returned, visit() - // will not be called anymore before reset(). - // If an error() is returned, failed() needs to return true until reset() is called. - visit(context.Context, roachpb.RangeDescriptor) error + // visitNewZone/visitSameZone is called by visitRanges() for each range, in + // order. The visitor will update its report with the range's info. If an + // error is returned, visit() will not be called anymore before reset(). + // If an error() is returned, failed() needs to return true until reset() is + // called. + // + // Once visitNewZone() has been called once, visitSameZone() is called for + // further ranges as long as these ranges are covered by the same zone config. + // As soon as the range is not covered by it, visitNewZone() is called again. + // The idea is that visitors can maintain state about that zone that applies + // to multiple ranges, and so visitSameZone() allows them to efficiently reuse + // that state (in particular, not unmarshall ZoneConfigs again). + visitNewZone(context.Context, *roachpb.RangeDescriptor) error + visitSameZone(context.Context, *roachpb.RangeDescriptor) error + // failed returns true if an error was encountered by the last visit() call // (and reset( ) wasn't called since). // The idea is that, if failed() returns true, the report that the visitor // produces will be considered incomplete and not persisted. 
failed() bool + // reset resets the visitor's state, preparing it for visit() calls starting // at the first range. This is called on retriable errors during range iteration. reset(ctx context.Context) @@ -523,6 +635,11 @@ func visitRanges( origVisitors := make([]rangeVisitor, len(visitors)) copy(origVisitors, visitors) var visitorErrs []error + var resolver zoneResolver + + var key ZoneKey + first := true + // Iterate over all the ranges. for { rd, err := rangeStore.Next(ctx) @@ -542,8 +659,24 @@ func visitRanges( // We're done. break } + + newKey, err := resolver.resolveRange(ctx, &rd, cfg) + if err != nil { + return err + } + sameZoneAsPrevRange := !first && key == newKey + key = newKey + first = false + for i, v := range visitors { - if err := v.visit(ctx, rd); err != nil { + var err error + if sameZoneAsPrevRange { + err = v.visitSameZone(ctx, &rd) + } else { + err = v.visitNewZone(ctx, &rd) + } + + if err != nil { // Sanity check - v.failed() should return an error now (the same as err above). if !v.failed() { return errors.Errorf("expected visitor %T to have failed() after error: %s", v, err) diff --git a/pkg/storage/reports/reporter_test.go b/pkg/storage/reports/reporter_test.go index ee2791ac17ff..e5a0429cf69e 100644 --- a/pkg/storage/reports/reporter_test.go +++ b/pkg/storage/reports/reporter_test.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/gossip" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/keysutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -475,3 +476,211 @@ func computeReplicationStatsReport( err := visitRanges(ctx, rangeStore, cfg, &v) return v.report, err } + +func TestZoneChecker(t *testing.T) { + defer leaktest.AfterTest(t)() + ctx := context.Background() + + type tc struct { + split string + newZone bool + newRootZoneCfg *config.ZoneConfig + newZoneKey ZoneKey + } + // NB: IDs need to be beyond MaxSystemConfigDescID, otherwise special logic + // kicks in for mapping keys to zones. + dbID := 50 + t1ID := 51 + t1 := table{name: "t1", + partitions: []partition{ + { + name: "p1", + start: []int{100}, + end: []int{200}, + zone: &zone{constraints: "[+p1]"}, + }, + { + name: "p2", + start: []int{300}, + end: []int{400}, + zone: &zone{constraints: "[+p2]"}, + }, + }, + } + t1.addPKIdx() + // Create a table descriptor to be used for creating the zone config. 
+ t1Desc, err := makeTableDesc(t1, t1ID, dbID) + t1Zone, err := generateTableZone(t1, t1Desc) + p1SubzoneIndex := 0 + p2SubzoneIndex := 1 + require.Equal(t, "p1", t1Zone.Subzones[p1SubzoneIndex].PartitionName) + require.Equal(t, "p2", t1Zone.Subzones[p2SubzoneIndex].PartitionName) + t1ZoneKey := MakeZoneKey(uint32(t1ID), NoSubzone) + p1ZoneKey := MakeZoneKey(uint32(t1ID), base.SubzoneIDFromIndex(p1SubzoneIndex)) + p2ZoneKey := MakeZoneKey(uint32(t1ID), base.SubzoneIDFromIndex(p2SubzoneIndex)) + + ranges := []tc{ + { + split: "/Table/t1/pk/1", + newZone: true, + newZoneKey: t1ZoneKey, + newRootZoneCfg: t1Zone, + }, + { + split: "/Table/t1/pk/2", + newZone: false, + }, + { + // p1's zone + split: "/Table/t1/pk/100", + newZone: true, + newZoneKey: p1ZoneKey, + newRootZoneCfg: t1Zone, + }, + { + split: "/Table/t1/pk/101", + newZone: false, + }, + { + // Back to t1's zone + split: "/Table/t1/pk/200", + newZone: true, + newZoneKey: t1ZoneKey, + newRootZoneCfg: t1Zone, + }, + { + // p2's zone + split: "/Table/t1/pk/305", + newZone: true, + newZoneKey: p2ZoneKey, + newRootZoneCfg: t1Zone, + }, + } + + splits := make([]split, len(ranges)) + for i := range ranges { + splits[i].key = ranges[i].split + } + keyScanner := keysutils.MakePrettyScannerForNamedTables( + map[string]int{"t1": t1ID} /* tableNameToID */, nil /* idxNameToID */) + rngs, err := processSplits(keyScanner, splits) + require.NoError(t, err) + + var zc zoneResolver + for i, tc := range ranges { + sameZone := zc.checkSameZone(ctx, &rngs[i]) + newZone := !sameZone + require.Equal(t, tc.newZone, newZone, "failed at: %d (%s)", i, tc.split) + if newZone { + objectID, _ := config.DecodeKeyIntoZoneIDAndSuffix(rngs[i].StartKey) + zc.setZone(objectID, tc.newZoneKey, tc.newRootZoneCfg) + } + } +} + +// TestRangeIteration checks that visitRanges() correctly informs range +// visitors whether ranges fall in the same zone vs a new zone. 
+func TestRangeIteration(t *testing.T) { + defer leaktest.AfterTest(t)() + ctx := context.Background() + + schema := baseReportTestCase{ + schema: []database{{ + name: "db1", + zone: &zone{ + replicas: 3, + }, + tables: []table{ + { + name: "t1", + partitions: []partition{ + { + name: "p1", + start: []int{100}, + end: []int{200}, + zone: &zone{}, + }, + { + name: "p2", + start: []int{200}, + end: []int{300}, + zone: &zone{}, + }, + }, + }, + { + name: "t2", + }, + }, + }, + }, + splits: []split{ + {key: "/Table/t1/pk/1"}, + {key: "/Table/t1/pk/2"}, + {key: "/Table/t1/pk/100"}, + {key: "/Table/t1/pk/101"}, + {key: "/Table/t1/pk/200"}, + {key: "/Table/t1/pk/305"}, + {key: "/Table/t2/pk/1"}, + }, + defaultZone: zone{}, + } + + compiled, err := compileTestCase(schema) + require.NoError(t, err) + v := recordingRangeVisitor{} + require.NoError(t, visitRanges(ctx, &compiled.iter, compiled.cfg, &v)) + + type entry struct { + newZone bool + key string + } + exp := []entry{ + {newZone: true, key: "/Table/51/1/1"}, + {newZone: false, key: "/Table/51/1/2"}, + {newZone: true, key: "/Table/51/1/100"}, + {newZone: false, key: "/Table/51/1/101"}, + {newZone: true, key: "/Table/51/1/200"}, + {newZone: true, key: "/Table/51/1/305"}, + {newZone: true, key: "/Table/52/1/1"}, + } + got := make([]entry, len(v.rngs)) + for i, r := range v.rngs { + got[i].newZone = r.newZone + got[i].key = r.rng.StartKey.String() + } + require.Equal(t, exp, got) +} + +type recordingRangeVisitor struct { + rngs []visitorEntry +} + +var _ rangeVisitor = &recordingRangeVisitor{} + +func (r *recordingRangeVisitor) visitNewZone( + _ context.Context, rng *roachpb.RangeDescriptor, +) error { + r.rngs = append(r.rngs, visitorEntry{newZone: true, rng: *rng}) + return nil +} + +func (r *recordingRangeVisitor) visitSameZone( + _ context.Context, rng *roachpb.RangeDescriptor, +) error { + r.rngs = append(r.rngs, visitorEntry{newZone: false, rng: *rng}) + return nil +} + +func (r *recordingRangeVisitor) failed() bool { + return false +} + +func (r *recordingRangeVisitor) reset(ctx context.Context) { + r.rngs = nil +} + +type visitorEntry struct { + newZone bool + rng roachpb.RangeDescriptor +} diff --git a/pkg/testutils/keysutils/pretty_scanner.go b/pkg/testutils/keysutils/pretty_scanner.go index 8390223325c0..6673967553f4 100644 --- a/pkg/testutils/keysutils/pretty_scanner.go +++ b/pkg/testutils/keysutils/pretty_scanner.go @@ -23,7 +23,7 @@ import ( ) // MakePrettyScannerForNamedTables create a PrettyScanner that, beside what the -// PrettyScanner is generally uble to decode, can also decode keys of the form +// PrettyScanner is generally able to decode, can also decode keys of the form // "///1/2/3/..." using supplied maps from names to ids. func MakePrettyScannerForNamedTables( tableNameToID map[string]int, idxNameToID map[string]int,
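The optimization described in the commit message boils down to: resolve a range's zone once, then reuse that resolution, and whatever state the visitor derived from it, for subsequent ranges covered by the same zone. Below is a minimal, self-contained Go sketch of that pattern. It is not the CockroachDB code: zoneKey, rangeDesc, zoneCache, visitor and countingVisitor are hypothetical stand-ins for ZoneKey, roachpb.RangeDescriptor, zoneResolver, rangeVisitor and the report visitors, and expensiveLookup stands in for unmarshalling zone config protos out of the SystemConfig.

package main

import "fmt"

// zoneKey and rangeDesc are hypothetical stand-ins for ZoneKey and
// roachpb.RangeDescriptor.
type zoneKey struct{ objectID uint32 }

type rangeDesc struct {
	objectID uint32 // in the real code this is decoded from the range's start key
	startKey string
}

// zoneCache remembers which zone covered the previously resolved range so that
// the expensive step only runs when consecutive ranges cross a zone boundary.
type zoneCache struct {
	init    bool
	prevKey zoneKey
	// expensiveLookup models walking the zone hierarchy for a range
	// (unmarshalling zone configs in the real code).
	expensiveLookup func(r rangeDesc) zoneKey
}

// resolve returns the zone key for r and whether it is the same zone that
// covered the previously resolved range.
func (c *zoneCache) resolve(r rangeDesc) (zoneKey, bool) {
	if c.init && r.objectID == c.prevKey.objectID {
		return c.prevKey, true // cheap path: same object, so same zone
	}
	key := c.expensiveLookup(r)
	c.init, c.prevKey = true, key
	return key, false
}

// visitor mirrors the visitNewZone/visitSameZone split: per-zone state is
// computed in visitNewZone and reused verbatim in visitSameZone.
type visitor interface {
	visitNewZone(r rangeDesc, key zoneKey)
	visitSameZone(r rangeDesc)
}

func visitRanges(ranges []rangeDesc, c *zoneCache, v visitor) {
	for _, r := range ranges {
		if key, same := c.resolve(r); same {
			v.visitSameZone(r)
		} else {
			v.visitNewZone(r, key)
		}
	}
}

// countingVisitor counts ranges per zone, caching the current zone's key across
// consecutive ranges much like the report visitors cache constraints or the
// replication factor.
type countingVisitor struct {
	counts  map[zoneKey]int
	current zoneKey
}

func (v *countingVisitor) visitNewZone(r rangeDesc, key zoneKey) {
	v.current = key
	v.counts[key]++
}

func (v *countingVisitor) visitSameZone(r rangeDesc) { v.counts[v.current]++ }

func main() {
	cache := &zoneCache{expensiveLookup: func(r rangeDesc) zoneKey {
		return zoneKey{objectID: r.objectID} // stand-in for real zone resolution
	}}
	v := &countingVisitor{counts: map[zoneKey]int{}}
	visitRanges([]rangeDesc{
		{51, "/Table/51/1/1"}, {51, "/Table/51/1/2"}, {52, "/Table/52/1/1"},
	}, cache, v)
	fmt.Println(v.counts) // e.g. map[{51}:2 {52}:1]
}

The real zoneResolver's cheap path is more precise than the object-ID comparison above (it also checks the subzone via the cached root zone), but the fast-path/slow-path split has the same shape.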
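The pkg/base/zone.go hunk adds ToSubzoneIndex as the inverse of the existing SubzoneIDFromIndex: subzone IDs are 1-based indexes into the parent zone's Subzones slice, with 0 reserved as the "no subzone" sentinel. The toy sketch below is not the base package; the SubzoneIDFromIndex body is assumed from that documented relationship, since the diff only shows its signature.

package main

import "fmt"

// SubzoneID mirrors the convention in pkg/base: 0 is the "no subzone" sentinel
// and the subzone at index i of the parent zone's Subzones slice has ID i+1.
type SubzoneID uint32

// SubzoneIDFromIndex is assumed to be idx+1, the inverse of ToSubzoneIndex.
func SubzoneIDFromIndex(idx int) SubzoneID { return SubzoneID(idx + 1) }

// ToSubzoneIndex is the helper added by the patch: index = id - 1.
func (id SubzoneID) ToSubzoneIndex() int32 { return int32(id) - 1 }

func main() {
	for idx := 0; idx < 3; idx++ {
		id := SubzoneIDFromIndex(idx)
		fmt.Println(idx, "->", id, "->", id.ToSubzoneIndex()) // round-trips to idx
	}
	var noSubzone SubzoneID               // the zero value is the sentinel
	fmt.Println(noSubzone.ToSubzoneIndex()) // -1
}

This convention is what lets checkSameZone compare curZoneKey.SubzoneID.ToSubzoneIndex() directly against the index returned by GetSubzoneForKeySuffix; the comparison also covers the no-subzone case, assuming that lookup reports a miss as -1 (an assumption here, not something visible in the diff).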
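processLocalityForRange, reshuffled in the critical_localities_report.go hunk, marks a locality as critical for a range when the live replicas outside that locality cannot form quorum (quorum being len(voters)/2 + 1). Here is a small stand-alone sketch of just that check, with plain strings in place of store descriptors and locality constraints; it is an illustration of the rule, not the report code itself.

package main

import "fmt"

// criticalLocality reports whether losing every live replica in the given
// locality would leave the range without quorum. replicaLocalities and live
// are parallel slices, one entry per voting replica.
func criticalLocality(replicaLocalities []string, live []bool, locality string) bool {
	quorum := len(replicaLocalities)/2 + 1
	liveCount, liveInLocality := 0, 0
	for i, loc := range replicaLocalities {
		if !live[i] {
			continue
		}
		liveCount++
		if loc == locality {
			liveInLocality++
		}
	}
	// Same shape as the patch's check: quorumCount > liveNodeCount - passCount.
	return liveCount-liveInLocality < quorum
}

func main() {
	// Three live voters: two in us-east, one in us-west; quorum is 2.
	locs := []string{"us-east", "us-east", "us-west"}
	live := []bool{true, true, true}
	fmt.Println(criticalLocality(locs, live, "us-east")) // true: 3-2=1 < 2
	fmt.Println(criticalLocality(locs, live, "us-west")) // false: 3-1=2 >= 2
}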