From e3a99175da53f2fdd8898a2dd374169995d8a9e7 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Thu, 21 Jul 2022 13:23:48 -0700 Subject: [PATCH 1/3] colexec: fix span use after finish in edge cases in parallel sync Previously, in some cases the parallel unordered synchronizer would finish the tracing spans prematurely which would result "use of Span after Finish" panics in test builds and this is now fixed. Release note: None --- pkg/sql/colexec/parallel_unordered_synchronizer.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/sql/colexec/parallel_unordered_synchronizer.go b/pkg/sql/colexec/parallel_unordered_synchronizer.go index 152374eaa5b4..0efa2fda969f 100644 --- a/pkg/sql/colexec/parallel_unordered_synchronizer.go +++ b/pkg/sql/colexec/parallel_unordered_synchronizer.go @@ -486,17 +486,19 @@ func (s *ParallelUnorderedSynchronizer) Close(ctx context.Context) error { // Note that at this point we know that the input goroutines won't be // spawned up (our consumer won't call Next/DrainMeta after calling Close), // so it is safe to close all closers from this goroutine. - for i, span := range s.tracingSpans { - if span != nil { - span.Finish() - s.tracingSpans[i] = nil - } - } var lastErr error for _, input := range s.inputs { if err := input.ToClose.Close(ctx); err != nil { lastErr = err } } + // Finish the spans after closing the Closers since Close() implementations + // might log some stuff. + for i, span := range s.tracingSpans { + if span != nil { + span.Finish() + s.tracingSpans[i] = nil + } + } return lastErr } From 9513ed0c01908de94848faff04996cfc71c983c6 Mon Sep 17 00:00:00 2001 From: Lidor Carmel Date: Thu, 21 Jul 2022 12:32:39 -0700 Subject: [PATCH 2/3] kvserver: add an interface for replica changes for the store rebalancer In order to simulate the store rebalancer, we need an interface that can be used for lease transfers and replica moves. This PR wraps existing code with a new interface, which will also be implemented in the asim package. Release note: None --- pkg/kv/kvserver/replica_range_lease.go | 2 + pkg/kv/kvserver/replicate_queue.go | 61 +++++++++++++++++++++++--- pkg/kv/kvserver/store_rebalancer.go | 12 ++++- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/pkg/kv/kvserver/replica_range_lease.go b/pkg/kv/kvserver/replica_range_lease.go index d6ac463daef1..fc51536953a4 100644 --- a/pkg/kv/kvserver/replica_range_lease.go +++ b/pkg/kv/kvserver/replica_range_lease.go @@ -851,6 +851,8 @@ func (r *Replica) requestLeaseLocked( // blocks until the transfer is done. If a transfer is already in progress, this // method joins in waiting for it to complete if it's transferring to the same // replica. Otherwise, a NotLeaseHolderError is returned. +// +// AdminTransferLease implements the ReplicaLeaseMover interface. func (r *Replica) AdminTransferLease(ctx context.Context, target roachpb.StoreID) error { // initTransferHelper inits a transfer if no extension is in progress. // It returns a channel for waiting for the result of a pending diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 9cff159f6c8e..7450595e8b99 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -1567,26 +1567,73 @@ func (rq *replicateQueue) shedLease( if qpsMeasurementDur < replicastats.MinStatsDuration { avgQPS = 0 } - if err := rq.transferLease(ctx, repl, target, avgQPS); err != nil { + if err := rq.TransferLease(ctx, repl, repl.store.StoreID(), target.StoreID, avgQPS); err != nil { return allocator.TransferErr, err } return allocator.TransferOK, nil } -func (rq *replicateQueue) transferLease( - ctx context.Context, repl *Replica, target roachpb.ReplicaDescriptor, rangeQPS float64, +// ReplicaLeaseMover handles lease transfers for a single range. +type ReplicaLeaseMover interface { + // AdminTransferLease moves the lease to the requested store. + AdminTransferLease(ctx context.Context, target roachpb.StoreID) error + + // String returns info about the replica. + String() string +} + +// RangeRebalancer handles replica moves and lease transfers. +type RangeRebalancer interface { + // TransferLease uses a LeaseMover interface to move a lease between stores. + // The QPS is used to update stats for the stores. + TransferLease( + ctx context.Context, + rlm ReplicaLeaseMover, + source, target roachpb.StoreID, + rangeQPS float64, + ) error + + // RelocateRange relocates replicas to the requested stores, and can transfer + // the lease for the range to the first target voter. + RelocateRange( + ctx context.Context, + key interface{}, + voterTargets, nonVoterTargets []roachpb.ReplicationTarget, + transferLeaseToFirstVoter bool, + ) error +} + +// TransferLease implements the RangeRebalancer interface. +func (rq *replicateQueue) TransferLease( + ctx context.Context, rlm ReplicaLeaseMover, source, target roachpb.StoreID, rangeQPS float64, ) error { rq.metrics.TransferLeaseCount.Inc(1) - log.VEventf(ctx, 1, "transferring lease to s%d", target.StoreID) - if err := repl.AdminTransferLease(ctx, target.StoreID); err != nil { - return errors.Wrapf(err, "%s: unable to transfer lease to s%d", repl, target.StoreID) + log.VEventf(ctx, 1, "transferring lease to s%d", target) + if err := rlm.AdminTransferLease(ctx, target); err != nil { + return errors.Wrapf(err, "%s: unable to transfer lease to s%d", rlm, target) } rq.lastLeaseTransfer.Store(timeutil.Now()) rq.store.cfg.StorePool.UpdateLocalStoresAfterLeaseTransfer( - repl.store.StoreID(), target.StoreID, rangeQPS) + source, target, rangeQPS) return nil } +// RelocateRange implements the RangeRebalancer interface. +func (rq *replicateQueue) RelocateRange( + ctx context.Context, + key interface{}, + voterTargets, nonVoterTargets []roachpb.ReplicationTarget, + transferLeaseToFirstVoter bool, +) error { + return rq.store.DB().AdminRelocateRange( + ctx, + key, + voterTargets, + nonVoterTargets, + transferLeaseToFirstVoter, + ) +} + func (rq *replicateQueue) changeReplicas( ctx context.Context, repl *Replica, diff --git a/pkg/kv/kvserver/store_rebalancer.go b/pkg/kv/kvserver/store_rebalancer.go index 6abb6022e2e2..0e6677460db1 100644 --- a/pkg/kv/kvserver/store_rebalancer.go +++ b/pkg/kv/kvserver/store_rebalancer.go @@ -106,6 +106,7 @@ type StoreRebalancer struct { metrics StoreRebalancerMetrics st *cluster.Settings rq *replicateQueue + rr RangeRebalancer replRankings *replicaRankings getRaftStatusFn func(replica *Replica) *raft.Status } @@ -123,6 +124,7 @@ func NewStoreRebalancer( metrics: makeStoreRebalancerMetrics(), st: st, rq: rq, + rr: rq, replRankings: replRankings, getRaftStatusFn: func(replica *Replica) *raft.Status { return replica.RaftStatus() @@ -251,7 +253,13 @@ func (sr *StoreRebalancer) rebalanceStore( timeout := sr.rq.processTimeoutFunc(sr.st, replWithStats.repl) if err := contextutil.RunWithTimeout(ctx, "transfer lease", timeout, func(ctx context.Context) error { - return sr.rq.transferLease(ctx, replWithStats.repl, target, replWithStats.qps) + return sr.rr.TransferLease( + ctx, + replWithStats.repl, + replWithStats.repl.StoreID(), + target.StoreID, + replWithStats.qps, + ) }); err != nil { log.Errorf(ctx, "unable to transfer lease to s%d: %+v", target.StoreID, err) continue @@ -320,7 +328,7 @@ func (sr *StoreRebalancer) rebalanceStore( timeout := sr.rq.processTimeoutFunc(sr.st, replWithStats.repl) if err := contextutil.RunWithTimeout(ctx, "relocate range", timeout, func(ctx context.Context) error { - return sr.rq.store.DB().AdminRelocateRange( + return sr.rr.RelocateRange( ctx, descBeforeRebalance.StartKey.AsRawKey(), voterTargets, From 016019b4a3feada1f808155f67eb387193967ca9 Mon Sep 17 00:00:00 2001 From: Nick Travers Date: Thu, 21 Jul 2022 16:24:41 -0700 Subject: [PATCH 3/3] clusterversion,storage: add explicit version for split user keys Pebble ceased splitting user keys across multiple sstables in a single level in cockroachdb/pebble@a860bbad. A Pebble format major version was added in 22.1 to mark existing tables with split user that formed an "atomic compaction unit" for compaction. The compaction to recombine the split keys happens at a low priority within Pebble, so there is no guarantee that these tables have been recombined. Pebble range key support depends on having split user keys recombined. As such, the Pebble migration to upgrade to a version that support range keys must first perform a blocking migration to compact tables with split user keys. Currently, the Cockroach cluster version `EnsurePebbleFormatVersionRangeKeys` ratchets the Pebble version through both the split user keys migration, up to the version that supports range keys. Strictly speaking, the Pebble format version migration will take care of the sequencing. However, to provide better visibility into the various Pebble migrations, and to cater for the fact that the split user keys migration may take some time to recombine the necessary tables, a dedicated cluster version for the split keys migration is added. This dedicated cluster version makes it unambiguous what's blocking the Cockroach version finalization. Release note (backward-incompatible change, ops change): A cluster version is added to allow Pebble to recombine certain SSTables (specifically, user keys that are split across multiple files in a level of the LSM). Recombining the split user keys is required for supporting the range keys feature. The migration to recombine the SSTables is expected to be short (split user keys are rare in practice), but will block subsequent migrations until all tables have been recombined. The `storage.marked-for-compaction-files` time series metric can show the progress of the migration. Close #84012. --- .../settings/settings-for-tenants.txt | 2 +- docs/generated/settings/settings.html | 2 +- pkg/clusterversion/cockroach_versions.go | 32 ++++++++++++------- pkg/clusterversion/key_string.go | 29 +++++++++-------- pkg/storage/pebble.go | 4 +++ 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index bcca1bf17cfb..f916e12ddff2 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -283,4 +283,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as :. If no port is specified, 4317 will be used. trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https:///#/debug/tracez trace.zipkin.collector string the address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. -version version 22.1-28 set the active cluster version in the format '.' +version version 22.1-30 set the active cluster version in the format '.' diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 5d1e60b4d3a9..31db96f77148 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -214,6 +214,6 @@ trace.opentelemetry.collectorstringaddress of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as :. If no port is specified, 4317 will be used. trace.span_registry.enabledbooleantrueif set, ongoing traces can be seen at https:///#/debug/tracez trace.zipkin.collectorstringthe address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. -versionversion22.1-28set the active cluster version in the format '.' +versionversion22.1-30set the active cluster version in the format '.' diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index d4ea07a7b45e..d4bb66fa63ba 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -289,6 +289,10 @@ const ( // LocalTimestamps enables the use of local timestamps in MVCC values. LocalTimestamps + // PebbleFormatSplitUserKeysMarkedCompacted updates the Pebble format + // version that recombines all user keys that may be split across multiple + // files into a single table. + PebbleFormatSplitUserKeysMarkedCompacted // EnsurePebbleFormatVersionRangeKeys is the first step of a two-part // migration that bumps Pebble's format major version to a version that // supports range keys. @@ -517,53 +521,57 @@ var versionsSingleton = keyedVersions{ Version: roachpb.Version{Major: 22, Minor: 1, Internal: 4}, }, { - Key: EnsurePebbleFormatVersionRangeKeys, + Key: PebbleFormatSplitUserKeysMarkedCompacted, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 6}, }, { - Key: EnablePebbleFormatVersionRangeKeys, + Key: EnsurePebbleFormatVersionRangeKeys, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 8}, }, { - Key: TrigramInvertedIndexes, + Key: EnablePebbleFormatVersionRangeKeys, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 10}, }, { - Key: RemoveGrantPrivilege, + Key: TrigramInvertedIndexes, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 12}, }, { - Key: MVCCRangeTombstones, + Key: RemoveGrantPrivilege, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 14}, }, { - Key: UpgradeSequenceToBeReferencedByID, + Key: MVCCRangeTombstones, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 16}, }, { - Key: SampledStmtDiagReqs, + Key: UpgradeSequenceToBeReferencedByID, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 18}, }, { - Key: AddSSTableTombstones, + Key: SampledStmtDiagReqs, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 20}, }, { - Key: SystemPrivilegesTable, + Key: AddSSTableTombstones, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 22}, }, { - Key: EnablePredicateProjectionChangefeed, + Key: SystemPrivilegesTable, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 24}, }, { - Key: AlterSystemSQLInstancesAddLocality, + Key: EnablePredicateProjectionChangefeed, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 26}, }, { - Key: SystemExternalConnectionsTable, + Key: AlterSystemSQLInstancesAddLocality, Version: roachpb.Version{Major: 22, Minor: 1, Internal: 28}, }, + { + Key: SystemExternalConnectionsTable, + Version: roachpb.Version{Major: 22, Minor: 1, Internal: 30}, + }, // ************************************************* // Step (2): Add new versions here. diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index f3256a9d1f96..2e253b5f9d57 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -46,23 +46,24 @@ func _() { _ = x[V22_1-35] _ = x[Start22_2-36] _ = x[LocalTimestamps-37] - _ = x[EnsurePebbleFormatVersionRangeKeys-38] - _ = x[EnablePebbleFormatVersionRangeKeys-39] - _ = x[TrigramInvertedIndexes-40] - _ = x[RemoveGrantPrivilege-41] - _ = x[MVCCRangeTombstones-42] - _ = x[UpgradeSequenceToBeReferencedByID-43] - _ = x[SampledStmtDiagReqs-44] - _ = x[AddSSTableTombstones-45] - _ = x[SystemPrivilegesTable-46] - _ = x[EnablePredicateProjectionChangefeed-47] - _ = x[AlterSystemSQLInstancesAddLocality-48] - _ = x[SystemExternalConnectionsTable-49] + _ = x[PebbleFormatSplitUserKeysMarkedCompacted-38] + _ = x[EnsurePebbleFormatVersionRangeKeys-39] + _ = x[EnablePebbleFormatVersionRangeKeys-40] + _ = x[TrigramInvertedIndexes-41] + _ = x[RemoveGrantPrivilege-42] + _ = x[MVCCRangeTombstones-43] + _ = x[UpgradeSequenceToBeReferencedByID-44] + _ = x[SampledStmtDiagReqs-45] + _ = x[AddSSTableTombstones-46] + _ = x[SystemPrivilegesTable-47] + _ = x[EnablePredicateProjectionChangefeed-48] + _ = x[AlterSystemSQLInstancesAddLocality-49] + _ = x[SystemExternalConnectionsTable-50] } -const _Key_name = "V21_2Start22_1PebbleFormatBlockPropertyCollectorProbeRequestPublicSchemasWithDescriptorsEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStoreSCRAMAuthenticationUnsafeLossOfQuorumRecoveryRangeLogAlterSystemProtectedTimestampAddColumnEnableProtectedTimestampsForTenantDeleteCommentsWithDroppedIndexesRemoveIncompatibleDatabasePrivilegesAddRaftAppliedIndexTermMigrationPostAddRaftAppliedIndexTermMigrationDontProposeWriteTimestampForLeaseTransfersEnablePebbleFormatVersionBlockPropertiesMVCCIndexBackfillerEnableLeaseHolderRemovalLooselyCoupledRaftLogTruncationChangefeedIdlenessBackupDoesNotOverwriteLatestAndCheckpointEnableDeclarativeSchemaChangerRowLevelTTLPebbleFormatSplitUserKeysMarkedIncrementalBackupSubdirEnableNewStoreRebalancerClusterLocksVirtualTableAutoStatsTableSettingsSuperRegionsEnableNewChangefeedOptionsSpanCountTablePreSeedSpanCountTableSeedSpanCountTableV22_1Start22_2LocalTimestampsEnsurePebbleFormatVersionRangeKeysEnablePebbleFormatVersionRangeKeysTrigramInvertedIndexesRemoveGrantPrivilegeMVCCRangeTombstonesUpgradeSequenceToBeReferencedByIDSampledStmtDiagReqsAddSSTableTombstonesSystemPrivilegesTableEnablePredicateProjectionChangefeedAlterSystemSQLInstancesAddLocalitySystemExternalConnectionsTable" +const _Key_name = "V21_2Start22_1PebbleFormatBlockPropertyCollectorProbeRequestPublicSchemasWithDescriptorsEnsureSpanConfigReconciliationEnsureSpanConfigSubscriptionEnableSpanConfigStoreSCRAMAuthenticationUnsafeLossOfQuorumRecoveryRangeLogAlterSystemProtectedTimestampAddColumnEnableProtectedTimestampsForTenantDeleteCommentsWithDroppedIndexesRemoveIncompatibleDatabasePrivilegesAddRaftAppliedIndexTermMigrationPostAddRaftAppliedIndexTermMigrationDontProposeWriteTimestampForLeaseTransfersEnablePebbleFormatVersionBlockPropertiesMVCCIndexBackfillerEnableLeaseHolderRemovalLooselyCoupledRaftLogTruncationChangefeedIdlenessBackupDoesNotOverwriteLatestAndCheckpointEnableDeclarativeSchemaChangerRowLevelTTLPebbleFormatSplitUserKeysMarkedIncrementalBackupSubdirEnableNewStoreRebalancerClusterLocksVirtualTableAutoStatsTableSettingsSuperRegionsEnableNewChangefeedOptionsSpanCountTablePreSeedSpanCountTableSeedSpanCountTableV22_1Start22_2LocalTimestampsPebbleFormatSplitUserKeysMarkedCompactedEnsurePebbleFormatVersionRangeKeysEnablePebbleFormatVersionRangeKeysTrigramInvertedIndexesRemoveGrantPrivilegeMVCCRangeTombstonesUpgradeSequenceToBeReferencedByIDSampledStmtDiagReqsAddSSTableTombstonesSystemPrivilegesTableEnablePredicateProjectionChangefeedAlterSystemSQLInstancesAddLocalitySystemExternalConnectionsTable" -var _Key_index = [...]uint16{0, 5, 14, 48, 60, 88, 118, 146, 167, 186, 220, 258, 292, 324, 360, 392, 428, 470, 510, 529, 553, 584, 602, 643, 673, 684, 715, 738, 762, 786, 808, 820, 846, 860, 881, 899, 904, 913, 928, 962, 996, 1018, 1038, 1057, 1090, 1109, 1129, 1150, 1185, 1219, 1249} +var _Key_index = [...]uint16{0, 5, 14, 48, 60, 88, 118, 146, 167, 186, 220, 258, 292, 324, 360, 392, 428, 470, 510, 529, 553, 584, 602, 643, 673, 684, 715, 738, 762, 786, 808, 820, 846, 860, 881, 899, 904, 913, 928, 968, 1002, 1036, 1058, 1078, 1097, 1130, 1149, 1169, 1190, 1225, 1259, 1289} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index a1863f11c08a..2fd7a2beb5fc 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -1810,6 +1810,10 @@ func (p *Pebble) SetMinVersion(version roachpb.Version) error { if formatVers < pebble.FormatRangeKeys { formatVers = pebble.FormatRangeKeys } + case !version.Less(clusterversion.ByKey(clusterversion.PebbleFormatSplitUserKeysMarkedCompacted)): + if formatVers < pebble.FormatMarkedCompacted { + formatVers = pebble.FormatMarkedCompacted + } case !version.Less(clusterversion.ByKey(clusterversion.PebbleFormatSplitUserKeysMarked)): if formatVers < pebble.FormatSplitUserKeysMarked { formatVers = pebble.FormatSplitUserKeysMarked