From aa716488013eb021549761d9617a5a2e64a56dd7 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 22:36:06 -0400 Subject: [PATCH 01/14] kv: deflake TestPriorityRatchetOnAbortOrPush Fixes #68584. The test was flaky for the reasons described in #68584. There doesn't appear to be an easy way to fix this behavior, and it's not clear how valuable doing so even is given how little we rely on transaction priorities anymore, so the commit just deflakes the test by rejecting them. Release justification: deflaking a test. --- pkg/kv/kvclient/kvcoord/txn_test.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pkg/kv/kvclient/kvcoord/txn_test.go b/pkg/kv/kvclient/kvcoord/txn_test.go index 8a286075f9ba..9a33a94302e8 100644 --- a/pkg/kv/kvclient/kvcoord/txn_test.go +++ b/pkg/kv/kvclient/kvcoord/txn_test.go @@ -202,7 +202,17 @@ func TestLostUpdate(t *testing.T) { func TestPriorityRatchetOnAbortOrPush(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - s := createTestDB(t) + s := createTestDBWithKnobs(t, &kvserver.StoreTestingKnobs{ + TestingRequestFilter: func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error { + // Reject transaction heartbeats, which can make the test flaky when they + // detect an aborted transaction before the Get operation does. See #68584 + // for an explanation. + if ba.IsSingleHeartbeatTxnRequest() { + return roachpb.NewErrorf("rejected") + } + return nil + }, + }) defer s.Stop() pushByReading := func(key roachpb.Key) { From 5effb91f6fdb53bb525c56329db4f6c4dcbf03d1 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:39:47 -0400 Subject: [PATCH 02/14] clusterversion,kv: remove NodeMembershipStatus Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 9 +- pkg/clusterversion/key_string.go | 101 +++++++++++------------ pkg/kv/kvserver/replica_gossip.go | 10 --- pkg/server/server.go | 11 --- 4 files changed, 51 insertions(+), 80 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index b84ab9a5e717..99a67e81249c 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -92,7 +92,7 @@ type Key int // You'll then want to backport (i) to the release branch itself (i.e. // release-20.2). You'll also want to bump binaryMinSupportedVersion. In the // example above, you'll set it to V20_2. This indicates that the -// minimum binary version required in a cluster with with nodes running +// minimum binary version required in a cluster with nodes running // v21.1 binaries (including pre-release alphas) is v20.2, i.e. that an // upgrade into such a binary must start out from at least v20.2 nodes. // @@ -163,9 +163,6 @@ const ( // the 21.1 release. This is because we now support tenants at the // predecessor binary interacting with a fully upgraded KV cluster. Start20_2 - // NodeMembershipStatus gates the usage of the MembershipStatus enum in the - // Liveness proto. See comment on proto definition for more details. - NodeMembershipStatus // MinPasswordLength adds the server.user_login.min_password_length setting. 
MinPasswordLength // AbortSpanBytes adds a field to MVCCStats @@ -345,10 +342,6 @@ var versionsSingleton = keyedVersions{ Key: Start20_2, Version: roachpb.Version{Major: 20, Minor: 1, Internal: 1}, }, - { - Key: NodeMembershipStatus, - Version: roachpb.Version{Major: 20, Minor: 1, Internal: 11}, - }, { Key: MinPasswordLength, Version: roachpb.Version{Major: 20, Minor: 1, Internal: 13}, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index f333092bcd4e..3df032b0517f 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -9,60 +9,59 @@ func _() { // Re-run the stringer command to generate them again. var x [1]struct{} _ = x[Start20_2-0] - _ = x[NodeMembershipStatus-1] - _ = x[MinPasswordLength-2] - _ = x[AbortSpanBytes-3] - _ = x[CreateLoginPrivilege-4] - _ = x[HBAForNonTLS-5] - _ = x[V20_2-6] - _ = x[Start21_1-7] - _ = x[CPutInline-8] - _ = x[ReplicaVersions-9] - _ = x[replacedTruncatedAndRangeAppliedStateMigration-10] - _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-11] - _ = x[TruncatedAndRangeAppliedStateMigration-12] - _ = x[PostTruncatedAndRangeAppliedStateMigration-13] - _ = x[SeparatedIntents-14] - _ = x[TracingVerbosityIndependentSemantics-15] - _ = x[PriorReadSummaries-16] - _ = x[NonVotingReplicas-17] - _ = x[V21_1-18] - _ = x[Start21_1PLUS-19] - _ = x[Start21_2-20] - _ = x[JoinTokensTable-21] - _ = x[AcquisitionTypeInLeaseHistory-22] - _ = x[SerializeViewUDTs-23] - _ = x[ExpressionIndexes-24] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-25] - _ = x[FixDescriptors-26] - _ = x[SQLStatsTable-27] - _ = x[DatabaseRoleSettings-28] - _ = x[TenantUsageTable-29] - _ = x[SQLInstancesTable-30] - _ = x[NewRetryableRangefeedErrors-31] - _ = x[AlterSystemWebSessionsCreateIndexes-32] - _ = x[SeparatedIntentsMigration-33] - _ = x[PostSeparatedIntentsMigration-34] - _ = x[RetryJobsWithExponentialBackoff-35] - _ = x[RecordsBasedRegistry-36] - _ = x[AutoSpanConfigReconciliationJob-37] - _ = x[PreventNewInterleavedTables-38] - _ = x[EnsureNoInterleavedTables-39] - _ = x[DefaultPrivileges-40] - _ = x[ZonesTableForSecondaryTenants-41] - _ = x[UseKeyEncodeForHashShardedIndexes-42] - _ = x[DatabasePlacementPolicy-43] - _ = x[GeneratedAsIdentity-44] - _ = x[OnUpdateExpressions-45] - _ = x[SpanConfigurationsTable-46] - _ = x[BoundedStaleness-47] - _ = x[SQLStatsCompactionScheduledJob-48] - _ = x[DateAndIntervalStyle-49] + _ = x[MinPasswordLength-1] + _ = x[AbortSpanBytes-2] + _ = x[CreateLoginPrivilege-3] + _ = x[HBAForNonTLS-4] + _ = x[V20_2-5] + _ = x[Start21_1-6] + _ = x[CPutInline-7] + _ = x[ReplicaVersions-8] + _ = x[replacedTruncatedAndRangeAppliedStateMigration-9] + _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-10] + _ = x[TruncatedAndRangeAppliedStateMigration-11] + _ = x[PostTruncatedAndRangeAppliedStateMigration-12] + _ = x[SeparatedIntents-13] + _ = x[TracingVerbosityIndependentSemantics-14] + _ = x[PriorReadSummaries-15] + _ = x[NonVotingReplicas-16] + _ = x[V21_1-17] + _ = x[Start21_1PLUS-18] + _ = x[Start21_2-19] + _ = x[JoinTokensTable-20] + _ = x[AcquisitionTypeInLeaseHistory-21] + _ = x[SerializeViewUDTs-22] + _ = x[ExpressionIndexes-23] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-24] + _ = x[FixDescriptors-25] + _ = x[SQLStatsTable-26] + _ = x[DatabaseRoleSettings-27] + _ = x[TenantUsageTable-28] + _ = x[SQLInstancesTable-29] + _ = x[NewRetryableRangefeedErrors-30] + _ = x[AlterSystemWebSessionsCreateIndexes-31] + _ = x[SeparatedIntentsMigration-32] + _ = 
x[PostSeparatedIntentsMigration-33] + _ = x[RetryJobsWithExponentialBackoff-34] + _ = x[RecordsBasedRegistry-35] + _ = x[AutoSpanConfigReconciliationJob-36] + _ = x[PreventNewInterleavedTables-37] + _ = x[EnsureNoInterleavedTables-38] + _ = x[DefaultPrivileges-39] + _ = x[ZonesTableForSecondaryTenants-40] + _ = x[UseKeyEncodeForHashShardedIndexes-41] + _ = x[DatabasePlacementPolicy-42] + _ = x[GeneratedAsIdentity-43] + _ = x[OnUpdateExpressions-44] + _ = x[SpanConfigurationsTable-45] + _ = x[BoundedStaleness-46] + _ = x[SQLStatsCompactionScheduledJob-47] + _ = x[DateAndIntervalStyle-48] } -const _Key_name = "Start20_2NodeMembershipStatusMinPasswordLengthAbortSpanBytesCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1CPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthAbortSpanBytesCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1CPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 29, 46, 60, 80, 92, 97, 106, 116, 131, 177, 227, 265, 307, 323, 359, 377, 394, 399, 412, 421, 436, 465, 482, 499, 548, 562, 575, 595, 611, 628, 655, 690, 715, 744, 775, 795, 826, 853, 878, 895, 924, 957, 980, 999, 1018, 1041, 1057, 1087, 1107} +var _Key_index = [...]uint16{0, 9, 26, 40, 60, 72, 77, 86, 96, 111, 157, 207, 245, 287, 303, 339, 357, 374, 379, 392, 401, 416, 445, 462, 479, 528, 542, 555, 575, 591, 608, 635, 670, 695, 724, 755, 775, 806, 833, 858, 875, 904, 937, 960, 979, 998, 1021, 1037, 1067, 1087} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/replica_gossip.go b/pkg/kv/kvserver/replica_gossip.go index 
8e265d27f8b8..299138e8422c 100644 --- a/pkg/kv/kvserver/replica_gossip.go +++ b/pkg/kv/kvserver/replica_gossip.go @@ -13,7 +13,6 @@ package kvserver import ( "context" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/config" "github.com/cockroachdb/cockroach/pkg/gossip" "github.com/cockroachdb/cockroach/pkg/keys" @@ -191,15 +190,6 @@ func (r *Replica) MaybeGossipNodeLivenessRaftMuLocked( continue } } - if !r.ClusterSettings().Version.IsActive(ctx, clusterversion.NodeMembershipStatus) { - // We can't transmit liveness records with a backwards incompatible - // representation unless we're told by the user that there are no - // pre-v20.1 nodes around. We should never get here. - if kvLiveness.Membership.Decommissioned() { - log.Fatal(ctx, "programming error: illegal membership status: decommissioned") - } - } - if err := r.store.Gossip().AddInfoProto(key, &kvLiveness, 0); err != nil { return errors.Wrapf(err, "failed to gossip node liveness (%+v)", kvLiveness) } diff --git a/pkg/server/server.go b/pkg/server/server.go index 79b9bbb0ed53..8640ab5a8470 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -2393,17 +2393,6 @@ func (s *SQLServer) startServeSQL( func (s *Server) Decommission( ctx context.Context, targetStatus livenesspb.MembershipStatus, nodeIDs []roachpb.NodeID, ) error { - if !s.st.Version.IsActive(ctx, clusterversion.NodeMembershipStatus) { - if targetStatus.Decommissioned() { - // In mixed-version cluster settings, we need to ensure that we're - // on-the-wire compatible with nodes only familiar with the boolean - // representation of membership state. We do the simple thing and - // simply disallow the setting of the fully decommissioned state until - // we're guaranteed to be on v20.2. - targetStatus = livenesspb.MembershipStatus_DECOMMISSIONING - } - } - // If we're asked to decommission ourself we may lose access to cluster RPC, // so we decommission ourself last. We copy the slice to avoid mutating the // input slice. From 0cb53f31d9d488ac609fa99f88bc2322e0a7f8be Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:45:11 -0400 Subject: [PATCH 03/14] clusterversion,kv: remove AbortSpanBytes Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 7 -- pkg/clusterversion/key_string.go | 97 +++++++++---------- .../kvserver/batcheval/cmd_end_transaction.go | 9 -- pkg/kv/kvserver/replica_consistency.go | 9 +- pkg/kv/kvserver/replica_proposal.go | 7 -- 5 files changed, 52 insertions(+), 77 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 99a67e81249c..282e6354a979 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -165,9 +165,6 @@ const ( Start20_2 // MinPasswordLength adds the server.user_login.min_password_length setting. MinPasswordLength - // AbortSpanBytes adds a field to MVCCStats - // (MVCCStats.AbortSpanBytes) that tracks the size of a range's abort span. - AbortSpanBytes // CreateLoginPrivilege is when CREATELOGIN/NOCREATELOGIN are introduced. 
// // It represents adding authn principal management via CREATELOGIN role @@ -346,10 +343,6 @@ var versionsSingleton = keyedVersions{ Key: MinPasswordLength, Version: roachpb.Version{Major: 20, Minor: 1, Internal: 13}, }, - { - Key: AbortSpanBytes, - Version: roachpb.Version{Major: 20, Minor: 1, Internal: 14}, - }, { Key: CreateLoginPrivilege, Version: roachpb.Version{Major: 20, Minor: 1, Internal: 20}, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 3df032b0517f..819cf5630a11 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -10,58 +10,57 @@ func _() { var x [1]struct{} _ = x[Start20_2-0] _ = x[MinPasswordLength-1] - _ = x[AbortSpanBytes-2] - _ = x[CreateLoginPrivilege-3] - _ = x[HBAForNonTLS-4] - _ = x[V20_2-5] - _ = x[Start21_1-6] - _ = x[CPutInline-7] - _ = x[ReplicaVersions-8] - _ = x[replacedTruncatedAndRangeAppliedStateMigration-9] - _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-10] - _ = x[TruncatedAndRangeAppliedStateMigration-11] - _ = x[PostTruncatedAndRangeAppliedStateMigration-12] - _ = x[SeparatedIntents-13] - _ = x[TracingVerbosityIndependentSemantics-14] - _ = x[PriorReadSummaries-15] - _ = x[NonVotingReplicas-16] - _ = x[V21_1-17] - _ = x[Start21_1PLUS-18] - _ = x[Start21_2-19] - _ = x[JoinTokensTable-20] - _ = x[AcquisitionTypeInLeaseHistory-21] - _ = x[SerializeViewUDTs-22] - _ = x[ExpressionIndexes-23] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-24] - _ = x[FixDescriptors-25] - _ = x[SQLStatsTable-26] - _ = x[DatabaseRoleSettings-27] - _ = x[TenantUsageTable-28] - _ = x[SQLInstancesTable-29] - _ = x[NewRetryableRangefeedErrors-30] - _ = x[AlterSystemWebSessionsCreateIndexes-31] - _ = x[SeparatedIntentsMigration-32] - _ = x[PostSeparatedIntentsMigration-33] - _ = x[RetryJobsWithExponentialBackoff-34] - _ = x[RecordsBasedRegistry-35] - _ = x[AutoSpanConfigReconciliationJob-36] - _ = x[PreventNewInterleavedTables-37] - _ = x[EnsureNoInterleavedTables-38] - _ = x[DefaultPrivileges-39] - _ = x[ZonesTableForSecondaryTenants-40] - _ = x[UseKeyEncodeForHashShardedIndexes-41] - _ = x[DatabasePlacementPolicy-42] - _ = x[GeneratedAsIdentity-43] - _ = x[OnUpdateExpressions-44] - _ = x[SpanConfigurationsTable-45] - _ = x[BoundedStaleness-46] - _ = x[SQLStatsCompactionScheduledJob-47] - _ = x[DateAndIntervalStyle-48] + _ = x[CreateLoginPrivilege-2] + _ = x[HBAForNonTLS-3] + _ = x[V20_2-4] + _ = x[Start21_1-5] + _ = x[CPutInline-6] + _ = x[ReplicaVersions-7] + _ = x[replacedTruncatedAndRangeAppliedStateMigration-8] + _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-9] + _ = x[TruncatedAndRangeAppliedStateMigration-10] + _ = x[PostTruncatedAndRangeAppliedStateMigration-11] + _ = x[SeparatedIntents-12] + _ = x[TracingVerbosityIndependentSemantics-13] + _ = x[PriorReadSummaries-14] + _ = x[NonVotingReplicas-15] + _ = x[V21_1-16] + _ = x[Start21_1PLUS-17] + _ = x[Start21_2-18] + _ = x[JoinTokensTable-19] + _ = x[AcquisitionTypeInLeaseHistory-20] + _ = x[SerializeViewUDTs-21] + _ = x[ExpressionIndexes-22] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-23] + _ = x[FixDescriptors-24] + _ = x[SQLStatsTable-25] + _ = x[DatabaseRoleSettings-26] + _ = x[TenantUsageTable-27] + _ = x[SQLInstancesTable-28] + _ = x[NewRetryableRangefeedErrors-29] + _ = x[AlterSystemWebSessionsCreateIndexes-30] + _ = x[SeparatedIntentsMigration-31] + _ = x[PostSeparatedIntentsMigration-32] + _ = x[RetryJobsWithExponentialBackoff-33] + _ = x[RecordsBasedRegistry-34] + _ = 
x[AutoSpanConfigReconciliationJob-35] + _ = x[PreventNewInterleavedTables-36] + _ = x[EnsureNoInterleavedTables-37] + _ = x[DefaultPrivileges-38] + _ = x[ZonesTableForSecondaryTenants-39] + _ = x[UseKeyEncodeForHashShardedIndexes-40] + _ = x[DatabasePlacementPolicy-41] + _ = x[GeneratedAsIdentity-42] + _ = x[OnUpdateExpressions-43] + _ = x[SpanConfigurationsTable-44] + _ = x[BoundedStaleness-45] + _ = x[SQLStatsCompactionScheduledJob-46] + _ = x[DateAndIntervalStyle-47] } -const _Key_name = "Start20_2MinPasswordLengthAbortSpanBytesCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1CPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1CPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 26, 40, 60, 72, 77, 86, 96, 111, 157, 207, 245, 287, 303, 339, 357, 374, 379, 392, 401, 416, 445, 462, 479, 528, 542, 555, 575, 591, 608, 635, 670, 695, 724, 755, 775, 806, 833, 858, 875, 904, 937, 960, 979, 998, 1021, 1037, 1067, 1087} +var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 82, 97, 143, 193, 231, 273, 289, 325, 343, 360, 365, 378, 387, 402, 431, 448, 465, 514, 528, 541, 561, 577, 594, 621, 656, 681, 710, 741, 761, 792, 819, 844, 861, 890, 923, 946, 965, 984, 1007, 1023, 1053, 1073} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 4366dff3d277..5ebd5bc3ef0f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ 
b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -931,15 +931,6 @@ func splitTriggerHelper( return enginepb.MVCCStats{}, result.Result{}, err } - if !rec.ClusterSettings().Version.IsActive(ctx, clusterversion.AbortSpanBytes) { - // Since the stats here is used to seed the initial state for the RHS - // replicas, we need to be careful about zero-ing out the abort span - // bytes if the cluster version introducing it is not yet active. Not - // doing so can result in inconsistencies in MVCCStats across replicas - // in a mixed-version cluster. - h.AbsPostSplitRight().AbortSpanBytes = 0 - } - // Note: we don't copy the queue last processed times. This means // we'll process the RHS range in consistency and time series // maintenance queues again possibly sooner than if we copied. The diff --git a/pkg/kv/kvserver/replica_consistency.go b/pkg/kv/kvserver/replica_consistency.go index f97a5c8e3fd2..16f9c036a06d 100644 --- a/pkg/kv/kvserver/replica_consistency.go +++ b/pkg/kv/kvserver/replica_consistency.go @@ -19,7 +19,6 @@ import ( "sync" "time" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval" @@ -233,10 +232,10 @@ func (r *Replica) CheckConsistency( // are consistent. Verify this only for clusters that started out on 19.1 or // higher. if !v.Less(roachpb.Version{Major: 19, Minor: 1}) { - // If version >= 19.1 but < AbortSpanBytes, we want to ignore any delta - // in AbortSpanBytes when comparing stats since older versions will not be - // tracking abort span bytes. - if v.Less(clusterversion.ByKey(clusterversion.AbortSpanBytes)) { + // If version >= 19.1 but < 20.1-14 (AbortSpanBytes before its removal), + // we want to ignore any delta in AbortSpanBytes when comparing stats + // since older versions will not be tracking abort span bytes. + if v.Less(roachpb.Version{Major: 20, Minor: 1, Internal: 14}) { delta.AbortSpanBytes = 0 haveDelta = delta != enginepb.MVCCStats{} } diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index a788babde7c2..5ffb39d9966c 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -864,13 +864,6 @@ func (r *Replica) evaluateProposal( res.Replicated.Delta.ContainsEstimates *= 2 } - // If the cluster version doesn't track abort span size in MVCCStats, we - // zero it out to prevent inconsistencies in MVCCStats across nodes in a - // possibly mixed-version cluster. - if !r.ClusterSettings().Version.IsActive(ctx, clusterversion.AbortSpanBytes) { - res.Replicated.Delta.AbortSpanBytes = 0 - } - // If the RangeAppliedState key is not being used and the cluster version is // high enough to guarantee that all current and future binaries will // understand the key, we send the migration flag through Raft. Because From 99157d973ab194ae4db160392fda21e89bfce91d Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:51:41 -0400 Subject: [PATCH 04/14] clusterversion,kv: remove CPutInline Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. 
Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 6 -- pkg/clusterversion/key_string.go | 87 ++++++++++++------------ pkg/kv/BUILD.bazel | 1 - pkg/kv/batch.go | 12 +--- pkg/kv/client_test.go | 4 +- pkg/kv/db.go | 23 +------ pkg/kv/db_test.go | 4 +- pkg/server/status/BUILD.bazel | 1 - pkg/server/status/recorder.go | 5 +- 9 files changed, 52 insertions(+), 91 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 282e6354a979..e738e91b608e 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -180,8 +180,6 @@ const ( // // Start21_1 demarcates work towards CockroachDB v21.1. Start21_1 - // CPutInline is conditional put support for inline values. - CPutInline // ReplicaVersions enables the versioning of Replica state. ReplicaVersions // replacedTruncatedAndRangeAppliedStateMigration stands in for @@ -361,10 +359,6 @@ var versionsSingleton = keyedVersions{ Key: Start21_1, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 2}, }, - { - Key: CPutInline, - Version: roachpb.Version{Major: 20, Minor: 2, Internal: 10}, - }, { Key: ReplicaVersions, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 12}, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 819cf5630a11..6e10593b5d4a 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -14,53 +14,52 @@ func _() { _ = x[HBAForNonTLS-3] _ = x[V20_2-4] _ = x[Start21_1-5] - _ = x[CPutInline-6] - _ = x[ReplicaVersions-7] - _ = x[replacedTruncatedAndRangeAppliedStateMigration-8] - _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-9] - _ = x[TruncatedAndRangeAppliedStateMigration-10] - _ = x[PostTruncatedAndRangeAppliedStateMigration-11] - _ = x[SeparatedIntents-12] - _ = x[TracingVerbosityIndependentSemantics-13] - _ = x[PriorReadSummaries-14] - _ = x[NonVotingReplicas-15] - _ = x[V21_1-16] - _ = x[Start21_1PLUS-17] - _ = x[Start21_2-18] - _ = x[JoinTokensTable-19] - _ = x[AcquisitionTypeInLeaseHistory-20] - _ = x[SerializeViewUDTs-21] - _ = x[ExpressionIndexes-22] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-23] - _ = x[FixDescriptors-24] - _ = x[SQLStatsTable-25] - _ = x[DatabaseRoleSettings-26] - _ = x[TenantUsageTable-27] - _ = x[SQLInstancesTable-28] - _ = x[NewRetryableRangefeedErrors-29] - _ = x[AlterSystemWebSessionsCreateIndexes-30] - _ = x[SeparatedIntentsMigration-31] - _ = x[PostSeparatedIntentsMigration-32] - _ = x[RetryJobsWithExponentialBackoff-33] - _ = x[RecordsBasedRegistry-34] - _ = x[AutoSpanConfigReconciliationJob-35] - _ = x[PreventNewInterleavedTables-36] - _ = x[EnsureNoInterleavedTables-37] - _ = x[DefaultPrivileges-38] - _ = x[ZonesTableForSecondaryTenants-39] - _ = x[UseKeyEncodeForHashShardedIndexes-40] - _ = x[DatabasePlacementPolicy-41] - _ = x[GeneratedAsIdentity-42] - _ = x[OnUpdateExpressions-43] - _ = x[SpanConfigurationsTable-44] - _ = x[BoundedStaleness-45] - _ = x[SQLStatsCompactionScheduledJob-46] - _ = x[DateAndIntervalStyle-47] + _ = x[ReplicaVersions-6] + _ = x[replacedTruncatedAndRangeAppliedStateMigration-7] + _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-8] + _ = x[TruncatedAndRangeAppliedStateMigration-9] + _ = x[PostTruncatedAndRangeAppliedStateMigration-10] + _ = x[SeparatedIntents-11] + _ = x[TracingVerbosityIndependentSemantics-12] + _ = x[PriorReadSummaries-13] + _ = x[NonVotingReplicas-14] + _ = x[V21_1-15] + _ = x[Start21_1PLUS-16] + _ = 
x[Start21_2-17] + _ = x[JoinTokensTable-18] + _ = x[AcquisitionTypeInLeaseHistory-19] + _ = x[SerializeViewUDTs-20] + _ = x[ExpressionIndexes-21] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-22] + _ = x[FixDescriptors-23] + _ = x[SQLStatsTable-24] + _ = x[DatabaseRoleSettings-25] + _ = x[TenantUsageTable-26] + _ = x[SQLInstancesTable-27] + _ = x[NewRetryableRangefeedErrors-28] + _ = x[AlterSystemWebSessionsCreateIndexes-29] + _ = x[SeparatedIntentsMigration-30] + _ = x[PostSeparatedIntentsMigration-31] + _ = x[RetryJobsWithExponentialBackoff-32] + _ = x[RecordsBasedRegistry-33] + _ = x[AutoSpanConfigReconciliationJob-34] + _ = x[PreventNewInterleavedTables-35] + _ = x[EnsureNoInterleavedTables-36] + _ = x[DefaultPrivileges-37] + _ = x[ZonesTableForSecondaryTenants-38] + _ = x[UseKeyEncodeForHashShardedIndexes-39] + _ = x[DatabasePlacementPolicy-40] + _ = x[GeneratedAsIdentity-41] + _ = x[OnUpdateExpressions-42] + _ = x[SpanConfigurationsTable-43] + _ = x[BoundedStaleness-44] + _ = x[SQLStatsCompactionScheduledJob-45] + _ = x[DateAndIntervalStyle-46] } -const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1CPutInlineReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1ReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 82, 97, 143, 193, 231, 273, 289, 325, 343, 360, 365, 378, 387, 402, 431, 448, 465, 514, 528, 541, 561, 577, 594, 621, 656, 681, 710, 741, 761, 792, 819, 844, 861, 890, 923, 946, 965, 984, 1007, 1023, 1053, 1073} +var _Key_index = 
[...]uint16{0, 9, 26, 46, 58, 63, 72, 87, 133, 183, 221, 263, 279, 315, 333, 350, 355, 368, 377, 392, 421, 438, 455, 504, 518, 531, 551, 567, 584, 611, 646, 671, 700, 731, 751, 782, 809, 834, 851, 880, 913, 936, 955, 974, 997, 1013, 1043, 1063} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/BUILD.bazel b/pkg/kv/BUILD.bazel index 721830635144..e353d9b3d054 100644 --- a/pkg/kv/BUILD.bazel +++ b/pkg/kv/BUILD.bazel @@ -17,7 +17,6 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/base", - "//pkg/clusterversion", "//pkg/keys", "//pkg/kv/kvbase", "//pkg/roachpb:with-mocks", diff --git a/pkg/kv/batch.go b/pkg/kv/batch.go index 2f0295bd07be..88e72b44f20a 100644 --- a/pkg/kv/batch.go +++ b/pkg/kv/batch.go @@ -13,7 +13,6 @@ package kv import ( "context" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util/hlc" @@ -464,7 +463,7 @@ func (b *Batch) CPutAllowingIfNotExists(key, value interface{}, expValue []byte) b.cputInternal(key, value, expValue, true, false) } -// cPutInline conditionally sets the value for a key if the existing value is +// CPutInline conditionally sets the value for a key if the existing value is // equal to expValue, but does not maintain multi-version values. To // conditionally set a value only if the key doesn't currently exist, pass an // empty expValue. The most recent value is always overwritten. Inline values @@ -478,14 +477,7 @@ func (b *Batch) CPutAllowingIfNotExists(key, value interface{}, expValue []byte) // // A nil value can be used to delete the respective key, since there is no // DelInline(). This is different from CPut(). -// -// Callers should check the version gate clusterversion.CPutInline to make sure -// this is supported. The method is unexported to prevent external callers using -// this without checking the version, since the CtxForCPutInline guard can't be -// used with Batch. -func (b *Batch) cPutInline(key, value interface{}, expValue []byte) { - // TODO(erikgrinaker): export once clusterversion.CPutInline is removed. 
- _ = clusterversion.CPutInline +func (b *Batch) CPutInline(key, value interface{}, expValue []byte) { b.cputInternal(key, value, expValue, false, true) } diff --git a/pkg/kv/client_test.go b/pkg/kv/client_test.go index d2a6052377b9..a13a209df339 100644 --- a/pkg/kv/client_test.go +++ b/pkg/kv/client_test.go @@ -369,10 +369,10 @@ func TestClientPutInline(t *testing.T) { func TestClientCPutInline(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + ctx := context.Background() s, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) - defer s.Stopper().Stop(context.Background()) + defer s.Stopper().Stop(ctx) db := createTestClient(t, s) - ctx := kv.CtxForCPutInline(context.Background()) key := testUser + "/key" value := []byte("value") diff --git a/pkg/kv/db.go b/pkg/kv/db.go index fc2021cbb9dc..d166e8c45ac5 100644 --- a/pkg/kv/db.go +++ b/pkg/kv/db.go @@ -16,7 +16,6 @@ import ( "strings" "github.com/cockroachdb/cockroach/pkg/base" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util/admission" @@ -409,18 +408,6 @@ func (db *DB) CPut(ctx context.Context, key, value interface{}, expValue []byte) return getOneErr(db.Run(ctx, b), b) } -// CtxForCPutInline is a gate to make sure the caller is aware that CPutInline -// is only available with clusterversion.CPutInline, and must check this before -// using the method. -func CtxForCPutInline(ctx context.Context) context.Context { - // TODO(erikgrinaker): This code and all of its uses can be removed when the - // version below is removed: - _ = clusterversion.CPutInline - return context.WithValue(ctx, canUseCPutInline{}, canUseCPutInline{}) -} - -type canUseCPutInline struct{} - // CPutInline conditionally sets the value for a key if the existing value is // equal to expValue, but does not maintain multi-version values. To // conditionally set a value only if the key doesn't currently exist, pass an @@ -436,17 +423,9 @@ type canUseCPutInline struct{} // An empty expValue means that the key is expected to not exist. If not empty, // expValue needs to correspond to a Value.TagAndDataBytes() - i.e. a key's // value without the checksum (as the checksum includes the key too). -// -// Callers should check the version gate clusterversion.CPutInline to make sure -// this is supported, and must wrap the context using CtxForCPutInline(ctx) to -// enable the call. 
func (db *DB) CPutInline(ctx context.Context, key, value interface{}, expValue []byte) error { - if ctx.Value(canUseCPutInline{}) == nil { - return errors.New("CPutInline is new in 21.1, you must check the CPutInline cluster version " + - "and use CtxForCPutInline to enable it") - } b := &Batch{} - b.cPutInline(key, value, expValue) + b.CPutInline(key, value, expValue) return getOneErr(db.Run(ctx, b), b) } diff --git a/pkg/kv/db_test.go b/pkg/kv/db_test.go index 5c418126739e..ab2472013ece 100644 --- a/pkg/kv/db_test.go +++ b/pkg/kv/db_test.go @@ -170,9 +170,9 @@ func TestDB_CPut(t *testing.T) { func TestDB_CPutInline(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + ctx := context.Background() s, db := setup(t) - defer s.Stopper().Stop(context.Background()) - ctx := kv.CtxForCPutInline(context.Background()) + defer s.Stopper().Stop(ctx) if err := db.PutInline(ctx, "aa", "1"); err != nil { t.Fatal(err) diff --git a/pkg/server/status/BUILD.bazel b/pkg/server/status/BUILD.bazel index d06999942b8d..94ddbf15fc4f 100644 --- a/pkg/server/status/BUILD.bazel +++ b/pkg/server/status/BUILD.bazel @@ -40,7 +40,6 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/build", - "//pkg/clusterversion", "//pkg/gossip", "//pkg/keys", "//pkg/kv", diff --git a/pkg/server/status/recorder.go b/pkg/server/status/recorder.go index eaf95ff5f274..80572356f5b1 100644 --- a/pkg/server/status/recorder.go +++ b/pkg/server/status/recorder.go @@ -24,7 +24,6 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/build" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/gossip" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" @@ -512,7 +511,7 @@ func (mr *MetricsRecorder) WriteNodeStatus( // of the build info in the node status, writing one of these every 10s // will generate more versions than will easily fit into a range over // the course of a day. - if mustExist && mr.settings.Version.IsActive(ctx, clusterversion.CPutInline) { + if mustExist { entry, err := db.Get(ctx, key) if err != nil { return err @@ -520,7 +519,7 @@ func (mr *MetricsRecorder) WriteNodeStatus( if entry.Value == nil { return errors.New("status entry not found, node may have been decommissioned") } - err = db.CPutInline(kv.CtxForCPutInline(ctx), key, &nodeStatus, entry.Value.TagAndDataBytes()) + err = db.CPutInline(ctx, key, &nodeStatus, entry.Value.TagAndDataBytes()) if detail := (*roachpb.ConditionFailedError)(nil); errors.As(err, &detail) { if detail.ActualValue == nil { return errors.New("status entry not found, node may have been decommissioned") From 10d10e84a91a035e05d126ad088e9d566fb2304c Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:53:50 -0400 Subject: [PATCH 05/14] clusterversion,kv: remove ReplicaVersions Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. 
Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 6 -- pkg/clusterversion/key_string.go | 85 ++++++++++++------------ pkg/kv/kvserver/store_snapshot.go | 7 +- 3 files changed, 43 insertions(+), 55 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index e738e91b608e..34158e165593 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -180,8 +180,6 @@ const ( // // Start21_1 demarcates work towards CockroachDB v21.1. Start21_1 - // ReplicaVersions enables the versioning of Replica state. - ReplicaVersions // replacedTruncatedAndRangeAppliedStateMigration stands in for // TruncatedAndRangeAppliedStateMigration which was re-introduced after the // migration job was introduced. This is necessary because the jobs @@ -359,10 +357,6 @@ var versionsSingleton = keyedVersions{ Key: Start21_1, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 2}, }, - { - Key: ReplicaVersions, - Version: roachpb.Version{Major: 20, Minor: 2, Internal: 12}, - }, { Key: replacedTruncatedAndRangeAppliedStateMigration, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 14}, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 6e10593b5d4a..137488984179 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -14,52 +14,51 @@ func _() { _ = x[HBAForNonTLS-3] _ = x[V20_2-4] _ = x[Start21_1-5] - _ = x[ReplicaVersions-6] - _ = x[replacedTruncatedAndRangeAppliedStateMigration-7] - _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-8] - _ = x[TruncatedAndRangeAppliedStateMigration-9] - _ = x[PostTruncatedAndRangeAppliedStateMigration-10] - _ = x[SeparatedIntents-11] - _ = x[TracingVerbosityIndependentSemantics-12] - _ = x[PriorReadSummaries-13] - _ = x[NonVotingReplicas-14] - _ = x[V21_1-15] - _ = x[Start21_1PLUS-16] - _ = x[Start21_2-17] - _ = x[JoinTokensTable-18] - _ = x[AcquisitionTypeInLeaseHistory-19] - _ = x[SerializeViewUDTs-20] - _ = x[ExpressionIndexes-21] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-22] - _ = x[FixDescriptors-23] - _ = x[SQLStatsTable-24] - _ = x[DatabaseRoleSettings-25] - _ = x[TenantUsageTable-26] - _ = x[SQLInstancesTable-27] - _ = x[NewRetryableRangefeedErrors-28] - _ = x[AlterSystemWebSessionsCreateIndexes-29] - _ = x[SeparatedIntentsMigration-30] - _ = x[PostSeparatedIntentsMigration-31] - _ = x[RetryJobsWithExponentialBackoff-32] - _ = x[RecordsBasedRegistry-33] - _ = x[AutoSpanConfigReconciliationJob-34] - _ = x[PreventNewInterleavedTables-35] - _ = x[EnsureNoInterleavedTables-36] - _ = x[DefaultPrivileges-37] - _ = x[ZonesTableForSecondaryTenants-38] - _ = x[UseKeyEncodeForHashShardedIndexes-39] - _ = x[DatabasePlacementPolicy-40] - _ = x[GeneratedAsIdentity-41] - _ = x[OnUpdateExpressions-42] - _ = x[SpanConfigurationsTable-43] - _ = x[BoundedStaleness-44] - _ = x[SQLStatsCompactionScheduledJob-45] - _ = x[DateAndIntervalStyle-46] + _ = x[replacedTruncatedAndRangeAppliedStateMigration-6] + _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-7] + _ = x[TruncatedAndRangeAppliedStateMigration-8] + _ = x[PostTruncatedAndRangeAppliedStateMigration-9] + _ = x[SeparatedIntents-10] + _ = x[TracingVerbosityIndependentSemantics-11] + _ = x[PriorReadSummaries-12] + _ = x[NonVotingReplicas-13] + _ = x[V21_1-14] + _ = x[Start21_1PLUS-15] + _ = x[Start21_2-16] + _ = x[JoinTokensTable-17] + _ = x[AcquisitionTypeInLeaseHistory-18] + _ = 
x[SerializeViewUDTs-19] + _ = x[ExpressionIndexes-20] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-21] + _ = x[FixDescriptors-22] + _ = x[SQLStatsTable-23] + _ = x[DatabaseRoleSettings-24] + _ = x[TenantUsageTable-25] + _ = x[SQLInstancesTable-26] + _ = x[NewRetryableRangefeedErrors-27] + _ = x[AlterSystemWebSessionsCreateIndexes-28] + _ = x[SeparatedIntentsMigration-29] + _ = x[PostSeparatedIntentsMigration-30] + _ = x[RetryJobsWithExponentialBackoff-31] + _ = x[RecordsBasedRegistry-32] + _ = x[AutoSpanConfigReconciliationJob-33] + _ = x[PreventNewInterleavedTables-34] + _ = x[EnsureNoInterleavedTables-35] + _ = x[DefaultPrivileges-36] + _ = x[ZonesTableForSecondaryTenants-37] + _ = x[UseKeyEncodeForHashShardedIndexes-38] + _ = x[DatabasePlacementPolicy-39] + _ = x[GeneratedAsIdentity-40] + _ = x[OnUpdateExpressions-41] + _ = x[SpanConfigurationsTable-42] + _ = x[BoundedStaleness-43] + _ = x[SQLStatsCompactionScheduledJob-44] + _ = x[DateAndIntervalStyle-45] } -const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1ReplicaVersionsreplacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 87, 133, 183, 221, 263, 279, 315, 333, 350, 355, 368, 377, 392, 421, 438, 455, 504, 518, 531, 551, 567, 584, 611, 646, 671, 700, 731, 751, 782, 809, 834, 851, 880, 913, 936, 955, 974, 997, 1013, 1043, 1063} +var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 264, 300, 318, 335, 340, 353, 362, 377, 406, 423, 440, 489, 503, 516, 
536, 552, 569, 596, 631, 656, 685, 716, 736, 767, 794, 819, 836, 865, 898, 921, 940, 959, 982, 998, 1028, 1048} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/store_snapshot.go b/pkg/kv/kvserver/store_snapshot.go index a2c0a0c51dae..649f2171e8d2 100644 --- a/pkg/kv/kvserver/store_snapshot.go +++ b/pkg/kv/kvserver/store_snapshot.go @@ -16,7 +16,6 @@ import ( "io" "time" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer" @@ -924,10 +923,6 @@ func SendEmptySnapshot( return err } - var replicaVersion roachpb.Version - if st.Version.IsActive(ctx, clusterversion.ReplicaVersions) { - replicaVersion = st.Version.ActiveVersionOrEmpty(ctx).Version - } ms, err = stateloader.WriteInitialReplicaState( ctx, eng, @@ -936,7 +931,7 @@ func SendEmptySnapshot( roachpb.Lease{}, hlc.Timestamp{}, // gcThreshold stateloader.TruncatedStateUnreplicated, - replicaVersion, + st.Version.ActiveVersionOrEmpty(ctx).Version, ) if err != nil { return err From 78dd7d6f75797bd9c69d1891c8f2916e5b4add72 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:54:30 -0400 Subject: [PATCH 06/14] clusterversion,kv: remove SeparatedIntents Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 6 -- pkg/clusterversion/key_string.go | 75 ++++++++++++------------ 2 files changed, 37 insertions(+), 44 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 34158e165593..fd86da7d7f18 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -206,8 +206,6 @@ const ( // using the replicated legacy TruncatedState. It's also used in asserting // that no replicated truncated state representation is found. PostTruncatedAndRangeAppliedStateMigration - // SeparatedIntents allows the writing of separated intents/locks. - SeparatedIntents // TracingVerbosityIndependentSemantics marks a change in which trace spans // are propagated across RPC boundaries independently of their verbosity setting. // This requires a version gate this violates implicit assumptions in v20.2. 
@@ -373,10 +371,6 @@ var versionsSingleton = keyedVersions{ Key: PostTruncatedAndRangeAppliedStateMigration, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 24}, }, - { - Key: SeparatedIntents, - Version: roachpb.Version{Major: 20, Minor: 2, Internal: 26}, - }, { Key: TracingVerbosityIndependentSemantics, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 28}, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 137488984179..5a6c78be3e23 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -18,47 +18,46 @@ func _() { _ = x[replacedPostTruncatedAndRangeAppliedStateMigration-7] _ = x[TruncatedAndRangeAppliedStateMigration-8] _ = x[PostTruncatedAndRangeAppliedStateMigration-9] - _ = x[SeparatedIntents-10] - _ = x[TracingVerbosityIndependentSemantics-11] - _ = x[PriorReadSummaries-12] - _ = x[NonVotingReplicas-13] - _ = x[V21_1-14] - _ = x[Start21_1PLUS-15] - _ = x[Start21_2-16] - _ = x[JoinTokensTable-17] - _ = x[AcquisitionTypeInLeaseHistory-18] - _ = x[SerializeViewUDTs-19] - _ = x[ExpressionIndexes-20] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-21] - _ = x[FixDescriptors-22] - _ = x[SQLStatsTable-23] - _ = x[DatabaseRoleSettings-24] - _ = x[TenantUsageTable-25] - _ = x[SQLInstancesTable-26] - _ = x[NewRetryableRangefeedErrors-27] - _ = x[AlterSystemWebSessionsCreateIndexes-28] - _ = x[SeparatedIntentsMigration-29] - _ = x[PostSeparatedIntentsMigration-30] - _ = x[RetryJobsWithExponentialBackoff-31] - _ = x[RecordsBasedRegistry-32] - _ = x[AutoSpanConfigReconciliationJob-33] - _ = x[PreventNewInterleavedTables-34] - _ = x[EnsureNoInterleavedTables-35] - _ = x[DefaultPrivileges-36] - _ = x[ZonesTableForSecondaryTenants-37] - _ = x[UseKeyEncodeForHashShardedIndexes-38] - _ = x[DatabasePlacementPolicy-39] - _ = x[GeneratedAsIdentity-40] - _ = x[OnUpdateExpressions-41] - _ = x[SpanConfigurationsTable-42] - _ = x[BoundedStaleness-43] - _ = x[SQLStatsCompactionScheduledJob-44] - _ = x[DateAndIntervalStyle-45] + _ = x[TracingVerbosityIndependentSemantics-10] + _ = x[PriorReadSummaries-11] + _ = x[NonVotingReplicas-12] + _ = x[V21_1-13] + _ = x[Start21_1PLUS-14] + _ = x[Start21_2-15] + _ = x[JoinTokensTable-16] + _ = x[AcquisitionTypeInLeaseHistory-17] + _ = x[SerializeViewUDTs-18] + _ = x[ExpressionIndexes-19] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-20] + _ = x[FixDescriptors-21] + _ = x[SQLStatsTable-22] + _ = x[DatabaseRoleSettings-23] + _ = x[TenantUsageTable-24] + _ = x[SQLInstancesTable-25] + _ = x[NewRetryableRangefeedErrors-26] + _ = x[AlterSystemWebSessionsCreateIndexes-27] + _ = x[SeparatedIntentsMigration-28] + _ = x[PostSeparatedIntentsMigration-29] + _ = x[RetryJobsWithExponentialBackoff-30] + _ = x[RecordsBasedRegistry-31] + _ = x[AutoSpanConfigReconciliationJob-32] + _ = x[PreventNewInterleavedTables-33] + _ = x[EnsureNoInterleavedTables-34] + _ = x[DefaultPrivileges-35] + _ = x[ZonesTableForSecondaryTenants-36] + _ = x[UseKeyEncodeForHashShardedIndexes-37] + _ = x[DatabasePlacementPolicy-38] + _ = x[GeneratedAsIdentity-39] + _ = x[OnUpdateExpressions-40] + _ = x[SpanConfigurationsTable-41] + _ = x[BoundedStaleness-42] + _ = x[SQLStatsCompactionScheduledJob-43] + _ = x[DateAndIntervalStyle-44] } -const _Key_name = 
"Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationSeparatedIntentsTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 264, 300, 318, 335, 340, 353, 362, 377, 406, 423, 440, 489, 503, 516, 536, 552, 569, 596, 631, 656, 685, 716, 736, 767, 794, 819, 836, 865, 898, 921, 940, 959, 982, 998, 1028, 1048} +var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 284, 302, 319, 324, 337, 346, 361, 390, 407, 424, 473, 487, 500, 520, 536, 553, 580, 615, 640, 669, 700, 720, 751, 778, 803, 820, 849, 882, 905, 924, 943, 966, 982, 1012, 1032} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { From ac4cfa51b9c906a8ef87fe8f9941439c7b4b7a31 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:58:04 -0400 Subject: [PATCH 07/14] clusterversion,kv: remove PriorReadSummaries Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. 
Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 8 --- pkg/clusterversion/key_string.go | 71 +++++++++---------- .../kvserver/batcheval/cmd_end_transaction.go | 3 +- pkg/kv/kvserver/batcheval/cmd_lease.go | 3 +- pkg/kv/kvserver/replica_command.go | 8 --- pkg/sql/sem/tree/BUILD.bazel | 1 - pkg/sql/sem/tree/as_of.go | 5 +- 7 files changed, 38 insertions(+), 61 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index fd86da7d7f18..a038ec5a1ff3 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -210,10 +210,6 @@ const ( // are propagated across RPC boundaries independently of their verbosity setting. // This requires a version gate this violates implicit assumptions in v20.2. TracingVerbosityIndependentSemantics - // PriorReadSummaries introduces support for the use of read summary objects - // to ship information about reads on a range through lease changes and - // range merges. - PriorReadSummaries // NonVotingReplicas enables the creation of non-voting replicas. NonVotingReplicas // V21_1 is CockroachDB v21.1. It's used for all v21.1.x patch releases. @@ -375,10 +371,6 @@ var versionsSingleton = keyedVersions{ Key: TracingVerbosityIndependentSemantics, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 28}, }, - { - Key: PriorReadSummaries, - Version: roachpb.Version{Major: 20, Minor: 2, Internal: 44}, - }, { Key: NonVotingReplicas, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 46}, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 5a6c78be3e23..0a275a37f795 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -19,45 +19,44 @@ func _() { _ = x[TruncatedAndRangeAppliedStateMigration-8] _ = x[PostTruncatedAndRangeAppliedStateMigration-9] _ = x[TracingVerbosityIndependentSemantics-10] - _ = x[PriorReadSummaries-11] - _ = x[NonVotingReplicas-12] - _ = x[V21_1-13] - _ = x[Start21_1PLUS-14] - _ = x[Start21_2-15] - _ = x[JoinTokensTable-16] - _ = x[AcquisitionTypeInLeaseHistory-17] - _ = x[SerializeViewUDTs-18] - _ = x[ExpressionIndexes-19] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-20] - _ = x[FixDescriptors-21] - _ = x[SQLStatsTable-22] - _ = x[DatabaseRoleSettings-23] - _ = x[TenantUsageTable-24] - _ = x[SQLInstancesTable-25] - _ = x[NewRetryableRangefeedErrors-26] - _ = x[AlterSystemWebSessionsCreateIndexes-27] - _ = x[SeparatedIntentsMigration-28] - _ = x[PostSeparatedIntentsMigration-29] - _ = x[RetryJobsWithExponentialBackoff-30] - _ = x[RecordsBasedRegistry-31] - _ = x[AutoSpanConfigReconciliationJob-32] - _ = x[PreventNewInterleavedTables-33] - _ = x[EnsureNoInterleavedTables-34] - _ = x[DefaultPrivileges-35] - _ = x[ZonesTableForSecondaryTenants-36] - _ = x[UseKeyEncodeForHashShardedIndexes-37] - _ = x[DatabasePlacementPolicy-38] - _ = x[GeneratedAsIdentity-39] - _ = x[OnUpdateExpressions-40] - _ = x[SpanConfigurationsTable-41] - _ = x[BoundedStaleness-42] - _ = x[SQLStatsCompactionScheduledJob-43] - _ = x[DateAndIntervalStyle-44] + _ = x[NonVotingReplicas-11] + _ = x[V21_1-12] + _ = x[Start21_1PLUS-13] + _ = x[Start21_2-14] + _ = x[JoinTokensTable-15] + _ = x[AcquisitionTypeInLeaseHistory-16] + _ = x[SerializeViewUDTs-17] + _ = x[ExpressionIndexes-18] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-19] + _ = x[FixDescriptors-20] + _ = x[SQLStatsTable-21] + _ = x[DatabaseRoleSettings-22] + _ = x[TenantUsageTable-23] + _ = 
x[SQLInstancesTable-24] + _ = x[NewRetryableRangefeedErrors-25] + _ = x[AlterSystemWebSessionsCreateIndexes-26] + _ = x[SeparatedIntentsMigration-27] + _ = x[PostSeparatedIntentsMigration-28] + _ = x[RetryJobsWithExponentialBackoff-29] + _ = x[RecordsBasedRegistry-30] + _ = x[AutoSpanConfigReconciliationJob-31] + _ = x[PreventNewInterleavedTables-32] + _ = x[EnsureNoInterleavedTables-33] + _ = x[DefaultPrivileges-34] + _ = x[ZonesTableForSecondaryTenants-35] + _ = x[UseKeyEncodeForHashShardedIndexes-36] + _ = x[DatabasePlacementPolicy-37] + _ = x[GeneratedAsIdentity-38] + _ = x[OnUpdateExpressions-39] + _ = x[SpanConfigurationsTable-40] + _ = x[BoundedStaleness-41] + _ = x[SQLStatsCompactionScheduledJob-42] + _ = x[DateAndIntervalStyle-43] } -const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationTracingVerbosityIndependentSemanticsPriorReadSummariesNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationTracingVerbosityIndependentSemanticsNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 284, 302, 319, 324, 337, 346, 361, 390, 407, 424, 473, 487, 500, 520, 536, 553, 580, 615, 640, 669, 700, 720, 751, 778, 803, 820, 849, 882, 905, 924, 943, 966, 982, 1012, 1032} +var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 284, 301, 306, 319, 328, 343, 372, 389, 406, 455, 469, 482, 502, 518, 535, 562, 597, 622, 651, 682, 702, 733, 760, 785, 802, 831, 864, 887, 906, 925, 948, 964, 994, 1014} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 
5ebd5bc3ef0f..90a2f5920bc1 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -1108,8 +1108,7 @@ func mergeTrigger( // directly in this method. The primary reason why these are different is // because the RHS's persistent read summary may not be up-to-date, as it is // not updated by the SubsumeRequest. - readSumActive := rec.ClusterSettings().Version.IsActive(ctx, clusterversion.PriorReadSummaries) - if merge.RightReadSummary != nil && readSumActive { + if merge.RightReadSummary != nil { mergedSum := merge.RightReadSummary.Clone() if priorSum, err := readsummary.Load(ctx, batch, rec.GetRangeID()); err != nil { return result.Result{}, err diff --git a/pkg/kv/kvserver/batcheval/cmd_lease.go b/pkg/kv/kvserver/batcheval/cmd_lease.go index 221695102b60..9776819400a6 100644 --- a/pkg/kv/kvserver/batcheval/cmd_lease.go +++ b/pkg/kv/kvserver/batcheval/cmd_lease.go @@ -145,8 +145,7 @@ func evalNewLease( // only ever updates in-mem state) but it's easy to get things wrong (in // which case they could easily take a catastrophic turn) and the benefit is // low. - readSumActive := rec.ClusterSettings().Version.IsActive(ctx, clusterversion.PriorReadSummaries) - if priorReadSum != nil && readSumActive { + if priorReadSum != nil { if err := readsummary.Set(ctx, readWriter, rec.GetRangeID(), ms, priorReadSum); err != nil { return newFailedLeaseTrigger(isTransfer), err } diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go index 10d48087b433..3e85d053a7bd 100644 --- a/pkg/kv/kvserver/replica_command.go +++ b/pkg/kv/kvserver/replica_command.go @@ -19,7 +19,6 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/base" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" @@ -562,13 +561,6 @@ func (r *Replica) AdminMerge( log.Event(ctx, "merge txn begins") txn.SetDebugName(mergeTxnName) - // If we aren't certain that all possible nodes in the cluster support a - // range merge transaction refreshing its reads while the RHS range is - // subsumed, observe the commit timestamp to force a client-side retry. - if !r.ClusterSettings().Version.IsActive(ctx, clusterversion.PriorReadSummaries) { - _ = txn.CommitTimestamp() - } - // Pipelining might send QueryIntent requests to the RHS after the RHS has // noticed the merge and started blocking all traffic. This causes the merge // transaction to deadlock. 
Just turn pipelining off; the structure of the diff --git a/pkg/sql/sem/tree/BUILD.bazel b/pkg/sql/sem/tree/BUILD.bazel index 02a06230070f..325d87281de0 100644 --- a/pkg/sql/sem/tree/BUILD.bazel +++ b/pkg/sql/sem/tree/BUILD.bazel @@ -116,7 +116,6 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/base", - "//pkg/clusterversion", "//pkg/geo", "//pkg/geo/geopb", "//pkg/keys", diff --git a/pkg/sql/sem/tree/as_of.go b/pkg/sql/sem/tree/as_of.go index 9a96f19b9b4c..641fd61e5eb5 100644 --- a/pkg/sql/sem/tree/as_of.go +++ b/pkg/sql/sem/tree/as_of.go @@ -18,7 +18,6 @@ import ( "time" "github.com/cockroachdb/apd/v2" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" @@ -227,9 +226,7 @@ func DatumToHLC(evalCtx *EvalContext, stmtTimestamp time.Time, d Datum) (hlc.Tim s := string(*d) // Parse synthetic flag. syn := false - if strings.HasSuffix(s, "?") && evalCtx.Settings.Version.IsActive(evalCtx.Context, clusterversion.PriorReadSummaries) { - // NOTE: we don't parse this in mixed-version clusters because v20.2 - // nodes will not know how to handle synthetic timestamps. + if strings.HasSuffix(s, "?") { s = s[:len(s)-1] syn = true } From ef4710c0a0c62d68ec1eb85aef9ce81ae1815e1f Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 1 Sep 2021 23:59:35 -0400 Subject: [PATCH 08/14] clusterversion,kv: remove NonVotingReplicas Partially addresses #66544 by removing a cluster version and its associated dependencies which, for any cluster whose version is at least 21.1, is certain to be active. Release justification: cluster version cleanup --- pkg/clusterversion/cockroach_versions.go | 6 --- pkg/clusterversion/key_string.go | 69 ++++++++++++------------ pkg/kv/kvserver/replicate_queue.go | 6 --- pkg/sql/set_zone_config.go | 20 ------- 4 files changed, 34 insertions(+), 67 deletions(-) diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index a038ec5a1ff3..04fcb942998d 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -210,8 +210,6 @@ const ( // are propagated across RPC boundaries independently of their verbosity setting. // This requires a version gate this violates implicit assumptions in v20.2. TracingVerbosityIndependentSemantics - // NonVotingReplicas enables the creation of non-voting replicas. - NonVotingReplicas // V21_1 is CockroachDB v21.1. It's used for all v21.1.x patch releases. V21_1 @@ -371,10 +369,6 @@ var versionsSingleton = keyedVersions{ Key: TracingVerbosityIndependentSemantics, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 28}, }, - { - Key: NonVotingReplicas, - Version: roachpb.Version{Major: 20, Minor: 2, Internal: 46}, - }, { // V21_1 is CockroachDB v21.1. It's used for all v21.1.x patch releases. 
Key: V21_1, diff --git a/pkg/clusterversion/key_string.go b/pkg/clusterversion/key_string.go index 0a275a37f795..866500386868 100644 --- a/pkg/clusterversion/key_string.go +++ b/pkg/clusterversion/key_string.go @@ -19,44 +19,43 @@ func _() { _ = x[TruncatedAndRangeAppliedStateMigration-8] _ = x[PostTruncatedAndRangeAppliedStateMigration-9] _ = x[TracingVerbosityIndependentSemantics-10] - _ = x[NonVotingReplicas-11] - _ = x[V21_1-12] - _ = x[Start21_1PLUS-13] - _ = x[Start21_2-14] - _ = x[JoinTokensTable-15] - _ = x[AcquisitionTypeInLeaseHistory-16] - _ = x[SerializeViewUDTs-17] - _ = x[ExpressionIndexes-18] - _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-19] - _ = x[FixDescriptors-20] - _ = x[SQLStatsTable-21] - _ = x[DatabaseRoleSettings-22] - _ = x[TenantUsageTable-23] - _ = x[SQLInstancesTable-24] - _ = x[NewRetryableRangefeedErrors-25] - _ = x[AlterSystemWebSessionsCreateIndexes-26] - _ = x[SeparatedIntentsMigration-27] - _ = x[PostSeparatedIntentsMigration-28] - _ = x[RetryJobsWithExponentialBackoff-29] - _ = x[RecordsBasedRegistry-30] - _ = x[AutoSpanConfigReconciliationJob-31] - _ = x[PreventNewInterleavedTables-32] - _ = x[EnsureNoInterleavedTables-33] - _ = x[DefaultPrivileges-34] - _ = x[ZonesTableForSecondaryTenants-35] - _ = x[UseKeyEncodeForHashShardedIndexes-36] - _ = x[DatabasePlacementPolicy-37] - _ = x[GeneratedAsIdentity-38] - _ = x[OnUpdateExpressions-39] - _ = x[SpanConfigurationsTable-40] - _ = x[BoundedStaleness-41] - _ = x[SQLStatsCompactionScheduledJob-42] - _ = x[DateAndIntervalStyle-43] + _ = x[V21_1-11] + _ = x[Start21_1PLUS-12] + _ = x[Start21_2-13] + _ = x[JoinTokensTable-14] + _ = x[AcquisitionTypeInLeaseHistory-15] + _ = x[SerializeViewUDTs-16] + _ = x[ExpressionIndexes-17] + _ = x[DeleteDeprecatedNamespaceTableDescriptorMigration-18] + _ = x[FixDescriptors-19] + _ = x[SQLStatsTable-20] + _ = x[DatabaseRoleSettings-21] + _ = x[TenantUsageTable-22] + _ = x[SQLInstancesTable-23] + _ = x[NewRetryableRangefeedErrors-24] + _ = x[AlterSystemWebSessionsCreateIndexes-25] + _ = x[SeparatedIntentsMigration-26] + _ = x[PostSeparatedIntentsMigration-27] + _ = x[RetryJobsWithExponentialBackoff-28] + _ = x[RecordsBasedRegistry-29] + _ = x[AutoSpanConfigReconciliationJob-30] + _ = x[PreventNewInterleavedTables-31] + _ = x[EnsureNoInterleavedTables-32] + _ = x[DefaultPrivileges-33] + _ = x[ZonesTableForSecondaryTenants-34] + _ = x[UseKeyEncodeForHashShardedIndexes-35] + _ = x[DatabasePlacementPolicy-36] + _ = x[GeneratedAsIdentity-37] + _ = x[OnUpdateExpressions-38] + _ = x[SpanConfigurationsTable-39] + _ = x[BoundedStaleness-40] + _ = x[SQLStatsCompactionScheduledJob-41] + _ = x[DateAndIntervalStyle-42] } -const _Key_name = 
"Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationTracingVerbosityIndependentSemanticsNonVotingReplicasV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" +const _Key_name = "Start20_2MinPasswordLengthCreateLoginPrivilegeHBAForNonTLSV20_2Start21_1replacedTruncatedAndRangeAppliedStateMigrationreplacedPostTruncatedAndRangeAppliedStateMigrationTruncatedAndRangeAppliedStateMigrationPostTruncatedAndRangeAppliedStateMigrationTracingVerbosityIndependentSemanticsV21_1Start21_1PLUSStart21_2JoinTokensTableAcquisitionTypeInLeaseHistorySerializeViewUDTsExpressionIndexesDeleteDeprecatedNamespaceTableDescriptorMigrationFixDescriptorsSQLStatsTableDatabaseRoleSettingsTenantUsageTableSQLInstancesTableNewRetryableRangefeedErrorsAlterSystemWebSessionsCreateIndexesSeparatedIntentsMigrationPostSeparatedIntentsMigrationRetryJobsWithExponentialBackoffRecordsBasedRegistryAutoSpanConfigReconciliationJobPreventNewInterleavedTablesEnsureNoInterleavedTablesDefaultPrivilegesZonesTableForSecondaryTenantsUseKeyEncodeForHashShardedIndexesDatabasePlacementPolicyGeneratedAsIdentityOnUpdateExpressionsSpanConfigurationsTableBoundedStalenessSQLStatsCompactionScheduledJobDateAndIntervalStyle" -var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 284, 301, 306, 319, 328, 343, 372, 389, 406, 455, 469, 482, 502, 518, 535, 562, 597, 622, 651, 682, 702, 733, 760, 785, 802, 831, 864, 887, 906, 925, 948, 964, 994, 1014} +var _Key_index = [...]uint16{0, 9, 26, 46, 58, 63, 72, 118, 168, 206, 248, 284, 289, 302, 311, 326, 355, 372, 389, 438, 452, 465, 485, 501, 518, 545, 580, 605, 634, 665, 685, 716, 743, 768, 785, 814, 847, 870, 889, 908, 931, 947, 977, 997} func (i Key) String() string { if i < 0 || i >= Key(len(_Key_index)-1) { diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 976bc0603e8c..2ff157a6e9ac 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -18,7 +18,6 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/base" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/gossip" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" @@ -661,11 +660,6 @@ func (rq *replicateQueue) addOrReplaceNonVoters( removeIdx int, dryRun bool, ) (requeue bool, _ error) { - // Non-voter creation is disabled before 21.1. 
- if v, st := clusterversion.NonVotingReplicas, repl.ClusterSettings(); !st.Version.IsActive(ctx, v) { - return false, errors.AssertionFailedf("non-voting replicas cannot be created pre-21.1") - } - desc, conf := repl.DescAndSpanConfig() existingNonVoters := desc.Replicas().NonVoterDescriptors() diff --git a/pkg/sql/set_zone_config.go b/pkg/sql/set_zone_config.go index ec08d851055d..c975a84b2941 100644 --- a/pkg/sql/set_zone_config.go +++ b/pkg/sql/set_zone_config.go @@ -17,7 +17,6 @@ import ( "strings" "github.com/cockroachdb/cockroach/pkg/base" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/config" "github.com/cockroachdb/cockroach/pkg/config/zonepb" "github.com/cockroachdb/cockroach/pkg/keys" @@ -78,9 +77,6 @@ var supportedZoneConfigOptions = map[tree.Name]struct { requiredType: types.Bool, setter: func(c *zonepb.ZoneConfig, d tree.Datum) { c.GlobalReads = proto.Bool(bool(tree.MustBeDBool(d))) }, checkAllowed: func(ctx context.Context, execCfg *ExecutorConfig, d tree.Datum) error { - if err := checkVersionActive(ctx, execCfg, clusterversion.NonVotingReplicas, "global_reads"); err != nil { - return err - } if !tree.MustBeDBool(d) { // Always allow the value to be unset. return nil @@ -100,9 +96,6 @@ var supportedZoneConfigOptions = map[tree.Name]struct { "num_voters": { requiredType: types.Int, setter: func(c *zonepb.ZoneConfig, d tree.Datum) { c.NumVoters = proto.Int32(int32(tree.MustBeDInt(d))) }, - checkAllowed: func(ctx context.Context, execCfg *ExecutorConfig, _ tree.Datum) error { - return checkVersionActive(ctx, execCfg, clusterversion.NonVotingReplicas, "num_voters") - }, }, "gc.ttlseconds": { requiredType: types.Int, @@ -133,9 +126,6 @@ var supportedZoneConfigOptions = map[tree.Name]struct { c.VoterConstraints = voterConstraintsList.Constraints c.NullVoterConstraintsIsEmpty = true }, - checkAllowed: func(ctx context.Context, execCfg *ExecutorConfig, _ tree.Datum) error { - return checkVersionActive(ctx, execCfg, clusterversion.NonVotingReplicas, "voter_constraints") - }, }, "lease_preferences": { requiredType: types.String, @@ -164,16 +154,6 @@ func loadYAML(dst interface{}, yamlString string) { } } -func checkVersionActive( - ctx context.Context, execCfg *ExecutorConfig, minVersion clusterversion.Key, option string, -) error { - if !execCfg.Settings.Version.IsActive(ctx, minVersion) { - return pgerror.Newf(pgcode.FeatureNotSupported, - "%s cannot be used until cluster version is finalized", option) - } - return nil -} - func (p *planner) SetZoneConfig(ctx context.Context, n *tree.SetZoneConfig) (planNode, error) { if err := checkSchemaChangeEnabled( ctx, From 3e0bbe11c8978e0241f04882cfba17dec12e9425 Mon Sep 17 00:00:00 2001 From: Paul Bardea Date: Mon, 30 Aug 2021 10:03:30 -0400 Subject: [PATCH 09/14] jobs: don't reset next_run when resuming active schedules Consider an active schedule that was created with a specific first_run time. The first_run would populate the next_run_time on the schedule. The resumption of this schedule before it executed would re-evaluate the next runtime based off the schedule's recurrence. This commit changes the scheduling system to only recompute the next run time on paused schedules. Release justification: bug fix Release note (bug fix): Fix a bug where resuming an active schedule would always reset its next run time. This was sometimes undesirable with schedules that had a first_run option specified. 
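As a rough illustration of the behavior change (a minimal, self-contained sketch; the schedule type, field names, and resume helper below are stand-ins, not the actual jobs package API):

package main

import (
	"fmt"
	"time"
)

// schedule is an illustrative stand-in for a scheduled-job record.
type schedule struct {
	paused  bool
	nextRun time.Time
}

// scheduleNextRun recomputes nextRun from the recurrence (weekly, for this sketch).
func (s *schedule) scheduleNextRun(now time.Time) {
	s.nextRun = now.Add(7 * 24 * time.Hour)
}

// resume mirrors the behavior this patch moves to: only a paused schedule has
// its next run recomputed from the recurrence; an active schedule keeps the
// next_run that was already set, for example via a first_run option.
func (s *schedule) resume(now time.Time) {
	if s.paused {
		s.scheduleNextRun(now)
		s.paused = false
	}
}

func main() {
	now := time.Now()
	firstRun := now.Add(10 * time.Second)

	active := schedule{paused: false, nextRun: firstRun}
	active.resume(now)
	fmt.Println("active schedule keeps first_run:", active.nextRun.Equal(firstRun)) // true

	paused := schedule{paused: true, nextRun: firstRun}
	paused.resume(now)
	fmt.Println("paused schedule is rescheduled:", !paused.nextRun.Equal(firstRun)) // true
}
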
--- pkg/jobs/schedule_control_test.go | 18 ++++++++++++++++++ pkg/sql/control_schedules.go | 12 ++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/pkg/jobs/schedule_control_test.go b/pkg/jobs/schedule_control_test.go index 01dd44ac0ba3..66225584ea32 100644 --- a/pkg/jobs/schedule_control_test.go +++ b/pkg/jobs/schedule_control_test.go @@ -27,6 +27,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/stretchr/testify/require" ) @@ -75,6 +76,23 @@ func TestScheduleControl(t *testing.T) { require.True(t, th.loadSchedule(t, scheduleID).IsPaused()) }) + t.Run("pause-active-schedule", func(t *testing.T) { + schedule := th.newScheduledJob(t, "test schedule", "select 42") + require.NoError(t, schedule.SetSchedule("@weekly")) + // Datums only store up until microseconds. + ms := time.Microsecond + firstRunTime := timeutil.Now().Add(10 * time.Second).Truncate(ms) + schedule.SetNextRun(firstRunTime) + require.NoError(t, schedule.Create(ctx, th.cfg.InternalExecutor, nil)) + scheduleID := schedule.ScheduleID() + require.Equal(t, schedule.NextRun(), firstRunTime) + th.sqlDB.Exec(t, "RESUME SCHEDULE $1", scheduleID) + + afterSchedule := th.loadSchedule(t, scheduleID) + require.False(t, afterSchedule.IsPaused()) + require.Equal(t, afterSchedule.NextRun(), firstRunTime) + }) + t.Run("cannot-resume-one-off-schedule", func(t *testing.T) { schedule := th.newScheduledJob(t, "test schedule", "select 42") require.NoError(t, schedule.Create(ctx, th.cfg.InternalExecutor, nil)) diff --git a/pkg/sql/control_schedules.go b/pkg/sql/control_schedules.go index 022b0d11f5d1..3c053cb86d39 100644 --- a/pkg/sql/control_schedules.go +++ b/pkg/sql/control_schedules.go @@ -68,7 +68,7 @@ func loadSchedule(params runParams, scheduleID tree.Datum) (*jobs.ScheduledJob, "load-schedule", params.EvalContext().Txn, sessiondata.InternalExecutorOverride{User: security.RootUserName()}, fmt.Sprintf( - "SELECT schedule_id, schedule_expr, executor_type, execution_args FROM %s WHERE schedule_id = $1", + "SELECT schedule_id, next_run, schedule_expr, executor_type, execution_args FROM %s WHERE schedule_id = $1", env.ScheduledJobsTableName(), ), scheduleID) @@ -138,9 +138,13 @@ func (n *controlSchedulesNode) startExec(params runParams) error { schedule.Pause() err = updateSchedule(params, schedule) case tree.ResumeSchedule: - err = schedule.ScheduleNextRun() - if err == nil { - err = updateSchedule(params, schedule) + // Only schedule the next run time on PAUSED schedules, since ACTIVE schedules may + // have a custom next run time set by first_run. + if schedule.IsPaused() { + err = schedule.ScheduleNextRun() + if err == nil { + err = updateSchedule(params, schedule) + } } case tree.DropSchedule: var ex jobs.ScheduledJobExecutor From f6fc512c36d0b7a371dca6ec2423edfe38a4780a Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Tue, 10 Aug 2021 18:01:40 -0400 Subject: [PATCH 10/14] kvserver: count draining nodes as live when computing quorum Similar to https://github.com/cockroachdb/cockroach/pull/67714 Draining nodes were considered non-live by the allocator when it made the determination of whether a range could achieve quorum. This meant that, for instance, on a cluster with a replication factor of 5, if we had 3 or more nodes marked draining, we (with a high likelihood) wouldn't be able to decommission nodes from the cluster. 
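To make that arithmetic concrete, here is a minimal sketch (illustrative only; canAct is not the allocator's actual function): with a replication factor of 5 the quorum is 3, so counting replicas on draining nodes as dead leaves too few live voters and the allocator refuses to act.

package main

import "fmt"

// canAct sketches the quorum check described above: no replacement or removal
// action is taken unless a quorum of voters is considered live.
func canAct(replicationFactor, liveVoters int) bool {
	quorum := replicationFactor/2 + 1
	return liveVoters >= quorum
}

func main() {
	const rf = 5 // replication factor 5 => quorum of 3

	// Before this patch: 3 of 5 voters sit on draining nodes and are treated
	// as dead, so only 2 voters count as live and decommissioning stalls.
	fmt.Println("draining counted as dead:", canAct(rf, 2)) // false

	// With this patch: replicas on draining nodes count as live for the quorum
	// determination, so all 5 voters are live and decommissioning proceeds.
	fmt.Println("draining counted as live:", canAct(rf, 5)) // true
}
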
Furthermore, due to the same reason as above the system also would incorrectly decide to not rebalance ranges that had more than a quorum of replicas on draining nodes. This patch fixes this problem by considering replicas on draining nodes as live for the purposes of determining whether a range has quorum. This likely fixes a considerable subset of "stuck decommissioning" issues we've seen in the wild. Follows from https://github.com/cockroachlabs/support/issues/1105 Release note: None --- pkg/kv/kvserver/allocator.go | 10 ++++----- pkg/kv/kvserver/replica_command.go | 12 +++++----- pkg/kv/kvserver/replicate_queue.go | 4 ++-- pkg/kv/kvserver/store_pool.go | 35 ++++++++++++++++++++---------- pkg/kv/kvserver/store_pool_test.go | 12 +++++----- 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/pkg/kv/kvserver/allocator.go b/pkg/kv/kvserver/allocator.go index 53b33bf2afdc..5c4ac86230d5 100644 --- a/pkg/kv/kvserver/allocator.go +++ b/pkg/kv/kvserver/allocator.go @@ -549,8 +549,8 @@ func (a *Allocator) computeAction( // stores to be unavailable, just because their nodes have failed a liveness // heartbeat in the recent past. This means we won't move those replicas // elsewhere (for a regular rebalance or for decommissioning). - const includeSuspectStores = true - liveVoters, deadVoters := a.storePool.liveAndDeadReplicas(voterReplicas, includeSuspectStores) + const includeSuspectAndDrainingStores = true + liveVoters, deadVoters := a.storePool.liveAndDeadReplicas(voterReplicas, includeSuspectAndDrainingStores) if len(liveVoters) < quorum { // Do not take any replacement/removal action if we do not have a quorum of @@ -630,7 +630,7 @@ func (a *Allocator) computeAction( } liveNonVoters, deadNonVoters := a.storePool.liveAndDeadReplicas( - nonVoterReplicas, includeSuspectStores, + nonVoterReplicas, includeSuspectAndDrainingStores, ) if haveNonVoters == neededNonVoters && len(deadNonVoters) > 0 { // The range has non-voter(s) on a dead node that we should replace. @@ -1297,7 +1297,7 @@ func (a *Allocator) TransferLeaseTarget( return roachpb.ReplicaDescriptor{} } // Verify that the preferred replica is eligible to receive the lease. - preferred, _ = a.storePool.liveAndDeadReplicas(preferred, false /* includeSuspectStores */) + preferred, _ = a.storePool.liveAndDeadReplicas(preferred, false /* includeSuspectAndDrainingStores */) if len(preferred) == 1 { return preferred[0] } @@ -1312,7 +1312,7 @@ func (a *Allocator) TransferLeaseTarget( } // Only consider live, non-draining, non-suspect replicas. - existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectStores */) + existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectAndDrainingStores */) // Short-circuit if there are no valid targets out there. if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseStoreID) { diff --git a/pkg/kv/kvserver/replica_command.go b/pkg/kv/kvserver/replica_command.go index 10d48087b433..88a9aaf4c93f 100644 --- a/pkg/kv/kvserver/replica_command.go +++ b/pkg/kv/kvserver/replica_command.go @@ -2042,11 +2042,11 @@ type changeReplicasTxnArgs struct { // // - Replicas on decommissioning node/store are considered live. // - // - If `includeSuspectStores` is true, stores that are marked suspect (i.e. + // - If `includeSuspectAndDrainingStores` is true, stores that are marked suspect (i.e. // stores that have failed a liveness heartbeat in the recent past) are // considered live. Otherwise, they are excluded from the returned slices. 
liveAndDeadReplicas func( - repls []roachpb.ReplicaDescriptor, includeSuspectStores bool, + repls []roachpb.ReplicaDescriptor, includeSuspectAndDrainingStores bool, ) (liveReplicas, deadReplicas []roachpb.ReplicaDescriptor) logChange logChangeFn @@ -2141,7 +2141,7 @@ func execChangeReplicasTxn( // Note that the allocator will avoid rebalancing to stores that are // currently marked suspect. See uses of StorePool.getStoreList() in // allocator.go. - liveReplicas, _ := args.liveAndDeadReplicas(replicas.Descriptors(), true /* includeSuspectStores */) + liveReplicas, _ := args.liveAndDeadReplicas(replicas.Descriptors(), true /* includeSuspectAndDrainingStores */) if !replicas.CanMakeProgress( func(rDesc roachpb.ReplicaDescriptor) bool { for _, inner := range liveReplicas { @@ -2895,8 +2895,10 @@ func (s *Store) relocateOne( for _, candidate := range candidateTargets { store, ok := storeMap[candidate.StoreID] if !ok { - return nil, nil, fmt.Errorf("cannot up-replicate to s%d; missing gossiped StoreDescriptor", - candidate.StoreID) + return nil, nil, fmt.Errorf( + "cannot up-replicate to s%d; missing gossiped StoreDescriptor"+ + " (the store is likely dead, draining or decommissioning)", candidate.StoreID, + ) } candidateDescs = append(candidateDescs, *store) } diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 976bc0603e8c..7dc487564347 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -365,10 +365,10 @@ func (rq *replicateQueue) processOneChange( voterReplicas := desc.Replicas().VoterDescriptors() nonVoterReplicas := desc.Replicas().NonVoterDescriptors() liveVoterReplicas, deadVoterReplicas := rq.allocator.storePool.liveAndDeadReplicas( - voterReplicas, true, /* includeSuspectStores */ + voterReplicas, true, /* includeSuspectAndDrainingStores */ ) liveNonVoterReplicas, deadNonVoterReplicas := rq.allocator.storePool.liveAndDeadReplicas( - nonVoterReplicas, true, /* includeSuspectStores */ + nonVoterReplicas, true, /* includeSuspectAndDrainingStores */ ) // NB: the replication layer ensures that the below operations don't cause diff --git a/pkg/kv/kvserver/store_pool.go b/pkg/kv/kvserver/store_pool.go index 11fa84324420..d77eb725a890 100644 --- a/pkg/kv/kvserver/store_pool.go +++ b/pkg/kv/kvserver/store_pool.go @@ -236,8 +236,14 @@ const ( storeStatusAvailable // The store is decommissioning. storeStatusDecommissioning - // The store failed it's liveness heartbeat recently and is considered suspect. + // The store failed it's liveness heartbeat recently and is considered + // suspect. Consequently, stores always move from `storeStatusUnknown` + // (indicating a node that has a non-live node liveness record) to + // `storeStatusSuspect`. storeStatusSuspect + // The store is alive but is currently marked as draining, so it is not a + // candidate for lease transfers or replica rebalancing. + storeStatusDraining ) func (sd *storeDetail) status( @@ -282,6 +288,9 @@ func (sd *storeDetail) status( // Even if the store has been updated via gossip, we still rely on // the node liveness to determine whether it is considered live. + // + // Store statuses checked in the following order: + // dead -> decommissioning -> unknown -> draining -> suspect -> available. switch nl(sd.desc.Node.NodeID, now, threshold) { case livenesspb.NodeLivenessStatus_DEAD, livenesspb.NodeLivenessStatus_DECOMMISSIONED: return storeStatusDead @@ -302,7 +311,7 @@ func (sd *storeDetail) status( // and we may not see a store in this state. 
To help with that we perform // a similar clear of lastAvailable on a DEAD store. sd.lastAvailable = time.Time{} - return storeStatusUnknown + return storeStatusDraining } if sd.isThrottled(now) { @@ -668,11 +677,12 @@ func (sp *StorePool) storeStatus(storeID roachpb.StoreID) (storeStatus, error) { // // - Replicas on decommissioning node/store are considered live. // -// - If `includeSuspectStores` is true, stores that are marked suspect (i.e. -// stores that have failed a liveness heartbeat in the recent past) are -// considered live. Otherwise, they are excluded from the returned slices. +// - If `includeSuspectAndDrainingStores` is true, stores that are marked +// suspect (i.e. stores that have failed a liveness heartbeat in the recent +// past), and stores that are marked as draining are considered live. Otherwise, +// they are excluded from the returned slices. func (sp *StorePool) liveAndDeadReplicas( - repls []roachpb.ReplicaDescriptor, includeSuspectStores bool, + repls []roachpb.ReplicaDescriptor, includeSuspectAndDrainingStores bool, ) (liveReplicas, deadReplicas []roachpb.ReplicaDescriptor) { sp.detailsMu.Lock() defer sp.detailsMu.Unlock() @@ -696,8 +706,8 @@ func (sp *StorePool) liveAndDeadReplicas( liveReplicas = append(liveReplicas, repl) case storeStatusUnknown: // No-op. - case storeStatusSuspect: - if includeSuspectStores { + case storeStatusSuspect, storeStatusDraining: + if includeSuspectAndDrainingStores { liveReplicas = append(liveReplicas, repl) } default: @@ -824,7 +834,8 @@ type throttledStoreReasons []string // getStoreList returns a storeList that contains all active stores that contain // the required attributes and their associated stats. The storeList is filtered // according to the provided storeFilter. It also returns the total number of -// alive and throttled stores. +// alive stores and a list of throttled stores with a reason for why they're +// throttled. 
func (sp *StorePool) getStoreList(filter storeFilter) (StoreList, int, throttledStoreReasons) { sp.detailsMu.Lock() defer sp.detailsMu.Unlock() @@ -881,9 +892,11 @@ func (sp *StorePool) getStoreListFromIDsLocked( case storeStatusAvailable: aliveStoreCount++ storeDescriptors = append(storeDescriptors, *detail.desc) + case storeStatusDraining: + throttled = append(throttled, fmt.Sprintf("s%d: draining", storeID)) case storeStatusSuspect: aliveStoreCount++ - throttled = append(throttled, "throttled because the node is considered suspect") + throttled = append(throttled, fmt.Sprintf("s%d: suspect", storeID)) if filter != storeFilterThrottled && filter != storeFilterSuspect { storeDescriptors = append(storeDescriptors, *detail.desc) } @@ -1006,7 +1019,7 @@ func (sp *StorePool) isStoreReadyForRoutineReplicaTransferInternal( log.VEventf(ctx, 3, "s%d is a live target, candidate for rebalancing", targetStoreID) return true - case storeStatusDead, storeStatusUnknown, storeStatusDecommissioning, storeStatusSuspect: + case storeStatusDead, storeStatusUnknown, storeStatusDecommissioning, storeStatusSuspect, storeStatusDraining: log.VEventf(ctx, 3, "not considering non-live store s%d (%v)", targetStoreID, status) return false diff --git a/pkg/kv/kvserver/store_pool_test.go b/pkg/kv/kvserver/store_pool_test.go index 4e8355142df1..3010375795ae 100644 --- a/pkg/kv/kvserver/store_pool_test.go +++ b/pkg/kv/kvserver/store_pool_test.go @@ -754,7 +754,7 @@ func TestStorePoolFindDeadReplicas(t *testing.T) { mnl.setNodeStatus(roachpb.NodeID(i), livenesspb.NodeLivenessStatus_LIVE) } - liveReplicas, deadReplicas := sp.liveAndDeadReplicas(replicas, false /* includeSuspectStores */) + liveReplicas, deadReplicas := sp.liveAndDeadReplicas(replicas, false /* includeSuspectAndDrainingStores */) if len(liveReplicas) != 5 { t.Fatalf("expected five live replicas, found %d (%v)", len(liveReplicas), liveReplicas) } @@ -776,7 +776,7 @@ func TestStorePoolFindDeadReplicas(t *testing.T) { // Mark node 4 as merely unavailable. 
mnl.setNodeStatus(4, livenesspb.NodeLivenessStatus_UNAVAILABLE) - liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas, false /* includeSuspectStores */) + liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas, false /* includeSuspectAndDrainingStores */) if a, e := liveReplicas, replicas[:3]; !reflect.DeepEqual(a, e) { t.Fatalf("expected live replicas %+v; got %+v", e, a) } @@ -803,7 +803,7 @@ func TestStorePoolDefaultState(t *testing.T) { liveReplicas, deadReplicas := sp.liveAndDeadReplicas( []roachpb.ReplicaDescriptor{{StoreID: 1}}, - false, /* includeSuspectStores */ + false, /* includeSuspectAndDrainingStores */ ) if len(liveReplicas) != 0 || len(deadReplicas) != 0 { t.Errorf("expected 0 live and 0 dead replicas; got %v and %v", liveReplicas, deadReplicas) @@ -913,7 +913,7 @@ func TestStorePoolSuspected(t *testing.T) { s = detail.status(now, timeUntilStoreDead, sp.nodeLivenessFn, timeAfterStoreSuspect) sp.detailsMu.Unlock() - require.Equal(t, s, storeStatusUnknown) + require.Equal(t, s, storeStatusDraining) require.True(t, detail.lastAvailable.IsZero()) mnl.setNodeStatus(store.Node.NodeID, livenesspb.NodeLivenessStatus_LIVE) @@ -1071,7 +1071,7 @@ func TestStorePoolDecommissioningReplicas(t *testing.T) { mnl.setNodeStatus(roachpb.NodeID(i), livenesspb.NodeLivenessStatus_LIVE) } - liveReplicas, deadReplicas := sp.liveAndDeadReplicas(replicas, false /* includeSuspectStores */) + liveReplicas, deadReplicas := sp.liveAndDeadReplicas(replicas, false /* includeSuspectAndDrainingStores */) if len(liveReplicas) != 5 { t.Fatalf("expected five live replicas, found %d (%v)", len(liveReplicas), liveReplicas) } @@ -1083,7 +1083,7 @@ func TestStorePoolDecommissioningReplicas(t *testing.T) { // Mark node 5 as dead. mnl.setNodeStatus(5, livenesspb.NodeLivenessStatus_DEAD) - liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas, false /* includeSuspectStores */) + liveReplicas, deadReplicas = sp.liveAndDeadReplicas(replicas, false /* includeSuspectAndDrainingStores */) // Decommissioning replicas are considered live. if a, e := liveReplicas, replicas[:4]; !reflect.DeepEqual(a, e) { t.Fatalf("expected live replicas %+v; got %+v", e, a) From 2690ec20cac4f1bccca00a7fd851b86005080bab Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Tue, 10 Aug 2021 18:59:50 -0400 Subject: [PATCH 11/14] kvserver: stop transferring leases to draining/suspect nodes in the StoreRebalancer This commit prevents the StoreRebalancer from transferring leases to replicas on draining or suspect nodes. In some cases, we've seen this to cause new leases to be pushed to nodes that take too long to drain or that are stuck while draining due to other bugs. Informs https://github.com/cockroachlabs/support/issues/1105 Release note: None --- pkg/kv/kvserver/store_rebalancer.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/kv/kvserver/store_rebalancer.go b/pkg/kv/kvserver/store_rebalancer.go index 350c34351676..01282257ca08 100644 --- a/pkg/kv/kvserver/store_rebalancer.go +++ b/pkg/kv/kvserver/store_rebalancer.go @@ -424,6 +424,14 @@ func (sr *StoreRebalancer) chooseLeaseToTransfer( var raftStatus *raft.Status preferred := sr.rq.allocator.preferredLeaseholders(conf, candidates) + + // Filter both the list of preferred stores as well as the list of all + // candidate replicas to only consider live (non-suspect, non-draining) + // nodes. 
+ const includeSuspectAndDrainingStores = false + preferred, _ = sr.rq.allocator.storePool.liveAndDeadReplicas(preferred, includeSuspectAndDrainingStores) + candidates, _ = sr.rq.allocator.storePool.liveAndDeadReplicas(candidates, includeSuspectAndDrainingStores) + for _, candidate := range candidates { if candidate.StoreID == localDesc.StoreID { continue From ff2ae38fde339782266bf0360f1992113f134dac Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Tue, 10 Aug 2021 18:47:11 -0400 Subject: [PATCH 12/14] roachtest: add a drain+decommission roachtest This commit adds a roachtest that's meant to be a regression test against the hazard addressed by the first commit in this PR. This roachtest is meant to ensure that nodes that are marked "draining" are considered "live" by the allocator for when it makes the determination of whether a range can achieve quorum. Release note: None --- pkg/cmd/roachtest/tests/decommission.go | 119 +++++++++++++++++++++++- 1 file changed, 118 insertions(+), 1 deletion(-) diff --git a/pkg/cmd/roachtest/tests/decommission.go b/pkg/cmd/roachtest/tests/decommission.go index 0bcd0e151c04..3ff3bd24a308 100644 --- a/pkg/cmd/roachtest/tests/decommission.go +++ b/pkg/cmd/roachtest/tests/decommission.go @@ -39,7 +39,7 @@ func registerDecommission(r registry.Registry) { r.Add(registry.TestSpec{ Name: fmt.Sprintf("decommission/nodes=%d/duration=%s", numNodes, duration), Owner: registry.OwnerKV, - Cluster: r.MakeClusterSpec(4), + Cluster: r.MakeClusterSpec(numNodes), Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { if c.IsLocal() { duration = 5 * time.Minute @@ -49,6 +49,18 @@ func registerDecommission(r registry.Registry) { }, }) } + { + numNodes := 9 + duration := 30 * time.Minute + r.Add(registry.TestSpec{ + Name: fmt.Sprintf("drain-and-decommission/nodes=%d", numNodes), + Owner: registry.OwnerKV, + Cluster: r.MakeClusterSpec(numNodes), + Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { + runDrainAndDecommission(ctx, t, c, numNodes, duration) + }, + }) + } { numNodes := 6 r.Add(registry.TestSpec{ @@ -74,6 +86,111 @@ func registerDecommission(r registry.Registry) { } } +// runDrainAndDecommission marks 3 nodes in the test cluster as "draining" and +// then attempts to decommission a fourth node from the cluster. This test is +// meant to ensure that, in the allocator, we consider "draining" nodes as +// "live" for the purposes of determining whether a range can achieve quorum. +// Note that, if "draining" nodes were not considered live for this purpose, +// decommissioning would stall forever since the allocator would incorrectly +// think that at least a handful of ranges (that need to be moved off the +// decommissioning node) are unavailable. 
+func runDrainAndDecommission( + ctx context.Context, t test.Test, c cluster.Cluster, nodes int, duration time.Duration, +) { + const defaultReplicationFactor = 5 + if defaultReplicationFactor > nodes { + t.Fatal("improper configuration: replication factor greater than number of nodes in the test") + } + pinnedNode := 1 + c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) + for i := 1; i <= nodes; i++ { + c.Start(ctx, c.Node(i)) + } + c.Run(ctx, c.Node(pinnedNode), `./cockroach workload init kv --drop --splits 1000`) + + run := func(stmt string) { + db := c.Conn(ctx, pinnedNode) + defer db.Close() + + t.Status(stmt) + _, err := db.ExecContext(ctx, stmt) + if err != nil { + t.Fatal(err) + } + t.L().Printf("run: %s\n", stmt) + } + + run(fmt.Sprintf(`ALTER RANGE default CONFIGURE ZONE USING num_replicas=%d`, defaultReplicationFactor)) + run(fmt.Sprintf(`ALTER DATABASE system CONFIGURE ZONE USING num_replicas=%d`, defaultReplicationFactor)) + + // Speed up the decommissioning. + run(`SET CLUSTER SETTING kv.snapshot_rebalance.max_rate='2GiB'`) + run(`SET CLUSTER SETTING kv.snapshot_recovery.max_rate='2GiB'`) + + t.Status("waiting for initial up-replication") + db := c.Conn(ctx, pinnedNode) + defer func() { + _ = db.Close() + }() + for { + fullReplicated := false + if err := db.QueryRow( + // Check if all ranges are fully replicated. + "SELECT min(array_length(replicas, 1)) >= $1 FROM crdb_internal.ranges", + defaultReplicationFactor, + ).Scan(&fullReplicated); err != nil { + t.Fatal(err) + } + if fullReplicated { + break + } + time.Sleep(time.Second) + } + + var m *errgroup.Group + m, ctx = errgroup.WithContext(ctx) + m.Go( + func() error { + return c.RunE(ctx, c.Node(pinnedNode), + fmt.Sprintf("./cockroach workload run kv --max-rate 500 --tolerate-errors --duration=%s {pgurl:1-%d}", + duration.String(), nodes-4, + ), + ) + }, + ) + + // Let the workload run for a small amount of time. + time.Sleep(1 * time.Minute) + + // Drain the last 3 nodes from the cluster. + for nodeID := nodes - 2; nodeID <= nodes; nodeID++ { + id := nodeID + m.Go(func() error { + drain := func(id int) error { + t.Status(fmt.Sprintf("draining node %d", id)) + return c.RunL(ctx, t.L(), c.Node(id), "./cockroach node drain --insecure") + } + return drain(id) + }) + } + // Sleep for long enough that all the other nodes in the cluster learn about + // the draining status of the two nodes we just drained. + time.Sleep(30 * time.Second) + + m.Go(func() error { + // Decommission the fourth-to-last node from the cluster. + id := nodes - 3 + decom := func(id int) error { + t.Status(fmt.Sprintf("decommissioning node %d", id)) + return c.RunL(ctx, t.L(), c.Node(id), "./cockroach node decommission --self --insecure") + } + return decom(id) + }) + if err := m.Wait(); err != nil { + t.Fatal(err) + } +} + // runDecommission decommissions and wipes nodes in a cluster repeatedly, // alternating between the node being shut down gracefully before and after the // decommissioning operation, while some light load is running against the From b42a9b3c72a2ee3907d51bf9dd385d0fbbe27ef6 Mon Sep 17 00:00:00 2001 From: Jeff Date: Tue, 17 Aug 2021 17:59:29 -0400 Subject: [PATCH 13/14] mt_start_sql: enable enterprise features for multitenant sql servers Enterprise features are controlled by the enterprise.license setting. Currently this setting applies only to the host tenant cluster. This change allows tenants to read the license from an environment variable. 
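Roughly, the intended startup wiring looks like the following sketch. Only the COCKROACH_TENANT_LICENSE variable name comes from this change; applyTenantLicenseSketch, validate, and the example license value are illustrative stand-ins rather than the actual server hooks. The idea: read the environment variable, check that it decodes as a license, and, if present, flip a process-wide flag that enables enterprise features.

package main

import (
	"errors"
	"fmt"
	"os"
	"sync/atomic"
)

// enterpriseEnabled stands in for the process-wide flag flipped at startup.
var enterpriseEnabled int32

// applyTenantLicenseSketch reads COCKROACH_TENANT_LICENSE, validates it, and
// enables enterprise features for the process if it is set. validate is a
// placeholder for real license decoding.
func applyTenantLicenseSketch(validate func(string) error) error {
	license, ok := os.LookupEnv("COCKROACH_TENANT_LICENSE")
	if !ok {
		return nil // no license provided; enterprise checks behave as before
	}
	if err := validate(license); err != nil {
		return fmt.Errorf("COCKROACH_TENANT_LICENSE encoding is invalid: %w", err)
	}
	atomic.StoreInt32(&enterpriseEnabled, 1)
	return nil
}

func main() {
	_ = os.Setenv("COCKROACH_TENANT_LICENSE", "crl-0-example") // hypothetical value
	validate := func(s string) error {
		if s == "" {
			return errors.New("empty license")
		}
		return nil
	}
	if err := applyTenantLicenseSketch(validate); err != nil {
		fmt.Println("startup error:", err)
		return
	}
	fmt.Println("enterprise enabled:", atomic.LoadInt32(&enterpriseEnabled) == 1)
}
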
This should be replaced once it is possible to read the license directly from the host cluster. Release note: None --- pkg/ccl/serverccl/BUILD.bazel | 2 ++ pkg/ccl/serverccl/server_sql_test.go | 26 +++++++++++++++ pkg/ccl/utilccl/BUILD.bazel | 3 ++ pkg/ccl/utilccl/license_check.go | 46 ++++++++++++++++++--------- pkg/ccl/utilccl/license_check_test.go | 38 ++++++++++++++++++++++ pkg/server/tenant.go | 10 ++++++ pkg/util/envutil/BUILD.bazel | 1 + pkg/util/envutil/env.go | 24 ++++++++++++++ pkg/util/envutil/env_test.go | 42 ++++++++++++++++++++++++ 9 files changed, 177 insertions(+), 15 deletions(-) diff --git a/pkg/ccl/serverccl/BUILD.bazel b/pkg/ccl/serverccl/BUILD.bazel index d85efcca6b72..640c1ddcfa66 100644 --- a/pkg/ccl/serverccl/BUILD.bazel +++ b/pkg/ccl/serverccl/BUILD.bazel @@ -23,6 +23,7 @@ go_test( "//pkg/ccl", "//pkg/ccl/kvccl", "//pkg/ccl/utilccl", + "//pkg/ccl/utilccl/licenseccl", "//pkg/roachpb:with-mocks", "//pkg/security", "//pkg/security/securitytest", @@ -35,6 +36,7 @@ go_test( "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", "//pkg/util", + "//pkg/util/envutil", "//pkg/util/httputil", "//pkg/util/leaktest", "//pkg/util/log", diff --git a/pkg/ccl/serverccl/server_sql_test.go b/pkg/ccl/serverccl/server_sql_test.go index d7ae0c21b414..3e47517381f2 100644 --- a/pkg/ccl/serverccl/server_sql_test.go +++ b/pkg/ccl/serverccl/server_sql_test.go @@ -16,12 +16,15 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/ccl/utilccl" + "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" + "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -84,6 +87,29 @@ func TestTenantCannotSetClusterSetting(t *testing.T) { require.Equal(t, pq.ErrorCode(pgcode.InsufficientPrivilege.String()), pqErr.Code, "err %v has unexpected code", err) } +func TestTenantCanUseEnterpriseFeatures(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + license, _ := (&licenseccl.License{ + Type: licenseccl.License_Enterprise, + }).Encode() + + defer utilccl.TestingDisableEnterprise()() + defer envutil.TestSetEnv(t, "COCKROACH_TENANT_LICENSE", license)() + + tc := serverutils.StartNewTestCluster(t, 1, base.TestClusterArgs{}) + defer tc.Stopper().Stop(context.Background()) + + _, db := serverutils.StartTenant(t, tc.Server(0), base.TestTenantArgs{TenantID: serverutils.TestTenantID(), AllowSettingClusterSettings: false}) + defer db.Close() + + _, err := db.Exec(`BACKUP INTO 'userfile:///backup'`) + require.NoError(t, err) + _, err = db.Exec(`BACKUP INTO LATEST IN 'userfile:///backup'`) + require.NoError(t, err) +} + func TestTenantUnauthenticatedAccess(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/ccl/utilccl/BUILD.bazel b/pkg/ccl/utilccl/BUILD.bazel index a026c38bc6d9..f505497080fe 100644 --- a/pkg/ccl/utilccl/BUILD.bazel +++ b/pkg/ccl/utilccl/BUILD.bazel @@ -13,6 +13,7 @@ go_library( "//pkg/base", "//pkg/ccl/utilccl/licenseccl", "//pkg/kv/kvclient/kvcoord:with-mocks", + "//pkg/server", "//pkg/settings", 
"//pkg/settings/cluster", "//pkg/sql/catalog/colinfo", @@ -20,6 +21,7 @@ go_library( "//pkg/sql/pgwire/pgcode", "//pkg/sql/pgwire/pgerror", "//pkg/sql/types", + "//pkg/util/envutil", "//pkg/util/grpcutil", "//pkg/util/timeutil", "//pkg/util/uuid", @@ -40,6 +42,7 @@ go_test( "//pkg/ccl/utilccl/licenseccl", "//pkg/settings/cluster", "//pkg/testutils", + "//pkg/util/envutil", "//pkg/util/timeutil", "//pkg/util/uuid", "@com_github_stretchr_testify//require", diff --git a/pkg/ccl/utilccl/license_check.go b/pkg/ccl/utilccl/license_check.go index 8d4e101367f8..3a9e698171b6 100644 --- a/pkg/ccl/utilccl/license_check.go +++ b/pkg/ccl/utilccl/license_check.go @@ -17,10 +17,12 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl" + "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" + "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" @@ -45,16 +47,13 @@ var enterpriseLicense = func() *settings.StringSetting { return s }() -// testingEnterprise determines whether the cluster is enabled -// or disabled for the purposes of testing. -// It should be loaded and stored using atomic as it can race with an -// in progress kv reader during TestingDisableEnterprise / -// TestingEnableEnterprise. -var testingEnterprise int32 +// enterpriseStatus determines whether the cluster is enabled +// for enterprise features or if enterprise status depends on the license. +var enterpriseStatus int32 = deferToLicense const ( - testingEnterpriseDisabled = 0 - testingEnterpriseEnabled = 1 + deferToLicense = 0 + enterpriseEnabled = 1 ) // errEnterpriseRequired is returned by check() when the caller does @@ -68,22 +67,38 @@ type licenseCacheKey string // TestingEnableEnterprise allows overriding the license check in tests. func TestingEnableEnterprise() func() { - before := atomic.LoadInt32(&testingEnterprise) - atomic.StoreInt32(&testingEnterprise, testingEnterpriseEnabled) + before := atomic.LoadInt32(&enterpriseStatus) + atomic.StoreInt32(&enterpriseStatus, enterpriseEnabled) return func() { - atomic.StoreInt32(&testingEnterprise, before) + atomic.StoreInt32(&enterpriseStatus, before) } } // TestingDisableEnterprise allows re-enabling the license check in tests. func TestingDisableEnterprise() func() { - before := atomic.LoadInt32(&testingEnterprise) - atomic.StoreInt32(&testingEnterprise, testingEnterpriseDisabled) + before := atomic.LoadInt32(&enterpriseStatus) + atomic.StoreInt32(&enterpriseStatus, deferToLicense) return func() { - atomic.StoreInt32(&testingEnterprise, before) + atomic.StoreInt32(&enterpriseStatus, before) } } +// ApplyTenantLicense verifies the COCKROACH_TENANT_LICENSE environment variable +// and enables enterprise features for the process. This is a bit of a hack and +// should be replaced once it is possible to read the host cluster's +// enterprise.license setting. 
+func ApplyTenantLicense() error { + license, ok := envutil.EnvString("COCKROACH_TENANT_LICENSE", 0) + if !ok { + return nil + } + if _, err := decode(license); err != nil { + return errors.Wrap(err, "COCKROACH_TENANT_LICENSE encoding is invalid") + } + atomic.StoreInt32(&enterpriseStatus, enterpriseEnabled) + return nil +} + // CheckEnterpriseEnabled returns a non-nil error if the requested enterprise // feature is not enabled, including information or a link explaining how to // enable it. @@ -108,6 +123,7 @@ func init() { base.CheckEnterpriseEnabled = CheckEnterpriseEnabled base.LicenseType = getLicenseType base.TimeToEnterpriseLicenseExpiry = TimeToEnterpriseLicenseExpiry + server.ApplyTenantLicense = ApplyTenantLicense } // TimeToEnterpriseLicenseExpiry returns a Duration from `asOf` until the current @@ -128,7 +144,7 @@ func TimeToEnterpriseLicenseExpiry( func checkEnterpriseEnabledAt( st *cluster.Settings, at time.Time, cluster uuid.UUID, org, feature string, withDetails bool, ) error { - if atomic.LoadInt32(&testingEnterprise) == testingEnterpriseEnabled { + if atomic.LoadInt32(&enterpriseStatus) == enterpriseEnabled { return nil } license, err := getLicense(st) diff --git a/pkg/ccl/utilccl/license_check_test.go b/pkg/ccl/utilccl/license_check_test.go index ae85598f2f9b..6d91fb36e7c7 100644 --- a/pkg/ccl/utilccl/license_check_test.go +++ b/pkg/ccl/utilccl/license_check_test.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/stretchr/testify/require" @@ -188,3 +189,40 @@ func TestTimeToEnterpriseLicenseExpiry(t *testing.T) { }) } } + +func TestApplyTenantLicenseWithLicense(t *testing.T) { + license, _ := (&licenseccl.License{ + Type: licenseccl.License_Enterprise, + }).Encode() + + defer TestingDisableEnterprise()() + defer envutil.TestSetEnv(t, "COCKROACH_TENANT_LICENSE", license)() + + settings := cluster.MakeClusterSettings() + + require.Error(t, CheckEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) + require.False(t, IsEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) + require.NoError(t, ApplyTenantLicense()) + require.NoError(t, CheckEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) + require.True(t, IsEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) +} + +func TestApplyTenantLicenseWithoutLicense(t *testing.T) { + defer TestingDisableEnterprise()() + + settings := cluster.MakeClusterSettings() + _, ok := envutil.EnvString("COCKROACH_TENANT_LICENSE", 0) + envutil.ClearEnvCache() + require.False(t, ok) + + require.Error(t, CheckEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) + require.False(t, IsEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) + require.NoError(t, ApplyTenantLicense()) + require.Error(t, CheckEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) + require.False(t, IsEnterpriseEnabled(settings, uuid.MakeV4(), "", "")) +} + +func TestApplyTenantLicenseWithInvalidLicense(t *testing.T) { + defer envutil.TestSetEnv(t, "COCKROACH_TENANT_LICENSE", "THIS IS NOT A VALID LICENSE")() + require.Error(t, ApplyTenantLicense()) +} diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index 4fa76f510067..cba39c7a229d 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -57,6 +57,11 @@ func StartTenant( baseCfg BaseConfig, sqlCfg SQLConfig, 
) (sqlServer *SQLServer, pgAddr string, httpAddr string, _ error) { + err := ApplyTenantLicense() + if err != nil { + return nil, "", "", err + } + args, err := makeTenantSQLServerArgs(stopper, kvClusterName, baseCfg, sqlCfg) if err != nil { return nil, "", "", err @@ -405,6 +410,11 @@ var NewTenantSideCostController = func( return noopTenantSideCostController{}, nil } +// ApplyTenantLicense is a hook for CCL code which enables enterprise features +// for the tenant process if the COCKROACH_TENANT_LICENSE environment variable +// is set. +var ApplyTenantLicense = func() error { return nil /* no-op */ } + // noopTenantSideCostController is a no-op implementation of // TenantSideCostController. type noopTenantSideCostController struct{} diff --git a/pkg/util/envutil/BUILD.bazel b/pkg/util/envutil/BUILD.bazel index ac03c340b785..8e34a3d2f413 100644 --- a/pkg/util/envutil/BUILD.bazel +++ b/pkg/util/envutil/BUILD.bazel @@ -17,4 +17,5 @@ go_test( size = "small", srcs = ["env_test.go"], embed = [":envutil"], + deps = ["@com_github_stretchr_testify//require"], ) diff --git a/pkg/util/envutil/env.go b/pkg/util/envutil/env.go index 1e78227a8971..87e8c5fa622e 100644 --- a/pkg/util/envutil/env.go +++ b/pkg/util/envutil/env.go @@ -19,6 +19,7 @@ import ( "sort" "strconv" "strings" + "testing" "time" "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" @@ -361,3 +362,26 @@ func EnvOrDefaultDuration(name string, value time.Duration) time.Duration { } return value } + +// TestSetEnv sets an environment variable and the cleanup function +// resets it to the original value. +func TestSetEnv(t *testing.T, name string, value string) func() { + ClearEnvCache() + before, exists := os.LookupEnv(name) + + if err := os.Setenv(name, value); err != nil { + t.Fatal(err) + } + return func() { + if exists { + if err := os.Setenv(name, before); err != nil { + t.Fatal(err) + } + } else { + if err := os.Unsetenv(name); err != nil { + t.Fatal(err) + } + } + ClearEnvCache() + } +} diff --git a/pkg/util/envutil/env_test.go b/pkg/util/envutil/env_test.go index 9e71424f9c4a..264425cae322 100644 --- a/pkg/util/envutil/env_test.go +++ b/pkg/util/envutil/env_test.go @@ -13,6 +13,8 @@ package envutil import ( "os" "testing" + + "github.com/stretchr/testify/require" ) func TestEnvOrDefault(t *testing.T) { @@ -27,3 +29,43 @@ func TestEnvOrDefault(t *testing.T) { t.Errorf("expected %d, got %d", def, act) } } + +func TestTestSetEnvExists(t *testing.T) { + key := "COCKROACH_ENVUTIL_TESTSETTING" + require.NoError(t, os.Setenv(key, "before")) + + ClearEnvCache() + value, ok := EnvString(key, 0) + require.True(t, ok) + require.Equal(t, value, "before") + + cleanup := TestSetEnv(t, key, "testvalue") + value, ok = EnvString(key, 0) + require.True(t, ok) + require.Equal(t, value, "testvalue") + + cleanup() + + value, ok = EnvString(key, 0) + require.True(t, ok) + require.Equal(t, value, "before") +} + +func TestTestSetEnvDoesNotExist(t *testing.T) { + key := "COCKROACH_ENVUTIL_TESTSETTING" + require.NoError(t, os.Unsetenv(key)) + + ClearEnvCache() + _, ok := EnvString(key, 0) + require.False(t, ok) + + cleanup := TestSetEnv(t, key, "foo") + value, ok := EnvString(key, 0) + require.True(t, ok) + require.Equal(t, value, "foo") + + cleanup() + + _, ok = EnvString(key, 0) + require.False(t, ok) +} From b344fb84b44de7a770aa89f759e401c9874a4b09 Mon Sep 17 00:00:00 2001 From: Aayush Shah Date: Wed, 1 Sep 2021 11:48:36 -0400 Subject: [PATCH 14/14] kvserver: stop transferring leases to replicas that may need snapshots This commit disallows the 
`replicateQueue` from initiating lease transfers to replicas that may be in need of a raft snapshot. Note that the `StoreRebalancer` already has a stronger form of this check since it disallows lease transfers to replicas that are lagging behind the raft leader (which includes the set of replicas that need a snapshot). In cases where the raft leader is not the leaseholder, we disallow the replicateQueue from any sort of lease transfer until leaseholdership and leadership are collocated. We rely on calls to `maybeTransferRaftLeadershipToLeaseholderLocked()` (called on every raft tick) to make sure that such periods of leadership / leaseholdership misalignment are ephemeral and rare. Release justification: bug fix Release note (bug fix): Fixes a bug that can cause prolonged unavailability due to lease transfer to a replica that may be in need of a raft snapshot. --- pkg/kv/kvserver/allocator.go | 91 +++++++++++-- pkg/kv/kvserver/allocator_test.go | 212 ++++++++++++++++++++++++++--- pkg/kv/kvserver/replicate_queue.go | 2 +- 3 files changed, 276 insertions(+), 29 deletions(-) diff --git a/pkg/kv/kvserver/allocator.go b/pkg/kv/kvserver/allocator.go index 53b33bf2afdc..336c8f234acf 100644 --- a/pkg/kv/kvserver/allocator.go +++ b/pkg/kv/kvserver/allocator.go @@ -1255,7 +1255,11 @@ func (a *Allocator) TransferLeaseTarget( ctx context.Context, conf roachpb.SpanConfig, existing []roachpb.ReplicaDescriptor, - leaseStoreID roachpb.StoreID, + leaseRepl interface { + RaftStatus() *raft.Status + StoreID() roachpb.StoreID + GetRangeID() roachpb.RangeID + }, stats *replicaStats, checkTransferLeaseSource bool, checkCandidateFullness bool, @@ -1268,7 +1272,7 @@ func (a *Allocator) TransferLeaseTarget( // eligible stores, make that explicit here. candidateLeasesMean := sl.candidateLeases.mean - source, ok := a.storePool.getStoreDescriptor(leaseStoreID) + source, ok := a.storePool.getStoreDescriptor(leaseRepl.StoreID()) if !ok { return roachpb.ReplicaDescriptor{} } @@ -1286,14 +1290,14 @@ func (a *Allocator) TransferLeaseTarget( // it's too big a change to make right before a major release. var candidates []roachpb.ReplicaDescriptor for _, repl := range existing { - if repl.StoreID != leaseStoreID { + if repl.StoreID != leaseRepl.StoreID() { candidates = append(candidates, repl) } } preferred = a.preferredLeaseholders(conf, candidates) } if len(preferred) == 1 { - if preferred[0].StoreID == leaseStoreID { + if preferred[0].StoreID == leaseRepl.StoreID() { return roachpb.ReplicaDescriptor{} } // Verify that the preferred replica is eligible to receive the lease. @@ -1306,7 +1310,7 @@ func (a *Allocator) TransferLeaseTarget( // If the current leaseholder is not preferred, set checkTransferLeaseSource // to false to motivate the below logic to transfer the lease. existing = preferred - if !storeHasReplica(leaseStoreID, roachpb.MakeReplicaSet(preferred).ReplicationTargets()) { + if !storeHasReplica(leaseRepl.StoreID(), roachpb.MakeReplicaSet(preferred).ReplicationTargets()) { checkTransferLeaseSource = false } } @@ -1314,9 +1318,28 @@ func (a *Allocator) TransferLeaseTarget( // Only consider live, non-draining, non-suspect replicas. existing, _ = a.storePool.liveAndDeadReplicas(existing, false /* includeSuspectStores */) + // Only proceed with the lease transfer if we are also the raft leader (we + // already know we are the leaseholder at this point), and only consider + // replicas that are in `StateReplicate` as potential candidates. 
+	//
+	// NB: The RaftStatus() only returns a non-empty and non-nil result on the
+	// Raft leader (since Raft followers do not track the progress of other
+	// replicas, only the leader does).
+	//
+	// NB: On every Raft tick, we try to ensure that leadership is collocated with
+	// leaseholdership (see
+	// Replica.maybeTransferRaftLeadershipToLeaseholderLocked()). This means that
+	// on a range that is not already borked (i.e. can accept writes), periods of
+	// leader/leaseholder misalignment should be ephemeral and rare. We choose to
+	// be pessimistic here and bail on the lease transfer, as opposed to
+	// potentially transferring the lease to a replica that may be waiting for a
+	// snapshot (which will wedge the range until the replica applies that
+	// snapshot).
+	existing = excludeReplicasInNeedOfSnapshots(ctx, leaseRepl.RaftStatus(), existing)
+
 	// Short-circuit if there are no valid targets out there.
-	if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseStoreID) {
-		log.VEventf(ctx, 2, "no lease transfer target found")
+	if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) {
+		log.VEventf(ctx, 2, "no lease transfer target found for r%d", leaseRepl.GetRangeID())
 		return roachpb.ReplicaDescriptor{}
 	}
 
@@ -1353,7 +1376,7 @@ func (a *Allocator) TransferLeaseTarget(
 	var bestOption roachpb.ReplicaDescriptor
 	bestOptionLeaseCount := int32(math.MaxInt32)
 	for _, repl := range existing {
-		if leaseStoreID == repl.StoreID {
+		if leaseRepl.StoreID() == repl.StoreID {
 			continue
 		}
 		storeDesc, ok := a.storePool.getStoreDescriptor(repl.StoreID)
@@ -1762,6 +1785,58 @@ func replicaIsBehind(raftStatus *raft.Status, replicaID roachpb.ReplicaID) bool
 	return true
 }
 
+// replicaMayNeedSnapshot determines whether the replica referred to by
+// `replicaID` may be in need of a raft snapshot. If this function is called
+// with an empty or nil `raftStatus` (as will be the case when it's called by a
+// replica that is not the raft leader), we pessimistically assume that
+// `replicaID` may need a snapshot.
+func replicaMayNeedSnapshot(raftStatus *raft.Status, replicaID roachpb.ReplicaID) bool {
+	if raftStatus == nil || len(raftStatus.Progress) == 0 {
+		return true
+	}
+	if progress, ok := raftStatus.Progress[uint64(replicaID)]; ok {
+		// We can only reasonably assume that the follower replica is not in need of
+		// a snapshot iff it is in `StateReplicate`. However, even this is racy
+		// because we can still possibly have an ill-timed log truncation between
+		// when we make this determination and when we act on it.
+		return progress.State != tracker.StateReplicate
+	}
+	return true
+}
+
+// excludeReplicasInNeedOfSnapshots filters out the `replicas` that may be in
+// need of a raft snapshot. If this function is called with the `raftStatus` of
+// a non-raft leader replica, an empty slice is returned.
+func excludeReplicasInNeedOfSnapshots( + ctx context.Context, raftStatus *raft.Status, replicas []roachpb.ReplicaDescriptor, +) []roachpb.ReplicaDescriptor { + if raftStatus == nil || len(raftStatus.Progress) == 0 { + log.VEventf( + ctx, + 5, + "raft leader not collocated with the leaseholder; will not produce any lease transfer targets", + ) + return []roachpb.ReplicaDescriptor{} + } + + filled := 0 + for _, repl := range replicas { + if replicaMayNeedSnapshot(raftStatus, repl.ReplicaID) { + log.VEventf( + ctx, + 5, + "not considering [n%d, s%d] as a potential candidate for a lease transfer"+ + " because the replica may be waiting for a snapshot", + repl.NodeID, repl.StoreID, + ) + continue + } + replicas[filled] = repl + filled++ + } + return replicas[:filled] +} + // simulateFilterUnremovableReplicas removes any unremovable replicas from the // supplied slice. Unlike filterUnremovableReplicas, brandNewReplicaID is // considered up-to-date (and thus can participate in quorum), but is not diff --git a/pkg/kv/kvserver/allocator_test.go b/pkg/kv/kvserver/allocator_test.go index b24f41d2645d..f8345b12b178 100644 --- a/pkg/kv/kvserver/allocator_test.go +++ b/pkg/kv/kvserver/allocator_test.go @@ -378,9 +378,10 @@ func createTestAllocator( TestTimeUntilStoreDeadOff, deterministic, func() int { return numNodes }, livenesspb.NodeLivenessStatus_LIVE) - a := MakeAllocator(storePool, func(string) (time.Duration, bool) { - return 0, true - }) + a := MakeAllocator( + storePool, func(string) (time.Duration, bool) { + return 0, true + }) return stopper, g, storePool, a, manual } @@ -1318,6 +1319,43 @@ func TestAllocatorRebalanceByCount(t *testing.T) { } } +// mockRepl satisfies the interface for the `leaseRepl` passed into +// `Allocator.TransferLeaseTarget()` for these tests. +type mockRepl struct { + replicationFactor int32 + storeID roachpb.StoreID + replsInNeedOfSnapshot map[roachpb.ReplicaID]struct{} +} + +func (r *mockRepl) RaftStatus() *raft.Status { + raftStatus := &raft.Status{ + Progress: make(map[uint64]tracker.Progress), + } + for i := int32(1); i <= r.replicationFactor; i++ { + state := tracker.StateReplicate + if _, ok := r.replsInNeedOfSnapshot[roachpb.ReplicaID(i)]; ok { + state = tracker.StateSnapshot + } + raftStatus.Progress[uint64(i)] = tracker.Progress{State: state} + } + return raftStatus +} + +func (r *mockRepl) StoreID() roachpb.StoreID { + return r.storeID +} + +func (r *mockRepl) GetRangeID() roachpb.RangeID { + return roachpb.RangeID(0) +} + +func (r *mockRepl) markReplAsNeedingSnapshot(id roachpb.ReplicaID) { + if r.replsInNeedOfSnapshot == nil { + r.replsInNeedOfSnapshot = make(map[roachpb.ReplicaID]struct{}) + } + r.replsInNeedOfSnapshot[id] = struct{}{} +} + func TestAllocatorTransferLeaseTarget(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1338,9 +1376,9 @@ func TestAllocatorTransferLeaseTarget(t *testing.T) { sg.GossipStores(stores, t) existing := []roachpb.ReplicaDescriptor{ - {StoreID: 1}, - {StoreID: 2}, - {StoreID: 3}, + {StoreID: 1, ReplicaID: 1}, + {StoreID: 2, ReplicaID: 2}, + {StoreID: 3, ReplicaID: 3}, } // TODO(peter): Add test cases for non-empty constraints. 
@@ -1368,7 +1406,10 @@ func TestAllocatorTransferLeaseTarget(t *testing.T) { context.Background(), emptySpanConfig(), c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 3, + storeID: c.leaseholder, + }, nil, /* replicaStats */ c.check, true, /* checkCandidateFullness */ @@ -1381,6 +1422,116 @@ func TestAllocatorTransferLeaseTarget(t *testing.T) { } } +func TestAllocatorTransferLeaseToReplicasNeedingSnapshot(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + existing := []roachpb.ReplicaDescriptor{ + {StoreID: 1, NodeID: 1, ReplicaID: 1}, + {StoreID: 2, NodeID: 2, ReplicaID: 2}, + {StoreID: 3, NodeID: 3, ReplicaID: 3}, + {StoreID: 4, NodeID: 4, ReplicaID: 4}, + } + stopper, g, _, a, _ := createTestAllocator(10, true /* deterministic */) + defer stopper.Stop(context.Background()) + + // 4 stores where the lease count for each store is equal to 10x the store + // ID. + var stores []*roachpb.StoreDescriptor + for i := 1; i <= 4; i++ { + stores = append(stores, &roachpb.StoreDescriptor{ + StoreID: roachpb.StoreID(i), + Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(i)}, + Capacity: roachpb.StoreCapacity{LeaseCount: int32(10 * i)}, + }) + } + sg := gossiputil.NewStoreGossiper(g) + sg.GossipStores(stores, t) + + testCases := []struct { + existing []roachpb.ReplicaDescriptor + replsNeedingSnaps []roachpb.ReplicaID + leaseholder roachpb.StoreID + checkSource bool + transferTarget roachpb.StoreID + }{ + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1}, + leaseholder: 3, + checkSource: true, + transferTarget: 0, + }, + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1}, + leaseholder: 3, + checkSource: false, + transferTarget: 2, + }, + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1}, + leaseholder: 4, + checkSource: true, + transferTarget: 2, + }, + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1}, + leaseholder: 4, + checkSource: false, + transferTarget: 2, + }, + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1, 2}, + leaseholder: 4, + checkSource: false, + transferTarget: 3, + }, + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1, 2}, + leaseholder: 4, + checkSource: true, + transferTarget: 0, + }, + { + existing: existing, + replsNeedingSnaps: []roachpb.ReplicaID{1, 2, 3}, + leaseholder: 4, + checkSource: true, + transferTarget: 0, + }, + } + + for _, c := range testCases { + repl := &mockRepl{ + replicationFactor: 4, + storeID: c.leaseholder, + } + for _, r := range c.replsNeedingSnaps { + repl.markReplAsNeedingSnapshot(r) + } + t.Run("", func(t *testing.T) { + target := a.TransferLeaseTarget( + context.Background(), + emptySpanConfig(), + c.existing, + repl, + nil, + c.checkSource, + true, /* checkCandidateFullness */ + false, /* alwaysAllowDecisionWithoutStats */ + ) + if c.transferTarget != target.StoreID { + t.Fatalf("expected %d, but found %d", c.transferTarget, target.StoreID) + } + }) + } +} + func TestAllocatorTransferLeaseTargetConstraints(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1456,7 +1607,10 @@ func TestAllocatorTransferLeaseTargetConstraints(t *testing.T) { context.Background(), c.conf, c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 3, + storeID: c.leaseholder, + }, nil, /* replicaStats */ true, true, /* checkCandidateFullness */ @@ -1517,9 +1671,9 @@ func TestAllocatorTransferLeaseTargetDraining(t *testing.T) { } existing := []roachpb.ReplicaDescriptor{ - {StoreID: 1}, - 
{StoreID: 2}, - {StoreID: 3}, + {StoreID: 1, ReplicaID: 1}, + {StoreID: 2, ReplicaID: 2}, + {StoreID: 3, ReplicaID: 3}, } testCases := []struct { @@ -1555,7 +1709,10 @@ func TestAllocatorTransferLeaseTargetDraining(t *testing.T) { context.Background(), c.conf, c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 3, + storeID: c.leaseholder, + }, nil, /* replicaStats */ c.check, true, /* checkCandidateFullness */ @@ -2065,7 +2222,10 @@ func TestAllocatorLeasePreferences(t *testing.T) { context.Background(), conf, c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 5, + storeID: c.leaseholder, + }, nil, /* replicaStats */ true, /* checkTransferLeaseSource */ true, /* checkCandidateFullness */ @@ -2078,7 +2238,10 @@ func TestAllocatorLeasePreferences(t *testing.T) { context.Background(), conf, c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 5, + storeID: c.leaseholder, + }, nil, /* replicaStats */ false, /* checkTransferLeaseSource */ true, /* checkCandidateFullness */ @@ -2161,7 +2324,10 @@ func TestAllocatorLeasePreferencesMultipleStoresPerLocality(t *testing.T) { context.Background(), conf, c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 6, + storeID: c.leaseholder, + }, nil, /* replicaStats */ true, /* checkTransferLeaseSource */ true, /* checkCandidateFullness */ @@ -2174,7 +2340,10 @@ func TestAllocatorLeasePreferencesMultipleStoresPerLocality(t *testing.T) { context.Background(), conf, c.existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 6, + storeID: c.leaseholder, + }, nil, /* replicaStats */ false, /* checkTransferLeaseSource */ true, /* checkCandidateFullness */ @@ -4484,9 +4653,9 @@ func TestAllocatorTransferLeaseTargetLoadBased(t *testing.T) { } existing := []roachpb.ReplicaDescriptor{ - {NodeID: 1, StoreID: 1}, - {NodeID: 2, StoreID: 2}, - {NodeID: 3, StoreID: 3}, + {NodeID: 1, StoreID: 1, ReplicaID: 1}, + {NodeID: 2, StoreID: 2, ReplicaID: 2}, + {NodeID: 3, StoreID: 3, ReplicaID: 3}, } testCases := []struct { @@ -4564,7 +4733,10 @@ func TestAllocatorTransferLeaseTargetLoadBased(t *testing.T) { context.Background(), emptySpanConfig(), existing, - c.leaseholder, + &mockRepl{ + replicationFactor: 3, + storeID: c.leaseholder, + }, c.stats, c.check, true, /* checkCandidateFullness */ diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 976bc0603e8c..50c83e59dcbb 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -1311,7 +1311,7 @@ func (rq *replicateQueue) shedLease( ctx, conf, desc.Replicas().VoterDescriptors(), - repl.store.StoreID(), + repl, repl.leaseholderStats, opts.checkTransferLeaseSource, opts.checkCandidateFullness,