From d98aa66b357fd3fbe47d938e470af18eafbe6dd7 Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Wed, 25 Nov 2020 19:46:25 -0500 Subject: [PATCH] clusterversion: document process for introducing cluster versions And for deleting old ones. This stuff is endlessly confusing. Release note: None --- pkg/clusterversion/clusterversion.go | 2 +- pkg/clusterversion/cockroach_versions.go | 110 ++++++++++++++++++++--- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/pkg/clusterversion/clusterversion.go b/pkg/clusterversion/clusterversion.go index 010f7795f5f3..61d9e6ca8041 100644 --- a/pkg/clusterversion/clusterversion.go +++ b/pkg/clusterversion/clusterversion.go @@ -82,7 +82,7 @@ type Handle interface { // // If this returns true then all nodes in the cluster will eventually see // this version. However, this is not atomic because version gates (for a - // given version) are pushed through to each node in parallel. Because of + // given version) are pushed through to each node concurrently. Because of // this, nodes should not be gating proper handling of remotely initiated // requests that their binary knows how to handle on this state. The // following example shows why this is important: diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index 7604fcb25aee..e0d99fdcb9b0 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -15,19 +15,101 @@ import "github.com/cockroachdb/cockroach/pkg/roachpb" // VersionKey is a unique identifier for a version of CockroachDB. type VersionKey int -// Version constants. +// Version constants. You'll want to add a new one in the following cases: // -// To add a version: -// - Add it at the end of this block. -// - Add it at the end of the `Versions` block below. -// - For major or minor versions, bump binaryMinSupportedVersion. For -// example, if introducing the `20.1` release, bump it to -// VersionStart19_2 (i.e. `19.1-1`). +// (a) When introducing a backwards incompatible feature. Broadly, by this we +// mean code that's structured as follows: // -// To delete a version. -// - Remove its associated runtime checks. -// - If the version is not the latest one, delete the constant and remove its -// entry in the versionsSingleton. +// if (specific-version is active) { +// // Implies that all nodes in the cluster are running binaries that +// // have this code. We can "enable" the new feature knowing that +// // outbound RPCs, requests, etc. will be handled by nodes that know +// // how to do so. +// } else { +// // There may be some nodes running older binaries without this code. +// // To be safe, we'll want to behave as we did before introducing +// // this feature. +// } +// +// Authors of migrations need to be careful in ensuring that end-users +// aren't able to enable feature gates before they're active. This is fine: +// +// func handleSomeNewStatement() error { +// if !(specific-version is active) { +// return errors.New("cluster version needs to be bumped") +// } +// // ... +// } +// +// At the same time, with requests/RPCs originating at other crdb nodes, the +// initiator of the request gets to decide what's supported. A node should +// not refuse functionality on the grounds that its view of the version gate +// is as yet inactive. Consider the sender: +// +// func invokeSomeRPC(req) { +// if (specific-version is active) { +// // Like mentioned above, this implies that all nodes in the +// // cluster are running binaries that can handle this new +// // feature. We may have learned about this fact before the +// // node on the other end. This is due to the fact that migration +// // manager informs each node about the specific-version being +// // activated active concurrently. See BumpClusterVersion for +// // where that happens. Still, it's safe for us to enable the new +// // feature flags as we trust the recipient to know how to deal +// // with it. +// req.NewFeatureFlag = true +// } +// send(req) +// } +// +// And consider the recipient: +// +// func someRPC(req) { +// if !req.NewFeatureFlag { +// // Legacy behaviour... +// } +// // There's no need to even check if the specific-version is active. +// // If the flag is enabled, the specific-version must have been +// // activated, even if we haven't yet heard about it (we will pretty +// // soon). +// } +// +// See clusterversion.Handle.IsActive and usage of some existing versions +// below for more clues on the matter. +// +// (b) When cutting a major release branch. When cutting release-20.2 for +// example, you'll want to introduce the following to `master`. +// +// (i) Version20_2 (keyed to v20.2.0-0}) +// (ii) VersionStart21_1 (keyed to v20.2.0-1}) +// +// You'll then want to backport (i) to the release branch itself (i.e. +// release-20.2). You'll also want to bump binaryMinSupportedVersion. In the +// example above, you'll set it to Version20_2. This indicates that the +// minimum binary version required in a cluster with with nodes running +// v21.1 binaries (including pre-release alphas) is v20.2, i.e. that an +// upgrade into such a binary must start out from at least v20.2 nodes. +// +// Aside: At the time of writing, the binary min supported version is the +// last major release, though we may consider relaxing this in the future +// (i.e. for example could skip up to one major release) as we move to a more +// frequent release schedule. +// +// When introducing a version constant, you'll want to: +// (1) Add it at the end of this block +// (2) Add it at the end of the `versionsSingleton` block below. +// +// --- +// +// You'll want to delete versions from this list after cutting a major release. +// Once the development for 21.1 begins, after step (ii) from above, all +// versions introduced in the previous release can be removed (everything prior +// to Version20_2 in our example). +// +// When removing a version, you'll want to remove its associated runtime checks. +// All "is active" checks for the key will always evaluate to true. You'll also +// want to delete the constant and remove its entry in the `versionsSingleton` +// block below. // //go:generate stringer -type=VersionKey const ( @@ -36,6 +118,7 @@ const ( VersionContainsEstimatesCounter VersionNamespaceTableWithSchemas VersionAuthLocalAndTrustRejectMethods + VersionStart20_2 VersionGeospatialType VersionEnums @@ -58,10 +141,11 @@ const ( VersionCreateLoginPrivilege VersionHBAForNonTLS Version20_2 + VersionStart21_1 VersionEmptyArraysInInvertedIndexes - // Add new versions here (step one of two). + // Step (1): Add new versions here. ) // versionsSingleton lists all historical versions here in chronological order, @@ -269,7 +353,7 @@ var versionsSingleton = keyedVersions([]keyedVersion{ Version: roachpb.Version{Major: 20, Minor: 2, Internal: 2}, }, - // Add new versions here (step two of two). + // Step (2): Add new versions here. }) // TODO(irfansharif): clusterversion.binary{,MinimumSupported}Version