Skip to content

Commit

Permalink
clusterversion: prevent upgrades from master versions
Browse files Browse the repository at this point in the history
This change defines a new "unstableVersionsAbove" point on the cluster
version  line, above which any cluster versions are considered unstable
development-only versions which are still subject to change.

Performing an upgrade to a version while it is still unstable leaves a
cluster in a state where it persists a version that claims it has done
that upgrade and all prior, however those upgrades are still subject to
change by nature of being unstable. If it subsequently upgraded to a
stable version, this could result in subtle and nearly impossible to
detect issues, as being at or above a particular version is used to
assume that all subsequent version upgrades _as released_ were run; on a
cluster that ran an earlier iteration of an upgrade this does not hold.

Thus to prevent clusters which upgrade to development versions from
subsequently upgrading to a stable version, we offset all development
versions -- those above the unstableVersionsAbove point -- into the far
future by adding one million to their major version e.g. v22.x-y becomes
1000022.x-y. This means an attempt to subsequently "upgrade" to a stable
version -- such as v22.2 -- will look like a downgrade and be forbidden.

On the release branch, prior to starting to publish upgradable releases,
the unstableVersionsAbove value should be set to invalidVersionKey to
reflect that all version upgrades in that release branch are now
considered to be stable, meaning they must be treated as immutable and
append-only.

Release note (ops change): clusters that are upgraded to an alpha or
other manual build from the development branch will not be able to be
subsequently upgraded to a release build.

Release justification: high-priority change to existing functionality,
to allow releasing alphas with known version upgrade bugs while ensuring
they do not subsequently upgrade into stable version but silently
corrupted clusters.
  • Loading branch information
dt committed Aug 19, 2022
1 parent 2174c9f commit 5a511c2
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 19 deletions.
2 changes: 1 addition & 1 deletion docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
Expand Up @@ -292,4 +292,4 @@ trace.jaeger.agent string the address of a Jaeger agent to receive traces using
trace.opentelemetry.collector string address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.
trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https://<ui>/#/debug/tracez
trace.zipkin.collector string the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.
version version 22.1-60 set the active cluster version in the format '<major>.<minor>'
version version 1000022.1-60 set the active cluster version in the format '<major>.<minor>'
2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,6 @@
<tr><td><code>trace.opentelemetry.collector</code></td><td>string</td><td><code></code></td><td>address of an OpenTelemetry trace collector to receive traces using the otel gRPC protocol, as <host>:<port>. If no port is specified, 4317 will be used.</td></tr>
<tr><td><code>trace.span_registry.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, ongoing traces can be seen at https://<ui>/#/debug/tracez</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>the address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>22.1-60</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
<tr><td><code>version</code></td><td>version</td><td><code>1000022.1-60</code></td><td>set the active cluster version in the format '<major>.<minor>'</td></tr>
</tbody>
</table>
4 changes: 2 additions & 2 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8228,7 +8228,7 @@ func TestManifestTooNew(t *testing.T) {
require.NoError(t, protoutil.Unmarshal(manifestData, &backupManifest))

// Bump the version and write it back out to make it look newer.
backupManifest.ClusterVersion = roachpb.Version{Major: 99, Minor: 1}
backupManifest.ClusterVersion = roachpb.Version{Major: math.MaxInt32, Minor: 1}
manifestData, err = protoutil.Marshal(&backupManifest)
require.NoError(t, err)
require.NoError(t, ioutil.WriteFile(manifestPath, manifestData, 0644 /* perm */))
Expand All @@ -8238,7 +8238,7 @@ func TestManifestTooNew(t *testing.T) {
require.NoError(t, ioutil.WriteFile(manifestPath+backupinfo.BackupManifestChecksumSuffix, checksum, 0644 /* perm */))

// Verify we reject it.
sqlDB.ExpectErr(t, "backup from version 99.1 is newer than current version", `RESTORE DATABASE r1 FROM 'nodelocal://0/too_new'`)
sqlDB.ExpectErr(t, "backup from version 2147483647.1 is newer than current version", `RESTORE DATABASE r1 FROM 'nodelocal://0/too_new'`)

// Bump the version down and write it back out to make it look older.
backupManifest.ClusterVersion = roachpb.Version{Major: 20, Minor: 2, Internal: 2}
Expand Down
4 changes: 2 additions & 2 deletions pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ select crdb_internal.get_vmodule()
·

query T
select regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', '');
select regexp_replace(regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', ''), '10000', '');
----
22.1

Expand Down Expand Up @@ -453,7 +453,7 @@ select * from crdb_internal.gossip_alerts

# Anyone can see the executable version.
query T
select regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', '');
select regexp_replace(regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', ''), '10000', '');
----
22.1

Expand Down
40 changes: 37 additions & 3 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,8 @@ const TODOPreV21_2 = V21_2
// previously referenced a < 22.1 version until that check/gate can be removed.
const TODOPreV22_1 = V22_1

// versionsSingleton lists all historical versions here in chronological order,
// with comments describing what backwards-incompatible features were
// rawVersionsSingleton lists all historical versions here in chronological
// order, with comments describing what backwards-incompatible features were
// introduced.
//
// A roachpb.Version has the colloquial form MAJOR.MINOR[.PATCH][-INTERNAL],
Expand All @@ -341,7 +341,11 @@ const TODOPreV22_1 = V22_1
// Such clusters would need to be wiped. As a result, do not bump the major or
// minor version until we are absolutely sure that no new migrations will need
// to be added (i.e., when cutting the final release candidate).
var versionsSingleton = keyedVersions{
//
// rawVersionsSingleton is converted to versionsSingleton below, by adding a
// large number to every major if building from master, so as to ensure that
// master builds cannot be upgraded to release-branch builds.
var rawVersionsSingleton = keyedVersions{
{
// V21_2 is CockroachDB v21.2. It's used for all v21.2.x patch releases.
Key: V21_2,
Expand Down Expand Up @@ -522,6 +526,36 @@ var versionsSingleton = keyedVersions{
// *************************************************
}

const (
// unstableVersionsAbove is a cluster version Key above which any upgrades in
// this version are considered unstable development-only versions if it is not
// negative, and upgrading to them should permanently move a cluster to
// development versions. On master it should be the minted version of the last
// release, while on release branches it can be set to invalidVersionKey to
// disable marking any versions as development versions.
unstableVersionsAbove = V22_1

// finalVersion should be set on a release branch to the minted final cluster
// version key, e.g. to V22_2 on the release-22.2 branch once it is minted.
// Setting it has the effect of ensuring no versions are subsequently added.
finalVersion = invalidVersionKey
)

var versionsSingleton = func() keyedVersions {
if unstableVersionsAbove > invalidVersionKey {
const devOffset = 1000000
// Throw every version above the last release (which will be none on a release
// branch) 1 million major versions into the future, so any "upgrade" to a
// release branch build will be a downgrade and thus blocked.
for i := range rawVersionsSingleton {
if rawVersionsSingleton[i].Key > unstableVersionsAbove {
rawVersionsSingleton[i].Major += devOffset
}
}
}
return rawVersionsSingleton
}()

// TODO(irfansharif): clusterversion.binary{,MinimumSupported}Version
// feels out of place. A "cluster version" and a "binary version" are two
// separate concepts.
Expand Down
8 changes: 5 additions & 3 deletions pkg/clusterversion/key_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pkg/kv/kvserver/client_migration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestStorePurgeOutdatedReplicas(t *testing.T) {
t.Run(fmt.Sprintf("with-initial-version=%t", withInitialVersion), func(t *testing.T) {
const numStores = 3
ctx := context.Background()
migrationVersion := roachpb.Version{Major: 42}
migrationVersion := roachpb.Version{Major: 1000042}

storeKnobs := &kvserver.StoreTestingKnobs{
DisableEagerReplicaRemoval: true,
Expand Down
3 changes: 2 additions & 1 deletion pkg/kv/kvserver/stores.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package kvserver
import (
"context"
"fmt"
math "math"
"sync"
"unsafe"

Expand Down Expand Up @@ -364,7 +365,7 @@ func SynthesizeClusterVersionFromEngines(
origin string
}

maxPossibleVersion := roachpb.Version{Major: 999999} // Sort above any real version.
maxPossibleVersion := roachpb.Version{Major: math.MaxInt32} // Sort above any real version.
minStoreVersion := originVersion{
Version: maxPossibleVersion,
origin: "(no store)",
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/crdb_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ func TestIsAtLeastVersion(t *testing.T) {
errorRE string
}{
{version: "21.2", expected: "true"},
{version: "99.2", expected: "false"},
{version: "1000099.2", expected: "false"},
{version: "foo", errorRE: ".*invalid version.*"},
} {
query := fmt.Sprintf("SELECT crdb_internal.is_at_least_version('%s')", tc.version)
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ select crdb_internal.get_vmodule()
query T
select regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', '');
----
22.1
1000022.1

query ITTT colnames
select node_id, component, field, regexp_replace(regexp_replace(value, '^\d+$', '<port>'), e':\\d+', ':<port>') as value from crdb_internal.node_runtime_info
Expand Down Expand Up @@ -693,7 +693,7 @@ select * from crdb_internal.node_inflight_trace_spans
query T
select regexp_replace(crdb_internal.node_executable_version()::string, '(-\d+)?$', '');
----
22.1
1000022.1

user root

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
gosql "database/sql"
"encoding/hex"
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
Expand Down Expand Up @@ -107,7 +108,7 @@ func TestPreconditionBeforeStartingAnUpgrade(t *testing.T) {
"There exists invalid descriptors as listed below. Fix these descriptors before attempting to upgrade again.\n"+
"Invalid descriptor: defaultdb.public.t (104) because 'relation \"t\" (104): invalid depended-on-by relation back reference: referenced descriptor ID 53: referenced descriptor not found'\n"+
"Invalid descriptor: defaultdb.public.temp_tbl (104) because 'no matching name info found in non-dropped relation \"t\"'",
err.Error())
strings.ReplaceAll(err.Error(), "1000022", "22"))
// The cluster version should remain at `v0`.
tdb.CheckQueryResults(t, "SHOW CLUSTER SETTING version", [][]string{{v0.String()}})
})
Expand Down
2 changes: 1 addition & 1 deletion pkg/upgrade/upgrades/wait_for_del_range_in_gc_job_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ SELECT count(*)
WHERE job_type = 'SCHEMA CHANGE GC'
AND status = 'paused'`,
[][]string{{"2"}})
tdb.ExpectErr(t, `verifying precondition for version 22.1-\d+: `+
tdb.ExpectErr(t, `verifying precondition for version \d*22.1-\d+: `+
`paused GC jobs prevent upgrading GC job behavior: \[\d+ \d+]`,
"SET CLUSTER SETTING version = crdb_internal.node_executable_version()")

Expand Down

0 comments on commit 5a511c2

Please sign in to comment.