Skip to content

Commit

Permalink
Merge #129520
Browse files Browse the repository at this point in the history
129520: sql: fix checkClusterSettingValuesAreEquivalent for final release fence versions r=rafiss a=rafiss

When the `internal` field of a clusterversion is `0`, which it would be for all final cluster versions, the corresponding fence version will have an internal version of `-1`.

The logic in checkClusterSettingValuesAreEquivalent did not account for this. It was assuming that one could always decrement the internal version by 1 in order to get the predecessor version. Now, we use ListBetween instead, which does not need this assumption.

fixes #129460
Release note (bug fix): Fixed a rare bug in SHOW CLUSTER SETTING that could cause it to fail with an error like `timed out: value differs between local setting and KV`.

Co-authored-by: Rafi Shamim <[email protected]>
  • Loading branch information
craig[bot] and rafiss committed Aug 27, 2024
2 parents cd97615 + 2b25f72 commit 79de4ac
Show file tree
Hide file tree
Showing 12 changed files with 122 additions and 56 deletions.
18 changes: 18 additions & 0 deletions pkg/clusterversion/clusterversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,24 @@ func (cv ClusterVersion) Encode() []byte {
return encoded
}

// FenceVersion is the fence version -- the internal immediately prior -- for
// the given version.
//
// Fence versions allow the upgrades infrastructure to safely step through
// consecutive cluster versions in the presence of Nodes (running any binary
// version) being added to the cluster. See the upgrademanager package for
// intended usage.
//
// Fence versions (and the upgrades infrastructure entirely) were introduced in
// the 21.1 release cycle. In the same release cycle, we introduced the
// invariant that new user-defined versions (users being crdb engineers) must
// always have even-numbered Internal versions, thus reserving the odd numbers
// to slot in fence versions for each cluster version. See top-level
// documentation in the clusterversion package for more details.
func (cv ClusterVersion) FenceVersion() ClusterVersion {
return ClusterVersion{Version: cv.Version.FenceVersion()}
}

var _ settings.ClusterVersionImpl = ClusterVersion{}

// EncodingFromVersionStr is a shorthand to generate an encoded cluster version
Expand Down
10 changes: 0 additions & 10 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,16 +338,6 @@ func (k Key) Version() roachpb.Version {
return maybeApplyDevOffset(k, version)
}

// FenceVersion is the fence version -- the internal immediately prior -- for
// the named version, if it is Internal.
func (k Key) FenceVersion() roachpb.Version {
v := k.Version()
if v.Internal > 0 {
v.Internal -= 1
}
return v
}

// IsFinal returns true if the key corresponds to a final version (as opposed to
// a transitional internal version during upgrade).
func (k Key) IsFinal() bool {
Expand Down
4 changes: 4 additions & 0 deletions pkg/roachpb/metadata.proto
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,10 @@ message Version {
// cycle. They are subversions that are never the end versions of a release,
// i.e. users of stable, public release will only use binaries with the
// internal version set to 0.
// This field will be odd for fence versions, which allow the upgrades
// infrastructure to safely step through consecutive cluster versions. Note
// that -1 is a legal value, as that is the fence version for all final
// releases, which have an internal version of 0.
optional int32 internal = 4 [(gogoproto.nullable) = false];
}

Expand Down
77 changes: 67 additions & 10 deletions pkg/roachpb/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import (
"fmt"
"regexp"
"strconv"
"strings"

"github.com/cockroachdb/cockroach/pkg/util/buildutil"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
)
Expand Down Expand Up @@ -69,6 +71,16 @@ func (v Version) SafeFormat(p redact.SafePrinter, _ rune) {
p.Printf("%d.%d", v.Major, v.Minor)
return
}
// NB: Internal may be -1. This is the case for all fence versions for final
// versions of a release. Handle it specially to avoid printing the -1, which
// is confusable with the `-` separator.
if v.Internal < 0 {
if buildutil.CrdbTestBuild && v.Internal != -1 {
panic(errors.Newf("%s should not have Internal less than -1", v))
}
p.Printf("%d.%d-upgrading-final-step", v.Major, v.Minor)
return
}
// If the version is offset, remove the offset and add it back to the result. We want
// 1000023.1-upgrading-to-1000023.2-step-002, not 1000023.1-upgrading-to-23.2-step-002.
noOffsetVersion := v
Expand All @@ -94,22 +106,52 @@ func (v Version) IsFinal() bool {
return v.Internal == 0
}

// IsFence returns true if this is a fence version.
//
// A version is a fence version iff Internal is odd.
func (v Version) IsFence() bool {
// NB: Internal may be -1. This is the case for all fence versions for final
// versions of a release.
return v.Internal%2 != 0
}

// PrettyPrint returns the value in a format that makes it apparent whether or
// not it is a fence version.
func (v Version) PrettyPrint() string {
// If we're a version greater than v20.2 and have an odd internal version,
// we're a fence version. See fenceVersionFor in pkg/upgrade to understand
// what these are.
fenceVersion := !v.LessEq(Version{Major: 20, Minor: 2}) && (v.Internal%2) == 1
if !fenceVersion {
if !v.IsFence() {
return v.String()
}
return fmt.Sprintf("%v(fence)", v)
}

// FenceVersion is the fence version -- the internal immediately prior -- for
// the given version.
//
// Fence versions allow the upgrades infrastructure to safely step through
// consecutive cluster versions in the presence of Nodes (running any binary
// version) being added to the cluster. See the upgrademanager package for
// intended usage.
//
// Fence versions (and the upgrades infrastructure entirely) were introduced in
// the 21.1 release cycle. In the same release cycle, we introduced the
// invariant that new user-defined versions (users being crdb engineers) must
// always have even-numbered Internal versions, thus reserving the odd numbers
// to slot in fence versions for each cluster version. See top-level
// documentation in the clusterversion package for more details.
func (v Version) FenceVersion() Version {
if v.IsFence() {
panic(errors.Newf("%s already is a fence version", v))
}
// NB: Internal may be -1 after this. This is the case for all final versions
// for a release.
fenceV := v
fenceV.Internal--
return fenceV
}

var (
verPattern = regexp.MustCompile(
`^(?P<major>[0-9]+)\.(?P<minor>[0-9]+)(|(-|-upgrading(|-to-[0-9]+.[0-9]+)-step-)(?P<internal>[0-9]+))$`,
`^(?P<major>[0-9]+)\.(?P<minor>[0-9]+)(|-upgrading-final-step|(-|-upgrading(|-to-[0-9]+.[0-9]+)-step-)(?P<internal>[-0-9]+))$`,
)
verPatternMajorIdx = verPattern.SubexpIndex("major")
verPatternMinorIdx = verPattern.SubexpIndex("minor")
Expand Down Expand Up @@ -138,9 +180,14 @@ func ParseVersion(s string) (Version, error) {
return int32(n)
}
v := Version{
Major: toInt(matches[verPatternMajorIdx]),
Minor: toInt(matches[verPatternMinorIdx]),
Internal: toInt(matches[verPatternInternalIdx]),
Major: toInt(matches[verPatternMajorIdx]),
Minor: toInt(matches[verPatternMinorIdx]),
}
// NB: Internal is -1 for all fence versions for final versions of a release.
if strings.Contains(s, "-upgrading-final-step") {
v.Internal = -1
} else {
v.Internal = toInt(matches[verPatternInternalIdx])
}
if err != nil {
return Version{}, errors.Wrapf(err, "invalid version %s", s)
Expand Down Expand Up @@ -198,7 +245,9 @@ var successorSeries = map[ReleaseSeries]ReleaseSeries{
// ReleaseSeries obtains the release series for the given version. Specifically:
// - if the version is final (Internal=0), the ReleaseSeries has the same major/minor.
// - if the version is a transitional version during upgrade (e.g. v23.1-8),
// the result is the next final version (e.g. v23.1).
// the result is the next final version (e.g. v23.2).
// - if the internal version is -1 (which is the case for the fence
// version of a final version), the result has the same major/minor.
//
// For non-final versions (which indicate an update to the next series), this
// requires knowledge of the next series; unknown non-final versions will return
Expand All @@ -211,6 +260,14 @@ func (v Version) ReleaseSeries() (s ReleaseSeries, ok bool) {
if v.IsFinal() {
return base, true
}
// NB: Internal may be -1. This is the case for all fence versions for final
// versions of a release.
if v.Internal < 0 {
if buildutil.CrdbTestBuild && v.Internal != -1 {
panic(errors.Newf("%s should not have Internal less than -1", v))
}
return base, true
}
s, ok = base.Successor()
return s, ok
}
5 changes: 5 additions & 0 deletions pkg/roachpb/version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ func TestParseVersion(t *testing.T) {
{s: "1000023.1-upgrading-to-1000023.2-step-004", v: Version{Major: 1000023, Minor: 1, Internal: 4}, roundtrip: true},
{s: "23.1-4", v: Version{Major: 23, Minor: 1, Internal: 4}},
{s: "23.1-upgrading-step-004", v: Version{Major: 23, Minor: 1, Internal: 4}},
// NB: The fence version for a final version will have Internal=-1.
{s: "23.2-upgrading-final-step", v: Version{Major: 23, Minor: 2, Internal: -1}, roundtrip: true},
// We used to have unintuitive formatting logic for the -1 internal version.
// See https://github.com/cockroachdb/cockroach/issues/129460.
{s: "23.2-upgrading-step--01", v: Version{Major: 23, Minor: 2, Internal: -1}},
}
for _, tc := range testData {
t.Run("", func(t *testing.T) {
Expand Down
13 changes: 9 additions & 4 deletions pkg/sql/show_cluster_setting.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,15 @@ func checkClusterSettingValuesAreEquivalent(localRawVal, kvRawVal []byte) error
}
decodedLocal, localVal, localOk := maybeDecodeVersion(localRawVal)
decodedKV, kvVal, kvOk := maybeDecodeVersion(kvRawVal)
if localOk && kvOk && decodedLocal.Internal%2 == 1 /* isFence */ {
predecessor := decodedLocal
predecessor.Internal--
if predecessor.Equal(decodedKV) {
if localOk && kvOk && decodedLocal.IsFence() {
// NB: The internal version is -1 for the fence version of all final cluster
// versions. In these cases, we cannot simply check that the local version
// is off-by-one from the KV version, since (for example's sake) we would be
// comparing (24,1,12) to (24,2,-1). Instead, we can use ListBetween to
// verify that there are no cluster versions in between the local and KV
// versions.
versionsBetween := clusterversion.ListBetween(decodedKV.Version, decodedLocal.Version)
if len(versionsBetween) == 0 {
return nil
}
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/show_cluster_setting_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package sql

import (
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
Expand Down Expand Up @@ -59,6 +60,21 @@ func TestCheckClusterSettingValuesAreEquivalent(t *testing.T) {
kv: encode(t, "22.2-upgrading-to-23.1-step-011"),
exp: "value differs between local setting ([]) and KV (22.2-upgrading-to-23.1-step-011)",
},
{ // 5
// NB: On release branches, clusterversion.Latest will have a fence
// version that has -1 for the internal version.
local: encode(t, clusterversion.Latest.Version().FenceVersion().String()),
kv: encode(t, (clusterversion.Latest - 1).Version().String()),
},
{ // 6
local: encode(t, clusterversion.Latest.Version().String()),
kv: encode(t, (clusterversion.Latest - 1).Version().String()),
exp: fmt.Sprintf(
"value differs between local setting (%s) and KV (%s)",
clusterversion.ClusterVersion{Version: clusterversion.Latest.Version()},
clusterversion.ClusterVersion{Version: (clusterversion.Latest - 1).Version()},
),
},
} {
t.Run("", func(t *testing.T) {
err := checkClusterSettingValuesAreEquivalent(tc.local, tc.kv)
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -2534,7 +2534,7 @@ func pebbleFormatVersion(clusterVersion roachpb.Version) pebble.FormatMajorVersi
// pebbleFormatVersionKeys are sorted in descending order; find the first one
// that is not newer than clusterVersion.
for _, k := range pebbleFormatVersionKeys {
if clusterVersion.AtLeast(k.FenceVersion()) {
if clusterVersion.AtLeast(k.Version().FenceVersion()) {
return pebbleFormatVersionMap[k]
}
}
Expand Down
1 change: 0 additions & 1 deletion pkg/upgrade/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ go_library(
"//pkg/sql/sessiondata",
"//pkg/sql/sqlstats",
"//pkg/upgrade/upgradebase",
"//pkg/util/log",
"//pkg/util/retry",
"//pkg/util/stop",
"//pkg/util/uuid",
Expand Down
28 changes: 0 additions & 28 deletions pkg/upgrade/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,40 +13,12 @@ package upgrade
import (
"context"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
)

// FenceVersionFor constructs the appropriate "fence version" for the given
// cluster version. Fence versions allow the upgrades infrastructure to safely
// step through consecutive cluster versions in the presence of Nodes (running
// any binary version) being added to the cluster. See the upgrade manager
// above for intended usage.
//
// Fence versions (and the upgrades infrastructure entirely) were introduced
// in the 21.1 release cycle. In the same release cycle, we introduced the
// invariant that new user-defined versions (users being crdb engineers) must
// always have even-numbered Internal versions, thus reserving the odd numbers
// to slot in fence versions for each cluster version. See top-level
// documentation in pkg/clusterversion for more details.
func FenceVersionFor(
ctx context.Context, cv clusterversion.ClusterVersion,
) clusterversion.ClusterVersion {
if (cv.Internal % 2) != 0 {
log.Fatalf(ctx, "only even numbered internal versions allowed, found %s", cv.Version)
}

// We'll pick the odd internal version preceding the cluster version,
// slotting ourselves right before it.
fenceCV := cv
fenceCV.Internal--
return fenceCV
}

// BumpSystemDatabaseSchemaVersion bumps the SystemDatabaseSchemaVersion
// field for the system database descriptor. It is called after every upgrade
// step that has an associated migration, and when upgrading to the final
Expand Down
2 changes: 1 addition & 1 deletion pkg/upgrade/upgrademanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ func (m *Manager) Migrate(

cv := clusterversion.ClusterVersion{Version: clusterVersion}

fenceVersion := upgrade.FenceVersionFor(ctx, cv)
fenceVersion := cv.FenceVersion()
if err := bumpClusterVersion(ctx, m.deps.Cluster, fenceVersion); err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/upgrade/upgrademanager/manager_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -827,7 +827,7 @@ func TestMigrationFailure(t *testing.T) {
// Pick a random version in to fail at
versions := clusterversion.ListBetween(startVersion, endVersion)
failVersion := versions[rand.Intn(len(versions))]
fenceVersion := upgrade.FenceVersionFor(ctx, clusterversion.ClusterVersion{Version: failVersion}).Version
fenceVersion := failVersion.FenceVersion()
t.Logf("test will fail at version: %s", failVersion.String())

// Create a storage cluster for the tenant.
Expand Down

0 comments on commit 79de4ac

Please sign in to comment.