Skip to content

Commit

Permalink
add v3.6 to v3.5 downgrade support automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
chaochn47 committed May 20, 2021
1 parent 80ccb27 commit 78f9d2b
Show file tree
Hide file tree
Showing 12 changed files with 126 additions and 30 deletions.
6 changes: 6 additions & 0 deletions server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,12 @@ type ServerConfig struct {
// consider running defrag during bootstrap. Needs to be set to non-zero value to take effect.
ExperimentalBootstrapDefragThresholdMegabytes uint `json:"experimental-bootstrap-defrag-threshold-megabytes"`

// UnsafeAllowClusterVersionDowngrade, when true, allows the cluster version to be downgraded.
// It is false by default, since newer minor versions may introduce incompatible feature changes.
// For instance, a lease checkpointer request issued on 3.4 will fail the remaining 3.3 nodes.
// However, if the "lease checkpointer" feature is not used, it can be safe to run 3.3 alongside 3.4.
UnsafeAllowClusterVersionDowngrade bool `json:"unsafe-allow-cluster-version-downgrade"`

// V2Deprecation defines a phase of v2store deprecation process.
V2Deprecation V2DeprecationEnum `json:"v2-deprecation"`
}
Expand Down
13 changes: 12 additions & 1 deletion server/embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ const (
// v2 API is disabled by default.
DefaultEnableV2 = false

// DefaultUnsafeAllowClusterVersionDowngrade is the default value for "unsafe-allow-cluster-version-downgrade" flag.
// Unsafe cluster version downgrade is disabled by default.
DefaultUnsafeAllowClusterVersionDowngrade = false

// maxElectionMs specifies the maximum value of election timeout.
// More details are listed in ../Documentation/tuning.md#time-parameters.
maxElectionMs = 50000
Expand Down Expand Up @@ -392,6 +396,12 @@ type Config struct {
// ExperimentalTxnModeWriteWithSharedBuffer enables write transaction to use a shared buffer in its readonly check operations.
ExperimentalTxnModeWriteWithSharedBuffer bool `json:"experimental-txn-mode-write-with-shared-buffer"`

// UnsafeAllowClusterVersionDowngrade, when true, allows the cluster version to be downgraded.
// It is false by default, since newer minor versions may introduce incompatible feature changes.
// For instance, a lease checkpointer request issued on 3.4 will fail the remaining 3.3 nodes.
// However, if the "lease checkpointer" feature is not used, it can be safe to run 3.3 alongside 3.4.
UnsafeAllowClusterVersionDowngrade bool `json:"unsafe-allow-cluster-version-downgrade"`

// V2Deprecation describes phase of API & Storage V2 support
V2Deprecation config.V2DeprecationEnum `json:"v2-deprecation"`
}
Expand Down Expand Up @@ -489,7 +499,8 @@ func NewConfig() *Config {
ExperimentalMemoryMlock: false,
ExperimentalTxnModeWriteWithSharedBuffer: true,

V2Deprecation: config.V2_DEPR_DEFAULT,
UnsafeAllowClusterVersionDowngrade: DefaultUnsafeAllowClusterVersionDowngrade,
V2Deprecation: config.V2_DEPR_DEFAULT,
}
cfg.InitialCluster = cfg.InitialClusterFromName(cfg.Name)
return cfg
Expand Down
3 changes: 2 additions & 1 deletion server/embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock,
ExperimentalTxnModeWriteWithSharedBuffer: cfg.ExperimentalTxnModeWriteWithSharedBuffer,
ExperimentalBootstrapDefragThresholdMegabytes: cfg.ExperimentalBootstrapDefragThresholdMegabytes,
V2Deprecation: cfg.V2DeprecationEffective(),
UnsafeAllowClusterVersionDowngrade: cfg.UnsafeAllowClusterVersionDowngrade,
V2Deprecation: cfg.V2DeprecationEffective(),
}

if srvcfg.ExperimentalEnableDistributedTracing {
Expand Down
1 change: 1 addition & 0 deletions server/etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ func newConfig() *config {

// unsafe
fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.")
fs.BoolVar(&cfg.ec.UnsafeAllowClusterVersionDowngrade, "unsafe-allow-cluster-version-downgrade", embed.DefaultUnsafeAllowClusterVersionDowngrade, "true to allow cluster version downgrade, because newer minor versions may introduce incompatible feature changes like lease checkpointer introduced in v3.4")
fs.BoolVar(&cfg.ec.ForceNewCluster, "force-new-cluster", false, "Force to create a new one member cluster.")

// ignored
Expand Down
3 changes: 3 additions & 0 deletions server/etcdmain/help.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ Unsafe feature:
Force to create a new one-member cluster.
--unsafe-no-fsync 'false'
Disables fsync, unsafe, will cause data loss.
--unsafe-allow-cluster-version-downgrade 'false'
Allow cluster version downgrade, unsafe, newer minor versions may introduce incompatible feature changes.
For instance, experimental lease checkpointer is enabled in 3.4 and downgrade to 3.3 will fail.
CAUTIOUS with unsafe flag! It may break the guarantees given by the consensus protocol!
`
Expand Down
11 changes: 9 additions & 2 deletions server/etcdserver/api/membership/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ type RaftCluster struct {
v2store v2store.Store
be backend.Backend

// Readonly field after initialization
unsafeAllowDowngrade bool

sync.Mutex // guards the fields below
version *semver.Version
members map[types.ID]*Member
Expand Down Expand Up @@ -268,7 +271,7 @@ func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) {
if c.downgradeInfo != nil {
d = &DowngradeInfo{Enabled: c.downgradeInfo.Enabled, TargetVersion: c.downgradeInfo.TargetVersion}
}
mustDetectDowngrade(c.lg, c.version, d)
mustDetectDowngrade(c.lg, c.version, d, c.unsafeAllowDowngrade)
onSet(c.lg, c.version)

for _, m := range c.members {
Expand Down Expand Up @@ -536,7 +539,7 @@ func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *s
}
oldVer := c.version
c.version = ver
mustDetectDowngrade(c.lg, c.version, c.downgradeInfo)
mustDetectDowngrade(c.lg, c.version, c.downgradeInfo, c.unsafeAllowDowngrade)
if c.v2store != nil {
mustSaveClusterVersionToStore(c.lg, c.v2store, ver)
}
Expand All @@ -550,6 +553,10 @@ func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *s
onSet(c.lg, ver)
}

// AllowUnsafeDowngrade sets the cluster's unsafeAllowDowngrade flag to true.
// Recover and SetVersion pass this flag to mustDetectDowngrade.
// The flag is assigned here without taking the cluster mutex.
// NOTE(review): the field is documented as read-only after initialization, so
// this is presumably meant to be called before the cluster is used — confirm.
func (c *RaftCluster) AllowUnsafeDowngrade() {
c.unsafeAllowDowngrade = true
}

func (c *RaftCluster) IsReadyToAddVotingMember() bool {
nmembers := 1
nstarted := 0
Expand Down
23 changes: 17 additions & 6 deletions server/etcdserver/api/membership/downgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func isValidDowngrade(verFrom *semver.Version, verTo *semver.Version) bool {
}

// mustDetectDowngrade will detect unexpected downgrade when the local server is recovered.
func mustDetectDowngrade(lg *zap.Logger, cv *semver.Version, d *DowngradeInfo) {
func mustDetectDowngrade(lg *zap.Logger, cv *semver.Version, d *DowngradeInfo, unsafeAllowDowngrade bool) {
lv := semver.Must(semver.NewVersion(version.Version))
// only keep major.minor version for comparison against cluster version
lv = &semver.Version{Major: lv.Major, Minor: lv.Minor}
Expand All @@ -63,14 +63,25 @@ func mustDetectDowngrade(lg *zap.Logger, cv *semver.Version, d *DowngradeInfo) {
)
}

// if downgrade is enabled, and it's one minor version down
// safe to not fail (e.g., local version 3.4, cluster version 3.5)
// if the cluster disables downgrade, check local version against determined cluster version.
// the validation passes when local version is not less than cluster version
if cv != nil && lv.LessThan(*cv) {
lg.Fatal(
"invalid downgrade; server version is lower than determined cluster version",
zap.String("current-server-version", version.Version),
zap.String("determined-cluster-version", version.Cluster(cv.String())),
)
if unsafeAllowDowngrade && isValidDowngrade(cv, lv) {
lg.Warn("allowing unsafe downgrade; local server version is lower than determined cluster version",
zap.String("current-server-version", version.Version),
zap.String("determined-cluster-version", version.Cluster(cv.String())),
zap.String("target-cluster-version", version.Cluster(lv.String())),
)
// overwrite the cluster version with local version determined by the etcd binary version
*cv = *lv
} else {
lg.Fatal("invalid downgrade, not allowed; local server version is lower than determined cluster version",
zap.String("current-server-version", version.Version),
zap.String("determined-cluster-version", version.Cluster(cv.String())),
)
}
}
}

Expand Down
33 changes: 25 additions & 8 deletions server/etcdserver/api/membership/downgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,51 +40,66 @@ func TestMustDetectDowngrade(t *testing.T) {
downgradeDisabled := &DowngradeInfo{Enabled: false}

tests := []struct {
name string
clusterVersion *semver.Version
downgrade *DowngradeInfo
success bool
message string
name string
clusterVersion *semver.Version
downgrade *DowngradeInfo
unsafeAllowDowngrade bool
success bool
message string
}{
{
"Succeeded when downgrade is disabled and cluster version is nil",
nil,
downgradeDisabled,
false,
true,
"",
},
{
"Succeeded when downgrade is disabled and cluster version is one minor lower",
oneMinorLower,
downgradeDisabled,
false,
true,
"",
},
{
"Succeeded when downgrade is disabled and cluster version is server version",
lv,
downgradeDisabled,
false,
true,
"",
},
{
"Succeed when downgrade is disabled, unsafeDowngrade is enabled and cluster version is one minor higher",
oneMinorHigher,
downgradeDisabled,
true,
true,
"allowing unsafe downgrade; local server version is lower than determined cluster version",
},
{
"Failed when downgrade is disabled and server version is lower than determined cluster version ",
oneMinorHigher,
downgradeDisabled,
false,
"invalid downgrade; server version is lower than determined cluster version",
false,
"invalid downgrade, not allowed; local server version is lower than determined cluster version",
},
{
"Succeeded when downgrade is enabled and cluster version is nil",
nil,
downgradeEnabledEqualVersion,
false,
true,
"",
},
{
"Failed when downgrade is enabled and server version is target version",
lv,
downgradeEnabledEqualVersion,
false,
true,
"cluster is downgrading to target version",
},
Expand All @@ -93,21 +108,23 @@ func TestMustDetectDowngrade(t *testing.T) {
lv,
downgradeEnabledLowerVersion,
false,
false,
"invalid downgrade; server version is not allowed to join when downgrade is enabled",
},
{
"Failed when downgrade is enabled and local version is out of range and cluster version is nil",
nil,
downgradeEnabledHigherVersion,
false,
false,
"invalid downgrade; server version is not allowed to join when downgrade is enabled",
},

{
"Failed when downgrade is enabled and local version is out of range",
lv,
downgradeEnabledHigherVersion,
false,
false,
"invalid downgrade; server version is not allowed to join when downgrade is enabled",
},
}
Expand All @@ -122,7 +139,7 @@ func TestMustDetectDowngrade(t *testing.T) {
lcfg.ErrorOutputPaths = []string{logPath}
lg, _ := lcfg.Build()

mustDetectDowngrade(lg, tests[iint].clusterVersion, tests[iint].downgrade)
mustDetectDowngrade(lg, tests[iint].clusterVersion, tests[iint].downgrade, tests[iint].unsafeAllowDowngrade)
return
}

Expand Down
22 changes: 16 additions & 6 deletions server/etcdserver/cluster_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,10 @@ func decideClusterVersion(lg *zap.Logger, vers map[string]*version.Versions) *se
}

// allowedVersionRange decides the available version range of the cluster that local server can join in;
// if the downgrade enabled status is true, the version window is [oneMinorHigher, oneMinorHigher]
// if the downgrade is not enabled, the version window is [MinClusterVersion, localVersion]
func allowedVersionRange(downgradeEnabled bool) (minV *semver.Version, maxV *semver.Version) {
// if the downgrade enabled status is true, the version window is [oneMinorHigherThanLocalVersion, oneMinorHigherThanLocalVersion];
// otherwise, if the unsafeDowngrade enabled status is true, the version window is [MinClusterVersion, oneMinorHigherThanLocalVersion];
// if neither downgrade nor unsafeDowngrade is enabled, the version window is [MinClusterVersion, localVersion].
func allowedVersionRange(downgradeEnabled bool, unsafeDowngradeEnabled bool) (minV *semver.Version, maxV *semver.Version) {
minV = semver.Must(semver.NewVersion(version.MinClusterVersion))
maxV = semver.Must(semver.NewVersion(version.Version))
maxV = &semver.Version{Major: maxV.Major, Minor: maxV.Minor}
Expand All @@ -211,7 +212,15 @@ func allowedVersionRange(downgradeEnabled bool) (minV *semver.Version, maxV *sem
// TODO: handle the case of downgrading from a higher major version (e.g. downgrade from v4.0 to v3.x)
maxV.Minor = maxV.Minor + 1
minV = &semver.Version{Major: maxV.Major, Minor: maxV.Minor}
return minV, maxV
}

// if unsafeDowngrade is enabled, also accept a cluster version that is one minor
// version higher than the local version (e.g., local version 3.4, cluster version 3.5)
if unsafeDowngradeEnabled {
maxV.Minor = maxV.Minor + 1
}

return minV, maxV
}

Expand All @@ -221,9 +230,9 @@ func allowedVersionRange(downgradeEnabled bool) (minV *semver.Version, maxV *sem
// cluster version in the range of [MinV, MaxV] and no known members has a cluster version
// out of the range.
// We set this rule since when the local member joins, another member might be offline.
func isCompatibleWithCluster(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) bool {
func isCompatibleWithCluster(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper, unsafeAllowClusterVersionDowngrade bool) bool {
vers := getVersions(lg, cl, local, rt)
minV, maxV := allowedVersionRange(getDowngradeEnabledFromRemotePeers(lg, cl, local, rt))
minV, maxV := allowedVersionRange(getDowngradeEnabledFromRemotePeers(lg, cl, local, rt), unsafeAllowClusterVersionDowngrade)
return isCompatibleWithVers(lg, vers, local, minV, maxV)
}

Expand Down Expand Up @@ -256,12 +265,13 @@ func isCompatibleWithVers(lg *zap.Logger, vers map[string]*version.Versions, loc
)
return false
}

if maxV.LessThan(*clusterv) {
lg.Warn(
"cluster version of remote member is not compatible; too high",
zap.String("remote-member-id", id),
zap.String("remote-member-cluster-version", clusterv.String()),
zap.String("minimum-cluster-version-supported", minV.String()),
zap.String("maximum-cluster-version-supported", maxV.String()),
)
return false
}
Expand Down
27 changes: 22 additions & 5 deletions server/etcdserver/cluster_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,28 +183,45 @@ func TestDecideAllowedVersionRange(t *testing.T) {
localV = &semver.Version{Major: localV.Major, Minor: localV.Minor}

tests := []struct {
name string
downgradeEnabled bool
expectedMinV *semver.Version
expectedMaxV *semver.Version
name string
downgradeEnabled bool
unsafeDowngradeEnabled bool
expectedMinV *semver.Version
expectedMaxV *semver.Version
}{
{
"When cluster enables downgrade",
true,
false,
&semver.Version{Major: localV.Major, Minor: localV.Minor + 1},
&semver.Version{Major: localV.Major, Minor: localV.Minor + 1},
},
{
"When cluster enables downgrade and unsafeDowngrade",
true,
true,
&semver.Version{Major: localV.Major, Minor: localV.Minor + 1},
&semver.Version{Major: localV.Major, Minor: localV.Minor + 1},
},
{
"When cluster disables downgrade and enables unsafeDowngrade",
false,
true,
minClusterV,
&semver.Version{Major: localV.Major, Minor: localV.Minor + 1},
},
{
"When cluster disables downgrade",
false,
false,
minClusterV,
localV,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
minV, maxV := allowedVersionRange(tt.downgradeEnabled)
minV, maxV := allowedVersionRange(tt.downgradeEnabled, tt.unsafeDowngradeEnabled)
if !minV.Equal(*tt.expectedMinV) {
t.Errorf("Expected minV is %v; Got %v", tt.expectedMinV.String(), minV.String())
}
Expand Down
6 changes: 6 additions & 0 deletions server/etcdserver/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,9 @@ func restartNode(cfg config.ServerConfig, snapshot *raftpb.Snapshot) (types.ID,
)
cl := membership.NewCluster(cfg.Logger)
cl.SetID(id, cid)
if cfg.UnsafeAllowClusterVersionDowngrade {
cl.AllowUnsafeDowngrade()
}
s := raft.NewMemoryStorage()
if snapshot != nil {
s.ApplySnapshot(*snapshot)
Expand Down Expand Up @@ -562,6 +565,9 @@ func restartAsStandaloneNode(cfg config.ServerConfig, snapshot *raftpb.Snapshot)

cl := membership.NewCluster(cfg.Logger)
cl.SetID(id, cid)
if cfg.UnsafeAllowClusterVersionDowngrade {
cl.AllowUnsafeDowngrade()
}
s := raft.NewMemoryStorage()
if snapshot != nil {
s.ApplySnapshot(*snapshot)
Expand Down
Loading

0 comments on commit 78f9d2b

Please sign in to comment.