From 0b238d3c2139be57d0e87eb0eb11e1f71edba99d Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 20 Mar 2024 16:21:01 -0400 Subject: [PATCH] storage: support WAL failover to an explicit path This commit expands on #120509, introducing a WAL failover mode that allows an operator of a node with a single store to configure WAL failover to failover to a particular path (rather than another store's directory). This is configured via the --wal-failover flag: --wal-failover=path=/mnt/data2 When disabling or changing the path, the operator is required to pass the previous path. Eg, --wal_failover=path=/mnt/data3,prev_path=/mnt/data2 or --wal_failover=disabled,prev_path=/mnt/data2 Informs #119418. Informs cockroachdb/pebble#3230 Epic: CRDB-35401 Release note (ops change): Adds an additional option to the new (in 24.1) --wal-failover CLI flag allowing an operator to specify an explicit path for WAL failover for single-store nodes. --- pkg/base/BUILD.bazel | 1 + pkg/base/config.go | 109 ++++++++++++++++--- pkg/base/config_test.go | 20 ++++ pkg/base/testdata/wal-failover-config | 29 ++++++ pkg/server/config.go | 4 +- pkg/storage/open.go | 144 ++++++++++++++++++++------ 6 files changed, 263 insertions(+), 44 deletions(-) create mode 100644 pkg/base/testdata/wal-failover-config diff --git a/pkg/base/BUILD.bazel b/pkg/base/BUILD.bazel index 4f7102949356..3517bbde2077 100644 --- a/pkg/base/BUILD.bazel +++ b/pkg/base/BUILD.bazel @@ -69,6 +69,7 @@ go_test( "//pkg/util/leaktest", "//pkg/util/log", "//pkg/util/uuid", + "@com_github_cockroachdb_datadriven//:datadriven", "@com_github_cockroachdb_errors//:errors", "@com_github_davecgh_go_spew//spew", "@com_github_stretchr_testify//require", diff --git a/pkg/base/config.go b/pkg/base/config.go index a053adcafb7a..0dfa5be233c3 100644 --- a/pkg/base/config.go +++ b/pkg/base/config.go @@ -17,6 +17,7 @@ import ( "net" "net/url" "os" + "strings" "time" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -838,8 +839,7 @@ type TempStorageConfig struct { Settings *cluster.Settings } -// WALFailoverMode configures a node's stores behavior under high write latency -// to their write-ahead logs. +// WALFailoverMode specifies the mode of WAL failover. type WALFailoverMode int8 const ( @@ -858,11 +858,11 @@ const ( // volume, the batch commit latency is insulated from the effects of momentary // disk stalls. WALFailoverAmongStores + // WALFailoverExplicitPath enables WAL failover for a single-store node to an + // explicitly specified path. + WALFailoverExplicitPath ) -// Type implements the pflag.Value interface. -func (m *WALFailoverMode) Type() string { return "string" } - // String implements fmt.Stringer. func (m *WALFailoverMode) String() string { return redact.StringWithoutMarkers(m) @@ -877,25 +877,110 @@ func (m *WALFailoverMode) SafeFormat(p redact.SafePrinter, _ rune) { p.SafeString("disabled") case WALFailoverAmongStores: p.SafeString("among-stores") + case WALFailoverExplicitPath: + p.SafeString("path") default: p.Printf("", int8(*m)) } } +// WALFailoverConfig configures a node's stores behavior under high write +// latency to their write-ahead logs. +type WALFailoverConfig struct { + Mode WALFailoverMode + // Path is the non-store path to which WALs should be written when failing + // over. It must be nonempty if and only if Mode == WALFailoverExplicitPath. + Path string + // PrevPath is the previously used non-store path. It may be set with Mode == + // WALFailoverExplicitPath (when changing the secondary path) or + // WALFailoverDisabled (when disabling WAL failover after it was previously + // enabled). It must be empty for other modes. + PrevPath string +} + +// Type implements the pflag.Value interface. +func (c *WALFailoverConfig) Type() string { return "string" } + +// String implements fmt.Stringer. +func (c *WALFailoverConfig) String() string { + return redact.StringWithoutMarkers(c) +} + +// SafeFormat implements the refact.SafeFormatter interface. +func (c *WALFailoverConfig) SafeFormat(p redact.SafePrinter, _ rune) { + switch c.Mode { + case WALFailoverDefault: + // Empty + case WALFailoverDisabled: + p.SafeString("disabled") + if c.PrevPath != "" { + p.SafeString(",prev_path=") + p.SafeString(redact.SafeString(c.PrevPath)) + } + case WALFailoverAmongStores: + p.SafeString("among-stores") + case WALFailoverExplicitPath: + p.SafeString("path=") + p.SafeString(redact.SafeString(c.Path)) + if c.PrevPath != "" { + p.SafeString(",prev_path=") + p.SafeString(redact.SafeString(c.PrevPath)) + } + default: + p.Printf("", int8(c.Mode)) + } +} + // Set implements the pflag.Value interface. -func (m *WALFailoverMode) Set(s string) error { - switch s { - case "disabled": - *m = WALFailoverDisabled - case "among-stores": - *m = WALFailoverAmongStores +func (c *WALFailoverConfig) Set(s string) error { + switch { + case strings.HasPrefix(s, "disabled"): + c.Mode = WALFailoverDisabled + var ok bool + c.Path, c.PrevPath, ok = parseWALFailoverPathFields(strings.TrimPrefix(s, "disabled")) + if !ok || c.Path != "" { + return errors.Newf("invalid disabled --wal-failover setting: %s "+ + "expect disabled[,prev_path=]", s) + } + case s == "among-stores": + c.Mode = WALFailoverAmongStores + case strings.HasPrefix(s, "path="): + c.Mode = WALFailoverExplicitPath + var ok bool + c.Path, c.PrevPath, ok = parseWALFailoverPathFields(s) + if !ok || c.Path == "" { + return errors.Newf("invalid path --wal-failover setting: %s "+ + "expect path=[,prev_path=]", s) + } default: return errors.Newf("invalid --wal-failover setting: %s "+ - "(possible values: disabled, among-stores)", s) + "(possible values: disabled, among-stores, path=)", s) } return nil } +func parseWALFailoverPathFields(s string) (path, prevPath string, ok bool) { + if s == "" { + return "", "", true + } + if s2 := strings.TrimPrefix(s, "path="); len(s2) < len(s) { + s = s2 + if i := strings.IndexByte(s, ','); i == -1 { + return s, "", true + } else { + path = s[:i] + s = s[i:] + } + } + + // Any remainder must be a prev_path= field. + if !strings.HasPrefix(s, ",prev_path=") { + return "", "", false + } + prevPath = strings.TrimPrefix(s, ",prev_path=") + return path, prevPath, true +} + // ExternalIODirConfig describes various configuration options pertaining // to external storage implementations. // TODO(adityamaru): Rename ExternalIODirConfig to ExternalIOConfig because it diff --git a/pkg/base/config_test.go b/pkg/base/config_test.go index ba1b459eee8c..21c24258cdd6 100644 --- a/pkg/base/config_test.go +++ b/pkg/base/config_test.go @@ -11,8 +11,10 @@ package base_test import ( + "bytes" "fmt" "math" + "strings" "testing" "time" @@ -20,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/datapathutils" "github.com/cockroachdb/cockroach/pkg/testutils/echotest" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/datadriven" "github.com/davecgh/go-spew/spew" "github.com/stretchr/testify/require" ) @@ -154,3 +157,20 @@ func TestRaftMaxInflightBytes(t *testing.T) { }) } } + +func TestWALFailoverConfigRoundtrip(t *testing.T) { + defer leaktest.AfterTest(t)() + + datadriven.RunTest(t, datapathutils.TestDataPath(t, "wal-failover-config"), func(t *testing.T, d *datadriven.TestData) string { + var buf bytes.Buffer + for _, l := range strings.Split(d.Input, "\n") { + var cfg base.WALFailoverConfig + if err := cfg.Set(l); err != nil { + fmt.Fprintf(&buf, "err: %s\n", err) + continue + } + fmt.Fprintln(&buf, cfg.String()) + } + return buf.String() + }) +} diff --git a/pkg/base/testdata/wal-failover-config b/pkg/base/testdata/wal-failover-config new file mode 100644 index 000000000000..34480f7414ff --- /dev/null +++ b/pkg/base/testdata/wal-failover-config @@ -0,0 +1,29 @@ +parse +among-stores +---- +among-stores + +parse +disabled +disabled,prev_path=foo +---- +disabled +disabled,prev_path=foo + +parse +path=/foo +path=/foo,prev_path=/bar +---- +path=/foo +path=/foo,prev_path=/bar + +parse +disabled,path=foo +among-stores,path=foo +among-stores,prev_path=foo +garbage +---- +err: invalid disabled --wal-failover setting: disabled,path=foo expect disabled[,prev_path=] +err: invalid --wal-failover setting: among-stores,path=foo (possible values: disabled, among-stores, path=) +err: invalid --wal-failover setting: among-stores,prev_path=foo (possible values: disabled, among-stores, path=) +err: invalid --wal-failover setting: garbage (possible values: disabled, among-stores, path=) diff --git a/pkg/server/config.go b/pkg/server/config.go index 87c2690716fb..e28390da945f 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -230,7 +230,7 @@ type BaseConfig struct { // WALFailover enables and configures automatic WAL failover when latency to // a store's primary WAL increases. - WALFailover base.WALFailoverMode + WALFailover base.WALFailoverConfig // SharedStorage is specified to enable disaggregated shared storage. SharedStorage string @@ -316,7 +316,7 @@ func (cfg *BaseConfig) SetDefaults( cfg.DisableMaxOffsetCheck = false cfg.DefaultZoneConfig = zonepb.DefaultZoneConfig() cfg.StorageEngine = storage.DefaultStorageEngine - cfg.WALFailover = base.WALFailoverDefault + cfg.WALFailover = base.WALFailoverConfig{Mode: base.WALFailoverDefault} cfg.TestingInsecureWebAccess = disableWebLogin cfg.Stores = base.StoreSpecList{ Specs: []base.StoreSpec{storeSpec}, diff --git a/pkg/storage/open.go b/pkg/storage/open.go index e7f793909bb4..b7a6068edcab 100644 --- a/pkg/storage/open.go +++ b/pkg/storage/open.go @@ -210,20 +210,99 @@ func LBaseMaxBytes(v int64) ConfigOption { } } +func noopConfigOption(*engineConfig) error { + return nil +} + +func errConfigOption(err error) func(*engineConfig) error { + return func(*engineConfig) error { return err } +} + // WALFailover configures automatic failover of the engine's write-ahead log to // another volume in the event the WAL becomes blocked on a write that does not // complete within a reasonable duration. -func WALFailover(mode base.WALFailoverMode, storeEnvs fs.Envs) ConfigOption { - // If the user specified no WAL failover setting, we default to disabling WAL - // failover and assume that the previous process did not have WAL failover - // enabled (so there's no need to populate Options.WALRecoveryDirs). If an - // operator had WAL failover enabled and now wants to disable it, they must - // explicitly set --wal-failover=disabled for the next process. - if mode == base.WALFailoverDefault || len(storeEnvs) == 1 { - return func(cfg *engineConfig) error { return nil } +func WALFailover(walCfg base.WALFailoverConfig, storeEnvs fs.Envs) ConfigOption { + // The set of options available in single-store versus multi-store + // configurations vary. This is in part due to the need to store the multiple + // stores' WALs separately. When WALFailoverExplicitPath is provided, we have + // no stable store identifier available to disambiguate the WALs of multiple + // stores. Note that the store ID is not known when a store is first opened. + if len(storeEnvs) == 1 { + switch walCfg.Mode { + case base.WALFailoverDefault, base.WALFailoverAmongStores: + return noopConfigOption + case base.WALFailoverDisabled: + // Check if the user provided an explicit previous path. If they did, they + // were previously using WALFailoverExplicitPath and are now disabling it. + // We need to add the explicilt path to WALRecoveryDirs. + if walCfg.PrevPath != "" { + return func(cfg *engineConfig) error { + cfg.Opts.WALRecoveryDirs = append(cfg.Opts.WALRecoveryDirs, wal.Dir{ + FS: cfg.Env, + Dirname: walCfg.PrevPath, + }) + return nil + } + } + // No PrevPath was provided, implying that the user previously was using + // WALFailoverAmongStores. If there's only 1 store, then WAL failover was + // effectively disabled and we noop. + return noopConfigOption + case base.WALFailoverExplicitPath: + // The user has provided an explicit path to which we should fail over WALs. + return func(cfg *engineConfig) error { + cfg.Opts.WALFailover = makePebbleWALFailoverOptsForDir(cfg.Settings, wal.Dir{ + FS: cfg.Env, + Dirname: walCfg.Path, + }) + if walCfg.PrevPath != "" { + cfg.Opts.WALRecoveryDirs = append(cfg.Opts.WALRecoveryDirs, wal.Dir{ + FS: cfg.Env, + Dirname: walCfg.PrevPath, + }) + } + return nil + } + default: + panic("unreachable") + } } - // mode == WALFailoverDisabled or WALFailoverAmongStores. + switch walCfg.Mode { + case base.WALFailoverDefault: + // If the user specified no WAL failover setting, we default to disabling WAL + // failover and assume that the previous process did not have WAL failover + // enabled (so there's no need to populate Options.WALRecoveryDirs). If an + // operator had WAL failover enabled and now wants to disable it, they must + // explicitly set --wal-failover=disabled for the next process. + return noopConfigOption + case base.WALFailoverDisabled: + // Check if the user provided an explicit previous path. If they did, they + // were previously using WALFailoverExplicitPath and are now disabling it. + // We need to add the explicilt path to WALRecoveryDirs. + if walCfg.PrevPath != "" { + return errConfigOption(errors.Newf("storage: cannot use explicit prev_path --wal-failover option with multiple stores")) + } + // No PrevPath was provided, implying that the user previously was using + // WALFailoverAmongStores. + + // Fallthrough + case base.WALFailoverExplicitPath: + // Not supported for multi-store configurations. + return errConfigOption(errors.Newf("storage: cannot use explicit path --wal-failover option with multiple stores")) + case base.WALFailoverAmongStores: + // Fallthrough + default: + panic("unreachable") + } + + // Either + // 1. mode == WALFailoverAmongStores + // or + // 2. mode == WALFailoverDisabled and the user previously was using + // WALFailoverAmongStores, so we should build the deterministic store pairing + // to determine which WALRecoveryDirs to pass to which engines. + // // For each store, we need to determine which store is its secondary for the // purpose of WALs. Even if failover is disabled, it's possible that it wasn't // when the previous process ran, and the secondary's wal dir may have WALs @@ -274,27 +353,8 @@ func WALFailover(mode base.WALFailoverMode, storeEnvs fs.Envs) ConfigOption { // Use auxiliary/wals-among-stores within the other stores directory. Dirname: secondaryEnv.PathJoin(secondaryEnv.Dir, base.AuxiliaryDir, "wals-among-stores"), } - - if mode == base.WALFailoverAmongStores { - cfg.Opts.WALFailover = &pebble.WALFailoverOptions{ - Secondary: secondary, - FailoverOptions: wal.FailoverOptions{ - // Leave most the options to their defaults, but - // UnhealthyOperationLatencyThreshold should be pulled from the - // cluster setting. - UnhealthyOperationLatencyThreshold: func() (time.Duration, bool) { - // WAL failover requires 24.1 to be finalized first. Otherwise, we might - // write WALs to a secondary, downgrade to a previous version's binary and - // blindly miss WALs. The second return value indicates whether the - // WAL manager is allowed to failover to the secondary. - // - // NB: We do not use settings.Version.IsActive because we do not have a - // guarantee that the cluster version has been initialized. - failoverOK := cfg.Settings.Version.ActiveVersionOrEmpty(context.TODO()).IsActive(clusterversion.V24_1Start) - return walFailoverUnhealthyOpThreshold.Get(&cfg.Settings.SV), failoverOK - }, - }, - } + if walCfg.Mode == base.WALFailoverAmongStores { + cfg.Opts.WALFailover = makePebbleWALFailoverOptsForDir(cfg.Settings, secondary) return nil } // mode == WALFailoverDisabled @@ -303,6 +363,30 @@ func WALFailover(mode base.WALFailoverMode, storeEnvs fs.Envs) ConfigOption { } } +func makePebbleWALFailoverOptsForDir( + settings *cluster.Settings, dir wal.Dir, +) *pebble.WALFailoverOptions { + return &pebble.WALFailoverOptions{ + Secondary: dir, + FailoverOptions: wal.FailoverOptions{ + // Leave most the options to their defaults, but + // UnhealthyOperationLatencyThreshold should be pulled from the + // cluster setting. + UnhealthyOperationLatencyThreshold: func() (time.Duration, bool) { + // WAL failover requires 24.1 to be finalized first. Otherwise, we might + // write WALs to a secondary, downgrade to a previous version's binary and + // blindly miss WALs. The second return value indicates whether the + // WAL manager is allowed to failover to the secondary. + // + // NB: We do not use settings.Version.IsActive because we do not have a + // guarantee that the cluster version has been initialized. + failoverOK := settings.Version.ActiveVersionOrEmpty(context.TODO()).IsActive(clusterversion.V24_1Start) + return walFailoverUnhealthyOpThreshold.Get(&settings.SV), failoverOK + }, + }, + } +} + // PebbleOptions contains Pebble-specific options in the same format as a // Pebble OPTIONS file. For example: // [Options]