Skip to content

Commit

Permalink
wal: synchronously verify secondary is writable
Browse files Browse the repository at this point in the history
When initializing the WAL failover manager, synchronously verify that we can
write to the secondary directory by writing some human-readable metadata about
the Pebble instance using it as a secondary.

Informs #3230.
  • Loading branch information
jbowens committed Mar 27, 2024
1 parent bf92f0b commit b6e563f
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
22 changes: 22 additions & 0 deletions wal/failover_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
package wal

import (
"fmt"
"io"
"os"
"sync"
"time"

"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/vfs"
"golang.org/x/exp/rand"
Expand Down Expand Up @@ -472,6 +475,25 @@ func (wm *failoverManager) init(o Options, initial Logs) error {
o.timeSource = defaultTime{}
}
o.FailoverOptions.EnsureDefaults()

// Synchronously ensure that we're able to write to the secondary before we
// proceed. An operator doesn't want to encounter an issue writing to the
// secondary the first time there's a need to fail over. We write a bit of
// metadata to a file in the secondary's directory.
f, err := o.Secondary.FS.Create(o.Secondary.FS.PathJoin(o.Secondary.Dirname, "failover_source"))
if err != nil {
return errors.Newf("failed to write to WAL secondary dir: %v", err)
}
if _, err := io.WriteString(f, fmt.Sprintf("primary: %s\nprocess start: %s\n",
o.Primary.Dirname,
time.Now(),
)); err != nil {
return errors.Newf("failed to write metadata to WAL secondary dir: %v", err)
}
if err := errors.CombineErrors(f.Sync(), f.Close()); err != nil {
return err
}

stopper := newStopper()
var dirs [numDirIndices]dirAndFileHandle
for i, dir := range []Dir{o.Primary, o.Secondary} {
Expand Down
9 changes: 9 additions & 0 deletions wal/failover_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,15 @@ func TestFailoverManager_Quiesce(t *testing.T) {
require.NoError(t, m.Close())
}

func TestFailoverManager_SecondaryIsWritable(t *testing.T) {
var m failoverManager
require.EqualError(t, m.init(Options{
Primary: Dir{FS: vfs.NewMem(), Dirname: "primary"},
Secondary: Dir{FS: errorfs.Wrap(vfs.NewMem(), errorfs.ErrInjected), Dirname: "secondary"},
PreallocateSize: func() int { return 4 },
}, nil /* initial logs */), "failed to write to WAL secondary dir: injected error")
}

// TODO(sumeer): test wrap around of history in dirProber.

// TODO(sumeer): the failover datadriven test cases are not easy to write,
Expand Down
7 changes: 7 additions & 0 deletions wal/testdata/manager_failover
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ ok
list-fs
----
pri/000001.log
sec/failover_source

create-writer wal-num=2
----
Expand All @@ -59,6 +60,7 @@ list-fs
----
pri/000001.log
pri/000002.log
sec/failover_source

close-manager
----
Expand Down Expand Up @@ -353,6 +355,7 @@ list-fs
pri/000001-002.log
pri/000002.log
sec/000001-001.log
sec/failover_source

# Test with dampening of switching based on latency and secondary errors.
#
Expand Down Expand Up @@ -417,6 +420,7 @@ now: 77ms

list-fs
----
sec/failover_source

# Wait until monitor sees the error and switches back to primary.
advance-time dur=75ms wait-monitor wait-prober
Expand Down Expand Up @@ -542,6 +546,7 @@ list-fs
pri/000001-002.log
pri/000001-004.log
pri/000001.log
sec/failover_source

# Test failback after primary is healthy.
init-manager inject-errors=((ErrInjected (And Writes (PathMatch "*/000001.log"))))
Expand Down Expand Up @@ -664,6 +669,7 @@ list-fs
pri/000001-002.log
pri/probe-file
sec/000001-001.log
sec/failover_source

# Test that if UnhealthyOperationLatencyThreshold says not to allow failovers
# yet, failover doesn't occur even if the primary errors.
Expand Down Expand Up @@ -704,3 +710,4 @@ ok

list-fs
----
sec/failover_source

0 comments on commit b6e563f

Please sign in to comment.