Skip to content

Commit

Permalink
db: do not recycle WAL files used for recovery
Browse files Browse the repository at this point in the history
WAL recycling depends on the recycled WAL only containing log records
written with the recyclable record format. Pebble always writes WAL
files with the recyclable record format, but RocksDB only uses that
format if WAL recycling is enabled. If Pebble recycles a RocksDB-written
WAL that doesn't use the recyclable record format, it can leave
valid-looking log entries in the tail of the log. A subsequent replay of the
WAL by either Pebble or RocksDB could then replay those already
committed log entries. The most common way this badness would be
detected is by Pebble's L0 consistency checks, though the varieties of
badness here are almost unlimited (deleted records reappearing, written
records being deleted, etc.). In order to protect against this, Pebble
now only recycles WAL files that it has written in the current
incarnation and thus knows for certain have been written with the
recyclable record format. RocksDB has similar behavior, though the code
that achieves this is somewhat convoluted, so the behavior may have been
accidental.

Fixes #567
  • Loading branch information
petermattis committed Mar 18, 2020
1 parent f0b10da commit f3bd334
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 36 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ document all the incompatibilities. The list below contains known
incompatibilities.

* Pebble's use of WAL recycling is only compatible with RocksDB's
`kPointInTimeRecovery` WAL recovery mode. The
`kTolerateCorruptedTailRecords` and `kAbsoluteConsistency` modes
will result in RocksDB being unable to open a Pebble generated
WAL. See [#566](https://github.com/cockroachdb/pebble/issues/566).
`kTolerateCorruptedTailRecords` WAL recovery mode. Older versions of
RocksDB would automatically map incompatible WAL recovery modes to
`kTolerateCorruptedTailRecords`. New versions of RocksDB will
disable WAL recycling.
* Column families. Pebble does not support column families, nor does
it attempt to detect their usage when opening a DB that may contain
them.
Expand Down
15 changes: 14 additions & 1 deletion log_recycler.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,17 @@ import (
)

type logRecycler struct {
// The maximum number of log files to maintain for recycling.
limit int
mu struct {

// The minimum log number that is allowed to be recycled. Log numbers smaller
// than this will be subject to immediate deletion. This is used to prevent
// recycling a log written by a previous instance of the DB which may not
// have had log recycling enabled. If that previous instance of the DB was
// RocksDB, the old non-recyclable log record headers will be present.
minRecycleLogNum uint64

mu struct {
sync.Mutex
logNums []uint64
maxLogNum uint64
Expand All @@ -22,6 +31,10 @@ type logRecycler struct {
// the log file should not be deleted (i.e. the log is being recycled), and
// false otherwise.
func (r *logRecycler) add(logNum uint64) bool {
if logNum < r.minRecycleLogNum {
return false
}

r.mu.Lock()
defer r.mu.Unlock()

Expand Down
70 changes: 39 additions & 31 deletions log_recycler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,53 +24,58 @@ func (r *logRecycler) maxLogNum() uint64 {
}

func TestLogRecycler(t *testing.T) {
r := logRecycler{limit: 3}
r := logRecycler{limit: 3, minRecycleLogNum: 4}

// Logs below the min-recycle number are not recycled.
require.False(t, r.add(1))
require.False(t, r.add(2))
require.False(t, r.add(3))

// Logs are recycled up to the limit.
require.True(t, r.add(1))
require.EqualValues(t, []uint64{1}, r.logNums())
require.EqualValues(t, 1, r.maxLogNum())
require.EqualValues(t, 1, r.peek())
require.True(t, r.add(2))
require.EqualValues(t, []uint64{1, 2}, r.logNums())
require.EqualValues(t, 2, r.maxLogNum())
require.True(t, r.add(3))
require.EqualValues(t, []uint64{1, 2, 3}, r.logNums())
require.EqualValues(t, 3, r.maxLogNum())
require.True(t, r.add(4))
require.EqualValues(t, []uint64{4}, r.logNums())
require.EqualValues(t, 4, r.maxLogNum())
require.EqualValues(t, 4, r.peek())
require.True(t, r.add(5))
require.EqualValues(t, []uint64{4, 5}, r.logNums())
require.EqualValues(t, 5, r.maxLogNum())
require.True(t, r.add(6))
require.EqualValues(t, []uint64{4, 5, 6}, r.logNums())
require.EqualValues(t, 6, r.maxLogNum())

// Trying to add a file past the limit fails.
require.False(t, r.add(4))
require.EqualValues(t, []uint64{1, 2, 3}, r.logNums())
require.EqualValues(t, 4, r.maxLogNum())
require.False(t, r.add(7))
require.EqualValues(t, []uint64{4, 5, 6}, r.logNums())
require.EqualValues(t, 7, r.maxLogNum())

// Trying to add a previously recycled file returns success, but the internal
// state is unchanged.
require.True(t, r.add(1))
require.EqualValues(t, []uint64{1, 2, 3}, r.logNums())
require.EqualValues(t, 4, r.maxLogNum())
require.True(t, r.add(4))
require.EqualValues(t, []uint64{4, 5, 6}, r.logNums())
require.EqualValues(t, 7, r.maxLogNum())

// An error is returned if we try to pop an element other than the first.
require.Regexp(t, `invalid 2 vs \[1 2 3\]`, r.pop(2))
require.Regexp(t, `invalid 5 vs \[4 5 6\]`, r.pop(5))

require.NoError(t, r.pop(1))
require.EqualValues(t, []uint64{2, 3}, r.logNums())
require.NoError(t, r.pop(4))
require.EqualValues(t, []uint64{5, 6}, r.logNums())

// Log number 4 was already considered, so it won't be recycled.
require.True(t, r.add(4))
require.EqualValues(t, []uint64{2, 3}, r.logNums())
// Log number 7 was already considered, so it won't be recycled.
require.True(t, r.add(7))
require.EqualValues(t, []uint64{5, 6}, r.logNums())

require.True(t, r.add(5))
require.EqualValues(t, []uint64{2, 3, 5}, r.logNums())
require.EqualValues(t, 5, r.maxLogNum())
require.True(t, r.add(8))
require.EqualValues(t, []uint64{5, 6, 8}, r.logNums())
require.EqualValues(t, 8, r.maxLogNum())

require.NoError(t, r.pop(2))
require.EqualValues(t, []uint64{3, 5}, r.logNums())
require.NoError(t, r.pop(3))
require.EqualValues(t, []uint64{5}, r.logNums())
require.NoError(t, r.pop(5))
require.EqualValues(t, []uint64{6, 8}, r.logNums())
require.NoError(t, r.pop(6))
require.EqualValues(t, []uint64{8}, r.logNums())
require.NoError(t, r.pop(8))
require.EqualValues(t, []uint64(nil), r.logNums())

require.Regexp(t, `empty`, r.pop(6))
require.Regexp(t, `empty`, r.pop(9))
}

func TestRecycleLogs(t *testing.T) {
Expand Down Expand Up @@ -128,6 +133,9 @@ func TestRecycleLogs(t *testing.T) {
if n := d.logRecycler.count(); n != int(metrics.WAL.ObsoleteFiles) {
t.Fatalf("expected %d obsolete WAL files, but found %d", n, metrics.WAL.ObsoleteFiles)
}
if recycled := d.logRecycler.logNums(); len(recycled) != 0 {
t.Fatalf("expected no recycled WAL files after recovery, but found %d", recycled)
}
if err := d.Close(); err != nil {
t.Fatal(err)
}
Expand Down
3 changes: 3 additions & 0 deletions open.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
if fn >= d.mu.versions.minUnflushedLogNum {
logFiles = append(logFiles, fileNumAndName{fn, filename})
}
if d.logRecycler.minRecycleLogNum <= fn {
d.logRecycler.minRecycleLogNum = fn + 1
}
case fileTypeOptions:
if err := checkOptions(opts, opts.FS.PathJoin(dirname, filename)); err != nil {
return nil, err
Expand Down

0 comments on commit f3bd334

Please sign in to comment.