Skip to content

Commit

Permalink
wal: handle initial obsolete logs
Browse files Browse the repository at this point in the history
During Open, there may exist log files that were constructed from a previous
WAL configuration. Adjust both the standalone and failover WAL managers to
account for WALs of a kind other than their own by separately recording these
initial logs as obsolete.
  • Loading branch information
jbowens authored and sumeerbhola committed Feb 26, 2024
1 parent 0b94619 commit bde41ee
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 42 deletions.
2 changes: 1 addition & 1 deletion open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ func TestOpenWALReplay(t *testing.T) {

if readOnly {
m := d.Metrics()
require.Equal(t, int64(logCount), m.WAL.Files)
require.Equal(t, int64(logCount), m.WAL.ObsoleteFiles)
d.mu.Lock()
require.NotNil(t, d.mu.mem.mutable)
d.mu.Unlock()
Expand Down
2 changes: 1 addition & 1 deletion tool/testdata/db_lsm
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ level | tables size val-bl vtables | score | in | tables size | tables siz
6 | 0 0B 0B 0 | - | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0.0
total | 1 709B 0B 0 | - | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0.0
-------------------------------------------------------------------------------------------------------------------
WAL: 1 files (0B) in: 0B written: 0B (0% overhead)
WAL: 0 files (0B) in: 0B written: 0B (0% overhead)
Flushes: 0
Compactions: 0 estimated debt: 0B in progress: 0 (0B)
default: 0 delete: 0 elision: 0 move: 0 read: 0 rewrite: 0 multi-level: 0
Expand Down
44 changes: 26 additions & 18 deletions wal/failover_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,14 @@ type segmentWithSizeEtc struct {

type failoverManager struct {
opts Options
// initialObsolete holds the set of DeletableLogs that formed the logs
// passed into Init. The initialObsolete logs are all obsolete. Once
// returned via Manager.Obsolete, initialObsolete is cleared. The
// initialObsolete logs are stored separately from mu.closedWALs because
// they may include logs that were NOT created by the failover manager, and
// multiple physical log files may form one logical WAL.
initialObsolete []DeletableLog

// TODO(jackson/sumeer): read-path etc.

dirHandles [numDirIndices]vfs.File
Expand Down Expand Up @@ -457,25 +465,14 @@ func (wm *failoverManager) Init(o Options, initial Logs) error {
}
wm.recycler.Init(o.MaxNumRecyclableLogs)
for _, ll := range initial {
llse := logicalLogWithSizesEtc{
num: ll.Num,
}
if wm.recycler.MinRecycleLogNum() <= ll.Num {
wm.recycler.SetMinRecycleLogNum(ll.Num + 1)
}
for i, s := range ll.segments {
fs, path := ll.SegmentLocation(i)
stat, err := fs.Stat(path)
if err != nil {
return err
}
llse.segments = append(llse.segments, segmentWithSizeEtc{
segment: s,
approxFileSize: uint64(stat.Size()),
synchronouslyClosed: true,
})
var err error
wm.initialObsolete, err = appendDeletableLogs(wm.initialObsolete, ll)
if err != nil {
return err
}
wm.mu.closedWALs = append(wm.mu.closedWALs, llse)
}
return nil
}
Expand Down Expand Up @@ -514,6 +511,11 @@ func (wm *failoverManager) Obsolete(
) (toDelete []DeletableLog, err error) {
wm.mu.Lock()
defer wm.mu.Unlock()

// If this is the first call to Obsolete after Open, we may have deletable
// logs outside the queue.
toDelete, wm.initialObsolete = wm.initialObsolete, nil

i := 0
for ; i < len(wm.mu.closedWALs); i++ {
ll := wm.mu.closedWALs[i]
Expand Down Expand Up @@ -607,7 +609,7 @@ func (wm *failoverManager) writerClosed(llse logicalLogWithSizesEtc) {

// Stats implements Manager.
func (wm *failoverManager) Stats() Stats {
recycledLogsCount, recycledLogSize := wm.recycler.Stats()
obsoleteLogsCount, obsoleteLogSize := wm.recycler.Stats()
wm.mu.Lock()
defer wm.mu.Unlock()
var liveFileCount int
Expand All @@ -624,9 +626,15 @@ func (wm *failoverManager) Stats() Stats {
if wm.mu.ww != nil {
updateStats(wm.mu.ww.getLog().segments)
}
for i := range wm.initialObsolete {
if i == 0 || wm.initialObsolete[i].NumWAL != wm.initialObsolete[i-1].NumWAL {
obsoleteLogsCount++
}
obsoleteLogSize += wm.initialObsolete[i].ApproxFileSize
}
return Stats{
ObsoleteFileCount: recycledLogsCount,
ObsoleteFileSize: recycledLogSize,
ObsoleteFileCount: obsoleteLogsCount,
ObsoleteFileSize: obsoleteLogSize,
LiveFileCount: liveFileCount,
LiveFileSize: liveFileSize,
}
Expand Down
23 changes: 23 additions & 0 deletions wal/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,29 @@ func (ll LogicalLog) String() string {
return sb.String()
}

// appendDeletableLogs walks the segments of ll and, for each one, appends a
// DeletableLog describing the underlying physical file to dst. The extended
// slice is returned.
//
// Each segment's file is Stat'ed to obtain the size recorded in
// ApproxFileSize. If a Stat call fails, the error is returned together with
// dst as it stood at that point (i.e. including any entries already appended
// for earlier segments of ll).
//
// appendDeletableLogs makes no judgment about whether a log file is actually
// obsolete; callers must take care not to delete logs that are still
// unflushed.
func appendDeletableLogs(dst []DeletableLog, ll LogicalLog) ([]DeletableLog, error) {
	for idx := 0; idx < len(ll.segments); idx++ {
		segFS, segPath := ll.SegmentLocation(idx)
		info, statErr := segFS.Stat(segPath)
		if statErr != nil {
			return dst, statErr
		}
		entry := DeletableLog{
			FS:             segFS,
			Path:           segPath,
			NumWAL:         ll.Num,
			ApproxFileSize: uint64(info.Size()),
		}
		dst = append(dst, entry)
	}
	return dst, nil
}

// Scan finds all log files in the provided directories. It returns an
// ordered list of WALs in increasing NumWAL order.
func Scan(dirs ...Dir) (Logs, error) {
Expand Down
38 changes: 25 additions & 13 deletions wal/standalone_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
package wal

import (
"cmp"
"os"
"slices"
"sync"

"github.com/cockroachdb/errors"
Expand All @@ -22,6 +20,13 @@ type StandaloneManager struct {
o Options
recycler LogRecycler
walDir vfs.File
// initialObsolete holds the set of DeletableLogs that formed the logs
// passed into Init. The initialObsolete logs are all obsolete. Once
// returned via Manager.Obsolete, initialObsolete is cleared. The
// initialObsolete logs are stored separately from mu.queue because they may
// include logs that were NOT created by the standalone manager, and
// multiple physical log files may form one logical WAL.
initialObsolete []DeletableLog

// External synchronization is relied on when accessing w in Manager.Create,
// Writer.{WriteRecord,Close}.
Expand Down Expand Up @@ -61,19 +66,15 @@ func (m *StandaloneManager) Init(o Options, initial Logs) error {
err = firstError(err, walDir.Close())
return err
}
var files []base.FileInfo
for _, ll := range initial {
size, err := ll.PhysicalSize()
if err != nil {
return closeAndReturnErr(err)
}
files = append(files, base.FileInfo{FileNum: base.DiskFileNum(ll.Num), FileSize: size})
if m.recycler.MinRecycleLogNum() <= ll.Num {
m.recycler.SetMinRecycleLogNum(ll.Num + 1)
}
m.initialObsolete, err = appendDeletableLogs(m.initialObsolete, ll)
if err != nil {
return closeAndReturnErr(err)
}
}
slices.SortFunc(files, func(a, b base.FileInfo) int { return cmp.Compare(a.FileNum, b.FileNum) })
m.mu.queue = files
return nil
}

Expand All @@ -97,6 +98,11 @@ func (m *StandaloneManager) Obsolete(
) (toDelete []DeletableLog, err error) {
m.mu.Lock()
defer m.mu.Unlock()

// If this is the first call to Obsolete after Open, we may have deletable
// logs outside the queue.
toDelete, m.initialObsolete = m.initialObsolete, nil

i := 0
for ; i < len(m.mu.queue); i++ {
fi := m.mu.queue[i]
Expand Down Expand Up @@ -210,16 +216,22 @@ func (m *StandaloneManager) ElevateWriteStallThresholdForFailover() bool {

// Stats implements Manager.
func (m *StandaloneManager) Stats() Stats {
recycledLogsCount, recycledLogSize := m.recycler.Stats()
obsoleteLogsCount, obsoleteLogSize := m.recycler.Stats()
m.mu.Lock()
defer m.mu.Unlock()
var fileSize uint64
for i := range m.mu.queue {
fileSize += m.mu.queue[i].FileSize
}
for i := range m.initialObsolete {
if i == 0 || m.initialObsolete[i].NumWAL != m.initialObsolete[i-1].NumWAL {
obsoleteLogsCount++
}
obsoleteLogSize += m.initialObsolete[i].ApproxFileSize
}
return Stats{
ObsoleteFileCount: recycledLogsCount,
ObsoleteFileSize: recycledLogSize,
ObsoleteFileCount: obsoleteLogsCount,
ObsoleteFileSize: obsoleteLogSize,
LiveFileCount: len(m.mu.queue),
LiveFileSize: fileSize,
}
Expand Down
13 changes: 4 additions & 9 deletions wal/testdata/manager_failover
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,9 @@ recycler min-log-num: 3

list-and-stats
----
logs:
000001: {(pri,000)}
000002: {(pri,000)}
stats:
obsolete: count 0 size 0
live: count 2 size 22
obsolete: count 2 size 22
live: count 0 size 0

# Wait for monitor ticker to start.
advance-time dur=1ms wait-monitor
Expand Down Expand Up @@ -115,13 +112,11 @@ ok
list-and-stats
----
logs:
000001: {(pri,000)}
000002: {(pri,000)}
000005: {(pri,000)}
000007: {(pri,000)}
stats:
obsolete: count 0 size 0
live: count 4 size 56
obsolete: count 2 size 22
live: count 2 size 34

obsolete min-unflushed=7
----
Expand Down

0 comments on commit bde41ee

Please sign in to comment.