Skip to content

Commit

Permalink
vfs: pass start time in nanos to disk-health timing functions
Browse files Browse the repository at this point in the history
Pass the start time in the form of nanoseconds since the unix epoch as a
parameter to timeDiskOp and timeFilesystemOp. This aids post-mortem debugging
when the disk-health checker fatals the process and GOTRACEBACK is set to dump
the stacks, including arguments. The start time argument will be printed in hex
form, allowing us to decode the start time of the operation. This can be used
to confirm that the timed operation was still inflight at the time stacks were
collected.

Close #3009.
  • Loading branch information
jbowens committed Oct 31, 2023
1 parent a990615 commit df07a1e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 24 deletions.
59 changes: 37 additions & 22 deletions vfs/disk_health.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ type diskHealthCheckingFile struct {
// NB: this packing scheme is not persisted, and is therefore safe to adjust
// across process boundaries.
lastWritePacked atomic.Uint64
createTime time.Time
createTimeNanos int64
}

// newDiskHealthCheckingFile instantiates a new diskHealthCheckingFile, with the
Expand All @@ -151,8 +151,8 @@ func newDiskHealthCheckingFile(
diskSlowThreshold: diskSlowThreshold,
tickInterval: defaultTickInterval,

stopper: make(chan struct{}),
createTime: time.Now(),
stopper: make(chan struct{}),
createTimeNanos: time.Now().UnixNano(),
}
}

Expand All @@ -178,7 +178,7 @@ func (d *diskHealthCheckingFile) startTicker() {
continue
}
delta, writeSize, op := unpack(packed)
lastWrite := d.createTime.Add(delta)
lastWrite := time.Unix(0, d.createTimeNanos+delta.Nanoseconds())
now := time.Now()
if lastWrite.Add(d.diskSlowThreshold).Before(now) {
// diskSlowThreshold was exceeded. Call the passed-in
Expand Down Expand Up @@ -214,15 +214,15 @@ func (d *diskHealthCheckingFile) ReadAt(p []byte, off int64) (int, error) {
func (d *diskHealthCheckingFile) Write(p []byte) (n int, err error) {
d.timeDiskOp(OpTypeWrite, int64(len(p)), func() {
n, err = d.file.Write(p)
})
}, time.Now().UnixNano())
return n, err
}

// Write implements the io.WriterAt interface.
func (d *diskHealthCheckingFile) WriteAt(p []byte, ofs int64) (n int, err error) {
d.timeDiskOp(OpTypeWrite, int64(len(p)), func() {
n, err = d.file.WriteAt(p, ofs)
})
}, time.Now().UnixNano())
return n, err
}

Expand All @@ -241,7 +241,7 @@ func (d *diskHealthCheckingFile) Prefetch(offset, length int64) error {
func (d *diskHealthCheckingFile) Preallocate(off, n int64) (err error) {
d.timeDiskOp(OpTypePreallocate, n, func() {
err = d.file.Preallocate(off, n)
})
}, time.Now().UnixNano())
return err
}

Expand All @@ -254,37 +254,43 @@ func (d *diskHealthCheckingFile) Stat() (os.FileInfo, error) {
func (d *diskHealthCheckingFile) Sync() (err error) {
d.timeDiskOp(OpTypeSync, 0, func() {
err = d.file.Sync()
})
}, time.Now().UnixNano())
return err
}

// SyncData implements (vfs.File).SyncData.
func (d *diskHealthCheckingFile) SyncData() (err error) {
d.timeDiskOp(OpTypeSyncData, 0, func() {
err = d.file.SyncData()
})
}, time.Now().UnixNano())
return err
}

// SyncTo implements (vfs.File).SyncTo.
func (d *diskHealthCheckingFile) SyncTo(length int64) (fullSync bool, err error) {
d.timeDiskOp(OpTypeSyncTo, length, func() {
fullSync, err = d.file.SyncTo(length)
})
}, time.Now().UnixNano())
return fullSync, err
}

// timeDiskOp runs the specified closure and makes its timing visible to the
// monitoring goroutine, in case it exceeds one of the slow disk durations.
// opType should always be set. writeSizeInBytes should be set if the write
// operation is sized. If not, it should be set to zero.
func (d *diskHealthCheckingFile) timeDiskOp(opType OpType, writeSizeInBytes int64, op func()) {
//
// The start time is taken as a parameter in the form of nanoseconds since the
// unix epoch so that it appears in stack traces during crashes (if GOTRACEBACK
// is set appropriately), aiding postmortem debugging.
func (d *diskHealthCheckingFile) timeDiskOp(
opType OpType, writeSizeInBytes int64, op func(), startNanos int64,
) {
if d == nil {
op()
return
}

delta := time.Since(d.createTime)
delta := time.Duration(startNanos - d.createTimeNanos)
packed := pack(delta, writeSizeInBytes, opType)
if d.lastWritePacked.Swap(packed) != 0 {
panic("concurrent write operations detected on file")
Expand Down Expand Up @@ -352,7 +358,7 @@ type diskHealthCheckingDir struct {
func (d *diskHealthCheckingDir) Sync() (err error) {
d.fs.timeFilesystemOp(d.name, OpTypeSync, func() {
err = d.File.Sync()
})
}, time.Now().UnixNano())
return err
}

Expand Down Expand Up @@ -486,7 +492,17 @@ func WithDiskHealthChecks(
return fs, fs
}

func (d *diskHealthCheckingFS) timeFilesystemOp(name string, opType OpType, op func()) {
// timeFilesystemOp executes the provided closure, which should perform a
// singular filesystem operation of a type matching opType on the named file. It
// records the provided start time such that the long-lived disk-health checking
// goroutine can observe if the operation is blocked for an inordinate time.
//
// The start time is taken as a parameter in the form of nanoseconds since the
// unix epoch so that it appears in stack traces during crashes (if GOTRACEBACK
// is set appropriately), aiding postmortem debugging.
func (d *diskHealthCheckingFS) timeFilesystemOp(
name string, opType OpType, op func(), startNanos int64,
) {
if d == nil {
op()
return
Expand Down Expand Up @@ -515,7 +531,6 @@ func (d *diskHealthCheckingFS) timeFilesystemOp(name string, opType OpType, op f
d.startTickerLocked()
}

startNanos := time.Now().UnixNano()
for i := 0; i < len(d.mu.inflight); i++ {
if d.mu.inflight[i].startNanos.Load() == 0 {
// This slot is not in use. Claim it.
Expand Down Expand Up @@ -635,7 +650,7 @@ func (d *diskHealthCheckingFS) Create(name string) (File, error) {
var err error
d.timeFilesystemOp(name, OpTypeCreate, func() {
f, err = d.fs.Create(name)
})
}, time.Now().UnixNano())
if err != nil {
return f, err
}
Expand Down Expand Up @@ -665,7 +680,7 @@ func (d *diskHealthCheckingFS) Link(oldname, newname string) error {
var err error
d.timeFilesystemOp(newname, OpTypeLink, func() {
err = d.fs.Link(oldname, newname)
})
}, time.Now().UnixNano())
return err
}

Expand All @@ -684,7 +699,7 @@ func (d *diskHealthCheckingFS) MkdirAll(dir string, perm os.FileMode) error {
var err error
d.timeFilesystemOp(dir, OpTypeMkdirAll, func() {
err = d.fs.MkdirAll(dir, perm)
})
}, time.Now().UnixNano())
return err
}

Expand Down Expand Up @@ -733,7 +748,7 @@ func (d *diskHealthCheckingFS) Remove(name string) error {
var err error
d.timeFilesystemOp(name, OpTypeRemove, func() {
err = d.fs.Remove(name)
})
}, time.Now().UnixNano())
return err
}

Expand All @@ -742,7 +757,7 @@ func (d *diskHealthCheckingFS) RemoveAll(name string) error {
var err error
d.timeFilesystemOp(name, OpTypeRemoveAll, func() {
err = d.fs.RemoveAll(name)
})
}, time.Now().UnixNano())
return err
}

Expand All @@ -751,7 +766,7 @@ func (d *diskHealthCheckingFS) Rename(oldname, newname string) error {
var err error
d.timeFilesystemOp(newname, OpTypeRename, func() {
err = d.fs.Rename(oldname, newname)
})
}, time.Now().UnixNano())
return err
}

Expand All @@ -761,7 +776,7 @@ func (d *diskHealthCheckingFS) ReuseForWrite(oldname, newname string) (File, err
var err error
d.timeFilesystemOp(newname, OpTypeReuseForWrite, func() {
f, err = d.fs.ReuseForWrite(oldname, newname)
})
}, time.Now().UnixNano())
if err != nil {
return f, err
}
Expand Down
4 changes: 2 additions & 2 deletions vfs/disk_health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ func TestDiskHealthChecking_File_Underflow(t *testing.T) {
// Given the packing scheme, 35 years of process uptime will lead to a delta
// that is too large to fit in the packed int64.
tCreate := time.Now().Add(-35 * time.Hour * 24 * 365)
hcFile.createTime = tCreate
hcFile.createTimeNanos = tCreate.UnixNano()

// Assert that the time since tCreate (in milliseconds) is indeed greater
// than the max delta that can fit.
Expand All @@ -387,7 +387,7 @@ func TestDiskHealthChecking_File_Underflow(t *testing.T) {
// Given the packing scheme, 34 years of process uptime will lead to a delta
// that is just small enough to fit in the packed int64.
tCreate := time.Now().Add(-34 * time.Hour * 24 * 365)
hcFile.createTime = tCreate
hcFile.createTimeNanos = tCreate.UnixNano()

require.True(t, time.Since(tCreate).Milliseconds() < 1<<deltaBits-1)
require.NotPanics(t, func() { _, _ = hcFile.Write([]byte("should be fine")) })
Expand Down

0 comments on commit df07a1e

Please sign in to comment.