From 13a54c06d9a9683b6708e5c5b988201a28fde28b Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 24 Apr 2019 15:00:42 -0700 Subject: [PATCH] storage: fix possible raft log panic after fsync error Detected with #36989 applied by running `./bin/roachtest run --local '^system-crash/sync-errors=true$'`. With some slight modification to that test's constants it could repro errors like this within a minute: ``` panic: tocommit(375) is out of range [lastIndex(374)]. Was the raft log corrupted, truncated, or lost? ``` Debugging showed `DBSyncWAL` can be called even after a sync failure. I guess if it returns success any time after it fails it will ack writes that aren't recoverable in WAL. They aren't recoverable because RocksDB stops recovery upon hitting the offset corresponding to the lost write (typically there should be a corruption there). Meanwhile, there are still successfully synced writes at later offsets in the file. The fix is simple. If `DBSyncWAL` returns an error once, keep track of that error and return it for all future writes. Release note: None --- pkg/storage/engine/rocksdb.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/storage/engine/rocksdb.go b/pkg/storage/engine/rocksdb.go index 832fe8d5097f..769002fb06c1 100644 --- a/pkg/storage/engine/rocksdb.go +++ b/pkg/storage/engine/rocksdb.go @@ -688,6 +688,7 @@ func (r *RocksDB) syncLoop() { s.Lock() var lastSync time.Time + var err error for { for len(s.pending) == 0 && !s.closed { @@ -713,8 +714,14 @@ func (r *RocksDB) syncLoop() { s.Unlock() - var err error - if r.cfg.Dir != "" { + // Linux only guarantees we'll be notified of a writeback error once + // during a sync call. After sync fails once, we cannot rely on any + // future data written to WAL being crash-recoverable. That's because + // any future writes will be appended after a potential corruption in + // the WAL, and RocksDB's recovery terminates upon encountering any + // corruption. So, we must not call `DBSyncWAL` again after it has + // failed once. + if r.cfg.Dir != "" && err == nil { err = statusToError(C.DBSyncWAL(r.rdb)) lastSync = timeutil.Now() }