diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index be393d0e1355..34fe3374d301 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -398,7 +398,7 @@ type EngineIterator interface { // a clone of an existing iterator. type CloneContext struct { rawIter pebbleiter.Iterator - statsReporter iterStatsReporter + statsReporter *Pebble } // IterOptions contains options used to create an {MVCC,Engine}Iterator. diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index ae3a8ad449ef..141aad735bca 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -1268,10 +1268,31 @@ func (p *Pebble) async(fn func()) { }() } +// writePreventStartupFile creates a file that will prevent nodes from automatically restarting after +// experiencing sstable corruption. +func (p *Pebble) writePreventStartupFile(ctx context.Context) { + auxDir := p.GetAuxiliaryDir() + _ = p.MkdirAll(auxDir, os.ModePerm) + path := base.PreventedStartupFile(auxDir) + + preventStartupMsg := fmt.Sprintf(`ATTENTION: + + this node is terminating because of sstable corruption. + Corruption may be a consequence of a hardware error. + + A file preventing this node from restarting was placed at: + %s`, path) + + if err := fs.WriteFile(p.unencryptedFS, path, []byte(preventStartupMsg)); err != nil { + log.Warningf(ctx, "%v", err) + } +} + func (p *Pebble) makeMetricEtcEventListener(ctx context.Context) pebble.EventListener { return pebble.EventListener{ BackgroundError: func(err error) { if errors.Is(err, pebble.ErrCorruption) { + p.writePreventStartupFile(ctx) log.Fatalf(ctx, "local corruption detected: %v", err) } }, diff --git a/pkg/storage/pebble_batch.go b/pkg/storage/pebble_batch.go index 36d45def93c1..93aaa0271eb9 100644 --- a/pkg/storage/pebble_batch.go +++ b/pkg/storage/pebble_batch.go @@ -56,7 +56,7 @@ type pebbleBatch struct { // scratch space for wrappedIntentWriter. scratch []byte - iterStatsReporter iterStatsReporter + iterStatsReporter *Pebble batchStatsReporter batchStatsReporter settings *cluster.Settings mayWriteSizedDeletes bool @@ -82,7 +82,7 @@ func newPebbleBatch( batch *pebble.Batch, writeOnly bool, settings *cluster.Settings, - iterStatsReporter iterStatsReporter, + iterStatsReporter *Pebble, batchStatsReporter batchStatsReporter, ) *pebbleBatch { pb := pebbleBatchPool.Get().(*pebbleBatch) diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index b77b72a663d1..4308ecbd6efe 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -12,6 +12,7 @@ package storage import ( "bytes" + "context" "math" "sync" @@ -49,7 +50,7 @@ type pebbleIterator struct { // statsReporter is used to sum iterator stats across all the iterators // during the lifetime of the Engine when the iterator is closed or its // stats reset. It's intended to be used with (*Pebble). It must not be nil. - statsReporter iterStatsReporter + statsReporter *Pebble // Set to true to govern whether to call SeekPrefixGE or SeekGE. Skips // SSTables based on MVCC/Engine key when true. @@ -75,16 +76,6 @@ type pebbleIterator struct { mvccDone bool } -type iterStatsReporter interface { - aggregateIterStats(IteratorStats) -} - -var noopStatsReporter = noopStatsReporterImpl{} - -type noopStatsReporterImpl struct{} - -func (noopStatsReporterImpl) aggregateIterStats(IteratorStats) {} - var _ MVCCIterator = &pebbleIterator{} var _ EngineIterator = &pebbleIterator{} @@ -96,10 +87,7 @@ var pebbleIterPool = sync.Pool{ // newPebbleIterator creates a new Pebble iterator for the given Pebble reader. func newPebbleIterator( - handle pebble.Reader, - opts IterOptions, - durability DurabilityRequirement, - statsReporter iterStatsReporter, + handle pebble.Reader, opts IterOptions, durability DurabilityRequirement, statsReporter *Pebble, ) *pebbleIterator { p := pebbleIterPool.Get().(*pebbleIterator) p.reusable = false // defensive @@ -134,7 +122,7 @@ func newPebbleSSTIterator( ) (*pebbleIterator, error) { p := pebbleIterPool.Get().(*pebbleIterator) p.reusable = false // defensive - p.init(nil, opts, StandardDurability, noopStatsReporter) + p.init(nil, opts, StandardDurability, nil) var externalIterOpts []pebble.ExternalIterOption if forwardOnly { @@ -158,7 +146,7 @@ func (p *pebbleIterator) init( iter pebbleiter.Iterator, opts IterOptions, durability DurabilityRequirement, - statsReporter iterStatsReporter, + statsReporter *Pebble, ) { *p = pebbleIterator{ iter: iter, @@ -185,7 +173,7 @@ func (p *pebbleIterator) initReuseOrCreate( clone bool, opts IterOptions, durability DurabilityRequirement, - statsReporter iterStatsReporter, + statsReporter *Pebble, ) { if iter != nil && !clone { p.init(iter, opts, durability, statsReporter) @@ -315,7 +303,7 @@ func (p *pebbleIterator) Close() { // Report the iterator's stats so they can be accumulated and exposed // through time-series metrics. - if p.iter != nil { + if p.iter != nil && p.statsReporter != nil { p.statsReporter.aggregateIterStats(p.Stats()) } @@ -956,6 +944,9 @@ func (p *pebbleIterator) destroy() { // NB: The panic is omitted if the error is encountered on an external // iterator which is iterating over uncommitted sstables. if err := p.iter.Close(); !p.external && errors.Is(err, pebble.ErrCorruption) { + if p.statsReporter != nil { + p.statsReporter.writePreventStartupFile(context.Background()) + } panic(err) } p.iter = nil diff --git a/pkg/storage/pebble_iterator_test.go b/pkg/storage/pebble_iterator_test.go index 29b957fc322e..49f75dfe87ee 100644 --- a/pkg/storage/pebble_iterator_test.go +++ b/pkg/storage/pebble_iterator_test.go @@ -72,7 +72,7 @@ func TestPebbleIterator_Corruption(t *testing.T) { LowerBound: []byte("a"), UpperBound: []byte("z"), } - iter := newPebbleIterator(p.db, iterOpts, StandardDurability, noopStatsReporter) + iter := newPebbleIterator(p.db, iterOpts, StandardDurability, nil /* statsReporter */) // Seeking into the table catches the corruption. ok, err := iter.SeekEngineKeyGE(ek)