Skip to content

Commit

Permalink
storage: Write PreventStartupFile on Node SSTFile Corruption
Browse files Browse the repository at this point in the history
Currently, if a node encounters sstable corruption, it crashes and
tries to restart automatically. Since the node is likely to crash
again, we would like to prevent it from attempting to restart
itself. To that end, this PR creates a `PreventStartupFile` when a node
experiences sstable corruption.

Fixes: cockroachdb#103899
Release-note: None
  • Loading branch information
raggar committed Jul 28, 2023
1 parent 167da65 commit d1fd1f6
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions pkg/storage/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -1268,10 +1268,33 @@ func (p *Pebble) async(fn func()) {
}()
}

// writePreventStartupFile creates a file that will prevent nodes from
// automatically restarting after experiencing sstable corruption.
//
// The file is placed at base.PreventedStartupFile(auxDir), where auxDir is the
// engine's auxiliary directory, and is written through p.unencryptedFS. Its
// contents are a human-readable notice that includes the file's own path.
//
// The operation is best-effort: the function returns nothing, and any failure
// to create the directory or write the file is only logged as a warning so
// that the caller can proceed with its own termination handling.
func (p *Pebble) writePreventStartupFile(ctx context.Context) {
	auxDir := p.GetAuxiliaryDir()
	// Make sure the auxiliary directory exists before writing into it. A
	// failure here is logged rather than swallowed; the write below is still
	// attempted and will report its own error if the directory is unusable.
	if err := p.MkdirAll(auxDir, os.ModePerm); err != nil {
		log.Warningf(ctx, "unable to create auxiliary directory %s: %v", auxDir, err)
	}
	path := base.PreventedStartupFile(auxDir)

	preventStartupMsg := fmt.Sprintf(`ATTENTION:
this node is terminating because of sstable corruption.
Please contact the CockroachDB support team. It is not necessarily safe
to replace this node; cluster data may still be at risk of corruption.
A file preventing this node from restarting was placed at:
%s
`, path)

	if err := fs.WriteFile(p.unencryptedFS, path, []byte(preventStartupMsg)); err != nil {
		// Include the target path so the operator can tell which file is
		// missing and why the node may still attempt to restart.
		log.Warningf(ctx, "unable to write prevent-startup file %s: %v", path, err)
	}
}

func (p *Pebble) makeMetricEtcEventListener(ctx context.Context) pebble.EventListener {
return pebble.EventListener{
BackgroundError: func(err error) {
if errors.Is(err, pebble.ErrCorruption) {
p.writePreventStartupFile(ctx)
log.Fatalf(ctx, "local corruption detected: %v", err)
}
},
Expand Down

0 comments on commit d1fd1f6

Please sign in to comment.