Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: fix snapshot missing when recovery from crash #23496

Merged
merged 8 commits into from
Nov 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 53 additions & 10 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -1689,11 +1689,10 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er

// Peek the error for the first block to decide the directing import logic
it := newInsertIterator(chain, results, bc.validator)

block, err := it.next()

// Left-trim all the known blocks
if err == ErrKnownBlock {
// Left-trim all the known blocks that don't need to build snapshot
if bc.skipBlock(err, it) {
// First block (and state) is known
// 1. We did a roll-back, and should now do a re-import
// 2. The block is stored as a sidechain, and is lying about its stateroot, and passes a stateroot
Expand All @@ -1704,7 +1703,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
localTd = bc.GetTd(current.Hash(), current.NumberU64())
externTd = bc.GetTd(block.ParentHash(), block.NumberU64()-1) // The first block can't be nil
)
for block != nil && err == ErrKnownBlock {
for block != nil && bc.skipBlock(err, it) {
externTd = new(big.Int).Add(externTd, block.Difficulty())
if localTd.Cmp(externTd) < 0 {
break
Expand All @@ -1722,7 +1721,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
// When node runs a fast sync again, it can re-import a batch of known blocks via
// `insertChain` while a part of them have higher total difficulty than current
// head full block(new pivot point).
for block != nil && err == ErrKnownBlock {
for block != nil && bc.skipBlock(err, it) {
log.Debug("Writing previously known block", "number", block.Number(), "hash", block.Hash())
if err := bc.writeKnownBlock(block); err != nil {
return it.index, err
Expand Down Expand Up @@ -1754,8 +1753,10 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
// If there are any still remaining, mark as ignored
return it.index, err

// Some other error occurred, abort
case err != nil:
// Some other error (except ErrKnownBlock) occurred, abort.
// ErrKnownBlock is allowed here since some known blocks
// still need re-execution to generate snapshots that are missing
case err != nil && !errors.Is(err, ErrKnownBlock):
bc.futureBlocks.Remove(block.Hash())
stats.ignored += len(it.chain)
bc.reportBlock(block, nil, err)
Expand All @@ -1773,7 +1774,7 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
}
}()

for ; block != nil && err == nil || err == ErrKnownBlock; block, err = it.next() {
for ; block != nil && err == nil || errors.Is(err, ErrKnownBlock); block, err = it.next() {
// If the chain is terminating, stop processing blocks
if bc.insertStopped() {
log.Debug("Abort during block processing")
Expand All @@ -1788,8 +1789,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, er
// Clique blocks where they can share state among each other, so importing an
// older block might complete the state of the subsequent one. In this case,
// just skip the block (we already validated it once fully (and crashed), since
// its header and body was already in the database).
if err == ErrKnownBlock {
// its header and body were already in the database). But if the corresponding
// snapshot layer is missing, forcibly rerun the execution to build it.
if bc.skipBlock(err, it) {
logger := log.Debug
if bc.chainConfig.Clique == nil {
logger = log.Warn
Expand Down Expand Up @@ -2266,6 +2268,47 @@ func (bc *BlockChain) futureBlocksLoop() {
}
}

// skipBlock reports whether the block currently being imported may be skipped,
// i.e. treated as already fully processed instead of being executed again.
//
// err is the error returned for the block by the insert iterator and it is the
// iterator positioned on that block. The result is true only when err is
// ErrKnownBlock and re-executing the block is not needed to keep the snapshot
// layers contiguous.
func (bc *BlockChain) skipBlock(err error, it *insertIterator) bool {
	switch {
	case !errors.Is(err, ErrKnownBlock):
		// Bypassing is only ever possible when the sole error is ErrKnownBlock,
		// meaning all checks passed and the block and state are already present.
		return false
	case bc.snaps == nil:
		// Snapshots are not in use: block and (trie-) state are both available,
		// so there is nothing left to build.
		return true
	}
	// The iterator is expected to sit on a valid block here, so the header
	// is not nil.
	header := it.current()

	// The snapshot-state for this block exists as well: nothing to do.
	if bc.snaps.Snapshot(header.Root) != nil {
		return true
	}
	// Trie-state is present but snapshot-state is not. Whether the block has
	// to be processed depends on the parent: if the parent has a snapshot
	// layer, skipping this block would leave a gap in the layers.
	//
	// Resolve the parent state root, preferring the previous header of the
	// batch and falling back to a lookup by parent hash.
	var parentRoot common.Hash
	if prev := it.previous(); prev != nil {
		parentRoot = prev.Root
	} else if stored := bc.GetHeaderByHash(header.ParentHash); stored != nil {
		parentRoot = stored.Root
	}
	if parentRoot == (common.Hash{}) {
		return false // Theoretically impossible case
	}
	// Skip only when the parent is missing its snapshot too; otherwise the
	// block must be processed.
	return bc.snaps.Snapshot(parentRoot) == nil
}

// maintainTxIndex is responsible for the construction and deletion of the
// transaction index.
//
Expand Down
8 changes: 8 additions & 0 deletions core/blockchain_insert.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ func (it *insertIterator) previous() *types.Header {
return it.chain[it.index-1].Header()
}

// current returns the header of the block at the iterator's present index, or
// nil when that index is -1 or lies at/after the end of the chain.
func (it *insertIterator) current() *types.Header {
	if idx := it.index; idx != -1 && idx < len(it.chain) {
		return it.chain[idx].Header()
	}
	return nil
}

// first returns the first block in the iterator.
func (it *insertIterator) first() *types.Block {
return it.chain[0]
Expand Down
121 changes: 121 additions & 0 deletions core/blockchain_repair_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1863,3 +1863,124 @@ func testRepair(t *testing.T, tt *rewindTest, snapshots bool) {
t.Errorf("Frozen block count mismatch: have %d, want %d", frozen, tt.expFrozen)
}
}

// TestIssue23496 tests scenario described in https://github.com/ethereum/go-ethereum/pull/23496#issuecomment-926393893
// Credits to @zzyalbert for finding the issue.
//
// Local chain owns these blocks:
// G B1 B2 B3 B4
// B1: state committed
// B2: snapshot disk layer
// B3: state committed
// B4: head block
//
// Crash happens without fully persisting snapshot and in-memory states,
// chain rewinds itself to the B1 (skip B3 in order to recover snapshot)
// In this case the snapshot layer of B3 is not created because of existent
// state.
//
// The test reopens the database after the simulated crash, checks that the
// head full block is B1, re-imports B2-B4 and finally asserts that a snapshot
// layer exists for the state root of B3.
func TestIssue23496(t *testing.T) {
	// It's hard to follow the test case, visualize the input
	//log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true))))

	// Create a temporary persistent database
	datadir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("Failed to create temporary datadir: %v", err)
	}
	// The freshly created directory is removed right away; only its unique
	// path is kept and handed to the database constructor below.
	os.RemoveAll(datadir)
	// Clean up the on-disk data once the test is done. This defer is registered
	// first, so it runs last, after the deferred database closes and chain stop.
	defer os.RemoveAll(datadir)

	db, err := rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "", false)
	if err != nil {
		t.Fatalf("Failed to create persistent database: %v", err)
	}
	defer db.Close() // Might double close, should be fine

	// Initialize a fresh chain
	var (
		genesis = (&Genesis{BaseFee: big.NewInt(params.InitialBaseFee)}).MustCommit(db)
		engine  = ethash.NewFullFaker()
		config  = &CacheConfig{
			TrieCleanLimit: 256,
			TrieDirtyLimit: 256,
			TrieTimeLimit:  5 * time.Minute,
			SnapshotLimit:  256,
			SnapshotWait:   true,
		}
	)
	chain, err := NewBlockChain(db, config, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
	if err != nil {
		t.Fatalf("Failed to create chain: %v", err)
	}
	blocks, _ := GenerateChain(params.TestChainConfig, genesis, engine, rawdb.NewMemoryDatabase(), 4, func(i int, b *BlockGen) {
		b.SetCoinbase(common.Address{0x02})
		b.SetDifficulty(big.NewInt(1000000))
	})

	// Insert block B1 and commit the state into disk
	if _, err := chain.InsertChain(blocks[:1]); err != nil {
		t.Fatalf("Failed to import canonical chain start: %v", err)
	}
	chain.stateCache.TrieDB().Commit(blocks[0].Root(), true, nil)

	// Insert block B2 and commit the snapshot into disk
	if _, err := chain.InsertChain(blocks[1:2]); err != nil {
		t.Fatalf("Failed to import canonical chain start: %v", err)
	}
	if err := chain.snaps.Cap(blocks[1].Root(), 0); err != nil {
		t.Fatalf("Failed to flatten snapshots: %v", err)
	}

	// Insert block B3 and commit the state into disk
	if _, err := chain.InsertChain(blocks[2:3]); err != nil {
		t.Fatalf("Failed to import canonical chain start: %v", err)
	}
	chain.stateCache.TrieDB().Commit(blocks[2].Root(), true, nil)

	// Insert the remaining blocks
	if _, err := chain.InsertChain(blocks[3:]); err != nil {
		t.Fatalf("Failed to import canonical chain tail: %v", err)
	}

	// Pull the plug on the database, simulating a hard crash. The first chain
	// instance is deliberately left without a Stop call.
	db.Close()

	// Start a new blockchain back up and see where the repair leads us
	db, err = rawdb.NewLevelDBDatabaseWithFreezer(datadir, 0, 0, datadir, "", false)
	if err != nil {
		t.Fatalf("Failed to reopen persistent database: %v", err)
	}
	defer db.Close()

	chain, err = NewBlockChain(db, nil, params.AllEthashProtocolChanges, engine, vm.Config{}, nil, nil)
	if err != nil {
		t.Fatalf("Failed to recreate chain: %v", err)
	}
	defer chain.Stop()

	// After recovery the header and fast-block heads are expected to stay at
	// B4, while the full-block head is expected to be back at B1.
	if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
		t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
	}
	if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) {
		t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
	}
	if head := chain.CurrentBlock(); head.NumberU64() != uint64(1) {
		t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(1))
	}

	// Reinsert B2-B4
	if _, err := chain.InsertChain(blocks[1:]); err != nil {
		t.Fatalf("Failed to import canonical chain tail: %v", err)
	}
	if head := chain.CurrentHeader(); head.Number.Uint64() != uint64(4) {
		t.Errorf("Head header mismatch: have %d, want %d", head.Number, 4)
	}
	if head := chain.CurrentFastBlock(); head.NumberU64() != uint64(4) {
		t.Errorf("Head fast block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
	}
	if head := chain.CurrentBlock(); head.NumberU64() != uint64(4) {
		t.Errorf("Head block mismatch: have %d, want %d", head.NumberU64(), uint64(4))
	}
	// The snapshot layer of B3 must have been rebuilt even though its trie
	// state was already on disk.
	if layer := chain.Snapshots().Snapshot(blocks[2].Root()); layer == nil {
		t.Error("Failed to regenerate the snapshot of known state")
	}
}