From e3cbae3fce5399b1eb57eed64294c429053b32ec Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Tue, 22 Oct 2024 13:23:09 -0700 Subject: [PATCH] fix: resolve indexer infinite loop (#82) On very slow drives, or when run with limited resources, a node can have a delay between a block being saved and its block_results being saved. If the block exists but the block_results do not, an infinite loop occurs: the indexer repeatedly requests the block and block_results until they both exist. The lack of any delay between requests can further constrain the node's resources and result in many calls for block_results before they are committed. This commit updates the condition for waiting so that it also applies whenever fetching the block or its block_results returned an error. Previously, if the indexer failed to find the block_results, it would bombard the node with requests for them without backing off. With this change, such errors trigger a wait. After waiting for either a new block or the timeout, the block results are more likely to exist. --- server/indexer_service.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/server/indexer_service.go b/server/indexer_service.go index 88e8c56240..bfb729c158 100644 --- a/server/indexer_service.go +++ b/server/indexer_service.go @@ -23,6 +23,7 @@ import ( "github.com/cenkalti/backoff/v4" "github.com/cometbft/cometbft/libs/service" rpcclient "github.com/cometbft/cometbft/rpc/client" + coretypes "github.com/cometbft/cometbft/rpc/core/types" "github.com/cometbft/cometbft/types" ethermint "github.com/evmos/ethermint/types" @@ -111,9 +112,18 @@ func (eis *EVMIndexerService) OnStart() error { if lastBlock == -1 { lastBlock = latestBlock } + // blockErr indicates an error fetching an expected block or its results + var blockErr error for { - if latestBlock <= lastBlock { - // nothing to index. 
wait for signal of new block + var block *coretypes.ResultBlock + var blockResult *coretypes.ResultBlockResults + if latestBlock <= lastBlock || blockErr != nil { + // two cases: + // 1. nothing to index (indexer is caught up). wait for signal of new block. + // 2. previous attempt to index errored (failed to fetch the Block or BlockResults). + // in this case, wait before retrying the data fetching, rather than infinite looping + // a failing fetch. this can occur due to drive latency between the block existing and its + // block_results getting saved. select { case <-newBlockSignal: case <-time.After(NewBlockWaitTimeout): @@ -121,14 +131,14 @@ func (eis *EVMIndexerService) OnStart() error { continue } for i := lastBlock + 1; i <= latestBlock; i++ { - block, err := eis.client.Block(ctx, &i) - if err != nil { - eis.Logger.Error("failed to fetch block", "height", i, "err", err) + block, blockErr = eis.client.Block(ctx, &i) + if blockErr != nil { + eis.Logger.Error("failed to fetch block", "height", i, "err", blockErr) break } - blockResult, err := eis.client.BlockResults(ctx, &i) - if err != nil { - eis.Logger.Error("failed to fetch block result", "height", i, "err", err) + blockResult, blockErr = eis.client.BlockResults(ctx, &i) + if blockErr != nil { + eis.Logger.Error("failed to fetch block result", "height", i, "err", blockErr) break } if err := eis.txIdxr.IndexBlock(block.Block, blockResult.TxsResults); err != nil {