From 918f240331cbcc23f0205d1a9a60cb5f90a040f9 Mon Sep 17 00:00:00 2001 From: ptrus Date: Tue, 28 Apr 2020 14:19:13 +0200 Subject: [PATCH] go/consensus/tendermint: sync-worker additionally check block timestamps Sync-worker relies on Tendermint fast-sync to determine if the node is still catching up. This PR adds an additional condition that the latest block is not older than 1 minute. This prevents cases where a node stops fast-syncing before it has actually caught up. --- .changelog/2873.feature.md | 6 +++++ go/consensus/tendermint/tendermint.go | 34 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 .changelog/2873.feature.md diff --git a/.changelog/2873.feature.md b/.changelog/2873.feature.md new file mode 100644 index 00000000000..51212c5d068 --- /dev/null +++ b/.changelog/2873.feature.md @@ -0,0 +1,6 @@ +go/consensus/tendermint: sync-worker additionally check block timestamps + +Sync-worker relied on Tendermint fast-sync to determine if the node is still +catching up. This PR adds an additional condition that the latest block is not +older than 1 minute. This prevents cases where a node stops fast-syncing +before it has actually caught up. diff --git a/go/consensus/tendermint/tendermint.go b/go/consensus/tendermint/tendermint.go index 0b2ed4faca0..42070a65217 100644 --- a/go/consensus/tendermint/tendermint.go +++ b/go/consensus/tendermint/tendermint.go @@ -126,6 +126,12 @@ const ( // StateDir is the name of the directory located inside the node's data // directory which contains the tendermint state. StateDir = "tendermint" + + // Time difference threshold used when considering if node is done with + // initial syncing. If difference is greater than the specified threshold + // the node is considered not yet synced. + // NOTE: this is only used during the initial sync. 
+ syncWorkerLastBlockTimeDiffThreshold = 1 * time.Minute ) var ( @@ -1173,17 +1179,37 @@ func (t *tendermintService) syncWorker() { case <-t.node.Quit(): return case <-time.After(1 * time.Second): - isSyncing, err := checkSyncFn() + isFastSyncing, err := checkSyncFn() if err != nil { t.Logger.Error("Failed to poll FastSync", "err", err, ) return } - if !isSyncing { + if !isFastSyncing { t.Logger.Info("Tendermint Node finished fast-sync") - close(t.syncedCh) - return + + // Check latest block time. + tmBlock, err := t.GetTendermintBlock(t.ctx, consensusAPI.HeightLatest) + if err != nil { + t.Logger.Error("Failed to get tendermint block", + "err", err, + ) + return + } + + now := time.Now() + // No committed blocks or latest block within threshold. + if tmBlock == nil || now.Sub(tmBlock.Header.Time) < syncWorkerLastBlockTimeDiffThreshold { + close(t.syncedCh) + return + } + + t.Logger.Debug("Node still syncing", + "currentTime", now, + "latestBlockTime", tmBlock.Time, + "diff", now.Sub(tmBlock.Time), + ) } } }