diff --git a/.changelog/4403.bugfix.md b/.changelog/4403.bugfix.md new file mode 100644 index 00000000000..776eae55162 --- /dev/null +++ b/.changelog/4403.bugfix.md @@ -0,0 +1,7 @@ +go/worker/storage: Limit number of rounds to fetch before applying them + +Previously, when a node was syncing from genesis, it would try to fetch all +unapplied rounds before applying them. +This could mean trying to fetch 100k+ rounds before applying them. +In combination with failing to fetch rounds and random retrying, this could +make the syncing process unbearably slow. diff --git a/go/worker/storage/committee/node.go b/go/worker/storage/committee/node.go index c0dec26d936..c83ce8feae4 100644 --- a/go/worker/storage/committee/node.go +++ b/go/worker/storage/committee/node.go @@ -98,6 +98,10 @@ const ( // Trying to wait for rounds further in the future will return an error immediately. roundWaitConsensusOffset = uint64(1) + // maxInFlightRounds is the maximum number of rounds that should be fetched before waiting + // for them to be applied. + maxInFlightRounds = 100 + // getDiffTimeout is the timeout for fetching a diff from a node. getDiffTimeout = 15 * time.Second ) @@ -1243,6 +1247,10 @@ func (n *Node) worker() { // nolint: gocyclo } if !ok { + if len(syncingRounds) >= maxInFlightRounds { + break + } + syncing = &inFlight{ awaitingRetry: outstandingMaskFull, }