Skip to content

Commit

Permalink
Merge pull request #3791 from oasisprotocol/ptrus/fix/storage-register
Browse files Browse the repository at this point in the history
go/roothash/reindexBlocks: return latest known round if no new rounds are reindexed
  • Loading branch information
ptrus authored Mar 17, 2021
2 parents a1120a7 + 5bb676e commit 9df8f95
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .changelog/3791.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
go/roothash/reindexBlocks: return latest known round if no new rounds are reindexed

This fixes a case where a storage node would not register if it was restarted
while already synced and no new runtime rounds had been produced (e.g. because
the runtime is suspended).
11 changes: 11 additions & 0 deletions go/consensus/tendermint/roothash/roothash.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,17 @@ func (sc *serviceClient) reindexBlocks(currentHeight int64, bh api.BlockHistory)
}
}

if lastRound == api.RoundInvalid {
sc.logger.Debug("no new round reindexed, return latest known round")
switch blk, err := bh.GetLatestBlock(sc.ctx); err {
case api.ErrNotFound:
case nil:
lastRound = blk.Header.Round
default:
return lastRound, fmt.Errorf("failed to get latest block: %w", err)
}
}

sc.logger.Debug("block reindex complete",
"last_round", lastRound,
)
Expand Down
18 changes: 18 additions & 0 deletions go/oasis-test-runner/scenario/e2e/runtime/runtime_dynamic.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,24 @@ func (sc *runtimeDynamicImpl) Run(childEnv *env.Env) error { // nolint: gocyclo
return err
}

// Restart nodes to test that the nodes will re-register although
// the runtime is suspended.
sc.Logger.Info("Restarting storage node to ensure it re-registers")
if err = sc.Net.StorageWorkers()[0].Stop(); err != nil {
return fmt.Errorf("failed to stop node: %w", err)
}
if err = sc.Net.StorageWorkers()[0].Start(); err != nil {
return fmt.Errorf("failed to start node: %w", err)
}

sc.Logger.Info("Restarting compute node to ensure it re-registers")
if err = sc.Net.ComputeWorkers()[0].Stop(); err != nil {
return fmt.Errorf("failed to stop node: %w", err)
}
if err = sc.Net.ComputeWorkers()[0].Start(); err != nil {
return fmt.Errorf("failed to start node: %w", err)
}

// Another epoch transition to make sure the runtime keeps being suspended.
if err = sc.epochTransition(ctx); err != nil {
return err
Expand Down

0 comments on commit 9df8f95

Please sign in to comment.