From 587d59911506af9c709363952844be00da5949ff Mon Sep 17 00:00:00 2001 From: Jim Larson Date: Wed, 8 Nov 2023 11:54:24 -0800 Subject: [PATCH] Revert "fix: halt-height behavior is not deterministic (#305)" This reverts commit 7b8423aede6b858de100afd3f0c4de93a5f8939c. This commit caused test failures in agoric-sdk. Reverting to land other changes in agoric-sdk. We'll restore the change and try to debug the failure against a smaller diff in the future. --- CHANGELOG-Agoric.md | 1 - baseapp/abci.go | 61 ++++++++++++++++++++++++++++++-------------- baseapp/abci_test.go | 58 ----------------------------------------- 3 files changed, 42 insertions(+), 78 deletions(-) diff --git a/CHANGELOG-Agoric.md b/CHANGELOG-Agoric.md index debebb5395fd..10dca93a9db4 100644 --- a/CHANGELOG-Agoric.md +++ b/CHANGELOG-Agoric.md @@ -45,4 +45,3 @@ Ref: https://keepachangelog.com/en/1.0.0/ ### Bug Fixes * (snapshots) [#304](https://github.com/agoric-labs/cosmos-sdk/pull/304) raise the per snapshot item limit. Fixes [Agoric/agoric-sdk#8325](https://github.com/Agoric/agoric-sdk/issues/8325) -* (baseapp) [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) Make sure we don't execute blocks beyond the halt height. Port of [cosmos/cosmos-sdk#16639](https://github.com/cosmos/cosmos-sdk/pull/16639) diff --git a/baseapp/abci.go b/baseapp/abci.go index 1c04c4334262..b046de0ae0f9 100644 --- a/baseapp/abci.go +++ b/baseapp/abci.go @@ -5,8 +5,10 @@ import ( "encoding/json" "errors" "fmt" + "os" "sort" "strings" + "syscall" "time" "github.com/gogo/protobuf/proto" @@ -131,8 +133,6 @@ func (app *BaseApp) BeginBlock(req abci.RequestBeginBlock) (res abci.ResponseBeg )) } - app.checkHalt(req.Header.Height, req.Header.Time) - if err := app.validateHeight(req); err != nil { panic(err) } @@ -303,23 +303,6 @@ func (app *BaseApp) DeliverTx(req abci.RequestDeliverTx) (res abci.ResponseDeliv } } -// checkHalt checkes if height or time exceeds halt-height or halt-time respectively. -func (app *BaseApp) checkHalt(height int64, time time.Time) { - var halt bool - switch { - case app.haltHeight > 0 && uint64(height) > app.haltHeight: - halt = true - - case app.haltTime > 0 && time.Unix() > int64(app.haltTime): - halt = true - } - - if halt { - app.logger.Info("halt per configuration", "height", app.haltHeight, "time", app.haltTime) - panic(errors.New("halt application")) - } -} - // Commit implements the ABCI interface. It will commit all state that exists in // the deliver state's multi-store and includes the resulting commit ID in the // returned abci.ResponseCommit. Commit will set the check state based on the @@ -376,6 +359,24 @@ func (app *BaseApp) CommitWithoutSnapshot() (abci.ResponseCommit, int64) { // empty/reset the deliver state app.deliverState = nil + var halt bool + + switch { + case app.haltHeight > 0 && uint64(header.Height) >= app.haltHeight: + halt = true + + case app.haltTime > 0 && header.Time.Unix() >= int64(app.haltTime): + halt = true + } + + if halt { + // Halt the binary and allow Tendermint to receive the ResponseCommit + // response with the commit ID hash. This will allow the node to successfully + // restart and process blocks assuming the halt configuration has been + // reset or moved to a more distant value. + app.halt() + } + var snapshotHeight int64 if app.snapshotInterval > 0 && uint64(header.Height)%app.snapshotInterval == 0 { snapshotHeight = header.Height @@ -384,6 +385,28 @@ func (app *BaseApp) CommitWithoutSnapshot() (abci.ResponseCommit, int64) { return res, snapshotHeight } +// halt attempts to gracefully shutdown the node via SIGINT and SIGTERM falling +// back on os.Exit if both fail. +func (app *BaseApp) halt() { + app.logger.Info("halting node per configuration", "height", app.haltHeight, "time", app.haltTime) + + p, err := os.FindProcess(os.Getpid()) + if err == nil { + // attempt cascading signals in case SIGINT fails (os dependent) + sigIntErr := p.Signal(syscall.SIGINT) + sigTermErr := p.Signal(syscall.SIGTERM) + + if sigIntErr == nil || sigTermErr == nil { + return + } + } + + // Resort to exiting immediately if the process could not be found or killed + // via SIGINT/SIGTERM signals. + app.logger.Info("failed to send SIGINT/SIGTERM; exiting...") + os.Exit(0) +} + // Snapshot takes a snapshot of the current state and prunes any old snapshottypes. // It should be started as a goroutine func (app *BaseApp) Snapshot(height int64) { diff --git a/baseapp/abci_test.go b/baseapp/abci_test.go index 7a584dfba0b9..18b8991ee144 100644 --- a/baseapp/abci_test.go +++ b/baseapp/abci_test.go @@ -3,9 +3,7 @@ package baseapp import ( "encoding/json" "fmt" - "strings" "testing" - "time" "github.com/stretchr/testify/require" abci "github.com/tendermint/tendermint/abci/types" @@ -220,59 +218,3 @@ func (ps *paramStore) Get(_ sdk.Context, key []byte, ptr interface{}) { panic(err) } } - -func TestABCI_HaltChain(t *testing.T) { - logger := defaultLogger() - db := dbm.NewMemDB() - name := t.Name() - - testCases := []struct { - name string - haltHeight uint64 - haltTime uint64 - blockHeight int64 - blockTime int64 - expHalt bool - }{ - {"default", 0, 0, 10, 0, false}, - {"halt-height-edge", 10, 0, 10, 0, false}, - {"halt-height", 10, 0, 11, 0, true}, - {"halt-time-edge", 0, 10, 1, 10, false}, - {"halt-time", 0, 10, 1, 11, true}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - defer func() { - rec := recover() - var err error - if rec != nil { - err = rec.(error) - } - if !tc.expHalt { - require.NoError(t, err) - } else { - require.Error(t, err) - require.True(t, strings.HasPrefix(err.Error(), "halt application")) - } - }() - - app := NewBaseApp( - name, logger, db, nil, - SetHaltHeight(tc.haltHeight), - SetHaltTime(tc.haltTime), - ) - - app.InitChain(abci.RequestInitChain{ - InitialHeight: tc.blockHeight, - }) - - app.BeginBlock(abci.RequestBeginBlock{ - Header: tmproto.Header{ - Height: tc.blockHeight, - Time: time.Unix(tc.blockTime, 0), - }, - }) - }) - } -}