From 80e4eee65cf73ca4b9572078791e40278ad17084 Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Mon, 27 Jun 2022 12:01:00 +0200 Subject: [PATCH] go/consensus/tendermint: Make sure DBs are only closed during cleanup Tendermint Core will close the block/state store DBs during Stop which can make certain in-flight queries trigger a panic due to the database being already closed. This commit introduces a DB wrapper that enables us to defer closing the DB until Cleanup is called (as at that point all of the services have already been stopped). --- go/consensus/tendermint/db/closer.go | 48 +++++++++++++++++++++++ go/consensus/tendermint/full/archive.go | 13 +------ go/consensus/tendermint/full/common.go | 51 ++++++++++++++----------- go/consensus/tendermint/full/full.go | 3 +- go/consensus/tendermint/full/light.go | 4 ++ 5 files changed, 84 insertions(+), 35 deletions(-) create mode 100644 go/consensus/tendermint/db/closer.go diff --git a/go/consensus/tendermint/db/closer.go b/go/consensus/tendermint/db/closer.go new file mode 100644 index 00000000000..fd92f850892 --- /dev/null +++ b/go/consensus/tendermint/db/closer.go @@ -0,0 +1,48 @@ +package db + +import ( + "sync" + + dbm "github.com/tendermint/tm-db" +) + +// Closer manages closing of multiple Tendermint Core databases. +type Closer struct { + l sync.Mutex + dbs []dbm.DB +} + +// Close closes all the managed databases. +func (c *Closer) Close() { + c.l.Lock() + defer c.l.Unlock() + + for _, db := range c.dbs { + _ = db.Close() + } +} + +// NewCloser creates a new empty database closer. +func NewCloser() *Closer { + return &Closer{} +} + +type dbWithCloser struct { + dbm.DB +} + +func (d *dbWithCloser) Close() error { + // Do nothing unless explicitly closed via the closer. + return nil +} + +// WithCloser wraps a Tendermint Core database instance so that it can only be closed by the given +// closer instance. Direct attempts to close the returned database instance will be ignored. +func WithCloser(db dbm.DB, closer *Closer) dbm.DB { + closer.l.Lock() + defer closer.l.Unlock() + + closer.dbs = append(closer.dbs, db) + + return &dbWithCloser{db} +} diff --git a/go/consensus/tendermint/full/archive.go b/go/consensus/tendermint/full/archive.go index 20c2ee2913f..eb754fe4bb8 100644 --- a/go/consensus/tendermint/full/archive.go +++ b/go/consensus/tendermint/full/archive.go @@ -88,17 +88,6 @@ func (srv *archiveService) Stop() { }) } -func (srv *archiveService) Cleanup() { - srv.commonNode.Cleanup() - - if err := srv.blockStoreDB.Close(); err != nil { - srv.Logger.Error("error on closing block store", "err", err) - } - if err := srv.stateStore.Close(); err != nil { - srv.Logger.Error("error on closing state store", "err", err) - } -} - // Implements consensusAPI.Backend. func (srv *archiveService) Quit() <-chan struct{} { return srv.quitCh @@ -207,6 +196,7 @@ func NewArchive( if err != nil { return nil, err } + srv.blockStoreDB = db.WithCloser(srv.blockStoreDB, srv.dbCloser) // NOTE: DBContext uses a full tendermint config but the only thing that is actually used // is the data dir field. @@ -215,6 +205,7 @@ func NewArchive( if err != nil { return nil, err } + stateDB = db.WithCloser(stateDB, srv.dbCloser) srv.stateStore = state.NewStore(stateDB) tmGenDoc, err := api.GetTendermintGenesisDocument(genesisProvider) diff --git a/go/consensus/tendermint/full/common.go b/go/consensus/tendermint/full/common.go index 590eb39b360..f641078dc46 100644 --- a/go/consensus/tendermint/full/common.go +++ b/go/consensus/tendermint/full/common.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "sync/atomic" "github.com/spf13/viper" tmcore "github.com/tendermint/tendermint/rpc/core" @@ -31,6 +32,7 @@ import ( tmbeacon "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/beacon" "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/common" "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/crypto" + "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/db" tmgovernance "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/governance" tmkeymanager "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/keymanager" tmregistry "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/registry" @@ -82,27 +84,28 @@ type commonNode struct { // These stores must be populated by the parent before the node is deemed ready. blockStoreDB tmdb.DB stateStore state.Store + dbCloser *db.Closer - // Guarded by the lock. - isStarted, isInitialized bool - + state uint32 startedCh chan struct{} parentNode api.Backend } -func (n *commonNode) initialized() bool { - n.Lock() - defer n.Unlock() +// Possible internal node states. +const ( + stateNotReady = 0 + stateInitialized = 1 + stateStarted = 2 + stateStopping = 3 +) - return n.isInitialized +func (n *commonNode) initialized() bool { + return atomic.LoadUint32(&n.state) >= stateInitialized } func (n *commonNode) started() bool { - n.Lock() - defer n.Unlock() - - return n.isStarted + return atomic.LoadUint32(&n.state) >= stateStarted } func (n *commonNode) ensureStarted(ctx context.Context) error { @@ -115,6 +118,10 @@ func (n *commonNode) ensureStarted(ctx context.Context) error { return ctx.Err() } + if atomic.LoadUint32(&n.state) >= stateStopping { + return fmt.Errorf("node is shutting down") + } + return nil } @@ -127,12 +134,8 @@ func (n *commonNode) start() error { n.Lock() defer n.Unlock() - if n.isStarted { - return fmt.Errorf("tendermint/common_node: already started") - } - - if !n.isInitialized { - return fmt.Errorf("tendermint/common_node: not initialized") + if atomic.LoadUint32(&n.state) != stateInitialized { + return fmt.Errorf("tendermint/common_node: not in initialized state") } if err := n.mux.Start(); err != nil { @@ -143,9 +146,7 @@ func (n *commonNode) start() error { } func (n *commonNode) finishStart() { - n.Lock() - n.isStarted = true - n.Unlock() + atomic.StoreUint32(&n.state, stateStarted) close(n.startedCh) } @@ -153,19 +154,21 @@ func (n *commonNode) stop() { n.Lock() defer n.Unlock() - if !n.isStarted || !n.isInitialized { + if !n.started() { return } n.svcMgr.Stop() n.mux.Stop() + + atomic.StoreUint32(&n.state, stateStopping) } func (n *commonNode) initialize() error { n.Lock() defer n.Unlock() - if n.isInitialized { + if atomic.LoadUint32(&n.state) != stateNotReady { return nil } @@ -275,7 +278,7 @@ func (n *commonNode) initialize() error { } } - n.isInitialized = true + atomic.StoreUint32(&n.state, stateInitialized) return nil } @@ -289,6 +292,7 @@ func (n *commonNode) Started() <-chan struct{} { func (n *commonNode) Cleanup() { n.serviceClientsWg.Wait() n.svcMgr.Cleanup() + n.dbCloser.Close() } // Implements consensusAPI.Backend. @@ -803,6 +807,7 @@ func newCommonNode( genesis: genesisDoc, dataDir: dataDir, svcMgr: cmbackground.NewServiceManager(logging.GetLogger("tendermint/servicemanager")), + dbCloser: db.NewCloser(), startedCh: make(chan struct{}), }, nil } diff --git a/go/consensus/tendermint/full/full.go b/go/consensus/tendermint/full/full.go index b2633601b7f..76f07f18c2e 100644 --- a/go/consensus/tendermint/full/full.go +++ b/go/consensus/tendermint/full/full.go @@ -658,10 +658,11 @@ func (t *fullService) lazyInit() error { // Tendermint does not expose a way to access the state database and we need it to bypass some // stupid things like pagination on the in-process "client". wrapDbProvider := func(dbCtx *tmnode.DBContext) (tmdb.DB, error) { - db, derr := dbProvider(dbCtx) + rawDB, derr := dbProvider(dbCtx) if derr != nil { return nil, derr } + db := db.WithCloser(rawDB, t.dbCloser) switch dbCtx.ID { case "state": diff --git a/go/consensus/tendermint/full/light.go b/go/consensus/tendermint/full/light.go index 276336a7b11..4b5ed3b8ad3 100644 --- a/go/consensus/tendermint/full/light.go +++ b/go/consensus/tendermint/full/light.go @@ -61,6 +61,10 @@ func (n *commonNode) GetLightBlock(ctx context.Context, height int64) (*consensu // Implements LightClientBackend. func (n *commonNode) GetParameters(ctx context.Context, height int64) (*consensusAPI.Parameters, error) { + if err := n.ensureStarted(ctx); err != nil { + return nil, err + } + tmHeight, err := n.heightToTendermintHeight(height) if err != nil { return nil, err