diff --git a/.changelog/2815.internal.1.md b/.changelog/2815.internal.1.md new file mode 100644 index 00000000000..1f1e1f84457 --- /dev/null +++ b/.changelog/2815.internal.1.md @@ -0,0 +1,4 @@ +go/consensus/tendermint: support DebugUnsafeReplayRecoverCorruptedWAL + +Adds support for setting tendermint DebugUnsafeReplayRecoverCorruptedWAL and +enables it in daily txsource test runs. diff --git a/.changelog/2815.internal.2.md b/.changelog/2815.internal.2.md new file mode 100644 index 00000000000..a2f18ace6b2 --- /dev/null +++ b/.changelog/2815.internal.2.md @@ -0,0 +1,4 @@ +oasis-test-runner/txsource: increase number of validators + +Increase the number of validators used in txsource tests so that consensus can +keep making progress when one of the nodes is restarted. diff --git a/go/consensus/tendermint/tendermint.go b/go/consensus/tendermint/tendermint.go index e50710a9603..e4ed7464e1b 100644 --- a/go/consensus/tendermint/tendermint.go +++ b/go/consensus/tendermint/tendermint.go @@ -104,6 +104,10 @@ const ( // CfgDebugP2PAllowDuplicateIP allows multiple connections from the same IP. CfgDebugP2PAllowDuplicateIP = "tendermint.debug.allow_duplicate_ip" + // CfgDebugUnsafeReplayRecoverCorruptedWAL enables the debug and unsafe + // automatic corrupted WAL recovery during replay. + CfgDebugUnsafeReplayRecoverCorruptedWAL = "tendermint.debug.unsafe_replay_recover_corrupted_wal" + // CfgConsensusMinGasPrice configures the minimum gas price for this validator. CfgConsensusMinGasPrice = "consensus.tendermint.min_gas_price" // CfgConsensusSubmissionGasPrice configures the gas price used when submitting transactions. 
@@ -931,6 +935,7 @@ func (t *tendermintService) lazyInit() error { tenderConfig.Consensus.SkipTimeoutCommit = t.genesis.Consensus.Parameters.SkipTimeoutCommit tenderConfig.Consensus.CreateEmptyBlocks = true tenderConfig.Consensus.CreateEmptyBlocksInterval = emptyBlockInterval + tenderConfig.Consensus.DebugUnsafeReplayRecoverCorruptedWAL = viper.GetBool(CfgDebugUnsafeReplayRecoverCorruptedWAL) && cmflags.DebugDontBlameOasis() tenderConfig.Instrumentation.Prometheus = true tenderConfig.Instrumentation.PrometheusListenAddr = "" tenderConfig.TxIndex.Indexer = "null" @@ -1376,11 +1381,13 @@ func init() { Flags.Uint64(CfgConsensusSubmissionGasPrice, 0, "gas price used when submitting consensus transactions") Flags.Uint64(CfgConsensusSubmissionMaxFee, 0, "maximum transaction fee when submitting consensus transactions") Flags.Bool(CfgConsensusDebugDisableCheckTx, false, "do not perform CheckTx on incoming transactions (UNSAFE)") + Flags.Bool(CfgDebugUnsafeReplayRecoverCorruptedWAL, false, "Enable automatic recovery from corrupted WAL during replay (UNSAFE).") _ = Flags.MarkHidden(cfgLogDebug) _ = Flags.MarkHidden(CfgDebugP2PAddrBookLenient) _ = Flags.MarkHidden(CfgDebugP2PAllowDuplicateIP) _ = Flags.MarkHidden(CfgConsensusDebugDisableCheckTx) + _ = Flags.MarkHidden(CfgDebugUnsafeReplayRecoverCorruptedWAL) _ = viper.BindPFlags(Flags) Flags.AddFlagSet(db.Flags) diff --git a/go/go.mod b/go/go.mod index 66480e921b5..c8ec4764f53 100644 --- a/go/go.mod +++ b/go/go.mod @@ -2,7 +2,7 @@ module github.com/oasislabs/oasis-core/go replace ( github.com/tendermint/iavl => github.com/oasislabs/iavl v0.12.0-ekiden3 - github.com/tendermint/tendermint => github.com/oasislabs/tendermint v0.32.8-oasis2 + github.com/tendermint/tendermint => github.com/oasislabs/tendermint v0.32.8-oasis3 golang.org/x/crypto/curve25519 => github.com/oasislabs/ed25519/extra/x25519 v0.0.0-20191022155220-a426dcc8ad5f golang.org/x/crypto/ed25519 => github.com/oasislabs/ed25519 v0.0.0-20191109133925-b197a691e30d ) 
diff --git a/go/go.sum b/go/go.sum index a734223bd09..36828615878 100644 --- a/go/go.sum +++ b/go/go.sum @@ -353,8 +353,8 @@ github.com/oasislabs/iavl v0.12.0-ekiden3 h1:8544fXJb57urhAEpTlIwDBdTJukgpPS/FCS github.com/oasislabs/iavl v0.12.0-ekiden3/go.mod h1:B/tMpl5cg7n42n3xYQTCckJzQezoI75jedkc8FOiOF0= github.com/oasislabs/safeopen v0.0.0-20200117113835-6aa648f43ff8 h1:KC7dcrx0WEeyAWGAG+vdJjmIW36PUfw1x/LUnHjLm2E= github.com/oasislabs/safeopen v0.0.0-20200117113835-6aa648f43ff8/go.mod h1:ABsG2IHM7bpTRIH3EvQ8CZQEBkzuhLxXFxaYApYMB9Y= -github.com/oasislabs/tendermint v0.32.8-oasis2 h1:PSEUtAp8Rfe/0T7endF6Iqg4p9+pPkSDSx0E83bD2LM= -github.com/oasislabs/tendermint v0.32.8-oasis2/go.mod h1:uxexUd6P+G+Zg1yACNBycfcaV1dPI985r79I+IXP38w= +github.com/oasislabs/tendermint v0.32.8-oasis3 h1:vtvlWwTD2YMpQ3OTMWRCIdeEQmHVhJIG+4dhVl6QWz0= +github.com/oasislabs/tendermint v0.32.8-oasis3/go.mod h1:uxexUd6P+G+Zg1yACNBycfcaV1dPI985r79I+IXP38w= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= diff --git a/go/oasis-test-runner/oasis/args.go b/go/oasis-test-runner/oasis/args.go index c806a416900..a3d724dcfae 100644 --- a/go/oasis-test-runner/oasis/args.go +++ b/go/oasis-test-runner/oasis/args.go @@ -98,6 +98,13 @@ func (args *argBuilder) tendermintDebugDisableCheckTx(disable bool) *argBuilder return args } +func (args *argBuilder) tendermintRecoverCorruptedWAL(enable bool) *argBuilder { + if enable { + args.vec = append(args.vec, "--"+tendermint.CfgDebugUnsafeReplayRecoverCorruptedWAL) + } + return args +} + func (args *argBuilder) tendermintCoreListenAddress(port uint16) *argBuilder { args.vec = append(args.vec, []string{ "--" + tendermint.CfgCoreListenAddress, "tcp://0.0.0.0:" + strconv.Itoa(int(port)), diff --git a/go/oasis-test-runner/oasis/fixture.go 
b/go/oasis-test-runner/oasis/fixture.go index 7e0ed7b9e48..5b58394720d 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -117,7 +117,7 @@ type TEEFixture struct { } // ValidatorFixture is a validator fixture. -type ValidatorFixture struct { +type ValidatorFixture struct { // nolint: maligned AllowEarlyTermination bool `json:"allow_early_termination"` AllowErrorTermination bool `json:"allow_error_termination"` @@ -129,6 +129,8 @@ type ValidatorFixture struct { SubmissionGasPrice uint64 `json:"submission_gas_price"` Sentries []int `json:"sentries,omitempty"` + + TendermintRecoverCorruptedWAL bool `json:"tendermint_recover_corrupted_wal"` } // Create instantiates the validator described by the fixture. @@ -144,10 +146,11 @@ func (f *ValidatorFixture) Create(net *Network) (*Validator, error) { return net.NewValidator(&ValidatorCfg{ NodeCfg: NodeCfg{ - AllowEarlyTermination: f.AllowEarlyTermination, - AllowErrorTermination: f.AllowErrorTermination, - LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, - SubmissionGasPrice: f.SubmissionGasPrice, + AllowEarlyTermination: f.AllowEarlyTermination, + AllowErrorTermination: f.AllowErrorTermination, + LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, + SubmissionGasPrice: f.SubmissionGasPrice, + TendermintRecoverCorruptedWAL: f.TendermintRecoverCorruptedWAL, }, Entity: entity, MinGasPrice: f.MinGasPrice, diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index 2225372975d..5f06cfb4bb2 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -70,8 +70,9 @@ type Node struct { // nolint: maligned disableDefaultLogWatcherHandlerFactories bool logWatcherHandlerFactories []log.WatcherHandlerFactory - submissionGasPrice uint64 - consensusDisableCheckTx bool + submissionGasPrice uint64 + consensusDisableCheckTx bool + tendermintRecoverCorruptedWAL bool } // Exit returns a channel that will close once 
the node shuts down. @@ -163,8 +164,9 @@ type NodeCfg struct { // nolint: maligned DisableDefaultLogWatcherHandlerFactories bool LogWatcherHandlerFactories []log.WatcherHandlerFactory - SubmissionGasPrice uint64 - ConsensusDisableCheckTx bool + SubmissionGasPrice uint64 + ConsensusDisableCheckTx bool + TendermintRecoverCorruptedWAL bool } // CmdAttrs is the SysProcAttr that will ensure graceful cleanup. diff --git a/go/oasis-test-runner/oasis/validator.go b/go/oasis-test-runner/oasis/validator.go index 845e92b1e48..4fcbeed5eb8 100644 --- a/go/oasis-test-runner/oasis/validator.go +++ b/go/oasis-test-runner/oasis/validator.go @@ -85,7 +85,8 @@ func (val *Validator) startNode() error { tendermintSubmissionGasPrice(val.submissionGasPrice). storageBackend("client"). appendNetwork(val.net). - appendEntity(val.entity) + appendEntity(val.entity). + tendermintRecoverCorruptedWAL(val.tendermintRecoverCorruptedWAL) if len(val.sentries) > 0 { args = args.addSentries(val.sentries). @@ -128,6 +129,7 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) { disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, submissionGasPrice: cfg.SubmissionGasPrice, + tendermintRecoverCorruptedWAL: cfg.TendermintRecoverCorruptedWAL, }, entity: cfg.Entity, minGasPrice: cfg.MinGasPrice, diff --git a/go/oasis-test-runner/scenario/e2e/txsource.go b/go/oasis-test-runner/scenario/e2e/txsource.go index 4123cc04fe0..f92ed9ef603 100644 --- a/go/oasis-test-runner/scenario/e2e/txsource.go +++ b/go/oasis-test-runner/scenario/e2e/txsource.go @@ -65,6 +65,10 @@ var TxSourceMulti scenario.Scenario = &txSourceImpl{ timeLimit: timeLimitLong, nodeRestartInterval: nodeRestartIntervalLong, livenessCheckInterval: livenessCheckInterval, + // Nodes getting killed commonly result in corrupted tendermint WAL when the + // node is restarted. Enable automatic corrupted WAL recovery for validator + // nodes. 
+ tendermintRecoverCorruptedWAL: true, } type txSourceImpl struct { @@ -75,6 +79,8 @@ type txSourceImpl struct { nodeRestartInterval time.Duration livenessCheckInterval time.Duration + tendermintRecoverCorruptedWAL bool + rng *rand.Rand } @@ -90,10 +96,20 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) { // Disable CheckTx on the client node so we can submit invalid transactions. f.Clients[0].ConsensusDisableCheckTx = true + // Use at least 4 validators so that consensus can keep making progress + // when a node is being killed and restarted. + f.Validators = []oasis.ValidatorFixture{ + oasis.ValidatorFixture{Entity: 1}, + oasis.ValidatorFixture{Entity: 1}, + oasis.ValidatorFixture{Entity: 1}, + oasis.ValidatorFixture{Entity: 1}, + } + // Update validators to require fee payments. for i := range f.Validators { f.Validators[i].MinGasPrice = txSourceGasPrice f.Validators[i].SubmissionGasPrice = txSourceGasPrice + f.Validators[i].TendermintRecoverCorruptedWAL = sc.tendermintRecoverCorruptedWAL } // Update all other nodes to use a specific gas price. for i := range f.Keymanagers {