Skip to content

Commit

Permalink
Merge pull request #2815 from oasislabs/ptrus/feature/debug-recover-wal
Browse files Browse the repository at this point in the history
go/consensus/tendermint: support DebugUnsafeReplayRecoverCorruptedWAL
  • Loading branch information
ptrus authored Apr 2, 2020
2 parents a72bc87 + b21ae67 commit d872a00
Show file tree
Hide file tree
Showing 10 changed files with 58 additions and 13 deletions.
4 changes: 4 additions & 0 deletions .changelog/2815.internal.1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
go/consensus/tendermint: support DebugUnsafeReplayRecoverCorruptedWAL

Adds support for setting tendermint DebugUnsafeReplayRecoverCorruptedWAL and
enables it in daily txsource test runs.
4 changes: 4 additions & 0 deletions .changelog/2815.internal.2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
oasis-test-runner/txsource: increase number of validators

Increase the number of validators used in txsource tests so that consensus can
keep making progress when one of the nodes is restarted.
7 changes: 7 additions & 0 deletions go/consensus/tendermint/tendermint.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ const (
// CfgDebugP2PAllowDuplicateIP allows multiple connections from the same IP.
CfgDebugP2PAllowDuplicateIP = "tendermint.debug.allow_duplicate_ip"

// CfgDebugUnsafeReplayRecoverCorruptedWAL enables the debug and unsafe
// automatic corrupted WAL recovery during replay.
CfgDebugUnsafeReplayRecoverCorruptedWAL = "tendermint.debug.unsafe_replay_recover_corrupted_wal"

// CfgConsensusMinGasPrice configures the minimum gas price for this validator.
CfgConsensusMinGasPrice = "consensus.tendermint.min_gas_price"
// CfgConsensusSubmissionGasPrice configures the gas price used when submitting transactions.
Expand Down Expand Up @@ -931,6 +935,7 @@ func (t *tendermintService) lazyInit() error {
tenderConfig.Consensus.SkipTimeoutCommit = t.genesis.Consensus.Parameters.SkipTimeoutCommit
tenderConfig.Consensus.CreateEmptyBlocks = true
tenderConfig.Consensus.CreateEmptyBlocksInterval = emptyBlockInterval
tenderConfig.Consensus.DebugUnsafeReplayRecoverCorruptedWAL = viper.GetBool(CfgDebugUnsafeReplayRecoverCorruptedWAL) && cmflags.DebugDontBlameOasis()
tenderConfig.Instrumentation.Prometheus = true
tenderConfig.Instrumentation.PrometheusListenAddr = ""
tenderConfig.TxIndex.Indexer = "null"
Expand Down Expand Up @@ -1376,11 +1381,13 @@ func init() {
Flags.Uint64(CfgConsensusSubmissionGasPrice, 0, "gas price used when submitting consensus transactions")
Flags.Uint64(CfgConsensusSubmissionMaxFee, 0, "maximum transaction fee when submitting consensus transactions")
Flags.Bool(CfgConsensusDebugDisableCheckTx, false, "do not perform CheckTx on incoming transactions (UNSAFE)")
Flags.Bool(CfgDebugUnsafeReplayRecoverCorruptedWAL, false, "Enable automatic recovery from corrupted WAL during replay (UNSAFE).")

_ = Flags.MarkHidden(cfgLogDebug)
_ = Flags.MarkHidden(CfgDebugP2PAddrBookLenient)
_ = Flags.MarkHidden(CfgDebugP2PAllowDuplicateIP)
_ = Flags.MarkHidden(CfgConsensusDebugDisableCheckTx)
_ = Flags.MarkHidden(CfgDebugUnsafeReplayRecoverCorruptedWAL)

_ = viper.BindPFlags(Flags)
Flags.AddFlagSet(db.Flags)
Expand Down
2 changes: 1 addition & 1 deletion go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module github.com/oasislabs/oasis-core/go

replace (
github.com/tendermint/iavl => github.com/oasislabs/iavl v0.12.0-ekiden3
github.com/tendermint/tendermint => github.com/oasislabs/tendermint v0.32.8-oasis2
github.com/tendermint/tendermint => github.com/oasislabs/tendermint v0.32.8-oasis3
golang.org/x/crypto/curve25519 => github.com/oasislabs/ed25519/extra/x25519 v0.0.0-20191022155220-a426dcc8ad5f
golang.org/x/crypto/ed25519 => github.com/oasislabs/ed25519 v0.0.0-20191109133925-b197a691e30d
)
Expand Down
4 changes: 2 additions & 2 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,8 @@ github.com/oasislabs/iavl v0.12.0-ekiden3 h1:8544fXJb57urhAEpTlIwDBdTJukgpPS/FCS
github.com/oasislabs/iavl v0.12.0-ekiden3/go.mod h1:B/tMpl5cg7n42n3xYQTCckJzQezoI75jedkc8FOiOF0=
github.com/oasislabs/safeopen v0.0.0-20200117113835-6aa648f43ff8 h1:KC7dcrx0WEeyAWGAG+vdJjmIW36PUfw1x/LUnHjLm2E=
github.com/oasislabs/safeopen v0.0.0-20200117113835-6aa648f43ff8/go.mod h1:ABsG2IHM7bpTRIH3EvQ8CZQEBkzuhLxXFxaYApYMB9Y=
github.com/oasislabs/tendermint v0.32.8-oasis2 h1:PSEUtAp8Rfe/0T7endF6Iqg4p9+pPkSDSx0E83bD2LM=
github.com/oasislabs/tendermint v0.32.8-oasis2/go.mod h1:uxexUd6P+G+Zg1yACNBycfcaV1dPI985r79I+IXP38w=
github.com/oasislabs/tendermint v0.32.8-oasis3 h1:vtvlWwTD2YMpQ3OTMWRCIdeEQmHVhJIG+4dhVl6QWz0=
github.com/oasislabs/tendermint v0.32.8-oasis3/go.mod h1:uxexUd6P+G+Zg1yACNBycfcaV1dPI985r79I+IXP38w=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
Expand Down
7 changes: 7 additions & 0 deletions go/oasis-test-runner/oasis/args.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ func (args *argBuilder) tendermintDebugDisableCheckTx(disable bool) *argBuilder
return args
}

// tendermintRecoverCorruptedWAL appends the
// "--" + tendermint.CfgDebugUnsafeReplayRecoverCorruptedWAL command-line flag
// to the argument vector when enable is true. When enable is false the
// argument vector is left untouched. The builder is returned in both cases so
// that calls can be chained.
func (args *argBuilder) tendermintRecoverCorruptedWAL(enable bool) *argBuilder {
	// Nothing to add unless recovery was explicitly requested.
	if !enable {
		return args
	}

	flag := "--" + tendermint.CfgDebugUnsafeReplayRecoverCorruptedWAL
	args.vec = append(args.vec, flag)
	return args
}

func (args *argBuilder) tendermintCoreListenAddress(port uint16) *argBuilder {
args.vec = append(args.vec, []string{
"--" + tendermint.CfgCoreListenAddress, "tcp://0.0.0.0:" + strconv.Itoa(int(port)),
Expand Down
13 changes: 8 additions & 5 deletions go/oasis-test-runner/oasis/fixture.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ type TEEFixture struct {
}

// ValidatorFixture is a validator fixture.
type ValidatorFixture struct {
type ValidatorFixture struct { // nolint: maligned
AllowEarlyTermination bool `json:"allow_early_termination"`
AllowErrorTermination bool `json:"allow_error_termination"`

Expand All @@ -129,6 +129,8 @@ type ValidatorFixture struct {
SubmissionGasPrice uint64 `json:"submission_gas_price"`

Sentries []int `json:"sentries,omitempty"`

TendermintRecoverCorruptedWAL bool `json:"tendermint_recover_corrupted_wal"`
}

// Create instantiates the validator described by the fixture.
Expand All @@ -144,10 +146,11 @@ func (f *ValidatorFixture) Create(net *Network) (*Validator, error) {

return net.NewValidator(&ValidatorCfg{
NodeCfg: NodeCfg{
AllowEarlyTermination: f.AllowEarlyTermination,
AllowErrorTermination: f.AllowErrorTermination,
LogWatcherHandlerFactories: f.LogWatcherHandlerFactories,
SubmissionGasPrice: f.SubmissionGasPrice,
AllowEarlyTermination: f.AllowEarlyTermination,
AllowErrorTermination: f.AllowErrorTermination,
LogWatcherHandlerFactories: f.LogWatcherHandlerFactories,
SubmissionGasPrice: f.SubmissionGasPrice,
TendermintRecoverCorruptedWAL: f.TendermintRecoverCorruptedWAL,
},
Entity: entity,
MinGasPrice: f.MinGasPrice,
Expand Down
10 changes: 6 additions & 4 deletions go/oasis-test-runner/oasis/oasis.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ type Node struct { // nolint: maligned
disableDefaultLogWatcherHandlerFactories bool
logWatcherHandlerFactories []log.WatcherHandlerFactory

submissionGasPrice uint64
consensusDisableCheckTx bool
submissionGasPrice uint64
consensusDisableCheckTx bool
tendermintRecoverCorruptedWAL bool
}

// Exit returns a channel that will close once the node shuts down.
Expand Down Expand Up @@ -163,8 +164,9 @@ type NodeCfg struct { // nolint: maligned
DisableDefaultLogWatcherHandlerFactories bool
LogWatcherHandlerFactories []log.WatcherHandlerFactory

SubmissionGasPrice uint64
ConsensusDisableCheckTx bool
SubmissionGasPrice uint64
ConsensusDisableCheckTx bool
TendermintRecoverCorruptedWAL bool
}

// CmdAttrs is the SysProcAttr that will ensure graceful cleanup.
Expand Down
4 changes: 3 additions & 1 deletion go/oasis-test-runner/oasis/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ func (val *Validator) startNode() error {
tendermintSubmissionGasPrice(val.submissionGasPrice).
storageBackend("client").
appendNetwork(val.net).
appendEntity(val.entity)
appendEntity(val.entity).
tendermintRecoverCorruptedWAL(val.tendermintRecoverCorruptedWAL)

if len(val.sentries) > 0 {
args = args.addSentries(val.sentries).
Expand Down Expand Up @@ -128,6 +129,7 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) {
disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories,
logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories,
submissionGasPrice: cfg.SubmissionGasPrice,
tendermintRecoverCorruptedWAL: cfg.TendermintRecoverCorruptedWAL,
},
entity: cfg.Entity,
minGasPrice: cfg.MinGasPrice,
Expand Down
16 changes: 16 additions & 0 deletions go/oasis-test-runner/scenario/e2e/txsource.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ var TxSourceMulti scenario.Scenario = &txSourceImpl{
timeLimit: timeLimitLong,
nodeRestartInterval: nodeRestartIntervalLong,
livenessCheckInterval: livenessCheckInterval,
// Killing a node commonly results in a corrupted tendermint WAL when the
// node is restarted. Enable automatic corrupted WAL recovery for validator
// nodes.
tendermintRecoverCorruptedWAL: true,
}

type txSourceImpl struct {
Expand All @@ -75,6 +79,8 @@ type txSourceImpl struct {
nodeRestartInterval time.Duration
livenessCheckInterval time.Duration

tendermintRecoverCorruptedWAL bool

rng *rand.Rand
}

Expand All @@ -90,10 +96,20 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) {
// Disable CheckTx on the client node so we can submit invalid transactions.
f.Clients[0].ConsensusDisableCheckTx = true

// Use at least 4 validators so that consensus can keep making progress
// when a node is being killed and restarted.
f.Validators = []oasis.ValidatorFixture{
oasis.ValidatorFixture{Entity: 1},
oasis.ValidatorFixture{Entity: 1},
oasis.ValidatorFixture{Entity: 1},
oasis.ValidatorFixture{Entity: 1},
}

// Update validators to require fee payments.
for i := range f.Validators {
f.Validators[i].MinGasPrice = txSourceGasPrice
f.Validators[i].SubmissionGasPrice = txSourceGasPrice
f.Validators[i].TendermintRecoverCorruptedWAL = sc.tendermintRecoverCorruptedWAL
}
// Update all other nodes to use a specific gas price.
for i := range f.Keymanagers {
Expand Down

0 comments on commit d872a00

Please sign in to comment.