diff --git a/e2e/app/setup.go b/e2e/app/setup.go index f99066546..c4c6c9812 100644 --- a/e2e/app/setup.go +++ b/e2e/app/setup.go @@ -163,6 +163,7 @@ func Setup(ctx context.Context, def Definition, depCfg DeployConfig) error { node.Mode, omniEVM.InstanceName, endpoints, + def.Testnet.Manifest.HaloChaos, ); err != nil { return err } @@ -396,6 +397,7 @@ func writeHaloConfig( mode e2e.Mode, evmInstance string, endpoints xchain.RPCEndpoints, + devnetChaos bool, ) error { cfg := halocfg.DefaultConfig() @@ -412,6 +414,7 @@ func writeHaloConfig( cfg.EngineJWTFile = "/halo/config/jwtsecret" // Absolute path inside docker container cfg.Tracer.Endpoint = defCfg.TracingEndpoint cfg.Tracer.Headers = defCfg.TracingHeaders + cfg.DevnetChaos = devnetChaos if testCfg { cfg.SnapshotInterval = 1 // Write snapshots each block in e2e tests diff --git a/e2e/manifests/ci.toml b/e2e/manifests/ci.toml index 16436264a..560a7a382 100644 --- a/e2e/manifests/ci.toml +++ b/e2e/manifests/ci.toml @@ -4,6 +4,7 @@ anvil_chains = ["mock_l2", "mock_l1"] multi_omni_evms = true network_upgrade_height = 15 pingpong_n = 5 # Increased ping pong to span validator updates +halo_chaos = true [node.validator01] [node.validator02] diff --git a/e2e/types/manifest.go b/e2e/types/manifest.go index 563df22af..509c3986b 100644 --- a/e2e/types/manifest.go +++ b/e2e/types/manifest.go @@ -58,8 +58,6 @@ const ( PerturbStopStart Perturb = "stopstart" // PerturbRollback defines a perturbation that stops a halo node, performs a rollback, then starts it again. PerturbRollback Perturb = "rollback" - // PerturbUpgrade defines a perturbation that upgrades a halo node to the latest image tag. - PerturbUpgrade Perturb = "upgrade" // PerturbFuzzyHeadDropBlocks defines a perturbation that enables fuzzyhead dropping xblock for a while. PerturbFuzzyHeadDropBlocks Perturb = "fuzzyhead_dropblocks" @@ -72,8 +70,6 @@ const ( ) // Manifest wraps e2e.Manifest with additional omni-specific fields. -// - type Manifest struct { e2e.Manifest @@ -118,6 +114,9 @@ type Manifest struct { // NetworkUpgradeHeight defines the network upgrade height, default is genesis, negative is disabled. // Note that it might be scheduled at a later height. NetworkUpgradeHeight int64 `toml:"network_upgrade_height"` + + // HaloChaos defines whether to enable chaos testing in halo. + HaloChaos bool `toml:"halo_chaos"` } // Seeds returns a map of seed nodes by name. diff --git a/halo/app/abci.go b/halo/app/abci.go index ae0ae389c..519c09412 100644 --- a/halo/app/abci.go +++ b/halo/app/abci.go @@ -4,7 +4,9 @@ package app import ( "context" "fmt" + "time" + "github.com/omni-network/omni/halo/attest/types" "github.com/omni-network/omni/lib/log" abci "github.com/cometbft/cometbft/abci/types" @@ -21,17 +23,25 @@ type abciWrapper struct { abci.Application postFinalize postFinalizeCallback multiStoreProvider multiStoreProvider + chaos chaos } func newABCIWrapper( app abci.Application, finaliseCallback postFinalizeCallback, multiStoreProvider multiStoreProvider, + chaosTest bool, ) *abciWrapper { + chaos := chaos(new(noopChaos)) + if chaosTest { + chaos = new(chaosSlasher) + } + return &abciWrapper{ Application: app, postFinalize: finaliseCallback, multiStoreProvider: multiStoreProvider, + chaos: chaos, } } @@ -65,6 +75,8 @@ func (l abciWrapper) InitChain(ctx context.Context, chain *abci.RequestInitChain } func (l abciWrapper) PrepareProposal(ctx context.Context, proposal *abci.RequestPrepareProposal) (*abci.ResponsePrepareProposal, error) { + l.chaos.PrepareProposal(ctx, proposal) + ctx = log.WithCtx(ctx, "height", proposal.Height) log.Debug(ctx, "👾 ABCI call: PrepareProposal", log.Hex7("proposer", proposal.ProposerAddress), @@ -91,6 +103,8 @@ func (l abciWrapper) ProcessProposal(ctx context.Context, proposal *abci.Request } func (l abciWrapper) FinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock) (*abci.ResponseFinalizeBlock, error) { + l.chaos.FinalizeBlock(ctx, req) + ctx = log.WithCtx(ctx, "height", req.Height) resp, err := l.Application.FinalizeBlock(ctx, req) if err != nil { @@ -179,3 +193,73 @@ func (l abciWrapper) ApplySnapshotChunk(ctx context.Context, chunk *abci.Request log.Debug(ctx, "👾 ABCI call: ApplySnapshotChunk") return l.Application.ApplySnapshotChunk(ctx, chunk) } + +// chaos abstracts a source of chaos testing. +type chaos interface { + PrepareProposal(ctx context.Context, proposal *abci.RequestPrepareProposal) + FinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock) +} + +// noopChaos is a no-op chaos implementation. +// This is used in production to disable chaos testing. +type noopChaos struct{} + +func (noopChaos) PrepareProposal(context.Context, *abci.RequestPrepareProposal) {} +func (noopChaos) FinalizeBlock(context.Context, *abci.RequestFinalizeBlock) {} + +const chaosSlashMinHeight = 10 + +// chaosSlasher is a chaos implementation that slashes the first validator to vote. +type chaosSlasher struct { + val abci.Validator + valDetected bool + valSlashed bool +} + +// PrepareProposal detects the first validator to vote as slashable. +func (c *chaosSlasher) PrepareProposal(ctx context.Context, proposal *abci.RequestPrepareProposal) { + if c.valDetected { + return + } + + for _, vote := range proposal.LocalLastCommit.Votes { + votes, ok, _ := types.VotesFromExtension(vote.VoteExtension) + if ok && len(votes.Votes) > 0 { + c.val = vote.Validator + c.valDetected = true + log.Warn(ctx, "😱 Chaos slashable validator detected", nil, log.Hex7("val", c.val.Address), "height", proposal.Height) + + return + } + } +} + +// FinalizeBlock slashes the detected validator once. +func (c *chaosSlasher) FinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock) { + if !c.valDetected || c.valSlashed || req.Height < chaosSlashMinHeight { + return + } + + // Wait for all validators to be online + for _, vote := range req.DecidedLastCommit.Votes { + if vote.BlockIdFlag != cmtproto.BlockIDFlagCommit { + return + } + } + + var totalPower int64 + for _, vote := range req.DecidedLastCommit.Votes { + totalPower += vote.Validator.Power + } + + req.Misbehavior = append(req.Misbehavior, abci.Misbehavior{ + Type: abci.MisbehaviorType_DUPLICATE_VOTE, + Validator: c.val, + Height: req.Height - 1, + Time: time.Now().Add(-time.Second), + TotalVotingPower: totalPower, + }) + + c.valSlashed = true + log.Warn(ctx, "😱 Chaos slashing validator", nil, log.Hex7("val", c.val.Address), "height", req.Height) +} diff --git a/halo/app/start.go b/halo/app/start.go index 7d69dace4..99090be08 100644 --- a/halo/app/start.go +++ b/halo/app/start.go @@ -56,6 +56,11 @@ type Config struct { Comet cmtcfg.Config } +// ChaosTest returns true if the devnet chaos test is enabled. +func (c Config) ChaosTest() bool { + return c.Network == netconf.Devnet && c.DevnetChaos +} + // BackendType returns the halo config backend type // or the comet backend type otherwise. func (c Config) BackendType() dbm.BackendType { @@ -166,7 +171,7 @@ func Start(ctx context.Context, cfg Config) (<-chan error, func(context.Context) app.EVMEngKeeper.SetBuildDelay(cfg.EVMBuildDelay) app.EVMEngKeeper.SetBuildOptimistic(cfg.EVMBuildOptimistic) - cmtNode, err := newCometNode(ctx, &cfg.Comet, app, privVal) + cmtNode, err := newCometNode(ctx, cfg, app, privVal) if err != nil { return nil, nil, errors.Wrap(err, "create comet node") } @@ -294,39 +299,49 @@ func startRPCServers( return nil } -func newCometNode(ctx context.Context, cfg *cmtcfg.Config, app *App, privVal cmttypes.PrivValidator) ( - *node.Node, error) { - nodeKey, err := p2p.LoadOrGenNodeKey(cfg.NodeKeyFile()) +func newCometNode( + ctx context.Context, + cfg Config, + app *App, + privVal cmttypes.PrivValidator, +) (*node.Node, error) { + cmtCfg := &cfg.Comet + nodeKey, err := p2p.LoadOrGenNodeKey(cmtCfg.NodeKeyFile()) if err != nil { - return nil, errors.Wrap(err, "load or gen node key", "key_file", cfg.NodeKeyFile()) + return nil, errors.Wrap(err, "load or gen node key", "key_file", cmtCfg.NodeKeyFile()) } - cmtLog, err := NewCmtLogger(ctx, cfg.LogLevel) + cmtLog, err := NewCmtLogger(ctx, cmtCfg.LogLevel) if err != nil { return nil, err } + if cfg.ChaosTest() { + log.Warn(ctx, "😱 Chaos testing enabled!", nil) + } + wrapper := newABCIWrapper( sdkserver.NewCometABCIWrapper(app), app.EVMEngKeeper.PostFinalize, func() storetypes.CacheMultiStore { return app.CommitMultiStore().CacheMultiStore() }, + cfg.ChaosTest(), ) // Configure CometBFT prometheus metrics as per provided config metrics := node.DefaultMetricsProvider(&cmtcfg.InstrumentationConfig{ - Prometheus: cfg.Instrumentation.Prometheus, - Namespace: cfg.Instrumentation.Namespace, + Prometheus: cmtCfg.Instrumentation.Prometheus, + Namespace: cmtCfg.Instrumentation.Namespace, }) // But don't instantiate the CometBFT prometheus server, we do it startMonitoringAPI. - cfg.Instrumentation.Prometheus = false + cmtCfg.Instrumentation.Prometheus = false - cmtNode, err := node.NewNode(cfg, + cmtNode, err := node.NewNode(cmtCfg, privVal, nodeKey, proxy.NewLocalClientCreator(wrapper), - node.DefaultGenesisDocProviderFunc(cfg), + node.DefaultGenesisDocProviderFunc(cmtCfg), cmtcfg.DefaultDBProvider, metrics, cmtLog, diff --git a/halo/attest/keeper/cpayload.go b/halo/attest/keeper/cpayload.go index f4db947b1..da4d33903 100644 --- a/halo/attest/keeper/cpayload.go +++ b/halo/attest/keeper/cpayload.go @@ -18,7 +18,6 @@ import ( "github.com/cosmos/cosmos-sdk/baseapp" sdk "github.com/cosmos/cosmos-sdk/types" authtypes "github.com/cosmos/cosmos-sdk/x/auth/types" - "github.com/cosmos/gogoproto/proto" ) var _ evmenginetypes.VoteExtensionProvider = (*Keeper)(nil) @@ -26,8 +25,8 @@ var _ evmenginetypes.VoteExtensionProvider = (*Keeper)(nil) // PrepareVotes returns the cosmosSDK transaction MsgAddVotes that will include all the validator votes included // in the previous block's vote extensions into the attest module. // -// Note that the commit is assumed to be valid and only contains valid VEs from the previous block as -// provided by a trusted cometBFT. Some votes (contained inside VE) may however be invalid, they are discarded. +// Note that the commit is expected to be valid and only contains valid VEs from the previous block as +// provided by a trusted cometBFT. Some votes (contained inside VEs) may however be invalid, they are discarded. func (k *Keeper) PrepareVotes(ctx context.Context, commit abci.ExtendedCommitInfo, commitHeight uint64) ([]sdk.Msg, error) { sdkCtx := sdk.UnwrapSDKContext(ctx) // The VEs in LastLocalCommit is expected to be valid @@ -114,17 +113,3 @@ func sortAggregates(aggs []*types.AggVote) []*types.AggVote { return aggs } - -// votesFromExtension returns the attestations contained in the vote extension, or false if none or an error. -func votesFromExtension(voteExtension []byte) (*types.Votes, bool, error) { - if len(voteExtension) == 0 { - return nil, false, nil - } - - resp := new(types.Votes) - if err := proto.Unmarshal(voteExtension, resp); err != nil { - return nil, false, errors.Wrap(err, "decode vote extension") - } - - return resp, true, nil -} diff --git a/halo/attest/keeper/keeper.go b/halo/attest/keeper/keeper.go index c7ad7e3ad..fbc0a41a8 100644 --- a/halo/attest/keeper/keeper.go +++ b/halo/attest/keeper/keeper.go @@ -769,7 +769,7 @@ func (k *Keeper) parseAndVerifyVoteExtension( return nil, false, err // This error should never occur } - votes, ok, err := votesFromExtension(voteExt) + votes, ok, err := types.VotesFromExtension(voteExt) if err != nil { return nil, false, errors.Wrap(err, "votes from extension") } else if !ok { diff --git a/halo/attest/types/helper.go b/halo/attest/types/helper.go index 9a6f7e284..86567ed9a 100644 --- a/halo/attest/types/helper.go +++ b/halo/attest/types/helper.go @@ -5,7 +5,10 @@ import ( "log/slog" "strconv" + "github.com/omni-network/omni/lib/errors" "github.com/omni-network/omni/lib/xchain" + + "github.com/cosmos/gogoproto/proto" ) const logLimit = 5 @@ -45,3 +48,17 @@ func AttLogs(headers []*AttestHeader) []any { return attrs } + +// VotesFromExtension returns the attestations contained in the vote extension, or false if none or an error. +func VotesFromExtension(voteExtension []byte) (*Votes, bool, error) { + if len(voteExtension) == 0 { + return nil, false, nil + } + + resp := new(Votes) + if err := proto.Unmarshal(voteExtension, resp); err != nil { + return nil, false, errors.Wrap(err, "decode vote extension") + } + + return resp, true, nil +} diff --git a/halo/cmd/cmd_internal_test.go b/halo/cmd/cmd_internal_test.go index 906879661..761c3e605 100644 --- a/halo/cmd/cmd_internal_test.go +++ b/halo/cmd/cmd_internal_test.go @@ -145,6 +145,7 @@ func TestTomlConfig(t *testing.T) { var expect halocfg.Config fuzzer.Fuzz(&expect) expect.HomeDir = dir + expect.DevnetChaos = false // Chaos not included in toml // The Toml library converts map keys to lower case. So do this so expect==actual. for k := range expect.RPCEndpoints { diff --git a/halo/cmd/flags.go b/halo/cmd/flags.go index eaf1583da..7450babc0 100644 --- a/halo/cmd/flags.go +++ b/halo/cmd/flags.go @@ -35,6 +35,7 @@ func bindRunFlags(cmd *cobra.Command, cfg *halocfg.Config) { flags.DurationVar(&cfg.EVMBuildDelay, "evm-build-delay", cfg.EVMBuildDelay, "Minimum delay between triggering and fetching a EVM payload build") flags.BoolVar(&cfg.EVMBuildOptimistic, "evm-build-optimistic", cfg.EVMBuildOptimistic, "Enables optimistic building of EVM payloads on previous block finalize") flags.IntSliceVar(&cfg.UnsafeSkipUpgrades, sdkserver.FlagUnsafeSkipUpgrades, cfg.UnsafeSkipUpgrades, "Skip a set of upgrade heights to continue the old binary") + flags.BoolVar(&cfg.DevnetChaos, "devnet-chaos-test", cfg.DevnetChaos, "Enable chaos testing (only applicable to devnet)") } func bindRollbackFlags(flags *pflag.FlagSet, cfg *app.RollbackConfig) { diff --git a/halo/cmd/testdata/TestCLIReference_rollback.golden b/halo/cmd/testdata/TestCLIReference_rollback.golden index 2a268fd59..522f69f5a 100644 --- a/halo/cmd/testdata/TestCLIReference_rollback.golden +++ b/halo/cmd/testdata/TestCLIReference_rollback.golden @@ -14,6 +14,7 @@ Flags: --api-address string Address defines the API server to listen on (default "tcp://0.0.0.0:1317") --api-enable Enable defines if the API server should be enabled. (default true) --app-db-backend string The type of database for application and snapshots databases (default "goleveldb") + --devnet-chaos-test Enable chaos testing (only applicable to devnet) --engine-endpoint string An EVM execution client Engine API http endpoint --engine-jwt-file string The path to the Engine API JWT file --evm-build-delay duration Minimum delay between triggering and fetching a EVM payload build (default 600ms) diff --git a/halo/cmd/testdata/TestCLIReference_run.golden b/halo/cmd/testdata/TestCLIReference_run.golden index e3dae2373..403e71fe5 100644 --- a/halo/cmd/testdata/TestCLIReference_run.golden +++ b/halo/cmd/testdata/TestCLIReference_run.golden @@ -7,6 +7,7 @@ Flags: --api-address string Address defines the API server to listen on (default "tcp://0.0.0.0:1317") --api-enable Enable defines if the API server should be enabled. (default true) --app-db-backend string The type of database for application and snapshots databases (default "goleveldb") + --devnet-chaos-test Enable chaos testing (only applicable to devnet) --engine-endpoint string An EVM execution client Engine API http endpoint --engine-jwt-file string The path to the Engine API JWT file --evm-build-delay duration Minimum delay between triggering and fetching a EVM payload build (default 600ms) diff --git a/halo/cmd/testdata/TestRunCmd_defaults.golden b/halo/cmd/testdata/TestRunCmd_defaults.golden index 58836fa5f..7d39b7567 100644 --- a/halo/cmd/testdata/TestRunCmd_defaults.golden +++ b/halo/cmd/testdata/TestRunCmd_defaults.golden @@ -24,6 +24,7 @@ "Enable": true, "Address": "0.0.0.0:9090" }, + "DevnetChaos": false, "Comet": { "Version": "0.38.15", "RootDir": "./halo", diff --git a/halo/cmd/testdata/TestRunCmd_flags.golden b/halo/cmd/testdata/TestRunCmd_flags.golden index 7fcfe68db..d59a68450 100644 --- a/halo/cmd/testdata/TestRunCmd_flags.golden +++ b/halo/cmd/testdata/TestRunCmd_flags.golden @@ -24,6 +24,7 @@ "Enable": true, "Address": "0.0.0.0:9090" }, + "DevnetChaos": false, "Comet": { "Version": "0.38.15", "RootDir": "foo", diff --git a/halo/cmd/testdata/TestRunCmd_json_files.golden b/halo/cmd/testdata/TestRunCmd_json_files.golden index 717562c29..5c384c614 100644 --- a/halo/cmd/testdata/TestRunCmd_json_files.golden +++ b/halo/cmd/testdata/TestRunCmd_json_files.golden @@ -24,6 +24,7 @@ "Enable": true, "Address": "0.0.0.0:9090" }, + "DevnetChaos": false, "Comet": { "Version": "0.38.15", "RootDir": "testinput/input2", diff --git a/halo/cmd/testdata/TestRunCmd_toml_files.golden b/halo/cmd/testdata/TestRunCmd_toml_files.golden index f965ef408..5436cc4ac 100644 --- a/halo/cmd/testdata/TestRunCmd_toml_files.golden +++ b/halo/cmd/testdata/TestRunCmd_toml_files.golden @@ -27,6 +27,7 @@ "Enable": true, "Address": "grpc/toml" }, + "DevnetChaos": false, "Comet": { "Version": "0.38.15", "RootDir": "testinput/input1", diff --git a/halo/config/config.go b/halo/config/config.go index 062275b32..335c64c7b 100644 --- a/halo/config/config.go +++ b/halo/config/config.go @@ -86,6 +86,7 @@ type Config struct { UnsafeSkipUpgrades []int SDKAPI RPCConfig `mapstructure:"api"` SDKGRPC RPCConfig `mapstructure:"grpc"` + DevnetChaos bool } // RPCConfig is an abridged version of CosmosSDK srvconfig.API/GRPCConfig.