Skip to content

Commit

Permalink
ci(e2e): support devnet chaos slashing
Browse files Browse the repository at this point in the history
  • Loading branch information
corverroos committed Nov 15, 2024
1 parent d73b69a commit 40079d3
Show file tree
Hide file tree
Showing 17 changed files with 146 additions and 33 deletions.
3 changes: 3 additions & 0 deletions e2e/app/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ func Setup(ctx context.Context, def Definition, depCfg DeployConfig) error {
node.Mode,
omniEVM.InstanceName,
endpoints,
def.Testnet.Manifest.HaloChaos,
); err != nil {
return err
}
Expand Down Expand Up @@ -396,6 +397,7 @@ func writeHaloConfig(
mode e2e.Mode,
evmInstance string,
endpoints xchain.RPCEndpoints,
devnetChaos bool,
) error {
cfg := halocfg.DefaultConfig()

Expand All @@ -412,6 +414,7 @@ func writeHaloConfig(
cfg.EngineJWTFile = "/halo/config/jwtsecret" // Absolute path inside docker container
cfg.Tracer.Endpoint = defCfg.TracingEndpoint
cfg.Tracer.Headers = defCfg.TracingHeaders
cfg.DevnetChaos = devnetChaos

if testCfg {
cfg.SnapshotInterval = 1 // Write snapshots each block in e2e tests
Expand Down
1 change: 1 addition & 0 deletions e2e/manifests/ci.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ anvil_chains = ["mock_l2", "mock_l1"]
multi_omni_evms = true
network_upgrade_height = 15
pingpong_n = 5 # Increased ping pong to span validator updates
halo_chaos = true

[node.validator01]
[node.validator02]
Expand Down
7 changes: 3 additions & 4 deletions e2e/types/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ const (
PerturbStopStart Perturb = "stopstart"
// PerturbRollback defines a perturbation that stops a halo node, performs a rollback, then starts it again.
PerturbRollback Perturb = "rollback"
// PerturbUpgrade defines a perturbation that upgrades a halo node to the latest image tag.
PerturbUpgrade Perturb = "upgrade"

// PerturbFuzzyHeadDropBlocks defines a perturbation that enables fuzzyhead dropping xblock for a while.
PerturbFuzzyHeadDropBlocks Perturb = "fuzzyhead_dropblocks"
Expand All @@ -72,8 +70,6 @@ const (
)

// Manifest wraps e2e.Manifest with additional omni-specific fields.
//

type Manifest struct {
e2e.Manifest

Expand Down Expand Up @@ -118,6 +114,9 @@ type Manifest struct {
// NetworkUpgradeHeight defines the network upgrade height, default is genesis, negative is disabled.
// Note that it might be scheduled at a later height.
NetworkUpgradeHeight int64 `toml:"network_upgrade_height"`

// HaloChaos defines whether to enable chaos testing in halo.
HaloChaos bool `toml:"halo_chaos"`
}

// Seeds returns a map of seed nodes by name.
Expand Down
84 changes: 84 additions & 0 deletions halo/app/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ package app
import (
"context"
"fmt"
"time"

"github.com/omni-network/omni/halo/attest/types"
"github.com/omni-network/omni/lib/log"

abci "github.com/cometbft/cometbft/abci/types"
Expand All @@ -21,17 +23,25 @@ type abciWrapper struct {
abci.Application
postFinalize postFinalizeCallback
multiStoreProvider multiStoreProvider
chaos chaos
}

// newABCIWrapper returns an abciWrapper around the provided ABCI application.
//
// The wrapper always starts out with a noopChaos; when chaosTest is true it is
// replaced by a chaosSlasher so that the wrapped ABCI calls inject chaos.
func newABCIWrapper(
	app abci.Application,
	finaliseCallback postFinalizeCallback,
	multiStoreProvider multiStoreProvider,
	chaosTest bool,
) *abciWrapper {
	wrapper := &abciWrapper{
		Application:        app,
		postFinalize:       finaliseCallback,
		multiStoreProvider: multiStoreProvider,
		chaos:              new(noopChaos),
	}

	if chaosTest {
		wrapper.chaos = new(chaosSlasher)
	}

	return wrapper
}

Expand Down Expand Up @@ -65,6 +75,8 @@ func (l abciWrapper) InitChain(ctx context.Context, chain *abci.RequestInitChain
}

func (l abciWrapper) PrepareProposal(ctx context.Context, proposal *abci.RequestPrepareProposal) (*abci.ResponsePrepareProposal, error) {
l.chaos.PrepareProposal(ctx, proposal)

ctx = log.WithCtx(ctx, "height", proposal.Height)
log.Debug(ctx, "👾 ABCI call: PrepareProposal",
log.Hex7("proposer", proposal.ProposerAddress),
Expand All @@ -91,6 +103,8 @@ func (l abciWrapper) ProcessProposal(ctx context.Context, proposal *abci.Request
}

func (l abciWrapper) FinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock) (*abci.ResponseFinalizeBlock, error) {
l.chaos.FinalizeBlock(ctx, req)

ctx = log.WithCtx(ctx, "height", req.Height)
resp, err := l.Application.FinalizeBlock(ctx, req)
if err != nil {
Expand Down Expand Up @@ -179,3 +193,73 @@ func (l abciWrapper) ApplySnapshotChunk(ctx context.Context, chunk *abci.Request
log.Debug(ctx, "👾 ABCI call: ApplySnapshotChunk")
return l.Application.ApplySnapshotChunk(ctx, chunk)
}

// chaos abstracts a source of chaos testing.
//
// abciWrapper invokes these hooks at the very start of its own PrepareProposal
// and FinalizeBlock methods, before the call is logged or delegated to the
// wrapped application. The hooks return nothing; an implementation influences
// the node only by mutating the request it is handed.
type chaos interface {
// PrepareProposal is called with the proposal request before it is processed.
PrepareProposal(ctx context.Context, proposal *abci.RequestPrepareProposal)
// FinalizeBlock is called with the finalize request before it is passed to the application.
FinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock)
}

// noopChaos is a no-op chaos implementation.
// This is used in production to disable chaos testing.
// newABCIWrapper installs it whenever its chaosTest argument is false.
type noopChaos struct{}

// PrepareProposal does nothing and leaves the request untouched.
func (noopChaos) PrepareProposal(context.Context, *abci.RequestPrepareProposal) {}

// FinalizeBlock does nothing and leaves the request untouched.
func (noopChaos) FinalizeBlock(context.Context, *abci.RequestFinalizeBlock) {}

// chaosSlashMinHeight is the lowest block height at which chaosSlasher.FinalizeBlock
// will inject misbehavior; requests below this height are left untouched.
const chaosSlashMinHeight = 10

// chaosSlasher is a chaos implementation that slashes the first validator to vote.
//
// State is kept in memory only: PrepareProposal picks the validator and
// FinalizeBlock later injects a single misbehavior record against it.
type chaosSlasher struct {
// val is the validator selected for slashing; only meaningful once valDetected is true.
val abci.Validator
// valDetected is set by PrepareProposal once a validator has been selected.
valDetected bool
// valSlashed is set by FinalizeBlock after the misbehavior has been injected, so it happens once.
valSlashed bool
}

// PrepareProposal selects the validator to slash.
//
// It walks the votes of the local last commit and records the validator behind
// the first vote whose extension decodes to at least one vote. Once a validator
// has been recorded, later calls return immediately.
func (c *chaosSlasher) PrepareProposal(ctx context.Context, proposal *abci.RequestPrepareProposal) {
	if c.valDetected {
		return
	}

	for _, candidate := range proposal.LocalLastCommit.Votes {
		// The decode error is deliberately dropped: an extension that is empty,
		// undecodable or carries no votes simply isn't a candidate.
		decoded, ok, _ := types.VotesFromExtension(candidate.VoteExtension)
		if !ok || len(decoded.Votes) == 0 {
			continue
		}

		c.val = candidate.Validator
		c.valDetected = true
		log.Warn(ctx, "😱 Chaos slashable validator detected", nil, log.Hex7("val", c.val.Address), "height", proposal.Height)

		return
	}
}

// FinalizeBlock injects a single duplicate-vote misbehavior against the
// validator previously recorded by PrepareProposal.
//
// Nothing happens until a validator has been detected, the request height has
// reached chaosSlashMinHeight, and every vote in the decided last commit is
// flagged as a commit. When those hold, one Misbehavior entry is appended to
// req.Misbehavior (mutating the request in place) and the slasher marks itself
// as done so it never fires again.
func (c *chaosSlasher) FinalizeBlock(ctx context.Context, req *abci.RequestFinalizeBlock) {
	if !c.valDetected {
		return
	}
	if c.valSlashed {
		return
	}
	if req.Height < chaosSlashMinHeight {
		return
	}

	// Single pass: bail out unless all validators are online (every vote is a
	// commit), summing the voting power along the way.
	var power int64
	for _, v := range req.DecidedLastCommit.Votes {
		if v.BlockIdFlag != cmtproto.BlockIDFlagCommit {
			return
		}
		power += v.Validator.Power
	}

	// NOTE(review): the evidence timestamp is taken from the local wall clock,
	// not from the block time — confirm this is intended.
	evidence := abci.Misbehavior{
		Type:             abci.MisbehaviorType_DUPLICATE_VOTE,
		Validator:        c.val,
		Height:           req.Height - 1,
		Time:             time.Now().Add(-time.Second),
		TotalVotingPower: power,
	}
	req.Misbehavior = append(req.Misbehavior, evidence)

	c.valSlashed = true
	log.Warn(ctx, "😱 Chaos slashing validator", nil, log.Hex7("val", c.val.Address), "height", req.Height)
}
37 changes: 26 additions & 11 deletions halo/app/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ type Config struct {
Comet cmtcfg.Config
}

// ChaosTest reports whether devnet chaos testing should be active.
// The DevnetChaos setting is only honoured when the network is devnet;
// on any other network this always returns false.
func (c Config) ChaosTest() bool {
	if c.Network != netconf.Devnet {
		return false
	}

	return c.DevnetChaos
}

// BackendType returns the halo config backend type
// or the comet backend type otherwise.
func (c Config) BackendType() dbm.BackendType {
Expand Down Expand Up @@ -166,7 +171,7 @@ func Start(ctx context.Context, cfg Config) (<-chan error, func(context.Context)
app.EVMEngKeeper.SetBuildDelay(cfg.EVMBuildDelay)
app.EVMEngKeeper.SetBuildOptimistic(cfg.EVMBuildOptimistic)

cmtNode, err := newCometNode(ctx, &cfg.Comet, app, privVal)
cmtNode, err := newCometNode(ctx, cfg, app, privVal)
if err != nil {
return nil, nil, errors.Wrap(err, "create comet node")
}
Expand Down Expand Up @@ -294,39 +299,49 @@ func startRPCServers(
return nil
}

func newCometNode(ctx context.Context, cfg *cmtcfg.Config, app *App, privVal cmttypes.PrivValidator) (
*node.Node, error) {
nodeKey, err := p2p.LoadOrGenNodeKey(cfg.NodeKeyFile())
func newCometNode(
ctx context.Context,
cfg Config,
app *App,
privVal cmttypes.PrivValidator,
) (*node.Node, error) {
cmtCfg := &cfg.Comet
nodeKey, err := p2p.LoadOrGenNodeKey(cmtCfg.NodeKeyFile())
if err != nil {
return nil, errors.Wrap(err, "load or gen node key", "key_file", cfg.NodeKeyFile())
return nil, errors.Wrap(err, "load or gen node key", "key_file", cmtCfg.NodeKeyFile())
}

cmtLog, err := NewCmtLogger(ctx, cfg.LogLevel)
cmtLog, err := NewCmtLogger(ctx, cmtCfg.LogLevel)
if err != nil {
return nil, err
}

if cfg.ChaosTest() {
log.Warn(ctx, "😱 Chaos testing enabled!", nil)
}

wrapper := newABCIWrapper(
sdkserver.NewCometABCIWrapper(app),
app.EVMEngKeeper.PostFinalize,
func() storetypes.CacheMultiStore {
return app.CommitMultiStore().CacheMultiStore()
},
cfg.ChaosTest(),
)

// Configure CometBFT prometheus metrics as per provided config
metrics := node.DefaultMetricsProvider(&cmtcfg.InstrumentationConfig{
Prometheus: cfg.Instrumentation.Prometheus,
Namespace: cfg.Instrumentation.Namespace,
Prometheus: cmtCfg.Instrumentation.Prometheus,
Namespace: cmtCfg.Instrumentation.Namespace,
})
// But don't instantiate the CometBFT prometheus server, we do it in startMonitoringAPI.
cfg.Instrumentation.Prometheus = false
cmtCfg.Instrumentation.Prometheus = false

cmtNode, err := node.NewNode(cfg,
cmtNode, err := node.NewNode(cmtCfg,
privVal,
nodeKey,
proxy.NewLocalClientCreator(wrapper),
node.DefaultGenesisDocProviderFunc(cfg),
node.DefaultGenesisDocProviderFunc(cmtCfg),
cmtcfg.DefaultDBProvider,
metrics,
cmtLog,
Expand Down
19 changes: 2 additions & 17 deletions halo/attest/keeper/cpayload.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,15 @@ import (
"github.com/cosmos/cosmos-sdk/baseapp"
sdk "github.com/cosmos/cosmos-sdk/types"
authtypes "github.com/cosmos/cosmos-sdk/x/auth/types"
"github.com/cosmos/gogoproto/proto"
)

var _ evmenginetypes.VoteExtensionProvider = (*Keeper)(nil)

// PrepareVotes returns the cosmosSDK transaction MsgAddVotes that will include all the validator votes included
// in the previous block's vote extensions into the attest module.
//
// Note that the commit is assumed to be valid and only contains valid VEs from the previous block as
// provided by a trusted cometBFT. Some votes (contained inside VE) may however be invalid, they are discarded.
// Note that the commit is expected to be valid and only contains valid VEs from the previous block as
// provided by a trusted cometBFT. Some votes (contained inside VEs) may however be invalid, they are discarded.
func (k *Keeper) PrepareVotes(ctx context.Context, commit abci.ExtendedCommitInfo, commitHeight uint64) ([]sdk.Msg, error) {
sdkCtx := sdk.UnwrapSDKContext(ctx)
// The VEs in LastLocalCommit are expected to be valid
Expand Down Expand Up @@ -114,17 +113,3 @@ func sortAggregates(aggs []*types.AggVote) []*types.AggVote {

return aggs
}

// votesFromExtension decodes the attestation votes carried by a vote extension.
//
// It returns (nil, false, nil) for an empty extension, (nil, false, err) when
// the bytes cannot be unmarshalled, and (votes, true, nil) otherwise.
func votesFromExtension(voteExtension []byte) (*types.Votes, bool, error) {
	if len(voteExtension) == 0 {
		return nil, false, nil
	}

	decoded := &types.Votes{}
	err := proto.Unmarshal(voteExtension, decoded)
	if err != nil {
		return nil, false, errors.Wrap(err, "decode vote extension")
	}

	return decoded, true, nil
}
2 changes: 1 addition & 1 deletion halo/attest/keeper/keeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ func (k *Keeper) parseAndVerifyVoteExtension(
return nil, false, err // This error should never occur
}

votes, ok, err := votesFromExtension(voteExt)
votes, ok, err := types.VotesFromExtension(voteExt)
if err != nil {
return nil, false, errors.Wrap(err, "votes from extension")
} else if !ok {
Expand Down
17 changes: 17 additions & 0 deletions halo/attest/types/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ import (
"log/slog"
"strconv"

"github.com/omni-network/omni/lib/errors"
"github.com/omni-network/omni/lib/xchain"

"github.com/cosmos/gogoproto/proto"
)

const logLimit = 5
Expand Down Expand Up @@ -45,3 +48,17 @@ func AttLogs(headers []*AttestHeader) []any {

return attrs
}

// VotesFromExtension decodes the attestation votes carried by a vote extension.
//
// The boolean result reports whether votes were decoded:
//   - an empty extension yields (nil, false, nil);
//   - bytes that fail to unmarshal yield (nil, false, err);
//   - otherwise the decoded votes are returned with true.
func VotesFromExtension(voteExtension []byte) (*Votes, bool, error) {
	if len(voteExtension) == 0 {
		return nil, false, nil
	}

	var votes Votes
	if err := proto.Unmarshal(voteExtension, &votes); err != nil {
		return nil, false, errors.Wrap(err, "decode vote extension")
	}

	return &votes, true, nil
}
1 change: 1 addition & 0 deletions halo/cmd/cmd_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ func TestTomlConfig(t *testing.T) {
var expect halocfg.Config
fuzzer.Fuzz(&expect)
expect.HomeDir = dir
expect.DevnetChaos = false // Chaos not included in toml

// The Toml library converts map keys to lower case. So do this so expect==actual.
for k := range expect.RPCEndpoints {
Expand Down
1 change: 1 addition & 0 deletions halo/cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func bindRunFlags(cmd *cobra.Command, cfg *halocfg.Config) {
flags.DurationVar(&cfg.EVMBuildDelay, "evm-build-delay", cfg.EVMBuildDelay, "Minimum delay between triggering and fetching a EVM payload build")
flags.BoolVar(&cfg.EVMBuildOptimistic, "evm-build-optimistic", cfg.EVMBuildOptimistic, "Enables optimistic building of EVM payloads on previous block finalize")
flags.IntSliceVar(&cfg.UnsafeSkipUpgrades, sdkserver.FlagUnsafeSkipUpgrades, cfg.UnsafeSkipUpgrades, "Skip a set of upgrade heights to continue the old binary")
flags.BoolVar(&cfg.DevnetChaos, "devnet-chaos-test", cfg.DevnetChaos, "Enable chaos testing (only applicable to devnet)")
}

func bindRollbackFlags(flags *pflag.FlagSet, cfg *app.RollbackConfig) {
Expand Down
1 change: 1 addition & 0 deletions halo/cmd/testdata/TestCLIReference_rollback.golden
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Flags:
--api-address string Address defines the API server to listen on (default "tcp://0.0.0.0:1317")
--api-enable Enable defines if the API server should be enabled. (default true)
--app-db-backend string The type of database for application and snapshots databases (default "goleveldb")
--devnet-chaos-test Enable chaos testing (only applicable to devnet)
--engine-endpoint string An EVM execution client Engine API http endpoint
--engine-jwt-file string The path to the Engine API JWT file
--evm-build-delay duration Minimum delay between triggering and fetching a EVM payload build (default 600ms)
Expand Down
1 change: 1 addition & 0 deletions halo/cmd/testdata/TestCLIReference_run.golden
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Flags:
--api-address string Address defines the API server to listen on (default "tcp://0.0.0.0:1317")
--api-enable Enable defines if the API server should be enabled. (default true)
--app-db-backend string The type of database for application and snapshots databases (default "goleveldb")
--devnet-chaos-test Enable chaos testing (only applicable to devnet)
--engine-endpoint string An EVM execution client Engine API http endpoint
--engine-jwt-file string The path to the Engine API JWT file
--evm-build-delay duration Minimum delay between triggering and fetching a EVM payload build (default 600ms)
Expand Down
1 change: 1 addition & 0 deletions halo/cmd/testdata/TestRunCmd_defaults.golden
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"Enable": true,
"Address": "0.0.0.0:9090"
},
"DevnetChaos": false,
"Comet": {
"Version": "0.38.15",
"RootDir": "./halo",
Expand Down
1 change: 1 addition & 0 deletions halo/cmd/testdata/TestRunCmd_flags.golden
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"Enable": true,
"Address": "0.0.0.0:9090"
},
"DevnetChaos": false,
"Comet": {
"Version": "0.38.15",
"RootDir": "foo",
Expand Down
1 change: 1 addition & 0 deletions halo/cmd/testdata/TestRunCmd_json_files.golden
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"Enable": true,
"Address": "0.0.0.0:9090"
},
"DevnetChaos": false,
"Comet": {
"Version": "0.38.15",
"RootDir": "testinput/input2",
Expand Down
Loading

0 comments on commit 40079d3

Please sign in to comment.