From f0bd74fdb8b9a7cf5e84432c42c7e3c28ea8b4e8 Mon Sep 17 00:00:00 2001 From: ptrus Date: Mon, 22 Feb 2021 12:03:38 +0100 Subject: [PATCH 1/5] go/oasis-test-runner: Configurable supplementary sanity checker --- .changelog/3719.internal.1.md | 4 ++ go/oasis-net-runner/fixtures/default.go | 2 +- go/oasis-test-runner/oasis/args.go | 12 ++-- go/oasis-test-runner/oasis/client.go | 16 +++-- go/oasis-test-runner/oasis/compute.go | 2 + go/oasis-test-runner/oasis/fixture.go | 70 +++++++++++-------- go/oasis-test-runner/oasis/keymanager.go | 2 + go/oasis-test-runner/oasis/oasis.go | 10 +-- go/oasis-test-runner/oasis/sentry.go | 2 + go/oasis-test-runner/oasis/storage.go | 2 + go/oasis-test-runner/oasis/validator.go | 6 +- go/oasis-test-runner/scenario/e2e/e2e.go | 2 +- .../scenario/e2e/gas_fees_staking.go | 2 +- .../scenario/e2e/genesis_file.go | 2 +- .../e2e/runtime/governance_upgrade.go | 2 +- .../scenario/e2e/runtime/runtime.go | 2 +- .../scenario/e2e/runtime/sentry.go | 1 + go/oasis-test-runner/scenario/e2e/upgrade.go | 2 +- .../scenario/e2e/upgrade_cancel.go | 2 +- 19 files changed, 86 insertions(+), 57 deletions(-) create mode 100644 .changelog/3719.internal.1.md diff --git a/.changelog/3719.internal.1.md b/.changelog/3719.internal.1.md new file mode 100644 index 00000000000..b45f60df305 --- /dev/null +++ b/.changelog/3719.internal.1.md @@ -0,0 +1,4 @@ +go/oasis-test-runner: Configurable supplementary sanity checker + +Before the supplementary sanity checker was always automatically started on +the validator-0. diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index 0cd7028903d..57a199c8adb 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -78,7 +78,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { {IsDebugTestEntity: true}, }, Validators: []oasis.ValidatorFixture{ - {Entity: 1}, + {Entity: 1, Consensus: oasis.ConsensusFixture{SupplementarySanityInterval: 1}}, }, Seeds: []oasis.SeedFixture{{}}, } diff --git a/go/oasis-test-runner/oasis/args.go b/go/oasis-test-runner/oasis/args.go index 13b342e9ca9..8a5373545ee 100644 --- a/go/oasis-test-runner/oasis/args.go +++ b/go/oasis-test-runner/oasis/args.go @@ -208,11 +208,13 @@ func (args *argBuilder) runtimeSupported(id common.Namespace) *argBuilder { return args } -func (args *argBuilder) tendermintSupplementarySanityEnabled() *argBuilder { - args.vec = append(args.vec, "--"+tendermintFull.CfgSupplementarySanityEnabled) - args.vec = append(args.vec, []string{ - "--" + tendermintFull.CfgSupplementarySanityInterval, "1", - }...) +func (args *argBuilder) tendermintSupplementarySanity(interval uint64) *argBuilder { + if interval > 0 { + args.vec = append(args.vec, "--"+tendermintFull.CfgSupplementarySanityEnabled) + args.vec = append(args.vec, []string{ + "--" + tendermintFull.CfgSupplementarySanityInterval, strconv.Itoa(int(interval)), + }...) + } return args } diff --git a/go/oasis-test-runner/oasis/client.go b/go/oasis-test-runner/oasis/client.go index 16a1813c5e1..df2c724ea84 100644 --- a/go/oasis-test-runner/oasis/client.go +++ b/go/oasis-test-runner/oasis/client.go @@ -35,7 +35,8 @@ func (client *Client) startNode() error { appendSeedNodes(client.net.seeds). workerP2pPort(client.p2pPort). workerP2pEnabled(). - runtimeTagIndexerBackend("bleve") + runtimeTagIndexerBackend("bleve"). 
+ tendermintSupplementarySanity(client.supplementarySanityInterval) if client.maxTransactionAge != 0 { args = args.runtimeClientMaxTransactionAge(client.maxTransactionAge) @@ -76,12 +77,13 @@ func (net *Network) NewClient(cfg *ClientCfg) (*Client, error) { client := &Client{ Node: Node{ - Name: clientName, - net: net, - dir: clientDir, - consensus: cfg.Consensus, - termEarlyOk: cfg.AllowEarlyTermination, - termErrorOk: cfg.AllowErrorTermination, + Name: clientName, + net: net, + dir: clientDir, + consensus: cfg.Consensus, + termEarlyOk: cfg.AllowEarlyTermination, + termErrorOk: cfg.AllowErrorTermination, + supplementarySanityInterval: cfg.SupplementarySanityInterval, }, maxTransactionAge: cfg.MaxTransactionAge, consensusPort: net.nextNodePort, diff --git a/go/oasis-test-runner/oasis/compute.go b/go/oasis-test-runner/oasis/compute.go index f53a4135c60..3d15848225e 100644 --- a/go/oasis-test-runner/oasis/compute.go +++ b/go/oasis-test-runner/oasis/compute.go @@ -103,6 +103,7 @@ func (worker *Compute) startNode() error { runtimeSGXLoader(worker.net.cfg.RuntimeSGXLoaderBinary). workerExecutorScheduleCheckTxEnabled(). configureDebugCrashPoints(worker.crashPointsProbability). + tendermintSupplementarySanity(worker.supplementarySanityInterval). appendNetwork(worker.net). appendSeedNodes(worker.net.seeds). appendEntity(worker.entity) @@ -156,6 +157,7 @@ func (net *Network) NewCompute(cfg *ComputeCfg) (*Compute, error) { termErrorOk: cfg.AllowErrorTermination, noAutoStart: cfg.NoAutoStart, crashPointsProbability: cfg.CrashPointsProbability, + supplementarySanityInterval: cfg.SupplementarySanityInterval, disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, consensus: cfg.Consensus, diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index ebf92ca0fe8..b15f43fa134 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -141,6 +141,9 @@ type ConsensusFixture struct { // nolint: maligned // EnableConsensusRPCWorker enables the public consensus RPC services worker. EnableConsensusRPCWorker bool `json:"enable_consensusrpc_worker,omitempty"` + + // SupplementarySanityInterval configures the sanity check application. + SupplementarySanityInterval uint64 `json:"supplementary_sanity_interval,omitempty"` } // TEEFixture is a TEE configuration fixture. 
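The new SupplementarySanityInterval field above is copied from the fixture into NodeCfg and finally into the tendermintSupplementarySanity(interval) argument builder, which emits the supplementary-sanity flags only when the interval is non-zero. A minimal, illustrative sketch of how a scenario could opt a node into the checker through this field (not code from the patch; the scenario fixtures further below set the field inline instead):

	// enableSupplementarySanity opts the first validator of a fixture into
	// the supplementary sanity checker. An interval of 0 (the zero value)
	// leaves the checker disabled; 1 runs the checks as often as possible.
	// Assumes the oasis-test-runner "oasis" package is imported.
	func enableSupplementarySanity(f *oasis.NetworkFixture) {
		if len(f.Validators) == 0 {
			return
		}
		f.Validators[0].Consensus.SupplementarySanityInterval = 1
	}
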
@@ -181,12 +184,13 @@ func (f *ValidatorFixture) Create(net *Network) (*Validator, error) { return net.NewValidator(&ValidatorCfg{ NodeCfg: NodeCfg{ - AllowEarlyTermination: f.AllowEarlyTermination, - AllowErrorTermination: f.AllowErrorTermination, - LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, - Consensus: f.Consensus, - NoAutoStart: f.NoAutoStart, - CrashPointsProbability: f.CrashPointsProbability, + AllowEarlyTermination: f.AllowEarlyTermination, + AllowErrorTermination: f.AllowErrorTermination, + LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, + Consensus: f.Consensus, + NoAutoStart: f.NoAutoStart, + CrashPointsProbability: f.CrashPointsProbability, + SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, }, Entity: entity, Sentries: sentries, @@ -318,12 +322,13 @@ func (f *KeymanagerFixture) Create(net *Network) (*Keymanager, error) { return net.NewKeymanager(&KeymanagerCfg{ NodeCfg: NodeCfg{ - AllowEarlyTermination: f.AllowEarlyTermination, - AllowErrorTermination: f.AllowErrorTermination, - LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, - CrashPointsProbability: f.CrashPointsProbability, - Consensus: f.Consensus, - NoAutoStart: f.NoAutoStart, + AllowEarlyTermination: f.AllowEarlyTermination, + AllowErrorTermination: f.AllowErrorTermination, + LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, + CrashPointsProbability: f.CrashPointsProbability, + SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + Consensus: f.Consensus, + NoAutoStart: f.NoAutoStart, }, Runtime: runtime, Entity: entity, @@ -372,12 +377,13 @@ func (f *StorageWorkerFixture) Create(net *Network) (*Storage, error) { return net.NewStorage(&StorageCfg{ NodeCfg: NodeCfg{ - AllowEarlyTermination: f.AllowEarlyTermination, - AllowErrorTermination: f.AllowErrorTermination, - CrashPointsProbability: f.CrashPointsProbability, - NoAutoStart: f.NoAutoStart, - LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, - Consensus: f.Consensus, + AllowEarlyTermination: f.AllowEarlyTermination, + AllowErrorTermination: f.AllowErrorTermination, + CrashPointsProbability: f.CrashPointsProbability, + SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + NoAutoStart: f.NoAutoStart, + LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, + Consensus: f.Consensus, }, Backend: f.Backend, Entity: entity, @@ -424,12 +430,13 @@ func (f *ComputeWorkerFixture) Create(net *Network) (*Compute, error) { return net.NewCompute(&ComputeCfg{ NodeCfg: NodeCfg{ - AllowEarlyTermination: f.AllowEarlyTermination, - AllowErrorTermination: f.AllowErrorTermination, - NoAutoStart: f.NoAutoStart, - CrashPointsProbability: f.CrashPointsProbability, - LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, - Consensus: f.Consensus, + AllowEarlyTermination: f.AllowEarlyTermination, + AllowErrorTermination: f.AllowErrorTermination, + NoAutoStart: f.NoAutoStart, + CrashPointsProbability: f.CrashPointsProbability, + SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, + Consensus: f.Consensus, }, Entity: entity, RuntimeProvisioner: f.RuntimeProvisioner, @@ -455,6 +462,9 @@ type SentryFixture struct { CrashPointsProbability float64 `json:"crash_points_probability,omitempty"` + // Consensus contains configuration for the consensus backend. 
+ Consensus ConsensusFixture `json:"consensus"` + Validators []int `json:"validators"` StorageWorkers []int `json:"storage_workers"` KeymanagerWorkers []int `json:"keymanager_workers"` @@ -464,8 +474,9 @@ type SentryFixture struct { func (f *SentryFixture) Create(net *Network) (*Sentry, error) { return net.NewSentry(&SentryCfg{ NodeCfg: NodeCfg{ - LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, - CrashPointsProbability: f.CrashPointsProbability, + LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, + CrashPointsProbability: f.CrashPointsProbability, + SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, }, ValidatorIndices: f.Validators, StorageIndices: f.StorageWorkers, @@ -489,9 +500,10 @@ type ClientFixture struct { func (f *ClientFixture) Create(net *Network) (*Client, error) { return net.NewClient(&ClientCfg{ NodeCfg: NodeCfg{ - Consensus: f.Consensus, - AllowErrorTermination: f.AllowErrorTermination, - AllowEarlyTermination: f.AllowEarlyTermination, + Consensus: f.Consensus, + AllowErrorTermination: f.AllowErrorTermination, + AllowEarlyTermination: f.AllowEarlyTermination, + SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, }, MaxTransactionAge: f.MaxTransactionAge, }) diff --git a/go/oasis-test-runner/oasis/keymanager.go b/go/oasis-test-runner/oasis/keymanager.go index 2a6b32eaa50..630c7ee734b 100644 --- a/go/oasis-test-runner/oasis/keymanager.go +++ b/go/oasis-test-runner/oasis/keymanager.go @@ -279,6 +279,7 @@ func (km *Keymanager) startNode() error { workerKeymanagerEnabled(). workerKeymanagerRuntimeID(km.runtime.id). configureDebugCrashPoints(km.crashPointsProbability). + tendermintSupplementarySanity(km.supplementarySanityInterval). appendNetwork(km.net). appendEntity(km.entity) @@ -344,6 +345,7 @@ func (net *Network) NewKeymanager(cfg *KeymanagerCfg) (*Keymanager, error) { termEarlyOk: cfg.AllowEarlyTermination, termErrorOk: cfg.AllowErrorTermination, crashPointsProbability: cfg.CrashPointsProbability, + supplementarySanityInterval: cfg.SupplementarySanityInterval, disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, consensus: cfg.Consensus, diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index 3afc1104309..6470d9d4941 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -96,7 +96,8 @@ type Node struct { // nolint: maligned isStopping bool noAutoStart bool - crashPointsProbability float64 + crashPointsProbability float64 + supplementarySanityInterval uint64 disableDefaultLogWatcherHandlerFactories bool logWatcherHandlerFactories []log.WatcherHandlerFactory @@ -245,9 +246,10 @@ func (n *Node) SetConsensusStateSync(cfg *ConsensusStateSyncCfg) { // NodeCfg defines the common node configuration options. type NodeCfg struct { // nolint: maligned - AllowEarlyTermination bool - AllowErrorTermination bool - CrashPointsProbability float64 + AllowEarlyTermination bool + AllowErrorTermination bool + CrashPointsProbability float64 + SupplementarySanityInterval uint64 NoAutoStart bool diff --git a/go/oasis-test-runner/oasis/sentry.go b/go/oasis-test-runner/oasis/sentry.go index 02b56135a70..c3a2ec68d7a 100644 --- a/go/oasis-test-runner/oasis/sentry.go +++ b/go/oasis-test-runner/oasis/sentry.go @@ -80,6 +80,7 @@ func (sentry *Sentry) startNode() error { tendermintPrune(sentry.consensus.PruneNumKept). 
tendermintRecoverCorruptedWAL(sentry.consensus.TendermintRecoverCorruptedWAL). configureDebugCrashPoints(sentry.crashPointsProbability). + tendermintSupplementarySanity(sentry.supplementarySanityInterval). appendNetwork(sentry.net). appendSeedNodes(sentry.net.seeds). internalSocketAddress(sentry.net.validators[0].SocketPath()) @@ -150,6 +151,7 @@ func (net *Network) NewSentry(cfg *SentryCfg) (*Sentry, error) { net: net, dir: sentryDir, crashPointsProbability: cfg.CrashPointsProbability, + supplementarySanityInterval: cfg.SupplementarySanityInterval, disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, }, diff --git a/go/oasis-test-runner/oasis/storage.go b/go/oasis-test-runner/oasis/storage.go index 3f4fa805bea..1535e83723f 100644 --- a/go/oasis-test-runner/oasis/storage.go +++ b/go/oasis-test-runner/oasis/storage.go @@ -125,6 +125,7 @@ func (worker *Storage) startNode() error { workerStorageDebugDisableCheckpointSync(worker.checkpointSyncDisabled). workerStorageCheckpointCheckInterval(worker.checkpointCheckInterval). configureDebugCrashPoints(worker.crashPointsProbability). + tendermintSupplementarySanity(worker.supplementarySanityInterval). appendNetwork(worker.net). appendEntity(worker.entity) @@ -199,6 +200,7 @@ func (net *Network) NewStorage(cfg *StorageCfg) (*Storage, error) { termEarlyOk: cfg.AllowEarlyTermination, termErrorOk: cfg.AllowErrorTermination, crashPointsProbability: cfg.CrashPointsProbability, + supplementarySanityInterval: cfg.SupplementarySanityInterval, disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, consensus: cfg.Consensus, diff --git a/go/oasis-test-runner/oasis/validator.go b/go/oasis-test-runner/oasis/validator.go index c38caa8e9a5..8d28119df48 100644 --- a/go/oasis-test-runner/oasis/validator.go +++ b/go/oasis-test-runner/oasis/validator.go @@ -90,6 +90,7 @@ func (val *Validator) startNode() error { tendermintPrune(val.consensus.PruneNumKept). tendermintRecoverCorruptedWAL(val.consensus.TendermintRecoverCorruptedWAL). configureDebugCrashPoints(val.crashPointsProbability). + tendermintSupplementarySanity(val.supplementarySanityInterval). appendNetwork(val.net). 
appendEntity(val.entity) @@ -104,10 +105,6 @@ func (val *Validator) startNode() error { workerConsensusRPCEnabled() } - if len(val.net.validators) >= 1 && val == val.net.validators[0] { - args = args.tendermintSupplementarySanityEnabled() - } - if err := val.net.startOasisNode(&val.Node, nil, args); err != nil { return fmt.Errorf("oasis/validator: failed to launch node %s: %w", val.Name, err) } @@ -136,6 +133,7 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) { termEarlyOk: cfg.AllowEarlyTermination, termErrorOk: cfg.AllowErrorTermination, crashPointsProbability: cfg.CrashPointsProbability, + supplementarySanityInterval: cfg.SupplementarySanityInterval, disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, consensus: cfg.Consensus, diff --git a/go/oasis-test-runner/scenario/e2e/e2e.go b/go/oasis-test-runner/scenario/e2e/e2e.go index a6bbd0f5546..8ce86f2f6e9 100644 --- a/go/oasis-test-runner/scenario/e2e/e2e.go +++ b/go/oasis-test-runner/scenario/e2e/e2e.go @@ -106,7 +106,7 @@ func (sc *E2E) Fixture() (*oasis.NetworkFixture, error) { {}, }, Validators: []oasis.ValidatorFixture{ - {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, + {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true, SupplementarySanityInterval: 1}}, {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, }, diff --git a/go/oasis-test-runner/scenario/e2e/gas_fees_staking.go b/go/oasis-test-runner/scenario/e2e/gas_fees_staking.go index e7a8fb017c5..56343b0fbee 100644 --- a/go/oasis-test-runner/scenario/e2e/gas_fees_staking.go +++ b/go/oasis-test-runner/scenario/e2e/gas_fees_staking.go @@ -106,7 +106,7 @@ func (sc *gasFeesImpl) Fixture() (*oasis.NetworkFixture, error) { Validators: []oasis.ValidatorFixture{ // Create three validators, each with its own entity so we can test // if gas disbursement works correctly. - {Entity: 1, Consensus: oasis.ConsensusFixture{MinGasPrice: 1}}, + {Entity: 1, Consensus: oasis.ConsensusFixture{MinGasPrice: 1, SupplementarySanityInterval: 1}}, {Entity: 2, Consensus: oasis.ConsensusFixture{MinGasPrice: 1}}, {Entity: 3, Consensus: oasis.ConsensusFixture{MinGasPrice: 1}}, }, diff --git a/go/oasis-test-runner/scenario/e2e/genesis_file.go b/go/oasis-test-runner/scenario/e2e/genesis_file.go index fb281def659..acea5d5a9df 100644 --- a/go/oasis-test-runner/scenario/e2e/genesis_file.go +++ b/go/oasis-test-runner/scenario/e2e/genesis_file.go @@ -44,7 +44,7 @@ func (s *genesisFileImpl) Fixture() (*oasis.NetworkFixture, error) { // be possible to run this configuration as the PVSS backend // currently requires multiple validators. 
f.Validators = []oasis.ValidatorFixture{ - {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, + {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true, SupplementarySanityInterval: 1}}, } f.Network.Beacon.Backend = beacon.BackendInsecure diff --git a/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go b/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go index c330aa10437..dface0a537c 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go @@ -137,7 +137,7 @@ func (sc *governanceConsensusUpgradeImpl) Fixture() (*oasis.NetworkFixture, erro } f.Validators = []oasis.ValidatorFixture{ - {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}, AllowErrorTermination: true}, + {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true, SupplementarySanityInterval: 1}, AllowErrorTermination: true}, {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}, AllowErrorTermination: true}, {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}, AllowErrorTermination: true}, } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 50c55be59ad..abb4776b1bd 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -205,7 +205,7 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { }, }, Validators: []oasis.ValidatorFixture{ - {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, + {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true, SupplementarySanityInterval: 1}}, {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, {Entity: 1, Consensus: oasis.ConsensusFixture{EnableConsensusRPCWorker: true}}, }, diff --git a/go/oasis-test-runner/scenario/e2e/runtime/sentry.go b/go/oasis-test-runner/scenario/e2e/runtime/sentry.go index b4d4ec277ad..733192c22ac 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/sentry.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/sentry.go @@ -127,6 +127,7 @@ func (s *sentryImpl) Fixture() (*oasis.NetworkFixture, error) { Entity: 1, LogWatcherHandlerFactories: validatorExtraLogWatcherHandlerFactories, Sentries: []int{0, 1}, + Consensus: oasis.ConsensusFixture{SupplementarySanityInterval: 1}, }, { Entity: 1, diff --git a/go/oasis-test-runner/scenario/e2e/upgrade.go b/go/oasis-test-runner/scenario/e2e/upgrade.go index 3f84c5955db..e9ff61a033a 100644 --- a/go/oasis-test-runner/scenario/e2e/upgrade.go +++ b/go/oasis-test-runner/scenario/e2e/upgrade.go @@ -136,7 +136,7 @@ func (sc *nodeUpgradeImpl) Fixture() (*oasis.NetworkFixture, error) { {}, }, Validators: []oasis.ValidatorFixture{ - {Entity: 1, AllowErrorTermination: true}, + {Entity: 1, AllowErrorTermination: true, Consensus: oasis.ConsensusFixture{SupplementarySanityInterval: 1}}, {Entity: 1, AllowErrorTermination: true}, {Entity: 1, AllowErrorTermination: true}, {Entity: 1, AllowErrorTermination: true}, diff --git a/go/oasis-test-runner/scenario/e2e/upgrade_cancel.go b/go/oasis-test-runner/scenario/e2e/upgrade_cancel.go index 67d67dae22c..a45231b18bd 100644 --- a/go/oasis-test-runner/scenario/e2e/upgrade_cancel.go +++ b/go/oasis-test-runner/scenario/e2e/upgrade_cancel.go @@ -64,7 +64,7 @@ func (sc *nodeUpgradeCancelImpl) Fixture() 
(*oasis.NetworkFixture, error) { {}, }, Validators: []oasis.ValidatorFixture{ - {Entity: 1}, + {Entity: 1, Consensus: oasis.ConsensusFixture{SupplementarySanityInterval: 1}}, {Entity: 1}, {Entity: 1}, {Entity: 1}, From b822efb373880a01cba5d7fe1bb87d1841f06047 Mon Sep 17 00:00:00 2001 From: ptrus Date: Mon, 22 Feb 2021 12:05:32 +0100 Subject: [PATCH 2/5] go/txsource: Separate client node for supplementary sanity --- .../scenario/e2e/runtime/txsource.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go index a727f61db0c..75c531e4a0c 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go @@ -99,7 +99,7 @@ var TxSourceMultiShortSGX scenario.Scenario = &txSourceImpl{ consensusPruneDisabledProbability: 0.1, consensusPruneMinKept: 100, consensusPruneMaxKept: 200, - // XXX: don't use more node as SGX E2E test instances cannot handle much + // XXX: don't use more nodes as SGX E2E test instances cannot handle many // more nodes that are currently configured. numValidatorNodes: 3, numKeyManagerNodes: 1, @@ -133,13 +133,12 @@ var TxSourceMulti scenario.Scenario = &txSourceImpl{ consensusPruneMaxKept: 1000, enableCrashPoints: true, // Nodes getting killed commonly result in corrupted tendermint WAL when the - // node is restarted. Enable automatic corrupted WAL recovery for validator - // nodes. + // node is restarted. Enable automatic corrupted WAL recovery for nodes. tendermintRecoverCorruptedWAL: true, - // Use 4 validators so that consensus can keep making progress - // when a node is being killed and restarted. + // Use 4 validators so that consensus can keep making progress when a node + // is being killed and restarted. numValidatorNodes: 4, - // Use 2 keymanager so that at least one keymanager is accessible when + // Use 2 keymanagers so that at least one keymanager is accessible when // the other one is being killed or shut down. numKeyManagerNodes: 2, // Use 4 storage nodes so runtime continues to work when one of the nodes @@ -539,6 +538,13 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) { sc.generateConsensusFixture(&f.ByzantineNodes[i].Consensus, false) } + // Add a sanity-checker client node. + // NOTE: we skip the sanity checker on the validators as it blocks consensus + // and it can cause a node to fall behind. 
+ f.Clients = append(f.Clients, oasis.ClientFixture{ + Consensus: oasis.ConsensusFixture{SupplementarySanityInterval: 1}, + }) + return f, nil } From c634ca4d882c7e0e8c05951cf2ee87db0643a9d5 Mon Sep 17 00:00:00 2001 From: ptrus Date: Tue, 23 Feb 2021 12:51:46 +0100 Subject: [PATCH 3/5] go/oasis-test-runner: Add support for configuring pprof --- .changelog/3719.internal.2.md | 3 ++ go/oasis-node/cmd/common/pprof/pprof.go | 7 +++-- go/oasis-test-runner/oasis/args.go | 11 +++++++ go/oasis-test-runner/oasis/byzantine.go | 8 ++++- go/oasis-test-runner/oasis/client.go | 23 ++++++++++----- go/oasis-test-runner/oasis/compute.go | 8 ++++- go/oasis-test-runner/oasis/fixture.go | 25 ++++++++++++++++ go/oasis-test-runner/oasis/keymanager.go | 8 ++++- go/oasis-test-runner/oasis/oasis.go | 3 ++ go/oasis-test-runner/oasis/sentry.go | 8 ++++- go/oasis-test-runner/oasis/storage.go | 8 ++++- go/oasis-test-runner/oasis/validator.go | 8 ++++- .../e2e/runtime/governance_upgrade.go | 2 +- .../scenario/e2e/runtime/late_start.go | 4 ++- .../scenario/e2e/runtime/multiple_runtimes.go | 2 ++ .../scenario/e2e/runtime/runtime.go | 2 +- .../e2e/runtime/runtime_governance.go | 1 + .../scenario/e2e/runtime/txsource.go | 29 ++++++++++++++----- 18 files changed, 134 insertions(+), 26 deletions(-) create mode 100644 .changelog/3719.internal.2.md diff --git a/.changelog/3719.internal.2.md b/.changelog/3719.internal.2.md new file mode 100644 index 00000000000..4dddb8be83a --- /dev/null +++ b/.changelog/3719.internal.2.md @@ -0,0 +1,3 @@ +go/oasis-test-runner: Add support for configuring `pprof` + +Adds support for configuring `pprof` on the test nodes. diff --git a/go/oasis-node/cmd/common/pprof/pprof.go b/go/oasis-node/cmd/common/pprof/pprof.go index b863a7c0cbe..96f69a7183f 100644 --- a/go/oasis-node/cmd/common/pprof/pprof.go +++ b/go/oasis-node/cmd/common/pprof/pprof.go @@ -18,7 +18,8 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/service" ) -const cfgPprofBind = "pprof.bind" +// CfgPprofBind enables profiling endpoint at the given address. +const CfgPprofBind = "pprof.bind" // Flags has the flags used by the pprof service. var Flags = flag.NewFlagSet("", flag.ContinueOnError) @@ -118,7 +119,7 @@ func (p *pprofService) Cleanup() { // New constructs a new pprof service. 
func New(ctx context.Context) (service.BackgroundService, error) { - address := viper.GetString(cfgPprofBind) + address := viper.GetString(CfgPprofBind) return &pprofService{ BaseBackgroundService: *service.NewBaseBackgroundService("pprof"), @@ -129,7 +130,7 @@ func New(ctx context.Context) (service.BackgroundService, error) { } func init() { - Flags.String(cfgPprofBind, "", "enable profiling endpoint at given address") + Flags.String(CfgPprofBind, "", "enable profiling endpoint at given address") _ = viper.BindPFlags(Flags) } diff --git a/go/oasis-test-runner/oasis/args.go b/go/oasis-test-runner/oasis/args.go index 8a5373545ee..024928589cb 100644 --- a/go/oasis-test-runner/oasis/args.go +++ b/go/oasis-test-runner/oasis/args.go @@ -26,6 +26,7 @@ import ( "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/grpc" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/metrics" + "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/pprof" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/debug/byzantine" runtimeClient "github.com/oasisprotocol/oasis-core/go/runtime/client" runtimeRegistry "github.com/oasisprotocol/oasis-core/go/runtime/registry" @@ -66,6 +67,16 @@ func (args *argBuilder) debugAllowTestKeys() *argBuilder { return args } +func (args *argBuilder) debugEnableProfiling(port uint16) *argBuilder { + if port == 0 { + return args + } + args.vec = append(args.vec, + "--"+pprof.CfgPprofBind, "0.0.0.0:"+strconv.Itoa(int(port)), + ) + return args +} + func (args *argBuilder) grpcServerPort(port uint16) *argBuilder { args.vec = append(args.vec, []string{ "--" + grpc.CfgServerPort, strconv.Itoa(int(port)), diff --git a/go/oasis-test-runner/oasis/byzantine.go b/go/oasis-test-runner/oasis/byzantine.go index 3728d75052a..070cbec88ce 100644 --- a/go/oasis-test-runner/oasis/byzantine.go +++ b/go/oasis-test-runner/oasis/byzantine.go @@ -41,6 +41,7 @@ func (worker *Byzantine) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(worker.Node.pprofPort). tendermintDebugAllowDuplicateIP(). tendermintCoreAddress(worker.consensusPort). tendermintDebugAddrBookLenient(). @@ -118,11 +119,16 @@ func (net *Network) NewByzantine(cfg *ByzantineCfg) (*Byzantine, error) { activationEpoch: cfg.ActivationEpoch, runtime: cfg.Runtime, } + net.nextNodePort += 2 worker.doStartNode = worker.startNode copy(worker.NodeID[:], nodeKey[:]) + if cfg.EnableProfiling { + worker.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + net.byzantine = append(net.byzantine, worker) - net.nextNodePort += 2 if err := net.AddLogWatcher(&worker.Node); err != nil { net.logger.Error("failed to add log watcher", diff --git a/go/oasis-test-runner/oasis/client.go b/go/oasis-test-runner/oasis/client.go index df2c724ea84..b17dad64204 100644 --- a/go/oasis-test-runner/oasis/client.go +++ b/go/oasis-test-runner/oasis/client.go @@ -4,13 +4,13 @@ import ( "fmt" "github.com/oasisprotocol/oasis-core/go/common/node" - registry "github.com/oasisprotocol/oasis-core/go/registry/api" ) // Client is an Oasis client node. type Client struct { Node + runtimes []int maxTransactionAge int64 consensusPort uint16 @@ -21,6 +21,7 @@ type Client struct { type ClientCfg struct { NodeCfg + Runtimes []int MaxTransactionAge int64 } @@ -28,6 +29,7 @@ func (client *Client) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(client.Node.pprofPort). 
tendermintPrune(client.consensus.PruneNumKept). tendermintRecoverCorruptedWAL(client.consensus.TendermintRecoverCorruptedWAL). tendermintCoreAddress(client.consensusPort). @@ -35,17 +37,17 @@ func (client *Client) startNode() error { appendSeedNodes(client.net.seeds). workerP2pPort(client.p2pPort). workerP2pEnabled(). - runtimeTagIndexerBackend("bleve"). tendermintSupplementarySanity(client.supplementarySanityInterval) if client.maxTransactionAge != 0 { args = args.runtimeClientMaxTransactionAge(client.maxTransactionAge) } - for _, v := range client.net.runtimes { - if v.kind != registry.KindCompute { - continue - } + if len(client.runtimes) > 0 { + args = args.runtimeTagIndexerBackend("bleve") + } + for _, idx := range client.runtimes { + v := client.net.runtimes[idx] // XXX: could support configurable binary idx if ever needed. args = args.appendHostedRuntime(v, node.TEEHardwareInvalid, 0) } @@ -85,14 +87,21 @@ func (net *Network) NewClient(cfg *ClientCfg) (*Client, error) { termErrorOk: cfg.AllowErrorTermination, supplementarySanityInterval: cfg.SupplementarySanityInterval, }, + runtimes: cfg.Runtimes, maxTransactionAge: cfg.MaxTransactionAge, consensusPort: net.nextNodePort, p2pPort: net.nextNodePort + 1, } + net.nextNodePort += 2 + client.doStartNode = client.startNode + if cfg.EnableProfiling { + client.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + net.clients = append(net.clients, client) - net.nextNodePort += 2 return client, nil } diff --git a/go/oasis-test-runner/oasis/compute.go b/go/oasis-test-runner/oasis/compute.go index 3d15848225e..ea61b265fcf 100644 --- a/go/oasis-test-runner/oasis/compute.go +++ b/go/oasis-test-runner/oasis/compute.go @@ -91,6 +91,7 @@ func (worker *Compute) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(worker.Node.pprofPort). workerCertificateRotation(true). tendermintCoreAddress(worker.consensusPort). tendermintSubmissionGasPrice(worker.consensus.SubmissionGasPrice). 
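The same port bookkeeping reshuffle recurs in every node constructor touched by this patch (byzantine, client, compute, keymanager, sentry, storage, validator): the node's fixed ports are reserved first, and the optional pprof port is claimed from the next free slot only when the fixture requests profiling. Restated as a hypothetical helper (illustrative only; the real fields live on the unexported Network/Node types):

	// nextPorts mirrors the allocation pattern used by the node constructors:
	// fixed ports first, then an optional pprof port. A returned pprof of 0
	// means profiling stays disabled (debugEnableProfiling is a no-op for 0).
	func nextPorts(next uint16, enableProfiling bool) (consensus, client, p2p, pprof, newNext uint16) {
		consensus, client, p2p = next, next+1, next+2
		newNext = next + 3
		if enableProfiling {
			pprof = newNext
			newNext++
		}
		return
	}
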
@@ -169,11 +170,16 @@ func (net *Network) NewCompute(cfg *ComputeCfg) (*Compute, error) { p2pPort: net.nextNodePort + 2, runtimes: cfg.Runtimes, } + net.nextNodePort += 3 worker.doStartNode = worker.startNode copy(worker.NodeID[:], nodeKey[:]) + if cfg.EnableProfiling { + worker.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + net.computeWorkers = append(net.computeWorkers, worker) - net.nextNodePort += 3 if err := net.AddLogWatcher(&worker.Node); err != nil { net.logger.Error("failed to add log watcher", diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index b15f43fa134..a4ba854fb43 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -161,6 +161,8 @@ type ValidatorFixture struct { // nolint: maligned CrashPointsProbability float64 `json:"crash_points_probability,omitempty"` + EnableProfiling bool `json:"enable_profiling"` + Entity int `json:"entity"` LogWatcherHandlerFactories []log.WatcherHandlerFactory `json:"-"` @@ -191,6 +193,7 @@ func (f *ValidatorFixture) Create(net *Network) (*Validator, error) { NoAutoStart: f.NoAutoStart, CrashPointsProbability: f.CrashPointsProbability, SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + EnableProfiling: f.EnableProfiling, }, Entity: entity, Sentries: sentries, @@ -295,6 +298,8 @@ type KeymanagerFixture struct { NoAutoStart bool `json:"no_auto_start,omitempty"` + EnableProfiling bool `json:"enable_profiling"` + Sentries []int `json:"sentries,omitempty"` // Consensus contains configuration for the consensus backend. @@ -327,6 +332,7 @@ func (f *KeymanagerFixture) Create(net *Network) (*Keymanager, error) { LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, CrashPointsProbability: f.CrashPointsProbability, SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + EnableProfiling: f.EnableProfiling, Consensus: f.Consensus, NoAutoStart: f.NoAutoStart, }, @@ -347,6 +353,8 @@ type StorageWorkerFixture struct { // nolint: maligned NoAutoStart bool `json:"no_auto_start,omitempty"` + EnableProfiling bool `json:"enable_profiling"` + DisableCertRotation bool `json:"disable_cert_rotation"` DisablePublicRPC bool `json:"disable_public_rpc"` @@ -381,6 +389,7 @@ func (f *StorageWorkerFixture) Create(net *Network) (*Storage, error) { AllowErrorTermination: f.AllowErrorTermination, CrashPointsProbability: f.CrashPointsProbability, SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + EnableProfiling: f.EnableProfiling, NoAutoStart: f.NoAutoStart, LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, Consensus: f.Consensus, @@ -410,6 +419,8 @@ type ComputeWorkerFixture struct { NoAutoStart bool `json:"no_auto_start,omitempty"` + EnableProfiling bool `json:"enable_profiling"` + // Consensus contains configuration for the consensus backend. Consensus ConsensusFixture `json:"consensus"` @@ -435,6 +446,7 @@ func (f *ComputeWorkerFixture) Create(net *Network) (*Compute, error) { NoAutoStart: f.NoAutoStart, CrashPointsProbability: f.CrashPointsProbability, SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + EnableProfiling: f.EnableProfiling, LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, Consensus: f.Consensus, }, @@ -462,6 +474,8 @@ type SentryFixture struct { CrashPointsProbability float64 `json:"crash_points_probability,omitempty"` + EnableProfiling bool `json:"enable_profiling"` + // Consensus contains configuration for the consensus backend. 
Consensus ConsensusFixture `json:"consensus"` @@ -477,6 +491,7 @@ func (f *SentryFixture) Create(net *Network) (*Sentry, error) { LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, CrashPointsProbability: f.CrashPointsProbability, SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + EnableProfiling: f.EnableProfiling, }, ValidatorIndices: f.Validators, StorageIndices: f.StorageWorkers, @@ -489,9 +504,14 @@ type ClientFixture struct { AllowErrorTermination bool `json:"allow_error_termination"` AllowEarlyTermination bool `json:"allow_early_termination"` + EnableProfiling bool `json:"enable_profiling"` + // Consensus contains configuration for the consensus backend. Consensus ConsensusFixture `json:"consensus"` + // Runtimes contains the indexes of the runtimes to enable. + Runtimes []int `json:"runtimes,omitempty"` + // MaxTransactionAge configures the MaxTransactionAge configuration of the client. MaxTransactionAge int64 `json:"max_transaction_age"` } @@ -504,8 +524,10 @@ func (f *ClientFixture) Create(net *Network) (*Client, error) { AllowErrorTermination: f.AllowErrorTermination, AllowEarlyTermination: f.AllowEarlyTermination, SupplementarySanityInterval: f.Consensus.SupplementarySanityInterval, + EnableProfiling: f.EnableProfiling, }, MaxTransactionAge: f.MaxTransactionAge, + Runtimes: f.Runtimes, }) } @@ -517,6 +539,8 @@ type ByzantineFixture struct { // nolint: maligned IdentitySeed string `json:"identity_seed"` Entity int `json:"entity"` + EnableProfiling bool `json:"enable_profiling"` + ActivationEpoch beacon.EpochTime `json:"activation_epoch"` Runtime int `json:"runtime"` @@ -539,6 +563,7 @@ func (f *ByzantineFixture) Create(net *Network) (*Byzantine, error) { DisableDefaultLogWatcherHandlerFactories: !f.EnableDefaultLogWatcherHandlerFactories, LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, Consensus: f.Consensus, + EnableProfiling: f.EnableProfiling, }, Script: f.Script, ExtraArgs: f.ExtraArgs, diff --git a/go/oasis-test-runner/oasis/keymanager.go b/go/oasis-test-runner/oasis/keymanager.go index 630c7ee734b..96c29314e6e 100644 --- a/go/oasis-test-runner/oasis/keymanager.go +++ b/go/oasis-test-runner/oasis/keymanager.go @@ -266,6 +266,7 @@ func (km *Keymanager) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(km.Node.pprofPort). workerCertificateRotation(true). tendermintCoreAddress(km.consensusPort). tendermintSubmissionGasPrice(km.consensus.SubmissionGasPrice). @@ -361,11 +362,16 @@ func (net *Network) NewKeymanager(cfg *KeymanagerCfg) (*Keymanager, error) { workerClientPort: net.nextNodePort + 1, mayGenerate: len(net.keymanagers) == 0, } + net.nextNodePort += 2 km.doStartNode = km.startNode copy(km.NodeID[:], nodeKey[:]) + if cfg.EnableProfiling { + km.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + net.keymanagers = append(net.keymanagers, km) - net.nextNodePort += 2 if err := net.AddLogWatcher(&km.Node); err != nil { net.logger.Error("failed to add log watcher", diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index 6470d9d4941..bd1778e865e 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -105,6 +105,8 @@ type Node struct { // nolint: maligned consensus ConsensusFixture consensusStateSync *ConsensusStateSyncCfg customGrpcSocketPath string + + pprofPort uint16 } // Exit returns a channel that will close once the node shuts down. 
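With pprofPort recorded on the Node, debugEnableProfiling passes it to the spawned process as the pprof bind address, so a test (or a developer inspecting a long txsource run) can pull profiles over HTTP. A minimal sketch of fetching a heap profile, assuming the node's pprof service exposes the standard net/http/pprof routes under /debug/pprof/; the helper name and output file are made up for illustration and require the fmt, io, net/http, and os imports:

	// dumpHeapProfile fetches a heap profile from a node started with
	// EnableProfiling and writes it to a file readable by `go tool pprof`.
	// Assumes the standard net/http/pprof routes are served on pprofPort.
	func dumpHeapProfile(pprofPort uint16) error {
		resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/debug/pprof/heap", pprofPort))
		if err != nil {
			return fmt.Errorf("fetching heap profile: %w", err)
		}
		defer resp.Body.Close()

		f, err := os.Create("heap.pprof")
		if err != nil {
			return err
		}
		defer f.Close()
		_, err = io.Copy(f, resp.Body)
		return err
	}
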
@@ -250,6 +252,7 @@ type NodeCfg struct { // nolint: maligned AllowErrorTermination bool CrashPointsProbability float64 SupplementarySanityInterval uint64 + EnableProfiling bool NoAutoStart bool diff --git a/go/oasis-test-runner/oasis/sentry.go b/go/oasis-test-runner/oasis/sentry.go index c3a2ec68d7a..80750f5be48 100644 --- a/go/oasis-test-runner/oasis/sentry.go +++ b/go/oasis-test-runner/oasis/sentry.go @@ -73,6 +73,7 @@ func (sentry *Sentry) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(sentry.Node.pprofPort). workerCertificateRotation(false). workerSentryEnabled(). workerSentryControlPort(sentry.controlPort). @@ -165,10 +166,15 @@ func (net *Network) NewSentry(cfg *SentryCfg) (*Sentry, error) { controlPort: net.nextNodePort + 1, sentryPort: net.nextNodePort + 2, } + net.nextNodePort += 3 sentry.doStartNode = sentry.startNode + if cfg.EnableProfiling { + sentry.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + net.sentries = append(net.sentries, sentry) - net.nextNodePort += 3 if err := net.AddLogWatcher(&sentry.Node); err != nil { net.logger.Error("failed to add log watcher", diff --git a/go/oasis-test-runner/oasis/storage.go b/go/oasis-test-runner/oasis/storage.go index 1535e83723f..3530559e891 100644 --- a/go/oasis-test-runner/oasis/storage.go +++ b/go/oasis-test-runner/oasis/storage.go @@ -111,6 +111,7 @@ func (worker *Storage) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(worker.Node.pprofPort). workerCertificateRotation(!worker.disableCertRotation). tendermintCoreAddress(worker.consensusPort). tendermintSubmissionGasPrice(worker.consensus.SubmissionGasPrice). @@ -220,11 +221,16 @@ func (net *Network) NewStorage(cfg *StorageCfg) (*Storage, error) { p2pPort: net.nextNodePort + 2, runtimes: cfg.Runtimes, } + net.nextNodePort += 3 worker.doStartNode = worker.startNode copy(worker.NodeID[:], nodeKey[:]) + if cfg.EnableProfiling { + worker.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + net.storageWorkers = append(net.storageWorkers, worker) - net.nextNodePort += 3 if err := net.AddLogWatcher(&worker.Node); err != nil { net.logger.Error("failed to add log watcher", diff --git a/go/oasis-test-runner/oasis/validator.go b/go/oasis-test-runner/oasis/validator.go index 8d28119df48..f24825322e0 100644 --- a/go/oasis-test-runner/oasis/validator.go +++ b/go/oasis-test-runner/oasis/validator.go @@ -82,6 +82,7 @@ func (val *Validator) startNode() error { args := newArgBuilder(). debugDontBlameOasis(). debugAllowTestKeys(). + debugEnableProfiling(val.Node.pprofPort). workerCertificateRotation(true). consensusValidator(). tendermintCoreAddress(val.consensusPort). 
@@ -144,8 +145,14 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) { consensusPort: net.nextNodePort, clientPort: net.nextNodePort + 1, } + net.nextNodePort += 2 val.doStartNode = val.startNode + if cfg.EnableProfiling { + val.Node.pprofPort = net.nextNodePort + net.nextNodePort++ + } + var consensusAddrs []interface{ String() string } localhost := netPkg.ParseIP("127.0.0.1") if len(val.sentries) > 0 { @@ -214,7 +221,6 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) { } net.validators = append(net.validators, val) - net.nextNodePort += 2 if err := net.AddLogWatcher(&val.Node); err != nil { net.logger.Error("failed to add log watcher", diff --git a/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go b/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go index dface0a537c..1f29083cdd6 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/governance_upgrade.go @@ -154,7 +154,7 @@ func (sc *governanceConsensusUpgradeImpl) Fixture() (*oasis.NetworkFixture, erro {Entity: 1, Runtimes: []int{1}, AllowErrorTermination: true}, } f.Clients = []oasis.ClientFixture{ - {AllowErrorTermination: true}, + {AllowErrorTermination: true, Runtimes: []int{1}}, } switch { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/late_start.go b/go/oasis-test-runner/scenario/e2e/runtime/late_start.go index fd703c59247..6bfc1e108d4 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/late_start.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/late_start.go @@ -63,7 +63,9 @@ func (sc *lateStartImpl) Run(childEnv *env.Env) error { time.Sleep(lateStartInitialWait) sc.Logger.Info("Starting the client node") - clientFixture := &oasis.ClientFixture{} + clientFixture := &oasis.ClientFixture{ + Runtimes: []int{1}, + } client, err := clientFixture.Create(sc.Net) if err != nil { return err diff --git a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go index 69a99613124..82fb72c53d5 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go @@ -148,6 +148,8 @@ func (sc *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { ) } + f.Clients[0].Runtimes = computeRuntimes + return f, nil } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index abb4776b1bd..073d0e455cc 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -227,7 +227,7 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { Sentries: []oasis.SentryFixture{}, Seeds: []oasis.SeedFixture{{}}, Clients: []oasis.ClientFixture{ - {}, + {Runtimes: []int{1}}, }, } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime_governance.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime_governance.go index ae514df1920..e7d93b152e8 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime_governance.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime_governance.go @@ -133,6 +133,7 @@ func (sc *runtimeGovernanceImpl) Fixture() (*oasis.NetworkFixture, error) { }, ) } + f.Clients[0].Runtimes = computeRuntimes // Set up staking. 
f.Network.StakingGenesis = &staking.Genesis{ diff --git a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go index 75c531e4a0c..dee067a0e1b 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go @@ -76,6 +76,7 @@ var TxSourceMultiShort scenario.Scenario = &txSourceImpl{ numKeyManagerNodes: 2, numStorageNodes: 2, numComputeNodes: 4, + numClientNodes: 2, } // TxSourceMultiShortSGX uses multiple workloads for a short time. @@ -105,6 +106,7 @@ var TxSourceMultiShortSGX scenario.Scenario = &txSourceImpl{ numKeyManagerNodes: 1, numStorageNodes: 2, numComputeNodes: 4, + numClientNodes: 1, } // TxSourceMulti uses multiple workloads. @@ -150,6 +152,9 @@ var TxSourceMulti scenario.Scenario = &txSourceImpl{ // In worst case, 2 nodes can be offline at the same time. Aditionally we // need one backup node and one extra node. numComputeNodes: 5, + // Second client node is used to run supplementary-sanity checks which can + // cause the node to fall behind over the long run. + numClientNodes: 2, } type txSourceImpl struct { // nolint: maligned @@ -175,6 +180,7 @@ type txSourceImpl struct { // nolint: maligned numValidatorNodes int numKeyManagerNodes int numComputeNodes int + numClientNodes int // Configurable number of storage nodes. If running tests with long node // shutdowns enabled, make sure this is at least `MinWriteReplication+1`, @@ -487,6 +493,21 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) { }) } f.StorageWorkers = storageWorkers + var clients []oasis.ClientFixture + for i := 0; i < sc.numClientNodes; i++ { + c := oasis.ClientFixture{} + // Enable runtime on the first node. + if i == 0 { + c.Runtimes = []int{1} + } + // Enable supplementary sanity and profiling on the last client node. + if i == sc.numClientNodes-1 { + c.Consensus.SupplementarySanityInterval = 1 + c.EnableProfiling = true + } + clients = append(clients, c) + } + f.Clients = clients // Update validators to require fee payments. for i := range f.Validators { @@ -538,13 +559,6 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) { sc.generateConsensusFixture(&f.ByzantineNodes[i].Consensus, false) } - // Add a sanity-checker client node. - // NOTE: we skip the sanity checker on the validators as it blocks consensus - // and it can cause a node to fall behind. - f.Clients = append(f.Clients, oasis.ClientFixture{ - Consensus: oasis.ConsensusFixture{SupplementarySanityInterval: 1}, - }) - return f, nil } @@ -811,6 +825,7 @@ func (sc *txSourceImpl) Clone() scenario.Scenario { numKeyManagerNodes: sc.numKeyManagerNodes, numStorageNodes: sc.numStorageNodes, numComputeNodes: sc.numComputeNodes, + numClientNodes: sc.numClientNodes, seed: sc.seed, // rng must always be reinitialized from seed by calling PreInit(). 
} From 6d4bcaebd89f72c2a029866305faf2bed0abc1af Mon Sep 17 00:00:00 2001 From: ptrus Date: Thu, 25 Feb 2021 07:19:13 +0100 Subject: [PATCH 4/5] go/txsource/governance: configurable timeout for governance iterations --- .../cmd/debug/txsource/workload/governance.go | 57 +++++++++++-------- .../scenario/e2e/runtime/txsource.go | 2 +- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/go/oasis-node/cmd/debug/txsource/workload/governance.go b/go/oasis-node/cmd/debug/txsource/workload/governance.go index 0b2a4bc9184..83910525816 100644 --- a/go/oasis-node/cmd/debug/txsource/workload/governance.go +++ b/go/oasis-node/cmd/debug/txsource/workload/governance.go @@ -25,28 +25,31 @@ import ( // NameGovernance is the name of the governance workload. const NameGovernance = "governance" -var errUnexpectedGovTxResult = fmt.Errorf("unexpected governance tx result") +var ( -// Governance is the governance workload. -var Governance = &governanceWorkload{ - BaseWorkload: NewBaseWorkload(NameGovernance), -} + // Governance is the governance workload. + Governance = &governanceWorkload{ + BaseWorkload: NewBaseWorkload(NameGovernance), + } -var numProposerAccounts = 10 - -// How likely voters should vote YES for the proposal made by i'th proposer. -var proposersVoteYesRate = []uint8{ - 100, - 99, - 98, - 95, - 92, - 90, - 80, - 70, - 50, - 0, -} + // Timeout after each governance workload iteration. + iterationTimeout = 2 * time.Second + errUnexpectedGovTxResult = fmt.Errorf("unexpected governance tx result") + numProposerAccounts = 10 + // How likely voters should vote YES for the proposal made by i'th proposer. + proposersVoteYesRate = []uint8{ + 100, + 99, + 98, + 95, + 92, + 90, + 80, + 70, + 50, + 0, + } +) type governanceWorkload struct { BaseWorkload @@ -247,6 +250,7 @@ OUTER: if pendingUpgrade == nil { g.Logger.Debug("no eligible pending upgrade for submitting cancel upgrade proposal, skipping", "pending_upgrades", pendingUpgrades, + "current_epoch", g.currentEpoch, ) return nil } @@ -279,7 +283,11 @@ func (g *governanceWorkload) submitVote(voter signature.Signer, proposalID uint6 return nil case errors.Is(err, registry.ErrNoSuchNode), errors.Is(err, governance.ErrNotEligible): - g.Logger.Error("submitting vote error: voter not a validator, continuing", "err", err) + g.Logger.Error("submitting vote error: voter not a validator, continuing", + "err", err, + "voter", voter.Public(), + "proposal_id", proposalID, + ) return nil default: return fmt.Errorf("failed to sign and submit cast vote transaction: %w", err) @@ -411,7 +419,7 @@ func (g *governanceWorkload) Run( // Main workload loop. 
for { select { - case <-time.After(1 * time.Second): + case <-time.After(iterationTimeout): case <-gracefulExit.Done(): g.Logger.Debug("time's up") return nil @@ -433,12 +441,15 @@ func (g *governanceWorkload) Run( var upgrades []*upgrade.Descriptor upgrades, err = g.governance.PendingUpgrades(g.ctx, consensus.HeightLatest) if err != nil { - return fmt.Errorf("querying penging upgrades: %w", err) + return fmt.Errorf("querying pending upgrades: %w", err) } for _, up := range upgrades { if up.Epoch.AbsDiff(g.currentEpoch) != g.parameters.UpgradeCancelMinEpochDiff+2 { continue } + g.Logger.Debug("ensuring pending upgrade canceled", + "upgrade", up, + ) if err = g.ensureUpgradeCanceled(up); err != nil { return fmt.Errorf("ensuring upgrade canceled: %w", err) } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go index dee067a0e1b..ef291cba1cd 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/txsource.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/txsource.go @@ -244,7 +244,7 @@ func (sc *txSourceImpl) Fixture() (*oasis.NetworkFixture, error) { f.Network.GovernanceParameters = &governance.ConsensusParameters{ VotingPeriod: 10, MinProposalDeposit: *quantity.NewFromUint64(300), - Quorum: 90, + Quorum: 75, Threshold: 90, UpgradeMinEpochDiff: 40, UpgradeCancelMinEpochDiff: 20, From fcbcc970ec067ff6702d334caef0ff1789f71cbb Mon Sep 17 00:00:00 2001 From: ptrus Date: Fri, 26 Feb 2021 08:30:48 +0100 Subject: [PATCH 5/5] go/txsource/queries: do not query all proposals on every iteration --- .../cmd/debug/txsource/workload/queries.go | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/go/oasis-node/cmd/debug/txsource/workload/queries.go b/go/oasis-node/cmd/debug/txsource/workload/queries.go index 34edd424fa2..c6616f2e2e8 100644 --- a/go/oasis-node/cmd/debug/txsource/workload/queries.go +++ b/go/oasis-node/cmd/debug/txsource/workload/queries.go @@ -68,6 +68,10 @@ const ( // nodes have gap in history for the queried height, due to restoring from checkpoint. See also: // https://github.com/oasisprotocol/oasis-core/issues/3337 queriesNumAllowedQueryTxsHistoricalFailures = 5 + + // doQueryAllProposalsEvery configures how often the queries workload should query all (including) + // past proposals. + doQueryAllProposalsEvery = 10 ) // QueriesFlags are the queries workload flags. 
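The doQueryAllProposalsEvery constant introduced above drives a simple throttling idiom that the next hunks apply to the governance queries: the expensive scan over all past proposals (and their votes) runs only every tenth iteration, while the cheaper active-proposal checks still run every time. Restated as a small illustrative helper (an assumption for clarity, not code from the patch):

	// shouldDoFullScan reports whether the current iteration should run the
	// expensive full-proposal scan; with every == 10 this matches the
	// q.iteration % doQueryAllProposalsEvery == 0 check used below.
	func shouldDoFullScan(iteration, every uint64) bool {
		if every == 0 {
			return false
		}
		return iteration%every == 0
	}
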
@@ -94,6 +98,8 @@ type queries struct { queryTxsHistoricalFailures uint64 runtimeGenesisRound uint64 + + iteration uint64 } func (q *queries) sanityCheckTransactionEvents(ctx context.Context, height int64, txEvents []*results.Event) error { @@ -602,11 +608,34 @@ func (q *queries) doGovernanceQueries(ctx context.Context, rng *rand.Rand, heigh q.logger.Debug("doing governance queries", "height", height, ) - proposals, err := q.governance.Proposals(ctx, height) + + if q.iteration%doQueryAllProposalsEvery == 0 { + proposals, err := q.governance.Proposals(ctx, height) + if err != nil { + return fmt.Errorf("governance.Proposals: %w", err) + } + for _, p := range proposals { + var p2 *governance.Proposal + p2, err = q.governance.Proposal(ctx, &governance.ProposalQuery{Height: height, ProposalID: p.ID}) + if err != nil { + return fmt.Errorf("governance.Proposal: %w", err) + } + if !p.Content.Equals(&p2.Content) { + return fmt.Errorf("proposal contents not equal") + } + + _, err = q.governance.Votes(ctx, &governance.ProposalQuery{Height: height, ProposalID: p.ID}) + if err != nil { + return fmt.Errorf("governance.Votes: %w", err) + } + } + } + + activeProposals, err := q.governance.ActiveProposals(ctx, height) if err != nil { - return fmt.Errorf("governance.Proposals: %w", err) + return fmt.Errorf("governance.ActiveProposals: %w", err) } - for _, p := range proposals { + for _, p := range activeProposals { var p2 *governance.Proposal p2, err = q.governance.Proposal(ctx, &governance.ProposalQuery{Height: height, ProposalID: p.ID}) if err != nil { @@ -622,14 +651,6 @@ func (q *queries) doGovernanceQueries(ctx context.Context, rng *rand.Rand, heigh } } - activeProposals, err := q.governance.ActiveProposals(ctx, height) - if err != nil { - return fmt.Errorf("governance.ActiveProposals: %w", err) - } - if l1, l2 := len(activeProposals), len(proposals); l1 > l2 { - return fmt.Errorf("more active proposals(%v) than all proposals(%v)", l1, l2) - } - pendingUpgrades, err := q.governance.PendingUpgrades(ctx, height) if err != nil { return fmt.Errorf("governance.PendingUpgrades: %w", err) @@ -916,6 +937,7 @@ func (q *queries) Run( default: return err } + q.iteration++ select { case <-time.After(1 * time.Second):