From 4adae78f497a37b96215d271c577ffc1d460f866 Mon Sep 17 00:00:00 2001 From: ptrus Date: Thu, 14 May 2020 15:50:13 +0200 Subject: [PATCH] keymanager runtime upgrade e2e test --- .buildkite/benchmarks.pipeline.yml | 3 + .buildkite/code.pipeline.yml | 3 + .buildkite/longtests.pipeline.yml | 3 + .../scripts/download_e2e_test_artifacts.sh | 4 + .changelog/2517.bugfix.md | 6 + .changelog/2517.feature.md | 1 + Cargo.lock | 10 + Cargo.toml | 2 + Makefile | 3 +- go/oasis-net-runner/fixtures/default.go | 4 +- go/oasis-test-runner/oasis/args.go | 6 +- go/oasis-test-runner/oasis/compute.go | 3 +- go/oasis-test-runner/oasis/fixture.go | 11 +- go/oasis-test-runner/oasis/keymanager.go | 16 +- go/oasis-test-runner/oasis/oasis.go | 73 +++- go/oasis-test-runner/oasis/runtime.go | 40 ++- go/oasis-test-runner/scenario/e2e/e2e.go | 2 + .../scenario/e2e/keymanager_upgrade.go | 314 ++++++++++++++++++ .../scenario/e2e/multiple_runtimes.go | 4 +- go/oasis-test-runner/scenario/e2e/runtime.go | 9 +- go/worker/keymanager/init.go | 1 + go/worker/keymanager/worker.go | 29 +- tests/fixture-data/net-runner/default.json | 8 +- .../simple-keymanager-upgrade/Cargo.toml | 18 + .../simple-keymanager-upgrade/src/api.rs | 28 ++ .../simple-keymanager-upgrade/src/lib.rs | 4 + .../simple-keymanager-upgrade/src/main.rs | 9 + 27 files changed, 560 insertions(+), 54 deletions(-) create mode 100644 .changelog/2517.bugfix.md create mode 100644 .changelog/2517.feature.md create mode 100644 go/oasis-test-runner/scenario/e2e/keymanager_upgrade.go create mode 100644 tests/runtimes/simple-keymanager-upgrade/Cargo.toml create mode 100644 tests/runtimes/simple-keymanager-upgrade/src/api.rs create mode 100644 tests/runtimes/simple-keymanager-upgrade/src/lib.rs create mode 100644 tests/runtimes/simple-keymanager-upgrade/src/main.rs diff --git a/.buildkite/benchmarks.pipeline.yml b/.buildkite/benchmarks.pipeline.yml index 9a1f135c011..b41badd890f 100644 --- a/.buildkite/benchmarks.pipeline.yml +++ 
b/.buildkite/benchmarks.pipeline.yml @@ -97,14 +97,17 @@ steps: command: - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keyvalue + - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager-upgrade # Upload the built artifacts. - cd /var/tmp/artifacts/sgx/x86_64-fortanix-unknown-sgx/debug - buildkite-agent artifact upload simple-keymanager.sgxs - buildkite-agent artifact upload simple-keyvalue.sgxs + - buildkite-agent artifact upload simple-keymanager-upgrade.sgxs - cd /var/tmp/artifacts/default/debug - buildkite-agent artifact upload simple-keymanager - buildkite-agent artifact upload simple-keyvalue + - buildkite-agent artifact upload simple-keymanager-upgrade agents: buildkite_agent_size: large plugins: diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml index 5ae324013ac..bbf3335ee7a 100644 --- a/.buildkite/code.pipeline.yml +++ b/.buildkite/code.pipeline.yml @@ -159,14 +159,17 @@ steps: command: - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keyvalue + - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager-upgrade # Upload the built artifacts. 
- cd /var/tmp/artifacts/sgx/x86_64-fortanix-unknown-sgx/debug - buildkite-agent artifact upload simple-keymanager.sgxs - buildkite-agent artifact upload simple-keyvalue.sgxs + - buildkite-agent artifact upload simple-keymanager-upgrade.sgxs - cd /var/tmp/artifacts/default/debug - buildkite-agent artifact upload simple-keymanager - buildkite-agent artifact upload simple-keyvalue + - buildkite-agent artifact upload simple-keymanager-upgrade agents: buildkite_agent_size: large retry: diff --git a/.buildkite/longtests.pipeline.yml b/.buildkite/longtests.pipeline.yml index 021f2fe085e..b9bbbf1f69b 100644 --- a/.buildkite/longtests.pipeline.yml +++ b/.buildkite/longtests.pipeline.yml @@ -81,14 +81,17 @@ steps: command: - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keyvalue + - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager-upgrade # Upload the built artifacts. - cd /var/tmp/artifacts/sgx/x86_64-fortanix-unknown-sgx/debug - buildkite-agent artifact upload simple-keymanager.sgxs - buildkite-agent artifact upload simple-keyvalue.sgxs + - buildkite-agent artifact upload simple-keymanager-upgrade.sgxs - cd /var/tmp/artifacts/default/debug - buildkite-agent artifact upload simple-keymanager - buildkite-agent artifact upload simple-keyvalue + - buildkite-agent artifact upload simple-keymanager-upgrade agents: buildkite_agent_size: large plugins: diff --git a/.buildkite/scripts/download_e2e_test_artifacts.sh b/.buildkite/scripts/download_e2e_test_artifacts.sh index 11b8ab0ee51..4c1e0db7c64 100755 --- a/.buildkite/scripts/download_e2e_test_artifacts.sh +++ b/.buildkite/scripts/download_e2e_test_artifacts.sh @@ -24,6 +24,10 @@ download_artifact oasis-core-runtime-loader target/default/debug 755 download_artifact simple-keymanager.sgxs target/sgx/x86_64-fortanix-unknown-sgx/debug 755 download_artifact simple-keymanager target/default/debug 755 +# Simple Key manager runtime used in 
keymanager upgrade test. +download_artifact simple-keymanager-upgrade.sgxs target/sgx/x86_64-fortanix-unknown-sgx/debug 755 +download_artifact simple-keymanager-upgrade target/default/debug 755 + # Test simple-keyvalue runtime and clients. download_artifact test-long-term-client target/default/debug 755 download_artifact simple-keyvalue-client target/default/debug 755 diff --git a/.changelog/2517.bugfix.md b/.changelog/2517.bugfix.md new file mode 100644 index 00000000000..dedcc6f3dc4 --- /dev/null +++ b/.changelog/2517.bugfix.md @@ -0,0 +1,6 @@ +go/worker/keymanager: retry initialization in case of failure + +The keymanager worker registers only after the initialization either fails or +succeeds. In case the worker needs to replicate, the first initialization will +always fail, since other nodes' access control prevents it from replicating. +In that case the initialization should be retried. diff --git a/.changelog/2517.feature.md b/.changelog/2517.feature.md new file mode 100644 index 00000000000..aa1dbc62d80 --- /dev/null +++ b/.changelog/2517.feature.md @@ -0,0 +1 @@ +e2e/tests: added keymanager runtime upgrade test diff --git a/Cargo.lock b/Cargo.lock index 00487fa853a..6e481322f1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1336,6 +1336,16 @@ dependencies = [ "oasis-core-tools 0.3.0-alpha", ] +[[package]] +name = "simple-keymanager-upgrade" +version = "0.3.0-alpha" +dependencies = [ + "oasis-core-keymanager-api-common 0.3.0-alpha", + "oasis-core-keymanager-lib 0.3.0-alpha", + "oasis-core-runtime 0.3.0-alpha", + "oasis-core-tools 0.3.0-alpha", +] + [[package]] name = "simple-keyvalue" version = "0.3.0-alpha" diff --git a/Cargo.toml b/Cargo.toml index 3747f08b347..17de8c95103 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,8 @@ members = [ # Test runtimes. "tests/runtimes/simple-keyvalue", "tests/runtimes/simple-keymanager", + "tests/runtimes/simple-keymanager-upgrade", + # Test clients. 
"tests/clients/simple-keyvalue", "tests/clients/simple-keyvalue-enc", diff --git a/Makefile b/Makefile index fdf78d59c21..12c1f270a32 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,8 @@ include common.mk # List of runtimes to build. RUNTIMES := tests/runtimes/simple-keyvalue \ - tests/runtimes/simple-keymanager + tests/runtimes/simple-keymanager \ + tests/runtimes/simple-keymanager-upgrade # Set all target as the default target. all: build diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index df5b92e9dc2..feec748672b 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -67,7 +67,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { Kind: registry.KindKeyManager, Entity: 0, Keymanager: -1, - Binary: viper.GetString(cfgKeymanagerBinary), + Binaries: viper.GetStringSlice(cfgKeymanagerBinary), AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: &registry.AnyNodeRuntimeAdmissionPolicy{}, }, @@ -78,7 +78,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { Kind: registry.KindCompute, Entity: 0, Keymanager: 0, - Binary: viper.GetString(cfgRuntimeBinary), + Binaries: viper.GetStringSlice(cfgRuntimeBinary), Executor: registry.ExecutorParameters{ GroupSize: 2, GroupBackupSize: 1, diff --git a/go/oasis-test-runner/oasis/args.go b/go/oasis-test-runner/oasis/args.go index 30c345b62b2..3fbe64c8b65 100644 --- a/go/oasis-test-runner/oasis/args.go +++ b/go/oasis-test-runner/oasis/args.go @@ -454,9 +454,9 @@ func (args *argBuilder) appendRuntimePruner(p *RuntimePrunerCfg) *argBuilder { return args } -func (args *argBuilder) appendComputeNodeRuntime(rt *Runtime) *argBuilder { +func (args *argBuilder) appendComputeNodeRuntime(rt *Runtime, binaryIdx int) *argBuilder { args = args.runtimeSupported(rt.id). - workerRuntimePath(rt.id, rt.binary). + workerRuntimePath(rt.id, rt.binaries[binaryIdx]). 
appendRuntimePruner(&rt.pruner) return args } @@ -494,7 +494,7 @@ func (args *argBuilder) byzantineFakeSGX() *argBuilder { func (args *argBuilder) byzantineVersionFakeEnclaveID(rt *Runtime) *argBuilder { eid := sgx.EnclaveIdentity{ - MrEnclave: *rt.mrEnclave, + MrEnclave: *rt.mrEnclaves[0], MrSigner: *rt.mrSigner, } args.vec = append(args.vec, "--"+byzantine.CfgVersionFakeEnclaveID, eid.String()) diff --git a/go/oasis-test-runner/oasis/compute.go b/go/oasis-test-runner/oasis/compute.go index 1c8f5a2d872..12b9eead4c9 100644 --- a/go/oasis-test-runner/oasis/compute.go +++ b/go/oasis-test-runner/oasis/compute.go @@ -95,7 +95,8 @@ func (worker *Compute) startNode() error { if v.kind != registry.KindCompute { continue } - args = args.appendComputeNodeRuntime(v) + // XXX: could support configurable binary idx if ever needed. + args = args.appendComputeNodeRuntime(v, 0) } if err := worker.net.startOasisNode(&worker.Node, nil, args); err != nil { diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index cb5fa552996..8c549873c48 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -188,9 +188,9 @@ type RuntimeFixture struct { // nolint: maligned Entity int `json:"entity"` Keymanager int `json:"keymanager"` - Binary string `json:"binary"` - GenesisState string `json:"genesis_state"` - GenesisRound uint64 `json:"genesis_round"` + Binaries []string `json:"binaries"` + GenesisState string `json:"genesis_state"` + GenesisRound uint64 `json:"genesis_round"` Executor registry.ExecutorParameters `json:"executor"` Merge registry.MergeParameters `json:"merge"` @@ -235,7 +235,7 @@ func (f *RuntimeFixture) Create(netFixture *NetworkFixture, net *Network) (*Runt TxnScheduler: f.TxnScheduler, Storage: f.Storage, AdmissionPolicy: f.AdmissionPolicy, - Binary: f.Binary, + Binaries: f.Binaries, GenesisState: f.GenesisState, GenesisRound: f.GenesisRound, Pruner: f.Pruner, @@ -271,6 +271,8 @@ type 
KeymanagerFixture struct { AllowEarlyTermination bool `json:"allow_early_termination"` AllowErrorTermination bool `json:"allow_error_termination"` + NoAutoStart bool `json:"no_auto_start,omitempty"` + Sentries []int `json:"sentries,omitempty"` // Consensus contains configuration for the consensus backend. @@ -300,6 +302,7 @@ func (f *KeymanagerFixture) Create(net *Network) (*Keymanager, error) { AllowErrorTermination: f.AllowErrorTermination, LogWatcherHandlerFactories: f.LogWatcherHandlerFactories, Consensus: f.Consensus, + NoAutoStart: f.NoAutoStart, }, Runtime: runtime, Entity: entity, diff --git a/go/oasis-test-runner/oasis/keymanager.go b/go/oasis-test-runner/oasis/keymanager.go index e7d39666251..a93db7868ae 100644 --- a/go/oasis-test-runner/oasis/keymanager.go +++ b/go/oasis-test-runner/oasis/keymanager.go @@ -56,7 +56,11 @@ func (pol *KeymanagerPolicy) provision() error { "--" + kmCmd.CfgPolicyFile, policyPath, "--" + kmCmd.CfgPolicyID, pol.runtime.id.String(), "--" + kmCmd.CfgPolicySerial, strconv.Itoa(pol.serial), - "--" + kmCmd.CfgPolicyEnclaveID, pol.runtime.mrEnclave.String() + pol.runtime.mrSigner.String(), + } + for _, mrEnclave := range pol.runtime.mrEnclaves { + policyArgs = append(policyArgs, []string{ + "--" + kmCmd.CfgPolicyEnclaveID, mrEnclave.String() + pol.runtime.mrSigner.String(), + }...) } for _, rt := range pol.net.runtimes { @@ -64,8 +68,10 @@ func (pol *KeymanagerPolicy) provision() error { continue } - arg := fmt.Sprintf("%s=%s%s", rt.id, rt.mrEnclave, rt.mrSigner) - policyArgs = append(policyArgs, "--"+kmCmd.CfgPolicyMayQuery, arg) + for _, mrEnclave := range rt.mrEnclaves { + arg := fmt.Sprintf("%s=%s%s", rt.id, mrEnclave, rt.mrSigner) + policyArgs = append(policyArgs, "--"+kmCmd.CfgPolicyMayQuery, arg) + } } w, err := pol.dir.NewLogWriter("provision-policy.log") @@ -263,7 +269,8 @@ func (km *Keymanager) startNode() error { workerClientPort(km.workerClientPort). workerRuntimeProvisioner(workerCommon.RuntimeProvisionerSandboxed). 
workerRuntimeSGXLoader(km.net.cfg.RuntimeSGXLoaderBinary). - workerRuntimePath(km.runtime.id, km.runtime.binary). + // XXX: could support configurable binary idx if ever needed. + workerRuntimePath(km.runtime.id, km.runtime.binaries[0]). workerKeymanagerEnabled(). workerKeymanagerRuntimeID(km.runtime.id). appendNetwork(km.net). @@ -325,6 +332,7 @@ func (net *Network) NewKeymanager(cfg *KeymanagerCfg) (*Keymanager, error) { disableDefaultLogWatcherHandlerFactories: cfg.DisableDefaultLogWatcherHandlerFactories, logWatcherHandlerFactories: cfg.LogWatcherHandlerFactories, consensus: cfg.Consensus, + noAutoStart: cfg.NoAutoStart, }, runtime: cfg.Runtime, entity: cfg.Entity, diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index f4ee64f86c8..c941771ff5e 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -68,6 +68,7 @@ type Node struct { // nolint: maligned termErrorOk bool doStartNode func() error isStopping bool + noAutoStart bool disableDefaultLogWatcherHandlerFactories bool logWatcherHandlerFactories []log.WatcherHandlerFactory @@ -167,6 +168,8 @@ type NodeCfg struct { // nolint: maligned AllowEarlyTermination bool AllowErrorTermination bool + NoAutoStart bool + DisableDefaultLogWatcherHandlerFactories bool LogWatcherHandlerFactories []log.WatcherHandlerFactory @@ -289,7 +292,7 @@ func (net *Network) Runtimes() []*Runtime { return net.runtimes } -// Keymanager returns the keymanagers associated with the network. +// Keymanagers returns the keymanagers associated with the network. func (net *Network) Keymanagers() []*Keymanager { return net.keymanagers } @@ -379,7 +382,7 @@ func (net *Network) AddLogWatcher(node *Node) error { return nil } -// CloseLogWatchers closes all log watchers and checks if any errors were reported +// CheckLogWatchers closes all log watchers and checks if any errors were reported // while the log watchers were running. 
func (net *Network) CheckLogWatchers() (err error) { for _, w := range net.logWatchers { @@ -396,7 +399,7 @@ func (net *Network) CheckLogWatchers() (err error) { } // Start starts the network. -func (net *Network) Start() error { +func (net *Network) Start() error { // nolint: gocyclo net.logger.Info("starting network") // Figure out if the IAS proxy is needed by peeking at all the @@ -467,16 +470,22 @@ func (net *Network) Start() error { } } - net.logger.Debug("starting seed node") - if err = net.seedNode.startNode(); err != nil { - net.logger.Error("failed to start seed node", - "err", err, - ) - return err + if !net.seedNode.noAutoStart { + net.logger.Debug("starting seed node") + if err = net.seedNode.startNode(); err != nil { + net.logger.Error("failed to start seed node", + "err", err, + ) + return err + } } net.logger.Debug("starting validator node(s)") for _, v := range net.validators { + if v.noAutoStart { + continue + } + if err = v.startNode(); err != nil { net.logger.Error("failed to start validator", "err", err, @@ -494,8 +503,12 @@ func (net *Network) Start() error { time.Sleep(validatorStartDelay) } + net.logger.Debug("starting keymanager(s)") for _, km := range net.keymanagers { - net.logger.Debug("starting keymanager") + if km.noAutoStart { + continue + } + if err = km.startNode(); err != nil { net.logger.Error("failed to start keymanager node", "err", err, @@ -506,6 +519,10 @@ func (net *Network) Start() error { net.logger.Debug("starting storage node(s)") for _, v := range net.storageWorkers { + if v.noAutoStart { + continue + } + if err = v.startNode(); err != nil { net.logger.Error("failed to start storage worker", "err", err, @@ -516,6 +533,10 @@ func (net *Network) Start() error { net.logger.Debug("starting compute node(s)") for _, v := range net.computeWorkers { + if v.noAutoStart { + continue + } + if err = v.startNode(); err != nil { net.logger.Error("failed to start compute worker", "err", err, @@ -526,16 +547,25 @@ func (net *Network) 
Start() error { net.logger.Debug("starting sentry node(s)") for _, v := range net.sentries { + if v.noAutoStart { + continue + } + if err = v.startNode(); err != nil { net.logger.Error("failed to start sentry node", "err", err, ) return err } + } net.logger.Debug("starting client node(s)") for _, v := range net.clients { + if v.noAutoStart { + continue + } + if err = v.startNode(); err != nil { net.logger.Error("failed to start client node", "err", err, @@ -546,6 +576,10 @@ func (net *Network) Start() error { net.logger.Debug("starting byzantine node(s)") for _, v := range net.byzantine { + if v.noAutoStart { + continue + } + if err = v.startNode(); err != nil { net.logger.Error("failed to start byzantine node", "err", err, @@ -554,23 +588,34 @@ func (net *Network) Start() error { } } - // Use the first validator as a controller. - if len(net.validators) >= 1 { + // Use the first started validator as a controller. + for _, v := range net.validators { + if v.noAutoStart { + continue + } + - if net.controller, err = NewController(net.validators[0].SocketPath()); err != nil { + if net.controller, err = NewController(v.SocketPath()); err != nil { net.logger.Error("failed to create controller", "err", err, ) return fmt.Errorf("oasis: failed to create controller: %w", err) } + break } - // Create a client controller for the first client node. - if len(net.clients) >= 1 { + + // Create a client controller for the first started client node. 
+ for _, v := range net.clients { + if v.noAutoStart { + continue + } + - if net.clientController, err = NewController(net.clients[0].SocketPath()); err != nil { + if net.clientController, err = NewController(v.SocketPath()); err != nil { net.logger.Error("failed to create client controller", "err", err, ) return fmt.Errorf("oasis: failed to create client controller: %w", err) } + break } net.logger.Info("network started") diff --git a/go/oasis-test-runner/oasis/runtime.go b/go/oasis-test-runner/oasis/runtime.go index 0a9379f8a00..3e54f180253 100644 --- a/go/oasis-test-runner/oasis/runtime.go +++ b/go/oasis-test-runner/oasis/runtime.go @@ -26,9 +26,9 @@ type Runtime struct { // nolint: maligned id common.Namespace kind registry.RuntimeKind - binary string + binaries []string teeHardware node.TEEHardware - mrEnclave *sgx.MrEnclave + mrEnclaves []*sgx.MrEnclave mrSigner *sgx.MrSigner pruner RuntimePrunerCfg @@ -47,7 +47,7 @@ type RuntimeCfg struct { // nolint: maligned TEEHardware node.TEEHardware MrSigner *sgx.MrSigner - Binary string + Binaries []string GenesisState string GenesisRound uint64 @@ -83,9 +83,9 @@ func (rt *Runtime) Kind() registry.RuntimeKind { // GetEnclaveIdentity returns the runtime's enclave ID. func (rt *Runtime) GetEnclaveIdentity() *sgx.EnclaveIdentity { - if rt.mrEnclave != nil && rt.mrSigner != nil { + if rt.mrEnclaves != nil && rt.mrSigner != nil { return &sgx.EnclaveIdentity{ - MrEnclave: *rt.mrEnclave, + MrEnclave: *rt.mrEnclaves[0], MrSigner: *rt.mrSigner, } } @@ -176,22 +176,26 @@ func (net *Network) NewRuntime(cfg *RuntimeCfg) (*Runtime, error) { // TODO: Support genesis state. 
} } - var mrEnclave *sgx.MrEnclave + var mrEnclaves []*sgx.MrEnclave if cfg.TEEHardware == node.TEEHardwareIntelSGX { - if mrEnclave, err = deriveMrEnclave(cfg.Binary); err != nil { - return nil, err + enclaveIdentities := []sgx.EnclaveIdentity{} + for _, binary := range cfg.Binaries { + var mrEnclave *sgx.MrEnclave + if mrEnclave, err = deriveMrEnclave(binary); err != nil { + return nil, err + } + enclaveIdentities = append(enclaveIdentities, sgx.EnclaveIdentity{MrEnclave: *mrEnclave, MrSigner: *cfg.MrSigner}) + args = append(args, []string{ + "--" + cmdRegRt.CfgVersionEnclave, mrEnclave.String() + cfg.MrSigner.String(), + }...) + mrEnclaves = append(mrEnclaves, mrEnclave) } - + descriptor.Version.TEE = cbor.Marshal(registry.VersionInfoIntelSGX{ + Enclaves: enclaveIdentities, + }) args = append(args, []string{ "--" + cmdRegRt.CfgTEEHardware, cfg.TEEHardware.String(), - "--" + cmdRegRt.CfgVersionEnclave, mrEnclave.String() + cfg.MrSigner.String(), }...) - - descriptor.Version.TEE = cbor.Marshal(registry.VersionInfoIntelSGX{ - Enclaves: []sgx.EnclaveIdentity{ - {MrEnclave: *mrEnclave, MrSigner: *cfg.MrSigner}, - }, - }) } if cfg.Keymanager != nil { args = append(args, []string{ @@ -236,9 +240,9 @@ func (net *Network) NewRuntime(cfg *RuntimeCfg) (*Runtime, error) { dir: rtDir, id: cfg.ID, kind: cfg.Kind, - binary: cfg.Binary, + binaries: cfg.Binaries, teeHardware: cfg.TEEHardware, - mrEnclave: mrEnclave, + mrEnclaves: mrEnclaves, mrSigner: cfg.MrSigner, pruner: cfg.Pruner, excludeFromGenesis: cfg.ExcludeFromGenesis, diff --git a/go/oasis-test-runner/scenario/e2e/e2e.go b/go/oasis-test-runner/scenario/e2e/e2e.go index 49848724e63..ca7c75cb5b6 100644 --- a/go/oasis-test-runner/scenario/e2e/e2e.go +++ b/go/oasis-test-runner/scenario/e2e/e2e.go @@ -126,6 +126,8 @@ func RegisterScenarios() error { LateStart, // Restore from v20.6 genesis file. RestoreV206, + // KeymanagerUpgrade test. 
+ KeymanagerUpgrade, } { if err := cmd.Register(s); err != nil { return err diff --git a/go/oasis-test-runner/scenario/e2e/keymanager_upgrade.go b/go/oasis-test-runner/scenario/e2e/keymanager_upgrade.go new file mode 100644 index 00000000000..2f1ebc04f8e --- /dev/null +++ b/go/oasis-test-runner/scenario/e2e/keymanager_upgrade.go @@ -0,0 +1,314 @@ +package e2e + +import ( + "bytes" + "context" + "fmt" + "path/filepath" + + "github.com/oasislabs/oasis-core/go/common" + "github.com/oasislabs/oasis-core/go/common/cbor" + "github.com/oasislabs/oasis-core/go/common/sgx" + epochtime "github.com/oasislabs/oasis-core/go/epochtime/api" + keymanager "github.com/oasislabs/oasis-core/go/keymanager/api" + "github.com/oasislabs/oasis-core/go/oasis-test-runner/env" + "github.com/oasislabs/oasis-core/go/oasis-test-runner/oasis" + "github.com/oasislabs/oasis-core/go/oasis-test-runner/oasis/cli" + "github.com/oasislabs/oasis-core/go/oasis-test-runner/scenario" + registry "github.com/oasislabs/oasis-core/go/registry/api" +) + +// KeymanagerUpgrade is the keymanager upgrade scenario. +var KeymanagerUpgrade scenario.Scenario = newKmUpgradeImpl() + +type kmUpgradeImpl struct { + runtimeImpl + + nonce uint64 +} + +func newKmUpgradeImpl() scenario.Scenario { + return &kmUpgradeImpl{ + runtimeImpl: *newRuntimeImpl( + "keymanager-upgrade", + "simple-keyvalue-enc-client", + nil, + ), + } +} + +func (sc *kmUpgradeImpl) Fixture() (*oasis.NetworkFixture, error) { + f, err := sc.runtimeImpl.Fixture() + if err != nil { + return nil, err + } + + // Load the upgraded keymanager binary. + newKmBinary, err := sc.resolveRuntimeBinary("simple-keymanager-upgrade") + if err != nil { + return nil, fmt.Errorf("error resolving binary: %w", err) + } + // Setup the upgraded runtime. 
+ kmRuntimeFix := f.Runtimes[0] + if kmRuntimeFix.Kind != registry.KindKeyManager { + return nil, fmt.Errorf("expected first runtime in fixture to be keymanager runtime, got: %s", kmRuntimeFix.Kind) + } + kmRuntimeFix.Binaries = append([]string{newKmBinary}, kmRuntimeFix.Binaries...) + // The upgraded runtime will be registered later. + kmRuntimeFix.ExcludeFromGenesis = true + f.Runtimes = append(f.Runtimes, kmRuntimeFix) + + // Add the upgraded keymanager, will be started later. + f.Keymanagers = append(f.Keymanagers, oasis.KeymanagerFixture{Runtime: 2, Entity: 1, NoAutoStart: true}) + + f.Network.IAS.UseRegistry = true + + return f, nil +} + +func (sc *kmUpgradeImpl) Clone() scenario.Scenario { + return &kmUpgradeImpl{ + runtimeImpl: *sc.runtimeImpl.Clone().(*runtimeImpl), + } +} + +func (sc *kmUpgradeImpl) applyUpgradePolicy(childEnv *env.Env) error { + cli := cli.New(childEnv, sc.net, sc.logger) + + kmPolicyPath := filepath.Join(childEnv.Dir(), "km_policy.cbor") + kmPolicySig1Path := filepath.Join(childEnv.Dir(), "km_policy_sig1.pem") + kmPolicySig2Path := filepath.Join(childEnv.Dir(), "km_policy_sig2.pem") + kmPolicySig3Path := filepath.Join(childEnv.Dir(), "km_policy_sig3.pem") + kmUpdateTxPath := filepath.Join(childEnv.Dir(), "km_gen_update.json") + + oldKMRuntime := sc.net.Runtimes()[0] + newKMRuntime := sc.net.Runtimes()[2] + // Sanity check fixture. 
+ if err := func() error { + if oldKMRuntime.Kind() != registry.KindKeyManager { + return fmt.Errorf("old keymanager runtime not of kind KindKeyManager") + } + if newKMRuntime.Kind() != registry.KindKeyManager { + return fmt.Errorf("new keymanager runtime not of kind KindKeyManager") + } + if oldKMRuntime.ID() != newKMRuntime.ID() { + return fmt.Errorf("keymanager runtimes ID mismatch") + } + return nil + }(); err != nil { + return fmt.Errorf("keymanager runtimes fixture sanity check: %w", err) + } + + oldKMEncID := oldKMRuntime.GetEnclaveIdentity() + newKMEncID := newKMRuntime.GetEnclaveIdentity() + + if oldKMEncID == nil && newKMEncID == nil { + sc.logger.Info("No SGX runtimes, skipping policy update") + return nil + } + + // Ensure enclave IDs differ between the old and new runtimes. + oldEncID, _ := oldKMEncID.MarshalText() + newEncID, _ := newKMEncID.MarshalText() + if bytes.Equal(oldEncID, newEncID) { + return fmt.Errorf("expected different enclave identities, got: %s", newEncID) + } + + // Build updated SGX policies. + sc.logger.Info("building new KM SGX policy enclave policies map") + enclavePolicies := make(map[sgx.EnclaveIdentity]*keymanager.EnclavePolicySGX) + + enclavePolicies[*newKMEncID] = &keymanager.EnclavePolicySGX{} + enclavePolicies[*newKMEncID].MayQuery = make(map[common.Namespace][]sgx.EnclaveIdentity) + enclavePolicies[*oldKMEncID] = &keymanager.EnclavePolicySGX{} + enclavePolicies[*oldKMEncID].MayQuery = make(map[common.Namespace][]sgx.EnclaveIdentity) + + // Allow new runtime enclave to replicate from the old runtime enclave. + enclavePolicies[*oldKMEncID].MayReplicate = []sgx.EnclaveIdentity{*newKMEncID} + + // Allow compute runtime to query new runtime. 
+ for _, rt := range sc.net.Runtimes() { + if rt.Kind() != registry.KindCompute { + continue + } + if eid := rt.GetEnclaveIdentity(); eid != nil { + enclavePolicies[*newKMEncID].MayQuery[rt.ID()] = []sgx.EnclaveIdentity{*eid} + } + } + + sc.logger.Info("initing updated KM policy") + if err := cli.Keymanager.InitPolicy(oldKMRuntime.ID(), 2, enclavePolicies, kmPolicyPath); err != nil { + return err + } + sc.logger.Info("signing updated KM policy") + if err := cli.Keymanager.SignPolicy("1", kmPolicyPath, kmPolicySig1Path); err != nil { + return err + } + if err := cli.Keymanager.SignPolicy("2", kmPolicyPath, kmPolicySig2Path); err != nil { + return err + } + if err := cli.Keymanager.SignPolicy("3", kmPolicyPath, kmPolicySig3Path); err != nil { + return err + } + + sc.logger.Info("updating KM policy") + if err := cli.Keymanager.GenUpdate(sc.nonce, kmPolicyPath, []string{kmPolicySig1Path, kmPolicySig2Path, kmPolicySig3Path}, kmUpdateTxPath); err != nil { + return err + } + if err := cli.Consensus.SubmitTx(kmUpdateTxPath); err != nil { + return fmt.Errorf("failed to update KM policy: %w", err) + } + sc.nonce++ + + return nil +} + +func (sc *kmUpgradeImpl) ensureReplicationWorked(ctx context.Context, km *oasis.Keymanager, rt *oasis.Runtime) error { + ctrl, err := oasis.NewController(km.SocketPath()) + if err != nil { + return err + } + node, err := ctrl.Registry.GetNode( + ctx, + &registry.IDQuery{ + ID: km.NodeID, + }, + ) + if err != nil { + return err + } + nodeRt := node.GetRuntime(rt.ID()) + if nodeRt == nil { + return fmt.Errorf("node is missing keymanager runtime from descriptor") + } + var signedInitResponse keymanager.SignedInitResponse + if err = cbor.Unmarshal(nodeRt.ExtraInfo, &signedInitResponse); err != nil { + return fmt.Errorf("failed to unmarshal replica extrainfo") + } + + // Grab a state dump and ensure all keymanager nodes have a matching + // checksum. 
+ doc, err := ctrl.Consensus.StateToGenesis(context.Background(), 0) + if err != nil { + return fmt.Errorf("failed to obtain consensus state: %w", err) + } + if err = func() error { + for _, status := range doc.KeyManager.Statuses { + if !status.ID.Equal(&nodeRt.ID) { + continue + } + if !status.IsInitialized { + return fmt.Errorf("key manager failed to initialize") + } + if !bytes.Equal(status.Checksum, signedInitResponse.InitResponse.Checksum) { + return fmt.Errorf("key manager failed to replicate, checksum mismatch") + } + return nil + } + return fmt.Errorf("consensus state missing km status") + }(); err != nil { + return err + } + + return nil +} + +func (sc *kmUpgradeImpl) Run(childEnv *env.Env) error { + ctx := context.Background() + cli := cli.New(childEnv, sc.net, sc.logger) + + clientErrCh, cmd, err := sc.runtimeImpl.start(childEnv) + if err != nil { + return err + } + sc.logger.Info("waiting for client to exit") + // Wait for the client to exit. + select { + case err = <-sc.runtimeImpl.net.Errors(): + _ = cmd.Process.Kill() + case err = <-clientErrCh: + } + if err != nil { + return err + } + + // Generate and update a policy that will allow replication for the new + // keymanager. + if err = sc.applyUpgradePolicy(childEnv); err != nil { + return fmt.Errorf("updating policies: %w", err) + } + + // Start the new keymanager. + sc.logger.Info("starting new keymanager") + newKm := sc.net.Keymanagers()[1] + if err = newKm.Start(); err != nil { + return fmt.Errorf("starting new key-manager: %w", err) + } + + // Update runtime to include the new enclave identity. 
+ sc.logger.Info("updating keymanager runtime descriptor") + newRt := sc.net.Runtimes()[2] + kmRtDesc := newRt.ToRuntimeDescriptor() + kmTxPath := filepath.Join(childEnv.Dir(), "register_update_km_runtime.json") + if err = cli.Registry.GenerateRegisterRuntimeTx(sc.nonce, kmRtDesc, kmTxPath, ""); err != nil { + return fmt.Errorf("failed to generate register KM runtime tx: %w", err) + } + sc.nonce++ + if err = cli.Consensus.SubmitTx(kmTxPath); err != nil { + return fmt.Errorf("failed to update KM runtime: %w", err) + } + + // Wait for the new node to register. + sc.logger.Info("waiting for new keymanager node to register", + "num_nodes", sc.net.NumRegisterNodes(), + ) + + if err = sc.net.Controller().WaitNodesRegistered(ctx, sc.net.NumRegisterNodes()); err != nil { + return fmt.Errorf("failed to wait for nodes: %w", err) + } + + sc.logger.Info("wait for few epochs to ensure replication finishes") + var waitEpoch epochtime.EpochTime + waitEpoch, err = sc.net.Controller().Consensus.GetEpoch(ctx, 0) + if err != nil { + return err + } + waitEpoch += 3 + sc.logger.Info("waiting for epoch", + "wait_epoch", waitEpoch, + ) + err = sc.net.Controller().Consensus.WaitEpoch(ctx, waitEpoch) + if err != nil { + return err + } + + // Ensure replication succeeded. + if err = sc.ensureReplicationWorked(ctx, newKm, newRt); err != nil { + return err + } + + // Shutdown old km. + sc.logger.Info("shutting down old keymanager") + oldKm := sc.net.Keymanagers()[0] + if err = oldKm.Stop(); err != nil { + return fmt.Errorf("old keymanager node shutdown: %w", err) + } + + // The last part of the test won't work until: + // https://github.com/oasislabs/oasis-core/issues/2919 + /* + // Run test again. 
+ sc.logger.Info("starting a second client to check if key manager works") + sc.runtimeImpl.clientArgs = []string{"--key", "key2"} + cmd, err = sc.startClient(childEnv) + if err != nil { + return err + } + client2ErrCh := make(chan error) + go func() { + client2ErrCh <- cmd.Wait() + }() + return sc.wait(childEnv, cmd, client2ErrCh) + */ + return nil +} diff --git a/go/oasis-test-runner/scenario/e2e/multiple_runtimes.go b/go/oasis-test-runner/scenario/e2e/multiple_runtimes.go index 30ef2ece3c5..33355b556a7 100644 --- a/go/oasis-test-runner/scenario/e2e/multiple_runtimes.go +++ b/go/oasis-test-runner/scenario/e2e/multiple_runtimes.go @@ -84,7 +84,7 @@ func (mr *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { if rt.Kind == registry.KindCompute { if runtimeBinary == "" { copy(id[:], rt.ID[:]) - runtimeBinary = rt.Binary + runtimeBinary = rt.Binaries[0] } } else { rts = append(rts, rt) @@ -105,7 +105,7 @@ func (mr *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { Kind: registry.KindCompute, Entity: 0, Keymanager: 0, - Binary: runtimeBinary, + Binaries: []string{runtimeBinary}, Executor: registry.ExecutorParameters{ GroupSize: uint64(mr.executorGroupSize), GroupBackupSize: 0, diff --git a/go/oasis-test-runner/scenario/e2e/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime.go index 8de999a3fe7..8f4094e2591 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime.go @@ -164,7 +164,7 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: &registry.AnyNodeRuntimeAdmissionPolicy{}, }, - Binary: keyManagerBinary, + Binaries: []string{keyManagerBinary}, }, // Compute runtime. 
oasis.RuntimeFixture{ @@ -172,7 +172,7 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { Kind: registry.KindCompute, Entity: 0, Keymanager: 0, - Binary: runtimeBinary, + Binaries: []string{runtimeBinary}, Executor: registry.ExecutorParameters{ GroupSize: 2, GroupBackupSize: 1, @@ -553,6 +553,11 @@ func (sc *runtimeImpl) initialEpochTransitions() error { "num_nodes", numNodes, ) + // TODO: once #2130 is done, additionally wait for nodes to be Ready here. + // In general, for example, a keymanager can register (with missing + // extra info) before actually being initialized, and triggering an + // epoch transition at that point would be too soon. + if err := sc.net.Controller().WaitNodesRegistered(ctx, numNodes); err != nil { return fmt.Errorf("failed to wait for nodes: %w", err) } diff --git a/go/worker/keymanager/init.go b/go/worker/keymanager/init.go index 7291a471579..6cbc963c2e6 100644 --- a/go/worker/keymanager/init.go +++ b/go/worker/keymanager/init.go @@ -56,6 +56,7 @@ func New( stopCh: make(chan struct{}), quitCh: make(chan struct{}), initCh: make(chan struct{}), + initTickerCh: nil, commonWorker: commonWorker, backend: backend, grpcPolicy: policy.NewDynamicRuntimePolicyChecker(enclaverpc.ServiceName, commonWorker.GrpcPolicyWatcher), diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index 275e238ef29..1ee358ccb1b 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -7,6 +7,8 @@ import ( "sync" "time" + "github.com/cenkalti/backoff/v4" + "github.com/oasislabs/oasis-core/go/common" "github.com/oasislabs/oasis-core/go/common/accessctl" "github.com/oasislabs/oasis-core/go/common/cbor" @@ -60,6 +62,9 @@ type Worker struct { // nolint: maligned quitCh chan struct{} initCh chan struct{} + initTicker *backoff.Ticker + initTickerCh <-chan time.Time + initialSyncDone bool runtime runtimeRegistry.Runtime @@ -178,6 +183,10 @@ func (w *Worker) updateStatus(status *api.Status, startedEvent 
*host.StartedEven var initOk bool defer func() { if !initOk { + // TODO: once #2130 is done and keymanager reports as Ready only + // after initialization succeeds, change this to always pre-register + // on the initial updateStatus call (and not only after the first + // initialization fails as it is done currently). // This is likely a new key manager that needs to replicate. // Send a node registration anyway, so that other nodes know // to update their access control. @@ -188,6 +197,12 @@ func (w *Worker) updateStatus(status *api.Status, startedEvent *host.StartedEven rt.Capabilities.TEE = startedEvent.CapabilityTEE return nil }) + + // If initialization failed, set up a retry ticker. + if w.initTicker == nil { + w.initTicker = backoff.NewTicker(backoff.NewExponentialBackOff()) + w.initTickerCh = w.initTicker.C + } } }() @@ -287,6 +302,11 @@ func (w *Worker) updateStatus(status *api.Status, startedEvent *host.StartedEven w.logger.Info("Key manager initialized", "checksum", hex.EncodeToString(signedInitResp.InitResponse.Checksum), ) + if w.initTicker != nil { + w.initTickerCh = nil + w.initTicker.Stop() + w.initTicker = nil + } // Register as we are now ready to handle requests. initOk = true @@ -459,12 +479,19 @@ func (w *Worker) worker() { // nolint: gocyclo } // Forward status update to key manager runtime. 
- if err := w.updateStatus(currentStatus, currentStartedEvent); err != nil { + if err = w.updateStatus(currentStatus, currentStartedEvent); err != nil { w.logger.Error("failed to handle status update", "err", err, ) continue } + case <-w.initTickerCh: + if currentStatus == nil || currentStartedEvent == nil { + continue + } + if err = w.updateStatus(currentStatus, currentStartedEvent); err != nil { + w.logger.Error("failed to handle status update", "err", err) + } case rt := <-rtCh: if rt.Kind != registry.KindCompute || rt.KeyManager == nil || !rt.KeyManager.Equal(&runtimeID) { continue diff --git a/tests/fixture-data/net-runner/default.json b/tests/fixture-data/net-runner/default.json index b0a5ce15126..5c36ee463fc 100644 --- a/tests/fixture-data/net-runner/default.json +++ b/tests/fixture-data/net-runner/default.json @@ -34,7 +34,9 @@ "kind": 2, "entity": 0, "keymanager": -1, - "binary": "simple-keymanager", + "binaries": [ + "simple-keymanager" + ], "genesis_state": "", "genesis_round": 0, "executor": { @@ -80,7 +82,9 @@ "kind": 1, "entity": 0, "keymanager": 0, - "binary": "simple-keyvalue", + "binaries": [ + "simple-keyvalue" + ], "genesis_state": "", "genesis_round": 0, "executor": { diff --git a/tests/runtimes/simple-keymanager-upgrade/Cargo.toml b/tests/runtimes/simple-keymanager-upgrade/Cargo.toml new file mode 100644 index 00000000000..a1da7edb16c --- /dev/null +++ b/tests/runtimes/simple-keymanager-upgrade/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "simple-keymanager-upgrade" +version = "0.3.0-alpha" +authors = ["Oasis Labs Inc. 
"] +edition = "2018" + +[package.metadata.fortanix-sgx] +heap-size = 134217728 +stack-size = 2097152 +threads = 2 + +[dependencies] +oasis-core-runtime = { path = "../../../runtime" } +oasis-core-keymanager-lib = { path = "../../../keymanager-lib" } +oasis-core-keymanager-api-common = { path = "../../../keymanager-api-common" } + +[build-dependencies] +oasis-core-tools = { path = "../../../tools" } diff --git a/tests/runtimes/simple-keymanager-upgrade/src/api.rs b/tests/runtimes/simple-keymanager-upgrade/src/api.rs new file mode 100644 index 00000000000..46b81889514 --- /dev/null +++ b/tests/runtimes/simple-keymanager-upgrade/src/api.rs @@ -0,0 +1,28 @@ +use oasis_core_keymanager_api_common::*; +use oasis_core_runtime::common::crypto::signature::PrivateKey as OasisPrivateKey; +use std::collections::HashSet; + +pub fn trusted_policy_signers() -> TrustedPolicySigners { + TrustedPolicySigners { + signers: { + let mut set = HashSet::new(); + if option_env!("OASIS_UNSAFE_KM_POLICY_KEYS").is_some() { + for seed in [ + "ekiden key manager test multisig key 0", + "ekiden key manager test multisig key 1", + "ekiden key manager test multisig key 2", + ] + .iter() + { + let private_key = OasisPrivateKey::from_test_seed(seed.to_string()); + set.insert(private_key.public_key()); + } + } + + set + }, + // Maintain compatible simple-keymanager, but ensure a different + // MRENCLAVE for the keymanager-upgrade test. + threshold: 9002, + } +} diff --git a/tests/runtimes/simple-keymanager-upgrade/src/lib.rs b/tests/runtimes/simple-keymanager-upgrade/src/lib.rs new file mode 100644 index 00000000000..ed220f3c5c4 --- /dev/null +++ b/tests/runtimes/simple-keymanager-upgrade/src/lib.rs @@ -0,0 +1,4 @@ +pub mod api; + +// Re-exports. 
+pub use api::*; diff --git a/tests/runtimes/simple-keymanager-upgrade/src/main.rs b/tests/runtimes/simple-keymanager-upgrade/src/main.rs new file mode 100644 index 00000000000..1c8b6ff6240 --- /dev/null +++ b/tests/runtimes/simple-keymanager-upgrade/src/main.rs @@ -0,0 +1,9 @@ +use oasis_core_keymanager_lib::keymanager::*; +use oasis_core_runtime::{common::version::Version, version_from_cargo}; + +mod api; + +fn main() { + let init = new_keymanager(api::trusted_policy_signers()); + oasis_core_runtime::start_runtime(init, version_from_cargo!()); +}