From 3acbdbd21d1717e865bcaaa7438a9338626c1619 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Mon, 3 Jun 2019 08:32:26 +0000 Subject: [PATCH] go/keymanager: Add support for multiple nodes [skip ci] --- .buildkite/scripts/common_e2e.sh | 6 + Cargo.lock | 1 + go/ekiden/cmd/node/node.go | 45 ++-- go/ekiden/node_test.go | 1 + go/keymanager/api/api.go | 104 ++++++++- go/keymanager/client/client.go | 209 ++++++++++++----- go/keymanager/init.go | 59 +++++ go/keymanager/memory/memory.go | 239 +++++++++++++++++++ go/keymanager/tendermint/tendermint.go | 163 +++++++++++++ go/tendermint/apps/keymanager/api.go | 35 +++ go/tendermint/apps/keymanager/keymanager.go | 242 ++++++++++++++++++++ go/tendermint/apps/keymanager/state.go | 95 ++++++++ go/worker/keymanager/keymanager.go | 56 +++-- keymanager-runtime/Cargo.toml | 2 + keymanager-runtime/api/src/api.rs | 15 +- keymanager-runtime/src/kdf.rs | 153 ++++++++++--- keymanager-runtime/src/main.rs | 52 ++++- runtime/src/dispatcher.rs | 4 +- runtime/src/rpc/context.rs | 11 +- 19 files changed, 1344 insertions(+), 148 deletions(-) create mode 100644 go/keymanager/init.go create mode 100644 go/keymanager/memory/memory.go create mode 100644 go/keymanager/tendermint/tendermint.go create mode 100644 go/tendermint/apps/keymanager/api.go create mode 100644 go/tendermint/apps/keymanager/keymanager.go create mode 100644 go/tendermint/apps/keymanager/state.go diff --git a/.buildkite/scripts/common_e2e.sh b/.buildkite/scripts/common_e2e.sh index 0f5c2ddc6c9..91a2058c27e 100644 --- a/.buildkite/scripts/common_e2e.sh +++ b/.buildkite/scripts/common_e2e.sh @@ -158,6 +158,7 @@ run_backend_tendermint_committee() { --registry.backend tendermint \ --roothash.backend tendermint \ --roothash.tendermint.index_blocks \ + --keymanager.backend tendermint \ --genesis.file ${genesis_file} \ --tendermint.core.listen_address tcp://0.0.0.0:${tm_port} \ --tendermint.consensus.timeout_commit 250ms \ @@ -239,6 +240,7 @@ run_compute_node() { --scheduler.backend trivial \ --registry.backend tendermint \ --roothash.backend tendermint \ + --keymanager.backend tendermint \ --genesis.file ${EKIDEN_GENESIS_FILE} \ --tendermint.core.listen_address tcp://0.0.0.0:${tm_port} \ --tendermint.consensus.timeout_commit 250ms \ @@ -312,6 +314,7 @@ run_storage_node() { --scheduler.backend trivial \ --registry.backend tendermint \ --roothash.backend tendermint \ + --keymanager.backend tendermint \ --genesis.file ${EKIDEN_GENESIS_FILE} \ --tendermint.core.listen_address tcp://0.0.0.0:${tm_port} \ --tendermint.consensus.timeout_commit 250ms \ @@ -394,6 +397,7 @@ run_keymanager_node() { --scheduler.backend trivial \ --registry.backend tendermint \ --roothash.backend tendermint \ + --keymanager.backend tendermint \ --genesis.file ${EKIDEN_GENESIS_FILE} \ --tendermint.core.listen_address tcp://0.0.0.0:${tm_port} \ --tendermint.consensus.timeout_commit 250ms \ @@ -406,6 +410,7 @@ run_keymanager_node() { --worker.keymanager.runtime.loader ${EKIDEN_RUNTIME_LOADER} \ --worker.keymanager.runtime.binary ${EKIDEN_ROOT_PATH}/target/${runtime_target}/debug/ekiden-keymanager-runtime${runtime_ext} \ --worker.keymanager.runtime.id ${EKIDEN_KM_RUNTIME_ID} \ + --worker.keymanager.may_generate \ --tendermint.seeds "${EKIDEN_SEED_NODE_ID}@127.0.0.1:${EKIDEN_SEED_NODE_PORT}" \ --datadir ${data_dir} \ --debug.allow_test_keys \ @@ -449,6 +454,7 @@ run_seed_node() { --scheduler.backend trivial \ --registry.backend tendermint \ --roothash.backend tendermint \ + --keymanager.backend tendermint \ --tendermint.core.listen_address 
tcp://0.0.0.0:${EKIDEN_SEED_NODE_PORT} \ --tendermint.seed_mode \ --tendermint.debug.addr_book_lenient \ diff --git a/Cargo.lock b/Cargo.lock index d06e60145b3..cb73618dc44 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -395,6 +395,7 @@ dependencies = [ "serde_cbor 0.9.0 (git+https://github.com/pyfisch/cbor?rev=114ecaeac53799d0bf81ca8d1b980c7c419d76fe)", "sgx-isa 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "sp800-185 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "tiny-keccak 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "x25519-dalek 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "zeroize 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/go/ekiden/cmd/node/node.go b/go/ekiden/cmd/node/node.go index a15305193ee..76640f26236 100644 --- a/go/ekiden/cmd/node/node.go +++ b/go/ekiden/cmd/node/node.go @@ -29,6 +29,8 @@ import ( epochtimeAPI "github.com/oasislabs/ekiden/go/epochtime/api" "github.com/oasislabs/ekiden/go/genesis" "github.com/oasislabs/ekiden/go/ias" + "github.com/oasislabs/ekiden/go/keymanager" + keymanagerAPI "github.com/oasislabs/ekiden/go/keymanager/api" keymanagerClient "github.com/oasislabs/ekiden/go/keymanager/client" "github.com/oasislabs/ekiden/go/registry" registryAPI "github.com/oasislabs/ekiden/go/registry/api" @@ -45,7 +47,7 @@ import ( workerCommon "github.com/oasislabs/ekiden/go/worker/common" "github.com/oasislabs/ekiden/go/worker/common/p2p" "github.com/oasislabs/ekiden/go/worker/compute" - "github.com/oasislabs/ekiden/go/worker/keymanager" + keymanagerWorker "github.com/oasislabs/ekiden/go/worker/keymanager" "github.com/oasislabs/ekiden/go/worker/merge" "github.com/oasislabs/ekiden/go/worker/registration" workerStorage "github.com/oasislabs/ekiden/go/worker/storage" @@ -75,18 +77,20 @@ type Node struct { grpcInternal *grpc.Server svcTmnt tmService.TendermintService - Genesis genesis.Provider - Identity *identity.Identity - Beacon beaconAPI.Backend - Epochtime epochtimeAPI.Backend - Registry registryAPI.Backend - RootHash roothashAPI.Backend - Scheduler schedulerAPI.Backend - Staking stakingAPI.Backend - Storage storageAPI.Backend - IAS *ias.IAS - Client *client.Client - KeyManager *keymanagerClient.Client + Genesis genesis.Provider + Identity *identity.Identity + Beacon beaconAPI.Backend + Epochtime epochtimeAPI.Backend + Registry registryAPI.Backend + RootHash roothashAPI.Backend + Scheduler schedulerAPI.Backend + Staking stakingAPI.Backend + Storage storageAPI.Backend + IAS *ias.IAS + Client *client.Client + + KeyManager keymanagerAPI.Backend + KeyManagerClient *keymanagerClient.Client CommonWorker *workerCommon.Worker ComputeWorker *compute.Worker @@ -132,6 +136,9 @@ func (n *Node) initBackends() error { if n.Staking, err = staking.New(n.svcMgr.Ctx, n.svcTmnt); err != nil { return err } + if n.KeyManager, err = keymanager.New(n.svcMgr.Ctx, n.Epochtime, n.Registry, n.svcTmnt); err != nil { + return err + } n.svcMgr.RegisterCleanupOnly(n.Staking, "staking backend") if n.Scheduler, err = scheduler.New(n.svcMgr.Ctx, n.Epochtime, n.Registry, n.Beacon, n.svcTmnt); err != nil { return err @@ -204,13 +211,14 @@ func (n *Node) initAndStartWorkers(logger *logging.Logger) error { } n.svcMgr.Register(n.WorkerRegistration) - // Initialize the key manager service. - kmSvc, kmEnabled, err := keymanager.New( + // Initialize the key manager worker service. 
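+ // The worker is handed the key manager backend so that it can
+ // query the consensus-tracked status (checksum, active node list)
+ // when initializing its enclave; see go/worker/keymanager.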
+ kmSvc, kmEnabled, err := keymanagerWorker.New( dataDir, n.IAS, n.CommonWorker.Grpc, n.WorkerRegistration, &workerCommonCfg, + n.KeyManager, ) if err != nil { return err @@ -247,7 +255,7 @@ func (n *Node) initAndStartWorkers(logger *logging.Logger) error { n.CommonWorker, n.MergeWorker, n.IAS, - n.KeyManager, + n.KeyManagerClient, n.WorkerRegistration, ) if err != nil { @@ -474,7 +482,7 @@ func NewNode() (*Node, error) { logger.Info("starting ekiden node") // Initialize the key manager client service. - node.KeyManager, err = keymanagerClient.New(node.Registry) + node.KeyManagerClient, err = keymanagerClient.New(node.KeyManager, node.Registry) if err != nil { logger.Error("failed to initialize key manager client", "err", err, @@ -491,7 +499,7 @@ func NewNode() (*Node, error) { node.Scheduler, node.Registry, node.svcTmnt, - node.KeyManager, + node.KeyManagerClient, ) if err != nil { return nil, err @@ -560,6 +568,7 @@ func RegisterFlags(cmd *cobra.Command) { ias.RegisterFlags, keymanager.RegisterFlags, keymanagerClient.RegisterFlags, + keymanagerWorker.RegisterFlags, client.RegisterFlags, compute.RegisterFlags, p2p.RegisterFlags, diff --git a/go/ekiden/node_test.go b/go/ekiden/node_test.go index b8405e9c067..5d10a6a5024 100644 --- a/go/ekiden/node_test.go +++ b/go/ekiden/node_test.go @@ -47,6 +47,7 @@ var ( {"log.level.default", "DEBUG"}, {"epochtime.backend", "tendermint_mock"}, {"beacon.backend", "tendermint"}, + {"keymanager.backend", "tendermint"}, {"registry.backend", "tendermint"}, {"roothash.backend", "tendermint"}, {"roothash.tendermint.index_blocks", true}, diff --git a/go/keymanager/api/api.go b/go/keymanager/api/api.go index fcd316744c1..7c45498a48f 100644 --- a/go/keymanager/api/api.go +++ b/go/keymanager/api/api.go @@ -1,7 +1,65 @@ -// Package api implementes the key manager API and common data types. +// Package api implementes the key manager management API and common data types. package api -// InitResponse is the initialzation RPC response, returned as part of a +import ( + "context" + "errors" + + "github.com/oasislabs/ekiden/go/common/cbor" + "github.com/oasislabs/ekiden/go/common/crypto/signature" + "github.com/oasislabs/ekiden/go/common/node" + "github.com/oasislabs/ekiden/go/common/pubsub" + registry "github.com/oasislabs/ekiden/go/registry/api" +) + +var ( + // ErrNoSuchKeyManager is the error returned when a key manager does not + // exist. + ErrNoSuchKeyManager = errors.New("keymanager: no such key manager") + + // TestPublicKey is the insecure hardcoded key manager public key, used + // in insecure builds when a RAK is unavailable. + TestPublicKey signature.PublicKey + + initResponseContext = []byte("EkKmIniR") +) + +// Status is the current key manager status. +type Status struct { + // ID is the runtime ID of the key manager. + ID signature.PublicKey `codec:"id"` + + // IsInitialized is true iff the key manager is done initializing. + IsInitialized bool `codec:"is_initialized"` + + // IsSecure is true iff the key manger is secure. + IsSecure bool `codec:"is_secure"` + + // Checksum is the key manager master secret verification checksum. + Checksum []byte `codec:"checksum"` + + // Nodes is the list of currently active key manager node IDs. + Nodes []signature.PublicKey `codec:"nodes"` + + // TODO: Policy +} + +// Backend is a key manager management implementation. +type Backend interface { + // GetStatus returns a key manager status by key manager ID. 
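+ //
+ // If no key manager with the given ID is tracked,
+ // ErrNoSuchKeyManager is returned.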
+ GetStatus(context.Context, signature.PublicKey) (*Status, error) + + // GetStatuses returns all currently tracked key manager statuses. + GetStatuses(context.Context) ([]*Status, error) + + // WatchStatuses returns a channel that produces a stream of messages + // containing the key manager statuses as it changes over time. + // + // Upon subscription the current status is sent immediately. + WatchStatuses() (<-chan *Status, *pubsub.Subscription) +} + +// InitResponse is the initialization RPC response, returned as part of a // SignedInitResponse from the key manager enclave. type InitResponse struct { IsSecure bool `codec:"is_secure"` @@ -14,3 +72,45 @@ type SignedInitResponse struct { InitResponse InitResponse `codec:"init_response"` Signature []byte `codec:"signature"` } + +func (r *SignedInitResponse) Verify(pk signature.PublicKey) error { + raw := cbor.Marshal(r.InitResponse) + if !pk.Verify(initResponseContext, raw, r.Signature) { + return errors.New("keymanager: invalid initialization response signature") + } + return nil +} + +// VerifyExtraInfo verifies and parses the per-node + per-runtime ExtraInfo +// blob for a key manager. +func VerifyExtraInfo(rt *registry.Runtime, nodeRt *node.Runtime) (*InitResponse, error) { + var ( + hw node.TEEHardware + rak signature.PublicKey + ) + if nodeRt.Capabilities.TEE == nil || nodeRt.Capabilities.TEE.Hardware == node.TEEHardwareInvalid { + hw = node.TEEHardwareInvalid + rak = TestPublicKey + } else { + // TODO: MRENCLAVE/MRSIGNER. + hw = nodeRt.Capabilities.TEE.Hardware + rak = nodeRt.Capabilities.TEE.RAK + } + if hw != rt.TEEHardware { + return nil, errors.New("keymanger: TEEHardware mismatch") + } + + var untrustedSignedInitResponse SignedInitResponse + if err := cbor.Unmarshal(nodeRt.ExtraInfo, &untrustedSignedInitResponse); err != nil { + return nil, err + } + if err := untrustedSignedInitResponse.Verify(rak); err != nil { + return nil, err + } + return &untrustedSignedInitResponse.InitResponse, nil +} + +func init() { + _ = TestPublicKey.UnmarshalHex("9d41a874b80e39a40c9644e964f0e4f967100c91654bfd7666435fe906af060f") + signature.RegisterTestPublicKey(TestPublicKey) +} diff --git a/go/keymanager/client/client.go b/go/keymanager/client/client.go index d060b4d83fa..6d2e11cafe0 100644 --- a/go/keymanager/client/client.go +++ b/go/keymanager/client/client.go @@ -18,6 +18,7 @@ import ( "github.com/oasislabs/ekiden/go/common/grpc/resolver/manual" "github.com/oasislabs/ekiden/go/common/logging" "github.com/oasislabs/ekiden/go/common/node" + "github.com/oasislabs/ekiden/go/keymanager/api" registry "github.com/oasislabs/ekiden/go/registry/api" "github.com/oasislabs/ekiden/go/worker/common/enclaverpc" ) @@ -39,90 +40,161 @@ type Client struct { logger *logging.Logger - registry registry.Backend + backend api.Backend + registry registry.Backend + + state map[signature.MapKey]*clientState + kmMap map[signature.MapKey]signature.PublicKey + + debugClient *enclaverpc.Client +} + +type clientState struct { + status *api.Status conn *grpc.ClientConn client *enclaverpc.Client resolverCleanupFn func() } +func (st *clientState) kill() { + if st.resolverCleanupFn != nil { + st.resolverCleanupFn() + st.resolverCleanupFn = nil + } + if st.conn != nil { + st.conn.Close() + st.conn = nil + } +} + // CallRemote calls a runtime-specific key manager via remote EnclaveRPC. 
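+// The destination is resolved via the runtime -> key manager mapping
+// maintained from registry runtime descriptors; if the ID is itself a
+// tracked key manager (e.g. for enclave replication), the call is
+// routed to that key manager directly.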
func (c *Client) CallRemote(ctx context.Context, runtimeID signature.PublicKey, data []byte) ([]byte, error) { + if c.debugClient != nil { + return c.debugClient.CallEnclave(ctx, data) + } + c.RLock() defer c.RUnlock() - if c.client == nil { - return nil, ErrKeyManagerNotAvailable + + id := runtimeID.ToMapKey() + kmID := c.kmMap[id] + if kmID == nil { + if c.state[id] == nil { + return nil, ErrKeyManagerNotAvailable + } + + // The target query is for a keymanager runtime ID, probably + // replication. + kmID = runtimeID } - // TODO: The runtimeID is currently entirely ignored. `data` also contains - // a runtimeID for the purpose of separating keys. + st := c.state[kmID.ToMapKey()] + if st == nil || st.client == nil { + return nil, ErrKeyManagerNotAvailable + } - return c.client.CallEnclave(ctx, data) + return st.client.CallEnclave(ctx, data) } func (c *Client) worker() { - // TODO: The "correct" way to implement this is to schedule the key manager, - // but for now just work under the assumption that this is running on staging - // and or prod, and there is only one KM node registered at once, that all - // the runtimes will use. - - ch, sub := c.registry.WatchNodeList() - defer sub.Close() - - findFirstKMNode := func(l []*node.Node) *node.Node { - for _, n := range l { - if n.HasRoles(node.RoleKeyManager) { - return n + stCh, stSub := c.backend.WatchStatuses() + defer stSub.Close() + + rtCh, rtSub := c.registry.WatchRuntimes() + defer rtSub.Close() + + nlCh, nlSub := c.registry.WatchNodeList() + defer nlSub.Close() + + for { + select { + case st := <-stCh: + nl, err := c.registry.GetNodes(context.TODO()) + if err != nil { + c.logger.Error("failed to poll node list", + "err", err, + ) } + c.updateState(st, nl) + case rt := <-rtCh: + c.updateRuntime(rt) + case nl := <-nlCh: + c.updateNodes(nl.Nodes) } - return nil } +} - for nl := range ch { - c.logger.Debug("updating node list", - "epoch", nl.Epoch, - ) +func (c *Client) updateRuntime(rt *registry.Runtime) { + c.Lock() + defer c.Unlock() - c.updateConnection(findFirstKMNode(nl.Nodes)) + switch rt.Kind { + case registry.KindCompute: + c.kmMap[rt.ID.ToMapKey()] = rt.KeyManager + default: + delete(c.kmMap, rt.ID.ToMapKey()) } } -func (c *Client) updateConnection(n *node.Node) { - if n == nil { - c.logger.Error("failed to update connection, no key manager nodes found") - return +func (c *Client) updateState(status *api.Status, nodeList []*node.Node) { + nodeMap := make(map[signature.MapKey]*node.Node) + for _, n := range nodeList { + nodeMap[n.ID.ToMapKey()] = n } - if n.Certificate == nil { - // TODO: The registry should reject such registrations, so this should never happen. - c.logger.Error("key manager node registered without certificate, refusing to communicate", - "node_id", n.ID, - ) - return - } - - // TODO: Only update the connection if the key or address changed. c.Lock() defer c.Unlock() - cert, err := n.Certificate.Parse() - if err != nil { - c.logger.Error("failed to parse key manager certificate", - "err", err, - ) + idKey := status.ID.ToMapKey() + st := c.state[idKey] + + // It's not possible to service requests for this key manager. + if !status.IsInitialized || len(status.Nodes) == 0 { + // Kill the conn and return. + if st != nil { + st.kill() + c.state[idKey] = nil + } + return } + + // Build the new state. 
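+ // Fold the certificates and addresses of every active key manager
+ // node for this status into a single certificate pool and resolver
+ // address list, so one gRPC client connection can reach any of them.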
certPool := x509.NewCertPool() - certPool.AddCert(cert) + var addresses []resolver.Address + for _, v := range status.Nodes { + n := nodeMap[v.ToMapKey()] + if n == nil { + c.logger.Warn("key manager node missing descriptor", + "id", n.ID, + ) + continue + } + + cert, err := n.Certificate.Parse() + if err != nil { + c.logger.Error("failed to parse key manager certificate", + "id", n.ID, + "err", err, + ) + continue + } + certPool.AddCert(cert) + + for _, addr := range n.Addresses { + addresses = append(addresses, resolver.Address{Addr: addr.String()}) + } + } + creds := credentials.NewClientTLSFromCert(certPool, "ekiden-node") opts := grpc.WithTransportCredentials(creds) - if c.resolverCleanupFn != nil { - c.resolverCleanupFn() - c.resolverCleanupFn = nil - } - if c.conn != nil { - c.conn.Close() - c.conn = nil + // TODO: This probably could skip updating the connection sometimes. + + // Kill the old state if it exists. + if st != nil { + st.kill() + c.state[idKey] = nil } // Note: While this may look screwed up, the resolver needs the client conn @@ -136,25 +208,40 @@ func (c *Client) updateConnection(n *node.Node) { ) return } - var addresses []resolver.Address - for _, addr := range n.Addresses { - addresses = append(addresses, resolver.Address{Addr: addr.String()}) - } manualResolver.NewAddress(addresses) c.logger.Debug("updated connection", - "node", n, + "id", status.ID, ) - c.client = enclaverpc.NewFromConn(conn, kmEndpoint) - c.conn = conn - c.resolverCleanupFn = cleanupFn + c.state[idKey] = &clientState{ + status: status, + conn: conn, + client: enclaverpc.NewFromConn(conn, kmEndpoint), + resolverCleanupFn: cleanupFn, + } +} + +func (c *Client) updateNodes(nodeList []*node.Node) { + var statuses []*api.Status + + c.RLock() + for _, v := range c.state { + statuses = append(statuses, v.status) + } + c.RUnlock() + + for _, v := range statuses { + c.updateState(v, nodeList) + } } // New creates a new key manager client instance. -func New(registryBackend registry.Backend) (*Client, error) { +func New(backend api.Backend, registryBackend registry.Backend) (*Client, error) { c := &Client{ logger: logging.GetLogger("keymanager/client"), + state: make(map[signature.MapKey]*clientState), + kmMap: make(map[signature.MapKey]signature.PublicKey), } if debugAddress := viper.GetString(cfgDebugClientAddress); debugAddress != "" { @@ -165,13 +252,15 @@ func New(registryBackend registry.Backend) (*Client, error) { return nil, errors.Wrap(err, "keymanager/client: failed to create debug client") } - c.client = client + c.debugClient = client return c, nil } // Standard configuration watches the various backends. + c.backend = backend c.registry = registryBackend + go c.worker() return c, nil diff --git a/go/keymanager/init.go b/go/keymanager/init.go new file mode 100644 index 00000000000..ecc857baf79 --- /dev/null +++ b/go/keymanager/init.go @@ -0,0 +1,59 @@ +// Package keymanager implements the key manager backend. +package keymanager + +import ( + "context" + "fmt" + "strings" + + "github.com/spf13/cobra" + "github.com/spf13/viper" + + epochtime "github.com/oasislabs/ekiden/go/epochtime/api" + "github.com/oasislabs/ekiden/go/keymanager/api" + "github.com/oasislabs/ekiden/go/keymanager/memory" + "github.com/oasislabs/ekiden/go/keymanager/tendermint" + registry "github.com/oasislabs/ekiden/go/registry/api" + "github.com/oasislabs/ekiden/go/tendermint/service" +) + +const cfgBackend = "keymanager.backend" + +// New constructs a new Backend based on the configuration flags. 
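+//
+// A typical call site (see go/ekiden/cmd/node/node.go) is sketched
+// below; identifiers are illustrative:
+//
+//	km, err := keymanager.New(ctx, epochtimeBackend, registryBackend, tmService)
+//	if err != nil {
+//		return err
+//	}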
+func New( + ctx context.Context, + timeSource epochtime.Backend, + registry registry.Backend, + service service.TendermintService, +) (api.Backend, error) { + backend := viper.GetString(cfgBackend) + + var ( + impl api.Backend + err error + ) + + switch strings.ToLower(backend) { + case memory.BackendName: + impl, err = memory.New(ctx, registry) + case tendermint.BackendName: + impl, err = tendermint.New(ctx, timeSource, service) + default: + return nil, fmt.Errorf("keymanager: unsupported backend: '%v'", backend) + } + + return impl, err +} + +// RegisterFlags registers the configuration flags with the provided command. +func RegisterFlags(cmd *cobra.Command) { + if !cmd.Flags().Parsed() { + cmd.Flags().String(cfgBackend, memory.BackendName, "Key manager backend") + } + + for _, v := range []string{ + cfgBackend, + } { + _ = viper.BindPFlag(v, cmd.Flags().Lookup(v)) + } +} diff --git a/go/keymanager/memory/memory.go b/go/keymanager/memory/memory.go new file mode 100644 index 00000000000..0da3f224c73 --- /dev/null +++ b/go/keymanager/memory/memory.go @@ -0,0 +1,239 @@ +// Package memory provides the in-memory (centralized) key manager management +// implementation. +package memory + +import ( + "bytes" + "context" + "sync" + + "github.com/eapache/channels" + + "github.com/oasislabs/ekiden/go/common/cbor" + "github.com/oasislabs/ekiden/go/common/crypto/signature" + "github.com/oasislabs/ekiden/go/common/logging" + "github.com/oasislabs/ekiden/go/common/node" + "github.com/oasislabs/ekiden/go/common/pubsub" + "github.com/oasislabs/ekiden/go/keymanager/api" + registry "github.com/oasislabs/ekiden/go/registry/api" +) + +// BackendName is the name of the backend. +const BackendName = "memory" + +type runtimeState struct { + runtime *registry.Runtime + status *api.Status +} + +type memoryBackend struct { + sync.RWMutex + + logger *logging.Logger + registry registry.Backend + notifier *pubsub.Broker + + state map[signature.MapKey]*runtimeState + nodeList []*node.Node +} + +func (r *memoryBackend) GetStatus(ctx context.Context, id signature.PublicKey) (*api.Status, error) { + r.RLock() + defer r.RUnlock() + + state, ok := r.state[id.ToMapKey()] + if !ok { + return nil, api.ErrNoSuchKeyManager + } + + return state.status, nil +} + +func (r *memoryBackend) GetStatuses(ctx context.Context) ([]*api.Status, error) { + r.RLock() + defer r.RUnlock() + + var statuses []*api.Status + for _, v := range r.state { + statuses = append(statuses, v.status) + } + + return statuses, nil +} + +func (r *memoryBackend) WatchStatuses() (<-chan *api.Status, *pubsub.Subscription) { + sub := r.notifier.Subscribe() + ch := make(chan *api.Status) + sub.Unwrap(ch) + + return ch, sub +} + +func (r *memoryBackend) worker(ctx context.Context) { + rtCh, rtSub := r.registry.WatchRuntimes() + defer rtSub.Close() + + nlCh, nlSub := r.registry.WatchNodeList() + defer nlSub.Close() + + for { + select { + case <-ctx.Done(): + return + case rt := <-rtCh: + if rt.Kind == registry.KindKeyManager { + r.onRuntime(rt) + } + case nodeList := <-nlCh: + r.onNodeList(nodeList.Nodes) + } + } +} + +func (r *memoryBackend) onRuntime(rt *registry.Runtime) { + id := rt.ID.ToMapKey() + + r.Lock() + defer r.Unlock() + + st := r.state[id] + if st == nil { + // New key manager runtime (Uninitialized). 
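+ // Track it with an empty status; updateStatusLocked below fills
+ // the status in from the cached node list.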
+ st = &runtimeState{ + runtime: rt, + status: &api.Status{ + ID: rt.ID, + }, + } + r.state[id] = st + } else { + st.runtime = rt + } + + r.updateStatusLocked(st, r.nodeList) +} + +func (r *memoryBackend) onNodeList(nodes []*node.Node) { + r.Lock() + defer r.Unlock() + + r.nodeList = nodes + + var oldStates []*runtimeState + for _, v := range r.state { + oldStates = append(oldStates, v) + } + + for _, v := range oldStates { + r.updateStatusLocked(v, r.nodeList) + } +} + +func (r *memoryBackend) updateStatusLocked(oldState *runtimeState, nodeList []*node.Node) { + if nodeList == nil { + r.logger.Error("no cached node list") + return + } + + oldStatus := oldState.status + status := &api.Status{ + ID: oldStatus.ID, + IsInitialized: oldStatus.IsInitialized, + IsSecure: oldStatus.IsSecure, + Checksum: oldStatus.Checksum, + } + id := status.ID + + for _, n := range nodeList { + // Ensure the node is a key manager. + if !n.HasRoles(node.RoleKeyManager) { + continue + } + + // Ensure the node has the relevant runtime. + var nodeRt *node.Runtime + for _, rt := range n.Runtimes { + if rt.ID.Equal(id) { + // Only honor the first descriptor for a given runtime ID. + nodeRt = rt + break + } + } + if nodeRt == nil { + continue + } + + initResponse, err := api.VerifyExtraInfo(oldState.runtime, nodeRt) + if err != nil { + r.logger.Error("failed to validate ExtraInfo", + "err", err, + "id", id, + "node_id", n.ID, + ) + continue + } + + if status.IsInitialized { + // Already initialized. Check to see if it should be added to + // the node list. + if initResponse.IsSecure != status.IsSecure { + r.logger.Error("Security status mismatch for runtime", + "id", id, + "node_id", n.ID, + ) + continue + } + if !bytes.Equal(initResponse.Checksum, status.Checksum) { + r.logger.Error("Checksum mismatch for runtime", + "id", id, + "node_id", n.ID, + ) + continue + } + } else { + // Not initialized. The first node gets to be the source + // of truth, every other node will sync off it. + + // TODO: Sanity check IsSecure/Checksum. + status.IsSecure = initResponse.IsSecure + status.IsInitialized = true + status.Checksum = initResponse.Checksum + } + + status.Nodes = append(status.Nodes, n.ID) + } + + // Don't notify if nothing has changed. + if bytes.Equal(cbor.Marshal(oldStatus), cbor.Marshal(status)) { + r.logger.Debug("status is unchanged after update", + "id", id, + ) + return + } + + // Update the status, and notify. + oldState.status = status + r.notifier.Broadcast(status) +} + +// New creates a new memory key manager management instance. +func New(ctx context.Context, registry registry.Backend) (api.Backend, error) { + r := &memoryBackend{ + logger: logging.GetLogger("keymanager/memory"), + registry: registry, + state: make(map[signature.MapKey]*runtimeState), + } + r.notifier = pubsub.NewBrokerEx(func(ch *channels.InfiniteChannel) { + r.RLock() + defer r.RUnlock() + + wr := ch.In() + for _, v := range r.state { + wr <- v.status + } + }) + + go r.worker(ctx) + + return r, nil +} diff --git a/go/keymanager/tendermint/tendermint.go b/go/keymanager/tendermint/tendermint.go new file mode 100644 index 00000000000..f0109512e84 --- /dev/null +++ b/go/keymanager/tendermint/tendermint.go @@ -0,0 +1,163 @@ +// Package tendermint provides the tendermint backed key manager management +// implementation. 
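+//
+// Status generation itself happens in the ABCI application
+// (go/tendermint/apps/keymanager); this backend issues queries and
+// relays status-update tags as notifications.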
+package tendermint + +import ( + "bytes" + "context" + + "github.com/eapache/channels" + "github.com/pkg/errors" + tmtypes "github.com/tendermint/tendermint/types" + + "github.com/oasislabs/ekiden/go/common/cbor" + "github.com/oasislabs/ekiden/go/common/crypto/signature" + "github.com/oasislabs/ekiden/go/common/logging" + "github.com/oasislabs/ekiden/go/common/pubsub" + epochtime "github.com/oasislabs/ekiden/go/epochtime/api" + "github.com/oasislabs/ekiden/go/keymanager/api" + tmapi "github.com/oasislabs/ekiden/go/tendermint/api" + app "github.com/oasislabs/ekiden/go/tendermint/apps/keymanager" + "github.com/oasislabs/ekiden/go/tendermint/service" +) + +// BackendName is the name of the backend. +const BackendName = "tendermint" + +type tendermintBackend struct { + logger *logging.Logger + + service service.TendermintService + notifier *pubsub.Broker +} + +func (r *tendermintBackend) GetStatus(ctx context.Context, id signature.PublicKey) (*api.Status, error) { + query := tmapi.QueryGetByIDRequest{ + ID: id, + } + + response, err := r.service.Query(app.QueryGetStatus, query, 0) + if err != nil { + return nil, errors.Wrap(err, "keymanager/tendermint: get status query failed") + } + if response == nil { + return nil, api.ErrNoSuchKeyManager + } + + var status api.Status + if err = cbor.Unmarshal(response, &status); err != nil { + return nil, errors.Wrap(err, "keymanager/tendermint: get status malformed response") + } + + return &status, nil +} + +func (r *tendermintBackend) GetStatuses(ctx context.Context) ([]*api.Status, error) { + response, err := r.service.Query(app.QueryGetStatuses, nil, 0) + if err != nil { + return nil, errors.Wrap(err, "keymanager/tendermint: get statuses query failed") + } + + var statuses []*api.Status + if err = cbor.Unmarshal(response, &statuses); err != nil { + return nil, errors.Wrap(err, "keymanager/tendermint: get statuses malformed response") + } + + return statuses, nil +} + +func (r *tendermintBackend) WatchStatuses() (<-chan *api.Status, *pubsub.Subscription) { + sub := r.notifier.Subscribe() + ch := make(chan *api.Status) + sub.Unwrap(ch) + + return ch, sub +} + +func (r *tendermintBackend) worker(ctx context.Context) { + sub, err := r.service.Subscribe("keymanager-worker", app.QueryApp) + if err != nil { + r.logger.Error("failed to subscribe", + "err", err, + ) + return + } + defer r.service.Unsubscribe("keymanager-worker", app.QueryApp) // nolint: errcheck + + for { + var event interface{} + + select { + case msg := <-sub.Out(): + event = msg + case <-sub.Cancelled(): + return + case <-ctx.Done(): + return + } + + switch ev := event.(type) { + case tmtypes.EventDataNewBlock: + r.onEventDataNewBlock(ev) + default: + } + } +} + +func (r *tendermintBackend) onEventDataNewBlock(ev tmtypes.EventDataNewBlock) { + tags := ev.ResultBeginBlock.GetTags() + tags = append(tags, ev.ResultEndBlock.GetTags()...) + + for _, pair := range tags { + if bytes.Equal(pair.GetKey(), app.TagStatusUpdate) { + var statuses []*api.Status + if err := cbor.Unmarshal(pair.GetValue(), &statuses); err != nil { + r.logger.Error("worker: failed to get statuses from tag", + "err", err, + ) + continue + } + + for _, status := range statuses { + r.notifier.Broadcast(status) + } + } + } +} + +// New constructs a new tendermint backed key manager management Backend +// instance. +func New(ctx context.Context, timeSource epochtime.Backend, service service.TendermintService) (api.Backend, error) { + // We can only work with a block-based epochtime. 
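+ // The ABCI application recomputes statuses on epoch transitions
+ // detected in BeginBlock, which requires an epochtime backend that
+ // operates in terms of blocks.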
+ blockTimeSource, ok := timeSource.(epochtime.BlockBackend) + if !ok { + return nil, errors.New("keymanager/tendermint: need a block-based epochtime backend") + } + + app := app.New(blockTimeSource) + if err := service.RegisterApplication(app); err != nil { + return nil, errors.Wrap(err, "keymanager/tendermint: failed to register app") + } + + r := &tendermintBackend{ + logger: logging.GetLogger("keymanager/tendermint"), + service: service, + } + r.notifier = pubsub.NewBrokerEx(func(ch *channels.InfiniteChannel) { + statuses, err := r.GetStatuses(ctx) + if err != nil { + r.logger.Error("status notifier: unable to get a list of statuses", + "err", err, + ) + return + } + + wr := ch.In() + for _, v := range statuses { + wr <- v + } + }) + go r.worker(ctx) + + return r, nil +} diff --git a/go/tendermint/apps/keymanager/api.go b/go/tendermint/apps/keymanager/api.go new file mode 100644 index 00000000000..97e9677bd6c --- /dev/null +++ b/go/tendermint/apps/keymanager/api.go @@ -0,0 +1,35 @@ +// Package keymanager implementes the key manager management applicaiton. +package keymanager + +import ( + "github.com/oasislabs/ekiden/go/tendermint/api" +) + +const ( + // TransactionTag is a unique byte to identify transactions for + // the key manager application. + TransactionTag byte = 0x07 + + // AppName is the ABCI application name. + AppName string = "999_keymanager" +) + +var ( + // TagStatusUpdate is an ABCI transaction tag for a key manager status + // update (value is a CBOR serialized key manager status). + TagStatusUpdate = []byte("keymanager.status") + + // QueryApp is a query for filtering transactions processed by the + // key manager application. + QueryApp = api.QueryForEvent(api.TagApplication, []byte(AppName)) +) + +const ( + // QueryGetStatus is a path for a GetStatus query. + QueryGetStatus = AppName + "/status" + + // QueryGetStatuses is a path for a GetStatuses query. + QueryGetStatuses = AppName + "/statuses" +) + +// TODO: Policy updates etc here. diff --git a/go/tendermint/apps/keymanager/keymanager.go b/go/tendermint/apps/keymanager/keymanager.go new file mode 100644 index 00000000000..091242e17cb --- /dev/null +++ b/go/tendermint/apps/keymanager/keymanager.go @@ -0,0 +1,242 @@ +package keymanager + +import ( + "bytes" + + "github.com/pkg/errors" + "github.com/tendermint/tendermint/abci/types" + + "github.com/oasislabs/ekiden/go/common/cbor" + "github.com/oasislabs/ekiden/go/common/logging" + "github.com/oasislabs/ekiden/go/common/node" + epochtime "github.com/oasislabs/ekiden/go/epochtime/api" + "github.com/oasislabs/ekiden/go/genesis" + "github.com/oasislabs/ekiden/go/keymanager/api" + registry "github.com/oasislabs/ekiden/go/registry/api" + "github.com/oasislabs/ekiden/go/tendermint/abci" + tmapi "github.com/oasislabs/ekiden/go/tendermint/api" + registryapp "github.com/oasislabs/ekiden/go/tendermint/apps/registry" +) + +type keymanagerApplication struct { + logger *logging.Logger + state *abci.ApplicationState + + timeSource epochtime.BlockBackend +} + +func (app *keymanagerApplication) Name() string { + return AppName +} + +func (app *keymanagerApplication) TransactionTag() byte { + return TransactionTag +} + +func (app *keymanagerApplication) Blessed() bool { + return false +} + +func (app *keymanagerApplication) GetState(height int64) (interface{}, error) { + return newImmutableState(app.state, height) +} + +func (app *keymanagerApplication) OnRegister(state *abci.ApplicationState, queryRouter abci.QueryRouter) { + app.state = state + + // Register query handlers. 
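+ // Both queries are served from the application's immutable state
+ // snapshot at the requested height (see GetState).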
+ queryRouter.AddRoute(QueryGetStatus, tmapi.QueryGetByIDRequest{}, app.queryGetStatus) + queryRouter.AddRoute(QueryGetStatuses, nil, app.queryGetStatuses) +} + +func (app *keymanagerApplication) OnCleanup() {} + +func (app *keymanagerApplication) SetOption(request types.RequestSetOption) types.ResponseSetOption { + return types.ResponseSetOption{} +} + +func (app *keymanagerApplication) CheckTx(ctx *abci.Context, tx []byte) error { + // TODO: Add policy support. + return errors.New("tendermint/keymanager: transactions not supported yet") +} + +func (app *keymanagerApplication) ForeignCheckTx(ctx *abci.Context, other abci.Application, tx []byte) error { + return nil +} + +func (app *keymanagerApplication) InitChain(ctx *abci.Context, request types.RequestInitChain, doc *genesis.Document) error { + // TODO: Implement support for this, once it is sensible to do so. + // Note: Registry app needs to be moved above the keymanager one. + return nil +} + +func (app *keymanagerApplication) BeginBlock(ctx *abci.Context, request types.RequestBeginBlock) error { + if changed, epoch := app.state.EpochChanged(app.timeSource); changed { + return app.onEpochChange(ctx, epoch) + } + return nil +} + +func (app *keymanagerApplication) DeliverTx(ctx *abci.Context, tx []byte) error { + // TODO: Add policy support. + return errors.New("tendermint/keymanager: transactions not supported yet") +} + +func (app *keymanagerApplication) ForeignDeliverTx(ctx *abci.Context, other abci.Application, tx []byte) error { + return nil +} + +func (app *keymanagerApplication) EndBlock(request types.RequestEndBlock) (types.ResponseEndBlock, error) { + return types.ResponseEndBlock{}, nil +} + +func (app *keymanagerApplication) FireTimer(ctx *abci.Context, timer *abci.Timer) {} + +func (app *keymanagerApplication) queryGetStatus(s interface{}, r interface{}) ([]byte, error) { + state := s.(*immutableState) + request := r.(*tmapi.QueryGetByIDRequest) + + status, err := state.GetStatus(request.ID) + if err != nil { + return nil, err + } + return cbor.Marshal(status), nil +} + +func (app *keymanagerApplication) queryGetStatuses(s interface{}, r interface{}) ([]byte, error) { + state := s.(*immutableState) + + statuses, err := state.GetStatuses() + if err != nil { + return nil, err + } + return cbor.Marshal(statuses), nil +} + +func (app *keymanagerApplication) onEpochChange(ctx *abci.Context, epoch epochtime.EpochTime) error { + tree := app.state.DeliverTxTree() + + // Query the runtime and node lists. + regState := registryapp.NewMutableState(tree) + runtimes, _ := regState.GetRuntimes() + nodes, _ := regState.GetNodes() + registry.SortNodeList(nodes) + + // Recalculate all the key manager statuses. + // + // Note: This assumes that once a runtime is registered, it never expires. + var toEmit []*api.Status + state := NewMutableState(app.state.DeliverTxTree()) + for _, rt := range runtimes { + if rt.Kind != registry.KindKeyManager { + continue + } + + var forceEmit bool + oldStatus, err := state.GetStatus(rt.ID) + if err != nil { + // This is fatal, as it suggests state corruption. + app.logger.Error("failed to query key manager status", + "id", rt.ID, + "err", err, + ) + return errors.Wrap(err, "failed to query key manager status") + } + if oldStatus == nil { + // This must be a new key manager runtime. 
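+ // Force an emit so subscribers learn about the new runtime even
+ // if the generated status matches the empty placeholder.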
+ forceEmit = true + oldStatus = &api.Status{ + ID: rt.ID, + } + } + + newStatus := app.generateStatus(rt, oldStatus, nodes) + if forceEmit || !bytes.Equal(cbor.Marshal(oldStatus), cbor.Marshal(newStatus)) { + // Set, enqueue for emit. + state.setStatus(newStatus) + toEmit = append(toEmit, newStatus) + } + } + + // Emit the update event if required. + if len(toEmit) > 0 { + ctx.EmitTag(tmapi.TagApplication, []byte(app.Name())) + ctx.EmitTag(TagStatusUpdate, cbor.Marshal(toEmit)) + } + + return nil +} + +func (app *keymanagerApplication) generateStatus(kmrt *registry.Runtime, oldStatus *api.Status, nodes []*node.Node) *api.Status { + status := &api.Status{ + ID: kmrt.ID, + IsInitialized: oldStatus.IsInitialized, + IsSecure: oldStatus.IsSecure, + Checksum: oldStatus.Checksum, + } + + for _, n := range nodes { + if !n.HasRoles(node.RoleKeyManager) { + continue + } + + var nodeRt *node.Runtime + for _, rt := range n.Runtimes { + if rt.ID.Equal(kmrt.ID) { + nodeRt = rt + break + } + } + if nodeRt == nil { + continue + } + + initResponse, err := api.VerifyExtraInfo(kmrt, nodeRt) + if err != nil { + app.logger.Error("failed to validate ExtraInfo", + "err", err, + "id", kmrt.ID, + "node_id", n.ID, + ) + continue + } + + if status.IsInitialized { + // Already initialized. Check to see if it should be added to + // the node list. + if initResponse.IsSecure != status.IsSecure { + app.logger.Error("Security status mismatch for runtime", + "id", kmrt.ID, + "node_id", n.ID, + ) + continue + } + if !bytes.Equal(initResponse.Checksum, status.Checksum) { + app.logger.Error("Checksum mismatch for runtime", + "id", kmrt.ID, + "node_id", n.ID, + ) + continue + } + } else { + // Not initialized. The first node gets to be the source + // of truth, every other node will sync off it. + + // TODO: Sanity check IsSecure/Checksum. 
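+ // The first node to check in defines the canonical
+ // IsSecure/Checksum pair; later nodes must match it or they are
+ // excluded from the status (see the IsInitialized branch above).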
+ status.IsSecure = initResponse.IsSecure + status.IsInitialized = true + status.Checksum = initResponse.Checksum + } + + status.Nodes = append(status.Nodes, n.ID) + } + + return status +} + +func New(timeSource epochtime.BlockBackend) abci.Application { + return &keymanagerApplication{ + logger: logging.GetLogger("tendermint/keymanager"), + timeSource: timeSource, + } +} diff --git a/go/tendermint/apps/keymanager/state.go b/go/tendermint/apps/keymanager/state.go new file mode 100644 index 00000000000..c4352c6eb40 --- /dev/null +++ b/go/tendermint/apps/keymanager/state.go @@ -0,0 +1,95 @@ +package keymanager + +import ( + "fmt" + + "github.com/tendermint/iavl" + + "github.com/oasislabs/ekiden/go/common/cbor" + "github.com/oasislabs/ekiden/go/common/crypto/signature" + "github.com/oasislabs/ekiden/go/keymanager/api" + "github.com/oasislabs/ekiden/go/tendermint/abci" +) + +const stateStatusMap = "keymanager/status/%s" + +var () + +type immutableState struct { + *abci.ImmutableState +} + +func (st *immutableState) GetStatuses() ([]*api.Status, error) { + rawStatuses, err := st.getStatusesRaw() + if err != nil { + return nil, err + } + + var statuses []*api.Status + for _, raw := range rawStatuses { + var status api.Status + if err = cbor.Unmarshal(raw, &status); err != nil { + return nil, err + } + statuses = append(statuses, &status) + } + + return statuses, nil +} + +func (st *immutableState) getStatusesRaw() ([][]byte, error) { + var rawVec [][]byte + st.Snapshot.IterateRangeInclusive( + []byte(fmt.Sprintf(stateStatusMap, "")), + []byte(fmt.Sprintf(stateStatusMap, abci.LastID)), + true, + func(key, value []byte, version int64) bool { + rawVec = append(rawVec, value) + return false + }, + ) + + return rawVec, nil +} + +func (st *immutableState) GetStatus(id signature.PublicKey) (*api.Status, error) { + _, raw := st.Snapshot.Get([]byte(fmt.Sprintf(stateStatusMap, id.String()))) + if raw == nil { + return nil, nil + } + + var status api.Status + if err := cbor.Unmarshal(raw, &status); err != nil { + return nil, err + } + return &status, nil +} + +func newImmutableState(state *abci.ApplicationState, version int64) (*immutableState, error) { + inner, err := abci.NewImmutableState(state, version) + if err != nil { + return nil, err + } + return &immutableState{inner}, nil +} + +// MutableState is a mutable key manager state wrapper. +type MutableState struct { + *immutableState + + tree *iavl.MutableTree +} + +func (st *MutableState) setStatus(status *api.Status) { + st.tree.Set([]byte(fmt.Sprintf(stateStatusMap, status.ID.String())), cbor.Marshal(status)) +} + +// NewMutableState creates a new mutable key manager state wrapper. 
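+//
+// Reads go through the embedded immutable state; writes (setStatus)
+// go directly to the underlying IAVL tree.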
+func NewMutableState(tree *iavl.MutableTree) *MutableState { + inner := &abci.ImmutableState{Snapshot: tree.ImmutableTree} + + return &MutableState{ + immutableState: &immutableState{inner}, + tree: tree, + } +} diff --git a/go/worker/keymanager/keymanager.go b/go/worker/keymanager/keymanager.go index 94bfa5acffc..1edcd85c64e 100644 --- a/go/worker/keymanager/keymanager.go +++ b/go/worker/keymanager/keymanager.go @@ -36,6 +36,7 @@ const ( cfgRuntimeLoader = "worker.keymanager.runtime.loader" cfgRuntimeBinary = "worker.keymanager.runtime.binary" cfgRuntimeID = "worker.keymanager.runtime.id" + cfgMayGenerate = "worker.keymanager.may_generate" rpcCallTimeout = 5 * time.Second ) @@ -44,17 +45,14 @@ var ( _ service.BackgroundService = (*worker)(nil) errMalformedResponse = fmt.Errorf("worker/keymanager: malformed response from worker") - initResponseContext = []byte("EkKmIniR") emptyRoot hash.Hash - - testPublicKey signature.PublicKey ) type worker struct { sync.Mutex - enabled bool + logger *logging.Logger ctx context.Context cancelCtx context.CancelFunc @@ -69,8 +67,10 @@ type worker struct { registration *registration.Registration enclaveStatus *api.SignedInitResponse + backend api.Backend - logger *logging.Logger + enabled bool + mayGenerate bool } func (w *worker) Name() string { @@ -191,19 +191,33 @@ func (w *worker) onProcessStart(proto *protocol.Protocol, tee *node.CapabilityTE // Initialize the key manager. type InitRequest struct { - // TODO: At some point this needs the policy, checksum, peers, etc. + Checksum []byte `codec:"checksum"` + MayGenerate bool `codec:"may_generate"` } type InitCall struct { // nolint: maligned Method string `codec:"method"` Args InitRequest `codec:"args"` } - // XXX: Query the BFT component for the policy, checksum, peers (as available), - // and repeatedly try to initialize the KM enclave. + // Query the BFT component for the policy, checksum, peers (as available). + status, err := w.backend.GetStatus(w.ctx, w.runtimeID) + if err != nil { + if err != api.ErrNoSuchKeyManager { + w.logger.Error("failed to query key manger status", + "err", err, + "id", w.runtimeID, + ) + return err + } + status = &api.Status{} + } call := InitCall{ Method: "init", - Args: InitRequest{}, + Args: InitRequest{ + Checksum: cbor.FixSliceForSerde(status.Checksum), + MayGenerate: w.mayGenerate, + }, } req := &protocol.Body{ WorkerLocalRPCCallRequest: &protocol.WorkerLocalRPCCallRequest{ @@ -253,21 +267,19 @@ func (w *worker) onProcessStart(proto *protocol.Protocol, tee *node.CapabilityTE // Validate the signature. if tee != nil { - var signingKey *signature.PublicKey + var signingKey signature.PublicKey switch tee.Hardware { case node.TEEHardwareInvalid: - signingKey = &testPublicKey + signingKey = api.TestPublicKey case node.TEEHardwareIntelSGX: - signingKey = &tee.RAK + signingKey = tee.RAK default: return fmt.Errorf("worker/keymanager: unknown TEE hardware: %v", tee.Hardware) } - rawInitResp := cbor.Marshal(signedInitResp.InitResponse) - if !signingKey.Verify(initResponseContext, rawInitResp, signedInitResp.Signature) { - w.logger.Error("failed to validate initialization response signature") - return fmt.Errorf("worker/keymanager: failed to validate initialization response signature") + if err = signedInitResp.Verify(signingKey); err != nil { + return errors.Wrap(err, "worker/keymanager: failed to validate initialziation response signature") } } @@ -369,7 +381,7 @@ func (w *worker) onNodeRegistration(n *node.Node) error { } // New constructs a new key manager worker. 
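+//
+// The second return value reports whether the worker is enabled per
+// its configuration flags.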
-func New(dataDir string, ias *ias.IAS, grpc *grpc.Server, r *registration.Registration, workerCommonCfg *workerCommon.Config) (service.BackgroundService, bool, error) { +func New(dataDir string, ias *ias.IAS, grpc *grpc.Server, r *registration.Registration, workerCommonCfg *workerCommon.Config, backend api.Backend) (service.BackgroundService, bool, error) { var teeHardware node.TEEHardware s := viper.GetString(cfgTEEHardware) switch strings.ToLower(s) { @@ -386,7 +398,7 @@ func New(dataDir string, ias *ias.IAS, grpc *grpc.Server, r *registration.Regist ctx, cancelFn := context.WithCancel(context.Background()) w := &worker{ - enabled: viper.GetBool(cfgEnabled), + logger: logging.GetLogger("worker/keymanager"), ctx: ctx, cancelCtx: cancelFn, stopCh: make(chan struct{}), @@ -394,7 +406,9 @@ func New(dataDir string, ias *ias.IAS, grpc *grpc.Server, r *registration.Regist initCh: make(chan struct{}), grpc: grpc, registration: r, - logger: logging.GetLogger("worker/keymanager"), + backend: backend, + enabled: viper.GetBool(cfgEnabled), + mayGenerate: viper.GetBool(cfgMayGenerate), } if w.enabled { @@ -446,6 +460,7 @@ func RegisterFlags(cmd *cobra.Command) { cmd.Flags().String(cfgRuntimeLoader, "", "Path to key manager worker process binary") cmd.Flags().String(cfgRuntimeBinary, "", "Path to key manager runtime binary") cmd.Flags().String(cfgRuntimeID, "", "Key manager Runtime ID") + cmd.Flags().Bool(cfgMayGenerate, false, "Key manager may generate new master secret") } for _, v := range []string{ @@ -455,6 +470,7 @@ func RegisterFlags(cmd *cobra.Command) { cfgRuntimeLoader, cfgRuntimeBinary, cfgRuntimeID, + cfgMayGenerate, } { viper.BindPFlag(v, cmd.Flags().Lookup(v)) // nolint: errcheck } @@ -462,6 +478,4 @@ func RegisterFlags(cmd *cobra.Command) { func init() { emptyRoot.Empty() - _ = testPublicKey.UnmarshalHex("9d41a874b80e39a40c9644e964f0e4f967100c91654bfd7666435fe906af060f") - signature.RegisterTestPublicKey(testPublicKey) } diff --git a/keymanager-runtime/Cargo.toml b/keymanager-runtime/Cargo.toml index bb2b48bec2e..9b00c395d8c 100644 --- a/keymanager-runtime/Cargo.toml +++ b/keymanager-runtime/Cargo.toml @@ -12,6 +12,7 @@ threads = 2 [dependencies] ekiden-runtime = { path = "../runtime" } ekiden-keymanager-api = { path = "./api" } +ekiden-keymanager-client = { path = "../keymanager-client" } failure = "0.1.5" lazy_static = "1.3.0" lru = "0.1.15" @@ -21,5 +22,6 @@ io-context = "0.2.0" rand = "0.6.5" sgx-isa = { version = "0.2.0", features = ["sgxstd"] } sp800-185 = "0.2.0" +tiny-keccak = "1.4.2" x25519-dalek = "0.5.1" zeroize = "0.6" diff --git a/keymanager-runtime/api/src/api.rs b/keymanager-runtime/api/src/api.rs index ee369e52b25..75037be22a5 100644 --- a/keymanager-runtime/api/src/api.rs +++ b/keymanager-runtime/api/src/api.rs @@ -17,7 +17,12 @@ impl_bytes!(MasterSecret, 32, "A 256 bit master secret."); /// Key manager initialization request. #[derive(Clone, Serialize, Deserialize)] pub struct InitRequest { - // TODO: Policy, peers, checksum, etc. + /// True iff the enclave may generate a new master secret. + pub may_generate: bool, + /// Checksum for validating replication. + #[serde(with = "serde_bytes")] + pub checksum: Vec, + // TODO: Policy. } /// Key manager initialization response. @@ -86,6 +91,7 @@ pub struct ContractKey { /// State encryption key pub state_key: StateKey, /// Checksum of the key manager state. + #[serde(with = "serde_bytes")] pub checksum: Vec, } @@ -160,6 +166,7 @@ pub struct SignedPublicKey { /// Public key. 
pub key: PublicKey, /// Checksum of the key manager state. + #[serde(with = "serde_bytes")] pub checksum: Vec, /// Sign(sk, (key || checksum)) from the key manager. pub signature: Signature, @@ -174,8 +181,10 @@ pub enum KeyManagerError { InvalidAuthentication, #[fail(display = "key manager is not initialized")] NotInitialized, - #[fail(display = "key manager is already initialized")] - AlreadyInitialized, + #[fail(display = "key manager state corrupted")] + StateCorrupted, + #[fail(display = "key manager replication required")] + ReplicationRequired, } runtime_api! { diff --git a/keymanager-runtime/src/kdf.rs b/keymanager-runtime/src/kdf.rs index 79f564724c5..9eef08f07cf 100644 --- a/keymanager-runtime/src/kdf.rs +++ b/keymanager-runtime/src/kdf.rs @@ -2,20 +2,23 @@ use std::sync::{Arc, RwLock}; use failure::Fallible; +use io_context::Context; use lazy_static::lazy_static; use lru::LruCache; use rand::{rngs::OsRng, Rng}; use serde_cbor; use sgx_isa::Keypolicy; use sp800_185::{CShake, KMac}; +use tiny_keccak::sha3_256; use x25519_dalek; use zeroize::Zeroize; use ekiden_keymanager_api::{ - ContractKey, InitResponse, KeyManagerError, MasterSecret, PrivateKey, PublicKey, + ContractKey, InitRequest, InitResponse, KeyManagerError, MasterSecret, PrivateKey, PublicKey, ReplicateResponse, RequestIds, SignedInitResponse, SignedPublicKey, StateKey, INIT_RESPONSE_CONTEXT, PUBLIC_KEY_CONTEXT, }; +use ekiden_keymanager_client::KeyManagerClient; use ekiden_runtime::{ common::{ crypto::{ @@ -24,6 +27,7 @@ use ekiden_runtime::{ }, sgx::egetkey::egetkey, }, + executor::Executor, rpc::Context as RpcContext, storage::StorageContext, BUILD_INFO, @@ -83,6 +87,13 @@ struct Inner { } impl Inner { + fn reset(&mut self) { + self.master_secret = None; + self.checksum = None; + self.signer = None; + self.cache.clear(); + } + fn derive_contract_key(&self, req: &RequestIds) -> Fallible { let checksum = self.get_checksum()?; let mut contract_secret = self.derive_contract_secret(req)?; @@ -120,8 +131,7 @@ impl Inner { let mut k = [0u8; 32]; - // KMAC256(master_secret, MRENCLAVE_km || runtimeID || contractID, 32, "ekiden-derive-runtime-secret") - // XXX: We don't pass in the MRENCLAVE yet. + // KMAC256(master_secret, runtimeID || contractID, 32, "ekiden-derive-runtime-secret") let mut f = KMac::new_kmac256(master_secret.as_ref(), &RUNTIME_KDF_CUSTOM); f.update(req.runtime_id.as_ref()); f.update(req.contract_id.as_ref()); @@ -157,45 +167,110 @@ impl Kdf { /// Initialize the KDF internal state. #[cfg_attr(not(target_env = "sgx"), allow(unused))] - pub fn init(&self, ctx: &RpcContext) -> Fallible { + pub fn init( + &self, + req: &InitRequest, + ctx: &RpcContext, + client: Arc, + ) -> Fallible { let mut inner = self.inner.write().unwrap(); - // Initialization should be idempotent. - if inner.master_secret.is_none() { + // How initialization proceeds depends on the state and the request. + // + // WARNING: Once a master secret has been persisted to disk, it is + // intended that manual intervention by the operator is required to + // remove/alter it. + if inner.master_secret.is_some() { + // A master secret is set. This enclave has initialized successfully + // at least once. + + let checksum = inner.get_checksum()?; + if req.checksum.len() > 0 && req.checksum != checksum { + // The init request provided a checksum and there was a mismatch. + // The global key manager state disagrees with the enclave state. 
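+ // Reset the in-memory KDF state so a later init can retry
+ // cleanly; the master secret persisted to disk is deliberately
+ // left untouched (removing it requires operator intervention,
+ // per the warning above).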
+ inner.reset(); + return Err(KeyManagerError::StateCorrupted.into()); + } + } else if req.checksum.len() > 0 { + // A master secret is not set, and there is a checksum in the + // request. An enclave somewhere, has initialized at least + // once. + + // Attempt to load the master secret. + let (master_secret, did_replicate) = match Self::load_master_secret() { + Some(master_secret) => (master_secret, false), + None => { + // Couldn't load, fetch the master secret from another + // enclave instance. + + let result = client.replicate_master_secret(Context::create_child(&ctx.io_ctx)); + let master_secret = + Executor::with_current(|executor| executor.block_on(result))?; + (master_secret.unwrap(), true) + } + }; + + let checksum = Self::checksum_master_secret(&master_secret); + if req.checksum != checksum { + // We either loaded or replicated something that does + // not match the rest of the world. + inner.reset(); + return Err(KeyManagerError::StateCorrupted.into()); + } + + // The loaded/replicated master secret is consistent with the rest + // of the world. Ok to proceed. + if did_replicate { + Self::save_master_secret(&master_secret); + } + inner.master_secret = Some(master_secret); + inner.checksum = Some(checksum); + } else { + // A master secret is not set, and there is no checksum in the + // request. Either this key manager instance has never been + // initialized, or our view of the external state is not current. + + // Attempt to load the master secret, the caller may just be + // behind the rest of the world. let master_secret = match Self::load_master_secret() { Some(master_secret) => master_secret, - None => Self::generate_master_secret(), + None => { + // Unable to load, perhaps we can generate? + if !req.may_generate { + return Err(KeyManagerError::ReplicationRequired.into()); + } + + Self::generate_master_secret() + } }; - inner.master_secret = Some(MasterSecret::from(master_secret)); + + // Loaded or generated a master secret. There is no checksum to + // compare against, but that is expected when bootstrapping or + // lagging. + inner.checksum = Some(Self::checksum_master_secret(&master_secret)); + inner.master_secret = Some(master_secret); } - // (re)-generate the checksum, based on the possibly updated RAK. - let mut k = [0u8; 32]; - let mut f = KMac::new_kmac256( - inner.master_secret.as_ref().unwrap().as_ref(), - &CHECKSUM_CUSTOM, - ); + // If we make it this far, we have a master secret and checksum + // that either matches the global state, will become the global + // state, or should become the global state (rare). + // + // It is ok to generate a response. + // The RAK (signing key) may have changed since the last init call. #[cfg(target_env = "sgx")] { let signer: Arc = ctx.rak.clone(); inner.signer = Some(signer); - - f.update(ctx.rak.public_key().unwrap().as_ref()); } - #[cfg(not(target_env = "sgx"))] { let priv_key = Arc::new(signature::PrivateKey::from_pkcs8(INSECURE_SIGNING_KEY_PKCS8).unwrap()); - f.update(priv_key.public_key().as_ref()); - let signer: Arc = priv_key; inner.signer = Some(signer); } - f.finalize(&mut k); - inner.checksum = Some(k.to_vec()); // Build the response and sign it with the RAK. let init_response = InitResponse { @@ -260,7 +335,7 @@ impl Kdf { }) } - // Replciate master secret. + // Replicate master secret. 
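+ // Serves master-secret replication for other key manager enclave
+ // instances; init above consumes this via the key manager client.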
pub fn replicate_master_secret(&self) -> Fallible { let inner = self.inner.read().unwrap(); @@ -270,7 +345,7 @@ impl Kdf { } } - fn load_master_secret() -> Option> { + fn load_master_secret() -> Option { let ciphertext = StorageContext::with_current(|_mkvs, untrusted_local| { untrusted_local.get(MASTER_SECRET_STORAGE_KEY.to_vec()) }) @@ -295,21 +370,17 @@ impl Kdf { .open(&nonce, ciphertext.to_vec(), vec![]) .expect("persisted state is corrupted"); - Some(plaintext) + Some(MasterSecret::from(plaintext)) } - fn generate_master_secret() -> Vec { + fn save_master_secret(master_secret: &MasterSecret) { let mut rng = OsRng::new().unwrap(); - // TODO: Support static keying for debugging. - let mut master_secret = [0u8; 32]; - rng.fill(&mut master_secret); - // Encrypt the master secret. let mut nonce = [0u8; NONCE_SIZE]; rng.fill(&mut nonce); let d2 = Self::new_d2(); - let mut ciphertext = d2.seal(&nonce, master_secret.to_vec(), vec![]); + let mut ciphertext = d2.seal(&nonce, master_secret.as_ref().to_vec(), vec![]); ciphertext.extend_from_slice(&nonce); // Persist the encrypted master secret. @@ -317,8 +388,28 @@ impl Kdf { untrusted_local.insert(MASTER_SECRET_STORAGE_KEY.to_vec(), ciphertext) }) .expect("failed to persist master secret"); + } + + fn generate_master_secret() -> MasterSecret { + let mut rng = OsRng::new().unwrap(); + + // TODO: Support static keying for debugging. + let mut master_secret = [0u8; 32]; + rng.fill(&mut master_secret); + let master_secret = MasterSecret::from(master_secret.to_vec()); + + Self::save_master_secret(&master_secret); + + master_secret + } + + fn checksum_master_secret(master_secret: &MasterSecret) -> Vec { + let mut tmp = master_secret.as_ref().to_vec().clone(); + tmp.extend_from_slice(&CHECKSUM_CUSTOM); + let checksum = sha3_256(&tmp); + tmp.zeroize(); - master_secret.to_vec() + checksum.to_vec() } fn new_d2() -> DeoxysII { diff --git a/keymanager-runtime/src/main.rs b/keymanager-runtime/src/main.rs index a4863476f4b..f284dfbd004 100644 --- a/keymanager-runtime/src/main.rs +++ b/keymanager-runtime/src/main.rs @@ -1,4 +1,5 @@ extern crate ekiden_keymanager_api; +extern crate ekiden_keymanager_client; extern crate ekiden_runtime; extern crate failure; extern crate io_context; @@ -7,40 +8,49 @@ extern crate lru; extern crate rand; extern crate serde_cbor; extern crate sp800_185; +extern crate tiny_keccak; extern crate x25519_dalek; extern crate zeroize; +use std::{str::FromStr, sync::Arc}; + mod kdf; mod methods; use failure::Fallible; use ekiden_keymanager_api::*; +use ekiden_keymanager_client::{KeyManagerClient, RemoteClient}; use ekiden_runtime::{ + common::{runtime::RuntimeId, sgx::avr}, + rak::RAK, register_runtime_rpc_methods, rpc::{ dispatcher::{Method as RpcMethod, MethodDescriptor as RpcMethodDescriptor}, Context as RpcContext, }, - RpcDemux, RpcDispatcher, TxnDispatcher, + Protocol, RpcDemux, RpcDispatcher, TxnDispatcher, }; use self::kdf::Kdf; -/// Initialize the Kdf. -fn init_kdf(_req: &InitRequest, ctx: &mut RpcContext) -> Fallible { - // TODO: Based on the InitRequest, and persisted state (if any): - // * Load the persisted state. - // * Generate a new master secret. - // * Replicate the master secret. +static mut KM_CLIENT: Option> = None; - Kdf::global().init(&ctx) +/// Initialize the Kdf. 
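+///
+/// Expects KM_CLIENT to have been populated by the runtime
+/// initializer; panics otherwise.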
+fn init_kdf(req: &InitRequest, ctx: &mut RpcContext) -> Fallible { + unsafe { + let km_client = match &KM_CLIENT { + Some(km_client) => km_client, + None => panic!("no remote client available"), + }; + Kdf::global().init(&req, &ctx, km_client.clone()) + } } fn main() { // Initializer. - let init = |_: &_, - _: &_, + let init = |protocol: &Arc, + rak: &Arc, _rpc_demux: &mut RpcDemux, rpc: &mut RpcDispatcher, _txn: &mut TxnDispatcher| { @@ -62,6 +72,28 @@ fn main() { ), true, ); + + // HACK: There is no nice way of passing in the runtime ID at compile + // time yet. + let runtime_id = + RuntimeId::from_str("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") + .unwrap(); + + // We will only replicate from ourselves for now, once migration + // support is required, this needs to change somehow. + let mr_enclave = match avr::get_enclave_identity() { + Some(id) => Some(id.mr_enclave), + None => None, + }; + unsafe { + KM_CLIENT = Some(Arc::new(RemoteClient::new_runtime( + runtime_id, + mr_enclave, + protocol.clone(), + rak.clone(), + 1, // Not used, doesn't matter. + ))); + } }; // Start the runtime. diff --git a/runtime/src/dispatcher.rs b/runtime/src/dispatcher.rs index b15e4f85cd3..8916b670b72 100644 --- a/runtime/src/dispatcher.rs +++ b/runtime/src/dispatcher.rs @@ -390,7 +390,7 @@ impl Dispatcher { Context::create_child(&ctx), protocol.clone(), )); - let rpc_ctx = RpcContext::new(self.rak.clone(), session_info); + let rpc_ctx = RpcContext::new(ctx.clone(), self.rak.clone(), session_info); let response = StorageContext::enter(&mut mkvs, untrusted_local.clone(), || { rpc_dispatcher.dispatch(req, rpc_ctx) @@ -484,7 +484,7 @@ impl Dispatcher { Context::create_child(&ctx), protocol.clone(), )); - let rpc_ctx = RpcContext::new(self.rak.clone(), None); + let rpc_ctx = RpcContext::new(ctx.clone(), self.rak.clone(), None); let response = StorageContext::enter(&mut mkvs, untrusted_local.clone(), || { rpc_dispatcher.dispatch_local(req, rpc_ctx) }); diff --git a/runtime/src/rpc/context.rs b/runtime/src/rpc/context.rs index 5183fdf0721..5a5405b7533 100644 --- a/runtime/src/rpc/context.rs +++ b/runtime/src/rpc/context.rs @@ -1,6 +1,8 @@ //! RPC call context. use std::{any::Any, sync::Arc}; +use io_context::Context as IoContext; + use super::session::SessionInfo; use crate::rak::RAK; @@ -8,6 +10,8 @@ struct NoRuntimeContext; /// RPC call context. pub struct Context { + /// I/O context. + pub io_ctx: Arc, /// The current RAK if any. pub rak: Arc, /// Information about the session the RPC call was delivered over. @@ -18,8 +22,13 @@ pub struct Context { impl Context { /// Construct new transaction context. - pub fn new(rak: Arc, session_info: Option>) -> Self { + pub fn new( + io_ctx: Arc, + rak: Arc, + session_info: Option>, + ) -> Self { Self { + io_ctx, rak, session_info, runtime: Box::new(NoRuntimeContext),