From 3e2bdbf39b6b83904667dbec0c75cf89b216e8bb Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Fri, 7 Aug 2020 11:44:09 +0200 Subject: [PATCH 1/3] go/worker/compute/merge: Drop support for multiple committees Since there is currently no transaction scheduler implementation which would support multiple committees, it makes no sense for the merge node to try to support such cases, as doing so could be a source of bugs. Additionally, it results in extra round trips to storage nodes due to the Merge operation, which in the case of a single committee does not do anything. The merge node is also the only client for the Merge* storage operations, so they can just be removed in order to reduce the exposed API surface. --- .changelog/3179.breaking.md | 10 ++ go/genesis/genesis_test.go | 2 +- go/oasis-net-runner/fixtures/default.go | 2 +- go/oasis-node/cmd/debug/byzantine/merge.go | 44 ++----- go/oasis-node/cmd/debug/byzantine/storage.go | 28 ---- .../debug/txsource/workload/registration.go | 2 +- go/oasis-node/cmd/registry/runtime/runtime.go | 2 +- go/oasis-node/node_test.go | 2 +- .../scenario/e2e/runtime/multiple_runtimes.go | 2 +- .../scenario/e2e/runtime/runtime.go | 2 +- go/registry/tests/tester.go | 2 +- go/roothash/api/commitment/pool.go | 4 +- go/runtime/registry/storage_router.go | 16 --- go/storage/api/api.go | 43 +----- go/storage/api/grpc.go | 96 -------------- go/storage/api/root_cache.go | 56 -------- go/storage/client/client.go | 24 ---- go/storage/database/database.go | 32 ----- go/storage/metrics.go | 28 ---- go/storage/tests/tester.go | 108 --------------- go/worker/compute/merge/committee/node.go | 124 ++++-------------- go/worker/compute/merge/committee/state.go | 2 +- go/worker/storage/committee/policy.go | 7 +- go/worker/storage/service_external.go | 46 ------- 24 files changed, 59 insertions(+), 625 deletions(-) create mode 100644 .changelog/3179.breaking.md diff --git a/.changelog/3179.breaking.md b/.changelog/3179.breaking.md new file mode 100644 index 
00000000000..3617cc07201 --- /dev/null +++ b/.changelog/3179.breaking.md @@ -0,0 +1,10 @@ +go/worker/compute/merge: Drop support for multiple committees + +Since there is currently no transaction scheduler implementation which would +support multiple committees, it makes no sense for the merge node to try to +support such cases, as doing so could be a source of bugs. Additionally, it +results in extra round trips to storage nodes due to the Merge operation, +which in the case of a single committee does not do anything. + +The merge node is also the only client for the Merge* storage operations, so +they can just be removed in order to reduce the exposed API surface. diff --git a/go/genesis/genesis_test.go b/go/genesis/genesis_test.go index eaeb2568cc0..294690a8a8f 100644 --- a/go/genesis/genesis_test.go +++ b/go/genesis/genesis_test.go @@ -219,7 +219,7 @@ func TestGenesisSanityCheck(t *testing.T) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index 441b3a73032..f45327078fb 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -106,7 +106,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ diff --git a/go/oasis-node/cmd/debug/byzantine/merge.go b/go/oasis-node/cmd/debug/byzantine/merge.go index 16e08d0ae15..9e671d4fe7f 100644 --- a/go/oasis-node/cmd/debug/byzantine/merge.go +++ b/go/oasis-node/cmd/debug/byzantine/merge.go @@ -6,21 +6,18 @@ import ( "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - 
"github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/identity" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - storage "github.com/oasisprotocol/oasis-core/go/storage/api" ) type mergeBatchContext struct { currentBlock *block.Block commitments []*commitment.OpenExecutorCommitment - storageReceipts []*storage.Receipt - newBlock *block.Block - commit *commitment.MergeCommitment + newBlock *block.Block + commit *commitment.MergeCommitment } func newMergeBatchContext() *mergeBatchContext { @@ -73,7 +70,6 @@ func (mbc *mergeBatchContext) receiveCommitments(ph *p2pHandle, count int) error func (mbc *mergeBatchContext) process(ctx context.Context, hnss []*honestNodeStorage) error { collectedCommittees := make(map[hash.Hash]bool) var ioRoots, stateRoots []hash.Hash - var messages []*block.Message for _, commitment := range mbc.commitments { if collectedCommittees[commitment.Body.CommitteeID] { continue @@ -81,41 +77,17 @@ func (mbc *mergeBatchContext) process(ctx context.Context, hnss []*honestNodeSto collectedCommittees[commitment.Body.CommitteeID] = true ioRoots = append(ioRoots, commitment.Body.Header.IORoot) stateRoots = append(stateRoots, commitment.Body.Header.StateRoot) - if len(commitment.Body.Header.Messages) > 0 { - messages = append(messages, commitment.Body.Header.Messages...) 
- } - } - - var emptyRoot hash.Hash - emptyRoot.Empty() - - var err error - mbc.storageReceipts, err = storageBroadcastMergeBatch(ctx, hnss, mbc.currentBlock.Header.Namespace, mbc.currentBlock.Header.Round, []storage.MergeOp{ - { - Base: emptyRoot, - Others: ioRoots, - }, - { - Base: mbc.currentBlock.Header.StateRoot, - Others: stateRoots, - }, - }) - if err != nil { - return fmt.Errorf("storage broadcast merge batch: %w", err) } - var firstReceiptBody storage.ReceiptBody - if err := mbc.storageReceipts[0].Open(&firstReceiptBody); err != nil { - return fmt.Errorf("storage receipt Open: %w", err) - } - var signatures []signature.Signature - for _, receipt := range mbc.storageReceipts { - signatures = append(signatures, receipt.Signature) + if len(collectedCommittees) != 1 { + return fmt.Errorf("multiple committees not supported: %d", len(collectedCommittees)) } + signatures := mbc.commitments[0].Body.StorageSignatures + messages := mbc.commitments[0].Body.Header.Messages mbc.newBlock = block.NewEmptyBlock(mbc.currentBlock, 0, block.Normal) - mbc.newBlock.Header.IORoot = firstReceiptBody.Roots[0] - mbc.newBlock.Header.StateRoot = firstReceiptBody.Roots[1] + mbc.newBlock.Header.IORoot = ioRoots[0] + mbc.newBlock.Header.StateRoot = stateRoots[0] mbc.newBlock.Header.Messages = messages mbc.newBlock.Header.StorageSignatures = signatures diff --git a/go/oasis-node/cmd/debug/byzantine/storage.go b/go/oasis-node/cmd/debug/byzantine/storage.go index 3129ecc75f5..f8de1011e81 100644 --- a/go/oasis-node/cmd/debug/byzantine/storage.go +++ b/go/oasis-node/cmd/debug/byzantine/storage.go @@ -106,14 +106,6 @@ func (hns *honestNodeStorage) ApplyBatch(ctx context.Context, request *storage.A return hns.client.ApplyBatch(ctx, request) } -func (hns *honestNodeStorage) Merge(ctx context.Context, request *storage.MergeRequest) ([]*storage.Receipt, error) { - return hns.client.Merge(ctx, request) -} - -func (hns *honestNodeStorage) MergeBatch(ctx context.Context, request 
*storage.MergeBatchRequest) ([]*storage.Receipt, error) { - return hns.client.MergeBatch(ctx, request) -} - func (hns *honestNodeStorage) GetDiff(ctx context.Context, request *storage.GetDiffRequest) (storage.WriteLogIterator, error) { return hns.client.GetDiff(ctx, request) } @@ -176,23 +168,3 @@ func storageBroadcastApplyBatch( return receipts, nil } - -func storageBroadcastMergeBatch( - ctx context.Context, - hnss []*honestNodeStorage, - ns common.Namespace, - round uint64, - ops []storage.MergeOp, -) ([]*storage.Receipt, error) { - var receipts []*storage.Receipt - for _, hns := range hnss { - r, err := hns.MergeBatch(ctx, &storage.MergeBatchRequest{Namespace: ns, Round: round, Ops: ops}) - if err != nil { - return receipts, fmt.Errorf("honest node storage MergeBatch %s: %w", hns.nodeID, err) - } - - receipts = append(receipts, r...) - } - - return receipts, nil -} diff --git a/go/oasis-node/cmd/debug/txsource/workload/registration.go b/go/oasis-node/cmd/debug/txsource/workload/registration.go index 8d08bb10cd0..d68090b7735 100644 --- a/go/oasis-node/cmd/debug/txsource/workload/registration.go +++ b/go/oasis-node/cmd/debug/txsource/workload/registration.go @@ -68,7 +68,7 @@ func getRuntime(entityID signature.PublicKey, id common.Namespace) *registry.Run MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ diff --git a/go/oasis-node/cmd/registry/runtime/runtime.go b/go/oasis-node/cmd/registry/runtime/runtime.go index 3b6d76ddaa4..a7080ffaa0e 100644 --- a/go/oasis-node/cmd/registry/runtime/runtime.go +++ b/go/oasis-node/cmd/registry/runtime/runtime.go @@ -571,7 +571,7 @@ func init() { runtimeFlags.Uint64(CfgStorageMinWriteReplication, 1, "Minimum required storage write replication") runtimeFlags.Uint64(CfgStorageMaxApplyWriteLogEntries, 100_000, "Maximum number of write log entries") runtimeFlags.Uint64(CfgStorageMaxApplyOps, 2, 
"Maximum number of apply operations in a batch") - runtimeFlags.Uint64(CfgStorageMaxMergeRoots, 8, "Maximum number of merge roots") + runtimeFlags.Uint64(CfgStorageMaxMergeRoots, 1, "Maximum number of merge roots") runtimeFlags.Uint64(CfgStorageMaxMergeOps, 2, "Maximum number of merge operations in a batch") runtimeFlags.Uint64(CfgStorageCheckpointInterval, 0, "Storage checkpoint interval (in rounds)") runtimeFlags.Uint64(CfgStorageCheckpointNumKept, 0, "Number of storage checkpoints to keep") diff --git a/go/oasis-node/node_test.go b/go/oasis-node/node_test.go index c0056775db6..9b9dc46d2e9 100644 --- a/go/oasis-node/node_test.go +++ b/go/oasis-node/node_test.go @@ -106,7 +106,7 @@ var ( MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ diff --git a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go index 2d0e89a9ab8..a11c1c3a4a1 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go @@ -107,7 +107,7 @@ func (sc *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 69e32dafff0..579514d58b0 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -180,7 +180,7 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 2, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: 
registry.RuntimeAdmissionPolicy{ diff --git a/go/registry/tests/tester.go b/go/registry/tests/tester.go index 6e3cfc3052a..5ed7a9fd43b 100644 --- a/go/registry/tests/tester.go +++ b/go/registry/tests/tester.go @@ -1629,7 +1629,7 @@ func NewTestRuntime(seed []byte, ent *TestEntity, isKeyManager bool) (*TestRunti MinWriteReplication: 3, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 8, + MaxMergeRoots: 1, MaxMergeOps: 2, }, AdmissionPolicy: api.RuntimeAdmissionPolicy{ diff --git a/go/roothash/api/commitment/pool.go b/go/roothash/api/commitment/pool.go index 0fdb5947734..50d62c2a275 100644 --- a/go/roothash/api/commitment/pool.go +++ b/go/roothash/api/commitment/pool.go @@ -764,10 +764,10 @@ func (m *MultiPool) CheckEnoughCommitments() error { } // GetExecutorCommitments returns a list of executor commitments in the pool. -func (m *MultiPool) GetExecutorCommitments() (result []ExecutorCommitment) { +func (m *MultiPool) GetOpenExecutorCommitments() (result []OpenExecutorCommitment) { for _, p := range m.Committees { for _, c := range p.ExecuteCommitments { - result = append(result, c.ExecutorCommitment) + result = append(result, c) } } return diff --git a/go/runtime/registry/storage_router.go b/go/runtime/registry/storage_router.go index aa6306349f7..c5b423ef9f8 100644 --- a/go/runtime/registry/storage_router.go +++ b/go/runtime/registry/storage_router.go @@ -59,22 +59,6 @@ func (sr *storageRouter) ApplyBatch(ctx context.Context, request *api.ApplyBatch return rt.Storage().ApplyBatch(ctx, request) } -func (sr *storageRouter) Merge(ctx context.Context, request *api.MergeRequest) ([]*api.Receipt, error) { - rt, err := sr.getRuntime(request.Namespace) - if err != nil { - return nil, err - } - return rt.Storage().Merge(ctx, request) -} - -func (sr *storageRouter) MergeBatch(ctx context.Context, request *api.MergeBatchRequest) ([]*api.Receipt, error) { - rt, err := sr.getRuntime(request.Namespace) - if err != nil { - return nil, err - } - return 
rt.Storage().MergeBatch(ctx, request) -} - func (sr *storageRouter) GetDiff(ctx context.Context, request *api.GetDiffRequest) (api.WriteLogIterator, error) { rt, err := sr.getRuntime(request.StartRoot.Namespace) if err != nil { diff --git a/go/storage/api/api.go b/go/storage/api/api.go index 0b197f91b27..ae126ed50be 100644 --- a/go/storage/api/api.go +++ b/go/storage/api/api.go @@ -38,11 +38,8 @@ var ( // ErrUnsupported is the error returned when the called method is not // supported by the given backend. ErrUnsupported = errors.New(ModuleName, 4, "storage: method not supported by backend") - // ErrNoMergeRoots is the error returned when no other roots are passed - // to the Merge operation. - ErrNoMergeRoots = errors.New(ModuleName, 5, "storage: no roots to merge") // ErrLimitReached means that a configured limit has been reached. - ErrLimitReached = errors.New(ModuleName, 6, "storage: limit reached") + ErrLimitReached = errors.New(ModuleName, 5, "storage: limit reached") // The following errors are reimports from NodeDB. @@ -240,14 +237,6 @@ type ApplyOp struct { WriteLog WriteLog `json:"writelog"` } -// MergeOps is a merge operation within a batch of merge operations. -type MergeOp struct { - // Base is the base root for the merge. - Base hash.Hash `json:"base"` - // Others is a list of roots derived from base that should be merged. - Others []hash.Hash `json:"others"` -} - // ApplyRequest is an Apply request. type ApplyRequest struct { Namespace common.Namespace `json:"namespace"` @@ -265,21 +254,6 @@ type ApplyBatchRequest struct { Ops []ApplyOp `json:"ops"` } -// MergeRequest is a Merge request. -type MergeRequest struct { - Namespace common.Namespace `json:"namespace"` - Round uint64 `json:"round"` - Base hash.Hash `json:"base"` - Others []hash.Hash `json:"others"` -} - -// MergeBatchRequest is a MergeBatch request. 
-type MergeBatchRequest struct { - Namespace common.Namespace `json:"namespace"` - Round uint64 `json:"round"` - Ops []MergeOp `json:"ops"` -} - // SyncOptions are the sync options. type SyncOptions struct { OffsetKey []byte `json:"offset_key"` @@ -317,21 +291,6 @@ type Backend interface { // See Apply for more details. ApplyBatch(ctx context.Context, request *ApplyBatchRequest) ([]*Receipt, error) - // TODO: Add proof. - // Merge performs a 3-way merge operation between the specified - // roots and returns a receipt for the merged root. - // - // Round is the round of the base root while all other roots are - // expected to be in the next round. - Merge(ctx context.Context, request *MergeRequest) ([]*Receipt, error) - - // TODO: Add proof. - // MergeBatch performs multiple sets of merge operations and returns - // a single receipt covering all merged roots. - // - // See Merge for more details. - MergeBatch(ctx context.Context, request *MergeBatchRequest) ([]*Receipt, error) - // GetDiff returns an iterator of write log entries that must be applied // to get from the first given root to the second one. GetDiff(ctx context.Context, request *GetDiffRequest) (WriteLogIterator, error) diff --git a/go/storage/api/grpc.go b/go/storage/api/grpc.go index 026ba515dce..68da3e7dc9e 100644 --- a/go/storage/api/grpc.go +++ b/go/storage/api/grpc.go @@ -52,32 +52,6 @@ var ( return true, nil }) - // MethodMerge is the Merge method. - MethodMerge = ServiceName.NewMethod("Merge", MergeRequest{}). - WithNamespaceExtractor(func(ctx context.Context, req interface{}) (common.Namespace, error) { - r, ok := req.(*MergeRequest) - if !ok { - return common.Namespace{}, errInvalidRequestType - } - return r.Namespace, nil - }). - WithAccessControl(func(ctx context.Context, req interface{}) (bool, error) { - return true, nil - }) - - // MethodMergeBatch is the MergeBatch method. - MethodMergeBatch = ServiceName.NewMethod("MergeBatch", MergeBatchRequest{}). 
- WithNamespaceExtractor(func(ctx context.Context, req interface{}) (common.Namespace, error) { - r, ok := req.(*MergeBatchRequest) - if !ok { - return common.Namespace{}, errInvalidRequestType - } - return r.Namespace, nil - }). - WithAccessControl(func(ctx context.Context, req interface{}) (bool, error) { - return true, nil - }) - // MethodGetDiff is the GetDiff method. MethodGetDiff = ServiceName.NewMethod("GetDiff", GetDiffRequest{}). WithNamespaceExtractor(func(ctx context.Context, req interface{}) (common.Namespace, error) { @@ -142,14 +116,6 @@ var ( MethodName: MethodApplyBatch.ShortName(), Handler: handlerApplyBatch, }, - { - MethodName: MethodMerge.ShortName(), - Handler: handlerMerge, - }, - { - MethodName: MethodMergeBatch.ShortName(), - Handler: handlerMergeBatch, - }, { MethodName: MethodGetCheckpoints.ShortName(), Handler: handlerGetCheckpoints, @@ -285,52 +251,6 @@ func handlerApplyBatch( // nolint: golint return interceptor(ctx, &req, info, handler) } -func handlerMerge( // nolint: golint - srv interface{}, - ctx context.Context, - dec func(interface{}) error, - interceptor grpc.UnaryServerInterceptor, -) (interface{}, error) { - var req MergeRequest - if err := dec(&req); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(Backend).Merge(ctx, &req) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - FullMethod: MethodMerge.FullName(), - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(Backend).Merge(ctx, req.(*MergeRequest)) - } - return interceptor(ctx, &req, info, handler) -} - -func handlerMergeBatch( // nolint: golint - srv interface{}, - ctx context.Context, - dec func(interface{}) error, - interceptor grpc.UnaryServerInterceptor, -) (interface{}, error) { - var req MergeBatchRequest - if err := dec(&req); err != nil { - return nil, err - } - if interceptor == nil { - return srv.(Backend).MergeBatch(ctx, &req) - } - info := &grpc.UnaryServerInfo{ - Server: srv, - 
FullMethod: MethodMergeBatch.FullName(), - } - handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(Backend).MergeBatch(ctx, req.(*MergeBatchRequest)) - } - return interceptor(ctx, &req, info, handler) -} - func handlerGetCheckpoints( // nolint: golint srv interface{}, ctx context.Context, @@ -489,22 +409,6 @@ func (c *storageClient) ApplyBatch(ctx context.Context, request *ApplyBatchReque return rsp, nil } -func (c *storageClient) Merge(ctx context.Context, request *MergeRequest) ([]*Receipt, error) { - var rsp []*Receipt - if err := c.conn.Invoke(ctx, MethodMerge.FullName(), request, &rsp); err != nil { - return nil, err - } - return rsp, nil -} - -func (c *storageClient) MergeBatch(ctx context.Context, request *MergeBatchRequest) ([]*Receipt, error) { - var rsp []*Receipt - if err := c.conn.Invoke(ctx, MethodMergeBatch.FullName(), request, &rsp); err != nil { - return nil, err - } - return rsp, nil -} - func (c *storageClient) GetCheckpoints(ctx context.Context, request *checkpoint.GetCheckpointsRequest) ([]*checkpoint.Metadata, error) { var rsp []*checkpoint.Metadata if err := c.conn.Invoke(ctx, MethodGetCheckpoints.FullName(), request, &rsp); err != nil { diff --git a/go/storage/api/root_cache.go b/go/storage/api/root_cache.go index ad2c0be9c20..9e6695e70f4 100644 --- a/go/storage/api/root_cache.go +++ b/go/storage/api/root_cache.go @@ -33,62 +33,6 @@ func (rc *RootCache) GetTree(ctx context.Context, root Root) (mkvs.Tree, error) return mkvs.NewWithRoot(rc.remoteSyncer, rc.localDB, root, rc.persistEverything), nil } -// Merge performs a 3-way merge operation between the specified roots and returns -// a receipt for the merged root. -func (rc *RootCache) Merge( - ctx context.Context, - ns common.Namespace, - version uint64, - base hash.Hash, - others []hash.Hash, -) (*hash.Hash, error) { - if len(others) == 0 { - // No other roots passed, no reason to call the operation. 
- return nil, ErrNoMergeRoots - } - - // Make sure that all roots exist in storage before doing any work. - if !rc.localDB.HasRoot(Root{Namespace: ns, Version: version, Hash: base}) { - return nil, ErrRootNotFound - } - for _, rootHash := range others { - if !rc.localDB.HasRoot(Root{Namespace: ns, Version: version + 1, Hash: rootHash}) { - return nil, ErrRootNotFound - } - } - - if len(others) == 1 { - // Fast path: nothing to merge, just return the only root. - return &others[0], nil - } - - // Start with the first root. - // TODO: WithStorageProof. - tree := mkvs.NewWithRoot(nil, rc.localDB, Root{Namespace: ns, Version: version + 1, Hash: others[0]}) - defer tree.Close() - - // Apply operations from all roots. - baseRoot := Root{Namespace: ns, Version: version, Hash: base} - for _, rootHash := range others[1:] { - it, err := rc.localDB.GetWriteLog(ctx, baseRoot, Root{Namespace: ns, Version: version + 1, Hash: rootHash}) - if err != nil { - return nil, fmt.Errorf("storage/rootcache: failed to read write log: %w", err) - } - - if err = tree.ApplyWriteLog(ctx, it); err != nil { - return nil, fmt.Errorf("storage/rootcache: failed to apply write log: %w", err) - } - } - - var mergedRoot hash.Hash - var err error - if _, mergedRoot, err = tree.Commit(ctx, ns, version+1); err != nil { - return nil, fmt.Errorf("storage/rootcache: failed to commit write log: %w", err) - } - - return &mergedRoot, nil -} - // Apply applies the write log, bypassing the apply operation iff the new root // already is in the node database. 
func (rc *RootCache) Apply( diff --git a/go/storage/client/client.go b/go/storage/client/client.go index a7cdb3451ca..b1d48aee2b8 100644 --- a/go/storage/client/client.go +++ b/go/storage/client/client.go @@ -280,30 +280,6 @@ func (b *storageClientBackend) ApplyBatch(ctx context.Context, request *api.Appl ) } -func (b *storageClientBackend) Merge(ctx context.Context, request *api.MergeRequest) ([]*api.Receipt, error) { - return b.writeWithClient( - ctx, - request.Namespace, - request.Round+1, - func(ctx context.Context, c api.Backend, node *node.Node) (interface{}, error) { - return c.Merge(ctx, request) - }, - nil, - ) -} - -func (b *storageClientBackend) MergeBatch(ctx context.Context, request *api.MergeBatchRequest) ([]*api.Receipt, error) { - return b.writeWithClient( - ctx, - request.Namespace, - request.Round+1, - func(ctx context.Context, c api.Backend, node *node.Node) (interface{}, error) { - return c.MergeBatch(ctx, request) - }, - nil, - ) -} - func (b *storageClientBackend) readWithClient( ctx context.Context, ns common.Namespace, diff --git a/go/storage/database/database.go b/go/storage/database/database.go index 7f1a75832fb..73fc4106746 100644 --- a/go/storage/database/database.go +++ b/go/storage/database/database.go @@ -138,38 +138,6 @@ func (ba *databaseBackend) ApplyBatch(ctx context.Context, request *api.ApplyBat return []*api.Receipt{receipt}, err } -func (ba *databaseBackend) Merge(ctx context.Context, request *api.MergeRequest) ([]*api.Receipt, error) { - if ba.readOnly { - return nil, fmt.Errorf("storage/database: failed to Merge: %w", api.ErrReadOnly) - } - - newRoot, err := ba.rootCache.Merge(ctx, request.Namespace, request.Round, request.Base, request.Others) - if err != nil { - return nil, fmt.Errorf("storage/database: failed to Merge: %w", err) - } - - receipt, err := api.SignReceipt(ba.signer, request.Namespace, request.Round+1, []hash.Hash{*newRoot}) - return []*api.Receipt{receipt}, err -} - -func (ba *databaseBackend) MergeBatch(ctx 
context.Context, request *api.MergeBatchRequest) ([]*api.Receipt, error) { - if ba.readOnly { - return nil, fmt.Errorf("storage/database: failed to MergeBatch: %w", api.ErrReadOnly) - } - - newRoots := make([]hash.Hash, 0, len(request.Ops)) - for _, op := range request.Ops { - newRoot, err := ba.rootCache.Merge(ctx, request.Namespace, request.Round, op.Base, op.Others) - if err != nil { - return nil, fmt.Errorf("storage/database: failed to Merge, op: %w", err) - } - newRoots = append(newRoots, *newRoot) - } - - receipt, err := api.SignReceipt(ba.signer, request.Namespace, request.Round+1, newRoots) - return []*api.Receipt{receipt}, err -} - func (ba *databaseBackend) Cleanup() { ba.nodedb.Close() } diff --git a/go/storage/metrics.go b/go/storage/metrics.go index 1b1b06b12e2..8477d159b92 100644 --- a/go/storage/metrics.go +++ b/go/storage/metrics.go @@ -51,8 +51,6 @@ var ( labelApply = prometheus.Labels{"call": "apply"} labelApplyBatch = prometheus.Labels{"call": "apply_batch"} - labelMerge = prometheus.Labels{"call": "merge"} - labelMergeBatch = prometheus.Labels{"call": "merge_batch"} labelSyncGet = prometheus.Labels{"call": "sync_get"} labelSyncGetPrefixes = prometheus.Labels{"call": "sync_get_prefixes"} labelSyncIterate = prometheus.Labels{"call": "sync_iterate"} @@ -114,32 +112,6 @@ func (w *metricsWrapper) ApplyBatch(ctx context.Context, request *api.ApplyBatch return receipts, err } -func (w *metricsWrapper) Merge(ctx context.Context, request *api.MergeRequest) ([]*api.Receipt, error) { - start := time.Now() - receipts, err := w.Backend.Merge(ctx, request) - storageLatency.With(labelMerge).Observe(time.Since(start).Seconds()) - if err != nil { - storageFailures.With(labelMerge).Inc() - return nil, err - } - - storageCalls.With(labelMerge).Inc() - return receipts, err -} - -func (w *metricsWrapper) MergeBatch(ctx context.Context, request *api.MergeBatchRequest) ([]*api.Receipt, error) { - start := time.Now() - receipts, err := w.Backend.MergeBatch(ctx, 
request) - storageLatency.With(labelMergeBatch).Observe(time.Since(start).Seconds()) - if err != nil { - storageFailures.With(labelMergeBatch).Inc() - return nil, err - } - - storageCalls.With(labelMergeBatch).Inc() - return receipts, err -} - func (w *metricsWrapper) SyncGet(ctx context.Context, request *api.GetRequest) (*api.ProofResponse, error) { start := time.Now() res, err := w.Backend.SyncGet(ctx, request) diff --git a/go/storage/tests/tester.go b/go/storage/tests/tester.go index e7f2f749ef5..d49f2b71fbf 100644 --- a/go/storage/tests/tester.go +++ b/go/storage/tests/tester.go @@ -17,7 +17,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" - "github.com/oasisprotocol/oasis-core/go/storage/mkvs/writelog" ) var testValues = [][]byte{ @@ -92,9 +91,6 @@ func StorageImplementationTests(t *testing.T, localBackend api.LocalBackend, bac t.Run("Basic", func(t *testing.T) { testBasic(t, localBackend, backend, namespace, round) }) - t.Run("Merge", func(t *testing.T) { - testMerge(t, backend, namespace, round) - }) } func testBasic(t *testing.T, localBackend api.LocalBackend, backend api.Backend, namespace common.Namespace, round uint64) { @@ -263,107 +259,3 @@ func testBasic(t *testing.T, localBackend api.LocalBackend, backend api.Backend, require.Equal(t, cp.Chunks[0], hb.Build(), "GetCheckpointChunk must return correct chunk") }) } - -func testMerge(t *testing.T, backend api.Backend, namespace common.Namespace, round uint64) { - ctx := context.Background() - - writeLogs := []api.WriteLog{ - // Base root. - { - api.LogEntry{Key: []byte("foo"), Value: []byte("i am base")}, - }, - // First root. - { - api.LogEntry{Key: []byte("first"), Value: []byte("i am first root")}, - }, - // Second root. - { - api.LogEntry{Key: []byte("second"), Value: []byte("i am second root")}, - }, - // Third root. 
- { - api.LogEntry{Key: []byte("third"), Value: []byte("i am third root")}, - }, - } - - // Create all roots. - var roots []hash.Hash - for idx, writeLog := range writeLogs { - var dstRound uint64 - var baseRoot hash.Hash - if idx == 0 { - baseRoot.Empty() - dstRound = round - } else { - baseRoot = roots[0] - dstRound = round + 1 - } - - // Generate expected root hash. - tree := mkvs.NewWithRoot(backend, nil, api.Root{Namespace: namespace, Version: dstRound, Hash: baseRoot}) - defer tree.Close() - err := tree.ApplyWriteLog(ctx, writelog.NewStaticIterator(writeLog)) - require.NoError(t, err, "ApplyWriteLog") - var root hash.Hash - _, root, err = tree.Commit(ctx, namespace, dstRound) - require.NoError(t, err, "Commit") - - // Apply to storage backend. - _, err = backend.Apply(ctx, &api.ApplyRequest{ - Namespace: namespace, - SrcRound: round, - SrcRoot: baseRoot, - DstRound: dstRound, - DstRoot: root, - WriteLog: writeLog, - }) - require.NoError(t, err, "Apply") - - roots = append(roots, root) - } - - // Try to merge with only specifying the base. - _, err := backend.Merge(ctx, &api.MergeRequest{Namespace: namespace, Round: round, Base: roots[0]}) - require.Error(t, err, "Merge without other roots should return an error") - - // Try to merge with only specifying the base and first root. - receipts, err := backend.Merge(ctx, &api.MergeRequest{Namespace: namespace, Round: round, Base: roots[0], Others: roots[1:2]}) - require.NoError(t, err, "Merge") - require.NotNil(t, receipts, "Merge should return receipts") - - for _, receipt := range receipts { - var receiptBody api.ReceiptBody - err = receipt.Open(&receiptBody) - require.NoError(t, err, "receipt.Open") - require.Len(t, receiptBody.Roots, 1, "receipt should contain 1 root") - require.EqualValues(t, roots[1], receiptBody.Roots[0], "merged root should be equal to the only other root") - } - - // Try to merge with specifying the base and all three roots. 
- receipts, err = backend.Merge(ctx, &api.MergeRequest{Namespace: namespace, Round: round, Base: roots[0], Others: roots[1:]}) - require.NoError(t, err, "Merge") - require.NotNil(t, receipts, "Merge should return receipts") - - var mergedRoot hash.Hash - for _, receipt := range receipts { - var receiptBody api.ReceiptBody - err = receipt.Open(&receiptBody) - require.NoError(t, err, "receipt.Open") - require.Len(t, receiptBody.Roots, 1, "receipt should contain 1 root") - - mergedRoot = receiptBody.Roots[0] - } - - // Make sure that the merged root is the same as applying all write logs against - // the base root. - tree := mkvs.NewWithRoot(backend, nil, api.Root{Namespace: namespace, Version: round, Hash: roots[0]}) - defer tree.Close() - for _, writeLog := range writeLogs[1:] { - err = tree.ApplyWriteLog(ctx, writelog.NewStaticIterator(writeLog)) - require.NoError(t, err, "ApplyWriteLog") - } - _, expectedRoot, err := tree.Commit(ctx, namespace, round+1) - require.NoError(t, err, "Commit") - - require.Equal(t, expectedRoot, mergedRoot, "merged root should match expected root") -} diff --git a/go/worker/compute/merge/committee/node.go b/go/worker/compute/merge/committee/node.go index 206215db314..26b2e888e84 100644 --- a/go/worker/compute/merge/committee/node.go +++ b/go/worker/compute/merge/committee/node.go @@ -22,8 +22,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" runtimeCommittee "github.com/oasisprotocol/oasis-core/go/runtime/committee" - scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" - storage "github.com/oasisprotocol/oasis-core/go/storage/api" workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" @@ -432,7 +430,7 @@ func (n *Node) tryFinalizeResultsLocked(pool *commitment.Pool, didTimeout bool) n.logger.Info("have 
valid commitments from all committees, merging") - commitments := state.pool.GetExecutorCommitments() + commitments := state.pool.GetOpenExecutorCommitments() if epoch.IsMergeBackupWorker() && state.pendingEvent == nil { // Backup workers only perform merge after receiving a discrepancy event. @@ -445,12 +443,10 @@ func (n *Node) tryFinalizeResultsLocked(pool *commitment.Pool, didTimeout bool) } // Guarded by n.commonNode.CrossNode. -func (n *Node) startMergeLocked(commitments []commitment.ExecutorCommitment, results []*commitment.ComputeResultsHeader) { +func (n *Node) startMergeLocked(commitments []commitment.OpenExecutorCommitment, results []*commitment.ComputeResultsHeader) { doneCh := make(chan *commitment.MergeBody, 1) ctx, cancel := context.WithCancel(n.roundCtx) - epoch := n.commonNode.Group.GetEpochSnapshot() - // Create empty block based on previous block while we hold the lock. prevBlk := n.commonNode.CurrentBlock blk := block.NewEmptyBlock(prevBlk, 0, block.Normal) @@ -466,102 +462,38 @@ func (n *Node) startMergeLocked(commitments []commitment.ExecutorCommitment, res ctx, cancel = context.WithTimeout(ctx, n.commonCfg.StorageCommitTimeout) defer cancel() - var ioRoots, stateRoots []hash.Hash - var messages []*block.Message - for _, result := range results { - ioRoots = append(ioRoots, result.IORoot) - stateRoots = append(stateRoots, result.StateRoot) - - // Merge roothash messages. - // The rule is that at most one result can have sent roothash messages. - if len(result.Messages) > 0 { - if messages != nil { - n.logger.Error("multiple committees sent roothash messages") - return + var mergeBody commitment.MergeBody + switch len(results) { + case 1: + // Optimize the case where there is only a single committee -- there is nothing to merge + // so we can avoid a round trip to the storage nodes which already have the roots. 
+ blk.Header.Messages = results[0].Messages + blk.Header.IORoot = results[0].IORoot + blk.Header.StateRoot = results[0].StateRoot + + // Collect all distinct storage signatures. + storageSigSet := make(map[signature.PublicKey]bool) + for _, ec := range commitments { + mergeBody.ExecutorCommits = append(mergeBody.ExecutorCommits, ec.ExecutorCommitment) + + for _, s := range ec.Body.StorageSignatures { + if storageSigSet[s.PublicKey] { + continue + } + storageSigSet[s.PublicKey] = true + blk.Header.StorageSignatures = append(blk.Header.StorageSignatures, s) } - messages = result.Messages } - } - var emptyRoot hash.Hash - emptyRoot.Empty() - - // NOTE: Order is important for verifying the receipt. - mergeOps := []storage.MergeOp{ - // I/O root. - { - Base: emptyRoot, - Others: ioRoots, - }, - // State root. - { - Base: prevBlk.Header.StateRoot, - Others: stateRoots, - }, - } - - receipts, err := n.commonNode.Storage.MergeBatch(ctx, &storage.MergeBatchRequest{ - Namespace: prevBlk.Header.Namespace, - Round: prevBlk.Header.Round, - Ops: mergeOps, - }) - if err != nil { - n.logger.Error("failed to merge", - "err", err, - ) - return - } - - signatures := []signature.Signature{} - for idx, receipt := range receipts { - var receiptBody storage.ReceiptBody - if err = receipt.Open(&receiptBody); err != nil { - n.logger.Error("failed to open receipt", - "receipt", receipt, - "err", err, - ) - return - } - - // Make sure that all merged roots from all storage nodes are the same. 
- ioRoot := receiptBody.Roots[0] - stateRoot := receiptBody.Roots[1] - if idx == 0 { - blk.Header.IORoot = ioRoot - blk.Header.StateRoot = stateRoot - } else if !blk.Header.IORoot.Equal(&ioRoot) || !blk.Header.StateRoot.Equal(&stateRoot) { - n.logger.Error("storage nodes returned different merge roots", - "first_io_root", blk.Header.IORoot, - "io_root", ioRoot, - "first_state_root", blk.Header.StateRoot, - "state_root", stateRoot, - ) - inconsistentMergeRootCount.With(n.getMetricLabels()).Inc() - return - } - - if err = blk.Header.VerifyStorageReceipt(&receiptBody); err != nil { - n.logger.Error("failed to validate receipt body", - "receipt body", receiptBody, - "err", err, - ) - return - } - signatures = append(signatures, receipt.Signature) - } - if err := epoch.VerifyCommitteeSignatures(scheduler.KindStorage, signatures); err != nil { - n.logger.Error("failed to validate receipt signer", - "err", err, - ) + mergeBody.Header = blk.Header + default: + // Multiple committees, we need to perform a storage merge operation. + n.logger.Error("merge from multiple committees not supported") return } - blk.Header.Messages = messages - blk.Header.StorageSignatures = signatures - doneCh <- &commitment.MergeBody{ - ExecutorCommits: commitments, - Header: blk.Header, - } + // Submit the merge result. + doneCh <- &mergeBody }() } diff --git a/go/worker/compute/merge/committee/state.go b/go/worker/compute/merge/committee/state.go index 4a452927b6f..4df3b8707eb 100644 --- a/go/worker/compute/merge/committee/state.go +++ b/go/worker/compute/merge/committee/state.go @@ -115,7 +115,7 @@ func (s StateWaitingForResults) String() string { // StateWaitingForEvent is the waiting for event state. 
type StateWaitingForEvent struct { - commitments []commitment.ExecutorCommitment + commitments []commitment.OpenExecutorCommitment results []*commitment.ComputeResultsHeader } diff --git a/go/worker/storage/committee/policy.go b/go/worker/storage/committee/policy.go index 2ef01d54c1c..10285d413db 100644 --- a/go/worker/storage/committee/policy.go +++ b/go/worker/storage/committee/policy.go @@ -22,10 +22,7 @@ var ( }, } mergeCommitteePolicy = &committee.AccessPolicy{ - Actions: []accessctl.Action{ - accessctl.Action(api.MethodMerge.FullName()), - accessctl.Action(api.MethodMergeBatch.FullName()), - }, + Actions: []accessctl.Action{}, } // NOTE: GetDiff/GetCheckpoint* need to be accessible to all storage nodes, // not just the ones in the current storage committee so that new nodes can @@ -44,8 +41,6 @@ var ( accessctl.Action(api.MethodGetCheckpointChunk.FullName()), accessctl.Action(api.MethodApply.FullName()), accessctl.Action(api.MethodApplyBatch.FullName()), - accessctl.Action(api.MethodMerge.FullName()), - accessctl.Action(api.MethodMergeBatch.FullName()), }, } ) diff --git a/go/worker/storage/service_external.go b/go/worker/storage/service_external.go index fbc19fb208c..43e5908afa1 100644 --- a/go/worker/storage/service_external.go +++ b/go/worker/storage/service_external.go @@ -122,52 +122,6 @@ func (s *storageService) ApplyBatch(ctx context.Context, request *api.ApplyBatch return s.storage.ApplyBatch(ctx, request) } -func (s *storageService) Merge(ctx context.Context, request *api.MergeRequest) ([]*api.Receipt, error) { - if err := s.ensureInitialized(ctx); err != nil { - return nil, err - } - if s.debugRejectUpdates { - return nil, errDebugRejectUpdates - } - - // Limit maximum number of roots to merge. 
- cfg, err := s.getConfig(ctx, request.Namespace) - if err != nil { - return nil, err - } - if uint64(len(request.Others)) > cfg.MaxMergeRoots { - return nil, api.ErrLimitReached - } - - return s.storage.Merge(ctx, request) -} - -func (s *storageService) MergeBatch(ctx context.Context, request *api.MergeBatchRequest) ([]*api.Receipt, error) { - if err := s.ensureInitialized(ctx); err != nil { - return nil, err - } - if s.debugRejectUpdates { - return nil, errDebugRejectUpdates - } - - // Limit maximum number of operations in a batch. - cfg, err := s.getConfig(ctx, request.Namespace) - if err != nil { - return nil, err - } - if uint64(len(request.Ops)) > cfg.MaxMergeOps { - return nil, api.ErrLimitReached - } - // Limit maximum number of roots to merge. - for _, op := range request.Ops { - if uint64(len(op.Others)) > cfg.MaxMergeRoots { - return nil, api.ErrLimitReached - } - } - - return s.storage.MergeBatch(ctx, request) -} - func (s *storageService) GetDiff(ctx context.Context, request *api.GetDiffRequest) (api.WriteLogIterator, error) { if err := s.ensureInitialized(ctx); err != nil { return nil, err From ba28af28a367f497ca2ea23fcb041d33a22eb9c2 Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Tue, 11 Aug 2020 11:59:22 +0200 Subject: [PATCH 2/3] go/roothash: Drop support for multiple committees --- .changelog/3179.breaking.md | 8 +- docs/consensus/roothash.md | 31 - docs/images/oasis-core-high-level.svg | 2 +- docs/images/oasis-core-runtime-details.svg | 2 +- docs/index.md | 1 - docs/oasis-node/metrics.md | 4 - docs/runtime/index.md | 1 - docs/toc.md | 2 +- go/common/version/version.go | 6 +- go/consensus/tendermint/apps/roothash/api.go | 12 - .../tendermint/apps/roothash/roothash.go | 208 +---- .../tendermint/apps/roothash/state/round.go | 37 +- .../tendermint/apps/roothash/transactions.go | 83 +- .../tendermint/apps/scheduler/scheduler.go | 20 - go/consensus/tendermint/roothash/roothash.go | 20 - go/genesis/genesis_test.go | 6 - 
go/oasis-net-runner/fixtures/default.go | 7 - .../cmd/debug/byzantine/byzantine.go | 368 +-------- go/oasis-node/cmd/debug/byzantine/executor.go | 15 +- go/oasis-node/cmd/debug/byzantine/merge.go | 120 --- go/oasis-node/cmd/debug/byzantine/roothash.go | 9 +- .../cmd/debug/byzantine/scheduler.go | 17 - .../cmd/debug/byzantine/scheduler_test.go | 5 - .../debug/txsource/workload/registration.go | 6 - go/oasis-node/cmd/node/node.go | 27 +- go/oasis-node/cmd/registry/runtime/runtime.go | 24 - go/oasis-node/node_test.go | 7 - go/oasis-test-runner/oasis/cli/registry.go | 6 - go/oasis-test-runner/oasis/fixture.go | 2 - go/oasis-test-runner/oasis/log.go | 12 - go/oasis-test-runner/oasis/runtime.go | 2 - .../scenario/e2e/registry_cli.go | 8 - .../scenario/e2e/runtime/byzantine.go | 19 - .../scenario/e2e/runtime/history_reindex.go | 2 - .../scenario/e2e/runtime/multiple_runtimes.go | 7 - .../scenario/e2e/runtime/runtime.go | 12 - go/registry/api/api.go | 20 - go/registry/api/runtime.go | 24 - go/registry/tests/tester.go | 8 - go/roothash/api/api.go | 39 - go/roothash/api/commitment/executor.go | 15 +- go/roothash/api/commitment/merge.go | 95 --- go/roothash/api/commitment/pool.go | 325 +------- go/roothash/api/commitment/pool_test.go | 488 +----------- go/roothash/api/commitment/txnscheduler.go | 3 - go/roothash/tests/tester.go | 49 +- go/runtime/host/mock/mock.go | 1 + go/scheduler/api/api.go | 11 +- go/scheduler/tests/tester.go | 19 +- go/worker/common/committee/group.go | 235 +----- go/worker/compute/executor/committee/fault.go | 186 ----- .../compute/executor/committee/fault_test.go | 247 ------ go/worker/compute/executor/committee/node.go | 97 +-- go/worker/compute/executor/init.go | 4 +- go/worker/compute/executor/worker.go | 9 +- go/worker/compute/init.go | 2 +- go/worker/compute/merge/committee/node.go | 748 ------------------ go/worker/compute/merge/committee/state.go | 160 ---- go/worker/compute/merge/init.go | 12 - go/worker/compute/merge/worker.go | 190 ----- 
.../compute/txnscheduler/algorithm/api/api.go | 7 +- .../algorithm/batching/batching.go | 42 +- .../txnscheduler/algorithm/tests/tester.go | 8 +- .../compute/txnscheduler/committee/node.go | 37 +- go/worker/keymanager/worker.go | 6 +- go/worker/storage/committee/node.go | 9 +- go/worker/storage/committee/policy.go | 3 - runtime/src/common/roothash.rs | 2 + runtime/src/common/version.rs | 2 +- runtime/src/dispatcher.rs | 1 + 70 files changed, 223 insertions(+), 3999 deletions(-) delete mode 100644 go/oasis-node/cmd/debug/byzantine/merge.go delete mode 100644 go/roothash/api/commitment/merge.go delete mode 100644 go/worker/compute/executor/committee/fault.go delete mode 100644 go/worker/compute/executor/committee/fault_test.go delete mode 100644 go/worker/compute/merge/committee/node.go delete mode 100644 go/worker/compute/merge/committee/state.go delete mode 100644 go/worker/compute/merge/init.go delete mode 100644 go/worker/compute/merge/worker.go diff --git a/.changelog/3179.breaking.md b/.changelog/3179.breaking.md index 3617cc07201..5b40a84ba01 100644 --- a/.changelog/3179.breaking.md +++ b/.changelog/3179.breaking.md @@ -1,10 +1,8 @@ -go/worker/compute/merge: Drop support for multiple committees +go/roothash: Drop support for multiple committees Since there is currently no transaction scheduler implementation which would -support multiple committees, there is no sense in the merge node to try to -support such cases as it could be a source of bugs. Additionally it results -in extra round trips to storage nodes due to the Merge operation which in -case of a single committee does not do anything. +support multiple committees, there is no sense in having the merge node as it +could be a source of bugs. The merge node is also the only client for the Merge* storage operations, so they can just be removed in order to reduce the exposed API surface. 
diff --git a/docs/consensus/roothash.md b/docs/consensus/roothash.md index 625e98bafab..3895b758f65 100644 --- a/docs/consensus/roothash.md +++ b/docs/consensus/roothash.md @@ -46,35 +46,4 @@ type ExecutorCommit struct { [executor commitments]: https://pkg.go.dev/github.com/oasisprotocol/oasis-core/go/roothash/api/commitment?tab=doc#ExecutorCommitment -### Merge Commit - -The merge commit method allows a merge node to submit commitments of an executed -state merge. A new merge commit transaction can be generated using -[`NewMergeCommitTx`]. - -**Method name:** - -``` -roothash.MergeCommit -``` - -**Body:** - -```golang -type ExecutorCommit struct { - ID common.Namespace `json:"id"` - Commits []commitment.MergeCommitment `json:"commits"` -} -``` - -**Fields:** - -* `id` specifies the [runtime identifier] of a runtime this commit is for. -* `commits` are the [merge commitments]. - - -[`NewMergeCommitTx`]: https://pkg.go.dev/github.com/oasisprotocol/oasis-core/go/roothash/api?tab=doc#NewMergeCommitTx -[merge commitments]: https://pkg.go.dev/github.com/oasisprotocol/oasis-core/go/roothash/api/commitment?tab=doc#MergeCommitment - - ## Events diff --git a/docs/images/oasis-core-high-level.svg b/docs/images/oasis-core-high-level.svg index 57f7a9a0be2..72c21eca7a0 100644 --- a/docs/images/oasis-core-high-level.svg +++ b/docs/images/oasis-core-high-level.svg @@ -1,3 +1,3 @@ -
Epoch
Time
Epoch...
Random
Beacon
Random...
Staking
Staking
Registry
Registry
Cmte.
Scheduler
Cmte....
Root
Hash
Root...
Key
Manager
Key...
Consensus
Layer
Consensus...
Transaction
Scheduler
Transaction...
Executor
Executor
Merge
Merge
Storage
Storage
Runtime A
Runtime A
Runtime
Layer
Runtime...
Transaction
Scheduler
Transaction...
Executor
Executor
Merge
Merge
Storage
Storage
Runtime B
Runtime B
Transaction
Scheduler
Transaction...
Executor
Executor
Merge
Merge
Storage
Storage
Runtime C
Runtime C
Transaction
Scheduler
Transaction...
Executor
Executor
Merge
Merge
Storage
Storage
Runtime D
Runtime D
Viewer does not support full SVG 1.1
\ No newline at end of file +
Epoch
Time
Epoch...
Random
Beacon
Random...
Staking
Staking
Registry
Registry
Cmte.
Scheduler
Cmte....
Root
Hash
Root...
Key
Manager
Key...
Consensus
Layer
Consensus...
Runtime
Layer
Runtime...
Transaction
Scheduler
Transaction...
Executor
Executor
Storage
Storage
Runtime A
Runtime A
Transaction
Scheduler
Transaction...
Executor
Executor
Storage
Storage
Runtime B
Runtime B
Transaction
Scheduler
Transaction...
Executor
Executor
Storage
Storage
Runtime C
Runtime C
Transaction
Scheduler
Transaction...
Executor
Executor
Storage
Storage
Runtime D
Runtime D
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/docs/images/oasis-core-runtime-details.svg b/docs/images/oasis-core-runtime-details.svg index de853c493a3..6e96c77cb62 100644 --- a/docs/images/oasis-core-runtime-details.svg +++ b/docs/images/oasis-core-runtime-details.svg @@ -1,3 +1,3 @@ -
Epoch
Time
Epoch...
Random
Beacon
Random...
Staking
Staking
Registry
Registry
Elects Committees
Elects Committees
Cmte.
Scheduler
Cmte....
Canonical
State
Canonical...
Canonical State
Canonical State
RootHash
RootHash
Key
Manager
Key...
Runtime Layer
Runtime...
Consensus Layer
Consensu...
Transaction
Scheduler
Transaction...
Transaction
Scheduler
Transaction...
Executor
Executor
Executor
Executor
Merge
Merge
Merge
Merge
Storage
Storage
Storage
Storage
Commit
Summaries
Commit...
Transactions
Transactions
Runtime
Client
Runtime...
Runtime A
Runtime A
Executor
Executor
Key
Manager
Key...
Distributes
Policy
Distributes...
Viewer does not support full SVG 1.1
\ No newline at end of file +
Epoch
Time
Epoch...
Random
Beacon
Random...
Staking
Staking
Registry
Registry
Elects Committees
Elects Committees
Cmte.
Scheduler
Cmte....
Canonical
State
Canonical...
Canonical State
Canonical State
RootHash
RootHash
Key
Manager
Key...
Runtime Layer
Runtime...
Consensus Layer
Consensu...
Transaction
Scheduler
Transaction...
Transaction
Scheduler
Transaction...
Executor
Executor
Executor
Executor
Storage
Storage
Storage
Storage
Commit
Summaries
Commit...
Transactions
Transactions
Runtime
Client
Runtime...
Runtime A
Runtime A
Executor
Executor
Key
Manager
Key...
Distributes
Policy
Distributes...
Viewer does not support full SVG 1.1
\ No newline at end of file diff --git a/docs/index.md b/docs/index.md index f0db7b25afd..fd4fd531808 100644 --- a/docs/index.md +++ b/docs/index.md @@ -60,7 +60,6 @@ implementations. * Transaction Processing Pipeline * Transaction Scheduler Nodes * Executor Nodes - * Merge Nodes * Storage Nodes * Key Manager Nodes * Oasis Node (`oasis-node`) diff --git a/docs/oasis-node/metrics.md b/docs/oasis-node/metrics.md index 7be90df26d4..256f7016325 100644 --- a/docs/oasis-node/metrics.md +++ b/docs/oasis-node/metrics.md @@ -76,7 +76,6 @@ oasis_storage_successes | Counter | Number of storage successes. | call | [stora oasis_storage_value_size | Summary | Storage call value size (bytes). | call | [storage](../../go/storage/metrics.go) oasis_up | Gauge | Is oasis-test-runner active for specific scenario. | | [oasis-node/cmd/common/metrics](../../go/oasis-node/cmd/common/metrics/metrics.go) oasis_worker_aborted_batch_count | Counter | Number of aborted batches. | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) -oasis_worker_aborted_merge_count | Counter | Number of aborted merges. | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_batch_processing_time | Summary | Time it takes for a batch to finalize (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_batch_read_time | Summary | Time it takes to read a batch from storage (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_batch_runtime_processing_time | Summary | Time it takes for a batch to be processed by the runtime (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) @@ -85,12 +84,9 @@ oasis_worker_epoch_number | Gauge | Current epoch number as seen by the worker. 
oasis_worker_epoch_transition_count | Counter | Number of epoch transitions. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) oasis_worker_execution_discrepancy_detected_count | Counter | Number of detected execute discrepancies. | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_failed_round_count | Counter | Number of failed roothash rounds. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) -oasis_worker_inconsistent_merge_root_count | Counter | Number of inconsistent merge roots. | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) -oasis_worker_merge_discrepancy_detected_count | Counter | Number of detected merge discrepancies. | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_node_registered | Gauge | Is oasis node registered (binary). | | [worker/registration](../../go/worker/registration/worker.go) oasis_worker_processed_block_count | Counter | Number of processed roothash blocks. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) oasis_worker_processed_event_count | Counter | Number of processed roothash events. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) -oasis_worker_roothash_merge_commit_latency | Summary | Latency of roothash merge commit (seconds). | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_storage_commit_latency | Summary | Latency of storage commit calls (state + outputs) (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_storage_full_round | Gauge | The last round that was fully synced and finalized. 
| runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) oasis_worker_storage_pending_round | Gauge | The last round that is in-flight for syncing. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) diff --git a/docs/runtime/index.md b/docs/runtime/index.md index 3fb5abcbb62..a853cfd6ffb 100644 --- a/docs/runtime/index.md +++ b/docs/runtime/index.md @@ -128,7 +128,6 @@ roles (it can have multiple roles at once): * Transaction scheduler. * Executor node (primary or backup). -* Merge node (primary or backup). Subject to runtime configuration, each committee can contain multiple nodes of the same kind (e.g., multiple executor nodes). Some are considered _primary_ diff --git a/docs/toc.md b/docs/toc.md index 45b9fdff8e5..cb767f5aa6e 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -30,7 +30,7 @@ * [Operation Model](runtime/index.md#operation-model) * [Runtime Host Protocol](runtime/runtime-host-protocol.md) * [Identifiers](runtime/identifiers.md) -* Oasis Node (`oasis-node`) +* Oasis Node * [RPC](oasis-node/rpc.md) * [Metrics](oasis-node/metrics.md) diff --git a/go/common/version/version.go b/go/common/version/version.go index cafa942b79e..7869b4c2d6e 100644 --- a/go/common/version/version.go +++ b/go/common/version/version.go @@ -64,11 +64,11 @@ var ( // the runtime. // // NOTE: This version must be synced with runtime/src/common/version.rs. - RuntimeHostProtocol = Version{Major: 0, Minor: 15, Patch: 0} + RuntimeHostProtocol = Version{Major: 0, Minor: 16, Patch: 0} // RuntimeCommitteeProtocol versions the P2P protocol used by the runtime // committee members. - RuntimeCommitteeProtocol = Version{Major: 0, Minor: 10, Patch: 0} + RuntimeCommitteeProtocol = Version{Major: 0, Minor: 11, Patch: 0} // ConsensusProtocol versions all data structures and processing used by // the epochtime, beacon, registry, roothash, etc. 
modules that are @@ -76,7 +76,7 @@ var ( // // NOTE: Any change in the major or minor versions are considered // breaking changes for the protocol. - ConsensusProtocol = Version{Major: 0, Minor: 26, Patch: 0} + ConsensusProtocol = Version{Major: 0, Minor: 27, Patch: 0} // Tendermint exposes the tendermint core version. Tendermint = parseSemVerStr(version.TMCoreSemVer) diff --git a/go/consensus/tendermint/apps/roothash/api.go b/go/consensus/tendermint/apps/roothash/api.go index 5cec1c54c73..ebc0df3f5a3 100644 --- a/go/consensus/tendermint/apps/roothash/api.go +++ b/go/consensus/tendermint/apps/roothash/api.go @@ -68,24 +68,12 @@ type ValueExecutorCommitted struct { Event roothash.ExecutorCommittedEvent `json:"event"` } -// ValueMergeCommitted is the value component of a KeyMergeCommitted. -type ValueMergeCommitted struct { - ID common.Namespace `json:"id"` - Event roothash.MergeCommittedEvent `json:"event"` -} - // ValueFinalized is the value component of a TagFinalized. type ValueFinalized struct { ID common.Namespace `json:"id"` Round uint64 `json:"round"` } -// ValueMergeDiscrepancyDetected is the value component of a KeyMergeDiscrepancyDetected. -type ValueMergeDiscrepancyDetected struct { - Event roothash.MergeDiscrepancyDetectedEvent `json:"event"` - ID common.Namespace `json:"id"` -} - // ValueExecutionDiscrepancyDetected is the value component of a KeyMergeDiscrepancyDetected. 
type ValueExecutionDiscrepancyDetected struct { ID common.Namespace `json:"id"` diff --git a/go/consensus/tendermint/apps/roothash/roothash.go b/go/consensus/tendermint/apps/roothash/roothash.go index 386a76ba932..3c0f3fba2fc 100644 --- a/go/consensus/tendermint/apps/roothash/roothash.go +++ b/go/consensus/tendermint/apps/roothash/roothash.go @@ -3,7 +3,6 @@ package roothash import ( "bytes" - "encoding/binary" "errors" "fmt" @@ -11,7 +10,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/consensus/api/transaction" tmapi "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/api" @@ -122,7 +120,7 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo rtState.Suspended = false // Prepare new runtime committees based on what the scheduler did. - committeeID, executorPool, mergePool, empty, err := app.prepareNewCommittees(ctx, epoch, rtState, schedState, regState) + executorPool, empty, err := app.prepareNewCommittees(ctx, epoch, rtState, schedState, regState) if err != nil { return err } @@ -152,7 +150,7 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo } // If the committee has actually changed, force a new round. 
- if !rtState.Suspended && (rtState.Round == nil || !rtState.Round.CommitteeID.Equal(&committeeID)) { + if !rtState.Suspended { ctx.Logger().Debug("updating committee for runtime", "runtime_id", rt.ID, ) @@ -163,7 +161,6 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo ctx.Logger().Debug("new committee, transitioning round", "runtime_id", rt.ID, - "committee_id", committeeID, "round", blockNr, ) @@ -172,7 +169,7 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo app.emitEmptyBlock(ctx, rtState, block.EpochTransition) // Create a new round. - rtState.Round = roothashState.NewRound(committeeID, executorPool, mergePool, rtState.CurrentBlock) + rtState.Round = roothashState.NewRound(executorPool, rtState.CurrentBlock) } // Update the runtime descriptor to the latest per-epoch value. @@ -215,39 +212,14 @@ func (app *rootHashApplication) prepareNewCommittees( schedState *schedulerState.MutableState, regState *registryState.MutableState, ) ( - committeeID hash.Hash, - executorPool *commitment.MultiPool, - mergePool *commitment.Pool, + executorPool *commitment.Pool, empty bool, err error, ) { rtID := rtState.Runtime.ID - // Derive a deterministic committee identifier that depends on memberships - // of all committees. We need this to be able to quickly see if any - // committee members have changed. - // - // We first include the current epoch, then all executor committee member - // hashes and then the merge committee member hash: - // - // [little-endian epoch] - // "executor committees follow" - // [executor committe 1 members hash] - // [executor committe 2 members hash] - // ... 
- // [executor committe n members hash] - // "merge committee follows" - // [merge committee members hash] - // - var committeeIDParts [][]byte - var rawEpoch [8]byte - binary.LittleEndian.PutUint64(rawEpoch[:], uint64(epoch)) - committeeIDParts = append(committeeIDParts, rawEpoch[:]) - committeeIDParts = append(committeeIDParts, []byte("executor committees follow")) - - // NOTE: There will later be multiple executor committees. - var executorCommittees []*scheduler.Committee - xc1, err := schedState.Committee(ctx, scheduler.KindComputeExecutor, rtID) + executorPool = new(commitment.Pool) + executorCommittee, err := schedState.Committee(ctx, scheduler.KindComputeExecutor, rtID) if err != nil { ctx.Logger().Error("checkCommittees: failed to get executor committee from scheduler", "err", err, @@ -255,54 +227,17 @@ func (app *rootHashApplication) prepareNewCommittees( ) return } - if xc1 != nil { - executorCommittees = append(executorCommittees, xc1) - } - - executorPool = &commitment.MultiPool{ - Committees: make(map[hash.Hash]*commitment.Pool), - } - if len(executorCommittees) == 0 { - ctx.Logger().Warn("checkCommittees: no executor committees", - "runtime", rtID, - ) - empty = true - } - for _, executorCommittee := range executorCommittees { - executorCommitteeID := executorCommittee.EncodedMembersHash() - committeeIDParts = append(committeeIDParts, executorCommitteeID[:]) - - executorPool.Committees[executorCommitteeID] = &commitment.Pool{ - Runtime: rtState.Runtime, - Committee: executorCommittee, - } - } - - mergePool = new(commitment.Pool) - committeeIDParts = append(committeeIDParts, []byte("merge committee follows")) - mergeCommittee, err := schedState.Committee(ctx, scheduler.KindComputeMerge, rtID) - if err != nil { - ctx.Logger().Error("checkCommittees: failed to get merge committee from scheduler", - "err", err, - "runtime", rtID, - ) - return - } - if mergeCommittee == nil { - ctx.Logger().Warn("checkCommittees: no merge committee", + if 
executorCommittee == nil { + ctx.Logger().Warn("checkCommittees: no executor committee", "runtime", rtID, ) empty = true } else { - mergePool = &commitment.Pool{ + executorPool = &commitment.Pool{ Runtime: rtState.Runtime, - Committee: mergeCommittee, + Committee: executorCommittee, } - mergeCommitteeID := mergeCommittee.EncodedMembersHash() - committeeIDParts = append(committeeIDParts, mergeCommitteeID[:]) } - - committeeID.FromBytes(committeeIDParts...) return } @@ -334,13 +269,6 @@ func (app *rootHashApplication) ExecuteTx(ctx *tmapi.Context, tx *transaction.Tr } return app.executorCommit(ctx, state, &xc) - case roothash.MethodMergeCommit: - var mc roothash.MergeCommit - if err := cbor.Unmarshal(tx.Body, &mc); err != nil { - return err - } - - return app.mergeCommit(ctx, state, &mc) default: return roothash.ErrInvalidArgument } @@ -491,7 +419,7 @@ func (app *rootHashApplication) FireTimer(ctx *tmapi.Context, timer *tmapi.Timer "timer_round", tCtx.Round, ) - if rtState.Round.MergePool.IsTimeout(ctx.Now()) { + if rtState.Round.ExecutorPool.IsTimeout(ctx.Now()) { if err = app.tryFinalizeBlock(ctx, rtState, true); err != nil { ctx.Logger().Error("failed to finalize block", "err", err, @@ -499,9 +427,6 @@ func (app *rootHashApplication) FireTimer(ctx *tmapi.Context, timer *tmapi.Timer return fmt.Errorf("failed to finalize block: %w", err) } } - for _, pool := range rtState.Round.ExecutorPool.GetTimeoutCommittees(ctx.Now()) { - app.tryFinalizeExecute(ctx, rtState, pool, true) - } if err = state.SetRuntimeState(ctx, rtState); err != nil { return fmt.Errorf("failed to set runtime state: %w", err) @@ -537,86 +462,7 @@ func (app *rootHashApplication) updateTimer( } } -func (app *rootHashApplication) tryFinalizeExecute( - ctx *tmapi.Context, - rtState *roothashState.RuntimeState, - pool *commitment.Pool, - forced bool, -) { - runtime := rtState.Runtime - latestBlock := rtState.CurrentBlock - blockNr := latestBlock.Header.Round - committeeID := pool.GetCommitteeID() - - 
defer app.updateTimer(ctx, rtState, blockNr) - - if rtState.Round.Finalized { - ctx.Logger().Error("attempted to finalize execute when block already finalized", - "round", blockNr, - "committee_id", committeeID, - ) - return - } - - _, err := pool.TryFinalize(ctx.Now(), runtime.Executor.RoundTimeout, forced, true) - switch err { - case nil: - // No error -- there is no discrepancy. But only the merge committee - // can make progress even if we have all executor commitments. - - // TODO: Check if we need to punish the merge committee. - - ctx.Logger().Warn("no execution discrepancy, but only merge committee can make progress", - "round", blockNr, - "committee_id", committeeID, - ) - - if !forced { - // If this was not a timeout, we give the merge committee some - // more time to merge, otherwise we fail the round. - return - } - case commitment.ErrStillWaiting: - // Need more commits. - return - case commitment.ErrDiscrepancyDetected: - // Discrepancy has been detected. - ctx.Logger().Warn("execution discrepancy detected", - "round", blockNr, - "committee_id", committeeID, - logging.LogEvent, roothash.LogEventExecutionDiscrepancyDetected, - ) - - tagV := ValueExecutionDiscrepancyDetected{ - ID: runtime.ID, - Event: roothash.ExecutionDiscrepancyDetectedEvent{ - CommitteeID: pool.GetCommitteeID(), - Timeout: forced, - }, - } - ctx.EmitEvent( - tmapi.NewEventBuilder(app.Name()). - Attribute(KeyExecutionDiscrepancyDetected, cbor.Marshal(tagV)). - Attribute(KeyRuntimeID, ValueRuntimeID(runtime.ID)), - ) - return - default: - } - - // Something else went wrong, emit empty error block. Note that we need - // to abort everything even if only one committee failed to finalize as - // there is otherwise no way to make progress as merge committees will - // refuse to merge if there are discrepancies. 
- ctx.Logger().Error("round failed", - "round", blockNr, - "err", err, - logging.LogEvent, roothash.LogEventRoundFailed, - ) - - app.emitEmptyBlock(ctx, rtState, block.RoundFailed) -} - -func (app *rootHashApplication) tryFinalizeMerge( +func (app *rootHashApplication) tryFinalizeExecutor( ctx *tmapi.Context, rtState *roothashState.RuntimeState, forced bool, @@ -634,7 +480,7 @@ func (app *rootHashApplication) tryFinalizeMerge( return nil } - commit, err := rtState.Round.MergePool.TryFinalize(ctx.Now(), runtime.Merge.RoundTimeout, forced, true) + commit, err := rtState.Round.ExecutorPool.TryFinalize(ctx.Now(), runtime.Executor.RoundTimeout, forced, true) switch err { case nil: // Round has been finalized. @@ -643,11 +489,13 @@ func (app *rootHashApplication) tryFinalizeMerge( ) // Generate the final block. - blk := new(block.Block) - blk.Header = commit.ToDDResult().(block.Header) - blk.Header.Timestamp = uint64(ctx.Now().Unix()) + hdr := commit.ToDDResult().(commitment.ComputeResultsHeader) + + blk := block.NewEmptyBlock(rtState.CurrentBlock, uint64(ctx.Now().Unix()), block.Normal) + blk.Header.IORoot = hdr.IORoot + blk.Header.StateRoot = hdr.StateRoot + // Messages omitted on purpose. - rtState.Round.MergePool.ResetCommitments() rtState.Round.ExecutorPool.ResetCommitments() rtState.Round.Finalized = true @@ -661,18 +509,20 @@ func (app *rootHashApplication) tryFinalizeMerge( return nil case commitment.ErrDiscrepancyDetected: // Discrepancy has been detected. 
- ctx.Logger().Warn("merge discrepancy detected", + ctx.Logger().Warn("executor discrepancy detected", "round", blockNr, - logging.LogEvent, roothash.LogEventMergeDiscrepancyDetected, + logging.LogEvent, roothash.LogEventExecutionDiscrepancyDetected, ) - tagV := ValueMergeDiscrepancyDetected{ - ID: runtime.ID, - Event: roothash.MergeDiscrepancyDetectedEvent{}, + tagV := ValueExecutionDiscrepancyDetected{ + ID: runtime.ID, + Event: roothash.ExecutionDiscrepancyDetectedEvent{ + Timeout: forced, + }, } ctx.EmitEvent( tmapi.NewEventBuilder(app.Name()). - Attribute(KeyMergeDiscrepancyDetected, cbor.Marshal(tagV)). + Attribute(KeyExecutionDiscrepancyDetected, cbor.Marshal(tagV)). Attribute(KeyRuntimeID, ValueRuntimeID(runtime.ID)), ) return nil @@ -735,9 +585,9 @@ func (app *rootHashApplication) postProcessFinalizedBlock(ctx *tmapi.Context, rt func (app *rootHashApplication) tryFinalizeBlock( ctx *tmapi.Context, rtState *roothashState.RuntimeState, - mergeForced bool, + forced bool, ) error { - finalizedBlock := app.tryFinalizeMerge(ctx, rtState, mergeForced) + finalizedBlock := app.tryFinalizeExecutor(ctx, rtState, forced) if finalizedBlock == nil { return nil } diff --git a/go/consensus/tendermint/apps/roothash/state/round.go b/go/consensus/tendermint/apps/roothash/state/round.go index ffa391096d2..6bae4d11ae6 100644 --- a/go/consensus/tendermint/apps/roothash/state/round.go +++ b/go/consensus/tendermint/apps/roothash/state/round.go @@ -5,16 +5,13 @@ import ( "errors" "time" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" ) // Round is a roothash round. 
type Round struct { - CommitteeID hash.Hash `json:"committee_id"` - ExecutorPool *commitment.MultiPool `json:"executor_pool"` - MergePool *commitment.Pool `json:"merge_pool"` + ExecutorPool *commitment.Pool `json:"executor_pool"` CurrentBlock *block.Block `json:"current_block"` Finalized bool `json:"finalized"` @@ -22,16 +19,11 @@ type Round struct { func (r *Round) Reset() { r.ExecutorPool.ResetCommitments() - r.MergePool.ResetCommitments() r.Finalized = false } -func (r *Round) GetNextTimeout() (timeout time.Time) { - timeout = r.ExecutorPool.GetNextTimeout() - if timeout.IsZero() || (!r.MergePool.NextTimeout.IsZero() && r.MergePool.NextTimeout.Before(timeout)) { - timeout = r.MergePool.NextTimeout - } - return +func (r *Round) GetNextTimeout() time.Time { + return r.ExecutorPool.NextTimeout } func (r *Round) AddExecutorCommitment( @@ -39,23 +31,11 @@ func (r *Round) AddExecutorCommitment( commitment *commitment.ExecutorCommitment, sv commitment.SignatureVerifier, nl commitment.NodeLookup, -) (*commitment.Pool, error) { - if r.Finalized { - return nil, errors.New("tendermint/roothash: round is already finalized, can't commit") - } - return r.ExecutorPool.AddExecutorCommitment(ctx, r.CurrentBlock, sv, nl, commitment) -} - -func (r *Round) AddMergeCommitment( - ctx context.Context, - commitment *commitment.MergeCommitment, - sv commitment.SignatureVerifier, - nl commitment.NodeLookup, ) error { if r.Finalized { return errors.New("tendermint/roothash: round is already finalized, can't commit") } - return r.MergePool.AddMergeCommitment(ctx, r.CurrentBlock, sv, nl, commitment, r.ExecutorPool) + return r.ExecutorPool.AddExecutorCommitment(ctx, r.CurrentBlock, sv, nl, commitment) } func (r *Round) Transition(blk *block.Block) { @@ -63,17 +43,10 @@ func (r *Round) Transition(blk *block.Block) { r.Reset() } -func NewRound( - committeeID hash.Hash, - executorPool *commitment.MultiPool, - mergePool *commitment.Pool, - blk *block.Block, -) *Round { +func 
NewRound(executorPool *commitment.Pool, blk *block.Block) *Round { r := &Round{ - CommitteeID: committeeID, CurrentBlock: blk, ExecutorPool: executorPool, - MergePool: mergePool, } r.Reset() diff --git a/go/consensus/tendermint/apps/roothash/transactions.go b/go/consensus/tendermint/apps/roothash/transactions.go index b09777ec914..fdcd6efeba1 100644 --- a/go/consensus/tendermint/apps/roothash/transactions.go +++ b/go/consensus/tendermint/apps/roothash/transactions.go @@ -122,83 +122,14 @@ func (app *rootHashApplication) executorCommit( return err } - pools := make(map[*commitment.Pool]bool) for _, commit := range cc.Commits { - var pool *commitment.Pool - if pool, err = rtState.Round.AddExecutorCommitment(ctx, &commit, sv, nl); err != nil { // nolint: gosec + if err = rtState.Round.AddExecutorCommitment(ctx, &commit, sv, nl); err != nil { // nolint: gosec ctx.Logger().Error("failed to add compute commitment to round", "err", err, "round", rtState.CurrentBlock.Header.Round, ) return err } - - pools[pool] = true - } - - // Try to finalize compute rounds. - for pool := range pools { - app.tryFinalizeExecute(ctx, rtState, pool, false) - } - - // Update runtime state. - if err = state.SetRuntimeState(ctx, rtState); err != nil { - return fmt.Errorf("failed to set runtime state: %w", err) - } - - // Emit events for all accepted commits. - for _, commit := range cc.Commits { - evV := ValueExecutorCommitted{ - ID: cc.ID, - Event: roothash.ExecutorCommittedEvent{ - Commit: commit, - }, - } - ctx.EmitEvent( - tmapi.NewEventBuilder(app.Name()). - Attribute(KeyExecutorCommitted, cbor.Marshal(evV)). - Attribute(KeyRuntimeID, ValueRuntimeID(cc.ID)), - ) - } - - return nil -} - -func (app *rootHashApplication) mergeCommit( - ctx *abciAPI.Context, - state *roothashState.MutableState, - mc *roothash.MergeCommit, -) (err error) { - if ctx.IsCheckOnly() { - return nil - } - - // Charge gas for this transaction. 
- params, err := state.ConsensusParameters(ctx) - if err != nil { - ctx.Logger().Error("MergeCommit: failed to fetch consensus parameters", - "err", err, - ) - return err - } - if err = ctx.Gas().UseGas(1, roothash.GasOpMergeCommit, params.GasCosts); err != nil { - return err - } - - rtState, sv, nl, err := app.getRuntimeState(ctx, state, mc.ID) - if err != nil { - return err - } - - // Add commitments. - for _, commit := range mc.Commits { - if err = rtState.Round.AddMergeCommitment(ctx, &commit, sv, nl); err != nil { // nolint: gosec - ctx.Logger().Error("failed to add merge commitment to round", - "err", err, - "round", rtState.CurrentBlock.Header.Round, - ) - return err - } } // Try to finalize round. @@ -215,17 +146,17 @@ func (app *rootHashApplication) mergeCommit( } // Emit events for all accepted commits. - for _, commit := range mc.Commits { - evV := ValueMergeCommitted{ - ID: mc.ID, - Event: roothash.MergeCommittedEvent{ + for _, commit := range cc.Commits { + evV := ValueExecutorCommitted{ + ID: cc.ID, + Event: roothash.ExecutorCommittedEvent{ Commit: commit, }, } ctx.EmitEvent( tmapi.NewEventBuilder(app.Name()). - Attribute(KeyMergeCommitted, cbor.Marshal(evV)). - Attribute(KeyRuntimeID, ValueRuntimeID(mc.ID)), + Attribute(KeyExecutorCommitted, cbor.Marshal(evV)). 
+ Attribute(KeyRuntimeID, ValueRuntimeID(cc.ID)), ) } diff --git a/go/consensus/tendermint/apps/scheduler/scheduler.go b/go/consensus/tendermint/apps/scheduler/scheduler.go index e615f66c2e8..78e3e163501 100644 --- a/go/consensus/tendermint/apps/scheduler/scheduler.go +++ b/go/consensus/tendermint/apps/scheduler/scheduler.go @@ -38,7 +38,6 @@ var ( RNGContextExecutor = []byte("EkS-ABCI-Compute") RNGContextStorage = []byte("EkS-ABCI-Storage") RNGContextTransactionScheduler = []byte("EkS-ABCI-TransactionScheduler") - RNGContextMerge = []byte("EkS-ABCI-Merge") RNGContextValidators = []byte("EkS-ABCI-Validators") RNGContextEntities = []byte("EkS-ABCI-Entities") ) @@ -169,7 +168,6 @@ func (app *schedulerApplication) BeginBlock(ctx *api.Context, request types.Requ kinds := []scheduler.CommitteeKind{ scheduler.KindComputeExecutor, scheduler.KindComputeTxnScheduler, - scheduler.KindComputeMerge, scheduler.KindStorage, } for _, kind := range kinds { @@ -346,19 +344,6 @@ func (app *schedulerApplication) isSuitableTransactionScheduler(ctx *api.Context return false } -func (app *schedulerApplication) isSuitableMergeWorker(ctx *api.Context, n *node.Node, rt *registry.Runtime) bool { - if !n.HasRoles(node.RoleComputeWorker) { - return false - } - for _, nrt := range n.Runtimes { - if !nrt.ID.Equal(&rt.ID) { - continue - } - return true - } - return false -} - // GetPerm generates a permutation that we use to choose nodes from a list of eligible nodes to elect. 
func GetPerm(beacon []byte, runtimeID common.Namespace, rngCtx []byte, nrNodes int) ([]int, error) { drbg, err := drbg.New(crypto.SHA512, beacon, runtimeID[:], rngCtx) @@ -404,11 +389,6 @@ func (app *schedulerApplication) electCommittee( isSuitableFn = app.isSuitableExecutorWorker workerSize = int(rt.Executor.GroupSize) backupSize = int(rt.Executor.GroupBackupSize) - case scheduler.KindComputeMerge: - rngCtx = RNGContextMerge - isSuitableFn = app.isSuitableMergeWorker - workerSize = int(rt.Merge.GroupSize) - backupSize = int(rt.Merge.GroupBackupSize) case scheduler.KindComputeTxnScheduler: rngCtx = RNGContextTransactionScheduler isSuitableFn = app.isSuitableTransactionScheduler diff --git a/go/consensus/tendermint/roothash/roothash.go b/go/consensus/tendermint/roothash/roothash.go index 94e741a52d5..e08b4c9407c 100644 --- a/go/consensus/tendermint/roothash/roothash.go +++ b/go/consensus/tendermint/roothash/roothash.go @@ -593,16 +593,6 @@ func EventsFromTendermint( ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, FinalizedEvent: &api.FinalizedEvent{Round: value.Round}} events = append(events, ev) - case bytes.Equal(key, app.KeyMergeDiscrepancyDetected): - // A merge discrepancy has been detected. - var value app.ValueMergeDiscrepancyDetected - if err := cbor.Unmarshal(val, &value); err != nil { - errs = multierror.Append(errs, fmt.Errorf("roothash: corrupt MergeDiscrepancy event: %w", err)) - continue - } - - ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, MergeDiscrepancyDetected: &value.Event} - events = append(events, ev) case bytes.Equal(key, app.KeyExecutionDiscrepancyDetected): // An execution discrepancy has been detected. 
var value app.ValueExecutionDiscrepancyDetected @@ -623,16 +613,6 @@ func EventsFromTendermint( ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, ExecutorCommitted: &value.Event} events = append(events, ev) - case bytes.Equal(key, app.KeyMergeCommitted): - // A merge commit has been processed. - var value app.ValueMergeCommitted - if err := cbor.Unmarshal(val, &value); err != nil { - errs = multierror.Append(errs, fmt.Errorf("roothash: corrupt ValueMergeCommitted event: %w", err)) - continue - } - - ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, MergeCommitted: &value.Event} - events = append(events, ev) case bytes.Equal(key, app.KeyRuntimeID): // Runtime ID attribute (Base64-encoded to allow queries). default: diff --git a/go/genesis/genesis_test.go b/go/genesis/genesis_test.go index 294690a8a8f..f7ae4695a5c 100644 --- a/go/genesis/genesis_test.go +++ b/go/genesis/genesis_test.go @@ -203,10 +203,6 @@ func TestGenesisSanityCheck(t *testing.T) { GroupSize: 1, RoundTimeout: 1 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - RoundTimeout: 1 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: 1, Algorithm: "batching", @@ -219,8 +215,6 @@ func TestGenesisSanityCheck(t *testing.T) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index f45327078fb..a4ec56dc9fe 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -89,11 +89,6 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { GroupBackupSize: 1, RoundTimeout: 20 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 2, - GroupBackupSize: 1, - RoundTimeout: 20 * time.Second, - }, TxnScheduler: 
registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 2, @@ -106,8 +101,6 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-node/cmd/debug/byzantine/byzantine.go b/go/oasis-node/cmd/debug/byzantine/byzantine.go index d36d057bc90..02350e07c80 100644 --- a/go/oasis-node/cmd/debug/byzantine/byzantine.go +++ b/go/oasis-node/cmd/debug/byzantine/byzantine.go @@ -8,7 +8,6 @@ import ( flag "github.com/spf13/pflag" "github.com/spf13/viper" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/common/node" @@ -19,7 +18,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/grpc" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/metrics" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" "github.com/oasisprotocol/oasis-core/go/runtime/transaction" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" @@ -57,21 +55,6 @@ var ( Short: "act as an executor worker that registers and doesn't do any work", Run: doExecutorStraggler, } - mergeHonestCmd = &cobra.Command{ - Use: "merge-honest", - Short: "act as an honest merge worker", - Run: doMergeHonest, - } - mergeWrongCmd = &cobra.Command{ - Use: "merge-wrong", - Short: "act as a merge worker that commits wrong result", - Run: doMergeWrong, - } - mergeStragglerCmd = &cobra.Command{ - Use: "merge-straggler", - Short: "act as a merge worker that registers and doesn't do any work", - Run: 
doMergeStraggler, - } ) func activateCommonConfig(cmd *cobra.Command, args []string) { @@ -150,13 +133,6 @@ func doExecutorHonest(cmd *cobra.Command, args []string) { if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeMerge, err)) - } - if err = schedulerCheckNotScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled merge failed: %+v", err)) - } logger.Debug("executor honest: connecting to storage committee") hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) @@ -208,8 +184,8 @@ func doExecutorHonest(cmd *cobra.Command, args []string) { panic(fmt.Sprintf("compute create commitment failed: %+v", err)) } - if err = cbc.publishToCommittee(ht, electionHeight, mergeCommittee, scheduler.Worker, ph, defaultRuntimeID, electionHeight); err != nil { - panic(fmt.Sprintf("compute publish to committee merge worker failed: %+v", err)) + if err = cbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { + panic(fmt.Sprintf("compute publish to chain failed: %+v", err)) } logger.Debug("executor honest: commitment sent") } @@ -283,13 +259,6 @@ func doExecutorWrong(cmd *cobra.Command, args []string) { if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee 
%s failed: %+v", scheduler.KindComputeMerge, err)) - } - if err = schedulerCheckNotScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled merge failed: %+v", err)) - } logger.Debug("executor honest: connecting to storage committee") hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) @@ -341,8 +310,8 @@ func doExecutorWrong(cmd *cobra.Command, args []string) { panic(fmt.Sprintf("compute create commitment failed: %+v", err)) } - if err = cbc.publishToCommittee(ht, electionHeight, mergeCommittee, scheduler.Worker, ph, defaultRuntimeID, electionHeight); err != nil { - panic(fmt.Sprintf("compute publish to committee merge worker failed: %+v", err)) + if err = cbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { + panic(fmt.Sprintf("compute publish to chain failed: %+v", err)) } logger.Debug("executor wrong: commitment sent") } @@ -411,13 +380,6 @@ func doExecutorStraggler(cmd *cobra.Command, args []string) { if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeMerge, err)) - } - if err = schedulerCheckNotScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled merge failed: %+v", err)) - } cbc := newComputeBatchContext() @@ -429,333 +391,11 @@ func doExecutorStraggler(cmd *cobra.Command, args []string) { logger.Debug("executor straggler: bailing") } -func doMergeHonest(cmd *cobra.Command, args []string) { - if err := common.Init(); err != nil { - common.EarlyLogAndExit(err) - } - - 
defaultIdentity, err := initDefaultIdentity(common.DataDir()) - if err != nil { - panic(fmt.Sprintf("init default identity failed: %+v", err)) - } - - ht := newHonestTendermint() - if err = ht.start(defaultIdentity, common.DataDir()); err != nil { - panic(fmt.Sprintf("honest Tendermint start failed: %+v", err)) - } - defer func() { - if err1 := ht.stop(); err1 != nil { - panic(fmt.Sprintf("honest Tendermint stop failed: %+v", err1)) - } - }() - - ph := newP2PHandle() - if err = ph.start(ht, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("P2P start failed: %+v", err)) - } - defer func() { - if err1 := ph.stop(); err1 != nil { - panic(fmt.Sprintf("P2P stop failed: %+v", err1)) - } - }() - - activationEpoch := epochtime.EpochTime(viper.GetUint64(CfgActivationEpoch)) - if err = epochtimeWaitForEpoch(ht.service, activationEpoch); err != nil { - panic(fmt.Sprintf("epochtimeWaitForEpoch: %+v", err)) - } - - if err = registryRegisterNode(ht.service, defaultIdentity, common.DataDir(), fakeAddresses, ph.service.Addresses(), defaultRuntimeID, nil, node.RoleComputeWorker); err != nil { - panic(fmt.Sprintf("registryRegisterNode: %+v", err)) - } - - electionHeight, err := schedulerNextElectionHeight(ht.service, activationEpoch+1) - if err != nil { - panic(fmt.Sprintf("scheduler next election height failed: %+v", err)) - } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s at height %d failed: %+v", scheduler.KindComputeMerge, electionHeight, err)) - } - if err = schedulerCheckScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public(), scheduler.Worker); err != nil { - panic(fmt.Sprintf("scheduler check scheduled failed: %+v", err)) - } - logger.Debug("merge honest: merge schedule ok") - executorCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeExecutor, defaultRuntimeID) - if err != nil { - 
panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeExecutor, err)) - } - if err = schedulerCheckNotScheduled(executorCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled executor failed: %+v", err)) - } - storageCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindStorage, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindStorage, err)) - } - transactionSchedulerCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeTxnScheduler, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeTxnScheduler, err)) - } - if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) - } - - logger.Debug("merge honest: connecting to storage committee") - hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) - if err != nil { - panic(fmt.Sprintf("storage connect to committee failed: %+v", err)) - } - defer storageBroadcastCleanup(hnss) - - mbc := newMergeBatchContext() - - if err = mbc.loadCurrentBlock(ht, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge load current block failed: %+v", err)) - } - - // Receive 1 committee * 2 commitments per committee. - if err = mbc.receiveCommitments(ph, 2); err != nil { - panic(fmt.Sprintf("merge receive commitments failed: %+v", err)) - } - logger.Debug("merge honest: received commitments", "commitments", mbc.commitments) - - ctx := context.Background() - - // Process merge honestly. 
- if err = mbc.process(ctx, hnss); err != nil { - panic(fmt.Sprintf("merge process failed: %+v", err)) - } - logger.Debug("merge honest: processed", - "new_block", mbc.newBlock, - ) - - if err = mbc.createCommitment(defaultIdentity); err != nil { - panic(fmt.Sprintf("merge create commitment failed: %+v", err)) - } - - if err = mbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge publish to chain failed: %+v", err)) - } - logger.Debug("merge honest: commitment sent") -} - -func doMergeWrong(cmd *cobra.Command, args []string) { - if err := common.Init(); err != nil { - common.EarlyLogAndExit(err) - } - - defaultIdentity, err := initDefaultIdentity(common.DataDir()) - if err != nil { - panic(fmt.Sprintf("init default identity failed: %+v", err)) - } - - ht := newHonestTendermint() - if err = ht.start(defaultIdentity, common.DataDir()); err != nil { - panic(fmt.Sprintf("honest Tendermint start failed: %+v", err)) - } - defer func() { - if err1 := ht.stop(); err1 != nil { - panic(fmt.Sprintf("honest Tendermint stop failed: %+v", err1)) - } - }() - - ph := newP2PHandle() - if err = ph.start(ht, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("P2P start failed: %+v", err)) - } - defer func() { - if err1 := ph.stop(); err1 != nil { - panic(fmt.Sprintf("P2P stop failed: %+v", err1)) - } - }() - - activationEpoch := epochtime.EpochTime(viper.GetUint64(CfgActivationEpoch)) - if err = epochtimeWaitForEpoch(ht.service, activationEpoch); err != nil { - panic(fmt.Sprintf("epochtimeWaitForEpoch: %+v", err)) - } - - if err = registryRegisterNode(ht.service, defaultIdentity, common.DataDir(), fakeAddresses, ph.service.Addresses(), defaultRuntimeID, nil, node.RoleComputeWorker); err != nil { - panic(fmt.Sprintf("registryRegisterNode: %+v", err)) - } - - electionHeight, err := schedulerNextElectionHeight(ht.service, activationEpoch+1) - if err != nil { - panic(fmt.Sprintf("scheduler next election height failed: 
%+v", err)) - } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s at height %d failed: %+v", scheduler.KindComputeMerge, electionHeight, err)) - } - if err = schedulerCheckScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public(), scheduler.Worker); err != nil { - panic(fmt.Sprintf("scheduler check scheduled failed: %+v", err)) - } - logger.Debug("merge wrong: merge schedule ok") - executorCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeExecutor, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeExecutor, err)) - } - if err = schedulerCheckNotScheduled(executorCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled executor failed: %+v", err)) - } - storageCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindStorage, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindStorage, err)) - } - transactionSchedulerCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeTxnScheduler, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeTxnScheduler, err)) - } - if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) - } - - logger.Debug("merge wrong: connecting to storage committee") - hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) - if err != nil { - panic(fmt.Sprintf("storage connect to committee failed: %+v", err)) - } - defer storageBroadcastCleanup(hnss) - - mbc := newMergeBatchContext() - - if err = 
mbc.loadCurrentBlock(ht, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge load current block failed: %+v", err)) - } - - // Receive 1 committee * 2 commitments per committee. - if err = mbc.receiveCommitments(ph, 2); err != nil { - panic(fmt.Sprintf("merge receive commitments failed: %+v", err)) - } - logger.Debug("merge wrong: received commitments", "commitments", mbc.commitments) - - ctx := context.Background() - - // Process the merge wrong. - origCommitments := mbc.commitments - var emptyRoot hash.Hash - emptyRoot.Empty() - mbc.commitments = []*commitment.OpenExecutorCommitment{ - { - Body: &commitment.ComputeBody{ - Header: commitment.ComputeResultsHeader{ - IORoot: emptyRoot, - StateRoot: mbc.currentBlock.Header.StateRoot, - }, - }, - }, - } - - if err = mbc.process(ctx, hnss); err != nil { - panic(fmt.Sprintf("merge process failed: %+v", err)) - } - logger.Debug("merge wrong: processed", - "new_block", mbc.newBlock, - ) - - mbc.commitments = origCommitments - - // Sanity check the merge results. - if mbc.newBlock.Header.IORoot != emptyRoot { - panic(fmt.Sprintf("merge of empty IO trees should be empty. got %s, expected %s", mbc.newBlock.Header.IORoot, emptyRoot)) - } - if mbc.newBlock.Header.StateRoot != mbc.currentBlock.Header.StateRoot { - panic(fmt.Sprintf("merge of identical state trees should be the same. 
got %s, expected %s", mbc.newBlock.Header.StateRoot, mbc.currentBlock.Header.StateRoot)) - } - - if err = mbc.createCommitment(defaultIdentity); err != nil { - panic(fmt.Sprintf("merge create commitment failed: %+v", err)) - } - - if err = mbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge publish to chain failed: %+v", err)) - } - logger.Debug("merge wrong: commitment sent") -} - -func doMergeStraggler(cmd *cobra.Command, args []string) { - if err := common.Init(); err != nil { - common.EarlyLogAndExit(err) - } - - defaultIdentity, err := initDefaultIdentity(common.DataDir()) - if err != nil { - panic(fmt.Sprintf("init default identity failed: %+v", err)) - } - - ht := newHonestTendermint() - if err = ht.start(defaultIdentity, common.DataDir()); err != nil { - panic(fmt.Sprintf("honest Tendermint start failed: %+v", err)) - } - defer func() { - if err1 := ht.stop(); err1 != nil { - panic(fmt.Sprintf("honest Tendermint stop failed: %+v", err1)) - } - }() - - ph := newP2PHandle() - if err = ph.start(ht, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("P2P start failed: %+v", err)) - } - defer func() { - if err1 := ph.stop(); err1 != nil { - panic(fmt.Sprintf("P2P stop failed: %+v", err1)) - } - }() - - activationEpoch := epochtime.EpochTime(viper.GetUint64(CfgActivationEpoch)) - if err = epochtimeWaitForEpoch(ht.service, activationEpoch); err != nil { - panic(fmt.Sprintf("epochtimeWaitForEpoch: %+v", err)) - } - - if err = registryRegisterNode(ht.service, defaultIdentity, common.DataDir(), fakeAddresses, ph.service.Addresses(), defaultRuntimeID, nil, node.RoleComputeWorker); err != nil { - panic(fmt.Sprintf("registryRegisterNode: %+v", err)) - } - - electionHeight, err := schedulerNextElectionHeight(ht.service, activationEpoch+1) - if err != nil { - panic(fmt.Sprintf("scheduler next election height failed: %+v", err)) - } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, 
scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s at height %d failed: %+v", scheduler.KindComputeMerge, electionHeight, err)) - } - if err = schedulerCheckScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public(), scheduler.Worker); err != nil { - panic(fmt.Sprintf("scheduler check scheduled failed: %+v", err)) - } - logger.Debug("merge straggler: merge schedule ok") - executorCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeExecutor, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeExecutor, err)) - } - if err = schedulerCheckNotScheduled(executorCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled executor failed: %+v", err)) - } - transactionSchedulerCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeTxnScheduler, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeTxnScheduler, err)) - } - if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) - } - - mbc := newMergeBatchContext() - - // Receive 1 committee * 2 commitments per committee. - if err = mbc.receiveCommitments(ph, 2); err != nil { - panic(fmt.Sprintf("merge receive commitments failed: %+v", err)) - } - logger.Debug("merge straggler: received commitments", "commitments", mbc.commitments) - - logger.Debug("merge straggler: bailing") -} - // Register registers the byzantine sub-command and all of its children. 
func Register(parentCmd *cobra.Command) { byzantineCmd.AddCommand(executorHonestCmd) byzantineCmd.AddCommand(executorWrongCmd) byzantineCmd.AddCommand(executorStragglerCmd) - byzantineCmd.AddCommand(mergeHonestCmd) - byzantineCmd.AddCommand(mergeWrongCmd) - byzantineCmd.AddCommand(mergeStragglerCmd) parentCmd.AddCommand(byzantineCmd) } diff --git a/go/oasis-node/cmd/debug/byzantine/executor.go b/go/oasis-node/cmd/debug/byzantine/executor.go index ee90a1bd84b..92af7b85f49 100644 --- a/go/oasis-node/cmd/debug/byzantine/executor.go +++ b/go/oasis-node/cmd/debug/byzantine/executor.go @@ -9,15 +9,14 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/identity" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" "github.com/oasisprotocol/oasis-core/go/runtime/transaction" - scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" storage "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/syncer" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/writelog" - "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" ) type computeBatchContext struct { @@ -163,6 +162,7 @@ func (cbc *computeBatchContext) createCommitment(id *identity.Identity, rak sign storageSigs = append(storageSigs, receipt.Signature) } header := commitment.ComputeResultsHeader{ + Round: cbc.bd.Header.Round + 1, PreviousHash: cbc.bd.Header.EncodedHash(), IORoot: cbc.newIORoot, StateRoot: cbc.newStateRoot, @@ -170,7 +170,6 @@ func (cbc *computeBatchContext) createCommitment(id *identity.Identity, rak sign Messages: []*block.Message{}, } computeBody := &commitment.ComputeBody{ - CommitteeID: committeeID, Header: header, 
StorageSignatures: storageSigs, TxnSchedSig: cbc.bdSig, @@ -194,13 +193,9 @@ func (cbc *computeBatchContext) createCommitment(id *identity.Identity, rak sign return nil } -func (cbc *computeBatchContext) publishToCommittee(ht *honestTendermint, height int64, committee *scheduler.Committee, role scheduler.Role, ph *p2pHandle, runtimeID common.Namespace, groupVersion int64) error { - if err := schedulerPublishToCommittee(ph, runtimeID, &p2p.Message{ - GroupVersion: groupVersion, - SpanContext: nil, - ExecutorCommit: cbc.commit, - }); err != nil { - return fmt.Errorf("scheduler publish to committee: %w", err) +func (cbc *computeBatchContext) publishToChain(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace) error { + if err := roothashExecutorCommit(svc, id, runtimeID, []commitment.ExecutorCommitment{*cbc.commit}); err != nil { + return fmt.Errorf("roothash merge commentment: %w", err) } return nil diff --git a/go/oasis-node/cmd/debug/byzantine/merge.go b/go/oasis-node/cmd/debug/byzantine/merge.go deleted file mode 100644 index 9e671d4fe7f..00000000000 --- a/go/oasis-node/cmd/debug/byzantine/merge.go +++ /dev/null @@ -1,120 +0,0 @@ -package byzantine - -import ( - "context" - "fmt" - - "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - "github.com/oasisprotocol/oasis-core/go/common/identity" - consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" -) - -type mergeBatchContext struct { - currentBlock *block.Block - commitments []*commitment.OpenExecutorCommitment - - newBlock *block.Block - commit *commitment.MergeCommitment -} - -func newMergeBatchContext() *mergeBatchContext { - return &mergeBatchContext{} -} - -func (mbc *mergeBatchContext) loadCurrentBlock(ht *honestTendermint, runtimeID common.Namespace) error { - var err error - 
mbc.currentBlock, err = roothashGetLatestBlock(ht, 0, runtimeID) - if err != nil { - return fmt.Errorf("roothash get latest block: %w", err) - } - - return nil -} - -func mergeReceiveCommitment(ph *p2pHandle) (*commitment.OpenExecutorCommitment, error) { - var req p2pReqRes - for { - req = <-ph.requests - req.responseCh <- nil - - if req.msg.ExecutorCommit == nil { - continue - } - - break - } - - openCom, err := req.msg.ExecutorCommit.Open() - if err != nil { - return nil, fmt.Errorf("request message ExecutorWorkerFinished Open: %w", err) - } - - return openCom, nil -} - -func (mbc *mergeBatchContext) receiveCommitments(ph *p2pHandle, count int) error { - for i := 0; i < count; i++ { - openCom, err := mergeReceiveCommitment(ph) - if err != nil { - return fmt.Errorf("merge receive commitments %d: %w", i, err) - } - mbc.commitments = append(mbc.commitments, openCom) - } - - return nil -} - -func (mbc *mergeBatchContext) process(ctx context.Context, hnss []*honestNodeStorage) error { - collectedCommittees := make(map[hash.Hash]bool) - var ioRoots, stateRoots []hash.Hash - for _, commitment := range mbc.commitments { - if collectedCommittees[commitment.Body.CommitteeID] { - continue - } - collectedCommittees[commitment.Body.CommitteeID] = true - ioRoots = append(ioRoots, commitment.Body.Header.IORoot) - stateRoots = append(stateRoots, commitment.Body.Header.StateRoot) - } - - if len(collectedCommittees) != 1 { - return fmt.Errorf("multiple committees not supported: %d", len(collectedCommittees)) - } - signatures := mbc.commitments[0].Body.StorageSignatures - messages := mbc.commitments[0].Body.Header.Messages - - mbc.newBlock = block.NewEmptyBlock(mbc.currentBlock, 0, block.Normal) - mbc.newBlock.Header.IORoot = ioRoots[0] - mbc.newBlock.Header.StateRoot = stateRoots[0] - mbc.newBlock.Header.Messages = messages - mbc.newBlock.Header.StorageSignatures = signatures - - return nil -} - -func (mbc *mergeBatchContext) createCommitment(id *identity.Identity) error { - var 
executorCommits []commitment.ExecutorCommitment - for _, openCom := range mbc.commitments { - executorCommits = append(executorCommits, openCom.ExecutorCommitment) - } - var err error - mbc.commit, err = commitment.SignMergeCommitment(id.NodeSigner, &commitment.MergeBody{ - ExecutorCommits: executorCommits, - Header: mbc.newBlock.Header, - }) - if err != nil { - return fmt.Errorf("commitment sign merge commitment: %w", err) - } - - return nil -} - -func (mbc *mergeBatchContext) publishToChain(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace) error { - if err := roothashMergeCommit(svc, id, runtimeID, []commitment.MergeCommitment{*mbc.commit}); err != nil { - return fmt.Errorf("roothash merge commentment: %w", err) - } - - return nil -} diff --git a/go/oasis-node/cmd/debug/byzantine/roothash.go b/go/oasis-node/cmd/debug/byzantine/roothash.go index 72bc6ff2c76..b3f17c74813 100644 --- a/go/oasis-node/cmd/debug/byzantine/roothash.go +++ b/go/oasis-node/cmd/debug/byzantine/roothash.go @@ -7,15 +7,10 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/identity" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" ) -func roothashGetLatestBlock(ht *honestTendermint, height int64, runtimeID common.Namespace) (*block.Block, error) { - return ht.service.RootHash().GetLatestBlock(context.Background(), runtimeID, height) -} - -func roothashMergeCommit(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace, commits []commitment.MergeCommitment) error { - tx := roothash.NewMergeCommitTx(0, nil, runtimeID, commits) +func roothashExecutorCommit(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace, commits []commitment.ExecutorCommitment) error { + tx := roothash.NewExecutorCommitTx(0, nil, runtimeID, commits) 
return consensus.SignAndSubmitTx(context.Background(), svc, id.NodeSigner, tx) } diff --git a/go/oasis-node/cmd/debug/byzantine/scheduler.go b/go/oasis-node/cmd/debug/byzantine/scheduler.go index 7ab10f27765..02d1cc4e9fc 100644 --- a/go/oasis-node/cmd/debug/byzantine/scheduler.go +++ b/go/oasis-node/cmd/debug/byzantine/scheduler.go @@ -3,7 +3,6 @@ package byzantine import ( "context" "fmt" - "time" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" @@ -11,7 +10,6 @@ import ( consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" epochtime "github.com/oasisprotocol/oasis-core/go/epochtime/api" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" - "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" ) func schedulerNextElectionHeight(svc consensus.Backend, epoch epochtime.EpochTime) (int64, error) { @@ -103,18 +101,3 @@ func schedulerForRoleInCommittee(ht *honestTendermint, height int64, committee * return nil } - -func schedulerPublishToCommittee(ph *p2pHandle, runtimeID common.Namespace, message *p2p.Message) error { - // HACK: So, the ever-byzantine debug code is written under the - // assumption that it's possible to do p2p message delivery in - // a synchronous manner. - // - // This is no longer possible. Just publish and strategically - // sleep. Eventually someone could/should rewrite all of this - // debug code. The only thing that uses it is CI anyway. 
- - ph.service.Publish(ph.context, runtimeID, message) - time.Sleep(3 * time.Second) // Sigh - - return nil -} diff --git a/go/oasis-node/cmd/debug/byzantine/scheduler_test.go b/go/oasis-node/cmd/debug/byzantine/scheduler_test.go index d36a5c30c14..d0bbc57fc59 100644 --- a/go/oasis-node/cmd/debug/byzantine/scheduler_test.go +++ b/go/oasis-node/cmd/debug/byzantine/scheduler_test.go @@ -20,12 +20,9 @@ func hasSuitablePermutations(t *testing.T, beacon []byte, runtimeID common.Names require.NoError(t, err, "schedulerapp.GetPerm compute") transactionSchedulerIdxs, err := schedulerapp.GetPerm(beacon, runtimeID, schedulerapp.RNGContextTransactionScheduler, numComputeNodes) require.NoError(t, err, "schedulerapp.GetPerm transaction scheduler") - mergeIdxs, err := schedulerapp.GetPerm(beacon, runtimeID, schedulerapp.RNGContextMerge, numComputeNodes) - require.NoError(t, err, "schedulerapp.GetPerm merge") fmt.Printf("%20s schedule %v\n", scheduler.KindComputeExecutor, computeIdxs) fmt.Printf("%20s schedule %v\n", scheduler.KindComputeTxnScheduler, transactionSchedulerIdxs) - fmt.Printf("%20s schedule %v\n", scheduler.KindComputeMerge, mergeIdxs) committees := map[scheduler.CommitteeKind]struct { workers int @@ -34,12 +31,10 @@ func hasSuitablePermutations(t *testing.T, beacon []byte, runtimeID common.Names }{ scheduler.KindComputeExecutor: {workers: 2, backupWorkers: 1, perm: computeIdxs}, scheduler.KindComputeTxnScheduler: {workers: 1, backupWorkers: 0, perm: transactionSchedulerIdxs}, - scheduler.KindComputeMerge: {workers: 2, backupWorkers: 1, perm: mergeIdxs}, } for _, c1Kind := range []scheduler.CommitteeKind{ scheduler.KindComputeExecutor, - scheduler.KindComputeMerge, } { c1 := committees[c1Kind] maxWorker := c1.workers diff --git a/go/oasis-node/cmd/debug/txsource/workload/registration.go b/go/oasis-node/cmd/debug/txsource/workload/registration.go index d68090b7735..c5fb511b850 100644 --- a/go/oasis-node/cmd/debug/txsource/workload/registration.go +++ 
b/go/oasis-node/cmd/debug/txsource/workload/registration.go @@ -52,10 +52,6 @@ func getRuntime(entityID signature.PublicKey, id common.Namespace) *registry.Run GroupSize: 1, RoundTimeout: 1 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - RoundTimeout: 1 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: 1, Algorithm: "batching", @@ -68,8 +64,6 @@ func getRuntime(entityID signature.PublicKey, id common.Namespace) *registry.Run MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-node/cmd/node/node.go b/go/oasis-node/cmd/node/node.go index a92775e6b5f..9c24a24108c 100644 --- a/go/oasis-node/cmd/node/node.go +++ b/go/oasis-node/cmd/node/node.go @@ -58,7 +58,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" "github.com/oasisprotocol/oasis-core/go/worker/compute" "github.com/oasisprotocol/oasis-core/go/worker/compute/executor" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge" "github.com/oasisprotocol/oasis-core/go/worker/compute/txnscheduler" workerConsensusRPC "github.com/oasisprotocol/oasis-core/go/worker/consensusrpc" workerKeymanager "github.com/oasisprotocol/oasis-core/go/worker/keymanager" @@ -120,7 +119,6 @@ type Node struct { ExecutorWorker *executor.Worker StorageWorker *workerStorage.Worker TransactionSchedulerWorker *txnscheduler.Worker - MergeWorker *merge.Worker SentryWorker *workerSentry.Worker P2P *p2p.P2P RegistrationWorker *registration.Worker @@ -184,11 +182,6 @@ func (n *Node) waitReady(logger *logging.Logger) { <-n.TransactionSchedulerWorker.Initialized() } - // Wait for the merge worker. - if n.MergeWorker.Enabled() { - <-n.MergeWorker.Initialized() - } - // Wait for the common worker. 
if n.CommonWorker.Enabled() { <-n.CommonWorker.Initialized() @@ -292,8 +285,7 @@ func (n *Node) initRuntimeWorkers() error { // immediately when created, make sure that we don't start it if it is not // needed. // - // Currently, only executor, txn scheduler and merge workers need P2P - // transport. + // Currently, only executor and txn scheduler workers need P2P transport. if compute.Enabled() { p2pCtx, p2pSvc := service.NewContextCleanup(context.Background()) if genesisDoc.Registry.Parameters.DebugAllowUnroutableAddresses { @@ -389,21 +381,10 @@ func (n *Node) initRuntimeWorkers() error { } n.svcMgr.Register(n.StorageWorker) - // Initialize the merge worker. - n.MergeWorker, err = merge.New( - n.CommonWorker, - n.RegistrationWorker, - ) - if err != nil { - return err - } - n.svcMgr.Register(n.MergeWorker) - // Initialize the executor worker. n.ExecutorWorker, err = executor.New( dataDir, n.CommonWorker, - n.MergeWorker, n.RegistrationWorker, ) if err != nil { @@ -458,11 +439,6 @@ func (n *Node) startRuntimeWorkers(logger *logging.Logger) error { return err } - // Start the merge worker. - if err := n.MergeWorker.Start(); err != nil { - return err - } - // Start the common worker. if err := n.CommonWorker.Start(); err != nil { return err @@ -491,7 +467,6 @@ func (n *Node) startRuntimeWorkers(logger *logging.Logger) error { // Only start the external gRPC server if any workers are enabled. 
if n.StorageWorker.Enabled() || n.TransactionSchedulerWorker.Enabled() || - n.MergeWorker.Enabled() || n.KeymanagerWorker.Enabled() || n.ConsensusWorker.Enabled() { if err := n.CommonWorker.Grpc.Start(); err != nil { diff --git a/go/oasis-node/cmd/registry/runtime/runtime.go b/go/oasis-node/cmd/registry/runtime/runtime.go index a7080ffaa0e..0fe5b31813d 100644 --- a/go/oasis-node/cmd/registry/runtime/runtime.go +++ b/go/oasis-node/cmd/registry/runtime/runtime.go @@ -53,19 +53,11 @@ const ( CfgExecutorAllowedStragglers = "runtime.executor.allowed_stragglers" CfgExecutorRoundTimeout = "runtime.executor.round_timeout" - // Merge committee flags. - CfgMergeGroupSize = "runtime.merge.group_size" - CfgMergeGroupBackupSize = "runtime.merge.group_backup_size" - CfgMergeAllowedStragglers = "runtime.merge.allowed_stragglers" - CfgMergeRoundTimeout = "runtime.merge.round_timeout" - // Storage committee flags. CfgStorageGroupSize = "runtime.storage.group_size" CfgStorageMinWriteReplication = "runtime.storage.min_write_replication" CfgStorageMaxApplyWriteLogEntries = "runtime.storage.max_apply_write_log_entries" CfgStorageMaxApplyOps = "runtime.storage.max_apply_ops" - CfgStorageMaxMergeRoots = "runtime.storage.max_merge_roots" - CfgStorageMaxMergeOps = "runtime.storage.max_merge_ops" CfgStorageCheckpointInterval = "runtime.storage.checkpoint_interval" CfgStorageCheckpointNumKept = "runtime.storage.checkpoint_num_kept" CfgStorageCheckpointChunkSize = "runtime.storage.checkpoint_chunk_size" @@ -383,12 +375,6 @@ func runtimeFromFlags() (*registry.Runtime, signature.Signer, error) { // nolint AllowedStragglers: viper.GetUint64(CfgExecutorAllowedStragglers), RoundTimeout: viper.GetDuration(CfgExecutorRoundTimeout), }, - Merge: registry.MergeParameters{ - GroupSize: viper.GetUint64(CfgMergeGroupSize), - GroupBackupSize: viper.GetUint64(CfgMergeGroupBackupSize), - AllowedStragglers: viper.GetUint64(CfgMergeAllowedStragglers), - RoundTimeout: viper.GetDuration(CfgMergeRoundTimeout), - 
}, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: viper.GetUint64(CfgTxnSchedulerGroupSize), Algorithm: viper.GetString(CfgTxnSchedulerAlgorithm), @@ -401,8 +387,6 @@ func runtimeFromFlags() (*registry.Runtime, signature.Signer, error) { // nolint MinWriteReplication: viper.GetUint64(CfgStorageMinWriteReplication), MaxApplyWriteLogEntries: viper.GetUint64(CfgStorageMaxApplyWriteLogEntries), MaxApplyOps: viper.GetUint64(CfgStorageMaxApplyOps), - MaxMergeRoots: viper.GetUint64(CfgStorageMaxMergeRoots), - MaxMergeOps: viper.GetUint64(CfgStorageMaxMergeOps), CheckpointInterval: viper.GetUint64(CfgStorageCheckpointInterval), CheckpointNumKept: viper.GetUint64(CfgStorageCheckpointNumKept), CheckpointChunkSize: viper.GetUint64(CfgStorageCheckpointChunkSize), @@ -553,12 +537,6 @@ func init() { runtimeFlags.Uint64(CfgExecutorAllowedStragglers, 0, "Number of stragglers allowed per round in the runtime executor group") runtimeFlags.Duration(CfgExecutorRoundTimeout, 10*time.Second, "Executor committee round timeout for this runtime") - // Init Merge committee flags. - runtimeFlags.Uint64(CfgMergeGroupSize, 1, "Number of workers in the runtime merge group/committee") - runtimeFlags.Uint64(CfgMergeGroupBackupSize, 0, "Number of backup workers in the runtime merge group/committee") - runtimeFlags.Uint64(CfgMergeAllowedStragglers, 0, "Number of stragglers allowed per round in the runtime merge group") - runtimeFlags.Duration(CfgMergeRoundTimeout, 10*time.Second, "Merge committee round timeout for this runtime") - // Init Transaction scheduler flags. 
runtimeFlags.Uint64(CfgTxnSchedulerGroupSize, 1, "Number of transaction scheduler nodes for the runtime") runtimeFlags.String(CfgTxnSchedulerAlgorithm, "batching", "Transaction scheduling algorithm") @@ -571,8 +549,6 @@ func init() { runtimeFlags.Uint64(CfgStorageMinWriteReplication, 1, "Minimum required storage write replication") runtimeFlags.Uint64(CfgStorageMaxApplyWriteLogEntries, 100_000, "Maximum number of write log entries") runtimeFlags.Uint64(CfgStorageMaxApplyOps, 2, "Maximum number of apply operations in a batch") - runtimeFlags.Uint64(CfgStorageMaxMergeRoots, 1, "Maximum number of merge roots") - runtimeFlags.Uint64(CfgStorageMaxMergeOps, 2, "Maximum number of merge operations in a batch") runtimeFlags.Uint64(CfgStorageCheckpointInterval, 0, "Storage checkpoint interval (in rounds)") runtimeFlags.Uint64(CfgStorageCheckpointNumKept, 0, "Number of storage checkpoints to keep") runtimeFlags.Uint64(CfgStorageCheckpointChunkSize, 0, "Storage checkpoint chunk size") diff --git a/go/oasis-node/node_test.go b/go/oasis-node/node_test.go index 9b9dc46d2e9..d6e7b02aaa0 100644 --- a/go/oasis-node/node_test.go +++ b/go/oasis-node/node_test.go @@ -89,11 +89,6 @@ var ( GroupBackupSize: 0, RoundTimeout: 20 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - GroupBackupSize: 0, - RoundTimeout: 20 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 1, @@ -106,8 +101,6 @@ var ( MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-test-runner/oasis/cli/registry.go b/go/oasis-test-runner/oasis/cli/registry.go index f347636fc58..86a0f27ad66 100644 --- a/go/oasis-test-runner/oasis/cli/registry.go +++ b/go/oasis-test-runner/oasis/cli/registry.go @@ -58,16 +58,10 @@ func (r *RegistryHelpers) 
runRegistryRuntimeSubcommand( "--"+cmdRegRt.CfgExecutorGroupBackupSize, strconv.FormatUint(runtime.Executor.GroupBackupSize, 10), "--"+cmdRegRt.CfgExecutorAllowedStragglers, strconv.FormatUint(runtime.Executor.AllowedStragglers, 10), "--"+cmdRegRt.CfgExecutorRoundTimeout, runtime.Executor.RoundTimeout.String(), - "--"+cmdRegRt.CfgMergeGroupSize, strconv.FormatUint(runtime.Merge.GroupSize, 10), - "--"+cmdRegRt.CfgMergeGroupBackupSize, strconv.FormatUint(runtime.Merge.GroupBackupSize, 10), - "--"+cmdRegRt.CfgMergeAllowedStragglers, strconv.FormatUint(runtime.Merge.AllowedStragglers, 10), - "--"+cmdRegRt.CfgMergeRoundTimeout, runtime.Merge.RoundTimeout.String(), "--"+cmdRegRt.CfgStorageGroupSize, strconv.FormatUint(runtime.Storage.GroupSize, 10), "--"+cmdRegRt.CfgStorageMinWriteReplication, strconv.FormatUint(runtime.Storage.MinWriteReplication, 10), "--"+cmdRegRt.CfgStorageMaxApplyWriteLogEntries, strconv.FormatUint(runtime.Storage.MaxApplyWriteLogEntries, 10), "--"+cmdRegRt.CfgStorageMaxApplyOps, strconv.FormatUint(runtime.Storage.MaxApplyOps, 10), - "--"+cmdRegRt.CfgStorageMaxMergeRoots, strconv.FormatUint(runtime.Storage.MaxMergeRoots, 10), - "--"+cmdRegRt.CfgStorageMaxMergeOps, strconv.FormatUint(runtime.Storage.MaxMergeOps, 10), "--"+cmdRegRt.CfgStorageCheckpointInterval, strconv.FormatUint(runtime.Storage.CheckpointInterval, 10), "--"+cmdRegRt.CfgStorageCheckpointNumKept, strconv.FormatUint(runtime.Storage.CheckpointNumKept, 10), "--"+cmdRegRt.CfgStorageCheckpointChunkSize, strconv.FormatUint(runtime.Storage.CheckpointChunkSize, 10), diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index 3a6bd3d0d39..31205e920ab 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -198,7 +198,6 @@ type RuntimeFixture struct { // nolint: maligned GenesisRound uint64 `json:"genesis_round,omitempty"` Executor registry.ExecutorParameters `json:"executor"` - Merge registry.MergeParameters 
`json:"merge"` TxnScheduler registry.TxnSchedulerParameters `json:"txn_scheduler"` Storage registry.StorageParameters `json:"storage"` @@ -237,7 +236,6 @@ func (f *RuntimeFixture) Create(netFixture *NetworkFixture, net *Network) (*Runt TEEHardware: netFixture.TEE.Hardware, MrSigner: netFixture.TEE.MrSigner, Executor: f.Executor, - Merge: f.Merge, TxnScheduler: f.TxnScheduler, Storage: f.Storage, AdmissionPolicy: f.AdmissionPolicy, diff --git a/go/oasis-test-runner/oasis/log.go b/go/oasis-test-runner/oasis/log.go index 53d7f287c34..05c8fd4ca99 100644 --- a/go/oasis-test-runner/oasis/log.go +++ b/go/oasis-test-runner/oasis/log.go @@ -51,18 +51,6 @@ func LogAssertNoExecutionDiscrepancyDetected() log.WatcherHandlerFactory { return LogAssertNotEvent(roothash.LogEventExecutionDiscrepancyDetected, "execution discrepancy detected") } -// LogAssertMergeDiscrepancyDetected returns a handler which checks whether a -// merge discrepancy was detected based on JSON log output. -func LogAssertMergeDiscrepancyDetected() log.WatcherHandlerFactory { - return LogAssertEvent(roothash.LogEventMergeDiscrepancyDetected, "merge discrepancy not detected") -} - -// LogAssertNoMergeDiscrepancyDetected returns a handler which checks whether a -// merge discrepancy was not detected based on JSON log output. -func LogAssertNoMergeDiscrepancyDetected() log.WatcherHandlerFactory { - return LogAssertNotEvent(roothash.LogEventMergeDiscrepancyDetected, "merge discrepancy detected") -} - // LogAssertPeerExchangeDisabled returns a handler which checks whether a peer // exchange disabled event was detected based on JSON log output. 
func LogAssertPeerExchangeDisabled() log.WatcherHandlerFactory { diff --git a/go/oasis-test-runner/oasis/runtime.go b/go/oasis-test-runner/oasis/runtime.go index b651e89bbfa..33fd550a40a 100644 --- a/go/oasis-test-runner/oasis/runtime.go +++ b/go/oasis-test-runner/oasis/runtime.go @@ -58,7 +58,6 @@ type RuntimeCfg struct { // nolint: maligned GenesisRound uint64 Executor registry.ExecutorParameters - Merge registry.MergeParameters TxnScheduler registry.TxnSchedulerParameters Storage registry.StorageParameters @@ -128,7 +127,6 @@ func (net *Network) NewRuntime(cfg *RuntimeCfg) (*Runtime, error) { Kind: cfg.Kind, TEEHardware: cfg.TEEHardware, Executor: cfg.Executor, - Merge: cfg.Merge, TxnScheduler: cfg.TxnScheduler, Storage: cfg.Storage, AdmissionPolicy: cfg.AdmissionPolicy, diff --git a/go/oasis-test-runner/scenario/e2e/registry_cli.go b/go/oasis-test-runner/scenario/e2e/registry_cli.go index ebeb7223129..ae55ff95baa 100644 --- a/go/oasis-test-runner/scenario/e2e/registry_cli.go +++ b/go/oasis-test-runner/scenario/e2e/registry_cli.go @@ -619,12 +619,6 @@ func (sc *registryCLIImpl) testRuntime(ctx context.Context, childEnv *env.Env, c AllowedStragglers: 3, RoundTimeout: 4 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 5, - GroupBackupSize: 6, - AllowedStragglers: 7, - RoundTimeout: 8 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: 10, Algorithm: "batching", @@ -637,8 +631,6 @@ func (sc *registryCLIImpl) testRuntime(ctx context.Context, childEnv *env.Env, c MinWriteReplication: 9, MaxApplyWriteLogEntries: 10, MaxApplyOps: 11, - MaxMergeRoots: 12, - MaxMergeOps: 13, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ EntityWhitelist: ®istry.EntityWhitelistRuntimeAdmissionPolicy{ diff --git a/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go b/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go index 969a157f423..9e5f0837f34 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go +++ 
b/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go @@ -27,32 +27,13 @@ var ( oasis.LogAssertNoTimeouts(), oasis.LogAssertNoRoundFailures(), oasis.LogAssertExecutionDiscrepancyDetected(), - oasis.LogAssertNoMergeDiscrepancyDetected(), }, oasis.ByzantineSlot3IdentitySeed) // ByzantineExecutorStraggler is the byzantine executor straggler scenario. ByzantineExecutorStraggler scenario.Scenario = newByzantineImpl("executor-straggler", []log.WatcherHandlerFactory{ oasis.LogAssertTimeouts(), oasis.LogAssertNoRoundFailures(), oasis.LogAssertExecutionDiscrepancyDetected(), - oasis.LogAssertNoMergeDiscrepancyDetected(), }, oasis.ByzantineSlot3IdentitySeed) - - // ByzantineMergeHonest is the byzantine merge honest scenario. - ByzantineMergeHonest scenario.Scenario = newByzantineImpl("merge-honest", nil, oasis.ByzantineSlot1IdentitySeed) - // ByzantineMergeWrong is the byzantine merge wrong scenario. - ByzantineMergeWrong scenario.Scenario = newByzantineImpl("merge-wrong", []log.WatcherHandlerFactory{ - oasis.LogAssertNoTimeouts(), - oasis.LogAssertNoRoundFailures(), - oasis.LogAssertNoExecutionDiscrepancyDetected(), - oasis.LogAssertMergeDiscrepancyDetected(), - }, oasis.ByzantineSlot1IdentitySeed) - // ByzantineMergeStraggler is the byzantine merge straggler scenario. 
- ByzantineMergeStraggler scenario.Scenario = newByzantineImpl("merge-straggler", []log.WatcherHandlerFactory{ - oasis.LogAssertTimeouts(), - oasis.LogAssertNoRoundFailures(), - oasis.LogAssertNoExecutionDiscrepancyDetected(), - oasis.LogAssertMergeDiscrepancyDetected(), - }, oasis.ByzantineSlot1IdentitySeed) ) type byzantineImpl struct { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go b/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go index e5a626141f8..d6b57413de6 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go @@ -68,8 +68,6 @@ func (sc *historyReindexImpl) Fixture() (*oasis.NetworkFixture, error) { // Use a single compute node. f.Runtimes[rtIdx].Executor.GroupSize = 1 f.Runtimes[rtIdx].Executor.GroupBackupSize = 0 - f.Runtimes[rtIdx].Merge.GroupSize = 1 - f.Runtimes[rtIdx].Merge.GroupBackupSize = 0 return f, nil } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go index a11c1c3a4a1..933720b2ca3 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go @@ -90,11 +90,6 @@ func (sc *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { GroupBackupSize: 0, RoundTimeout: 10 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - GroupBackupSize: 0, - RoundTimeout: 10 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 1, @@ -107,8 +102,6 @@ func (sc *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git 
a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 579514d58b0..6e95d841932 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -47,7 +47,6 @@ var ( oasis.LogAssertNoTimeouts(), oasis.LogAssertNoRoundFailures(), oasis.LogAssertNoExecutionDiscrepancyDetected(), - oasis.LogAssertNoMergeDiscrepancyDetected(), } runtimeID common.Namespace @@ -163,11 +162,6 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { GroupBackupSize: 1, RoundTimeout: 10 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 2, - GroupBackupSize: 1, - RoundTimeout: 10 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 1, @@ -180,8 +174,6 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 2, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, @@ -497,10 +489,6 @@ func RegisterScenarios() error { ByzantineExecutorHonest, ByzantineExecutorWrong, ByzantineExecutorStraggler, - // Byzantine merge node. - ByzantineMergeHonest, - ByzantineMergeWrong, - ByzantineMergeStraggler, // Storage sync test. StorageSync, // Sentry test. diff --git a/go/registry/api/api.go b/go/registry/api/api.go index 81876fa3242..bd776a59451 100644 --- a/go/registry/api/api.go +++ b/go/registry/api/api.go @@ -1052,14 +1052,6 @@ func VerifyRegisterRuntimeArgs( // nolint: gocyclo return nil, fmt.Errorf("%w: executor group too small", ErrInvalidArgument) } - // Ensure there is at least one member of the merge group. 
- if rt.Merge.GroupSize == 0 { - logger.Error("RegisterRuntime: merge group size too small", - "runtime", rt, - ) - return nil, fmt.Errorf("%w: merge group too small", ErrInvalidArgument) - } - // Ensure there is at least one member of the transaction scheduler group. if rt.TxnScheduler.GroupSize == 0 { logger.Error("RegisterRuntime: transaction scheduler group too small", @@ -1187,18 +1179,6 @@ func VerifyRegisterRuntimeStorageArgs(rt *Runtime, logger *logging.Logger) error ) return fmt.Errorf("%w: storage MaxApplyOps parameter too small", ErrInvalidArgument) } - if params.MaxMergeRoots == 0 { - logger.Error("RegisterRuntime: storage MaxMergeRoots parameter too small", - "runtime", rt, - ) - return fmt.Errorf("%w: storage MaxMergeRoots parameter too small", ErrInvalidArgument) - } - if params.MaxMergeOps < 2 { - logger.Error("RegisterRuntime: storage MaxMergeOps parameter too small", - "runtime", rt, - ) - return fmt.Errorf("%w: storage MaxMergeOps parameter too small", ErrInvalidArgument) - } // Verify storage checkpointing configuration if enabled. if params.CheckpointInterval > 0 { diff --git a/go/registry/api/runtime.go b/go/registry/api/runtime.go index 6d21fc6f54b..c4eebcc8d76 100644 --- a/go/registry/api/runtime.go +++ b/go/registry/api/runtime.go @@ -93,21 +93,6 @@ type ExecutorParameters struct { RoundTimeout time.Duration `json:"round_timeout"` } -// MergeParameters are parameters for the merge committee. -type MergeParameters struct { - // GroupSize is the size of the committee. - GroupSize uint64 `json:"group_size"` - - // GroupBackupSize is the size of the discrepancy resolution group. - GroupBackupSize uint64 `json:"group_backup_size"` - - // AllowedStragglers is the number of allowed stragglers. - AllowedStragglers uint64 `json:"allowed_stragglers"` - - // RoundTimeout is the round timeout of the nodes in the group. 
- RoundTimeout time.Duration `json:"round_timeout"` -} - // TxnSchedulerParameters are parameters for the transaction scheduler committee. type TxnSchedulerParameters struct { // GroupSize is the size of the committee. @@ -145,12 +130,6 @@ type StorageParameters struct { // MaxApplyOps is the maximum number of apply operations in a batch. MaxApplyOps uint64 `json:"max_apply_ops"` - // MaxMergeRoots is the maximum number of merge roots. - MaxMergeRoots uint64 `json:"max_merge_roots"` - - // MaxApplyOps configures the maximum number of merge operations in a batch. - MaxMergeOps uint64 `json:"max_merge_ops"` - // CheckpointInterval is the expected runtime state checkpoint interval (in rounds). CheckpointInterval uint64 `json:"checkpoint_interval"` @@ -247,9 +226,6 @@ type Runtime struct { // nolint: maligned // Executor stores parameters of the executor committee. Executor ExecutorParameters `json:"executor,omitempty"` - // Merge stores parameters of the merge committee. - Merge MergeParameters `json:"merge,omitempty"` - // TxnScheduler stores parameters of the transactions scheduler committee. 
TxnScheduler TxnSchedulerParameters `json:"txn_scheduler,omitempty"` diff --git a/go/registry/tests/tester.go b/go/registry/tests/tester.go index 5ed7a9fd43b..dc490f5417c 100644 --- a/go/registry/tests/tester.go +++ b/go/registry/tests/tester.go @@ -1611,12 +1611,6 @@ func NewTestRuntime(seed []byte, ent *TestEntity, isKeyManager bool) (*TestRunti AllowedStragglers: 1, RoundTimeout: 20 * time.Second, }, - Merge: api.MergeParameters{ - GroupSize: 3, - GroupBackupSize: 5, - AllowedStragglers: 1, - RoundTimeout: 20 * time.Second, - }, TxnScheduler: api.TxnSchedulerParameters{ GroupSize: 3, Algorithm: api.TxnSchedulerAlgorithmBatching, @@ -1629,8 +1623,6 @@ func NewTestRuntime(seed []byte, ent *TestEntity, isKeyManager bool) (*TestRunti MinWriteReplication: 3, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: api.RuntimeAdmissionPolicy{ AnyNode: &api.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/roothash/api/api.go b/go/roothash/api/api.go index 883cbcc38d1..7e598c3a963 100644 --- a/go/roothash/api/api.go +++ b/go/roothash/api/api.go @@ -24,9 +24,6 @@ const ( // LogEventExecutionDiscrepancyDetected is a log event value that signals // an execution discrepancy has been detected. LogEventExecutionDiscrepancyDetected = "roothash/execution_discrepancy_detected" - // LogEventMergeDiscrepancyDetected is a log event value that signals - // a merge discrepancy has been detected. - LogEventMergeDiscrepancyDetected = "roothash/merge_discrepancy_detected" // LogEventTimerFired is a log event value that signals a timer has fired. LogEventTimerFired = "roothash/timer_fired" // LogEventRoundFailed is a log event value that signals a round has failed. @@ -56,13 +53,10 @@ var ( // MethodExecutorCommit is the method name for executor commit submission. MethodExecutorCommit = transaction.NewMethodName(ModuleName, "ExecutorCommit", ExecutorCommit{}) - // MethodMergeCommit is the method name for merge commit submission. 
- MethodMergeCommit = transaction.NewMethodName(ModuleName, "MergeCommit", MergeCommit{}) // Methods is a list of all methods supported by the roothash backend. Methods = []transaction.MethodName{ MethodExecutorCommit, - MethodMergeCommit, } ) @@ -115,20 +109,6 @@ func NewExecutorCommitTx(nonce uint64, fee *transaction.Fee, runtimeID common.Na }) } -// MergeCommit is the argument set for the MergeCommit method. -type MergeCommit struct { - ID common.Namespace `json:"id"` - Commits []commitment.MergeCommitment `json:"commits"` -} - -// NewMergeCommitTx creates a new executor commit transaction. -func NewMergeCommitTx(nonce uint64, fee *transaction.Fee, runtimeID common.Namespace, commits []commitment.MergeCommitment) *transaction.Transaction { - return transaction.NewTransaction(nonce, fee, MethodMergeCommit, &MergeCommit{ - ID: runtimeID, - Commits: commits, - }) -} - // AnnotatedBlock is an annotated roothash block. type AnnotatedBlock struct { // Height is the underlying roothash backend's block height that @@ -145,26 +125,12 @@ type ExecutorCommittedEvent struct { Commit commitment.ExecutorCommitment `json:"commit"` } -// MergeCommittedEvent is an event emitted each time a merge node commits. -type MergeCommittedEvent struct { - // Commit is the merge commitment. - Commit commitment.MergeCommitment `json:"commit"` -} - // ExecutionDiscrepancyDetectedEvent is an execute discrepancy detected event. type ExecutionDiscrepancyDetectedEvent struct { - // CommitteeID is the identifier of the executor committee where a - // discrepancy has been detected. - CommitteeID hash.Hash `json:"cid"` - // Timeout signals whether the discrepancy was due to a timeout. Timeout bool `json:"timeout"` } -// MergeDiscrepancyDetectedEvent is a merge discrepancy detected event. -type MergeDiscrepancyDetectedEvent struct { -} - // FinalizedEvent is a finalized event. 
type FinalizedEvent struct { Round uint64 `json:"round"` @@ -178,9 +144,7 @@ type Event struct { RuntimeID common.Namespace `json:"runtime_id"` ExecutorCommitted *ExecutorCommittedEvent `json:"executor_committed,omitempty"` - MergeCommitted *MergeCommittedEvent `json:"merge_committed,omitempty"` ExecutionDiscrepancyDetected *ExecutionDiscrepancyDetectedEvent `json:"execution_discrepancy,omitempty"` - MergeDiscrepancyDetected *MergeDiscrepancyDetectedEvent `json:"merge_discrepancy,omitempty"` FinalizedEvent *FinalizedEvent `json:"finalized,omitempty"` } @@ -220,8 +184,6 @@ type ConsensusParameters struct { const ( // GasOpComputeCommit is the gas operation identifier for compute commits. GasOpComputeCommit transaction.Op = "compute_commit" - // GasOpMergeCommit is the gas operation identifier for merge commits. - GasOpMergeCommit transaction.Op = "merge_commit" ) // XXX: Define reasonable default gas costs. @@ -229,7 +191,6 @@ const ( // DefaultGasCosts are the "default" gas costs for operations. var DefaultGasCosts = transaction.Costs{ GasOpComputeCommit: 1000, - GasOpMergeCommit: 1000, } // SanityCheckBlocks examines the blocks table. diff --git a/go/roothash/api/commitment/executor.go b/go/roothash/api/commitment/executor.go index 51dc21a4017..c74dbb085d1 100644 --- a/go/roothash/api/commitment/executor.go +++ b/go/roothash/api/commitment/executor.go @@ -32,6 +32,7 @@ var ( // // Keep the roothash RAK validation in sync with changes to this structure. type ComputeResultsHeader struct { + Round uint64 `json:"round"` PreviousHash hash.Hash `json:"previous_hash"` IORoot hash.Hash `json:"io_root"` StateRoot hash.Hash `json:"state_root"` @@ -40,6 +41,10 @@ type ComputeResultsHeader struct { // IsParentOf returns true iff the header is the parent of a child header. 
func (h *ComputeResultsHeader) IsParentOf(child *block.Header) bool { + if h.Round != child.Round+1 { + return false + } + childHash := child.EncodedHash() return h.PreviousHash.Equal(&childHash) } @@ -51,7 +56,6 @@ func (h *ComputeResultsHeader) EncodedHash() hash.Hash { // ComputeBody holds the data signed in a compute worker commitment. type ComputeBody struct { - CommitteeID hash.Hash `json:"cid"` Header ComputeResultsHeader `json:"header"` StorageSignatures []signature.Signature `json:"storage_signatures"` RakSig signature.RawSignature `json:"rak_sig"` @@ -66,7 +70,6 @@ type ComputeBody struct { // matches what we're seeing. func (m *ComputeBody) VerifyTxnSchedSignature(header block.Header) bool { dispatch := &TxnSchedulerBatch{ - CommitteeID: m.CommitteeID, IORoot: m.InputRoot, StorageSignatures: m.InputStorageSigs, Header: header, @@ -89,11 +92,11 @@ func (m *ComputeBody) RootsForStorageReceipt() []hash.Hash { // // Note: Ensuring that the signature is signed by the keypair(s) that are // expected is the responsibility of the caller. -func (m *ComputeBody) VerifyStorageReceiptSignatures(ns common.Namespace, round uint64) error { +func (m *ComputeBody) VerifyStorageReceiptSignatures(ns common.Namespace) error { receiptBody := storage.ReceiptBody{ Version: 1, Namespace: ns, - Round: round, + Round: m.Header.Round, Roots: m.RootsForStorageReceipt(), } @@ -106,12 +109,12 @@ func (m *ComputeBody) VerifyStorageReceiptSignatures(ns common.Namespace, round // VerifyStorageReceipt validates that the provided storage receipt // matches the header. 
-func (m *ComputeBody) VerifyStorageReceipt(ns common.Namespace, round uint64, receipt *storage.ReceiptBody) error { +func (m *ComputeBody) VerifyStorageReceipt(ns common.Namespace, receipt *storage.ReceiptBody) error { if !receipt.Namespace.Equal(&ns) { return errors.New("roothash: receipt has unexpected namespace") } - if receipt.Round != round { + if receipt.Round != m.Header.Round { return errors.New("roothash: receipt has unexpected round") } diff --git a/go/roothash/api/commitment/merge.go b/go/roothash/api/commitment/merge.go deleted file mode 100644 index b5c03bd49a0..00000000000 --- a/go/roothash/api/commitment/merge.go +++ /dev/null @@ -1,95 +0,0 @@ -// Package commitment defines a roothash commitment. -package commitment - -import ( - "errors" - - "github.com/oasisprotocol/oasis-core/go/common/cbor" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" -) - -// MergeSignatureContext is the signature context used to sign merge -// worker commitments. -var MergeSignatureContext = signature.NewContext("oasis-core/roothash: merge commitment", signature.WithChainSeparation()) - -type MergeBody struct { - ExecutorCommits []ExecutorCommitment `json:"commits"` - Header block.Header `json:"header"` -} - -// MergeCommitment is a roothash commitment from a merge worker. -// -// The signed content is MergeBody. -type MergeCommitment struct { - signature.Signed -} - -// Equal compares vs another MergeCommitment for equality. -func (c *MergeCommitment) Equal(cmp *MergeCommitment) bool { - return c.Signed.Equal(&cmp.Signed) -} - -// OpenMergeCommitment is a merge commitment that has been verified and -// deserialized. -// -// The open commitment still contains the original signed commitment. -type OpenMergeCommitment struct { - MergeCommitment - - Body *MergeBody `json:"-"` // No need to serialize as it can be reconstructed. 
-} - -// UnmarshalCBOR handles CBOR unmarshalling from passed data. -func (c *OpenMergeCommitment) UnmarshalCBOR(data []byte) error { - if err := cbor.Unmarshal(data, &c.MergeCommitment); err != nil { - return err - } - - c.Body = new(MergeBody) - return cbor.Unmarshal(c.Blob, c.Body) -} - -// MostlyEqual returns true if the commitment is mostly equal to another -// specified commitment as per discrepancy detection criteria. -func (c OpenMergeCommitment) MostlyEqual(other OpenCommitment) bool { - return c.Body.Header.MostlyEqual(&other.(OpenMergeCommitment).Body.Header) -} - -// ToVote returns a hash that represents a vote for this commitment as -// per discrepancy resolution criteria. -func (c OpenMergeCommitment) ToVote() hash.Hash { - return c.Body.Header.EncodedHash() -} - -// ToDDResult returns a commitment-specific result after discrepancy -// detection. -func (c OpenMergeCommitment) ToDDResult() interface{} { - return c.Body.Header -} - -// Open validates the merge commitment signature, and de-serializes the body. -func (c *MergeCommitment) Open() (*OpenMergeCommitment, error) { - var body MergeBody - if err := c.Signed.Open(MergeSignatureContext, &body); err != nil { - return nil, errors.New("roothash/commitment: merge commitment has invalid signature") - } - - return &OpenMergeCommitment{ - MergeCommitment: *c, - Body: &body, - }, nil -} - -// SignMergeCommitment serializes the message and signs the commitment. 
-func SignMergeCommitment(signer signature.Signer, body *MergeBody) (*MergeCommitment, error) { - signed, err := signature.SignSigned(signer, MergeSignatureContext, body) - if err != nil { - return nil, err - } - - return &MergeCommitment{ - Signed: *signed, - }, nil -} diff --git a/go/roothash/api/commitment/pool.go b/go/roothash/api/commitment/pool.go index 50d62c2a275..04e9f32d1ba 100644 --- a/go/roothash/api/commitment/pool.go +++ b/go/roothash/api/commitment/pool.go @@ -31,10 +31,9 @@ var ( ErrStillWaiting = errors.New(moduleName, 9, "roothash/commitment: still waiting for commits") ErrInsufficientVotes = errors.New(moduleName, 10, "roothash/commitment: insufficient votes to finalize discrepancy resolution round") ErrBadExecutorCommits = errors.New(moduleName, 11, "roothash/commitment: bad executor commitments") - ErrInvalidCommitteeID = errors.New(moduleName, 12, "roothash/commitment: invalid committee ID") - ErrTxnSchedSigInvalid = p2pError.Permanent(errors.New(moduleName, 13, "roothash/commitment: txn scheduler signature invalid")) - ErrInvalidMessages = p2pError.Permanent(errors.New(moduleName, 14, "roothash/commitment: invalid messages")) - ErrBadStorageReceipts = errors.New(moduleName, 15, "roothash/commitment: bad storage receipts") + ErrTxnSchedSigInvalid = p2pError.Permanent(errors.New(moduleName, 12, "roothash/commitment: txn scheduler signature invalid")) + ErrInvalidMessages = p2pError.Permanent(errors.New(moduleName, 13, "roothash/commitment: invalid messages")) + ErrBadStorageReceipts = errors.New(moduleName, 14, "roothash/commitment: bad storage receipts") ) var logger *logging.Logger = logging.GetLogger("roothash/commitment/pool") @@ -66,9 +65,6 @@ type Pool struct { // ExecuteCommitments are the commitments in the pool iff Committee.Kind // is scheduler.KindComputeExecutor. 
ExecuteCommitments map[signature.PublicKey]OpenExecutorCommitment `json:"execute_commitments,omitempty"` - // MergeCommitments are the commitments in the pool iff Committee.Kind - // is scheduler.KindComputeMerge. - MergeCommitments map[signature.PublicKey]OpenMergeCommitment `json:"merge_commitments,omitempty"` // Discrepancy is a flag signalling that a discrepancy has been detected. Discrepancy bool `json:"discrepancy"` // NextTimeout is the time when the next call to TryFinalize(true) should @@ -96,21 +92,12 @@ func (p *Pool) isMember(id signature.PublicKey) bool { return p.MemberSet[id] } -// GetCommitteeID returns the identifier of the committee this pool is collecting -// commitments for. -func (p *Pool) GetCommitteeID() hash.Hash { - return p.Committee.EncodedMembersHash() -} - // ResetCommitments resets the commitments in the pool and clears the discrepancy // flag. func (p *Pool) ResetCommitments() { if p.ExecuteCommitments == nil || len(p.ExecuteCommitments) > 0 { p.ExecuteCommitments = make(map[signature.PublicKey]OpenExecutorCommitment) } - if p.MergeCommitments == nil || len(p.MergeCommitments) > 0 { - p.MergeCommitments = make(map[signature.PublicKey]OpenMergeCommitment) - } p.Discrepancy = false p.NextTimeout = time.Time{} } @@ -128,8 +115,6 @@ func (p *Pool) getCommitment(id signature.PublicKey) (OpenCommitment, bool) { switch p.Committee.Kind { case scheduler.KindComputeExecutor: com, ok = p.ExecuteCommitments[id] - case scheduler.KindComputeMerge: - com, ok = p.MergeCommitments[id] default: panic("roothash/commitment: unknown committee kind: " + p.Committee.Kind.String()) } @@ -209,21 +194,9 @@ func (p *Pool) addOpenExecutorCommitment( } } - // Verify that this is for the correct committee. 
- cID := p.GetCommitteeID() - if !cID.Equal(&body.CommitteeID) { - logger.Debug("executor commitment has invalid committee ID", - "expected_committee_id", cID, - "committee_id", body.CommitteeID, - "node_id", id, - ) - return ErrInvalidCommitteeID - } - // Check if the block is based on the previous block. if !header.IsParentOf(&blk.Header) { logger.Debug("executor commitment is not based on correct block", - "committee_id", cID, "node_id", id, "expected_previous_hash", blk.Header.EncodedHash(), "previous_hash", header.PreviousHash, @@ -232,10 +205,8 @@ func (p *Pool) addOpenExecutorCommitment( } // Verify that the txn scheduler signature for current commitment is valid. - currentTxnSchedSig := body.TxnSchedSig if err := sv.VerifyCommitteeSignatures(scheduler.KindComputeTxnScheduler, []signature.Signature{body.TxnSchedSig}); err != nil { logger.Debug("executor commitment has bad transaction scheduler signers", - "committee_id", cID, "node_id", id, "err", err, ) @@ -256,38 +227,19 @@ func (p *Pool) addOpenExecutorCommitment( } if err := sv.VerifyCommitteeSignatures(scheduler.KindStorage, body.StorageSignatures); err != nil { logger.Debug("executor commitment has bad storage receipt signers", - "committee_id", cID, "node_id", id, "err", err, ) return err } - if err := body.VerifyStorageReceiptSignatures(blk.Header.Namespace, blk.Header.Round+1); err != nil { + if err := body.VerifyStorageReceiptSignatures(blk.Header.Namespace); err != nil { logger.Debug("executor commitment has bad storage receipt signatures", - "committee_id", cID, "node_id", id, "err", err, ) return p2pError.Permanent(err) } - // Go through existing commitments and check if the txn scheduler signed - // different batches for the same committee. 
- for _, com := range p.ExecuteCommitments { - cb := com.Body - if cID.Equal(&cb.CommitteeID) { - existingTxnSchedSig := cb.TxnSchedSig - if currentTxnSchedSig.PublicKey.Equal(existingTxnSchedSig.PublicKey) && currentTxnSchedSig.Signature != existingTxnSchedSig.Signature { - // Same committe, same txn sched, but txn sched signatures - // don't match -- txn sched is malicious! - // TODO: Slash stake! (issue #1931) - logger.Warn("txn sched signed two different batches for the same committee ID", - "committee_id", cb.CommitteeID, - ) - } - } - } - if p.ExecuteCommitments == nil { p.ExecuteCommitments = make(map[signature.PublicKey]OpenExecutorCommitment) } @@ -346,8 +298,6 @@ func (p *Pool) CheckEnoughCommitments(didTimeout bool) error { switch p.Committee.Kind { case scheduler.KindComputeExecutor: required -= int(p.Runtime.Executor.AllowedStragglers) - case scheduler.KindComputeMerge: - required -= int(p.Runtime.Merge.AllowedStragglers) default: panic("roothash/commitment: unknown committee kind while checking commitments: " + p.Committee.Kind.String()) } @@ -522,133 +472,6 @@ func (p *Pool) TryFinalize( return commit, nil } -// AddMergeCommitment verifies and adds a new merge commitment to the pool. -// -// Any executor commitments are added to the provided pool. -func (p *Pool) AddMergeCommitment( - ctx context.Context, - blk *block.Block, - sv SignatureVerifier, - nl NodeLookup, - commitment *MergeCommitment, - ccPool *MultiPool, -) error { - if p.Committee == nil { - return ErrNoCommittee - } - if p.Committee.Kind != scheduler.KindComputeMerge { - return ErrInvalidCommitteeKind - } - - id := commitment.Signature.PublicKey - - // Ensure that the node is actually a committee member. We do not enforce specific - // roles based on current discrepancy state to allow commitments arriving in any - // order (e.g., a backup worker can submit a commitment even before there is a - // discrepancy). 
- if !p.isMember(id) { - return ErrNotInCommittee - } - - // Ensure the node did not already submit a commitment. - if _, ok := p.MergeCommitments[id]; ok { - return ErrAlreadyCommitted - } - - // Check the commitment signature and de-serialize. - openCom, err := commitment.Open() - if err != nil { - return err - } - body := openCom.Body - header := &body.Header - - // Check if the block is based on the previous block. - if !header.IsParentOf(&blk.Header) { - logger.Debug("merge commitment is not based on correct block", - "node_id", id, - "expected_previous_hash", blk.Header.EncodedHash(), - "previous_hash", header.PreviousHash, - ) - return ErrNotBasedOnCorrectBlock - } - - // Check executor commitments -- all commitments must be valid and there - // must be no discrepancy as the merge committee nodes are supposed to - // check this. - if err = ccPool.addExecutorCommitments(ctx, blk, sv, nl, body.ExecutorCommits); err != nil { - return err - } - - // There must be enough executor commits for all committees. - if err = ccPool.CheckEnoughCommitments(); err != nil { - return ErrBadExecutorCommits - } - - for _, sp := range ccPool.Committees { - if !sp.Discrepancy { - // If there was no discrepancy yet there must not be one now. - _, err = sp.DetectDiscrepancy() - switch err { - case nil: - case ErrDiscrepancyDetected: - // We may also be able to already perform discrepancy resolution, check if - // this is possible. - _, err = sp.ResolveDiscrepancy() - if err == nil { - break - } - fallthrough - default: - logger.Debug("discrepancy detection failed for executor committee", - "err", err, - ) - return ErrBadExecutorCommits - } - } else { - // If there was a discrepancy before it must be resolved now. - _, err = sp.ResolveDiscrepancy() - if err != nil { - logger.Debug("discrepancy resolution failed for executor committee", - "err", err, - ) - return ErrBadExecutorCommits - } - } - } - - // Check if the header refers to merkle roots in storage. 
- if uint64(len(header.StorageSignatures)) < p.Runtime.Storage.MinWriteReplication { - logger.Debug("merge commitment doesn't have enough storage receipts", - "node_id", id, - "min_write_replication", p.Runtime.Storage.MinWriteReplication, - "num_receipts", len(header.StorageSignatures), - ) - return ErrBadStorageReceipts - } - if err = sv.VerifyCommitteeSignatures(scheduler.KindStorage, header.StorageSignatures); err != nil { - logger.Debug("merge commitment has bad storage receipt signers", - "node_id", id, - "err", err, - ) - return err - } - if err = header.VerifyStorageReceiptSignatures(); err != nil { - logger.Debug("merge commitment has bad storage receipt signatures", - "node_id", id, - "err", err, - ) - return err - } - - if p.MergeCommitments == nil { - p.MergeCommitments = make(map[signature.PublicKey]OpenMergeCommitment) - } - p.MergeCommitments[id] = *openCom - - return nil -} - // GetExecutorCommitments returns a list of executor commitments in the pool. func (p *Pool) GetExecutorCommitments() (result []ExecutorCommitment) { for _, c := range p.ExecuteCommitments { @@ -661,143 +484,3 @@ func (p *Pool) GetExecutorCommitments() (result []ExecutorCommitment) { func (p *Pool) IsTimeout(now time.Time) bool { return !p.NextTimeout.IsZero() && !p.NextTimeout.After(now) } - -// MultiPool contains pools for multiple committees and routes operations to -// multiple committees based on commitments' committee IDs. -type MultiPool struct { - Committees map[hash.Hash]*Pool `json:"committees"` -} - -// AddExecutorCommitment verifies and adds a new executor commitment to the pool. -func (m *MultiPool) AddExecutorCommitment( - ctx context.Context, - blk *block.Block, - sv SignatureVerifier, - nl NodeLookup, - commitment *ExecutorCommitment, -) (*Pool, error) { - // Check the commitment signature and de-serialize into header. 
- openCom, err := commitment.Open() - if err != nil { - return nil, p2pError.Permanent(err) - } - - p := m.Committees[openCom.Body.CommitteeID] - if p == nil { - return nil, ErrInvalidCommitteeID - } - - return p, p.addOpenExecutorCommitment(ctx, blk, sv, nl, openCom) -} - -// addExecutorCommitments verifies and adds multiple executor commitments to the pool. -// All valid commitments will be added, redundant commitments will be ignored. -// -// Note that any signatures being invalid will result in no changes to the pool. -func (m *MultiPool) addExecutorCommitments( - ctx context.Context, - blk *block.Block, - sv SignatureVerifier, - nl NodeLookup, - commitments []ExecutorCommitment, -) error { - // Batch verify all of the signatures at once. - msgs := make([][]byte, 0, len(commitments)) - sigs := make([]signature.Signature, 0, len(commitments)) - for i := range commitments { - v := commitments[i] // This is deliberate. - msgs = append(msgs, v.Blob) - sigs = append(sigs, v.Signature) - } - - if !signature.VerifyBatch(ExecutorSignatureContext, msgs, sigs) { - return signature.ErrVerifyFailed - } - - // Ok, all of the signatures are valid, deserialize the blobs and add them - // serially. - var hadError bool - for _, v := range commitments { - var body ComputeBody - if err := cbor.Unmarshal(v.Blob, &body); err != nil { - hadError = true - continue - } - - openCom := &OpenExecutorCommitment{ - ExecutorCommitment: v, - Body: &body, - } - - p := m.Committees[openCom.Body.CommitteeID] - if p == nil { - hadError = true - continue - } - - err := p.addOpenExecutorCommitment(ctx, blk, sv, nl, openCom) - switch err { - case nil, ErrAlreadyCommitted: - default: - hadError = true - } - } - if hadError { - return ErrBadExecutorCommits - } - - return nil -} - -// CheckEnoughCommitments checks if there are enough commitments in the pool to be -// able to perform discrepancy detection. 
-// -// Note that this checks all committees in the multi-pool and returns an error if -// any doesn't have enoguh commitments. -func (m *MultiPool) CheckEnoughCommitments() error { - for _, p := range m.Committees { - if err := p.CheckEnoughCommitments(false); err != nil { - return err - } - } - return nil -} - -// GetExecutorCommitments returns a list of executor commitments in the pool. -func (m *MultiPool) GetOpenExecutorCommitments() (result []OpenExecutorCommitment) { - for _, p := range m.Committees { - for _, c := range p.ExecuteCommitments { - result = append(result, c) - } - } - return -} - -// GetTimeoutCommittees returns a list of committee pools that are up for their -// TryFinalize to be called. -func (m *MultiPool) GetTimeoutCommittees(now time.Time) (result []*Pool) { - for _, p := range m.Committees { - if p.IsTimeout(now) { - result = append(result, p) - } - } - return -} - -// GetNextTimeout returns the minimum next timeout of all committee pools. -func (m *MultiPool) GetNextTimeout() (timeout time.Time) { - for _, p := range m.Committees { - if timeout.IsZero() || (!p.NextTimeout.IsZero() && p.NextTimeout.Before(timeout)) { - timeout = p.NextTimeout - } - } - return -} - -// ResetCommitments resets the commitments in the pool and clears their discrepancy -// flags. -func (m *MultiPool) ResetCommitments() { - for _, p := range m.Committees { - p.ResetCommitments() - } -} diff --git a/go/roothash/api/commitment/pool_test.go b/go/roothash/api/commitment/pool_test.go index 8d54bb6a955..febe9df6c9f 100644 --- a/go/roothash/api/commitment/pool_test.go +++ b/go/roothash/api/commitment/pool_test.go @@ -79,6 +79,7 @@ func TestPoolDefault(t *testing.T) { body := ComputeBody{ Header: ComputeResultsHeader{ + Round: blk.Header.Round, PreviousHash: blk.Header.PreviousHash, IORoot: blk.Header.IORoot, StateRoot: blk.Header.StateRoot, @@ -142,10 +143,7 @@ func TestPoolSingleCommitment(t *testing.T) { } // Generate a commitment. 
- childBlk, parentBlk, body := generateComputeBody(t, committee) - - commit, err := SignExecutorCommitment(sk, &body) - require.NoError(t, err, "SignExecutorCommitment") + childBlk, parentBlk, body := generateComputeBody(t) sv := &staticSignatureVerifier{ storagePublicKey: body.StorageSignatures[0].PublicKey, @@ -157,9 +155,34 @@ func TestPoolSingleCommitment(t *testing.T) { }, } - // Adding a commitment not based on correct block should fail. - err = pool.AddExecutorCommitment(context.Background(), parentBlk, sv, nl, commit) - require.Error(t, err, "AddExecutorCommitment") + // Test invalid commitments. + for _, tc := range []struct { + name string + fn func(*ComputeBody) + expectedErr error + }{ + {"BlockBadRound", func(b *ComputeBody) { b.Header.Round-- }, ErrNotBasedOnCorrectBlock}, + {"BlockBadPreviousHash", func(b *ComputeBody) { b.Header.PreviousHash.FromBytes([]byte("invalid")) }, ErrNotBasedOnCorrectBlock}, + {"StorageSigs1", func(b *ComputeBody) { b.StorageSignatures = nil }, ErrBadStorageReceipts}, + } { + _, _, invalidBody := generateComputeBody(t) + invalidBody.StorageSignatures = append([]signature.Signature{}, body.StorageSignatures...) + invalidBody.TxnSchedSig = body.TxnSchedSig + + tc.fn(&invalidBody) + + var commit *ExecutorCommitment + commit, err = SignExecutorCommitment(sk, &invalidBody) + require.NoError(t, err, "SignExecutorCommitment(%s)", tc.name) + + err = pool.AddExecutorCommitment(context.Background(), childBlk, sv, nl, commit) + require.Error(t, err, "AddExecutorCommitment(%s)", tc.name) + require.Equal(t, tc.expectedErr, err, "AddExecutorCommitment(%s)", tc.name) + } + + // Generate a valid commitment. + commit, err := SignExecutorCommitment(sk, &body) + require.NoError(t, err, "SignExecutorCommitment") // There should not be enough executor commitments. err = pool.CheckEnoughCommitments(false) @@ -271,7 +294,7 @@ func TestPoolSingleCommitmentTEE(t *testing.T) { } // Generate a commitment. 
- childBlk, parentBlk, body := generateComputeBody(t, committee) + childBlk, _, body := generateComputeBody(t) rakSig, err := signature.Sign(skRAK, ComputeResultsHeaderSignatureContext, cbor.Marshal(body.Header)) require.NoError(t, err, "Sign") body.RakSig = rakSig.Signature @@ -279,10 +302,6 @@ func TestPoolSingleCommitmentTEE(t *testing.T) { commit, err := SignExecutorCommitment(sk, &body) require.NoError(t, err, "SignExecutorCommitment") - // Adding a commitment not based on correct block should fail. - err = pool.AddExecutorCommitment(context.Background(), parentBlk, nopSV, nl, commit) - require.Error(t, err, "AddExecutorCommitment") - // There should not be enough executor commitments. err = pool.CheckEnoughCommitments(false) require.Error(t, err, "CheckEnoughCommitments") @@ -327,10 +346,7 @@ func TestPoolTwoCommitments(t *testing.T) { } // Generate a commitment. - childBlk, _, body := generateComputeBody(t, committee) - - bodyInvalidID := body - bodyInvalidID.CommitteeID.FromBytes([]byte("invalid-committee-id")) + childBlk, _, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -338,15 +354,6 @@ func TestPoolTwoCommitments(t *testing.T) { commit2, err := SignExecutorCommitment(sk2, &body) require.NoError(t, err, "SignExecutorCommitment") - // Invalid committee. - cInvalidCommit, err := SignExecutorCommitment(sk1, &bodyInvalidID) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding a commitment for an invalid committee should fail. - err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, cInvalidCommit) - require.Error(t, err, "AddExecutorCommitment") - require.Equal(t, ErrInvalidCommitteeID, err, "AddExecutorCommitment") - // Adding commitment 1 should succeed. 
err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, commit1) require.NoError(t, err, "AddExecutorCommitment") @@ -383,7 +390,7 @@ func TestPoolTwoCommitments(t *testing.T) { } // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, committee) + childBlk, parentBlk, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -492,7 +499,7 @@ func TestPoolSerialization(t *testing.T) { } // Generate a commitment. - childBlk, _, body := generateComputeBody(t, committee) + childBlk, _, body := generateComputeBody(t) commit, err := SignExecutorCommitment(sk, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -518,410 +525,6 @@ func TestPoolSerialization(t *testing.T) { require.EqualValues(t, &body.Header, &header, "DD should return the same header") } -func TestMultiPoolSerialization(t *testing.T) { - genesisTestHelpers.SetTestChainContext() - - rt, sks1, committee1, nl := generateMockCommittee(t) - _, sks2, committee2, _ := generateMockCommittee(t) - com1ID := committee1.EncodedMembersHash() - com2ID := committee2.EncodedMembersHash() - - // Create a multi-pool. - pool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - com1ID: { - Runtime: rt, - Committee: committee1, - }, - com2ID: { - Runtime: rt, - Committee: committee2, - }, - }, - } - - // Generate commitments. - childBlk, _, body1 := generateComputeBody(t, committee1) - _, _, body2 := generateComputeBody(t, committee2) - - // First committee. - c1commit1, err := SignExecutorCommitment(sks1[0], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - c1commit2, err := SignExecutorCommitment(sks1[1], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - // Second committee. 
- c2commit1, err := SignExecutorCommitment(sks2[0], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - c2commit2, err := SignExecutorCommitment(sks2[1], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding commitment 1 should succeed. - sp, err := pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // Adding commitment 2 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // Adding commitment 3 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - // Adding commitment 4 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - m := cbor.Marshal(pool) - var d MultiPool - err = cbor.Unmarshal(m, &d) - require.NoError(t, err) - - // There should be enough executor commitments. - err = d.CheckEnoughCommitments() - require.NoError(t, err, "CheckEnoughCommitments") -} - -func TestPoolMergeCommitment(t *testing.T) { - genesisTestHelpers.SetTestChainContext() - - rt, executorSks, executorCommittee, nl := generateMockCommittee(t) - _, mergeSks, mergeCommittee, _ := generateMockCommittee(t) - mergeCommittee.Kind = scheduler.KindComputeMerge - executorCommitteeID := executorCommittee.EncodedMembersHash() - - t.Run("NoDiscrepancy", func(t *testing.T) { - // Create a merge commitment pool. 
- mergePool := Pool{ - Runtime: rt, - Committee: mergeCommittee, - } - - // Create a executor commitment multi-pool. - executorPool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - executorCommitteeID: { - Runtime: rt, - Committee: executorCommittee, - }, - }, - } - - // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, executorCommittee) - - commit1, err := SignExecutorCommitment(executorSks[0], &body) - require.NoError(t, err, "SignExecutorCommitment") - - commit2, err := SignExecutorCommitment(executorSks[1], &body) - require.NoError(t, err, "SignExecutorCommitment") - - // Generate a merge commitment. - mergeBody := MergeBody{ - ExecutorCommits: []ExecutorCommitment{*commit1, *commit2}, - Header: parentBlk.Header, - } - - mergeCommit1, err := SignMergeCommitment(mergeSks[0], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - mergeCommit2, err := SignMergeCommitment(mergeSks[1], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - // Adding a commitment having not enough storage receipts should fail. - mergeBodyNotEnoughStorageSig := mergeBody - mergeBodyNotEnoughStorageSig.Header.StorageSignatures = []signature.Signature{} - incorrectCommit, err := SignMergeCommitment(mergeSks[0], &mergeBodyNotEnoughStorageSig) - require.NoError(t, err, "SignMergeCommitment") - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, incorrectCommit, &executorPool) - require.Error(t, err, "AddMergeCommitment") - require.Equal(t, ErrBadStorageReceipts, err, "AddMergeCommitment") - - // Adding commitment 1 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit1, &executorPool) - require.NoError(t, err, "AddMergeCommitment") - - // There should not be enough merge commitments. 
- err = mergePool.CheckEnoughCommitments(false) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - err = mergePool.CheckEnoughCommitments(true) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit2, &executorPool) - require.NoError(t, err, "AddExecutorCommitment") - - m := cbor.Marshal(executorPool) - var d MultiPool - err = cbor.Unmarshal(m, &d) - require.NoError(t, err) - - // There should be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.NoError(t, err, "CheckEnoughCommitments") - - // There should be no discrepancy. - dc, err := mergePool.DetectDiscrepancy() - require.NoError(t, err, "DetectDiscrepancy") - require.Equal(t, false, mergePool.Discrepancy) - header := dc.ToDDResult().(block.Header) - require.EqualValues(t, &parentBlk.Header, &header, "DD should return the same header") - }) - - t.Run("ResolvedExecutionDiscrepancy", func(t *testing.T) { - // Create a merge commitment pool. - mergePool := Pool{ - Runtime: rt, - Committee: mergeCommittee, - } - - // Create a executor commitment multi-pool. - executorPool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - executorCommitteeID: { - Runtime: rt, - Committee: executorCommittee, - }, - }, - } - - // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, executorCommittee) - - commit1, err := SignExecutorCommitment(executorSks[0], &body) - require.NoError(t, err, "SignExecutorCommitment") - - commit3, err := SignExecutorCommitment(executorSks[2], &body) - require.NoError(t, err, "SignExecutorCommitment") - - // Update state root and fix the storage receipt. 
- body.Header.StateRoot.FromBytes([]byte("discrepancy")) - body.StorageSignatures = []signature.Signature{generateStorageReceiptSignature(t, parentBlk, &body)} - - commit2, err := SignExecutorCommitment(executorSks[1], &body) - require.NoError(t, err, "SignExecutorCommitment") - - // Generate a merge commitment. - mergeBody := MergeBody{ - ExecutorCommits: []ExecutorCommitment{*commit1, *commit2, *commit3}, - Header: parentBlk.Header, - } - - mergeCommit1, err := SignMergeCommitment(mergeSks[0], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - mergeCommit2, err := SignMergeCommitment(mergeSks[1], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - // Adding commitment 1 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit1, &executorPool) - require.NoError(t, err, "AddMergeCommitment") - - // There should not be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - err = mergePool.CheckEnoughCommitments(true) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit2, &executorPool) - require.NoError(t, err, "AddExecutorCommitment") - - m := cbor.Marshal(executorPool) - var d MultiPool - err = cbor.Unmarshal(m, &d) - require.NoError(t, err) - - // There should be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.NoError(t, err, "CheckEnoughCommitments") - - // There should be no discrepancy. 
- dc, err := mergePool.DetectDiscrepancy() - require.NoError(t, err, "DetectDiscrepancy") - require.Equal(t, false, mergePool.Discrepancy) - header := dc.ToDDResult().(block.Header) - require.EqualValues(t, &parentBlk.Header, &header, "DD should return the same header") - }) -} - -func TestMultiPool(t *testing.T) { - genesisTestHelpers.SetTestChainContext() - - rt, sks1, committee1, nl := generateMockCommittee(t) - _, sks2, committee2, _ := generateMockCommittee(t) - com1ID := committee1.EncodedMembersHash() - com2ID := committee2.EncodedMembersHash() - - t.Run("NoDiscrepancy", func(t *testing.T) { - // Create a multi-pool. - pool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - com1ID: { - Runtime: rt, - Committee: committee1, - }, - com2ID: { - Runtime: rt, - Committee: committee2, - }, - }, - } - - // Generate commitments. - childBlk, _, body1 := generateComputeBody(t, committee1) - _, _, body2 := generateComputeBody(t, committee2) - - bodyInvalidID := body1 - bodyInvalidID.CommitteeID.FromBytes([]byte("invalid-committee-id")) - - // First committee. - c1commit1, err := SignExecutorCommitment(sks1[0], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - c1commit2, err := SignExecutorCommitment(sks1[1], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - // Second committee. - c2commit1, err := SignExecutorCommitment(sks2[0], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - c2commit2, err := SignExecutorCommitment(sks2[1], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Invalid committee. - cInvalidCommit, err := SignExecutorCommitment(sks1[0], &bodyInvalidID) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding a commitment for an invalid committee should fail. 
- _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, cInvalidCommit) - require.Error(t, err, "AddExecutorCommitment") - require.Equal(t, ErrInvalidCommitteeID, err, "AddExecutorCommitment") - - // Adding commitment 1 should succeed. - sp, err := pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 3 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 4 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - // There should be enough executor commitments. 
- err = pool.CheckEnoughCommitments() - require.NoError(t, err, "CheckEnoughCommitments") - }) - - t.Run("Discrepancy", func(t *testing.T) { - // Create a multi-pool. - pool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - com1ID: { - Runtime: rt, - Committee: committee1, - }, - com2ID: { - Runtime: rt, - Committee: committee2, - }, - }, - } - - // Generate commitments. - childBlk, _, body1 := generateComputeBody(t, committee1) - _, parentBlk, body2 := generateComputeBody(t, committee2) - - // First committee. - c1commit1, err := SignExecutorCommitment(sks1[0], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - c1commit2, err := SignExecutorCommitment(sks1[1], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - // Second committee. - c2commit1, err := SignExecutorCommitment(sks2[0], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Update state root and fix the storage receipt. - body2.Header.StateRoot.FromBytes([]byte("discrepancy")) - body2.StorageSignatures = []signature.Signature{generateStorageReceiptSignature(t, parentBlk, &body2)} - - c2commit2, err := SignExecutorCommitment(sks2[1], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding commitment 1 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit1) - require.NoError(t, err, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit2) - require.NoError(t, err, "AddExecutorCommitment") - - // There should not be enough executor commitments. 
- err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 3 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit1) - require.NoError(t, err, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 4 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit2) - require.NoError(t, err, "AddExecutorCommitment") - - // There should be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.NoError(t, err, "CheckEnoughCommitments") - }) -} - func TestTryFinalize(t *testing.T) { genesisTestHelpers.SetTestChainContext() @@ -941,10 +544,7 @@ func TestTryFinalize(t *testing.T) { } // Generate a commitment. - childBlk, _, body := generateComputeBody(t, committee) - - bodyInvalidID := body - bodyInvalidID.CommitteeID.FromBytes([]byte("invalid-committee-id")) + childBlk, _, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -952,15 +552,6 @@ func TestTryFinalize(t *testing.T) { commit2, err := SignExecutorCommitment(sk2, &body) require.NoError(t, err, "SignExecutorCommitment") - // Invalid committee. - cInvalidCommit, err := SignExecutorCommitment(sk1, &bodyInvalidID) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding a commitment for an invalid committee should fail. - err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, cInvalidCommit) - require.Error(t, err, "AddExecutorCommitment") - require.Equal(t, ErrInvalidCommitteeID, err, "AddExecutorCommitment") - // Adding commitment 1 should succeed. 
err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, commit1) require.NoError(t, err, "AddExecutorCommitment") @@ -989,7 +580,7 @@ func TestTryFinalize(t *testing.T) { } // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, committee) + childBlk, parentBlk, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -1049,7 +640,7 @@ func TestTryFinalize(t *testing.T) { } // Generate a commitment. - childBlk, _, body := generateComputeBody(t, committee) + childBlk, _, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -1160,14 +751,14 @@ func generateMockCommittee(t *testing.T) ( return } -func generateComputeBody(t *testing.T, committee *scheduler.Committee) (*block.Block, *block.Block, ComputeBody) { +func generateComputeBody(t *testing.T) (*block.Block, *block.Block, ComputeBody) { var id common.Namespace childBlk := block.NewGenesisBlock(id, 0) parentBlk := block.NewEmptyBlock(childBlk, 1, block.Normal) body := ComputeBody{ - CommitteeID: committee.EncodedMembersHash(), Header: ComputeResultsHeader{ + Round: parentBlk.Header.Round, PreviousHash: parentBlk.Header.PreviousHash, IORoot: parentBlk.Header.IORoot, StateRoot: parentBlk.Header.StateRoot, @@ -1205,7 +796,6 @@ func generateTxnSchedulerSignature(t *testing.T, childBlk *block.Block, body *Co body.InputRoot = hash.Hash{} body.InputStorageSigs = []signature.Signature{} dispatch := &TxnSchedulerBatch{ - CommitteeID: body.CommitteeID, IORoot: body.InputRoot, StorageSignatures: body.InputStorageSigs, Header: childBlk.Header, diff --git a/go/roothash/api/commitment/txnscheduler.go b/go/roothash/api/commitment/txnscheduler.go index e955eb5b5ab..0c1d8feccca 100644 --- a/go/roothash/api/commitment/txnscheduler.go +++ b/go/roothash/api/commitment/txnscheduler.go @@ -16,9 +16,6 @@ var TxnSchedulerBatchSigCtx = 
signature.NewContext("oasis-core/roothash: tx batc // Don't forget to bump CommitteeProtocol version in go/common/version // if you change anything in this struct. type TxnSchedulerBatch struct { - // CommitteeID is the committee ID of the target executor committee. - CommitteeID hash.Hash `json:"cid"` - // IORoot is the I/O root containing the inputs (transactions) that // the executor node should use. IORoot hash.Hash `json:"io_root"` diff --git a/go/roothash/tests/tester.go b/go/roothash/tests/tester.go index 03665dcf545..1307feef3b3 100644 --- a/go/roothash/tests/tester.go +++ b/go/roothash/tests/tester.go @@ -40,7 +40,6 @@ type runtimeState struct { genesisBlock *block.Block executorCommittee *testCommittee - mergeCommittee *testCommittee storageCommittee *testCommittee txnSchedCommittee *testCommittee } @@ -195,7 +194,7 @@ func (s *runtimeState) testEpochTransitionBlock(t *testing.T, scheduler schedule nodes[node.Node.ID] = node } - s.executorCommittee, s.mergeCommittee, s.storageCommittee, s.txnSchedCommittee = mustGetCommittee(t, s.rt, epoch+1, scheduler, nodes) + s.executorCommittee, s.storageCommittee, s.txnSchedCommittee = mustGetCommittee(t, s.rt, epoch+1, scheduler, nodes) // Wait to receive an epoch transition block. for { @@ -228,7 +227,7 @@ func testSuccessfulRound(t *testing.T, backend api.Backend, consensus consensusA func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, consensus consensusAPI.Backend, identity *identity.Identity) { require := require.New(t) - rt, executorCommittee, mergeCommittee := s.rt, s.executorCommittee, s.mergeCommittee + rt, executorCommittee := s.rt, s.executorCommittee dataDir, err := ioutil.TempDir("", "oasis-storage-test_") require.NoError(err, "TempDir") @@ -299,8 +298,8 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co toCommit = append(toCommit, executorCommittee.workers...) 
for _, node := range toCommit { commitBody := commitment.ComputeBody{ - CommitteeID: executorCommittee.committee.EncodedMembersHash(), Header: commitment.ComputeResultsHeader{ + Round: parent.Header.Round, PreviousHash: parent.Header.PreviousHash, IORoot: parent.Header.IORoot, StateRoot: parent.Header.StateRoot, @@ -312,7 +311,6 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co // Fake txn scheduler signature. dispatch := &commitment.TxnSchedulerBatch{ - CommitteeID: commitBody.CommitteeID, IORoot: commitBody.InputRoot, StorageSignatures: commitBody.InputStorageSigs, Header: child.Header, @@ -330,28 +328,12 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co executorCommits = append(executorCommits, *commit) } - // Generate all the merge commitments. - var mergeCommits []commitment.MergeCommitment - toCommit = []*registryTests.TestNode{} - toCommit = append(toCommit, mergeCommittee.workers...) - for _, node := range toCommit { - commitBody := commitment.MergeBody{ - ExecutorCommits: executorCommits, - Header: parent.Header, - } - // `err` shadows outside. - commit, err := commitment.SignMergeCommitment(node.Signer, &commitBody) // nolint: vetshadow - require.NoError(err, "SignSigned") - - mergeCommits = append(mergeCommits, *commit) - } - ctx, cancel := context.WithTimeout(context.Background(), recvTimeout) defer cancel() - tx := api.NewMergeCommitTx(0, nil, rt.Runtime.ID, mergeCommits) + tx := api.NewExecutorCommitTx(0, nil, rt.Runtime.ID, executorCommits) err = consensusAPI.SignAndSubmitTx(ctx, consensus, toCommit[0].Signer, tx) - require.NoError(err, "MergeCommit") + require.NoError(err, "ExecutorCommit") // Ensure that the round was finalized. for { @@ -380,15 +362,15 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co // There should be merge commitment events for all commitments. 
evts, err := backend.GetEvents(ctx, blk.Height) require.NoError(err, "GetEvents") - // Merge commit event + Finalized event. - require.Len(evts, len(mergeCommits)+1, "should have all events") + // Executor commit event + Finalized event. + require.Len(evts, len(executorCommits)+1, "should have all events") // First event is Finalized. require.EqualValues(&api.FinalizedEvent{Round: header.Round}, evts[0].FinalizedEvent, "finalized event should have the right round") for i, ev := range evts[1:] { switch { - case ev.MergeCommitted != nil: - // Merge commitment event. - require.EqualValues(mergeCommits[i], ev.MergeCommitted.Commit, "merge commitment event should have the right commitment") + case ev.ExecutorCommitted != nil: + // Executor commitment event. + require.EqualValues(executorCommits[i], ev.ExecutorCommitted.Commit, "executor commitment event should have the right commitment") default: // There should be no other event types. t.Fatalf("unexpected event: %+v", ev) @@ -418,7 +400,6 @@ func mustGetCommittee( nodes map[signature.PublicKey]*registryTests.TestNode, ) ( executorCommittee *testCommittee, - mergeCommittee *testCommittee, storageCommittee *testCommittee, txnSchedCommittee *testCommittee, ) { @@ -459,10 +440,8 @@ func mustGetCommittee( groupSize = int(rt.Runtime.TxnScheduler.GroupSize) groupBackupSize = 0 case scheduler.KindComputeExecutor: - fallthrough - case scheduler.KindComputeMerge: - groupSize = int(rt.Runtime.Merge.GroupSize) - groupBackupSize = int(rt.Runtime.Merge.GroupBackupSize) + groupSize = int(rt.Runtime.Executor.GroupSize) + groupBackupSize = int(rt.Runtime.Executor.GroupBackupSize) case scheduler.KindStorage: groupSize = int(rt.Runtime.Storage.GroupSize) } @@ -484,13 +463,11 @@ func mustGetCommittee( txnSchedCommittee = &ret case scheduler.KindComputeExecutor: executorCommittee = &ret - case scheduler.KindComputeMerge: - mergeCommittee = &ret case scheduler.KindStorage: storageCommittee = &ret } - if executorCommittee == nil || 
mergeCommittee == nil || storageCommittee == nil || txnSchedCommittee == nil { + if executorCommittee == nil || storageCommittee == nil || txnSchedCommittee == nil { continue } diff --git a/go/runtime/host/mock/mock.go b/go/runtime/host/mock/mock.go index 2ca94a136d9..8a341b70c89 100644 --- a/go/runtime/host/mock/mock.go +++ b/go/runtime/host/mock/mock.go @@ -77,6 +77,7 @@ func (r *runtime) Call(ctx context.Context, body *protocol.Body) (*protocol.Body return &protocol.Body{RuntimeExecuteTxBatchResponse: &protocol.RuntimeExecuteTxBatchResponse{ Batch: protocol.ComputedBatch{ Header: commitment.ComputeResultsHeader{ + Round: rq.Block.Header.Round + 1, PreviousHash: rq.Block.Header.EncodedHash(), IORoot: ioRoot, StateRoot: stateRoot, diff --git a/go/scheduler/api/api.go b/go/scheduler/api/api.go index 9aff5b1328a..873206eee1d 100644 --- a/go/scheduler/api/api.go +++ b/go/scheduler/api/api.go @@ -75,14 +75,11 @@ const ( // KindComputeTxnScheduler is a transaction scheduler committee. KindComputeTxnScheduler CommitteeKind = 2 - // KindComputeMerge is a merge committee. - KindComputeMerge CommitteeKind = 3 - // KindStorage is a storage committee. - KindStorage CommitteeKind = 4 + KindStorage CommitteeKind = 3 // MaxCommitteeKind is a dummy value used for iterating all committee kinds. - MaxCommitteeKind = 5 + MaxCommitteeKind = 4 ) // NeedsLeader returns if committee kind needs leader role. 
@@ -92,8 +89,6 @@ func (k CommitteeKind) NeedsLeader() (bool, error) { return false, nil case KindComputeTxnScheduler: return true, nil - case KindComputeMerge: - return false, nil case KindStorage: return false, nil default: @@ -110,8 +105,6 @@ func (k CommitteeKind) String() string { return "executor" case KindComputeTxnScheduler: return "txn_scheduler" - case KindComputeMerge: - return "merge" case KindStorage: return "storage" default: diff --git a/go/scheduler/tests/tester.go b/go/scheduler/tests/tester.go index ea74a07702a..3568c818bdb 100644 --- a/go/scheduler/tests/tester.go +++ b/go/scheduler/tests/tester.go @@ -42,10 +42,10 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend epochtime := consensus.EpochTime().(epochtime.SetableBackend) epoch := epochtimeTests.MustAdvanceEpoch(t, epochtime, 1) - ensureValidCommittees := func(expectedExecutor, expectedTransactionScheduler, expectedMerge, expectedStorage int) { - var executor, transactionScheduler, merge, storage *api.Committee + ensureValidCommittees := func(expectedExecutor, expectedTransactionScheduler, expectedStorage int) { + var executor, transactionScheduler, storage *api.Committee var seen int - for seen < 4 { + for seen < 3 { select { case committee := <-ch: if committee.ValidFor < epoch { @@ -64,10 +64,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend require.Nil(transactionScheduler, "haven't seen a transaction scheduler committee yet") require.Len(committee.Members, expectedTransactionScheduler, "committee has all transaction scheduler nodes") transactionScheduler = committee - case api.KindComputeMerge: - require.Nil(merge, "haven't seen a merge committee yet") - require.Len(committee.Members, expectedMerge, "committee has all merge nodes") - merge = committee case api.KindStorage: require.Nil(storage, "haven't seen a storage committee yet") require.Len(committee.Members, expectedStorage, "committee has all storage nodes") @@ 
-98,9 +94,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend case api.KindComputeTxnScheduler: require.EqualValues(transactionScheduler, committee, "fetched transaction scheduler committee is identical") transactionScheduler = nil - case api.KindComputeMerge: - require.EqualValues(merge, committee, "fetched merge committee is identical") - merge = nil case api.KindStorage: require.EqualValues(storage, committee, "fetched storage committee is identical") storage = nil @@ -109,7 +102,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend require.Nil(executor, "fetched an executor committee") require.Nil(transactionScheduler, "fetched a transaction scheduler committee") - require.Nil(merge, "fetched a merge committee") require.Nil(storage, "fetched a storage committee") } @@ -125,7 +117,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend ensureValidCommittees( nExecutor, int(rt.Runtime.TxnScheduler.GroupSize), - int(rt.Runtime.Merge.GroupSize)+int(rt.Runtime.Merge.GroupBackupSize), nStorage, ) @@ -141,7 +132,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend ensureValidCommittees( 3, int(rt.Runtime.TxnScheduler.GroupSize), - int(rt.Runtime.Merge.GroupSize)+int(rt.Runtime.Merge.GroupBackupSize), 1, ) @@ -196,9 +186,6 @@ func requireValidCommitteeMembers(t *testing.T, committee *api.Committee, runtim case api.KindComputeExecutor: require.EqualValues(runtime.Executor.GroupSize, workers, "executor committee should have the correct number of workers") require.EqualValues(runtime.Executor.GroupBackupSize, backups, "executor committee should have the correct number of backup workers") - case api.KindComputeMerge: - require.EqualValues(runtime.Merge.GroupSize, workers, "merge committee should have the correct number of workers") - require.EqualValues(runtime.Merge.GroupBackupSize, backups, "merge committee should have the correct number of 
backup workers") case api.KindStorage, api.KindComputeTxnScheduler: numCommitteeMembersWithoutLeader := len(committee.Members) needsLeader, err := committee.Kind.NeedsLeader() diff --git a/go/worker/common/committee/group.go b/go/worker/common/committee/group.go index 54ccd2e753d..545c1283972 100644 --- a/go/worker/common/committee/group.go +++ b/go/worker/common/committee/group.go @@ -9,7 +9,6 @@ import ( opentracingExt "github.com/opentracing/opentracing-go/ext" "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/identity" "github.com/oasisprotocol/oasis-core/go/common/logging" @@ -18,8 +17,6 @@ import ( consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" "github.com/oasisprotocol/oasis-core/go/epochtime/api" registry "github.com/oasisprotocol/oasis-core/go/registry/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" "github.com/oasisprotocol/oasis-core/go/runtime/committee" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" @@ -56,21 +53,12 @@ type epoch struct { // executorCommittee is the executor committee we are a member of. executorCommittee *CommitteeInfo - // executorCommitteeID is the identifier of our executor committee. - executorCommitteeID hash.Hash - // executorCommittees are all executor committees. - executorCommittees map[hash.Hash]*CommitteeInfo - // executorCommitteeMemberSet is a set of node public keys of executor committee members. - executorCommitteeMemberSet map[signature.PublicKey]bool // txnSchedulerCommitee is the txn scheduler committee we are a member of. txnSchedulerCommittee *CommitteeInfo // txnSchedulerLeader is the node public key of txn scheduler leader. 
txnSchedulerLeader signature.PublicKey - // mergeCommittee is the merge committee we are a member of. - mergeCommittee *CommitteeInfo - // storageCommittee is the storage committee we are a member of. storageCommittee *CommitteeInfo @@ -81,36 +69,20 @@ type epoch struct { type EpochSnapshot struct { groupVersion int64 - executorCommitteeID hash.Hash - epochNumber api.EpochTime executorRole scheduler.Role txnSchedulerRole scheduler.Role - mergeRole scheduler.Role runtime *registry.Runtime - executorCommittees map[hash.Hash]*CommitteeInfo + executorCommittee *CommitteeInfo txnSchedulerCommittee *CommitteeInfo - mergeCommittee *CommitteeInfo storageCommittee *CommitteeInfo nodes committee.NodeDescriptorLookup } -// NewMockEpochSnapshot returns a mock epoch snapshot to be used in tests. -func NewMockEpochSnapshot() *EpochSnapshot { - executorCommitteeID := hash.NewFromBytes([]byte("mock committee id")) - - return &EpochSnapshot{ - executorCommitteeID: executorCommitteeID, - executorCommittees: map[hash.Hash]*CommitteeInfo{ - executorCommitteeID: {}, - }, - } -} - // GetGroupVersion returns the consensus backend block height of the last // processed committee election. func (e *EpochSnapshot) GetGroupVersion() int64 { @@ -122,17 +94,9 @@ func (e *EpochSnapshot) GetRuntime() *registry.Runtime { return e.runtime } -// GetExecutorCommittees returns the current executor committees. -func (e *EpochSnapshot) GetExecutorCommittees() map[hash.Hash]*CommitteeInfo { - return e.executorCommittees -} - -// GetExecutorCommitteeID returns ID of the executor committee the current node is -// a member of. -// -// NOTE: Will return an invalid all-zero ID if not a member. -func (e *EpochSnapshot) GetExecutorCommitteeID() hash.Hash { - return e.executorCommitteeID +// GetExecutorCommittee returns the current executor committee. +func (e *EpochSnapshot) GetExecutorCommittee() *CommitteeInfo { + return e.executorCommittee } // GetEpochNumber returns the sequential number of the epoch. 
@@ -169,29 +133,6 @@ func (e *EpochSnapshot) IsTransactionSchedulerLeader() bool { return e.txnSchedulerRole == scheduler.Leader } -// GetMergeCommittee returns the current merge committee. -func (e *EpochSnapshot) GetMergeCommittee() *CommitteeInfo { - return e.mergeCommittee -} - -// IsMergeMember checks if the current node is a member of the merge committee -// in the current epoch. -func (e *EpochSnapshot) IsMergeMember() bool { - return e.mergeRole != scheduler.Invalid -} - -// IsMergeWorker checks if the current node is a worker of the merge committee in -// the current epoch. -func (e *EpochSnapshot) IsMergeWorker() bool { - return e.mergeRole == scheduler.Worker -} - -// IsMergeBackupWorker checks if the current node is a backup worker of the merge committee in -// the current epoch. -func (e *EpochSnapshot) IsMergeBackupWorker() bool { - return e.mergeRole == scheduler.BackupWorker -} - // GetStorageCommittee returns the current storage committee. func (e *EpochSnapshot) GetStorageCommittee() *CommitteeInfo { return e.storageCommittee @@ -322,10 +263,7 @@ func (g *Group) EpochTransition(ctx context.Context, height int64) error { } // Find the current committees. - executorCommittees := make(map[hash.Hash]*CommitteeInfo) - executorCommitteeMemberSet := make(map[signature.PublicKey]bool) - var executorCommittee, txnSchedulerCommittee, mergeCommittee, storageCommittee *CommitteeInfo - var executorCommitteeID hash.Hash + var executorCommittee, txnSchedulerCommittee, storageCommittee *CommitteeInfo var txnSchedulerLeader signature.PublicKey publicIdentity := g.identity.NodeSigner.Public() for _, cm := range committees { @@ -356,41 +294,22 @@ func (g *Group) EpochTransition(ctx context.Context, height int64) error { switch cm.Kind { case scheduler.KindComputeExecutor: - // There can be multiple executor committees per runtime. 
- cID := cm.EncodedMembersHash() - executorCommittees[cID] = ci - if role != scheduler.Invalid { - if executorCommittee != nil { - return fmt.Errorf("member of multiple executor committees") - } - - executorCommittee = ci - executorCommitteeID = cID - } - - for _, m := range cm.Members { - executorCommitteeMemberSet[m.PublicKey] = true - } + executorCommittee = ci case scheduler.KindComputeTxnScheduler: txnSchedulerCommittee = ci if leader.IsValid() { txnSchedulerLeader = leader } - case scheduler.KindComputeMerge: - mergeCommittee = ci case scheduler.KindStorage: storageCommittee = ci } } - if len(executorCommittees) == 0 { - return fmt.Errorf("no executor committees") + if executorCommittee == nil { + return fmt.Errorf("no executor committee") } if txnSchedulerCommittee == nil { return fmt.Errorf("no transaction scheduler committee") } - if mergeCommittee == nil { - return fmt.Errorf("no merge committee") - } if storageCommittee == nil { return fmt.Errorf("no storage committee") } @@ -413,34 +332,23 @@ func (g *Group) EpochTransition(ctx context.Context, height int64) error { // Update the current epoch. g.activeEpoch = &epoch{ - epochNumber: epochNumber, - epochCtx: epochCtx, - cancelEpochCtx: cancelEpochCtx, - roundCtx: roundCtx, - cancelRoundCtx: cancelRoundCtx, - groupVersion: height, - executorCommittee: executorCommittee, - executorCommitteeID: executorCommitteeID, - executorCommittees: executorCommittees, - executorCommitteeMemberSet: executorCommitteeMemberSet, - txnSchedulerCommittee: txnSchedulerCommittee, - txnSchedulerLeader: txnSchedulerLeader, - mergeCommittee: mergeCommittee, - storageCommittee: storageCommittee, - runtime: runtime, - } - - // Executor committee may be nil in case we are not a member of any committee. 
- var executorRole scheduler.Role - if executorCommittee != nil { - executorRole = executorCommittee.Role + epochNumber: epochNumber, + epochCtx: epochCtx, + cancelEpochCtx: cancelEpochCtx, + roundCtx: roundCtx, + cancelRoundCtx: cancelRoundCtx, + groupVersion: height, + executorCommittee: executorCommittee, + txnSchedulerCommittee: txnSchedulerCommittee, + txnSchedulerLeader: txnSchedulerLeader, + storageCommittee: storageCommittee, + runtime: runtime, } g.logger.Info("epoch transition complete", "group_version", height, - "executor_role", executorRole, + "executor_role", executorCommittee.Role, "txn_scheduler_role", txnSchedulerCommittee.Role, - "merge_role", mergeCommittee.Role, ) return nil @@ -461,26 +369,17 @@ func (g *Group) GetEpochSnapshot() *EpochSnapshot { } s := &EpochSnapshot{ - epochNumber: g.activeEpoch.epochNumber, - groupVersion: g.activeEpoch.groupVersion, - // NOTE: Transaction scheduler and merge committees are always set. + epochNumber: g.activeEpoch.epochNumber, + groupVersion: g.activeEpoch.groupVersion, + executorRole: g.activeEpoch.executorCommittee.Role, txnSchedulerRole: g.activeEpoch.txnSchedulerCommittee.Role, - mergeRole: g.activeEpoch.mergeCommittee.Role, runtime: g.activeEpoch.runtime, - executorCommittees: g.activeEpoch.executorCommittees, + executorCommittee: g.activeEpoch.executorCommittee, txnSchedulerCommittee: g.activeEpoch.txnSchedulerCommittee, - mergeCommittee: g.activeEpoch.mergeCommittee, storageCommittee: g.activeEpoch.storageCommittee, nodes: g.nodes, } - // Executor committee may be nil in case we are not a member of any committee. - xc := g.activeEpoch.executorCommittee - if xc != nil { - s.executorRole = xc.Role - s.executorCommitteeID = g.activeEpoch.executorCommitteeID - } - return s } @@ -498,21 +397,13 @@ func (g *Group) AuthenticatePeer(peerID signature.PublicKey, msg *p2p.Message) e // If we are in the executor committee, we accept messages from the transaction // scheduler committee leader. 
- if g.activeEpoch.executorCommittee != nil && g.activeEpoch.txnSchedulerLeader.IsValid() { + if g.activeEpoch.executorCommittee.Role != scheduler.Invalid && g.activeEpoch.txnSchedulerLeader.IsValid() { n := g.nodes.LookupByPeerID(peerID) if n != nil { authorized = authorized || g.activeEpoch.txnSchedulerLeader.Equal(n.ID) } } - // If we are in the merge committee, we accept messages from any executor committee member. - if g.activeEpoch.mergeCommittee.Role != scheduler.Invalid { - n := g.nodes.LookupByPeerID(peerID) - if n != nil { - authorized = authorized || g.activeEpoch.executorCommitteeMemberSet[n.ID] - } - } - if !authorized { err := fmt.Errorf("group: peer is not authorized") @@ -567,14 +458,17 @@ func (g *Group) HandlePeerMessage(unusedPeerID signature.PublicKey, msg *p2p.Mes return g.handler.HandlePeerMessage(ctx, msg) } -func (g *Group) publishLocked( - spanCtx opentracing.SpanContext, - ci *CommitteeInfo, - msg *p2p.Message, -) error { +// Publish publishes a message to the P2P network. +func (g *Group) Publish(spanCtx opentracing.SpanContext, msg *p2p.Message) error { + g.RLock() + defer g.RUnlock() + if g.p2p == nil { return fmt.Errorf("group: p2p transport is not enabled") } + if g.activeEpoch == nil { + return fmt.Errorf("group: no active epoch") + } pubCtx := g.activeEpoch.roundCtx @@ -587,73 +481,12 @@ func (g *Group) publishLocked( msg.GroupVersion = g.activeEpoch.groupVersion msg.SpanContext = scBinary - // Publish batch to given committee. + // Publish message to the P2P network. g.p2p.Publish(pubCtx, g.runtimeID, msg) return nil } -// PublishScheduledBatch publishes a batch to all members in the executor committee. -// Returns the transaction scheduler's signature for this batch. 
-func (g *Group) PublishScheduledBatch( - spanCtx opentracing.SpanContext, - committeeID hash.Hash, - ioRoot hash.Hash, - storageSignatures []signature.Signature, - hdr block.Header, -) (*signature.Signature, error) { - g.RLock() - defer g.RUnlock() - - if g.activeEpoch == nil || g.activeEpoch.txnSchedulerCommittee.Role != scheduler.Leader { - return nil, fmt.Errorf("group: not leader of txn scheduler committee") - } - - xc := g.activeEpoch.executorCommittees[committeeID] - if xc == nil { - return nil, fmt.Errorf("group: invalid executor committee") - } - - dispatchMsg := &commitment.TxnSchedulerBatch{ - CommitteeID: committeeID, - IORoot: ioRoot, - StorageSignatures: storageSignatures, - Header: hdr, - } - - signedDispatchMsg, err := commitment.SignTxnSchedulerBatch(g.identity.NodeSigner, dispatchMsg) - if err != nil { - return nil, fmt.Errorf("group: unable to sign txn scheduler batch dispatch msg: %w", err) - } - - return &signedDispatchMsg.Signature, g.publishLocked( - spanCtx, - xc, - &p2p.Message{ - TxnSchedulerBatch: signedDispatchMsg, - }, - ) -} - -// PublishExecuteFinished publishes an execute commitment to all members in the merge -// committee. -func (g *Group) PublishExecuteFinished(spanCtx opentracing.SpanContext, c *commitment.ExecutorCommitment) error { - g.RLock() - defer g.RUnlock() - - if g.activeEpoch == nil || g.activeEpoch.executorCommittee == nil { - return fmt.Errorf("group: not member of executor committee") - } - - return g.publishLocked( - spanCtx, - g.activeEpoch.mergeCommittee, - &p2p.Message{ - ExecutorCommit: c, - }, - ) -} - // NewGroup creates a new group. 
func NewGroup( ctx context.Context, diff --git a/go/worker/compute/executor/committee/fault.go b/go/worker/compute/executor/committee/fault.go deleted file mode 100644 index 109787609ef..00000000000 --- a/go/worker/compute/executor/committee/fault.go +++ /dev/null @@ -1,186 +0,0 @@ -package committee - -import ( - "context" - "time" - - "github.com/oasisprotocol/oasis-core/go/common/logging" - consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - runtimeRegistry "github.com/oasisprotocol/oasis-core/go/runtime/registry" -) - -type faultSubmitter interface { - // SubmitExecutorCommit submits an executor commitment when a fault is detected. - SubmitExecutorCommit(ctx context.Context, commit *commitment.ExecutorCommitment) error -} - -type nodeFaultSubmitter struct { - node *Node -} - -// Implements faultSubmitter. -func (nf *nodeFaultSubmitter) SubmitExecutorCommit(ctx context.Context, commit *commitment.ExecutorCommitment) error { - tx := roothash.NewExecutorCommitTx(0, nil, nf.node.commonNode.Runtime.ID(), []commitment.ExecutorCommitment{*commit}) - return consensus.SignAndSubmitTx(ctx, nf.node.commonNode.Consensus, nf.node.commonNode.Identity.NodeSigner, tx) -} - -func newNodeFaultSubmitter(node *Node) faultSubmitter { - return &nodeFaultSubmitter{node} -} - -type faultDetector struct { - runtime runtimeRegistry.Runtime - submitter faultSubmitter - commit *commitment.ExecutorCommitment - - quitCh chan struct{} - eventCh chan *roothash.Event - - logger *logging.Logger -} - -func (d *faultDetector) notify(ev *roothash.Event) { - select { - case <-d.quitCh: - // In case the worker has quit, prevent blocking on the event channel. 
- case d.eventCh <- ev: - } -} - -func (d *faultDetector) submit(ctx context.Context) { - d.logger.Warn("independently submitting executor commit") - - err := d.submitter.SubmitExecutorCommit(ctx, d.commit) - switch err { - case nil: - d.logger.Info("independently submitted executor commit") - default: - d.logger.Error("failed to submit executor commit independently", - "err", err, - ) - } -} - -func (d *faultDetector) worker(ctx context.Context) { - // We should submit the commitment immediately in case when: - // - // - We see a merge commit that does not have our commitment. - // - We see an executor commit. - // - RoundTimeout elapses without seeing our commitment. - // - defer close(d.quitCh) - - // Determine the round timeout and start a local timer. - rtDesc, err := d.runtime.RegistryDescriptor(ctx) - if err != nil { - d.logger.Error("failed to retrieve runtime registry descriptor", - "err", err, - ) - return - } - // Add a small amount to compensate for network latency. - timer := time.NewTimer(rtDesc.Executor.RoundTimeout + 1*time.Second) - - // Extract committee ID for easier comparison. - openCommit, err := d.commit.Open() - if err != nil { - // This should NEVER happen. - d.logger.Error("bad own commitment", - "err", err, - ) - return - } - - // TODO: Once we have P2P gossipsub also look at gossiped commitments in addition to consensus. - - for { - select { - case <-ctx.Done(): - return - case <-timer.C: - // Local round timeout expired. - d.logger.Warn("local round timeout expired without seeing our commitment") - go d.submit(ctx) - return - case ev := <-d.eventCh: - // Received a roothash event for our runtime. - switch { - case ev.ExecutorCommitted != nil: - // Executor committed independently, check if it is for our committee. - ec, err := ev.ExecutorCommitted.Commit.Open() - if err != nil { - // This should NEVER happen as the consensus backend verifies this. 
- d.logger.Error("bad executor commitment from consensus backend?", - "err", err, - ) - continue - } - - if !ec.Body.CommitteeID.Equal(&openCommit.Body.CommitteeID) { - continue - } - - // If this is our own commit (in theory anyone could submit it on our behalf), we - // don't need to do anything. - if ec.Equal(d.commit) { - d.logger.Info("our commitment has been submitted to consensus layer by an external party") - return - } - - // Executor committed independently, we should too as this means that so far we have - // not seen any separate commitments. - d.logger.Warn("seen another executor independently submit commitments, following", - "executor_node_id", ev.ExecutorCommitted.Commit.Signature.PublicKey, - ) - go d.submit(ctx) - return - case ev.MergeCommitted != nil: - // Merge node committed. If our commit is included, then we can stop as there is at - // least one honest merge node. - mc, err := ev.MergeCommitted.Commit.Open() - if err != nil { - // This should NEVER happen as the consensus backend verifies this. - d.logger.Error("bad merge commitment from consensus backend?", - "err", err, - ) - continue - } - - for _, ec := range mc.Body.ExecutorCommits { - if ec.Equal(d.commit) { - // Found our commitment, stop right here. - d.logger.Info("our commitment has been submitted to consensus layer by an honest merge node") - return - } - } - - // A merge node submitted commitments but didn't include ours. 
- d.logger.Warn("seen merge commitment without our commitment", - "merge_node_id", ev.MergeCommitted.Commit.Signature.PublicKey, - ) - go d.submit(ctx) - return - } - } - } -} - -func newFaultDetector( - ctx context.Context, - rt runtimeRegistry.Runtime, - commit *commitment.ExecutorCommitment, - submitter faultSubmitter, -) *faultDetector { - d := &faultDetector{ - runtime: rt, - submitter: submitter, - commit: commit, - quitCh: make(chan struct{}), - eventCh: make(chan *roothash.Event), - logger: logging.GetLogger("worker/executor/committee/fault").With("runtime_id", rt.ID()), - } - go d.worker(ctx) - return d -} diff --git a/go/worker/compute/executor/committee/fault_test.go b/go/worker/compute/executor/committee/fault_test.go deleted file mode 100644 index d039063b459..00000000000 --- a/go/worker/compute/executor/committee/fault_test.go +++ /dev/null @@ -1,247 +0,0 @@ -package committee - -import ( - "context" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - memorySigner "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/memory" - "github.com/oasisprotocol/oasis-core/go/common/pubsub" - genesisTestHelpers "github.com/oasisprotocol/oasis-core/go/genesis/tests" - registry "github.com/oasisprotocol/oasis-core/go/registry/api" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - "github.com/oasisprotocol/oasis-core/go/runtime/history" - "github.com/oasisprotocol/oasis-core/go/runtime/localstorage" - "github.com/oasisprotocol/oasis-core/go/runtime/tagindexer" - storage "github.com/oasisprotocol/oasis-core/go/storage/api" -) - -type testFaultSubmitter struct { - sync.Mutex - - faults []commitment.ExecutorCommitment -} - -// Implements faultSubmitter. 
-func (tf *testFaultSubmitter) SubmitExecutorCommit(ctx context.Context, commit *commitment.ExecutorCommitment) error { - tf.Lock() - defer tf.Unlock() - - tf.faults = append(tf.faults, *commit) - return nil -} - -func (tf *testFaultSubmitter) getFaults() []commitment.ExecutorCommitment { - tf.Lock() - defer tf.Unlock() - - return append([]commitment.ExecutorCommitment{}, tf.faults...) -} - -type testRuntime struct { -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) ID() common.Namespace { - return common.Namespace{} -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) RegistryDescriptor(ctx context.Context) (*registry.Runtime, error) { - return &registry.Runtime{}, nil -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) WatchRegistryDescriptor() (<-chan *registry.Runtime, pubsub.ClosableSubscription, error) { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) History() history.History { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) TagIndexer() tagindexer.QueryableBackend { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) Storage() storage.Backend { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime.
-func (rt *testRuntime) LocalStorage() localstorage.LocalStorage { - panic("not implemented") -} - -func TestFaultDetector(t *testing.T) { - require := require.New(t) - - genesisTestHelpers.SetTestChainContext() - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test") - commit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - rt := testRuntime{} - - for _, tc := range []struct { - name string - fn func(*testing.T, *faultDetector, *testFaultSubmitter, *commitment.ExecutorCommitment) - }{ - {"Timeout", testFaultDetectorTimeout}, - {"EarlyExecutor", testFaultDetectorEarlyExecutor}, - {"ExternalSubmission", testFaultDetectorExternalSubmission}, - {"FaultyMerge", testFaultDetectorFaultyMerge}, - {"HonestMerge", testFaultDetectorHonestMerge}, - } { - t.Run(tc.name, func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - tf := testFaultSubmitter{} - fd := newFaultDetector(ctx, &rt, commit, &tf) - - tc.fn(t, fd, &tf, commit) - }) - } -} - -func testFaultDetectorTimeout(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - // The fault detector should timeout in one second even if we don't do any notifies. 
- time.Sleep(1200 * time.Millisecond) - - faults := tf.getFaults() - require.Len(faults, 1, "fault detector should submit commitment after timeout") - require.EqualValues(*commit, faults[0], "the submitted commitment should be the same") -} - -func testFaultDetectorEarlyExecutor(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test: EarlyExecutor") - earlyCommit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{ - CommitteeID: hash.NewFromBytes([]byte("EarlyExecutorBadCommitteeID")), - }) - require.NoError(err, "SignExecutorCommitment") - - // Nothing should happen if the commitee ID doesn't match. - fd.notify(&roothash.Event{ - ExecutorCommitted: &roothash.ExecutorCommittedEvent{ - Commit: *earlyCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should be no submissions. - faults := tf.getFaults() - require.Len(faults, 0, "fault detector should not submit anything in case of events for other committees") - - // Notify the detector of an early executor submitting their commitment. - earlyCommit, err = commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - fd.notify(&roothash.Event{ - ExecutorCommitted: &roothash.ExecutorCommittedEvent{ - Commit: *earlyCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should be a submission. 
- faults = tf.getFaults() - require.Len(faults, 1, "fault detector should submit commitment after early executor") - require.EqualValues(*commit, faults[0], "the submitted commitment should be the same") -} - -func testFaultDetectorExternalSubmission(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - // Notify the detector of an external process submitting our commitment. - fd.notify(&roothash.Event{ - ExecutorCommitted: &roothash.ExecutorCommittedEvent{ - Commit: *commit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should not be a submission. - faults := tf.getFaults() - require.Len(faults, 0, "fault detector should not submit commitment after seeing own commit") - - // The fault detector should stop after seeing an honest merge node, so even waiting for the - // timeout amount should not trigger it. - time.Sleep(1200 * time.Millisecond) - - faults = tf.getFaults() - require.Len(faults, 0, "fault detector should be stopped") -} - -func testFaultDetectorFaultyMerge(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test: FaultyMerge") - earlyCommit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - mergeCommit, err := commitment.SignMergeCommitment(signer, &commitment.MergeBody{ - ExecutorCommits: []commitment.ExecutorCommitment{*earlyCommit}, - }) - require.NoError(err, "SignMergeCommitment") - - // Notify the detector of a merge commit that does not include own commit. - fd.notify(&roothash.Event{ - MergeCommitted: &roothash.MergeCommittedEvent{ - Commit: *mergeCommit, - }, - }) - // Give the fault detector some time to process requests. 
- time.Sleep(100 * time.Millisecond) - // There should be a submission. - faults := tf.getFaults() - require.Len(faults, 1, "fault detector should submit commitment after merge without own commit") - require.EqualValues(*commit, faults[0], "the submitted commitment should be the same") -} - -func testFaultDetectorHonestMerge(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test: HonestMerge") - earlyCommit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - // Merge commit that includes our commit -- should not trigger a submission. - mergeCommit, err := commitment.SignMergeCommitment(signer, &commitment.MergeBody{ - ExecutorCommits: []commitment.ExecutorCommitment{*commit, *earlyCommit}, - }) - require.NoError(err, "SignMergeCommitment") - - // Notify the detector of a merge commit that includes own commit. - fd.notify(&roothash.Event{ - MergeCommitted: &roothash.MergeCommittedEvent{ - Commit: *mergeCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should not be a submission. - faults := tf.getFaults() - require.Len(faults, 0, "fault detector should not submit commitment after merge without own commit") - - // The fault detector should stop after seeing an honest merge node, so even waiting for the - // timeout amount should not trigger it. 
- time.Sleep(1200 * time.Millisecond) - - faults = tf.getFaults() - require.Len(faults, 0, "fault detector should be stopped") -} diff --git a/go/worker/compute/executor/committee/node.go b/go/worker/compute/executor/committee/node.go index a230ea2d7e3..a62b1ad5802 100644 --- a/go/worker/compute/executor/committee/node.go +++ b/go/worker/compute/executor/committee/node.go @@ -20,6 +20,7 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/pubsub" "github.com/oasisprotocol/oasis-core/go/common/tracing" "github.com/oasisprotocol/oasis-core/go/common/version" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" @@ -32,7 +33,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" p2pError "github.com/oasisprotocol/oasis-core/go/worker/common/p2p/error" - mergeCommittee "github.com/oasisprotocol/oasis-core/go/worker/compute/merge/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) @@ -114,7 +114,6 @@ type Node struct { *commonWorker.RuntimeHostNode commonNode *committee.Node - mergeNode *mergeCommittee.Node commonCfg commonWorker.Config roleProvider registration.RoleProvider @@ -138,9 +137,6 @@ type Node struct { // Bump this when we need to change what the worker selects over. reselect chan struct{} - // Guarded by .commonNode.CrossNode. 
- faultDetector *faultDetector - logger *logging.Logger } @@ -213,7 +209,7 @@ func (n *Node) HandlePeerMessage(ctx context.Context, message *p2p.Message) (boo return false, p2pError.Permanent(err) } - err := n.queueBatchBlocking(ctx, bd.CommitteeID, bd.IORoot, bd.StorageSignatures, bd.Header, sbd.Signature) + err := n.queueBatchBlocking(ctx, bd.IORoot, bd.StorageSignatures, bd.Header, sbd.Signature) if err != nil { return false, err } @@ -224,7 +220,6 @@ func (n *Node) HandlePeerMessage(ctx context.Context, message *p2p.Message) (boo func (n *Node) queueBatchBlocking( ctx context.Context, - committeeID hash.Hash, ioRootHash hash.Hash, storageSignatures []signature.Signature, hdr block.Header, @@ -292,25 +287,18 @@ func (n *Node) queueBatchBlocking( n.commonNode.CrossNode.Lock() defer n.commonNode.CrossNode.Unlock() - return n.handleExternalBatchLocked(committeeID, batch, hdr) + return n.handleExternalBatchLocked(batch, hdr) } // HandleBatchFromTransactionSchedulerLocked processes a batch from the transaction scheduler. // Guarded by n.commonNode.CrossNode. func (n *Node) HandleBatchFromTransactionSchedulerLocked( batchSpanCtx opentracing.SpanContext, - committeeID hash.Hash, ioRoot hash.Hash, batch transaction.RawBatch, txnSchedSig signature.Signature, inputStorageSigs []signature.Signature, ) { - epoch := n.commonNode.Group.GetEpochSnapshot() - expectedID := epoch.GetExecutorCommitteeID() - if !expectedID.Equal(&committeeID) { - return - } - n.maybeStartProcessingBatchLocked(&unresolvedBatch{ ioRoot: storage.Root{ Namespace: n.commonNode.CurrentBlock.Header.Namespace, @@ -602,7 +590,6 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { // Generate proposed compute results. 
proposedResults := &commitment.ComputeBody{ - CommitteeID: epoch.GetExecutorCommitteeID(), Header: batch.Header, RakSig: batch.RakSig, TxnSchedSig: state.batch.txnSchedSignature, @@ -664,7 +651,7 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { ) return err } - if err = proposedResults.VerifyStorageReceipt(lastHeader.Namespace, lastHeader.Round+1, &receiptBody); err != nil { + if err = proposedResults.VerifyStorageReceipt(lastHeader.Namespace, &receiptBody); err != nil { n.logger.Error("failed to validate receipt body", "receipt body", receiptBody, "err", err, @@ -690,7 +677,7 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { return } - // Commit. + // Sign the commitment and submit. commit, err := commitment.SignExecutorCommitment(n.commonNode.Identity.NodeSigner, proposedResults) if err != nil { n.logger.Error("failed to sign commitment", @@ -700,71 +687,39 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { return } - // Publish commitment to merge committee. - spanPublish := opentracing.StartSpan("PublishExecuteFinished(commitment)", - opentracing.ChildOf(state.batch.spanCtx), - ) - err = n.commonNode.Group.PublishExecuteFinished(state.batch.spanCtx, commit) - if err != nil { - spanPublish.Finish() - n.logger.Error("failed to publish results to committee", - "err", err, - ) - n.abortBatchLocked(err) - return - } - spanPublish.Finish() + // Publish commitment to the consensus layer. + tx := roothash.NewExecutorCommitTx(0, nil, n.commonNode.Runtime.ID(), []commitment.ExecutorCommitment{*commit}) + go func() { + commitErr := consensus.SignAndSubmitTx(n.roundCtx, n.commonNode.Consensus, n.commonNode.Identity.NodeSigner, tx) + switch commitErr { + case nil: + n.logger.Info("executor commit finalized") + default: + n.logger.Error("failed to submit executor commit", + "err", commitErr, + ) + } + }() // TODO: Add crash point. 
- // Set up the fault detector so that we can submit the commitment independently from any other - // merge nodes in case a fault is detected (which would indicate that the entire merge committee - // is faulty). - n.faultDetector = newFaultDetector(n.roundCtx, n.commonNode.Runtime, commit, newNodeFaultSubmitter(n)) - n.transitionLocked(StateWaitingForFinalize{ batchStartTime: state.batchStartTime, }) - if epoch.IsMergeMember() { - if n.mergeNode == nil { - n.logger.Error("scheduler says we are a merge worker, but we are not") - } else { - n.mergeNode.HandleResultsFromExecutorWorkerLocked(state.batch.spanCtx, commit) - } - } - crash.Here(crashPointBatchProposeAfter) } // HandleNewEventLocked implements NodeHooks. // Guarded by n.commonNode.CrossNode. func (n *Node) HandleNewEventLocked(ev *roothash.Event) { - // In case a fault detector exists, notify it of events. - if n.faultDetector != nil { - n.faultDetector.notify(ev) - } - dis := ev.ExecutionDiscrepancyDetected if dis == nil { // Ignore other events. return } - // Check if the discrepancy occurred in our committee. - epoch := n.commonNode.Group.GetEpochSnapshot() - expectedID := epoch.GetExecutorCommitteeID() - if !expectedID.Equal(&dis.CommitteeID) { - n.logger.Debug("ignoring discrepancy event for a different committee", - "expected_committee", expectedID, - "committee", dis.CommitteeID, - ) - return - } - - n.logger.Warn("execution discrepancy detected", - "committee_id", dis.CommitteeID, - ) + n.logger.Warn("execution discrepancy detected") crash.Here(crashPointDiscrepancyDetectedAfter) @@ -803,7 +758,7 @@ func (n *Node) HandleNodeUpdateLocked(update *runtimeCommittee.NodeUpdate, snaps } // Guarded by n.commonNode.CrossNode. -func (n *Node) handleExternalBatchLocked(committeeID hash.Hash, batch *unresolvedBatch, hdr block.Header) error { +func (n *Node) handleExternalBatchLocked(batch *unresolvedBatch, hdr block.Header) error { // If we are not waiting for a batch, don't do anything. 
if _, ok := n.state.(StateWaitingForBatch); !ok { return errIncorrectState @@ -817,16 +772,6 @@ func (n *Node) handleExternalBatchLocked(committeeID hash.Hash, batch *unresolve return errIncorrectRole } - // We only accept batches for our own committee. - expectedID := epoch.GetExecutorCommitteeID() - if !expectedID.Equal(&committeeID) { - n.logger.Error("got external batch for a different executor committee", - "expected_committee", expectedID, - "committee", committeeID, - ) - return nil - } - // Check if we have the correct block -- in this case, start processing the batch. if n.commonNode.CurrentBlock.Header.MostlyEqual(&hdr) { n.maybeStartProcessingBatchLocked(batch) @@ -989,7 +934,6 @@ func (n *Node) worker() { func NewNode( commonNode *committee.Node, - mergeNode *mergeCommittee.Node, commonCfg commonWorker.Config, roleProvider registration.RoleProvider, ) (*Node, error) { @@ -1008,7 +952,6 @@ func NewNode( n := &Node{ RuntimeHostNode: rhn, commonNode: commonNode, - mergeNode: mergeNode, commonCfg: commonCfg, roleProvider: roleProvider, ctx: ctx, diff --git a/go/worker/compute/executor/init.go b/go/worker/compute/executor/init.go index 06b5c96a5d7..aef900e93d1 100644 --- a/go/worker/compute/executor/init.go +++ b/go/worker/compute/executor/init.go @@ -3,7 +3,6 @@ package executor import ( workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/compute" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) @@ -11,8 +10,7 @@ import ( func New( dataDir string, commonWorker *workerCommon.Worker, - mergeWorker *merge.Worker, registration *registration.Worker, ) (*Worker, error) { - return newWorker(dataDir, compute.Enabled(), commonWorker, mergeWorker, registration) + return newWorker(dataDir, compute.Enabled(), commonWorker, registration) } diff --git a/go/worker/compute/executor/worker.go b/go/worker/compute/executor/worker.go 
index 911ab911133..9013eace4b8 100644 --- a/go/worker/compute/executor/worker.go +++ b/go/worker/compute/executor/worker.go @@ -10,7 +10,6 @@ import ( workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/compute/executor/committee" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) @@ -19,7 +18,6 @@ type Worker struct { enabled bool commonWorker *workerCommon.Worker - merge *merge.Worker registration *registration.Worker runtimes map[common.Namespace]*committee.Node @@ -141,16 +139,13 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { "runtime_id", id, ) - // Get other nodes from this runtime. - mergeNode := w.merge.GetRuntime(id) - rp, err := w.registration.NewRuntimeRoleProvider(node.RoleComputeWorker, id) if err != nil { return fmt.Errorf("failed to create role provider: %w", err) } // Create committee node for the given runtime. 
- node, err := committee.NewNode(commonNode, mergeNode, w.commonWorker.GetConfig(), rp) + node, err := committee.NewNode(commonNode, w.commonWorker.GetConfig(), rp) if err != nil { return err } @@ -169,7 +164,6 @@ func newWorker( dataDir string, enabled bool, commonWorker *workerCommon.Worker, - merge *merge.Worker, registration *registration.Worker, ) (*Worker, error) { ctx, cancelCtx := context.WithCancel(context.Background()) @@ -177,7 +171,6 @@ func newWorker( w := &Worker{ enabled: enabled, commonWorker: commonWorker, - merge: merge, registration: registration, runtimes: make(map[common.Namespace]*committee.Node), ctx: ctx, diff --git a/go/worker/compute/init.go b/go/worker/compute/init.go index 228fd029ec2..2cd78fa5b8c 100644 --- a/go/worker/compute/init.go +++ b/go/worker/compute/init.go @@ -6,7 +6,7 @@ import ( ) const ( - // CfgWorkerEnabled enables the compute worker, tx scheduler worker, and merge worker. + // CfgWorkerEnabled enables the compute worker and the tx scheduler worker. 
CfgWorkerEnabled = "worker.compute.enabled" ) diff --git a/go/worker/compute/merge/committee/node.go b/go/worker/compute/merge/committee/node.go deleted file mode 100644 index 26b2e888e84..00000000000 --- a/go/worker/compute/merge/committee/node.go +++ /dev/null @@ -1,748 +0,0 @@ -package committee - -import ( - "context" - "errors" - "fmt" - "math" - "sync" - "time" - - "github.com/cenkalti/backoff/v4" - "github.com/opentracing/opentracing-go" - "github.com/prometheus/client_golang/prometheus" - - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" - "github.com/oasisprotocol/oasis-core/go/common/logging" - "github.com/oasisprotocol/oasis-core/go/common/node" - "github.com/oasisprotocol/oasis-core/go/common/pubsub" - consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - runtimeCommittee "github.com/oasisprotocol/oasis-core/go/runtime/committee" - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" - "github.com/oasisprotocol/oasis-core/go/worker/common/committee" - "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" - "github.com/oasisprotocol/oasis-core/go/worker/registration" -) - -var ( - errIncorrectState = errors.New("merge: incorrect state") - errSeenNewerBlock = errors.New("merge: seen newer block") - errMergeFailed = errors.New("merge: failed to perform merge") -) - -var ( - discrepancyDetectedCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "oasis_worker_merge_discrepancy_detected_count", - Help: "Number of detected merge discrepancies.", - }, - []string{"runtime"}, - ) - roothashCommitLatency = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Name: "oasis_worker_roothash_merge_commit_latency", - Help: "Latency of roothash merge 
commit (seconds).", - }, - []string{"runtime"}, - ) - abortedMergeCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "oasis_worker_aborted_merge_count", - Help: "Number of aborted merges.", - }, - []string{"runtime"}, - ) - inconsistentMergeRootCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "oasis_worker_inconsistent_merge_root_count", - Help: "Number of inconsistent merge roots.", - }, - []string{"runtime"}, - ) - nodeCollectors = []prometheus.Collector{ - discrepancyDetectedCount, - roothashCommitLatency, - abortedMergeCount, - inconsistentMergeRootCount, - } - - metricsOnce sync.Once - - infiniteTimeout = time.Duration(math.MaxInt64) -) - -// Node is a committee node. -type Node struct { // nolint: maligned - commonNode *committee.Node - commonCfg workerCommon.Config - - roleProvider registration.RoleProvider - - ctx context.Context - cancelCtx context.CancelFunc - stopCh chan struct{} - stopOnce sync.Once - quitCh chan struct{} - initCh chan struct{} - - // Mutable and shared with common node's worker. - // Guarded by .commonNode.CrossNode. - state NodeState - // Context valid until the next round. - // Guarded by .commonNode.CrossNode. - roundCtx context.Context - roundCancelCtx context.CancelFunc - - stateTransitions *pubsub.Broker - // Bump this when we need to change what the worker selects over. - reselect chan struct{} - - logger *logging.Logger -} - -// Name returns the service name. -func (n *Node) Name() string { - return "committee node" -} - -// Start starts the service. -func (n *Node) Start() error { - go n.worker() - return nil -} - -// Stop halts the service. -func (n *Node) Stop() { - n.stopOnce.Do(func() { close(n.stopCh) }) -} - -// Quit returns a channel that will be closed when the service terminates. -func (n *Node) Quit() <-chan struct{} { - return n.quitCh -} - -// Cleanup performs the service specific post-termination cleanup. 
-func (n *Node) Cleanup() { -} - -// Initialized returns a channel that will be closed when the node is -// initialized and ready to service requests. -func (n *Node) Initialized() <-chan struct{} { - return n.initCh -} - -// WatchStateTransitions subscribes to the node's state transitions. -func (n *Node) WatchStateTransitions() (<-chan NodeState, *pubsub.Subscription) { - sub := n.stateTransitions.Subscribe() - ch := make(chan NodeState) - sub.Unwrap(ch) - - return ch, sub -} - -func (n *Node) getMetricLabels() prometheus.Labels { - return prometheus.Labels{ - "runtime": n.commonNode.Runtime.ID().String(), - } -} - -// HandlePeerMessage implements NodeHooks. -func (n *Node) HandlePeerMessage(ctx context.Context, message *p2p.Message) (bool, error) { - if message.ExecutorCommit != nil { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - err := n.handleResultsLocked(ctx, message.ExecutorCommit) - if err != nil { - return false, err - } - return true, nil - } - return false, nil -} - -func (n *Node) bumpReselect() { - select { - case n.reselect <- struct{}{}: - default: - // If there's one already queued, we don't need to do anything. - } -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) transitionLocked(state NodeState) { - n.logger.Info("state transition", - "current_state", n.state, - "new_state", state, - ) - - // Validate state transition. - dests := validStateTransitions[n.state.Name()] - - var valid bool - for _, dest := range dests[:] { - if dest == state.Name() { - valid = true - break - } - } - - if !valid { - panic(fmt.Sprintf("invalid state transition: %s -> %s", n.state, state)) - } - - n.state = state - n.stateTransitions.Broadcast(state) - // Restart our worker's select in case our state-specific channels have changed. 
- n.bumpReselect() -} - -func (n *Node) newStateWaitingForResultsLocked(epoch *committee.EpochSnapshot) StateWaitingForResults { - pool := &commitment.MultiPool{ - Committees: make(map[hash.Hash]*commitment.Pool), - } - - for cID, ci := range epoch.GetExecutorCommittees() { - pool.Committees[cID] = &commitment.Pool{ - Runtime: epoch.GetRuntime(), - Committee: ci.Committee, - } - } - - return StateWaitingForResults{ - pool: pool, - timer: time.NewTimer(infiniteTimeout), - consensusTimeout: make(map[hash.Hash]bool), - } -} - -// HandleEpochTransitionLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleEpochTransitionLocked(epoch *committee.EpochSnapshot) { - if epoch.IsMergeWorker() || epoch.IsMergeBackupWorker() { - n.transitionLocked(n.newStateWaitingForResultsLocked(epoch)) - } else { - n.transitionLocked(StateNotReady{}) - } -} - -// HandleNewBlockEarlyLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNewBlockEarlyLocked(blk *block.Block) { - // If we have seen a new block while waiting for results, we need to - // abort it no matter what as any processed state may be invalid. - n.abortMergeLocked(errSeenNewerBlock) -} - -// HandleNewBlockLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNewBlockLocked(blk *block.Block) { - epoch := n.commonNode.Group.GetEpochSnapshot() - - // Cancel old round context, start a new one. - if n.roundCancelCtx != nil { - (n.roundCancelCtx)() - } - n.roundCtx, n.roundCancelCtx = context.WithCancel(n.ctx) - - // Perform actions based on current state. - switch n.state.(type) { - case StateWaitingForEvent: - // Block finalized without the need for a backup worker. 
- n.logger.Info("considering the round finalized", - "round", blk.Header.Round, - "header_hash", blk.Header.EncodedHash(), - ) - n.transitionLocked(n.newStateWaitingForResultsLocked(epoch)) - case StateWaitingForFinalize: - // A new block means the round has been finalized. - n.logger.Info("considering the round finalized", - "round", blk.Header.Round, - "header_hash", blk.Header.EncodedHash(), - ) - n.transitionLocked(n.newStateWaitingForResultsLocked(epoch)) - } -} - -// HandleResultsFromExecutorWorkerLocked processes results from an executor worker. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleResultsFromExecutorWorkerLocked(spanCtx opentracing.SpanContext, commit *commitment.ExecutorCommitment) { - // Spawn retry in a goroutine to prevent blocking processing. - go func() { - call := func() error { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - return n.handleResultsLocked(n.roundCtx, commit) - } - bkoff := backoff.WithContext(backoff.NewExponentialBackOff(), n.roundCtx) - if err := backoff.Retry(call, bkoff); err != nil { - n.logger.Warn("failed to handle results from local executor worker", - "err", err, - ) - } - }() -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) handleResultsLocked(ctx context.Context, commit *commitment.ExecutorCommitment) error { - // If we are not waiting for results, don't do anything. - state, ok := n.state.(StateWaitingForResults) - if !ok { - return errIncorrectState - } - - n.logger.Debug("received new executor commitment", - "node_id", commit.Signature.PublicKey, - ) - - epoch := n.commonNode.Group.GetEpochSnapshot() - sp, err := state.pool.AddExecutorCommitment(ctx, n.commonNode.CurrentBlock, epoch, epoch, commit) - if err != nil { - return err - } - - // Attempt finalization. We defer this part in order to not block P2P relaying. 
- expectedRound := n.commonNode.CurrentBlock.Header.Round - go func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - // Ignore defered finalization attempt if state has changed. - if _, ok := n.state.(StateWaitingForResults); !ok { - return - } - - // Ignore defered finalization attempt if current block has changed. - if n.commonNode.CurrentBlock.Header.Round != expectedRound { - return - } - - n.tryFinalizeResultsLocked(sp, false) - }() - return nil -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) tryFinalizeResultsLocked(pool *commitment.Pool, didTimeout bool) { - state := n.state.(StateWaitingForResults) - now := time.Now() - - defer func() { - if !didTimeout && !state.timer.Stop() { - <-state.timer.C - } - - nextTimeout := state.pool.GetNextTimeout() - if nextTimeout.IsZero() { - // Disarm timer. - n.logger.Debug("disarming round timeout") - state.timer.Reset(infiniteTimeout) - } else { - // (Re-)arm timer. - n.logger.Debug("(re-)arming round timeout") - state.timer.Reset(nextTimeout.Sub(now)) - } - }() - - epoch := n.commonNode.Group.GetEpochSnapshot() - // The roothash backend will start counting its timeout on its own based on - // any received commits so in the worst case the actual timeout will be - // 2*roundTimeout. - // - // We have two kinds of timeouts -- the first is based on local monotonic time and - // starts counting as soon as the first commitment for a committee is received. It - // is used to trigger submission of executor commitments to the consensus layer for - // proof of timeout. The consensus layer starts its own timeout and this is the - // second timeout. - // - // The timeout is only considered authoritative once confirmed by consensus. In - // case of a local-only timeout, we will submit what executor commitments we have - // to consensus and not change the internal Discrepancy flag. 
- cid := pool.GetCommitteeID() - logger := n.logger.With("committee_id", cid) - consensusTimeout := state.consensusTimeout[cid] - rt, err := n.commonNode.Runtime.RegistryDescriptor(n.roundCtx) - if err != nil { - logger.Error("failed to retrieve runtime registry descriptor", - "err", err, - ) - return - } - runtimeTimeout := rt.Executor.RoundTimeout - - commit, err := pool.TryFinalize(now, runtimeTimeout, didTimeout, consensusTimeout) - switch err { - case nil: - case commitment.ErrStillWaiting: - // Not enough commitments. - logger.Debug("still waiting for commitments") - return - case commitment.ErrDiscrepancyDetected: - // We may also be able to already perform discrepancy resolution, check if - // this is possible. This may be the case if we receive commits from backup - // workers before receiving commits from regular workers. - commit, err = pool.TryFinalize(now, runtimeTimeout, false, false) - if err == nil { - // Discrepancy was already resolved, proceed with merge. - break - } - - // Discrepancy detected. - fallthrough - case commitment.ErrInsufficientVotes: - // Discrepancy resolution failed. - logger.Warn("insufficient votes, performing executor commit") - - // Submit executor commit to BFT. - ccs := pool.GetExecutorCommitments() - go func() { - tx := roothash.NewExecutorCommitTx(0, nil, n.commonNode.Runtime.ID(), ccs) - ccErr := consensus.SignAndSubmitTx(n.roundCtx, n.commonNode.Consensus, n.commonNode.Identity.NodeSigner, tx) - - switch ccErr { - case nil: - logger.Info("executor commit finalized") - default: - logger.Warn("failed to submit executor commit", - "err", ccErr, - ) - } - }() - return - default: - n.abortMergeLocked(err) - return - } - - // Check that we have everything from all committees. 
- result := commit.ToDDResult().(commitment.ComputeResultsHeader) - state.results = append(state.results, &result) - if len(state.results) < len(state.pool.Committees) { - n.logger.Debug("still waiting for other committees") - // State transition to store the updated results. - n.transitionLocked(state) - return - } - - n.logger.Info("have valid commitments from all committees, merging") - - commitments := state.pool.GetOpenExecutorCommitments() - - if epoch.IsMergeBackupWorker() && state.pendingEvent == nil { - // Backup workers only perform merge after receiving a discrepancy event. - n.transitionLocked(StateWaitingForEvent{commitments: commitments, results: state.results}) - return - } - - // No discrepancy, perform merge. - n.startMergeLocked(commitments, state.results) -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) startMergeLocked(commitments []commitment.OpenExecutorCommitment, results []*commitment.ComputeResultsHeader) { - doneCh := make(chan *commitment.MergeBody, 1) - ctx, cancel := context.WithCancel(n.roundCtx) - - // Create empty block based on previous block while we hold the lock. - prevBlk := n.commonNode.CurrentBlock - blk := block.NewEmptyBlock(prevBlk, 0, block.Normal) - - n.transitionLocked(StateProcessingMerge{doneCh: doneCh, cancel: cancel}) - - // Start processing merge in a separate goroutine. This is to make it possible - // to abort the merge if a newer block is seen while we are merging. - go func() { - defer close(doneCh) - - // Merge results to storage. - ctx, cancel = context.WithTimeout(ctx, n.commonCfg.StorageCommitTimeout) - defer cancel() - - var mergeBody commitment.MergeBody - switch len(results) { - case 1: - // Optimize the case where there is only a single committee -- there is nothing to merge - // so we can avoid a round trip to the storage nodes which already have the roots. 
- blk.Header.Messages = results[0].Messages - blk.Header.IORoot = results[0].IORoot - blk.Header.StateRoot = results[0].StateRoot - - // Collect all distinct storage signatures. - storageSigSet := make(map[signature.PublicKey]bool) - for _, ec := range commitments { - mergeBody.ExecutorCommits = append(mergeBody.ExecutorCommits, ec.ExecutorCommitment) - - for _, s := range ec.Body.StorageSignatures { - if storageSigSet[s.PublicKey] { - continue - } - storageSigSet[s.PublicKey] = true - blk.Header.StorageSignatures = append(blk.Header.StorageSignatures, s) - } - } - - mergeBody.Header = blk.Header - default: - // Multiple committees, we need to perform a storage merge operation. - n.logger.Error("merge from multiple committees not supported") - return - } - - // Submit the merge result. - doneCh <- &mergeBody - }() -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) proposeHeaderLocked(result *commitment.MergeBody) { - n.logger.Debug("proposing header", - "previous_hash", result.Header.PreviousHash, - "round", result.Header.Round, - ) - - // Submit MC-Commit to BFT for DD and finalization. - mc, err := commitment.SignMergeCommitment(n.commonNode.Identity.NodeSigner, result) - if err != nil { - n.logger.Error("failed to sign merge commitment", - "err", err, - ) - n.abortMergeLocked(err) - return - } - - n.transitionLocked(StateWaitingForFinalize{}) - - // TODO: Tracing. - // span := opentracing.StartSpan("roothash.MergeCommit", opentracing.ChildOf(state.batchSpanCtx)) - // defer span.Finish() - - // Submit merge commit to consensus. - mcs := []commitment.MergeCommitment{*mc} - mergeCommitStart := time.Now() - go func() { - tx := roothash.NewMergeCommitTx(0, nil, n.commonNode.Runtime.ID(), mcs) - mcErr := consensus.SignAndSubmitTx(n.roundCtx, n.commonNode.Consensus, n.commonNode.Identity.NodeSigner, tx) - // Record merge commit latency. 
- roothashCommitLatency.With(n.getMetricLabels()).Observe(time.Since(mergeCommitStart).Seconds()) - - switch mcErr { - case nil: - n.logger.Info("merge commit finalized") - default: - n.logger.Error("failed to submit merge commit", - "err", mcErr, - ) - } - }() -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) abortMergeLocked(reason error) { - switch state := n.state.(type) { - case StateWaitingForResults: - case StateProcessingMerge: - // Cancel merge processing. - state.cancel() - default: - return - } - - n.logger.Warn("aborting merge", - "reason", reason, - ) - - // TODO: Return transactions to transaction scheduler. - - abortedMergeCount.With(n.getMetricLabels()).Inc() - - // After the batch has been aborted, we must wait for the round to be - // finalized. - n.transitionLocked(StateWaitingForFinalize{}) -} - -// HandleNewEventLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNewEventLocked(ev *roothash.Event) { - switch { - case ev.MergeDiscrepancyDetected != nil: - n.handleMergeDiscrepancyLocked(ev.MergeDiscrepancyDetected) - case ev.ExecutionDiscrepancyDetected != nil: - n.handleExecutorDiscrepancyLocked(ev.ExecutionDiscrepancyDetected) - default: - // Ignore other events. - } -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) handleMergeDiscrepancyLocked(ev *roothash.MergeDiscrepancyDetectedEvent) { - n.logger.Warn("merge discrepancy detected") - - discrepancyDetectedCount.With(n.getMetricLabels()).Inc() - - if !n.commonNode.Group.GetEpochSnapshot().IsMergeBackupWorker() { - return - } - - var state StateWaitingForEvent - switch s := n.state.(type) { - case StateWaitingForResults: - // Discrepancy detected event received before the results. We need to - // record the received event and keep waiting for the results. 
- s.pendingEvent = ev - n.transitionLocked(s) - return - case StateWaitingForEvent: - state = s - default: - n.logger.Warn("ignoring received discrepancy event in incorrect state", - "state", s, - ) - return - } - - // Backup worker, start processing merge. - n.logger.Info("backup worker activating and processing merge") - n.startMergeLocked(state.commitments, state.results) -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) handleExecutorDiscrepancyLocked(ev *roothash.ExecutionDiscrepancyDetectedEvent) { - n.logger.Warn("execution discrepancy detected", - "committee_id", ev.CommitteeID, - "timeout", ev.Timeout, - ) - - switch s := n.state.(type) { - case StateWaitingForResults: - // If the discrepancy was due to a timeout, record it. - pool := s.pool.Committees[ev.CommitteeID] - if pool == nil { - n.logger.Error("execution discrepancy event for unknown committee", - "committee_id", ev.CommitteeID, - ) - return - } - - if ev.Timeout { - s.consensusTimeout[ev.CommitteeID] = true - n.tryFinalizeResultsLocked(pool, true) - } - default: - } -} - -// HandleNodeUpdateLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNodeUpdateLocked(update *runtimeCommittee.NodeUpdate, snapshot *committee.EpochSnapshot) { - // Nothing to do here. -} - -func (n *Node) worker() { - defer close(n.quitCh) - defer (n.cancelCtx)() - - // Wait for the common node to be initialized. - select { - case <-n.commonNode.Initialized(): - case <-n.stopCh: - close(n.initCh) - return - } - - n.logger.Info("starting committee node") - - // We are initialized. - close(n.initCh) - - // We are now ready to service requests. - n.roleProvider.SetAvailable(func(*node.Node) error { return nil }) - - for { - // Select over some channels based on current state. 
- var timerCh <-chan time.Time - var mergeDoneCh <-chan *commitment.MergeBody - - func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - switch state := n.state.(type) { - case StateWaitingForResults: - timerCh = state.timer.C - case StateProcessingMerge: - mergeDoneCh = state.doneCh - default: - } - }() - - select { - case <-n.stopCh: - n.logger.Info("termination requested") - return - case <-timerCh: - n.logger.Warn("round timeout expired, forcing finalization") - - func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - state, ok := n.state.(StateWaitingForResults) - if !ok || state.timer.C != timerCh { - return - } - - for _, pool := range state.pool.GetTimeoutCommittees(time.Now()) { - n.tryFinalizeResultsLocked(pool, true) - } - }() - case result := <-mergeDoneCh: - func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - if state, ok := n.state.(StateProcessingMerge); !ok || state.doneCh != mergeDoneCh { - return - } - - if result == nil { - n.logger.Warn("merge aborted") - n.abortMergeLocked(errMergeFailed) - } else { - n.logger.Info("merge completed, proposing header") - n.proposeHeaderLocked(result) - } - }() - case <-n.reselect: - // Recalculate select set. - } - } -} - -func NewNode(commonNode *committee.Node, commonCfg workerCommon.Config, roleProvider registration.RoleProvider) (*Node, error) { - metricsOnce.Do(func() { - prometheus.MustRegister(nodeCollectors...) 
- }) - - ctx, cancel := context.WithCancel(context.Background()) - - n := &Node{ - commonNode: commonNode, - commonCfg: commonCfg, - roleProvider: roleProvider, - ctx: ctx, - cancelCtx: cancel, - stopCh: make(chan struct{}), - quitCh: make(chan struct{}), - initCh: make(chan struct{}), - state: StateNotReady{}, - stateTransitions: pubsub.NewBroker(false), - reselect: make(chan struct{}, 1), - logger: logging.GetLogger("worker/merge/committee").With("runtime_id", commonNode.Runtime.ID()), - } - - return n, nil -} diff --git a/go/worker/compute/merge/committee/state.go b/go/worker/compute/merge/committee/state.go deleted file mode 100644 index 4df3b8707eb..00000000000 --- a/go/worker/compute/merge/committee/state.go +++ /dev/null @@ -1,160 +0,0 @@ -package committee - -import ( - "context" - "time" - - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" -) - -// StateName is a symbolic state without the attached values. -type StateName string - -const ( - // NotReady is the name of StateNotReady. - NotReady = "NotReady" - // WaitingForResults is the name of StateWaitingForResults. - WaitingForResults = "WaitingForResults" - // WaitingForEvent is the name of StateWaitingForEvent. - WaitingForEvent = "WaitingForEvent" - // ProcessingMerge is the name of StateProcessingMerge. - ProcessingMerge = "ProcessingMerge" - // WaitingForFinalize is the name of StateWaitingForFinalize. - WaitingForFinalize = "WaitingForFinalize" -) - -// Valid state transitions. -var validStateTransitions = map[StateName][]StateName{ - // Transitions from NotReady state. - NotReady: { - // Epoch transition occurred and we are not in the committee. - NotReady, - // Epoch transition occurred and we are in the committee. - WaitingForResults, - }, - - // Transitions from WaitingForResults state. 
- WaitingForResults: { - // Abort: seen newer block while waiting for results. - WaitingForFinalize, - // We are waiting for more results. - WaitingForResults, - // Received results, waiting for disrepancy event. - WaitingForEvent, - // Got all results, merging. - ProcessingMerge, - }, - - // Transitions from WaitingForEvent state. - WaitingForEvent: { - // Abort: seen newer block while waiting for event. - WaitingForResults, - // Discrepancy event received. - ProcessingMerge, - // Epoch transition occurred and we are not in the committee. - NotReady, - }, - - // Transitions from ProcessingMerge state. - ProcessingMerge: { - // Merge completed (or abort due to newer block seen). - WaitingForFinalize, - }, - - // Transitions from WaitingForFinalize state. - WaitingForFinalize: { - // Round has been finalized. - WaitingForResults, - // Epoch transition occurred and we are no longer in the committee. - NotReady, - }, -} - -// NodeState is a node's state. -type NodeState interface { - // Name returns the name of the state. - Name() StateName -} - -// StateNotReady is the not ready state. -type StateNotReady struct { -} - -// Name returns the name of the state. -func (s StateNotReady) Name() StateName { - return NotReady -} - -// String returns a string representation of the state. -func (s StateNotReady) String() string { - return string(s.Name()) -} - -// StateWaitingForResults is the waiting for results state. -type StateWaitingForResults struct { - pool *commitment.MultiPool - timer *time.Timer - consensusTimeout map[hash.Hash]bool - results []*commitment.ComputeResultsHeader - // Pending merge discrepancy detected event in case the node is a - // backup worker and the event was received before the results. - pendingEvent *roothash.MergeDiscrepancyDetectedEvent -} - -// Name returns the name of the state. -func (s StateWaitingForResults) Name() StateName { - return WaitingForResults -} - -// String returns a string representation of the state. 
-func (s StateWaitingForResults) String() string { - return string(s.Name()) -} - -// StateWaitingForEvent is the waiting for event state. -type StateWaitingForEvent struct { - commitments []commitment.OpenExecutorCommitment - results []*commitment.ComputeResultsHeader -} - -// Name returns the name of the state. -func (s StateWaitingForEvent) Name() StateName { - return WaitingForEvent -} - -// String returns a string representation of the state. -func (s StateWaitingForEvent) String() string { - return string(s.Name()) -} - -// StateProcessingMerge is the processing merge state. -type StateProcessingMerge struct { - doneCh <-chan *commitment.MergeBody - cancel context.CancelFunc -} - -// Name returns the name of the state. -func (s StateProcessingMerge) Name() StateName { - return ProcessingMerge -} - -// String returns a string representation of the state. -func (s StateProcessingMerge) String() string { - return string(s.Name()) -} - -// StateWaitingForFinalize is the waiting for finalize state. -type StateWaitingForFinalize struct { -} - -// Name returns the name of the state. -func (s StateWaitingForFinalize) Name() StateName { - return WaitingForFinalize -} - -// String returns a string representation of the state. -func (s StateWaitingForFinalize) String() string { - return string(s.Name()) -} diff --git a/go/worker/compute/merge/init.go b/go/worker/compute/merge/init.go deleted file mode 100644 index e7ab946ad6f..00000000000 --- a/go/worker/compute/merge/init.go +++ /dev/null @@ -1,12 +0,0 @@ -package merge - -import ( - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" - "github.com/oasisprotocol/oasis-core/go/worker/compute" - "github.com/oasisprotocol/oasis-core/go/worker/registration" -) - -// New creates a new worker. 
-func New(commonWorker *workerCommon.Worker, registration *registration.Worker) (*Worker, error) { - return newWorker(compute.Enabled(), commonWorker, registration) -} diff --git a/go/worker/compute/merge/worker.go b/go/worker/compute/merge/worker.go deleted file mode 100644 index 1f5d6e499c7..00000000000 --- a/go/worker/compute/merge/worker.go +++ /dev/null @@ -1,190 +0,0 @@ -package merge - -import ( - "context" - "fmt" - - "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/logging" - "github.com/oasisprotocol/oasis-core/go/common/node" - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" - committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge/committee" - "github.com/oasisprotocol/oasis-core/go/worker/registration" -) - -// Worker is a merge worker. -type Worker struct { - enabled bool - - commonWorker *workerCommon.Worker - registration *registration.Worker - - runtimes map[common.Namespace]*committee.Node - - ctx context.Context - cancelCtx context.CancelFunc - quitCh chan struct{} - initCh chan struct{} - - logger *logging.Logger -} - -// Name returns the service name. -func (w *Worker) Name() string { - return "merge worker" -} - -// Start starts the service. -func (w *Worker) Start() error { - if !w.enabled { - w.logger.Info("not starting merge worker as it is disabled") - - // In case the worker is not enabled, close the init channel immediately. - close(w.initCh) - - return nil - } - - // Wait for all runtimes to terminate. - go func() { - defer close(w.quitCh) - - for _, rt := range w.runtimes { - <-rt.Quit() - } - }() - - // Wait for all runtimes to be initialized and for the node - // to be registered for the current epoch. - go func() { - for _, rt := range w.runtimes { - <-rt.Initialized() - } - - <-w.registration.InitialRegistrationCh() - - close(w.initCh) - }() - - // Start runtime services. 
- for id, rt := range w.runtimes { - w.logger.Info("starting services for runtime", - "runtime_id", id, - ) - - if err := rt.Start(); err != nil { - return err - } - } - - return nil -} - -// Stop halts the service. -func (w *Worker) Stop() { - if !w.enabled { - close(w.quitCh) - return - } - - for id, rt := range w.runtimes { - w.logger.Info("stopping services for runtime", - "runtime_id", id, - ) - - rt.Stop() - } -} - -// Enabled returns if worker is enabled. -func (w *Worker) Enabled() bool { - return w.enabled -} - -// Quit returns a channel that will be closed when the service terminates. -func (w *Worker) Quit() <-chan struct{} { - return w.quitCh -} - -// Cleanup performs the service specific post-termination cleanup. -func (w *Worker) Cleanup() { - if !w.enabled { - return - } - - for _, rt := range w.runtimes { - rt.Cleanup() - } -} - -// Initialized returns a channel that will be closed when the merge worker -// is initialized and ready to service requests. -func (w *Worker) Initialized() <-chan struct{} { - return w.initCh -} - -// GetRuntime returns a registered runtime. -// -// In case the runtime with the specified id was not registered it -// returns nil. 
-func (w *Worker) GetRuntime(id common.Namespace) *committee.Node { - return w.runtimes[id] -} - -func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { - id := commonNode.Runtime.ID() - w.logger.Info("registering new runtime", - "runtime_id", id, - ) - - rp, err := w.registration.NewRuntimeRoleProvider(node.RoleComputeWorker, id) - if err != nil { - return fmt.Errorf("failed to create role provider: %w", err) - } - - node, err := committee.NewNode(commonNode, w.commonWorker.GetConfig(), rp) - if err != nil { - return err - } - - commonNode.AddHooks(node) - w.runtimes[id] = node - - w.logger.Info("new runtime registered", - "runtime_id", id, - ) - - return nil -} - -func newWorker(enabled bool, commonWorker *workerCommon.Worker, registration *registration.Worker) (*Worker, error) { - ctx, cancelCtx := context.WithCancel(context.Background()) - - w := &Worker{ - enabled: enabled, - commonWorker: commonWorker, - registration: registration, - runtimes: make(map[common.Namespace]*committee.Node), - ctx: ctx, - cancelCtx: cancelCtx, - quitCh: make(chan struct{}), - initCh: make(chan struct{}), - logger: logging.GetLogger("worker/merge"), - } - - if enabled { - if !w.commonWorker.Enabled() { - panic("common worker should have been enabled for merge worker") - } - - // Register all configured runtimes. 
- for _, rt := range commonWorker.GetRuntimes() { - if err := w.registerRuntime(rt); err != nil { - return nil, err - } - } - } - - return w, nil -} diff --git a/go/worker/compute/txnscheduler/algorithm/api/api.go b/go/worker/compute/txnscheduler/algorithm/api/api.go index b6e981ec82b..30e7163ad2f 100644 --- a/go/worker/compute/txnscheduler/algorithm/api/api.go +++ b/go/worker/compute/txnscheduler/algorithm/api/api.go @@ -4,7 +4,6 @@ package api import ( "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/runtime/transaction" - "github.com/oasisprotocol/oasis-core/go/worker/common/committee" ) // Algorithm defines an algorithm for scheduling incoming transaction. @@ -17,10 +16,6 @@ type Algorithm interface { // IsInitialized returns true, if an algorithm has been initialized. IsInitialized() bool - // EpochTransition notifies the transaction scheduler about a new - // epoch transition, passing in an epoch snapshot. - EpochTransition(epoch *committee.EpochSnapshot) error - // ScheduleTx attempts to schedule a transaction. // // The scheduling algorithm may peek into the transaction to extract @@ -44,5 +39,5 @@ type Algorithm interface { // TransactionDispatcher dispatches transactions to a scheduled executor committee. type TransactionDispatcher interface { // Dispatch attempts to dispatch a batch to a executor committee. 
- Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error + Dispatch(batch transaction.RawBatch) error } diff --git a/go/worker/compute/txnscheduler/algorithm/batching/batching.go b/go/worker/compute/txnscheduler/algorithm/batching/batching.go index b33c824e642..f7378360113 100644 --- a/go/worker/compute/txnscheduler/algorithm/batching/batching.go +++ b/go/worker/compute/txnscheduler/algorithm/batching/batching.go @@ -2,15 +2,12 @@ package batching import ( - "sync" - flag "github.com/spf13/pflag" "github.com/spf13/viper" "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/logging" registry "github.com/oasisprotocol/oasis-core/go/registry/api" - "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/compute/txnscheduler/algorithm/api" ) @@ -25,15 +22,11 @@ const ( var Flags = flag.NewFlagSet("", flag.ContinueOnError) type batchingState struct { - sync.RWMutex - cfg config incomingQueue *incomingQueue dispatcher api.TransactionDispatcher - epoch *committee.EpochSnapshot - logger *logging.Logger } @@ -44,29 +37,6 @@ type config struct { } func (s *batchingState) scheduleBatch(force bool) error { - // The simple batching algorithm only supports a single executor committee. Use - // with multiple committees will currently cause the rounds to fail as all other - // committees will be idle. - var committeeID *hash.Hash - func() { - // Guarding against EpochTransition() modifying current epoch. - s.RLock() - defer s.RUnlock() - - // We cannot schedule anything until there is an epoch transition. 
- if s.epoch == nil { - return - } - - for id := range s.epoch.GetExecutorCommittees() { - committeeID = &id - break - } - }() - if committeeID == nil { - return nil - } - batch, err := s.incomingQueue.Take(force) if err != nil && err != errNoBatchAvailable { s.logger.Error("failed to get batch from the queue", @@ -76,8 +46,8 @@ func (s *batchingState) scheduleBatch(force bool) error { } if len(batch) > 0 { - // Try to dispatch batch to the first committee. - if err := s.dispatcher.Dispatch(*committeeID, batch); err != nil { + // Try to dispatch batch. + if err := s.dispatcher.Dispatch(batch); err != nil { // Put the batch back into the incoming queue in case this failed. if errAB := s.incomingQueue.AddBatch(batch); errAB != nil { s.logger.Error("failed to add batch back into the incoming queue", @@ -91,14 +61,6 @@ func (s *batchingState) scheduleBatch(force bool) error { return nil } -func (s *batchingState) EpochTransition(epoch *committee.EpochSnapshot) error { - s.Lock() - defer s.Unlock() - - s.epoch = epoch - return nil -} - func (s *batchingState) ScheduleTx(tx []byte) error { if err := s.incomingQueue.Add(tx); err != nil { // Return success in case of duplicate calls to avoid the client diff --git a/go/worker/compute/txnscheduler/algorithm/tests/tester.go b/go/worker/compute/txnscheduler/algorithm/tests/tester.go index 4aadfa74bba..6ceff50bc26 100644 --- a/go/worker/compute/txnscheduler/algorithm/tests/tester.go +++ b/go/worker/compute/txnscheduler/algorithm/tests/tester.go @@ -9,7 +9,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/runtime/transaction" - "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/compute/txnscheduler/algorithm/api" ) @@ -22,7 +21,7 @@ func (t *testDispatcher) Clear() { t.DispatchedBatches = []transaction.RawBatch{} } -func (t *testDispatcher) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error 
{ +func (t *testDispatcher) Dispatch(batch transaction.RawBatch) error { if t.ShouldFail { return errors.New("dispatch failed") } @@ -41,11 +40,6 @@ func AlgorithmImplementationTests( err := algorithm.Initialize(&td) require.NoError(t, err, "Initialize(td)") - // Simulate an epoch transition. - epoch := committee.NewMockEpochSnapshot() - err = algorithm.EpochTransition(epoch) - require.NoError(t, err, "EpochTransition") - // Run the test cases. t.Run("ScheduleTxs", func(t *testing.T) { testScheduleTransactions(t, &td, algorithm) diff --git a/go/worker/compute/txnscheduler/committee/node.go b/go/worker/compute/txnscheduler/committee/node.go index 3c751f5d283..9338f716ce7 100644 --- a/go/worker/compute/txnscheduler/committee/node.go +++ b/go/worker/compute/txnscheduler/committee/node.go @@ -21,6 +21,7 @@ import ( epochtime "github.com/oasisprotocol/oasis-core/go/epochtime/api" roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" + "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" runtimeCommittee "github.com/oasisprotocol/oasis-core/go/runtime/committee" "github.com/oasisprotocol/oasis-core/go/runtime/host" "github.com/oasisprotocol/oasis-core/go/runtime/host/protocol" @@ -302,14 +303,6 @@ func (n *Node) HandleEpochTransitionLocked(epoch *committee.EpochSnapshot) { n.algorithmMutex.RUnlock() if epoch.IsTransactionSchedulerLeader() { - if err := n.algorithm.EpochTransition(epoch); err != nil { - n.logger.Error("scheduling algorithm failed to process epoch transition", - "err", err, - ) - n.transitionLocked(StateNotReady{}) - return - } - n.transitionLocked(StateWaitingForBatch{}) } else { n.algorithm.Clear() @@ -351,7 +344,7 @@ func (n *Node) HandleNodeUpdateLocked(update *runtimeCommittee.NodeUpdate, snaps } // Dispatch dispatches a batch to the executor committee. 
-func (n *Node) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error { +func (n *Node) Dispatch(batch transaction.RawBatch) error { n.commonNode.CrossNode.Lock() defer n.commonNode.CrossNode.Unlock() @@ -436,12 +429,25 @@ func (n *Node) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error for _, receipt := range ioReceipts { ioReceiptSignatures = append(ioReceiptSignatures, receipt.Signature) } - txnSchedSig, err := n.commonNode.Group.PublishScheduledBatch( + + dispatchMsg := &commitment.TxnSchedulerBatch{ + IORoot: ioRoot, + StorageSignatures: ioReceiptSignatures, + Header: n.commonNode.CurrentBlock.Header, + } + signedDispatchMsg, err := commitment.SignTxnSchedulerBatch(n.commonNode.Identity.NodeSigner, dispatchMsg) + if err != nil { + n.logger.Error("failed to sign txn scheduler batch", + "err", err, + ) + return fmt.Errorf("failed to sign txn scheduler batch: %w", err) + } + + err = n.commonNode.Group.Publish( batchSpanCtx, - committeeID, - ioRoot, - ioReceiptSignatures, - n.commonNode.CurrentBlock.Header, + &p2p.Message{ + TxnSchedulerBatch: signedDispatchMsg, + }, ) if err != nil { spanPublish.Finish() @@ -461,10 +467,9 @@ func (n *Node) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error } else { n.executorNode.HandleBatchFromTransactionSchedulerLocked( batchSpanCtx, - committeeID, ioRoot, batch, - *txnSchedSig, + signedDispatchMsg.Signature, ioReceiptSignatures, ) } diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index 6ade2be2675..5b23216f48e 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -659,10 +659,8 @@ func (crw *clientRuntimeWatcher) updateExternalServicePolicyLocked(snapshot *com policy := accessctl.NewPolicy() // Apply rules to current executor committee members. 
- for _, xc := range snapshot.GetExecutorCommittees() { - if xc != nil { - executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) - } + if xc := snapshot.GetExecutorCommittee(); xc != nil { + executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) } // Apply rules for configured sentry nodes. diff --git a/go/worker/storage/committee/node.go b/go/worker/storage/committee/node.go index cf5b2a3554b..86442853a6b 100644 --- a/go/worker/storage/committee/node.go +++ b/go/worker/storage/committee/node.go @@ -372,17 +372,12 @@ func (n *Node) updateExternalServicePolicyLocked(snapshot *committee.EpochSnapsh sentryNodesPolicy.AddPublicKeyPolicy(&policy, addr.PubKey) } - for _, xc := range snapshot.GetExecutorCommittees() { - if xc != nil { - executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) - } + if xc := snapshot.GetExecutorCommittee(); xc != nil { + executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) } if tsc := snapshot.GetTransactionSchedulerCommittee(); tsc != nil { txnSchedulerCommitteePolicy.AddRulesForCommittee(&policy, tsc, snapshot.Nodes()) } - if mc := snapshot.GetMergeCommittee(); mc != nil { - mergeCommitteePolicy.AddRulesForCommittee(&policy, mc, snapshot.Nodes()) - } // TODO: Query registry only for storage nodes after // https://github.com/oasisprotocol/oasis-core/issues/1923 is implemented. 
nodes, err := n.commonNode.Consensus.Registry().GetNodes(context.Background(), snapshot.GetGroupVersion()) diff --git a/go/worker/storage/committee/policy.go b/go/worker/storage/committee/policy.go index 10285d413db..e4fd3816437 100644 --- a/go/worker/storage/committee/policy.go +++ b/go/worker/storage/committee/policy.go @@ -21,9 +21,6 @@ var ( accessctl.Action(api.MethodApplyBatch.FullName()), }, } - mergeCommitteePolicy = &committee.AccessPolicy{ - Actions: []accessctl.Action{}, - } // NOTE: GetDiff/GetCheckpoint* need to be accessible to all storage nodes, // not just the ones in the current storage committee so that new nodes can // sync-up. diff --git a/runtime/src/common/roothash.rs b/runtime/src/common/roothash.rs index 666dbde480b..f80e0025fe1 100644 --- a/runtime/src/common/roothash.rs +++ b/runtime/src/common/roothash.rs @@ -97,6 +97,8 @@ pub const COMPUTE_RESULTS_HEADER_CONTEXT: &'static [u8] = /// the actual results. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct ComputeResultsHeader { + /// Round number. + pub round: u64, /// Hash of the previous block header this batch was computed against. pub previous_hash: Hash, /// The I/O merkle root. diff --git a/runtime/src/common/version.rs b/runtime/src/common/version.rs index fc93cd0da17..173d007625f 100644 --- a/runtime/src/common/version.rs +++ b/runtime/src/common/version.rs @@ -66,6 +66,6 @@ impl From for Version { // the worker host. 
pub const PROTOCOL_VERSION: Version = Version { major: 0, - minor: 15, + minor: 16, patch: 0, }; diff --git a/runtime/src/dispatcher.rs b/runtime/src/dispatcher.rs index 41a0658f596..d233b3c2b49 100644 --- a/runtime/src/dispatcher.rs +++ b/runtime/src/dispatcher.rs @@ -405,6 +405,7 @@ impl Dispatcher { .expect("io commit must succeed"); let header = ComputeResultsHeader { + round: block.header.round + 1, previous_hash: block.header.encoded_hash(), io_root, state_root: new_state_root, From 2209ffeabb4492efdcf28e8e7e598a5a512107c0 Mon Sep 17 00:00:00 2001 From: Jernej Kos Date: Tue, 11 Aug 2020 18:36:59 +0200 Subject: [PATCH 3/3] go/worker/common/committee/group: Don't use roundCtx for P2P messages Since P2P message delivery is async, the roundCtx could be for the previous round and so could get cancelled prematurely. Introduce a timeout instead. --- go/worker/common/committee/group.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/go/worker/common/committee/group.go b/go/worker/common/committee/group.go index 545c1283972..90fab7b57a9 100644 --- a/go/worker/common/committee/group.go +++ b/go/worker/common/committee/group.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "time" "github.com/opentracing/opentracing-go" opentracingExt "github.com/opentracing/opentracing-go/ext" @@ -23,11 +24,16 @@ import ( p2pError "github.com/oasisprotocol/oasis-core/go/worker/common/p2p/error" ) +// peerMessageProcessTimeout is the maximum time that peer message processing can take. +const peerMessageProcessTimeout = 10 * time.Second + // MessageHandler handles messages from other nodes. type MessageHandler interface { - // HandlePeerMessage handles a message. + // HandlePeerMessage handles a message that has already been authenticated to come from a + // registered node. // - // The message has already been authenticated to come from a registered node. 
+ // The provided context is short-lived so if the handler needs to perform additional work, that + // should be dispatched to a separate goroutine and not block delivery. HandlePeerMessage(ctx context.Context, msg *p2p.Message) error } @@ -422,7 +428,7 @@ func (g *Group) AuthenticatePeer(peerID signature.PublicKey, msg *p2p.Message) e func (g *Group) HandlePeerMessage(unusedPeerID signature.PublicKey, msg *p2p.Message) error { // Perform some checks on the incoming message. We make sure to release the // lock before running the handler. - ctx, err := func() (context.Context, error) { + err := func() error { g.RLock() defer g.RUnlock() @@ -432,18 +438,21 @@ func (g *Group) HandlePeerMessage(unusedPeerID signature.PublicKey, msg *p2p.Mes switch { case msg.GroupVersion < g.activeEpoch.groupVersion: // Stale messages will never become valid. - return nil, p2pError.Permanent(fmt.Errorf("group version in the past")) + return p2pError.Permanent(fmt.Errorf("group version in the past")) case msg.GroupVersion > g.activeEpoch.groupVersion: // Messages from the future may eventually become valid. - return nil, fmt.Errorf("group version from the future") + return fmt.Errorf("group version from the future") } - return g.activeEpoch.roundCtx, nil + return nil }() if err != nil { return err } + ctx, cancel := context.WithTimeout(context.Background(), peerMessageProcessTimeout) + defer cancel() + // Import SpanContext from the message and store it in the current Context. if msg.SpanContext != nil { sc, err := tracing.SpanContextFromBinary(msg.SpanContext)