diff --git a/.changelog/3179.breaking.md b/.changelog/3179.breaking.md index 3617cc07201..5b40a84ba01 100644 --- a/.changelog/3179.breaking.md +++ b/.changelog/3179.breaking.md @@ -1,10 +1,8 @@ -go/worker/compute/merge: Drop support for multiple committees +go/roothash: Drop support for multiple committees Since there is currently no transaction scheduler implementation which would -support multiple committees, there is no sense in the merge node to try to -support such cases as it could be a source of bugs. Additionally it results -in extra round trips to storage nodes due to the Merge operation which in -case of a single committee does not do anything. +support multiple committees, there is no sense in having the merge node as it +could be a source of bugs. The merge node is also the only client for the Merge* storage operations, so they can just be removed in order to reduce the exposed API surface. diff --git a/docs/images/oasis-core-high-level.svg b/docs/images/oasis-core-high-level.svg index 57f7a9a0be2..72c21eca7a0 100644 --- a/docs/images/oasis-core-high-level.svg +++ b/docs/images/oasis-core-high-level.svg @@ -1,3 +1,3 @@ -
[SVG markup stripped; remaining diagram labels (old high-level diagram): Consensus Layer — Epoch Time, Random Beacon, Staking, Registry, Cmte. Scheduler, Root Hash, Key Manager; Runtime Layer — Transaction Scheduler, Executor, Merge and Storage committees for Runtimes A–D]
\ No newline at end of file +
[SVG markup stripped; remaining diagram labels (new high-level diagram): Consensus Layer — Epoch Time, Random Beacon, Staking, Registry, Cmte. Scheduler, Root Hash, Key Manager; Runtime Layer — Transaction Scheduler, Executor and Storage committees for Runtimes A–D; Merge boxes removed]
\ No newline at end of file diff --git a/docs/images/oasis-core-runtime-details.svg b/docs/images/oasis-core-runtime-details.svg index de853c493a3..6e96c77cb62 100644 --- a/docs/images/oasis-core-runtime-details.svg +++ b/docs/images/oasis-core-runtime-details.svg @@ -1,3 +1,3 @@ -
[SVG markup stripped; remaining diagram labels (old runtime-details diagram): Consensus Layer — Epoch Time, Random Beacon, Staking, Registry, Cmte. Scheduler (Elects Committees), RootHash (Canonical State), Key Manager (Distributes Policy); Runtime Layer for Runtime A — Transaction Scheduler, Executor, Merge and Storage, with Commit Summaries and Transactions from the Runtime Client]
\ No newline at end of file +
[SVG markup stripped; remaining diagram labels (new runtime-details diagram): Consensus Layer — Epoch Time, Random Beacon, Staking, Registry, Cmte. Scheduler (Elects Committees), RootHash (Canonical State), Key Manager (Distributes Policy); Runtime Layer for Runtime A — Transaction Scheduler, Executor and Storage, with Commit Summaries and Transactions from the Runtime Client; Merge boxes removed]
\ No newline at end of file diff --git a/docs/index.md b/docs/index.md index f0db7b25afd..fd4fd531808 100644 --- a/docs/index.md +++ b/docs/index.md @@ -60,7 +60,6 @@ implementations. * Transaction Processing Pipeline * Transaction Scheduler Nodes * Executor Nodes - * Merge Nodes * Storage Nodes * Key Manager Nodes * Oasis Node (`oasis-node`) diff --git a/docs/oasis-node/metrics.md b/docs/oasis-node/metrics.md index 7be90df26d4..256f7016325 100644 --- a/docs/oasis-node/metrics.md +++ b/docs/oasis-node/metrics.md @@ -76,7 +76,6 @@ oasis_storage_successes | Counter | Number of storage successes. | call | [stora oasis_storage_value_size | Summary | Storage call value size (bytes). | call | [storage](../../go/storage/metrics.go) oasis_up | Gauge | Is oasis-test-runner active for specific scenario. | | [oasis-node/cmd/common/metrics](../../go/oasis-node/cmd/common/metrics/metrics.go) oasis_worker_aborted_batch_count | Counter | Number of aborted batches. | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) -oasis_worker_aborted_merge_count | Counter | Number of aborted merges. | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_batch_processing_time | Summary | Time it takes for a batch to finalize (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_batch_read_time | Summary | Time it takes to read a batch from storage (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_batch_runtime_processing_time | Summary | Time it takes for a batch to be processed by the runtime (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) @@ -85,12 +84,9 @@ oasis_worker_epoch_number | Gauge | Current epoch number as seen by the worker. oasis_worker_epoch_transition_count | Counter | Number of epoch transitions. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) oasis_worker_execution_discrepancy_detected_count | Counter | Number of detected execute discrepancies. | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_failed_round_count | Counter | Number of failed roothash rounds. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) -oasis_worker_inconsistent_merge_root_count | Counter | Number of inconsistent merge roots. | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) -oasis_worker_merge_discrepancy_detected_count | Counter | Number of detected merge discrepancies. | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_node_registered | Gauge | Is oasis node registered (binary). | | [worker/registration](../../go/worker/registration/worker.go) oasis_worker_processed_block_count | Counter | Number of processed roothash blocks. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) oasis_worker_processed_event_count | Counter | Number of processed roothash events. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) -oasis_worker_roothash_merge_commit_latency | Summary | Latency of roothash merge commit (seconds). 
| runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_storage_commit_latency | Summary | Latency of storage commit calls (state + outputs) (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) oasis_worker_storage_full_round | Gauge | The last round that was fully synced and finalized. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) oasis_worker_storage_pending_round | Gauge | The last round that is in-flight for syncing. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) diff --git a/docs/runtime/index.md b/docs/runtime/index.md index 3fb5abcbb62..a853cfd6ffb 100644 --- a/docs/runtime/index.md +++ b/docs/runtime/index.md @@ -128,7 +128,6 @@ roles (it can have multiple roles at once): * Transaction scheduler. * Executor node (primary or backup). -* Merge node (primary or backup). Subject to runtime configuration, each committee can contain multiple nodes of the same kind (e.g., multiple executor nodes). Some are considered _primary_ diff --git a/docs/toc.md b/docs/toc.md index 45b9fdff8e5..cb767f5aa6e 100644 --- a/docs/toc.md +++ b/docs/toc.md @@ -30,7 +30,7 @@ * [Operation Model](runtime/index.md#operation-model) * [Runtime Host Protocol](runtime/runtime-host-protocol.md) * [Identifiers](runtime/identifiers.md) -* Oasis Node (`oasis-node`) +* Oasis Node * [RPC](oasis-node/rpc.md) * [Metrics](oasis-node/metrics.md) diff --git a/go/consensus/tendermint/apps/roothash/api.go b/go/consensus/tendermint/apps/roothash/api.go index 5cec1c54c73..ebc0df3f5a3 100644 --- a/go/consensus/tendermint/apps/roothash/api.go +++ b/go/consensus/tendermint/apps/roothash/api.go @@ -68,24 +68,12 @@ type ValueExecutorCommitted struct { Event roothash.ExecutorCommittedEvent `json:"event"` } -// ValueMergeCommitted is the value component of a KeyMergeCommitted. -type ValueMergeCommitted struct { - ID common.Namespace `json:"id"` - Event roothash.MergeCommittedEvent `json:"event"` -} - // ValueFinalized is the value component of a TagFinalized. type ValueFinalized struct { ID common.Namespace `json:"id"` Round uint64 `json:"round"` } -// ValueMergeDiscrepancyDetected is the value component of a KeyMergeDiscrepancyDetected. -type ValueMergeDiscrepancyDetected struct { - Event roothash.MergeDiscrepancyDetectedEvent `json:"event"` - ID common.Namespace `json:"id"` -} - // ValueExecutionDiscrepancyDetected is the value component of a KeyMergeDiscrepancyDetected. 
type ValueExecutionDiscrepancyDetected struct { ID common.Namespace `json:"id"` diff --git a/go/consensus/tendermint/apps/roothash/roothash.go b/go/consensus/tendermint/apps/roothash/roothash.go index 386a76ba932..3c0f3fba2fc 100644 --- a/go/consensus/tendermint/apps/roothash/roothash.go +++ b/go/consensus/tendermint/apps/roothash/roothash.go @@ -3,7 +3,6 @@ package roothash import ( "bytes" - "encoding/binary" "errors" "fmt" @@ -11,7 +10,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/consensus/api/transaction" tmapi "github.com/oasisprotocol/oasis-core/go/consensus/tendermint/api" @@ -122,7 +120,7 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo rtState.Suspended = false // Prepare new runtime committees based on what the scheduler did. - committeeID, executorPool, mergePool, empty, err := app.prepareNewCommittees(ctx, epoch, rtState, schedState, regState) + executorPool, empty, err := app.prepareNewCommittees(ctx, epoch, rtState, schedState, regState) if err != nil { return err } @@ -152,7 +150,7 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo } // If the committee has actually changed, force a new round. - if !rtState.Suspended && (rtState.Round == nil || !rtState.Round.CommitteeID.Equal(&committeeID)) { + if !rtState.Suspended { ctx.Logger().Debug("updating committee for runtime", "runtime_id", rt.ID, ) @@ -163,7 +161,6 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo ctx.Logger().Debug("new committee, transitioning round", "runtime_id", rt.ID, - "committee_id", committeeID, "round", blockNr, ) @@ -172,7 +169,7 @@ func (app *rootHashApplication) onCommitteeChanged(ctx *tmapi.Context, epoch epo app.emitEmptyBlock(ctx, rtState, block.EpochTransition) // Create a new round. - rtState.Round = roothashState.NewRound(committeeID, executorPool, mergePool, rtState.CurrentBlock) + rtState.Round = roothashState.NewRound(executorPool, rtState.CurrentBlock) } // Update the runtime descriptor to the latest per-epoch value. @@ -215,39 +212,14 @@ func (app *rootHashApplication) prepareNewCommittees( schedState *schedulerState.MutableState, regState *registryState.MutableState, ) ( - committeeID hash.Hash, - executorPool *commitment.MultiPool, - mergePool *commitment.Pool, + executorPool *commitment.Pool, empty bool, err error, ) { rtID := rtState.Runtime.ID - // Derive a deterministic committee identifier that depends on memberships - // of all committees. We need this to be able to quickly see if any - // committee members have changed. - // - // We first include the current epoch, then all executor committee member - // hashes and then the merge committee member hash: - // - // [little-endian epoch] - // "executor committees follow" - // [executor committe 1 members hash] - // [executor committe 2 members hash] - // ... - // [executor committe n members hash] - // "merge committee follows" - // [merge committee members hash] - // - var committeeIDParts [][]byte - var rawEpoch [8]byte - binary.LittleEndian.PutUint64(rawEpoch[:], uint64(epoch)) - committeeIDParts = append(committeeIDParts, rawEpoch[:]) - committeeIDParts = append(committeeIDParts, []byte("executor committees follow")) - - // NOTE: There will later be multiple executor committees. 
- var executorCommittees []*scheduler.Committee - xc1, err := schedState.Committee(ctx, scheduler.KindComputeExecutor, rtID) + executorPool = new(commitment.Pool) + executorCommittee, err := schedState.Committee(ctx, scheduler.KindComputeExecutor, rtID) if err != nil { ctx.Logger().Error("checkCommittees: failed to get executor committee from scheduler", "err", err, @@ -255,54 +227,17 @@ func (app *rootHashApplication) prepareNewCommittees( ) return } - if xc1 != nil { - executorCommittees = append(executorCommittees, xc1) - } - - executorPool = &commitment.MultiPool{ - Committees: make(map[hash.Hash]*commitment.Pool), - } - if len(executorCommittees) == 0 { - ctx.Logger().Warn("checkCommittees: no executor committees", - "runtime", rtID, - ) - empty = true - } - for _, executorCommittee := range executorCommittees { - executorCommitteeID := executorCommittee.EncodedMembersHash() - committeeIDParts = append(committeeIDParts, executorCommitteeID[:]) - - executorPool.Committees[executorCommitteeID] = &commitment.Pool{ - Runtime: rtState.Runtime, - Committee: executorCommittee, - } - } - - mergePool = new(commitment.Pool) - committeeIDParts = append(committeeIDParts, []byte("merge committee follows")) - mergeCommittee, err := schedState.Committee(ctx, scheduler.KindComputeMerge, rtID) - if err != nil { - ctx.Logger().Error("checkCommittees: failed to get merge committee from scheduler", - "err", err, - "runtime", rtID, - ) - return - } - if mergeCommittee == nil { - ctx.Logger().Warn("checkCommittees: no merge committee", + if executorCommittee == nil { + ctx.Logger().Warn("checkCommittees: no executor committee", "runtime", rtID, ) empty = true } else { - mergePool = &commitment.Pool{ + executorPool = &commitment.Pool{ Runtime: rtState.Runtime, - Committee: mergeCommittee, + Committee: executorCommittee, } - mergeCommitteeID := mergeCommittee.EncodedMembersHash() - committeeIDParts = append(committeeIDParts, mergeCommitteeID[:]) } - - committeeID.FromBytes(committeeIDParts...) 
return } @@ -334,13 +269,6 @@ func (app *rootHashApplication) ExecuteTx(ctx *tmapi.Context, tx *transaction.Tr } return app.executorCommit(ctx, state, &xc) - case roothash.MethodMergeCommit: - var mc roothash.MergeCommit - if err := cbor.Unmarshal(tx.Body, &mc); err != nil { - return err - } - - return app.mergeCommit(ctx, state, &mc) default: return roothash.ErrInvalidArgument } @@ -491,7 +419,7 @@ func (app *rootHashApplication) FireTimer(ctx *tmapi.Context, timer *tmapi.Timer "timer_round", tCtx.Round, ) - if rtState.Round.MergePool.IsTimeout(ctx.Now()) { + if rtState.Round.ExecutorPool.IsTimeout(ctx.Now()) { if err = app.tryFinalizeBlock(ctx, rtState, true); err != nil { ctx.Logger().Error("failed to finalize block", "err", err, @@ -499,9 +427,6 @@ func (app *rootHashApplication) FireTimer(ctx *tmapi.Context, timer *tmapi.Timer return fmt.Errorf("failed to finalize block: %w", err) } } - for _, pool := range rtState.Round.ExecutorPool.GetTimeoutCommittees(ctx.Now()) { - app.tryFinalizeExecute(ctx, rtState, pool, true) - } if err = state.SetRuntimeState(ctx, rtState); err != nil { return fmt.Errorf("failed to set runtime state: %w", err) @@ -537,86 +462,7 @@ func (app *rootHashApplication) updateTimer( } } -func (app *rootHashApplication) tryFinalizeExecute( - ctx *tmapi.Context, - rtState *roothashState.RuntimeState, - pool *commitment.Pool, - forced bool, -) { - runtime := rtState.Runtime - latestBlock := rtState.CurrentBlock - blockNr := latestBlock.Header.Round - committeeID := pool.GetCommitteeID() - - defer app.updateTimer(ctx, rtState, blockNr) - - if rtState.Round.Finalized { - ctx.Logger().Error("attempted to finalize execute when block already finalized", - "round", blockNr, - "committee_id", committeeID, - ) - return - } - - _, err := pool.TryFinalize(ctx.Now(), runtime.Executor.RoundTimeout, forced, true) - switch err { - case nil: - // No error -- there is no discrepancy. But only the merge committee - // can make progress even if we have all executor commitments. - - // TODO: Check if we need to punish the merge committee. - - ctx.Logger().Warn("no execution discrepancy, but only merge committee can make progress", - "round", blockNr, - "committee_id", committeeID, - ) - - if !forced { - // If this was not a timeout, we give the merge committee some - // more time to merge, otherwise we fail the round. - return - } - case commitment.ErrStillWaiting: - // Need more commits. - return - case commitment.ErrDiscrepancyDetected: - // Discrepancy has been detected. - ctx.Logger().Warn("execution discrepancy detected", - "round", blockNr, - "committee_id", committeeID, - logging.LogEvent, roothash.LogEventExecutionDiscrepancyDetected, - ) - - tagV := ValueExecutionDiscrepancyDetected{ - ID: runtime.ID, - Event: roothash.ExecutionDiscrepancyDetectedEvent{ - CommitteeID: pool.GetCommitteeID(), - Timeout: forced, - }, - } - ctx.EmitEvent( - tmapi.NewEventBuilder(app.Name()). - Attribute(KeyExecutionDiscrepancyDetected, cbor.Marshal(tagV)). - Attribute(KeyRuntimeID, ValueRuntimeID(runtime.ID)), - ) - return - default: - } - - // Something else went wrong, emit empty error block. Note that we need - // to abort everything even if only one committee failed to finalize as - // there is otherwise no way to make progress as merge committees will - // refuse to merge if there are discrepancies. 
- ctx.Logger().Error("round failed", - "round", blockNr, - "err", err, - logging.LogEvent, roothash.LogEventRoundFailed, - ) - - app.emitEmptyBlock(ctx, rtState, block.RoundFailed) -} - -func (app *rootHashApplication) tryFinalizeMerge( +func (app *rootHashApplication) tryFinalizeExecutor( ctx *tmapi.Context, rtState *roothashState.RuntimeState, forced bool, @@ -634,7 +480,7 @@ func (app *rootHashApplication) tryFinalizeMerge( return nil } - commit, err := rtState.Round.MergePool.TryFinalize(ctx.Now(), runtime.Merge.RoundTimeout, forced, true) + commit, err := rtState.Round.ExecutorPool.TryFinalize(ctx.Now(), runtime.Executor.RoundTimeout, forced, true) switch err { case nil: // Round has been finalized. @@ -643,11 +489,13 @@ func (app *rootHashApplication) tryFinalizeMerge( ) // Generate the final block. - blk := new(block.Block) - blk.Header = commit.ToDDResult().(block.Header) - blk.Header.Timestamp = uint64(ctx.Now().Unix()) + hdr := commit.ToDDResult().(commitment.ComputeResultsHeader) + + blk := block.NewEmptyBlock(rtState.CurrentBlock, uint64(ctx.Now().Unix()), block.Normal) + blk.Header.IORoot = hdr.IORoot + blk.Header.StateRoot = hdr.StateRoot + // Messages omitted on purpose. - rtState.Round.MergePool.ResetCommitments() rtState.Round.ExecutorPool.ResetCommitments() rtState.Round.Finalized = true @@ -661,18 +509,20 @@ func (app *rootHashApplication) tryFinalizeMerge( return nil case commitment.ErrDiscrepancyDetected: // Discrepancy has been detected. - ctx.Logger().Warn("merge discrepancy detected", + ctx.Logger().Warn("executor discrepancy detected", "round", blockNr, - logging.LogEvent, roothash.LogEventMergeDiscrepancyDetected, + logging.LogEvent, roothash.LogEventExecutionDiscrepancyDetected, ) - tagV := ValueMergeDiscrepancyDetected{ - ID: runtime.ID, - Event: roothash.MergeDiscrepancyDetectedEvent{}, + tagV := ValueExecutionDiscrepancyDetected{ + ID: runtime.ID, + Event: roothash.ExecutionDiscrepancyDetectedEvent{ + Timeout: forced, + }, } ctx.EmitEvent( tmapi.NewEventBuilder(app.Name()). - Attribute(KeyMergeDiscrepancyDetected, cbor.Marshal(tagV)). + Attribute(KeyExecutionDiscrepancyDetected, cbor.Marshal(tagV)). Attribute(KeyRuntimeID, ValueRuntimeID(runtime.ID)), ) return nil @@ -735,9 +585,9 @@ func (app *rootHashApplication) postProcessFinalizedBlock(ctx *tmapi.Context, rt func (app *rootHashApplication) tryFinalizeBlock( ctx *tmapi.Context, rtState *roothashState.RuntimeState, - mergeForced bool, + forced bool, ) error { - finalizedBlock := app.tryFinalizeMerge(ctx, rtState, mergeForced) + finalizedBlock := app.tryFinalizeExecutor(ctx, rtState, forced) if finalizedBlock == nil { return nil } diff --git a/go/consensus/tendermint/apps/roothash/state/round.go b/go/consensus/tendermint/apps/roothash/state/round.go index ffa391096d2..6bae4d11ae6 100644 --- a/go/consensus/tendermint/apps/roothash/state/round.go +++ b/go/consensus/tendermint/apps/roothash/state/round.go @@ -5,16 +5,13 @@ import ( "errors" "time" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" ) // Round is a roothash round. 
type Round struct { - CommitteeID hash.Hash `json:"committee_id"` - ExecutorPool *commitment.MultiPool `json:"executor_pool"` - MergePool *commitment.Pool `json:"merge_pool"` + ExecutorPool *commitment.Pool `json:"executor_pool"` CurrentBlock *block.Block `json:"current_block"` Finalized bool `json:"finalized"` @@ -22,16 +19,11 @@ type Round struct { func (r *Round) Reset() { r.ExecutorPool.ResetCommitments() - r.MergePool.ResetCommitments() r.Finalized = false } -func (r *Round) GetNextTimeout() (timeout time.Time) { - timeout = r.ExecutorPool.GetNextTimeout() - if timeout.IsZero() || (!r.MergePool.NextTimeout.IsZero() && r.MergePool.NextTimeout.Before(timeout)) { - timeout = r.MergePool.NextTimeout - } - return +func (r *Round) GetNextTimeout() time.Time { + return r.ExecutorPool.NextTimeout } func (r *Round) AddExecutorCommitment( @@ -39,23 +31,11 @@ func (r *Round) AddExecutorCommitment( commitment *commitment.ExecutorCommitment, sv commitment.SignatureVerifier, nl commitment.NodeLookup, -) (*commitment.Pool, error) { - if r.Finalized { - return nil, errors.New("tendermint/roothash: round is already finalized, can't commit") - } - return r.ExecutorPool.AddExecutorCommitment(ctx, r.CurrentBlock, sv, nl, commitment) -} - -func (r *Round) AddMergeCommitment( - ctx context.Context, - commitment *commitment.MergeCommitment, - sv commitment.SignatureVerifier, - nl commitment.NodeLookup, ) error { if r.Finalized { return errors.New("tendermint/roothash: round is already finalized, can't commit") } - return r.MergePool.AddMergeCommitment(ctx, r.CurrentBlock, sv, nl, commitment, r.ExecutorPool) + return r.ExecutorPool.AddExecutorCommitment(ctx, r.CurrentBlock, sv, nl, commitment) } func (r *Round) Transition(blk *block.Block) { @@ -63,17 +43,10 @@ func (r *Round) Transition(blk *block.Block) { r.Reset() } -func NewRound( - committeeID hash.Hash, - executorPool *commitment.MultiPool, - mergePool *commitment.Pool, - blk *block.Block, -) *Round { +func NewRound(executorPool *commitment.Pool, blk *block.Block) *Round { r := &Round{ - CommitteeID: committeeID, CurrentBlock: blk, ExecutorPool: executorPool, - MergePool: mergePool, } r.Reset() diff --git a/go/consensus/tendermint/apps/roothash/transactions.go b/go/consensus/tendermint/apps/roothash/transactions.go index b09777ec914..fdcd6efeba1 100644 --- a/go/consensus/tendermint/apps/roothash/transactions.go +++ b/go/consensus/tendermint/apps/roothash/transactions.go @@ -122,83 +122,14 @@ func (app *rootHashApplication) executorCommit( return err } - pools := make(map[*commitment.Pool]bool) for _, commit := range cc.Commits { - var pool *commitment.Pool - if pool, err = rtState.Round.AddExecutorCommitment(ctx, &commit, sv, nl); err != nil { // nolint: gosec + if err = rtState.Round.AddExecutorCommitment(ctx, &commit, sv, nl); err != nil { // nolint: gosec ctx.Logger().Error("failed to add compute commitment to round", "err", err, "round", rtState.CurrentBlock.Header.Round, ) return err } - - pools[pool] = true - } - - // Try to finalize compute rounds. - for pool := range pools { - app.tryFinalizeExecute(ctx, rtState, pool, false) - } - - // Update runtime state. - if err = state.SetRuntimeState(ctx, rtState); err != nil { - return fmt.Errorf("failed to set runtime state: %w", err) - } - - // Emit events for all accepted commits. - for _, commit := range cc.Commits { - evV := ValueExecutorCommitted{ - ID: cc.ID, - Event: roothash.ExecutorCommittedEvent{ - Commit: commit, - }, - } - ctx.EmitEvent( - tmapi.NewEventBuilder(app.Name()). 
- Attribute(KeyExecutorCommitted, cbor.Marshal(evV)). - Attribute(KeyRuntimeID, ValueRuntimeID(cc.ID)), - ) - } - - return nil -} - -func (app *rootHashApplication) mergeCommit( - ctx *abciAPI.Context, - state *roothashState.MutableState, - mc *roothash.MergeCommit, -) (err error) { - if ctx.IsCheckOnly() { - return nil - } - - // Charge gas for this transaction. - params, err := state.ConsensusParameters(ctx) - if err != nil { - ctx.Logger().Error("MergeCommit: failed to fetch consensus parameters", - "err", err, - ) - return err - } - if err = ctx.Gas().UseGas(1, roothash.GasOpMergeCommit, params.GasCosts); err != nil { - return err - } - - rtState, sv, nl, err := app.getRuntimeState(ctx, state, mc.ID) - if err != nil { - return err - } - - // Add commitments. - for _, commit := range mc.Commits { - if err = rtState.Round.AddMergeCommitment(ctx, &commit, sv, nl); err != nil { // nolint: gosec - ctx.Logger().Error("failed to add merge commitment to round", - "err", err, - "round", rtState.CurrentBlock.Header.Round, - ) - return err - } } // Try to finalize round. @@ -215,17 +146,17 @@ func (app *rootHashApplication) mergeCommit( } // Emit events for all accepted commits. - for _, commit := range mc.Commits { - evV := ValueMergeCommitted{ - ID: mc.ID, - Event: roothash.MergeCommittedEvent{ + for _, commit := range cc.Commits { + evV := ValueExecutorCommitted{ + ID: cc.ID, + Event: roothash.ExecutorCommittedEvent{ Commit: commit, }, } ctx.EmitEvent( tmapi.NewEventBuilder(app.Name()). - Attribute(KeyMergeCommitted, cbor.Marshal(evV)). - Attribute(KeyRuntimeID, ValueRuntimeID(mc.ID)), + Attribute(KeyExecutorCommitted, cbor.Marshal(evV)). + Attribute(KeyRuntimeID, ValueRuntimeID(cc.ID)), ) } diff --git a/go/consensus/tendermint/apps/scheduler/scheduler.go b/go/consensus/tendermint/apps/scheduler/scheduler.go index e615f66c2e8..78e3e163501 100644 --- a/go/consensus/tendermint/apps/scheduler/scheduler.go +++ b/go/consensus/tendermint/apps/scheduler/scheduler.go @@ -38,7 +38,6 @@ var ( RNGContextExecutor = []byte("EkS-ABCI-Compute") RNGContextStorage = []byte("EkS-ABCI-Storage") RNGContextTransactionScheduler = []byte("EkS-ABCI-TransactionScheduler") - RNGContextMerge = []byte("EkS-ABCI-Merge") RNGContextValidators = []byte("EkS-ABCI-Validators") RNGContextEntities = []byte("EkS-ABCI-Entities") ) @@ -169,7 +168,6 @@ func (app *schedulerApplication) BeginBlock(ctx *api.Context, request types.Requ kinds := []scheduler.CommitteeKind{ scheduler.KindComputeExecutor, scheduler.KindComputeTxnScheduler, - scheduler.KindComputeMerge, scheduler.KindStorage, } for _, kind := range kinds { @@ -346,19 +344,6 @@ func (app *schedulerApplication) isSuitableTransactionScheduler(ctx *api.Context return false } -func (app *schedulerApplication) isSuitableMergeWorker(ctx *api.Context, n *node.Node, rt *registry.Runtime) bool { - if !n.HasRoles(node.RoleComputeWorker) { - return false - } - for _, nrt := range n.Runtimes { - if !nrt.ID.Equal(&rt.ID) { - continue - } - return true - } - return false -} - // GetPerm generates a permutation that we use to choose nodes from a list of eligible nodes to elect. 
func GetPerm(beacon []byte, runtimeID common.Namespace, rngCtx []byte, nrNodes int) ([]int, error) { drbg, err := drbg.New(crypto.SHA512, beacon, runtimeID[:], rngCtx) @@ -404,11 +389,6 @@ func (app *schedulerApplication) electCommittee( isSuitableFn = app.isSuitableExecutorWorker workerSize = int(rt.Executor.GroupSize) backupSize = int(rt.Executor.GroupBackupSize) - case scheduler.KindComputeMerge: - rngCtx = RNGContextMerge - isSuitableFn = app.isSuitableMergeWorker - workerSize = int(rt.Merge.GroupSize) - backupSize = int(rt.Merge.GroupBackupSize) case scheduler.KindComputeTxnScheduler: rngCtx = RNGContextTransactionScheduler isSuitableFn = app.isSuitableTransactionScheduler diff --git a/go/consensus/tendermint/roothash/roothash.go b/go/consensus/tendermint/roothash/roothash.go index 94e741a52d5..e08b4c9407c 100644 --- a/go/consensus/tendermint/roothash/roothash.go +++ b/go/consensus/tendermint/roothash/roothash.go @@ -593,16 +593,6 @@ func EventsFromTendermint( ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, FinalizedEvent: &api.FinalizedEvent{Round: value.Round}} events = append(events, ev) - case bytes.Equal(key, app.KeyMergeDiscrepancyDetected): - // A merge discrepancy has been detected. - var value app.ValueMergeDiscrepancyDetected - if err := cbor.Unmarshal(val, &value); err != nil { - errs = multierror.Append(errs, fmt.Errorf("roothash: corrupt MergeDiscrepancy event: %w", err)) - continue - } - - ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, MergeDiscrepancyDetected: &value.Event} - events = append(events, ev) case bytes.Equal(key, app.KeyExecutionDiscrepancyDetected): // An execution discrepancy has been detected. var value app.ValueExecutionDiscrepancyDetected @@ -623,16 +613,6 @@ func EventsFromTendermint( ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, ExecutorCommitted: &value.Event} events = append(events, ev) - case bytes.Equal(key, app.KeyMergeCommitted): - // A merge commit has been processed. - var value app.ValueMergeCommitted - if err := cbor.Unmarshal(val, &value); err != nil { - errs = multierror.Append(errs, fmt.Errorf("roothash: corrupt ValueMergeCommitted event: %w", err)) - continue - } - - ev := &api.Event{RuntimeID: value.ID, Height: height, TxHash: txHash, MergeCommitted: &value.Event} - events = append(events, ev) case bytes.Equal(key, app.KeyRuntimeID): // Runtime ID attribute (Base64-encoded to allow queries). 
default: diff --git a/go/genesis/genesis_test.go b/go/genesis/genesis_test.go index 294690a8a8f..f7ae4695a5c 100644 --- a/go/genesis/genesis_test.go +++ b/go/genesis/genesis_test.go @@ -203,10 +203,6 @@ func TestGenesisSanityCheck(t *testing.T) { GroupSize: 1, RoundTimeout: 1 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - RoundTimeout: 1 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: 1, Algorithm: "batching", @@ -219,8 +215,6 @@ func TestGenesisSanityCheck(t *testing.T) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index f45327078fb..a4ec56dc9fe 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -89,11 +89,6 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { GroupBackupSize: 1, RoundTimeout: 20 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 2, - GroupBackupSize: 1, - RoundTimeout: 20 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 2, @@ -106,8 +101,6 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-node/cmd/debug/byzantine/byzantine.go b/go/oasis-node/cmd/debug/byzantine/byzantine.go index d36d057bc90..02350e07c80 100644 --- a/go/oasis-node/cmd/debug/byzantine/byzantine.go +++ b/go/oasis-node/cmd/debug/byzantine/byzantine.go @@ -8,7 +8,6 @@ import ( flag "github.com/spf13/pflag" "github.com/spf13/viper" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/logging" "github.com/oasisprotocol/oasis-core/go/common/node" @@ -19,7 +18,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/grpc" "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/metrics" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" "github.com/oasisprotocol/oasis-core/go/runtime/transaction" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" @@ -57,21 +55,6 @@ var ( Short: "act as an executor worker that registers and doesn't do any work", Run: doExecutorStraggler, } - mergeHonestCmd = &cobra.Command{ - Use: "merge-honest", - Short: "act as an honest merge worker", - Run: doMergeHonest, - } - mergeWrongCmd = &cobra.Command{ - Use: "merge-wrong", - Short: "act as a merge worker that commits wrong result", - Run: doMergeWrong, - } - mergeStragglerCmd = &cobra.Command{ - Use: "merge-straggler", - Short: "act as a merge worker that registers and doesn't do any work", - Run: doMergeStraggler, - } ) func activateCommonConfig(cmd *cobra.Command, args []string) { @@ -150,13 +133,6 @@ func doExecutorHonest(cmd *cobra.Command, args []string) { if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { panic(fmt.Sprintf("scheduler check not scheduled 
txnscheduler failed: %+v", err)) } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeMerge, err)) - } - if err = schedulerCheckNotScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled merge failed: %+v", err)) - } logger.Debug("executor honest: connecting to storage committee") hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) @@ -208,8 +184,8 @@ func doExecutorHonest(cmd *cobra.Command, args []string) { panic(fmt.Sprintf("compute create commitment failed: %+v", err)) } - if err = cbc.publishToCommittee(ht, electionHeight, mergeCommittee, scheduler.Worker, ph, defaultRuntimeID, electionHeight); err != nil { - panic(fmt.Sprintf("compute publish to committee merge worker failed: %+v", err)) + if err = cbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { + panic(fmt.Sprintf("compute publish to chain failed: %+v", err)) } logger.Debug("executor honest: commitment sent") } @@ -283,13 +259,6 @@ func doExecutorWrong(cmd *cobra.Command, args []string) { if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeMerge, err)) - } - if err = schedulerCheckNotScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled merge failed: %+v", err)) - } logger.Debug("executor honest: connecting to storage committee") hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) @@ -341,8 +310,8 @@ func doExecutorWrong(cmd *cobra.Command, args []string) { panic(fmt.Sprintf("compute create commitment failed: %+v", err)) } - if err = cbc.publishToCommittee(ht, electionHeight, mergeCommittee, scheduler.Worker, ph, defaultRuntimeID, electionHeight); err != nil { - panic(fmt.Sprintf("compute publish to committee merge worker failed: %+v", err)) + if err = cbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { + panic(fmt.Sprintf("compute publish to chain failed: %+v", err)) } logger.Debug("executor wrong: commitment sent") } @@ -411,13 +380,6 @@ func doExecutorStraggler(cmd *cobra.Command, args []string) { if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeMerge, err)) - } - if err = schedulerCheckNotScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled merge failed: %+v", err)) - } cbc := newComputeBatchContext() @@ -429,333 +391,11 @@ func doExecutorStraggler(cmd *cobra.Command, args []string) { logger.Debug("executor straggler: bailing") } -func doMergeHonest(cmd 
*cobra.Command, args []string) { - if err := common.Init(); err != nil { - common.EarlyLogAndExit(err) - } - - defaultIdentity, err := initDefaultIdentity(common.DataDir()) - if err != nil { - panic(fmt.Sprintf("init default identity failed: %+v", err)) - } - - ht := newHonestTendermint() - if err = ht.start(defaultIdentity, common.DataDir()); err != nil { - panic(fmt.Sprintf("honest Tendermint start failed: %+v", err)) - } - defer func() { - if err1 := ht.stop(); err1 != nil { - panic(fmt.Sprintf("honest Tendermint stop failed: %+v", err1)) - } - }() - - ph := newP2PHandle() - if err = ph.start(ht, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("P2P start failed: %+v", err)) - } - defer func() { - if err1 := ph.stop(); err1 != nil { - panic(fmt.Sprintf("P2P stop failed: %+v", err1)) - } - }() - - activationEpoch := epochtime.EpochTime(viper.GetUint64(CfgActivationEpoch)) - if err = epochtimeWaitForEpoch(ht.service, activationEpoch); err != nil { - panic(fmt.Sprintf("epochtimeWaitForEpoch: %+v", err)) - } - - if err = registryRegisterNode(ht.service, defaultIdentity, common.DataDir(), fakeAddresses, ph.service.Addresses(), defaultRuntimeID, nil, node.RoleComputeWorker); err != nil { - panic(fmt.Sprintf("registryRegisterNode: %+v", err)) - } - - electionHeight, err := schedulerNextElectionHeight(ht.service, activationEpoch+1) - if err != nil { - panic(fmt.Sprintf("scheduler next election height failed: %+v", err)) - } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s at height %d failed: %+v", scheduler.KindComputeMerge, electionHeight, err)) - } - if err = schedulerCheckScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public(), scheduler.Worker); err != nil { - panic(fmt.Sprintf("scheduler check scheduled failed: %+v", err)) - } - logger.Debug("merge honest: merge schedule ok") - executorCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeExecutor, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeExecutor, err)) - } - if err = schedulerCheckNotScheduled(executorCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled executor failed: %+v", err)) - } - storageCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindStorage, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindStorage, err)) - } - transactionSchedulerCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeTxnScheduler, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeTxnScheduler, err)) - } - if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) - } - - logger.Debug("merge honest: connecting to storage committee") - hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) - if err != nil { - panic(fmt.Sprintf("storage connect to committee failed: %+v", err)) - } - defer storageBroadcastCleanup(hnss) - - mbc := newMergeBatchContext() - - if err = mbc.loadCurrentBlock(ht, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge load current block failed: 
%+v", err)) - } - - // Receive 1 committee * 2 commitments per committee. - if err = mbc.receiveCommitments(ph, 2); err != nil { - panic(fmt.Sprintf("merge receive commitments failed: %+v", err)) - } - logger.Debug("merge honest: received commitments", "commitments", mbc.commitments) - - ctx := context.Background() - - // Process merge honestly. - if err = mbc.process(ctx, hnss); err != nil { - panic(fmt.Sprintf("merge process failed: %+v", err)) - } - logger.Debug("merge honest: processed", - "new_block", mbc.newBlock, - ) - - if err = mbc.createCommitment(defaultIdentity); err != nil { - panic(fmt.Sprintf("merge create commitment failed: %+v", err)) - } - - if err = mbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge publish to chain failed: %+v", err)) - } - logger.Debug("merge honest: commitment sent") -} - -func doMergeWrong(cmd *cobra.Command, args []string) { - if err := common.Init(); err != nil { - common.EarlyLogAndExit(err) - } - - defaultIdentity, err := initDefaultIdentity(common.DataDir()) - if err != nil { - panic(fmt.Sprintf("init default identity failed: %+v", err)) - } - - ht := newHonestTendermint() - if err = ht.start(defaultIdentity, common.DataDir()); err != nil { - panic(fmt.Sprintf("honest Tendermint start failed: %+v", err)) - } - defer func() { - if err1 := ht.stop(); err1 != nil { - panic(fmt.Sprintf("honest Tendermint stop failed: %+v", err1)) - } - }() - - ph := newP2PHandle() - if err = ph.start(ht, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("P2P start failed: %+v", err)) - } - defer func() { - if err1 := ph.stop(); err1 != nil { - panic(fmt.Sprintf("P2P stop failed: %+v", err1)) - } - }() - - activationEpoch := epochtime.EpochTime(viper.GetUint64(CfgActivationEpoch)) - if err = epochtimeWaitForEpoch(ht.service, activationEpoch); err != nil { - panic(fmt.Sprintf("epochtimeWaitForEpoch: %+v", err)) - } - - if err = registryRegisterNode(ht.service, defaultIdentity, common.DataDir(), fakeAddresses, ph.service.Addresses(), defaultRuntimeID, nil, node.RoleComputeWorker); err != nil { - panic(fmt.Sprintf("registryRegisterNode: %+v", err)) - } - - electionHeight, err := schedulerNextElectionHeight(ht.service, activationEpoch+1) - if err != nil { - panic(fmt.Sprintf("scheduler next election height failed: %+v", err)) - } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s at height %d failed: %+v", scheduler.KindComputeMerge, electionHeight, err)) - } - if err = schedulerCheckScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public(), scheduler.Worker); err != nil { - panic(fmt.Sprintf("scheduler check scheduled failed: %+v", err)) - } - logger.Debug("merge wrong: merge schedule ok") - executorCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeExecutor, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeExecutor, err)) - } - if err = schedulerCheckNotScheduled(executorCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled executor failed: %+v", err)) - } - storageCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindStorage, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindStorage, err)) - } - transactionSchedulerCommittee, 
err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeTxnScheduler, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeTxnScheduler, err)) - } - if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) - } - - logger.Debug("merge wrong: connecting to storage committee") - hnss, err := storageConnectToCommittee(ht, electionHeight, storageCommittee, scheduler.Worker, defaultIdentity) - if err != nil { - panic(fmt.Sprintf("storage connect to committee failed: %+v", err)) - } - defer storageBroadcastCleanup(hnss) - - mbc := newMergeBatchContext() - - if err = mbc.loadCurrentBlock(ht, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge load current block failed: %+v", err)) - } - - // Receive 1 committee * 2 commitments per committee. - if err = mbc.receiveCommitments(ph, 2); err != nil { - panic(fmt.Sprintf("merge receive commitments failed: %+v", err)) - } - logger.Debug("merge wrong: received commitments", "commitments", mbc.commitments) - - ctx := context.Background() - - // Process the merge wrong. - origCommitments := mbc.commitments - var emptyRoot hash.Hash - emptyRoot.Empty() - mbc.commitments = []*commitment.OpenExecutorCommitment{ - { - Body: &commitment.ComputeBody{ - Header: commitment.ComputeResultsHeader{ - IORoot: emptyRoot, - StateRoot: mbc.currentBlock.Header.StateRoot, - }, - }, - }, - } - - if err = mbc.process(ctx, hnss); err != nil { - panic(fmt.Sprintf("merge process failed: %+v", err)) - } - logger.Debug("merge wrong: processed", - "new_block", mbc.newBlock, - ) - - mbc.commitments = origCommitments - - // Sanity check the merge results. - if mbc.newBlock.Header.IORoot != emptyRoot { - panic(fmt.Sprintf("merge of empty IO trees should be empty. got %s, expected %s", mbc.newBlock.Header.IORoot, emptyRoot)) - } - if mbc.newBlock.Header.StateRoot != mbc.currentBlock.Header.StateRoot { - panic(fmt.Sprintf("merge of identical state trees should be the same. 
got %s, expected %s", mbc.newBlock.Header.StateRoot, mbc.currentBlock.Header.StateRoot)) - } - - if err = mbc.createCommitment(defaultIdentity); err != nil { - panic(fmt.Sprintf("merge create commitment failed: %+v", err)) - } - - if err = mbc.publishToChain(ht.service, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("merge publish to chain failed: %+v", err)) - } - logger.Debug("merge wrong: commitment sent") -} - -func doMergeStraggler(cmd *cobra.Command, args []string) { - if err := common.Init(); err != nil { - common.EarlyLogAndExit(err) - } - - defaultIdentity, err := initDefaultIdentity(common.DataDir()) - if err != nil { - panic(fmt.Sprintf("init default identity failed: %+v", err)) - } - - ht := newHonestTendermint() - if err = ht.start(defaultIdentity, common.DataDir()); err != nil { - panic(fmt.Sprintf("honest Tendermint start failed: %+v", err)) - } - defer func() { - if err1 := ht.stop(); err1 != nil { - panic(fmt.Sprintf("honest Tendermint stop failed: %+v", err1)) - } - }() - - ph := newP2PHandle() - if err = ph.start(ht, defaultIdentity, defaultRuntimeID); err != nil { - panic(fmt.Sprintf("P2P start failed: %+v", err)) - } - defer func() { - if err1 := ph.stop(); err1 != nil { - panic(fmt.Sprintf("P2P stop failed: %+v", err1)) - } - }() - - activationEpoch := epochtime.EpochTime(viper.GetUint64(CfgActivationEpoch)) - if err = epochtimeWaitForEpoch(ht.service, activationEpoch); err != nil { - panic(fmt.Sprintf("epochtimeWaitForEpoch: %+v", err)) - } - - if err = registryRegisterNode(ht.service, defaultIdentity, common.DataDir(), fakeAddresses, ph.service.Addresses(), defaultRuntimeID, nil, node.RoleComputeWorker); err != nil { - panic(fmt.Sprintf("registryRegisterNode: %+v", err)) - } - - electionHeight, err := schedulerNextElectionHeight(ht.service, activationEpoch+1) - if err != nil { - panic(fmt.Sprintf("scheduler next election height failed: %+v", err)) - } - mergeCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeMerge, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s at height %d failed: %+v", scheduler.KindComputeMerge, electionHeight, err)) - } - if err = schedulerCheckScheduled(mergeCommittee, defaultIdentity.NodeSigner.Public(), scheduler.Worker); err != nil { - panic(fmt.Sprintf("scheduler check scheduled failed: %+v", err)) - } - logger.Debug("merge straggler: merge schedule ok") - executorCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeExecutor, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeExecutor, err)) - } - if err = schedulerCheckNotScheduled(executorCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled executor failed: %+v", err)) - } - transactionSchedulerCommittee, err := schedulerGetCommittee(ht, electionHeight, scheduler.KindComputeTxnScheduler, defaultRuntimeID) - if err != nil { - panic(fmt.Sprintf("scheduler get committee %s failed: %+v", scheduler.KindComputeTxnScheduler, err)) - } - if err = schedulerCheckNotScheduled(transactionSchedulerCommittee, defaultIdentity.NodeSigner.Public()); err != nil { - panic(fmt.Sprintf("scheduler check not scheduled txnscheduler failed: %+v", err)) - } - - mbc := newMergeBatchContext() - - // Receive 1 committee * 2 commitments per committee. 
- if err = mbc.receiveCommitments(ph, 2); err != nil { - panic(fmt.Sprintf("merge receive commitments failed: %+v", err)) - } - logger.Debug("merge straggler: received commitments", "commitments", mbc.commitments) - - logger.Debug("merge straggler: bailing") -} - // Register registers the byzantine sub-command and all of its children. func Register(parentCmd *cobra.Command) { byzantineCmd.AddCommand(executorHonestCmd) byzantineCmd.AddCommand(executorWrongCmd) byzantineCmd.AddCommand(executorStragglerCmd) - byzantineCmd.AddCommand(mergeHonestCmd) - byzantineCmd.AddCommand(mergeWrongCmd) - byzantineCmd.AddCommand(mergeStragglerCmd) parentCmd.AddCommand(byzantineCmd) } diff --git a/go/oasis-node/cmd/debug/byzantine/executor.go b/go/oasis-node/cmd/debug/byzantine/executor.go index ee90a1bd84b..92af7b85f49 100644 --- a/go/oasis-node/cmd/debug/byzantine/executor.go +++ b/go/oasis-node/cmd/debug/byzantine/executor.go @@ -9,15 +9,14 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" "github.com/oasisprotocol/oasis-core/go/common/identity" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" "github.com/oasisprotocol/oasis-core/go/runtime/transaction" - scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" storage "github.com/oasisprotocol/oasis-core/go/storage/api" "github.com/oasisprotocol/oasis-core/go/storage/mkvs" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/syncer" "github.com/oasisprotocol/oasis-core/go/storage/mkvs/writelog" - "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" ) type computeBatchContext struct { @@ -163,6 +162,7 @@ func (cbc *computeBatchContext) createCommitment(id *identity.Identity, rak sign storageSigs = append(storageSigs, receipt.Signature) } header := commitment.ComputeResultsHeader{ + Round: cbc.bd.Header.Round + 1, PreviousHash: cbc.bd.Header.EncodedHash(), IORoot: cbc.newIORoot, StateRoot: cbc.newStateRoot, @@ -170,7 +170,6 @@ func (cbc *computeBatchContext) createCommitment(id *identity.Identity, rak sign Messages: []*block.Message{}, } computeBody := &commitment.ComputeBody{ - CommitteeID: committeeID, Header: header, StorageSignatures: storageSigs, TxnSchedSig: cbc.bdSig, @@ -194,13 +193,9 @@ func (cbc *computeBatchContext) createCommitment(id *identity.Identity, rak sign return nil } -func (cbc *computeBatchContext) publishToCommittee(ht *honestTendermint, height int64, committee *scheduler.Committee, role scheduler.Role, ph *p2pHandle, runtimeID common.Namespace, groupVersion int64) error { - if err := schedulerPublishToCommittee(ph, runtimeID, &p2p.Message{ - GroupVersion: groupVersion, - SpanContext: nil, - ExecutorCommit: cbc.commit, - }); err != nil { - return fmt.Errorf("scheduler publish to committee: %w", err) +func (cbc *computeBatchContext) publishToChain(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace) error { + if err := roothashExecutorCommit(svc, id, runtimeID, []commitment.ExecutorCommitment{*cbc.commit}); err != nil { + return fmt.Errorf("roothash merge commentment: %w", err) } return nil diff --git a/go/oasis-node/cmd/debug/byzantine/merge.go b/go/oasis-node/cmd/debug/byzantine/merge.go deleted file mode 100644 index 9e671d4fe7f..00000000000 --- a/go/oasis-node/cmd/debug/byzantine/merge.go +++ /dev/null @@ -1,120 +0,0 @@ -package byzantine - -import ( - "context" - "fmt" 
- - "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - "github.com/oasisprotocol/oasis-core/go/common/identity" - consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" -) - -type mergeBatchContext struct { - currentBlock *block.Block - commitments []*commitment.OpenExecutorCommitment - - newBlock *block.Block - commit *commitment.MergeCommitment -} - -func newMergeBatchContext() *mergeBatchContext { - return &mergeBatchContext{} -} - -func (mbc *mergeBatchContext) loadCurrentBlock(ht *honestTendermint, runtimeID common.Namespace) error { - var err error - mbc.currentBlock, err = roothashGetLatestBlock(ht, 0, runtimeID) - if err != nil { - return fmt.Errorf("roothash get latest block: %w", err) - } - - return nil -} - -func mergeReceiveCommitment(ph *p2pHandle) (*commitment.OpenExecutorCommitment, error) { - var req p2pReqRes - for { - req = <-ph.requests - req.responseCh <- nil - - if req.msg.ExecutorCommit == nil { - continue - } - - break - } - - openCom, err := req.msg.ExecutorCommit.Open() - if err != nil { - return nil, fmt.Errorf("request message ExecutorWorkerFinished Open: %w", err) - } - - return openCom, nil -} - -func (mbc *mergeBatchContext) receiveCommitments(ph *p2pHandle, count int) error { - for i := 0; i < count; i++ { - openCom, err := mergeReceiveCommitment(ph) - if err != nil { - return fmt.Errorf("merge receive commitments %d: %w", i, err) - } - mbc.commitments = append(mbc.commitments, openCom) - } - - return nil -} - -func (mbc *mergeBatchContext) process(ctx context.Context, hnss []*honestNodeStorage) error { - collectedCommittees := make(map[hash.Hash]bool) - var ioRoots, stateRoots []hash.Hash - for _, commitment := range mbc.commitments { - if collectedCommittees[commitment.Body.CommitteeID] { - continue - } - collectedCommittees[commitment.Body.CommitteeID] = true - ioRoots = append(ioRoots, commitment.Body.Header.IORoot) - stateRoots = append(stateRoots, commitment.Body.Header.StateRoot) - } - - if len(collectedCommittees) != 1 { - return fmt.Errorf("multiple committees not supported: %d", len(collectedCommittees)) - } - signatures := mbc.commitments[0].Body.StorageSignatures - messages := mbc.commitments[0].Body.Header.Messages - - mbc.newBlock = block.NewEmptyBlock(mbc.currentBlock, 0, block.Normal) - mbc.newBlock.Header.IORoot = ioRoots[0] - mbc.newBlock.Header.StateRoot = stateRoots[0] - mbc.newBlock.Header.Messages = messages - mbc.newBlock.Header.StorageSignatures = signatures - - return nil -} - -func (mbc *mergeBatchContext) createCommitment(id *identity.Identity) error { - var executorCommits []commitment.ExecutorCommitment - for _, openCom := range mbc.commitments { - executorCommits = append(executorCommits, openCom.ExecutorCommitment) - } - var err error - mbc.commit, err = commitment.SignMergeCommitment(id.NodeSigner, &commitment.MergeBody{ - ExecutorCommits: executorCommits, - Header: mbc.newBlock.Header, - }) - if err != nil { - return fmt.Errorf("commitment sign merge commitment: %w", err) - } - - return nil -} - -func (mbc *mergeBatchContext) publishToChain(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace) error { - if err := roothashMergeCommit(svc, id, runtimeID, []commitment.MergeCommitment{*mbc.commit}); err != nil { - return fmt.Errorf("roothash merge commentment: %w", err) - } - - return nil -} diff --git 
a/go/oasis-node/cmd/debug/byzantine/roothash.go b/go/oasis-node/cmd/debug/byzantine/roothash.go index 72bc6ff2c76..b3f17c74813 100644 --- a/go/oasis-node/cmd/debug/byzantine/roothash.go +++ b/go/oasis-node/cmd/debug/byzantine/roothash.go @@ -7,15 +7,10 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/identity" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" ) -func roothashGetLatestBlock(ht *honestTendermint, height int64, runtimeID common.Namespace) (*block.Block, error) { - return ht.service.RootHash().GetLatestBlock(context.Background(), runtimeID, height) -} - -func roothashMergeCommit(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace, commits []commitment.MergeCommitment) error { - tx := roothash.NewMergeCommitTx(0, nil, runtimeID, commits) +func roothashExecutorCommit(svc consensus.Backend, id *identity.Identity, runtimeID common.Namespace, commits []commitment.ExecutorCommitment) error { + tx := roothash.NewExecutorCommitTx(0, nil, runtimeID, commits) return consensus.SignAndSubmitTx(context.Background(), svc, id.NodeSigner, tx) } diff --git a/go/oasis-node/cmd/debug/byzantine/scheduler.go b/go/oasis-node/cmd/debug/byzantine/scheduler.go index 7ab10f27765..02d1cc4e9fc 100644 --- a/go/oasis-node/cmd/debug/byzantine/scheduler.go +++ b/go/oasis-node/cmd/debug/byzantine/scheduler.go @@ -3,7 +3,6 @@ package byzantine import ( "context" "fmt" - "time" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" @@ -11,7 +10,6 @@ import ( consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" epochtime "github.com/oasisprotocol/oasis-core/go/epochtime/api" scheduler "github.com/oasisprotocol/oasis-core/go/scheduler/api" - "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" ) func schedulerNextElectionHeight(svc consensus.Backend, epoch epochtime.EpochTime) (int64, error) { @@ -103,18 +101,3 @@ func schedulerForRoleInCommittee(ht *honestTendermint, height int64, committee * return nil } - -func schedulerPublishToCommittee(ph *p2pHandle, runtimeID common.Namespace, message *p2p.Message) error { - // HACK: So, the ever-byzantine debug code is written under the - // assumption that it's possible to do p2p message delivery in - // a synchronous manner. - // - // This is no longer possible. Just publish and strategically - // sleep. Eventually someone could/should rewrite all of this - // debug code. The only thing that uses it is CI anyway. 
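With the merge committee gone, the byzantine executor no longer publishes its commitment to a merge node over p2p; publishToChain wraps it in a roothash ExecutorCommit transaction (via the roothashExecutorCommit helper above) and submits it straight to the consensus layer. Below is a minimal standalone sketch of that flow; the types, the consensusSubmitter interface and the JSON encoding are stand-ins for illustration, not the oasis-core API.

```go
package main

import (
	"context"
	"encoding/json"
	"fmt"
)

// executorCommitment stands in for commitment.ExecutorCommitment: an opaque,
// already-signed blob produced by an executor node.
type executorCommitment struct {
	Blob []byte `json:"blob"`
}

// executorCommitTx stands in for the roothash ExecutorCommit transaction body.
type executorCommitTx struct {
	RuntimeID string               `json:"id"`
	Commits   []executorCommitment `json:"commits"`
}

// consensusSubmitter stands in for the consensus backend's sign-and-submit helper.
type consensusSubmitter interface {
	SignAndSubmitTx(ctx context.Context, tx []byte) error
}

// publishToChain mirrors the shape of the new byzantine helper: wrap the
// commitments for a round into a single transaction and submit it directly
// to the consensus layer, with no intermediate merge committee.
func publishToChain(ctx context.Context, c consensusSubmitter, runtimeID string, commits []executorCommitment) error {
	raw, err := json.Marshal(executorCommitTx{RuntimeID: runtimeID, Commits: commits})
	if err != nil {
		return fmt.Errorf("marshal executor commit tx: %w", err)
	}
	if err := c.SignAndSubmitTx(ctx, raw); err != nil {
		return fmt.Errorf("roothash executor commit: %w", err)
	}
	return nil
}

// loggingSubmitter is a trivial stand-in backend used only for the example run.
type loggingSubmitter struct{}

func (loggingSubmitter) SignAndSubmitTx(_ context.Context, tx []byte) error {
	fmt.Printf("submitting %d-byte transaction\n", len(tx))
	return nil
}

func main() {
	commits := []executorCommitment{{Blob: []byte("signed compute body")}}
	if err := publishToChain(context.Background(), loggingSubmitter{}, "runtime-a", commits); err != nil {
		fmt.Println("error:", err)
	}
}
```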
- - ph.service.Publish(ph.context, runtimeID, message) - time.Sleep(3 * time.Second) // Sigh - - return nil -} diff --git a/go/oasis-node/cmd/debug/byzantine/scheduler_test.go b/go/oasis-node/cmd/debug/byzantine/scheduler_test.go index d36a5c30c14..d0bbc57fc59 100644 --- a/go/oasis-node/cmd/debug/byzantine/scheduler_test.go +++ b/go/oasis-node/cmd/debug/byzantine/scheduler_test.go @@ -20,12 +20,9 @@ func hasSuitablePermutations(t *testing.T, beacon []byte, runtimeID common.Names require.NoError(t, err, "schedulerapp.GetPerm compute") transactionSchedulerIdxs, err := schedulerapp.GetPerm(beacon, runtimeID, schedulerapp.RNGContextTransactionScheduler, numComputeNodes) require.NoError(t, err, "schedulerapp.GetPerm transaction scheduler") - mergeIdxs, err := schedulerapp.GetPerm(beacon, runtimeID, schedulerapp.RNGContextMerge, numComputeNodes) - require.NoError(t, err, "schedulerapp.GetPerm merge") fmt.Printf("%20s schedule %v\n", scheduler.KindComputeExecutor, computeIdxs) fmt.Printf("%20s schedule %v\n", scheduler.KindComputeTxnScheduler, transactionSchedulerIdxs) - fmt.Printf("%20s schedule %v\n", scheduler.KindComputeMerge, mergeIdxs) committees := map[scheduler.CommitteeKind]struct { workers int @@ -34,12 +31,10 @@ func hasSuitablePermutations(t *testing.T, beacon []byte, runtimeID common.Names }{ scheduler.KindComputeExecutor: {workers: 2, backupWorkers: 1, perm: computeIdxs}, scheduler.KindComputeTxnScheduler: {workers: 1, backupWorkers: 0, perm: transactionSchedulerIdxs}, - scheduler.KindComputeMerge: {workers: 2, backupWorkers: 1, perm: mergeIdxs}, } for _, c1Kind := range []scheduler.CommitteeKind{ scheduler.KindComputeExecutor, - scheduler.KindComputeMerge, } { c1 := committees[c1Kind] maxWorker := c1.workers diff --git a/go/oasis-node/cmd/debug/txsource/workload/registration.go b/go/oasis-node/cmd/debug/txsource/workload/registration.go index d68090b7735..c5fb511b850 100644 --- a/go/oasis-node/cmd/debug/txsource/workload/registration.go +++ b/go/oasis-node/cmd/debug/txsource/workload/registration.go @@ -52,10 +52,6 @@ func getRuntime(entityID signature.PublicKey, id common.Namespace) *registry.Run GroupSize: 1, RoundTimeout: 1 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - RoundTimeout: 1 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: 1, Algorithm: "batching", @@ -68,8 +64,6 @@ func getRuntime(entityID signature.PublicKey, id common.Namespace) *registry.Run MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-node/cmd/node/node.go b/go/oasis-node/cmd/node/node.go index a92775e6b5f..9c24a24108c 100644 --- a/go/oasis-node/cmd/node/node.go +++ b/go/oasis-node/cmd/node/node.go @@ -58,7 +58,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" "github.com/oasisprotocol/oasis-core/go/worker/compute" "github.com/oasisprotocol/oasis-core/go/worker/compute/executor" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge" "github.com/oasisprotocol/oasis-core/go/worker/compute/txnscheduler" workerConsensusRPC "github.com/oasisprotocol/oasis-core/go/worker/consensusrpc" workerKeymanager "github.com/oasisprotocol/oasis-core/go/worker/keymanager" @@ -120,7 +119,6 @@ type Node struct { ExecutorWorker *executor.Worker StorageWorker *workerStorage.Worker TransactionSchedulerWorker *txnscheduler.Worker - MergeWorker *merge.Worker 
SentryWorker *workerSentry.Worker P2P *p2p.P2P RegistrationWorker *registration.Worker @@ -184,11 +182,6 @@ func (n *Node) waitReady(logger *logging.Logger) { <-n.TransactionSchedulerWorker.Initialized() } - // Wait for the merge worker. - if n.MergeWorker.Enabled() { - <-n.MergeWorker.Initialized() - } - // Wait for the common worker. if n.CommonWorker.Enabled() { <-n.CommonWorker.Initialized() @@ -292,8 +285,7 @@ func (n *Node) initRuntimeWorkers() error { // immediately when created, make sure that we don't start it if it is not // needed. // - // Currently, only executor, txn scheduler and merge workers need P2P - // transport. + // Currently, only executor and txn scheduler workers need P2P transport. if compute.Enabled() { p2pCtx, p2pSvc := service.NewContextCleanup(context.Background()) if genesisDoc.Registry.Parameters.DebugAllowUnroutableAddresses { @@ -389,21 +381,10 @@ func (n *Node) initRuntimeWorkers() error { } n.svcMgr.Register(n.StorageWorker) - // Initialize the merge worker. - n.MergeWorker, err = merge.New( - n.CommonWorker, - n.RegistrationWorker, - ) - if err != nil { - return err - } - n.svcMgr.Register(n.MergeWorker) - // Initialize the executor worker. n.ExecutorWorker, err = executor.New( dataDir, n.CommonWorker, - n.MergeWorker, n.RegistrationWorker, ) if err != nil { @@ -458,11 +439,6 @@ func (n *Node) startRuntimeWorkers(logger *logging.Logger) error { return err } - // Start the merge worker. - if err := n.MergeWorker.Start(); err != nil { - return err - } - // Start the common worker. if err := n.CommonWorker.Start(); err != nil { return err @@ -491,7 +467,6 @@ func (n *Node) startRuntimeWorkers(logger *logging.Logger) error { // Only start the external gRPC server if any workers are enabled. if n.StorageWorker.Enabled() || n.TransactionSchedulerWorker.Enabled() || - n.MergeWorker.Enabled() || n.KeymanagerWorker.Enabled() || n.ConsensusWorker.Enabled() { if err := n.CommonWorker.Grpc.Start(); err != nil { diff --git a/go/oasis-node/cmd/registry/runtime/runtime.go b/go/oasis-node/cmd/registry/runtime/runtime.go index a7080ffaa0e..0fe5b31813d 100644 --- a/go/oasis-node/cmd/registry/runtime/runtime.go +++ b/go/oasis-node/cmd/registry/runtime/runtime.go @@ -53,19 +53,11 @@ const ( CfgExecutorAllowedStragglers = "runtime.executor.allowed_stragglers" CfgExecutorRoundTimeout = "runtime.executor.round_timeout" - // Merge committee flags. - CfgMergeGroupSize = "runtime.merge.group_size" - CfgMergeGroupBackupSize = "runtime.merge.group_backup_size" - CfgMergeAllowedStragglers = "runtime.merge.allowed_stragglers" - CfgMergeRoundTimeout = "runtime.merge.round_timeout" - // Storage committee flags. 
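The node start-up path above drops every reference to the merge worker: it is no longer constructed in initRuntimeWorkers, started in startRuntimeWorkers, or waited on in waitReady. Here is a small sketch of the remaining wait-for-ready pattern, assuming only that each worker exposes Enabled and an Initialized channel; the worker interface and mockWorker type are illustrative stand-ins.

```go
package main

import (
	"fmt"
	"time"
)

// worker captures the minimal surface the node start-up code relies on: a
// worker may be disabled, and an enabled worker closes its channel once it
// has finished initializing.
type worker interface {
	Name() string
	Enabled() bool
	Initialized() <-chan struct{}
}

type mockWorker struct {
	name    string
	enabled bool
	ready   chan struct{}
}

func (w *mockWorker) Name() string                 { return w.name }
func (w *mockWorker) Enabled() bool                { return w.enabled }
func (w *mockWorker) Initialized() <-chan struct{} { return w.ready }

// waitReady blocks until every enabled worker reports that it is initialized,
// mirroring the structure of Node.waitReady once the merge worker is removed.
func waitReady(workers []worker) {
	for _, w := range workers {
		if !w.Enabled() {
			continue
		}
		<-w.Initialized()
		fmt.Printf("%s worker initialized\n", w.Name())
	}
}

func main() {
	executor := &mockWorker{name: "executor", enabled: true, ready: make(chan struct{})}
	storage := &mockWorker{name: "storage", enabled: true, ready: make(chan struct{})}
	keymanager := &mockWorker{name: "keymanager", enabled: false}

	go func() {
		time.Sleep(10 * time.Millisecond)
		close(executor.ready)
		close(storage.ready)
	}()

	waitReady([]worker{executor, storage, keymanager})
}
```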
CfgStorageGroupSize = "runtime.storage.group_size" CfgStorageMinWriteReplication = "runtime.storage.min_write_replication" CfgStorageMaxApplyWriteLogEntries = "runtime.storage.max_apply_write_log_entries" CfgStorageMaxApplyOps = "runtime.storage.max_apply_ops" - CfgStorageMaxMergeRoots = "runtime.storage.max_merge_roots" - CfgStorageMaxMergeOps = "runtime.storage.max_merge_ops" CfgStorageCheckpointInterval = "runtime.storage.checkpoint_interval" CfgStorageCheckpointNumKept = "runtime.storage.checkpoint_num_kept" CfgStorageCheckpointChunkSize = "runtime.storage.checkpoint_chunk_size" @@ -383,12 +375,6 @@ func runtimeFromFlags() (*registry.Runtime, signature.Signer, error) { // nolint AllowedStragglers: viper.GetUint64(CfgExecutorAllowedStragglers), RoundTimeout: viper.GetDuration(CfgExecutorRoundTimeout), }, - Merge: registry.MergeParameters{ - GroupSize: viper.GetUint64(CfgMergeGroupSize), - GroupBackupSize: viper.GetUint64(CfgMergeGroupBackupSize), - AllowedStragglers: viper.GetUint64(CfgMergeAllowedStragglers), - RoundTimeout: viper.GetDuration(CfgMergeRoundTimeout), - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: viper.GetUint64(CfgTxnSchedulerGroupSize), Algorithm: viper.GetString(CfgTxnSchedulerAlgorithm), @@ -401,8 +387,6 @@ func runtimeFromFlags() (*registry.Runtime, signature.Signer, error) { // nolint MinWriteReplication: viper.GetUint64(CfgStorageMinWriteReplication), MaxApplyWriteLogEntries: viper.GetUint64(CfgStorageMaxApplyWriteLogEntries), MaxApplyOps: viper.GetUint64(CfgStorageMaxApplyOps), - MaxMergeRoots: viper.GetUint64(CfgStorageMaxMergeRoots), - MaxMergeOps: viper.GetUint64(CfgStorageMaxMergeOps), CheckpointInterval: viper.GetUint64(CfgStorageCheckpointInterval), CheckpointNumKept: viper.GetUint64(CfgStorageCheckpointNumKept), CheckpointChunkSize: viper.GetUint64(CfgStorageCheckpointChunkSize), @@ -553,12 +537,6 @@ func init() { runtimeFlags.Uint64(CfgExecutorAllowedStragglers, 0, "Number of stragglers allowed per round in the runtime executor group") runtimeFlags.Duration(CfgExecutorRoundTimeout, 10*time.Second, "Executor committee round timeout for this runtime") - // Init Merge committee flags. - runtimeFlags.Uint64(CfgMergeGroupSize, 1, "Number of workers in the runtime merge group/committee") - runtimeFlags.Uint64(CfgMergeGroupBackupSize, 0, "Number of backup workers in the runtime merge group/committee") - runtimeFlags.Uint64(CfgMergeAllowedStragglers, 0, "Number of stragglers allowed per round in the runtime merge group") - runtimeFlags.Duration(CfgMergeRoundTimeout, 10*time.Second, "Merge committee round timeout for this runtime") - // Init Transaction scheduler flags. 
runtimeFlags.Uint64(CfgTxnSchedulerGroupSize, 1, "Number of transaction scheduler nodes for the runtime") runtimeFlags.String(CfgTxnSchedulerAlgorithm, "batching", "Transaction scheduling algorithm") @@ -571,8 +549,6 @@ func init() { runtimeFlags.Uint64(CfgStorageMinWriteReplication, 1, "Minimum required storage write replication") runtimeFlags.Uint64(CfgStorageMaxApplyWriteLogEntries, 100_000, "Maximum number of write log entries") runtimeFlags.Uint64(CfgStorageMaxApplyOps, 2, "Maximum number of apply operations in a batch") - runtimeFlags.Uint64(CfgStorageMaxMergeRoots, 1, "Maximum number of merge roots") - runtimeFlags.Uint64(CfgStorageMaxMergeOps, 2, "Maximum number of merge operations in a batch") runtimeFlags.Uint64(CfgStorageCheckpointInterval, 0, "Storage checkpoint interval (in rounds)") runtimeFlags.Uint64(CfgStorageCheckpointNumKept, 0, "Number of storage checkpoints to keep") runtimeFlags.Uint64(CfgStorageCheckpointChunkSize, 0, "Storage checkpoint chunk size") diff --git a/go/oasis-node/node_test.go b/go/oasis-node/node_test.go index 9b9dc46d2e9..d6e7b02aaa0 100644 --- a/go/oasis-node/node_test.go +++ b/go/oasis-node/node_test.go @@ -89,11 +89,6 @@ var ( GroupBackupSize: 0, RoundTimeout: 20 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - GroupBackupSize: 0, - RoundTimeout: 20 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 1, @@ -106,8 +101,6 @@ var ( MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-test-runner/oasis/cli/registry.go b/go/oasis-test-runner/oasis/cli/registry.go index f347636fc58..86a0f27ad66 100644 --- a/go/oasis-test-runner/oasis/cli/registry.go +++ b/go/oasis-test-runner/oasis/cli/registry.go @@ -58,16 +58,10 @@ func (r *RegistryHelpers) runRegistryRuntimeSubcommand( "--"+cmdRegRt.CfgExecutorGroupBackupSize, strconv.FormatUint(runtime.Executor.GroupBackupSize, 10), "--"+cmdRegRt.CfgExecutorAllowedStragglers, strconv.FormatUint(runtime.Executor.AllowedStragglers, 10), "--"+cmdRegRt.CfgExecutorRoundTimeout, runtime.Executor.RoundTimeout.String(), - "--"+cmdRegRt.CfgMergeGroupSize, strconv.FormatUint(runtime.Merge.GroupSize, 10), - "--"+cmdRegRt.CfgMergeGroupBackupSize, strconv.FormatUint(runtime.Merge.GroupBackupSize, 10), - "--"+cmdRegRt.CfgMergeAllowedStragglers, strconv.FormatUint(runtime.Merge.AllowedStragglers, 10), - "--"+cmdRegRt.CfgMergeRoundTimeout, runtime.Merge.RoundTimeout.String(), "--"+cmdRegRt.CfgStorageGroupSize, strconv.FormatUint(runtime.Storage.GroupSize, 10), "--"+cmdRegRt.CfgStorageMinWriteReplication, strconv.FormatUint(runtime.Storage.MinWriteReplication, 10), "--"+cmdRegRt.CfgStorageMaxApplyWriteLogEntries, strconv.FormatUint(runtime.Storage.MaxApplyWriteLogEntries, 10), "--"+cmdRegRt.CfgStorageMaxApplyOps, strconv.FormatUint(runtime.Storage.MaxApplyOps, 10), - "--"+cmdRegRt.CfgStorageMaxMergeRoots, strconv.FormatUint(runtime.Storage.MaxMergeRoots, 10), - "--"+cmdRegRt.CfgStorageMaxMergeOps, strconv.FormatUint(runtime.Storage.MaxMergeOps, 10), "--"+cmdRegRt.CfgStorageCheckpointInterval, strconv.FormatUint(runtime.Storage.CheckpointInterval, 10), "--"+cmdRegRt.CfgStorageCheckpointNumKept, strconv.FormatUint(runtime.Storage.CheckpointNumKept, 10), "--"+cmdRegRt.CfgStorageCheckpointChunkSize, strconv.FormatUint(runtime.Storage.CheckpointChunkSize, 10), 
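The e2e CLI helper above simply maps the runtime descriptor's fields onto --runtime.* flags, so dropping the merge parameters means dropping four flag pairs. A sketch of that mapping with a trimmed-down descriptor follows; the flag-name constants are assumed to match the remaining registry flags and only a subset of fields is shown.

```go
package main

import (
	"fmt"
	"strconv"
	"time"
)

// Flag names assumed to mirror the remaining runtime registration flags.
const (
	cfgExecutorGroupSize    = "runtime.executor.group_size"
	cfgExecutorRoundTimeout = "runtime.executor.round_timeout"
	cfgStorageGroupSize     = "runtime.storage.group_size"
	cfgStorageMaxApplyOps   = "runtime.storage.max_apply_ops"
)

type executorParameters struct {
	GroupSize    uint64
	RoundTimeout time.Duration
}

type storageParameters struct {
	GroupSize   uint64
	MaxApplyOps uint64
}

type runtimeDescriptor struct {
	Executor executorParameters
	Storage  storageParameters
}

// registerRuntimeArgs builds the flag list passed to the `registry runtime`
// subcommand, one --<flag> <value> pair per descriptor field, with no merge
// committee flags left.
func registerRuntimeArgs(rt runtimeDescriptor) []string {
	return []string{
		"--" + cfgExecutorGroupSize, strconv.FormatUint(rt.Executor.GroupSize, 10),
		"--" + cfgExecutorRoundTimeout, rt.Executor.RoundTimeout.String(),
		"--" + cfgStorageGroupSize, strconv.FormatUint(rt.Storage.GroupSize, 10),
		"--" + cfgStorageMaxApplyOps, strconv.FormatUint(rt.Storage.MaxApplyOps, 10),
	}
}

func main() {
	rt := runtimeDescriptor{
		Executor: executorParameters{GroupSize: 2, RoundTimeout: 10 * time.Second},
		Storage:  storageParameters{GroupSize: 2, MaxApplyOps: 2},
	}
	fmt.Println(registerRuntimeArgs(rt))
}
```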
diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index 3a6bd3d0d39..31205e920ab 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -198,7 +198,6 @@ type RuntimeFixture struct { // nolint: maligned GenesisRound uint64 `json:"genesis_round,omitempty"` Executor registry.ExecutorParameters `json:"executor"` - Merge registry.MergeParameters `json:"merge"` TxnScheduler registry.TxnSchedulerParameters `json:"txn_scheduler"` Storage registry.StorageParameters `json:"storage"` @@ -237,7 +236,6 @@ func (f *RuntimeFixture) Create(netFixture *NetworkFixture, net *Network) (*Runt TEEHardware: netFixture.TEE.Hardware, MrSigner: netFixture.TEE.MrSigner, Executor: f.Executor, - Merge: f.Merge, TxnScheduler: f.TxnScheduler, Storage: f.Storage, AdmissionPolicy: f.AdmissionPolicy, diff --git a/go/oasis-test-runner/oasis/log.go b/go/oasis-test-runner/oasis/log.go index 53d7f287c34..05c8fd4ca99 100644 --- a/go/oasis-test-runner/oasis/log.go +++ b/go/oasis-test-runner/oasis/log.go @@ -51,18 +51,6 @@ func LogAssertNoExecutionDiscrepancyDetected() log.WatcherHandlerFactory { return LogAssertNotEvent(roothash.LogEventExecutionDiscrepancyDetected, "execution discrepancy detected") } -// LogAssertMergeDiscrepancyDetected returns a handler which checks whether a -// merge discrepancy was detected based on JSON log output. -func LogAssertMergeDiscrepancyDetected() log.WatcherHandlerFactory { - return LogAssertEvent(roothash.LogEventMergeDiscrepancyDetected, "merge discrepancy not detected") -} - -// LogAssertNoMergeDiscrepancyDetected returns a handler which checks whether a -// merge discrepancy was not detected based on JSON log output. -func LogAssertNoMergeDiscrepancyDetected() log.WatcherHandlerFactory { - return LogAssertNotEvent(roothash.LogEventMergeDiscrepancyDetected, "merge discrepancy detected") -} - // LogAssertPeerExchangeDisabled returns a handler which checks whether a peer // exchange disabled event was detected based on JSON log output. 
func LogAssertPeerExchangeDisabled() log.WatcherHandlerFactory { diff --git a/go/oasis-test-runner/oasis/runtime.go b/go/oasis-test-runner/oasis/runtime.go index b651e89bbfa..33fd550a40a 100644 --- a/go/oasis-test-runner/oasis/runtime.go +++ b/go/oasis-test-runner/oasis/runtime.go @@ -58,7 +58,6 @@ type RuntimeCfg struct { // nolint: maligned GenesisRound uint64 Executor registry.ExecutorParameters - Merge registry.MergeParameters TxnScheduler registry.TxnSchedulerParameters Storage registry.StorageParameters @@ -128,7 +127,6 @@ func (net *Network) NewRuntime(cfg *RuntimeCfg) (*Runtime, error) { Kind: cfg.Kind, TEEHardware: cfg.TEEHardware, Executor: cfg.Executor, - Merge: cfg.Merge, TxnScheduler: cfg.TxnScheduler, Storage: cfg.Storage, AdmissionPolicy: cfg.AdmissionPolicy, diff --git a/go/oasis-test-runner/scenario/e2e/registry_cli.go b/go/oasis-test-runner/scenario/e2e/registry_cli.go index ebeb7223129..ae55ff95baa 100644 --- a/go/oasis-test-runner/scenario/e2e/registry_cli.go +++ b/go/oasis-test-runner/scenario/e2e/registry_cli.go @@ -619,12 +619,6 @@ func (sc *registryCLIImpl) testRuntime(ctx context.Context, childEnv *env.Env, c AllowedStragglers: 3, RoundTimeout: 4 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 5, - GroupBackupSize: 6, - AllowedStragglers: 7, - RoundTimeout: 8 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ GroupSize: 10, Algorithm: "batching", @@ -637,8 +631,6 @@ func (sc *registryCLIImpl) testRuntime(ctx context.Context, childEnv *env.Env, c MinWriteReplication: 9, MaxApplyWriteLogEntries: 10, MaxApplyOps: 11, - MaxMergeRoots: 12, - MaxMergeOps: 13, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ EntityWhitelist: ®istry.EntityWhitelistRuntimeAdmissionPolicy{ diff --git a/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go b/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go index 969a157f423..9e5f0837f34 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/byzantine.go @@ -27,32 +27,13 @@ var ( oasis.LogAssertNoTimeouts(), oasis.LogAssertNoRoundFailures(), oasis.LogAssertExecutionDiscrepancyDetected(), - oasis.LogAssertNoMergeDiscrepancyDetected(), }, oasis.ByzantineSlot3IdentitySeed) // ByzantineExecutorStraggler is the byzantine executor straggler scenario. ByzantineExecutorStraggler scenario.Scenario = newByzantineImpl("executor-straggler", []log.WatcherHandlerFactory{ oasis.LogAssertTimeouts(), oasis.LogAssertNoRoundFailures(), oasis.LogAssertExecutionDiscrepancyDetected(), - oasis.LogAssertNoMergeDiscrepancyDetected(), }, oasis.ByzantineSlot3IdentitySeed) - - // ByzantineMergeHonest is the byzantine merge honest scenario. - ByzantineMergeHonest scenario.Scenario = newByzantineImpl("merge-honest", nil, oasis.ByzantineSlot1IdentitySeed) - // ByzantineMergeWrong is the byzantine merge wrong scenario. - ByzantineMergeWrong scenario.Scenario = newByzantineImpl("merge-wrong", []log.WatcherHandlerFactory{ - oasis.LogAssertNoTimeouts(), - oasis.LogAssertNoRoundFailures(), - oasis.LogAssertNoExecutionDiscrepancyDetected(), - oasis.LogAssertMergeDiscrepancyDetected(), - }, oasis.ByzantineSlot1IdentitySeed) - // ByzantineMergeStraggler is the byzantine merge straggler scenario. 
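The byzantine scenarios above keep only the execution discrepancy log assertions, since there is no merge discrepancy event left to watch for. Below is a standalone sketch of the kind of check the log watcher handlers perform, scanning JSON log lines for the roothash/execution_discrepancy_detected event value; the exact log line format used in the example data is an assumption.

```go
package main

import (
	"bufio"
	"fmt"
	"strings"
)

// Event value taken from the roothash API in this diff; the surrounding log
// format (one JSON object per line) is an assumption of this sketch.
const logEventExecutionDiscrepancy = "roothash/execution_discrepancy_detected"

// assertEvent scans node log output and reports whether the given event
// value appears, which is roughly what the LogAssertEvent and
// LogAssertNotEvent watcher handlers decide on.
func assertEvent(logs string, event string) bool {
	scanner := bufio.NewScanner(strings.NewReader(logs))
	for scanner.Scan() {
		if strings.Contains(scanner.Text(), event) {
			return true
		}
	}
	return false
}

func main() {
	logs := `{"level":"warn","log_event":"roothash/execution_discrepancy_detected","msg":"discrepancy detected"}
{"level":"info","msg":"round finalized"}`

	fmt.Println("execution discrepancy detected:", assertEvent(logs, logEventExecutionDiscrepancy))
}
```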
- ByzantineMergeStraggler scenario.Scenario = newByzantineImpl("merge-straggler", []log.WatcherHandlerFactory{ - oasis.LogAssertTimeouts(), - oasis.LogAssertNoRoundFailures(), - oasis.LogAssertNoExecutionDiscrepancyDetected(), - oasis.LogAssertMergeDiscrepancyDetected(), - }, oasis.ByzantineSlot1IdentitySeed) ) type byzantineImpl struct { diff --git a/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go b/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go index e5a626141f8..d6b57413de6 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/history_reindex.go @@ -68,8 +68,6 @@ func (sc *historyReindexImpl) Fixture() (*oasis.NetworkFixture, error) { // Use a single compute node. f.Runtimes[rtIdx].Executor.GroupSize = 1 f.Runtimes[rtIdx].Executor.GroupBackupSize = 0 - f.Runtimes[rtIdx].Merge.GroupSize = 1 - f.Runtimes[rtIdx].Merge.GroupBackupSize = 0 return f, nil } diff --git a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go index a11c1c3a4a1..933720b2ca3 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/multiple_runtimes.go @@ -90,11 +90,6 @@ func (sc *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { GroupBackupSize: 0, RoundTimeout: 10 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 1, - GroupBackupSize: 0, - RoundTimeout: 10 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 1, @@ -107,8 +102,6 @@ func (sc *multipleRuntimesImpl) Fixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 1, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go index 579514d58b0..6e95d841932 100644 --- a/go/oasis-test-runner/scenario/e2e/runtime/runtime.go +++ b/go/oasis-test-runner/scenario/e2e/runtime/runtime.go @@ -47,7 +47,6 @@ var ( oasis.LogAssertNoTimeouts(), oasis.LogAssertNoRoundFailures(), oasis.LogAssertNoExecutionDiscrepancyDetected(), - oasis.LogAssertNoMergeDiscrepancyDetected(), } runtimeID common.Namespace @@ -163,11 +162,6 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { GroupBackupSize: 1, RoundTimeout: 10 * time.Second, }, - Merge: registry.MergeParameters{ - GroupSize: 2, - GroupBackupSize: 1, - RoundTimeout: 10 * time.Second, - }, TxnScheduler: registry.TxnSchedulerParameters{ Algorithm: registry.TxnSchedulerAlgorithmBatching, GroupSize: 1, @@ -180,8 +174,6 @@ func (sc *runtimeImpl) Fixture() (*oasis.NetworkFixture, error) { MinWriteReplication: 2, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: registry.RuntimeAdmissionPolicy{ AnyNode: ®istry.AnyNodeRuntimeAdmissionPolicy{}, @@ -497,10 +489,6 @@ func RegisterScenarios() error { ByzantineExecutorHonest, ByzantineExecutorWrong, ByzantineExecutorStraggler, - // Byzantine merge node. - ByzantineMergeHonest, - ByzantineMergeWrong, - ByzantineMergeStraggler, // Storage sync test. StorageSync, // Sentry test. 
diff --git a/go/registry/api/api.go b/go/registry/api/api.go index 81876fa3242..bd776a59451 100644 --- a/go/registry/api/api.go +++ b/go/registry/api/api.go @@ -1052,14 +1052,6 @@ func VerifyRegisterRuntimeArgs( // nolint: gocyclo return nil, fmt.Errorf("%w: executor group too small", ErrInvalidArgument) } - // Ensure there is at least one member of the merge group. - if rt.Merge.GroupSize == 0 { - logger.Error("RegisterRuntime: merge group size too small", - "runtime", rt, - ) - return nil, fmt.Errorf("%w: merge group too small", ErrInvalidArgument) - } - // Ensure there is at least one member of the transaction scheduler group. if rt.TxnScheduler.GroupSize == 0 { logger.Error("RegisterRuntime: transaction scheduler group too small", @@ -1187,18 +1179,6 @@ func VerifyRegisterRuntimeStorageArgs(rt *Runtime, logger *logging.Logger) error ) return fmt.Errorf("%w: storage MaxApplyOps parameter too small", ErrInvalidArgument) } - if params.MaxMergeRoots == 0 { - logger.Error("RegisterRuntime: storage MaxMergeRoots parameter too small", - "runtime", rt, - ) - return fmt.Errorf("%w: storage MaxMergeRoots parameter too small", ErrInvalidArgument) - } - if params.MaxMergeOps < 2 { - logger.Error("RegisterRuntime: storage MaxMergeOps parameter too small", - "runtime", rt, - ) - return fmt.Errorf("%w: storage MaxMergeOps parameter too small", ErrInvalidArgument) - } // Verify storage checkpointing configuration if enabled. if params.CheckpointInterval > 0 { diff --git a/go/registry/api/runtime.go b/go/registry/api/runtime.go index 6d21fc6f54b..c4eebcc8d76 100644 --- a/go/registry/api/runtime.go +++ b/go/registry/api/runtime.go @@ -93,21 +93,6 @@ type ExecutorParameters struct { RoundTimeout time.Duration `json:"round_timeout"` } -// MergeParameters are parameters for the merge committee. -type MergeParameters struct { - // GroupSize is the size of the committee. - GroupSize uint64 `json:"group_size"` - - // GroupBackupSize is the size of the discrepancy resolution group. - GroupBackupSize uint64 `json:"group_backup_size"` - - // AllowedStragglers is the number of allowed stragglers. - AllowedStragglers uint64 `json:"allowed_stragglers"` - - // RoundTimeout is the round timeout of the nodes in the group. - RoundTimeout time.Duration `json:"round_timeout"` -} - // TxnSchedulerParameters are parameters for the transaction scheduler committee. type TxnSchedulerParameters struct { // GroupSize is the size of the committee. @@ -145,12 +130,6 @@ type StorageParameters struct { // MaxApplyOps is the maximum number of apply operations in a batch. MaxApplyOps uint64 `json:"max_apply_ops"` - // MaxMergeRoots is the maximum number of merge roots. - MaxMergeRoots uint64 `json:"max_merge_roots"` - - // MaxApplyOps configures the maximum number of merge operations in a batch. - MaxMergeOps uint64 `json:"max_merge_ops"` - // CheckpointInterval is the expected runtime state checkpoint interval (in rounds). CheckpointInterval uint64 `json:"checkpoint_interval"` @@ -247,9 +226,6 @@ type Runtime struct { // nolint: maligned // Executor stores parameters of the executor committee. Executor ExecutorParameters `json:"executor,omitempty"` - // Merge stores parameters of the merge committee. - Merge MergeParameters `json:"merge,omitempty"` - // TxnScheduler stores parameters of the transactions scheduler committee. 
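Runtime registration validation above now checks only the executor, transaction scheduler and storage parameters; the merge group and MaxMerge* checks are gone. A simplified sketch of the remaining checks follows, with stand-in types; the exact thresholds (such as MaxApplyOps needing to cover an I/O and a state apply) are assumptions of this sketch.

```go
package main

import (
	"errors"
	"fmt"
)

type executorParameters struct {
	GroupSize uint64
}

type txnSchedulerParameters struct {
	GroupSize uint64
}

type storageParameters struct {
	GroupSize   uint64
	MaxApplyOps uint64
}

type runtime struct {
	Executor     executorParameters
	TxnScheduler txnSchedulerParameters
	Storage      storageParameters
}

var errInvalidArgument = errors.New("registry: invalid argument")

// verifyRegisterRuntimeArgs sketches the committee checks that remain after
// the merge group checks were dropped.
func verifyRegisterRuntimeArgs(rt *runtime) error {
	if rt.Executor.GroupSize == 0 {
		return fmt.Errorf("%w: executor group too small", errInvalidArgument)
	}
	if rt.TxnScheduler.GroupSize == 0 {
		return fmt.Errorf("%w: transaction scheduler group too small", errInvalidArgument)
	}
	if rt.Storage.GroupSize == 0 {
		return fmt.Errorf("%w: storage group too small", errInvalidArgument)
	}
	if rt.Storage.MaxApplyOps < 2 {
		return fmt.Errorf("%w: storage MaxApplyOps parameter too small", errInvalidArgument)
	}
	return nil
}

func main() {
	rt := &runtime{
		Executor:     executorParameters{GroupSize: 2},
		TxnScheduler: txnSchedulerParameters{GroupSize: 1},
		Storage:      storageParameters{GroupSize: 2, MaxApplyOps: 2},
	}
	fmt.Println("register ok:", verifyRegisterRuntimeArgs(rt) == nil)
}
```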
TxnScheduler TxnSchedulerParameters `json:"txn_scheduler,omitempty"` diff --git a/go/registry/tests/tester.go b/go/registry/tests/tester.go index 5ed7a9fd43b..dc490f5417c 100644 --- a/go/registry/tests/tester.go +++ b/go/registry/tests/tester.go @@ -1611,12 +1611,6 @@ func NewTestRuntime(seed []byte, ent *TestEntity, isKeyManager bool) (*TestRunti AllowedStragglers: 1, RoundTimeout: 20 * time.Second, }, - Merge: api.MergeParameters{ - GroupSize: 3, - GroupBackupSize: 5, - AllowedStragglers: 1, - RoundTimeout: 20 * time.Second, - }, TxnScheduler: api.TxnSchedulerParameters{ GroupSize: 3, Algorithm: api.TxnSchedulerAlgorithmBatching, @@ -1629,8 +1623,6 @@ func NewTestRuntime(seed []byte, ent *TestEntity, isKeyManager bool) (*TestRunti MinWriteReplication: 3, MaxApplyWriteLogEntries: 100_000, MaxApplyOps: 2, - MaxMergeRoots: 1, - MaxMergeOps: 2, }, AdmissionPolicy: api.RuntimeAdmissionPolicy{ AnyNode: &api.AnyNodeRuntimeAdmissionPolicy{}, diff --git a/go/roothash/api/api.go b/go/roothash/api/api.go index 883cbcc38d1..7e598c3a963 100644 --- a/go/roothash/api/api.go +++ b/go/roothash/api/api.go @@ -24,9 +24,6 @@ const ( // LogEventExecutionDiscrepancyDetected is a log event value that signals // an execution discrepancy has been detected. LogEventExecutionDiscrepancyDetected = "roothash/execution_discrepancy_detected" - // LogEventMergeDiscrepancyDetected is a log event value that signals - // a merge discrepancy has been detected. - LogEventMergeDiscrepancyDetected = "roothash/merge_discrepancy_detected" // LogEventTimerFired is a log event value that signals a timer has fired. LogEventTimerFired = "roothash/timer_fired" // LogEventRoundFailed is a log event value that signals a round has failed. @@ -56,13 +53,10 @@ var ( // MethodExecutorCommit is the method name for executor commit submission. MethodExecutorCommit = transaction.NewMethodName(ModuleName, "ExecutorCommit", ExecutorCommit{}) - // MethodMergeCommit is the method name for merge commit submission. - MethodMergeCommit = transaction.NewMethodName(ModuleName, "MergeCommit", MergeCommit{}) // Methods is a list of all methods supported by the roothash backend. Methods = []transaction.MethodName{ MethodExecutorCommit, - MethodMergeCommit, } ) @@ -115,20 +109,6 @@ func NewExecutorCommitTx(nonce uint64, fee *transaction.Fee, runtimeID common.Na }) } -// MergeCommit is the argument set for the MergeCommit method. -type MergeCommit struct { - ID common.Namespace `json:"id"` - Commits []commitment.MergeCommitment `json:"commits"` -} - -// NewMergeCommitTx creates a new executor commit transaction. -func NewMergeCommitTx(nonce uint64, fee *transaction.Fee, runtimeID common.Namespace, commits []commitment.MergeCommitment) *transaction.Transaction { - return transaction.NewTransaction(nonce, fee, MethodMergeCommit, &MergeCommit{ - ID: runtimeID, - Commits: commits, - }) -} - // AnnotatedBlock is an annotated roothash block. type AnnotatedBlock struct { // Height is the underlying roothash backend's block height that @@ -145,26 +125,12 @@ type ExecutorCommittedEvent struct { Commit commitment.ExecutorCommitment `json:"commit"` } -// MergeCommittedEvent is an event emitted each time a merge node commits. -type MergeCommittedEvent struct { - // Commit is the merge commitment. - Commit commitment.MergeCommitment `json:"commit"` -} - // ExecutionDiscrepancyDetectedEvent is an execute discrepancy detected event. 
type ExecutionDiscrepancyDetectedEvent struct { - // CommitteeID is the identifier of the executor committee where a - // discrepancy has been detected. - CommitteeID hash.Hash `json:"cid"` - // Timeout signals whether the discrepancy was due to a timeout. Timeout bool `json:"timeout"` } -// MergeDiscrepancyDetectedEvent is a merge discrepancy detected event. -type MergeDiscrepancyDetectedEvent struct { -} - // FinalizedEvent is a finalized event. type FinalizedEvent struct { Round uint64 `json:"round"` @@ -178,9 +144,7 @@ type Event struct { RuntimeID common.Namespace `json:"runtime_id"` ExecutorCommitted *ExecutorCommittedEvent `json:"executor_committed,omitempty"` - MergeCommitted *MergeCommittedEvent `json:"merge_committed,omitempty"` ExecutionDiscrepancyDetected *ExecutionDiscrepancyDetectedEvent `json:"execution_discrepancy,omitempty"` - MergeDiscrepancyDetected *MergeDiscrepancyDetectedEvent `json:"merge_discrepancy,omitempty"` FinalizedEvent *FinalizedEvent `json:"finalized,omitempty"` } @@ -220,8 +184,6 @@ type ConsensusParameters struct { const ( // GasOpComputeCommit is the gas operation identifier for compute commits. GasOpComputeCommit transaction.Op = "compute_commit" - // GasOpMergeCommit is the gas operation identifier for merge commits. - GasOpMergeCommit transaction.Op = "merge_commit" ) // XXX: Define reasonable default gas costs. @@ -229,7 +191,6 @@ const ( // DefaultGasCosts are the "default" gas costs for operations. var DefaultGasCosts = transaction.Costs{ GasOpComputeCommit: 1000, - GasOpMergeCommit: 1000, } // SanityCheckBlocks examines the blocks table. diff --git a/go/roothash/api/commitment/executor.go b/go/roothash/api/commitment/executor.go index 51dc21a4017..6d131d8f332 100644 --- a/go/roothash/api/commitment/executor.go +++ b/go/roothash/api/commitment/executor.go @@ -32,6 +32,7 @@ var ( // // Keep the roothash RAK validation in sync with changes to this structure. type ComputeResultsHeader struct { + Round uint64 `json:"round"` PreviousHash hash.Hash `json:"previous_hash"` IORoot hash.Hash `json:"io_root"` StateRoot hash.Hash `json:"state_root"` @@ -41,7 +42,7 @@ type ComputeResultsHeader struct { // IsParentOf returns true iff the header is the parent of a child header. func (h *ComputeResultsHeader) IsParentOf(child *block.Header) bool { childHash := child.EncodedHash() - return h.PreviousHash.Equal(&childHash) + return h.PreviousHash.Equal(&childHash) && h.Round == child.Round+1 } // EncodedHash returns the encoded cryptographic hash of the header. @@ -51,7 +52,6 @@ func (h *ComputeResultsHeader) EncodedHash() hash.Hash { // ComputeBody holds the data signed in a compute worker commitment. type ComputeBody struct { - CommitteeID hash.Hash `json:"cid"` Header ComputeResultsHeader `json:"header"` StorageSignatures []signature.Signature `json:"storage_signatures"` RakSig signature.RawSignature `json:"rak_sig"` @@ -66,7 +66,6 @@ type ComputeBody struct { // matches what we're seeing. func (m *ComputeBody) VerifyTxnSchedSignature(header block.Header) bool { dispatch := &TxnSchedulerBatch{ - CommitteeID: m.CommitteeID, IORoot: m.InputRoot, StorageSignatures: m.InputStorageSigs, Header: header, @@ -89,11 +88,11 @@ func (m *ComputeBody) RootsForStorageReceipt() []hash.Hash { // // Note: Ensuring that the signature is signed by the keypair(s) that are // expected is the responsibility of the caller. 
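The commitment header above gains an explicit Round, and IsParentOf now requires both that the commitment chains off the child block's hash and that it claims exactly the next round. A self-contained sketch of that check follows; the hash construction here (JSON plus SHA-512/256) is only a stand-in for the real CBOR-based EncodedHash.

```go
package main

import (
	"crypto/sha512"
	"encoding/json"
	"fmt"
)

// blockHeader is a stand-in for the roothash block header; only the fields
// relevant to the parent check are kept.
type blockHeader struct {
	Round uint64 `json:"round"`
}

// encodedHash stands in for Header.EncodedHash. The real code hashes the CBOR
// encoding; JSON plus SHA-512/256 keeps the sketch self-contained.
func (h *blockHeader) encodedHash() [32]byte {
	raw, _ := json.Marshal(h)
	return sha512.Sum512_256(raw)
}

// computeResultsHeader mirrors the shape of the updated header: it now pins
// the round it was computed for in addition to the parent hash.
type computeResultsHeader struct {
	Round        uint64
	PreviousHash [32]byte
}

// isParentOf is the updated check: the commitment must both chain off the
// child block's hash and claim exactly the next round number.
func (h *computeResultsHeader) isParentOf(child *blockHeader) bool {
	return h.PreviousHash == child.encodedHash() && h.Round == child.Round+1
}

func main() {
	current := &blockHeader{Round: 41}

	good := &computeResultsHeader{Round: 42, PreviousHash: current.encodedHash()}
	badRound := &computeResultsHeader{Round: 43, PreviousHash: current.encodedHash()}

	fmt.Println("round 42 commitment accepted:", good.isParentOf(current))     // true
	fmt.Println("round 43 commitment accepted:", badRound.isParentOf(current)) // false
}
```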
-func (m *ComputeBody) VerifyStorageReceiptSignatures(ns common.Namespace, round uint64) error { +func (m *ComputeBody) VerifyStorageReceiptSignatures(ns common.Namespace) error { receiptBody := storage.ReceiptBody{ Version: 1, Namespace: ns, - Round: round, + Round: m.Header.Round, Roots: m.RootsForStorageReceipt(), } @@ -106,12 +105,12 @@ func (m *ComputeBody) VerifyStorageReceiptSignatures(ns common.Namespace, round // VerifyStorageReceipt validates that the provided storage receipt // matches the header. -func (m *ComputeBody) VerifyStorageReceipt(ns common.Namespace, round uint64, receipt *storage.ReceiptBody) error { +func (m *ComputeBody) VerifyStorageReceipt(ns common.Namespace, receipt *storage.ReceiptBody) error { if !receipt.Namespace.Equal(&ns) { return errors.New("roothash: receipt has unexpected namespace") } - if receipt.Round != round { + if receipt.Round != m.Header.Round { return errors.New("roothash: receipt has unexpected round") } diff --git a/go/roothash/api/commitment/merge.go b/go/roothash/api/commitment/merge.go deleted file mode 100644 index b5c03bd49a0..00000000000 --- a/go/roothash/api/commitment/merge.go +++ /dev/null @@ -1,95 +0,0 @@ -// Package commitment defines a roothash commitment. -package commitment - -import ( - "errors" - - "github.com/oasisprotocol/oasis-core/go/common/cbor" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" -) - -// MergeSignatureContext is the signature context used to sign merge -// worker commitments. -var MergeSignatureContext = signature.NewContext("oasis-core/roothash: merge commitment", signature.WithChainSeparation()) - -type MergeBody struct { - ExecutorCommits []ExecutorCommitment `json:"commits"` - Header block.Header `json:"header"` -} - -// MergeCommitment is a roothash commitment from a merge worker. -// -// The signed content is MergeBody. -type MergeCommitment struct { - signature.Signed -} - -// Equal compares vs another MergeCommitment for equality. -func (c *MergeCommitment) Equal(cmp *MergeCommitment) bool { - return c.Signed.Equal(&cmp.Signed) -} - -// OpenMergeCommitment is a merge commitment that has been verified and -// deserialized. -// -// The open commitment still contains the original signed commitment. -type OpenMergeCommitment struct { - MergeCommitment - - Body *MergeBody `json:"-"` // No need to serialize as it can be reconstructed. -} - -// UnmarshalCBOR handles CBOR unmarshalling from passed data. -func (c *OpenMergeCommitment) UnmarshalCBOR(data []byte) error { - if err := cbor.Unmarshal(data, &c.MergeCommitment); err != nil { - return err - } - - c.Body = new(MergeBody) - return cbor.Unmarshal(c.Blob, c.Body) -} - -// MostlyEqual returns true if the commitment is mostly equal to another -// specified commitment as per discrepancy detection criteria. -func (c OpenMergeCommitment) MostlyEqual(other OpenCommitment) bool { - return c.Body.Header.MostlyEqual(&other.(OpenMergeCommitment).Body.Header) -} - -// ToVote returns a hash that represents a vote for this commitment as -// per discrepancy resolution criteria. -func (c OpenMergeCommitment) ToVote() hash.Hash { - return c.Body.Header.EncodedHash() -} - -// ToDDResult returns a commitment-specific result after discrepancy -// detection. -func (c OpenMergeCommitment) ToDDResult() interface{} { - return c.Body.Header -} - -// Open validates the merge commitment signature, and de-serializes the body. 
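Because the header now carries its round, storage receipt verification above no longer takes a round argument: the expected round is read from the commitment header itself. A minimal sketch with stand-in types, and without any signature checking, follows.

```go
package main

import (
	"errors"
	"fmt"
)

// receiptBody is a stand-in for the storage receipt body: the namespace and
// round the storage nodes signed over, plus the roots they vouch for.
type receiptBody struct {
	Namespace string
	Round     uint64
	Roots     []string
}

type computeResultsHeader struct {
	Round     uint64
	IORoot    string
	StateRoot string
}

type computeBody struct {
	Header computeResultsHeader
}

// verifyStorageReceipt mirrors the updated check: the expected round is no
// longer passed in by the caller, it is read off the commitment header, so a
// receipt for any other round is rejected.
func (m *computeBody) verifyStorageReceipt(ns string, receipt *receiptBody) error {
	if receipt.Namespace != ns {
		return errors.New("roothash: receipt has unexpected namespace")
	}
	if receipt.Round != m.Header.Round {
		return errors.New("roothash: receipt has unexpected round")
	}
	return nil
}

func main() {
	body := &computeBody{Header: computeResultsHeader{Round: 42, IORoot: "io", StateRoot: "state"}}

	ok := &receiptBody{Namespace: "runtime-a", Round: 42}
	stale := &receiptBody{Namespace: "runtime-a", Round: 41}

	fmt.Println("receipt for round 42:", body.verifyStorageReceipt("runtime-a", ok))
	fmt.Println("receipt for round 41:", body.verifyStorageReceipt("runtime-a", stale))
}
```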
-func (c *MergeCommitment) Open() (*OpenMergeCommitment, error) { - var body MergeBody - if err := c.Signed.Open(MergeSignatureContext, &body); err != nil { - return nil, errors.New("roothash/commitment: merge commitment has invalid signature") - } - - return &OpenMergeCommitment{ - MergeCommitment: *c, - Body: &body, - }, nil -} - -// SignMergeCommitment serializes the message and signs the commitment. -func SignMergeCommitment(signer signature.Signer, body *MergeBody) (*MergeCommitment, error) { - signed, err := signature.SignSigned(signer, MergeSignatureContext, body) - if err != nil { - return nil, err - } - - return &MergeCommitment{ - Signed: *signed, - }, nil -} diff --git a/go/roothash/api/commitment/pool.go b/go/roothash/api/commitment/pool.go index 50d62c2a275..2c7fd4c13fd 100644 --- a/go/roothash/api/commitment/pool.go +++ b/go/roothash/api/commitment/pool.go @@ -31,10 +31,9 @@ var ( ErrStillWaiting = errors.New(moduleName, 9, "roothash/commitment: still waiting for commits") ErrInsufficientVotes = errors.New(moduleName, 10, "roothash/commitment: insufficient votes to finalize discrepancy resolution round") ErrBadExecutorCommits = errors.New(moduleName, 11, "roothash/commitment: bad executor commitments") - ErrInvalidCommitteeID = errors.New(moduleName, 12, "roothash/commitment: invalid committee ID") - ErrTxnSchedSigInvalid = p2pError.Permanent(errors.New(moduleName, 13, "roothash/commitment: txn scheduler signature invalid")) - ErrInvalidMessages = p2pError.Permanent(errors.New(moduleName, 14, "roothash/commitment: invalid messages")) - ErrBadStorageReceipts = errors.New(moduleName, 15, "roothash/commitment: bad storage receipts") + ErrTxnSchedSigInvalid = p2pError.Permanent(errors.New(moduleName, 12, "roothash/commitment: txn scheduler signature invalid")) + ErrInvalidMessages = p2pError.Permanent(errors.New(moduleName, 13, "roothash/commitment: invalid messages")) + ErrBadStorageReceipts = errors.New(moduleName, 14, "roothash/commitment: bad storage receipts") ) var logger *logging.Logger = logging.GetLogger("roothash/commitment/pool") @@ -66,9 +65,6 @@ type Pool struct { // ExecuteCommitments are the commitments in the pool iff Committee.Kind // is scheduler.KindComputeExecutor. ExecuteCommitments map[signature.PublicKey]OpenExecutorCommitment `json:"execute_commitments,omitempty"` - // MergeCommitments are the commitments in the pool iff Committee.Kind - // is scheduler.KindComputeMerge. - MergeCommitments map[signature.PublicKey]OpenMergeCommitment `json:"merge_commitments,omitempty"` // Discrepancy is a flag signalling that a discrepancy has been detected. Discrepancy bool `json:"discrepancy"` // NextTimeout is the time when the next call to TryFinalize(true) should @@ -96,21 +92,12 @@ func (p *Pool) isMember(id signature.PublicKey) bool { return p.MemberSet[id] } -// GetCommitteeID returns the identifier of the committee this pool is collecting -// commitments for. -func (p *Pool) GetCommitteeID() hash.Hash { - return p.Committee.EncodedMembersHash() -} - // ResetCommitments resets the commitments in the pool and clears the discrepancy // flag. 
func (p *Pool) ResetCommitments() { if p.ExecuteCommitments == nil || len(p.ExecuteCommitments) > 0 { p.ExecuteCommitments = make(map[signature.PublicKey]OpenExecutorCommitment) } - if p.MergeCommitments == nil || len(p.MergeCommitments) > 0 { - p.MergeCommitments = make(map[signature.PublicKey]OpenMergeCommitment) - } p.Discrepancy = false p.NextTimeout = time.Time{} } @@ -128,8 +115,6 @@ func (p *Pool) getCommitment(id signature.PublicKey) (OpenCommitment, bool) { switch p.Committee.Kind { case scheduler.KindComputeExecutor: com, ok = p.ExecuteCommitments[id] - case scheduler.KindComputeMerge: - com, ok = p.MergeCommitments[id] default: panic("roothash/commitment: unknown committee kind: " + p.Committee.Kind.String()) } @@ -209,21 +194,9 @@ func (p *Pool) addOpenExecutorCommitment( } } - // Verify that this is for the correct committee. - cID := p.GetCommitteeID() - if !cID.Equal(&body.CommitteeID) { - logger.Debug("executor commitment has invalid committee ID", - "expected_committee_id", cID, - "committee_id", body.CommitteeID, - "node_id", id, - ) - return ErrInvalidCommitteeID - } - // Check if the block is based on the previous block. if !header.IsParentOf(&blk.Header) { logger.Debug("executor commitment is not based on correct block", - "committee_id", cID, "node_id", id, "expected_previous_hash", blk.Header.EncodedHash(), "previous_hash", header.PreviousHash, @@ -235,7 +208,6 @@ func (p *Pool) addOpenExecutorCommitment( currentTxnSchedSig := body.TxnSchedSig if err := sv.VerifyCommitteeSignatures(scheduler.KindComputeTxnScheduler, []signature.Signature{body.TxnSchedSig}); err != nil { logger.Debug("executor commitment has bad transaction scheduler signers", - "committee_id", cID, "node_id", id, "err", err, ) @@ -256,35 +228,33 @@ func (p *Pool) addOpenExecutorCommitment( } if err := sv.VerifyCommitteeSignatures(scheduler.KindStorage, body.StorageSignatures); err != nil { logger.Debug("executor commitment has bad storage receipt signers", - "committee_id", cID, "node_id", id, "err", err, ) return err } - if err := body.VerifyStorageReceiptSignatures(blk.Header.Namespace, blk.Header.Round+1); err != nil { + if err := body.VerifyStorageReceiptSignatures(blk.Header.Namespace); err != nil { logger.Debug("executor commitment has bad storage receipt signatures", - "committee_id", cID, "node_id", id, "err", err, ) return p2pError.Permanent(err) } - // Go through existing commitments and check if the txn scheduler signed - // different batches for the same committee. + // Go through existing commitments and check if the txn scheduler signed different batches for + // the same committee in the same round. for _, com := range p.ExecuteCommitments { cb := com.Body - if cID.Equal(&cb.CommitteeID) { - existingTxnSchedSig := cb.TxnSchedSig - if currentTxnSchedSig.PublicKey.Equal(existingTxnSchedSig.PublicKey) && currentTxnSchedSig.Signature != existingTxnSchedSig.Signature { - // Same committe, same txn sched, but txn sched signatures - // don't match -- txn sched is malicious! - // TODO: Slash stake! (issue #1931) - logger.Warn("txn sched signed two different batches for the same committee ID", - "committee_id", cb.CommitteeID, - ) - } + existingTxnSchedSig := cb.TxnSchedSig + if currentTxnSchedSig.PublicKey.Equal(existingTxnSchedSig.PublicKey) && + !currentTxnSchedSig.Signature.Equal(existingTxnSchedSig.Signature) && + !cb.InputRoot.Equal(&com.Body.InputRoot) { + // TODO: Signal to slash stake! (issue #1931). 
+ logger.Warn("transaction scheduler signed two different batches for the same round", + "txn_scheduler_id", currentTxnSchedSig.PublicKey, + "input_root_a", cb.InputRoot, + "input_root_b", com.Body.InputRoot, + ) } } @@ -346,8 +316,6 @@ func (p *Pool) CheckEnoughCommitments(didTimeout bool) error { switch p.Committee.Kind { case scheduler.KindComputeExecutor: required -= int(p.Runtime.Executor.AllowedStragglers) - case scheduler.KindComputeMerge: - required -= int(p.Runtime.Merge.AllowedStragglers) default: panic("roothash/commitment: unknown committee kind while checking commitments: " + p.Committee.Kind.String()) } @@ -522,133 +490,6 @@ func (p *Pool) TryFinalize( return commit, nil } -// AddMergeCommitment verifies and adds a new merge commitment to the pool. -// -// Any executor commitments are added to the provided pool. -func (p *Pool) AddMergeCommitment( - ctx context.Context, - blk *block.Block, - sv SignatureVerifier, - nl NodeLookup, - commitment *MergeCommitment, - ccPool *MultiPool, -) error { - if p.Committee == nil { - return ErrNoCommittee - } - if p.Committee.Kind != scheduler.KindComputeMerge { - return ErrInvalidCommitteeKind - } - - id := commitment.Signature.PublicKey - - // Ensure that the node is actually a committee member. We do not enforce specific - // roles based on current discrepancy state to allow commitments arriving in any - // order (e.g., a backup worker can submit a commitment even before there is a - // discrepancy). - if !p.isMember(id) { - return ErrNotInCommittee - } - - // Ensure the node did not already submit a commitment. - if _, ok := p.MergeCommitments[id]; ok { - return ErrAlreadyCommitted - } - - // Check the commitment signature and de-serialize. - openCom, err := commitment.Open() - if err != nil { - return err - } - body := openCom.Body - header := &body.Header - - // Check if the block is based on the previous block. - if !header.IsParentOf(&blk.Header) { - logger.Debug("merge commitment is not based on correct block", - "node_id", id, - "expected_previous_hash", blk.Header.EncodedHash(), - "previous_hash", header.PreviousHash, - ) - return ErrNotBasedOnCorrectBlock - } - - // Check executor commitments -- all commitments must be valid and there - // must be no discrepancy as the merge committee nodes are supposed to - // check this. - if err = ccPool.addExecutorCommitments(ctx, blk, sv, nl, body.ExecutorCommits); err != nil { - return err - } - - // There must be enough executor commits for all committees. - if err = ccPool.CheckEnoughCommitments(); err != nil { - return ErrBadExecutorCommits - } - - for _, sp := range ccPool.Committees { - if !sp.Discrepancy { - // If there was no discrepancy yet there must not be one now. - _, err = sp.DetectDiscrepancy() - switch err { - case nil: - case ErrDiscrepancyDetected: - // We may also be able to already perform discrepancy resolution, check if - // this is possible. - _, err = sp.ResolveDiscrepancy() - if err == nil { - break - } - fallthrough - default: - logger.Debug("discrepancy detection failed for executor committee", - "err", err, - ) - return ErrBadExecutorCommits - } - } else { - // If there was a discrepancy before it must be resolved now. - _, err = sp.ResolveDiscrepancy() - if err != nil { - logger.Debug("discrepancy resolution failed for executor committee", - "err", err, - ) - return ErrBadExecutorCommits - } - } - } - - // Check if the header refers to merkle roots in storage. 
- if uint64(len(header.StorageSignatures)) < p.Runtime.Storage.MinWriteReplication { - logger.Debug("merge commitment doesn't have enough storage receipts", - "node_id", id, - "min_write_replication", p.Runtime.Storage.MinWriteReplication, - "num_receipts", len(header.StorageSignatures), - ) - return ErrBadStorageReceipts - } - if err = sv.VerifyCommitteeSignatures(scheduler.KindStorage, header.StorageSignatures); err != nil { - logger.Debug("merge commitment has bad storage receipt signers", - "node_id", id, - "err", err, - ) - return err - } - if err = header.VerifyStorageReceiptSignatures(); err != nil { - logger.Debug("merge commitment has bad storage receipt signatures", - "node_id", id, - "err", err, - ) - return err - } - - if p.MergeCommitments == nil { - p.MergeCommitments = make(map[signature.PublicKey]OpenMergeCommitment) - } - p.MergeCommitments[id] = *openCom - - return nil -} - // GetExecutorCommitments returns a list of executor commitments in the pool. func (p *Pool) GetExecutorCommitments() (result []ExecutorCommitment) { for _, c := range p.ExecuteCommitments { @@ -661,143 +502,3 @@ func (p *Pool) GetExecutorCommitments() (result []ExecutorCommitment) { func (p *Pool) IsTimeout(now time.Time) bool { return !p.NextTimeout.IsZero() && !p.NextTimeout.After(now) } - -// MultiPool contains pools for multiple committees and routes operations to -// multiple committees based on commitments' committee IDs. -type MultiPool struct { - Committees map[hash.Hash]*Pool `json:"committees"` -} - -// AddExecutorCommitment verifies and adds a new executor commitment to the pool. -func (m *MultiPool) AddExecutorCommitment( - ctx context.Context, - blk *block.Block, - sv SignatureVerifier, - nl NodeLookup, - commitment *ExecutorCommitment, -) (*Pool, error) { - // Check the commitment signature and de-serialize into header. - openCom, err := commitment.Open() - if err != nil { - return nil, p2pError.Permanent(err) - } - - p := m.Committees[openCom.Body.CommitteeID] - if p == nil { - return nil, ErrInvalidCommitteeID - } - - return p, p.addOpenExecutorCommitment(ctx, blk, sv, nl, openCom) -} - -// addExecutorCommitments verifies and adds multiple executor commitments to the pool. -// All valid commitments will be added, redundant commitments will be ignored. -// -// Note that any signatures being invalid will result in no changes to the pool. -func (m *MultiPool) addExecutorCommitments( - ctx context.Context, - blk *block.Block, - sv SignatureVerifier, - nl NodeLookup, - commitments []ExecutorCommitment, -) error { - // Batch verify all of the signatures at once. - msgs := make([][]byte, 0, len(commitments)) - sigs := make([]signature.Signature, 0, len(commitments)) - for i := range commitments { - v := commitments[i] // This is deliberate. - msgs = append(msgs, v.Blob) - sigs = append(sigs, v.Signature) - } - - if !signature.VerifyBatch(ExecutorSignatureContext, msgs, sigs) { - return signature.ErrVerifyFailed - } - - // Ok, all of the signatures are valid, deserialize the blobs and add them - // serially. 
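CheckEnoughCommitments above is left with a single straggler rule, the executor committee's AllowedStragglers, now that the merge case is gone. A sketch of that rule follows; whether the allowance only applies once the round has timed out is an assumption of this sketch.

```go
package main

import (
	"errors"
	"fmt"
)

// checkEnoughCommitments sketches the single-committee rule: normally every
// worker must commit, but when the round has timed out, up to
// allowedStragglers missing executor commitments are tolerated.
func checkEnoughCommitments(workers, received, allowedStragglers int, didTimeout bool) error {
	required := workers
	if didTimeout {
		required -= allowedStragglers
	}
	if received < required {
		return errors.New("roothash/commitment: still waiting for commits")
	}
	return nil
}

func main() {
	// Two workers, one allowed straggler.
	fmt.Println(checkEnoughCommitments(2, 1, 1, false)) // still waiting
	fmt.Println(checkEnoughCommitments(2, 1, 1, true))  // nil: straggler tolerated after timeout
}
```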
- var hadError bool - for _, v := range commitments { - var body ComputeBody - if err := cbor.Unmarshal(v.Blob, &body); err != nil { - hadError = true - continue - } - - openCom := &OpenExecutorCommitment{ - ExecutorCommitment: v, - Body: &body, - } - - p := m.Committees[openCom.Body.CommitteeID] - if p == nil { - hadError = true - continue - } - - err := p.addOpenExecutorCommitment(ctx, blk, sv, nl, openCom) - switch err { - case nil, ErrAlreadyCommitted: - default: - hadError = true - } - } - if hadError { - return ErrBadExecutorCommits - } - - return nil -} - -// CheckEnoughCommitments checks if there are enough commitments in the pool to be -// able to perform discrepancy detection. -// -// Note that this checks all committees in the multi-pool and returns an error if -// any doesn't have enoguh commitments. -func (m *MultiPool) CheckEnoughCommitments() error { - for _, p := range m.Committees { - if err := p.CheckEnoughCommitments(false); err != nil { - return err - } - } - return nil -} - -// GetExecutorCommitments returns a list of executor commitments in the pool. -func (m *MultiPool) GetOpenExecutorCommitments() (result []OpenExecutorCommitment) { - for _, p := range m.Committees { - for _, c := range p.ExecuteCommitments { - result = append(result, c) - } - } - return -} - -// GetTimeoutCommittees returns a list of committee pools that are up for their -// TryFinalize to be called. -func (m *MultiPool) GetTimeoutCommittees(now time.Time) (result []*Pool) { - for _, p := range m.Committees { - if p.IsTimeout(now) { - result = append(result, p) - } - } - return -} - -// GetNextTimeout returns the minimum next timeout of all committee pools. -func (m *MultiPool) GetNextTimeout() (timeout time.Time) { - for _, p := range m.Committees { - if timeout.IsZero() || (!p.NextTimeout.IsZero() && p.NextTimeout.Before(timeout)) { - timeout = p.NextTimeout - } - } - return -} - -// ResetCommitments resets the commitments in the pool and clears their discrepancy -// flags. -func (m *MultiPool) ResetCommitments() { - for _, p := range m.Committees { - p.ResetCommitments() - } -} diff --git a/go/roothash/api/commitment/pool_test.go b/go/roothash/api/commitment/pool_test.go index 8d54bb6a955..ad7779a063e 100644 --- a/go/roothash/api/commitment/pool_test.go +++ b/go/roothash/api/commitment/pool_test.go @@ -79,6 +79,7 @@ func TestPoolDefault(t *testing.T) { body := ComputeBody{ Header: ComputeResultsHeader{ + Round: blk.Header.Round, PreviousHash: blk.Header.PreviousHash, IORoot: blk.Header.IORoot, StateRoot: blk.Header.StateRoot, @@ -142,10 +143,7 @@ func TestPoolSingleCommitment(t *testing.T) { } // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, committee) - - commit, err := SignExecutorCommitment(sk, &body) - require.NoError(t, err, "SignExecutorCommitment") + childBlk, parentBlk, body := generateComputeBody(t) sv := &staticSignatureVerifier{ storagePublicKey: body.StorageSignatures[0].PublicKey, @@ -157,9 +155,31 @@ func TestPoolSingleCommitment(t *testing.T) { }, } - // Adding a commitment not based on correct block should fail. - err = pool.AddExecutorCommitment(context.Background(), parentBlk, sv, nl, commit) - require.Error(t, err, "AddExecutorCommitment") + // Test invalid commitments. 
+ for _, tc := range []struct { + name string + fn func(*ComputeBody) + expectedErr error + }{ + {"BlockBadRound", func(b *ComputeBody) { b.Header.Round-- }, ErrNotBasedOnCorrectBlock}, + {"BlockBadPreviousHash", func(b *ComputeBody) { b.Header.PreviousHash.FromBytes([]byte("invalid")) }, ErrNotBasedOnCorrectBlock}, + {"StorageSigs1", func(b *ComputeBody) { b.StorageSignatures = nil }, ErrBadStorageReceipts}, + } { + invalidBody := body + tc.fn(&invalidBody) + + var commit *ExecutorCommitment + commit, err = SignExecutorCommitment(sk, &invalidBody) + require.NoError(t, err, "SignExecutorCommitment(%s)", tc.name) + + err = pool.AddExecutorCommitment(context.Background(), childBlk, sv, nl, commit) + require.Error(t, err, "AddExecutorCommitment(%s)", tc.name) + require.Equal(t, tc.expectedErr, err, "AddExecutorCommitment(%s)", tc.name) + } + + // Generate a valid commitment. + commit, err := SignExecutorCommitment(sk, &body) + require.NoError(t, err, "SignExecutorCommitment") // There should not be enough executor commitments. err = pool.CheckEnoughCommitments(false) @@ -271,7 +291,7 @@ func TestPoolSingleCommitmentTEE(t *testing.T) { } // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, committee) + childBlk, _, body := generateComputeBody(t) rakSig, err := signature.Sign(skRAK, ComputeResultsHeaderSignatureContext, cbor.Marshal(body.Header)) require.NoError(t, err, "Sign") body.RakSig = rakSig.Signature @@ -279,10 +299,6 @@ func TestPoolSingleCommitmentTEE(t *testing.T) { commit, err := SignExecutorCommitment(sk, &body) require.NoError(t, err, "SignExecutorCommitment") - // Adding a commitment not based on correct block should fail. - err = pool.AddExecutorCommitment(context.Background(), parentBlk, nopSV, nl, commit) - require.Error(t, err, "AddExecutorCommitment") - // There should not be enough executor commitments. err = pool.CheckEnoughCommitments(false) require.Error(t, err, "CheckEnoughCommitments") @@ -327,10 +343,7 @@ func TestPoolTwoCommitments(t *testing.T) { } // Generate a commitment. - childBlk, _, body := generateComputeBody(t, committee) - - bodyInvalidID := body - bodyInvalidID.CommitteeID.FromBytes([]byte("invalid-committee-id")) + childBlk, _, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -338,15 +351,6 @@ func TestPoolTwoCommitments(t *testing.T) { commit2, err := SignExecutorCommitment(sk2, &body) require.NoError(t, err, "SignExecutorCommitment") - // Invalid committee. - cInvalidCommit, err := SignExecutorCommitment(sk1, &bodyInvalidID) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding a commitment for an invalid committee should fail. - err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, cInvalidCommit) - require.Error(t, err, "AddExecutorCommitment") - require.Equal(t, ErrInvalidCommitteeID, err, "AddExecutorCommitment") - // Adding commitment 1 should succeed. err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, commit1) require.NoError(t, err, "AddExecutorCommitment") @@ -383,7 +387,7 @@ func TestPoolTwoCommitments(t *testing.T) { } // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, committee) + childBlk, parentBlk, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -492,7 +496,7 @@ func TestPoolSerialization(t *testing.T) { } // Generate a commitment. 
- childBlk, _, body := generateComputeBody(t, committee) + childBlk, _, body := generateComputeBody(t) commit, err := SignExecutorCommitment(sk, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -518,410 +522,6 @@ func TestPoolSerialization(t *testing.T) { require.EqualValues(t, &body.Header, &header, "DD should return the same header") } -func TestMultiPoolSerialization(t *testing.T) { - genesisTestHelpers.SetTestChainContext() - - rt, sks1, committee1, nl := generateMockCommittee(t) - _, sks2, committee2, _ := generateMockCommittee(t) - com1ID := committee1.EncodedMembersHash() - com2ID := committee2.EncodedMembersHash() - - // Create a multi-pool. - pool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - com1ID: { - Runtime: rt, - Committee: committee1, - }, - com2ID: { - Runtime: rt, - Committee: committee2, - }, - }, - } - - // Generate commitments. - childBlk, _, body1 := generateComputeBody(t, committee1) - _, _, body2 := generateComputeBody(t, committee2) - - // First committee. - c1commit1, err := SignExecutorCommitment(sks1[0], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - c1commit2, err := SignExecutorCommitment(sks1[1], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - // Second committee. - c2commit1, err := SignExecutorCommitment(sks2[0], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - c2commit2, err := SignExecutorCommitment(sks2[1], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding commitment 1 should succeed. - sp, err := pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // Adding commitment 2 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // Adding commitment 3 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - // Adding commitment 4 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - m := cbor.Marshal(pool) - var d MultiPool - err = cbor.Unmarshal(m, &d) - require.NoError(t, err) - - // There should be enough executor commitments. - err = d.CheckEnoughCommitments() - require.NoError(t, err, "CheckEnoughCommitments") -} - -func TestPoolMergeCommitment(t *testing.T) { - genesisTestHelpers.SetTestChainContext() - - rt, executorSks, executorCommittee, nl := generateMockCommittee(t) - _, mergeSks, mergeCommittee, _ := generateMockCommittee(t) - mergeCommittee.Kind = scheduler.KindComputeMerge - executorCommitteeID := executorCommittee.EncodedMembersHash() - - t.Run("NoDiscrepancy", func(t *testing.T) { - // Create a merge commitment pool. - mergePool := Pool{ - Runtime: rt, - Committee: mergeCommittee, - } - - // Create a executor commitment multi-pool. - executorPool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - executorCommitteeID: { - Runtime: rt, - Committee: executorCommittee, - }, - }, - } - - // Generate a commitment. 
- childBlk, parentBlk, body := generateComputeBody(t, executorCommittee) - - commit1, err := SignExecutorCommitment(executorSks[0], &body) - require.NoError(t, err, "SignExecutorCommitment") - - commit2, err := SignExecutorCommitment(executorSks[1], &body) - require.NoError(t, err, "SignExecutorCommitment") - - // Generate a merge commitment. - mergeBody := MergeBody{ - ExecutorCommits: []ExecutorCommitment{*commit1, *commit2}, - Header: parentBlk.Header, - } - - mergeCommit1, err := SignMergeCommitment(mergeSks[0], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - mergeCommit2, err := SignMergeCommitment(mergeSks[1], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - // Adding a commitment having not enough storage receipts should fail. - mergeBodyNotEnoughStorageSig := mergeBody - mergeBodyNotEnoughStorageSig.Header.StorageSignatures = []signature.Signature{} - incorrectCommit, err := SignMergeCommitment(mergeSks[0], &mergeBodyNotEnoughStorageSig) - require.NoError(t, err, "SignMergeCommitment") - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, incorrectCommit, &executorPool) - require.Error(t, err, "AddMergeCommitment") - require.Equal(t, ErrBadStorageReceipts, err, "AddMergeCommitment") - - // Adding commitment 1 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit1, &executorPool) - require.NoError(t, err, "AddMergeCommitment") - - // There should not be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - err = mergePool.CheckEnoughCommitments(true) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit2, &executorPool) - require.NoError(t, err, "AddExecutorCommitment") - - m := cbor.Marshal(executorPool) - var d MultiPool - err = cbor.Unmarshal(m, &d) - require.NoError(t, err) - - // There should be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.NoError(t, err, "CheckEnoughCommitments") - - // There should be no discrepancy. - dc, err := mergePool.DetectDiscrepancy() - require.NoError(t, err, "DetectDiscrepancy") - require.Equal(t, false, mergePool.Discrepancy) - header := dc.ToDDResult().(block.Header) - require.EqualValues(t, &parentBlk.Header, &header, "DD should return the same header") - }) - - t.Run("ResolvedExecutionDiscrepancy", func(t *testing.T) { - // Create a merge commitment pool. - mergePool := Pool{ - Runtime: rt, - Committee: mergeCommittee, - } - - // Create a executor commitment multi-pool. - executorPool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - executorCommitteeID: { - Runtime: rt, - Committee: executorCommittee, - }, - }, - } - - // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, executorCommittee) - - commit1, err := SignExecutorCommitment(executorSks[0], &body) - require.NoError(t, err, "SignExecutorCommitment") - - commit3, err := SignExecutorCommitment(executorSks[2], &body) - require.NoError(t, err, "SignExecutorCommitment") - - // Update state root and fix the storage receipt. 
- body.Header.StateRoot.FromBytes([]byte("discrepancy")) - body.StorageSignatures = []signature.Signature{generateStorageReceiptSignature(t, parentBlk, &body)} - - commit2, err := SignExecutorCommitment(executorSks[1], &body) - require.NoError(t, err, "SignExecutorCommitment") - - // Generate a merge commitment. - mergeBody := MergeBody{ - ExecutorCommits: []ExecutorCommitment{*commit1, *commit2, *commit3}, - Header: parentBlk.Header, - } - - mergeCommit1, err := SignMergeCommitment(mergeSks[0], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - mergeCommit2, err := SignMergeCommitment(mergeSks[1], &mergeBody) - require.NoError(t, err, "SignMergeCommitment") - - // Adding commitment 1 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit1, &executorPool) - require.NoError(t, err, "AddMergeCommitment") - - // There should not be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - err = mergePool.CheckEnoughCommitments(true) - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - err = mergePool.AddMergeCommitment(context.Background(), childBlk, nopSV, nl, mergeCommit2, &executorPool) - require.NoError(t, err, "AddExecutorCommitment") - - m := cbor.Marshal(executorPool) - var d MultiPool - err = cbor.Unmarshal(m, &d) - require.NoError(t, err) - - // There should be enough merge commitments. - err = mergePool.CheckEnoughCommitments(false) - require.NoError(t, err, "CheckEnoughCommitments") - - // There should be no discrepancy. - dc, err := mergePool.DetectDiscrepancy() - require.NoError(t, err, "DetectDiscrepancy") - require.Equal(t, false, mergePool.Discrepancy) - header := dc.ToDDResult().(block.Header) - require.EqualValues(t, &parentBlk.Header, &header, "DD should return the same header") - }) -} - -func TestMultiPool(t *testing.T) { - genesisTestHelpers.SetTestChainContext() - - rt, sks1, committee1, nl := generateMockCommittee(t) - _, sks2, committee2, _ := generateMockCommittee(t) - com1ID := committee1.EncodedMembersHash() - com2ID := committee2.EncodedMembersHash() - - t.Run("NoDiscrepancy", func(t *testing.T) { - // Create a multi-pool. - pool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - com1ID: { - Runtime: rt, - Committee: committee1, - }, - com2ID: { - Runtime: rt, - Committee: committee2, - }, - }, - } - - // Generate commitments. - childBlk, _, body1 := generateComputeBody(t, committee1) - _, _, body2 := generateComputeBody(t, committee2) - - bodyInvalidID := body1 - bodyInvalidID.CommitteeID.FromBytes([]byte("invalid-committee-id")) - - // First committee. - c1commit1, err := SignExecutorCommitment(sks1[0], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - c1commit2, err := SignExecutorCommitment(sks1[1], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - // Second committee. - c2commit1, err := SignExecutorCommitment(sks2[0], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - c2commit2, err := SignExecutorCommitment(sks2[1], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Invalid committee. - cInvalidCommit, err := SignExecutorCommitment(sks1[0], &bodyInvalidID) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding a commitment for an invalid committee should fail. 
- _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, cInvalidCommit) - require.Error(t, err, "AddExecutorCommitment") - require.Equal(t, ErrInvalidCommitteeID, err, "AddExecutorCommitment") - - // Adding commitment 1 should succeed. - sp, err := pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com1ID], sp, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 3 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit1) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 4 should succeed. - sp, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit2) - require.NoError(t, err, "AddExecutorCommitment") - require.Equal(t, pool.Committees[com2ID], sp, "AddExecutorCommitment") - - // There should be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.NoError(t, err, "CheckEnoughCommitments") - }) - - t.Run("Discrepancy", func(t *testing.T) { - // Create a multi-pool. - pool := MultiPool{ - Committees: map[hash.Hash]*Pool{ - com1ID: { - Runtime: rt, - Committee: committee1, - }, - com2ID: { - Runtime: rt, - Committee: committee2, - }, - }, - } - - // Generate commitments. - childBlk, _, body1 := generateComputeBody(t, committee1) - _, parentBlk, body2 := generateComputeBody(t, committee2) - - // First committee. - c1commit1, err := SignExecutorCommitment(sks1[0], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - c1commit2, err := SignExecutorCommitment(sks1[1], &body1) - require.NoError(t, err, "SignExecutorCommitment") - - // Second committee. - c2commit1, err := SignExecutorCommitment(sks2[0], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Update state root and fix the storage receipt. - body2.Header.StateRoot.FromBytes([]byte("discrepancy")) - body2.StorageSignatures = []signature.Signature{generateStorageReceiptSignature(t, parentBlk, &body2)} - - c2commit2, err := SignExecutorCommitment(sks2[1], &body2) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding commitment 1 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit1) - require.NoError(t, err, "AddExecutorCommitment") - - // There should not be enough executor commitments. 
- err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 2 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c1commit2) - require.NoError(t, err, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 3 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit1) - require.NoError(t, err, "AddExecutorCommitment") - - // There should not be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.Error(t, err, "CheckEnoughCommitments") - require.Equal(t, ErrStillWaiting, err, "CheckEnoughCommitments") - - // Adding commitment 4 should succeed. - _, err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, c2commit2) - require.NoError(t, err, "AddExecutorCommitment") - - // There should be enough executor commitments. - err = pool.CheckEnoughCommitments() - require.NoError(t, err, "CheckEnoughCommitments") - }) -} - func TestTryFinalize(t *testing.T) { genesisTestHelpers.SetTestChainContext() @@ -941,10 +541,7 @@ func TestTryFinalize(t *testing.T) { } // Generate a commitment. - childBlk, _, body := generateComputeBody(t, committee) - - bodyInvalidID := body - bodyInvalidID.CommitteeID.FromBytes([]byte("invalid-committee-id")) + childBlk, _, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -952,15 +549,6 @@ func TestTryFinalize(t *testing.T) { commit2, err := SignExecutorCommitment(sk2, &body) require.NoError(t, err, "SignExecutorCommitment") - // Invalid committee. - cInvalidCommit, err := SignExecutorCommitment(sk1, &bodyInvalidID) - require.NoError(t, err, "SignExecutorCommitment") - - // Adding a commitment for an invalid committee should fail. - err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, cInvalidCommit) - require.Error(t, err, "AddExecutorCommitment") - require.Equal(t, ErrInvalidCommitteeID, err, "AddExecutorCommitment") - // Adding commitment 1 should succeed. err = pool.AddExecutorCommitment(context.Background(), childBlk, nopSV, nl, commit1) require.NoError(t, err, "AddExecutorCommitment") @@ -989,7 +577,7 @@ func TestTryFinalize(t *testing.T) { } // Generate a commitment. - childBlk, parentBlk, body := generateComputeBody(t, committee) + childBlk, parentBlk, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -1049,7 +637,7 @@ func TestTryFinalize(t *testing.T) { } // Generate a commitment. 
- childBlk, _, body := generateComputeBody(t, committee) + childBlk, _, body := generateComputeBody(t) commit1, err := SignExecutorCommitment(sk1, &body) require.NoError(t, err, "SignExecutorCommitment") @@ -1160,14 +748,14 @@ func generateMockCommittee(t *testing.T) ( return } -func generateComputeBody(t *testing.T, committee *scheduler.Committee) (*block.Block, *block.Block, ComputeBody) { +func generateComputeBody(t *testing.T) (*block.Block, *block.Block, ComputeBody) { var id common.Namespace childBlk := block.NewGenesisBlock(id, 0) parentBlk := block.NewEmptyBlock(childBlk, 1, block.Normal) body := ComputeBody{ - CommitteeID: committee.EncodedMembersHash(), Header: ComputeResultsHeader{ + Round: parentBlk.Header.Round, PreviousHash: parentBlk.Header.PreviousHash, IORoot: parentBlk.Header.IORoot, StateRoot: parentBlk.Header.StateRoot, @@ -1205,7 +793,6 @@ func generateTxnSchedulerSignature(t *testing.T, childBlk *block.Block, body *Co body.InputRoot = hash.Hash{} body.InputStorageSigs = []signature.Signature{} dispatch := &TxnSchedulerBatch{ - CommitteeID: body.CommitteeID, IORoot: body.InputRoot, StorageSignatures: body.InputStorageSigs, Header: childBlk.Header, diff --git a/go/roothash/api/commitment/txnscheduler.go b/go/roothash/api/commitment/txnscheduler.go index e955eb5b5ab..0c1d8feccca 100644 --- a/go/roothash/api/commitment/txnscheduler.go +++ b/go/roothash/api/commitment/txnscheduler.go @@ -16,9 +16,6 @@ var TxnSchedulerBatchSigCtx = signature.NewContext("oasis-core/roothash: tx batc // Don't forget to bump CommitteeProtocol version in go/common/version // if you change anything in this struct. type TxnSchedulerBatch struct { - // CommitteeID is the committee ID of the target executor committee. - CommitteeID hash.Hash `json:"cid"` - // IORoot is the I/O root containing the inputs (transactions) that // the executor node should use. IORoot hash.Hash `json:"io_root"` diff --git a/go/roothash/tests/tester.go b/go/roothash/tests/tester.go index 03665dcf545..1307feef3b3 100644 --- a/go/roothash/tests/tester.go +++ b/go/roothash/tests/tester.go @@ -40,7 +40,6 @@ type runtimeState struct { genesisBlock *block.Block executorCommittee *testCommittee - mergeCommittee *testCommittee storageCommittee *testCommittee txnSchedCommittee *testCommittee } @@ -195,7 +194,7 @@ func (s *runtimeState) testEpochTransitionBlock(t *testing.T, scheduler schedule nodes[node.Node.ID] = node } - s.executorCommittee, s.mergeCommittee, s.storageCommittee, s.txnSchedCommittee = mustGetCommittee(t, s.rt, epoch+1, scheduler, nodes) + s.executorCommittee, s.storageCommittee, s.txnSchedCommittee = mustGetCommittee(t, s.rt, epoch+1, scheduler, nodes) // Wait to receive an epoch transition block. for { @@ -228,7 +227,7 @@ func testSuccessfulRound(t *testing.T, backend api.Backend, consensus consensusA func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, consensus consensusAPI.Backend, identity *identity.Identity) { require := require.New(t) - rt, executorCommittee, mergeCommittee := s.rt, s.executorCommittee, s.mergeCommittee + rt, executorCommittee := s.rt, s.executorCommittee dataDir, err := ioutil.TempDir("", "oasis-storage-test_") require.NoError(err, "TempDir") @@ -299,8 +298,8 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co toCommit = append(toCommit, executorCommittee.workers...) 
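+	// Have every executor worker sign a commitment over the same compute results header.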
for _, node := range toCommit { commitBody := commitment.ComputeBody{ - CommitteeID: executorCommittee.committee.EncodedMembersHash(), Header: commitment.ComputeResultsHeader{ + Round: parent.Header.Round, PreviousHash: parent.Header.PreviousHash, IORoot: parent.Header.IORoot, StateRoot: parent.Header.StateRoot, @@ -312,7 +311,6 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co // Fake txn scheduler signature. dispatch := &commitment.TxnSchedulerBatch{ - CommitteeID: commitBody.CommitteeID, IORoot: commitBody.InputRoot, StorageSignatures: commitBody.InputStorageSigs, Header: child.Header, @@ -330,28 +328,12 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co executorCommits = append(executorCommits, *commit) } - // Generate all the merge commitments. - var mergeCommits []commitment.MergeCommitment - toCommit = []*registryTests.TestNode{} - toCommit = append(toCommit, mergeCommittee.workers...) - for _, node := range toCommit { - commitBody := commitment.MergeBody{ - ExecutorCommits: executorCommits, - Header: parent.Header, - } - // `err` shadows outside. - commit, err := commitment.SignMergeCommitment(node.Signer, &commitBody) // nolint: vetshadow - require.NoError(err, "SignSigned") - - mergeCommits = append(mergeCommits, *commit) - } - ctx, cancel := context.WithTimeout(context.Background(), recvTimeout) defer cancel() - tx := api.NewMergeCommitTx(0, nil, rt.Runtime.ID, mergeCommits) + tx := api.NewExecutorCommitTx(0, nil, rt.Runtime.ID, executorCommits) err = consensusAPI.SignAndSubmitTx(ctx, consensus, toCommit[0].Signer, tx) - require.NoError(err, "MergeCommit") + require.NoError(err, "ExecutorCommit") // Ensure that the round was finalized. for { @@ -380,15 +362,15 @@ func (s *runtimeState) testSuccessfulRound(t *testing.T, backend api.Backend, co // There should be merge commitment events for all commitments. evts, err := backend.GetEvents(ctx, blk.Height) require.NoError(err, "GetEvents") - // Merge commit event + Finalized event. - require.Len(evts, len(mergeCommits)+1, "should have all events") + // Executor commit event + Finalized event. + require.Len(evts, len(executorCommits)+1, "should have all events") // First event is Finalized. require.EqualValues(&api.FinalizedEvent{Round: header.Round}, evts[0].FinalizedEvent, "finalized event should have the right round") for i, ev := range evts[1:] { switch { - case ev.MergeCommitted != nil: - // Merge commitment event. - require.EqualValues(mergeCommits[i], ev.MergeCommitted.Commit, "merge commitment event should have the right commitment") + case ev.ExecutorCommitted != nil: + // Executor commitment event. + require.EqualValues(executorCommits[i], ev.ExecutorCommitted.Commit, "executor commitment event should have the right commitment") default: // There should be no other event types. 
t.Fatalf("unexpected event: %+v", ev) @@ -418,7 +400,6 @@ func mustGetCommittee( nodes map[signature.PublicKey]*registryTests.TestNode, ) ( executorCommittee *testCommittee, - mergeCommittee *testCommittee, storageCommittee *testCommittee, txnSchedCommittee *testCommittee, ) { @@ -459,10 +440,8 @@ func mustGetCommittee( groupSize = int(rt.Runtime.TxnScheduler.GroupSize) groupBackupSize = 0 case scheduler.KindComputeExecutor: - fallthrough - case scheduler.KindComputeMerge: - groupSize = int(rt.Runtime.Merge.GroupSize) - groupBackupSize = int(rt.Runtime.Merge.GroupBackupSize) + groupSize = int(rt.Runtime.Executor.GroupSize) + groupBackupSize = int(rt.Runtime.Executor.GroupBackupSize) case scheduler.KindStorage: groupSize = int(rt.Runtime.Storage.GroupSize) } @@ -484,13 +463,11 @@ func mustGetCommittee( txnSchedCommittee = &ret case scheduler.KindComputeExecutor: executorCommittee = &ret - case scheduler.KindComputeMerge: - mergeCommittee = &ret case scheduler.KindStorage: storageCommittee = &ret } - if executorCommittee == nil || mergeCommittee == nil || storageCommittee == nil || txnSchedCommittee == nil { + if executorCommittee == nil || storageCommittee == nil || txnSchedCommittee == nil { continue } diff --git a/go/runtime/host/mock/mock.go b/go/runtime/host/mock/mock.go index 2ca94a136d9..8a341b70c89 100644 --- a/go/runtime/host/mock/mock.go +++ b/go/runtime/host/mock/mock.go @@ -77,6 +77,7 @@ func (r *runtime) Call(ctx context.Context, body *protocol.Body) (*protocol.Body return &protocol.Body{RuntimeExecuteTxBatchResponse: &protocol.RuntimeExecuteTxBatchResponse{ Batch: protocol.ComputedBatch{ Header: commitment.ComputeResultsHeader{ + Round: rq.Block.Header.Round + 1, PreviousHash: rq.Block.Header.EncodedHash(), IORoot: ioRoot, StateRoot: stateRoot, diff --git a/go/scheduler/api/api.go b/go/scheduler/api/api.go index 9aff5b1328a..873206eee1d 100644 --- a/go/scheduler/api/api.go +++ b/go/scheduler/api/api.go @@ -75,14 +75,11 @@ const ( // KindComputeTxnScheduler is a transaction scheduler committee. KindComputeTxnScheduler CommitteeKind = 2 - // KindComputeMerge is a merge committee. - KindComputeMerge CommitteeKind = 3 - // KindStorage is a storage committee. - KindStorage CommitteeKind = 4 + KindStorage CommitteeKind = 3 // MaxCommitteeKind is a dummy value used for iterating all committee kinds. - MaxCommitteeKind = 5 + MaxCommitteeKind = 4 ) // NeedsLeader returns if committee kind needs leader role. 
@@ -92,8 +89,6 @@ func (k CommitteeKind) NeedsLeader() (bool, error) { return false, nil case KindComputeTxnScheduler: return true, nil - case KindComputeMerge: - return false, nil case KindStorage: return false, nil default: @@ -110,8 +105,6 @@ func (k CommitteeKind) String() string { return "executor" case KindComputeTxnScheduler: return "txn_scheduler" - case KindComputeMerge: - return "merge" case KindStorage: return "storage" default: diff --git a/go/scheduler/tests/tester.go b/go/scheduler/tests/tester.go index ea74a07702a..ea70f650190 100644 --- a/go/scheduler/tests/tester.go +++ b/go/scheduler/tests/tester.go @@ -42,8 +42,8 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend epochtime := consensus.EpochTime().(epochtime.SetableBackend) epoch := epochtimeTests.MustAdvanceEpoch(t, epochtime, 1) - ensureValidCommittees := func(expectedExecutor, expectedTransactionScheduler, expectedMerge, expectedStorage int) { - var executor, transactionScheduler, merge, storage *api.Committee + ensureValidCommittees := func(expectedExecutor, expectedTransactionScheduler, expectedStorage int) { + var executor, transactionScheduler, storage *api.Committee var seen int for seen < 4 { select { @@ -64,10 +64,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend require.Nil(transactionScheduler, "haven't seen a transaction scheduler committee yet") require.Len(committee.Members, expectedTransactionScheduler, "committee has all transaction scheduler nodes") transactionScheduler = committee - case api.KindComputeMerge: - require.Nil(merge, "haven't seen a merge committee yet") - require.Len(committee.Members, expectedMerge, "committee has all merge nodes") - merge = committee case api.KindStorage: require.Nil(storage, "haven't seen a storage committee yet") require.Len(committee.Members, expectedStorage, "committee has all storage nodes") @@ -98,9 +94,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend case api.KindComputeTxnScheduler: require.EqualValues(transactionScheduler, committee, "fetched transaction scheduler committee is identical") transactionScheduler = nil - case api.KindComputeMerge: - require.EqualValues(merge, committee, "fetched merge committee is identical") - merge = nil case api.KindStorage: require.EqualValues(storage, committee, "fetched storage committee is identical") storage = nil @@ -109,7 +102,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend require.Nil(executor, "fetched an executor committee") require.Nil(transactionScheduler, "fetched a transaction scheduler committee") - require.Nil(merge, "fetched a merge committee") require.Nil(storage, "fetched a storage committee") } @@ -125,7 +117,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend ensureValidCommittees( nExecutor, int(rt.Runtime.TxnScheduler.GroupSize), - int(rt.Runtime.Merge.GroupSize)+int(rt.Runtime.Merge.GroupBackupSize), nStorage, ) @@ -141,7 +132,6 @@ func SchedulerImplementationTests(t *testing.T, name string, backend api.Backend ensureValidCommittees( 3, int(rt.Runtime.TxnScheduler.GroupSize), - int(rt.Runtime.Merge.GroupSize)+int(rt.Runtime.Merge.GroupBackupSize), 1, ) @@ -196,9 +186,6 @@ func requireValidCommitteeMembers(t *testing.T, committee *api.Committee, runtim case api.KindComputeExecutor: require.EqualValues(runtime.Executor.GroupSize, workers, "executor committee should have the correct number of workers") 
require.EqualValues(runtime.Executor.GroupBackupSize, backups, "executor committee should have the correct number of backup workers") - case api.KindComputeMerge: - require.EqualValues(runtime.Merge.GroupSize, workers, "merge committee should have the correct number of workers") - require.EqualValues(runtime.Merge.GroupBackupSize, backups, "merge committee should have the correct number of backup workers") case api.KindStorage, api.KindComputeTxnScheduler: numCommitteeMembersWithoutLeader := len(committee.Members) needsLeader, err := committee.Kind.NeedsLeader() diff --git a/go/worker/common/committee/group.go b/go/worker/common/committee/group.go index 54ccd2e753d..7441ffd247e 100644 --- a/go/worker/common/committee/group.go +++ b/go/worker/common/committee/group.go @@ -56,21 +56,12 @@ type epoch struct { // executorCommittee is the executor committee we are a member of. executorCommittee *CommitteeInfo - // executorCommitteeID is the identifier of our executor committee. - executorCommitteeID hash.Hash - // executorCommittees are all executor committees. - executorCommittees map[hash.Hash]*CommitteeInfo - // executorCommitteeMemberSet is a set of node public keys of executor committee members. - executorCommitteeMemberSet map[signature.PublicKey]bool // txnSchedulerCommitee is the txn scheduler committee we are a member of. txnSchedulerCommittee *CommitteeInfo // txnSchedulerLeader is the node public key of txn scheduler leader. txnSchedulerLeader signature.PublicKey - // mergeCommittee is the merge committee we are a member of. - mergeCommittee *CommitteeInfo - // storageCommittee is the storage committee we are a member of. storageCommittee *CommitteeInfo @@ -81,19 +72,15 @@ type epoch struct { type EpochSnapshot struct { groupVersion int64 - executorCommitteeID hash.Hash - epochNumber api.EpochTime executorRole scheduler.Role txnSchedulerRole scheduler.Role - mergeRole scheduler.Role runtime *registry.Runtime - executorCommittees map[hash.Hash]*CommitteeInfo + executorCommittee *CommitteeInfo txnSchedulerCommittee *CommitteeInfo - mergeCommittee *CommitteeInfo storageCommittee *CommitteeInfo nodes committee.NodeDescriptorLookup @@ -101,14 +88,7 @@ type EpochSnapshot struct { // NewMockEpochSnapshot returns a mock epoch snapshot to be used in tests. func NewMockEpochSnapshot() *EpochSnapshot { - executorCommitteeID := hash.NewFromBytes([]byte("mock committee id")) - - return &EpochSnapshot{ - executorCommitteeID: executorCommitteeID, - executorCommittees: map[hash.Hash]*CommitteeInfo{ - executorCommitteeID: {}, - }, - } + return &EpochSnapshot{} } // GetGroupVersion returns the consensus backend block height of the last @@ -122,17 +102,9 @@ func (e *EpochSnapshot) GetRuntime() *registry.Runtime { return e.runtime } -// GetExecutorCommittees returns the current executor committees. -func (e *EpochSnapshot) GetExecutorCommittees() map[hash.Hash]*CommitteeInfo { - return e.executorCommittees -} - -// GetExecutorCommitteeID returns ID of the executor committee the current node is -// a member of. -// -// NOTE: Will return an invalid all-zero ID if not a member. -func (e *EpochSnapshot) GetExecutorCommitteeID() hash.Hash { - return e.executorCommitteeID +// GetExecutorCommittee returns the current executor committee. +func (e *EpochSnapshot) GetExecutorCommittee() *CommitteeInfo { + return e.executorCommittee } // GetEpochNumber returns the sequential number of the epoch. 
@@ -169,29 +141,6 @@ func (e *EpochSnapshot) IsTransactionSchedulerLeader() bool { return e.txnSchedulerRole == scheduler.Leader } -// GetMergeCommittee returns the current merge committee. -func (e *EpochSnapshot) GetMergeCommittee() *CommitteeInfo { - return e.mergeCommittee -} - -// IsMergeMember checks if the current node is a member of the merge committee -// in the current epoch. -func (e *EpochSnapshot) IsMergeMember() bool { - return e.mergeRole != scheduler.Invalid -} - -// IsMergeWorker checks if the current node is a worker of the merge committee in -// the current epoch. -func (e *EpochSnapshot) IsMergeWorker() bool { - return e.mergeRole == scheduler.Worker -} - -// IsMergeBackupWorker checks if the current node is a backup worker of the merge committee in -// the current epoch. -func (e *EpochSnapshot) IsMergeBackupWorker() bool { - return e.mergeRole == scheduler.BackupWorker -} - // GetStorageCommittee returns the current storage committee. func (e *EpochSnapshot) GetStorageCommittee() *CommitteeInfo { return e.storageCommittee @@ -322,10 +271,7 @@ func (g *Group) EpochTransition(ctx context.Context, height int64) error { } // Find the current committees. - executorCommittees := make(map[hash.Hash]*CommitteeInfo) - executorCommitteeMemberSet := make(map[signature.PublicKey]bool) - var executorCommittee, txnSchedulerCommittee, mergeCommittee, storageCommittee *CommitteeInfo - var executorCommitteeID hash.Hash + var executorCommittee, txnSchedulerCommittee, storageCommittee *CommitteeInfo var txnSchedulerLeader signature.PublicKey publicIdentity := g.identity.NodeSigner.Public() for _, cm := range committees { @@ -356,41 +302,22 @@ func (g *Group) EpochTransition(ctx context.Context, height int64) error { switch cm.Kind { case scheduler.KindComputeExecutor: - // There can be multiple executor committees per runtime. - cID := cm.EncodedMembersHash() - executorCommittees[cID] = ci - if role != scheduler.Invalid { - if executorCommittee != nil { - return fmt.Errorf("member of multiple executor committees") - } - - executorCommittee = ci - executorCommitteeID = cID - } - - for _, m := range cm.Members { - executorCommitteeMemberSet[m.PublicKey] = true - } + executorCommittee = ci case scheduler.KindComputeTxnScheduler: txnSchedulerCommittee = ci if leader.IsValid() { txnSchedulerLeader = leader } - case scheduler.KindComputeMerge: - mergeCommittee = ci case scheduler.KindStorage: storageCommittee = ci } } - if len(executorCommittees) == 0 { + if executorCommittee == nil { return fmt.Errorf("no executor committees") } if txnSchedulerCommittee == nil { return fmt.Errorf("no transaction scheduler committee") } - if mergeCommittee == nil { - return fmt.Errorf("no merge committee") - } if storageCommittee == nil { return fmt.Errorf("no storage committee") } @@ -413,34 +340,23 @@ func (g *Group) EpochTransition(ctx context.Context, height int64) error { // Update the current epoch. 
g.activeEpoch = &epoch{ - epochNumber: epochNumber, - epochCtx: epochCtx, - cancelEpochCtx: cancelEpochCtx, - roundCtx: roundCtx, - cancelRoundCtx: cancelRoundCtx, - groupVersion: height, - executorCommittee: executorCommittee, - executorCommitteeID: executorCommitteeID, - executorCommittees: executorCommittees, - executorCommitteeMemberSet: executorCommitteeMemberSet, - txnSchedulerCommittee: txnSchedulerCommittee, - txnSchedulerLeader: txnSchedulerLeader, - mergeCommittee: mergeCommittee, - storageCommittee: storageCommittee, - runtime: runtime, - } - - // Executor committee may be nil in case we are not a member of any committee. - var executorRole scheduler.Role - if executorCommittee != nil { - executorRole = executorCommittee.Role + epochNumber: epochNumber, + epochCtx: epochCtx, + cancelEpochCtx: cancelEpochCtx, + roundCtx: roundCtx, + cancelRoundCtx: cancelRoundCtx, + groupVersion: height, + executorCommittee: executorCommittee, + txnSchedulerCommittee: txnSchedulerCommittee, + txnSchedulerLeader: txnSchedulerLeader, + storageCommittee: storageCommittee, + runtime: runtime, } g.logger.Info("epoch transition complete", "group_version", height, - "executor_role", executorRole, + "executor_role", executorCommittee.Role, "txn_scheduler_role", txnSchedulerCommittee.Role, - "merge_role", mergeCommittee.Role, ) return nil @@ -461,26 +377,17 @@ func (g *Group) GetEpochSnapshot() *EpochSnapshot { } s := &EpochSnapshot{ - epochNumber: g.activeEpoch.epochNumber, - groupVersion: g.activeEpoch.groupVersion, - // NOTE: Transaction scheduler and merge committees are always set. + epochNumber: g.activeEpoch.epochNumber, + groupVersion: g.activeEpoch.groupVersion, + executorRole: g.activeEpoch.executorCommittee.Role, txnSchedulerRole: g.activeEpoch.txnSchedulerCommittee.Role, - mergeRole: g.activeEpoch.mergeCommittee.Role, runtime: g.activeEpoch.runtime, - executorCommittees: g.activeEpoch.executorCommittees, + executorCommittee: g.activeEpoch.executorCommittee, txnSchedulerCommittee: g.activeEpoch.txnSchedulerCommittee, - mergeCommittee: g.activeEpoch.mergeCommittee, storageCommittee: g.activeEpoch.storageCommittee, nodes: g.nodes, } - // Executor committee may be nil in case we are not a member of any committee. - xc := g.activeEpoch.executorCommittee - if xc != nil { - s.executorRole = xc.Role - s.executorCommitteeID = g.activeEpoch.executorCommitteeID - } - return s } @@ -498,21 +405,13 @@ func (g *Group) AuthenticatePeer(peerID signature.PublicKey, msg *p2p.Message) e // If we are in the executor committee, we accept messages from the transaction // scheduler committee leader. - if g.activeEpoch.executorCommittee != nil && g.activeEpoch.txnSchedulerLeader.IsValid() { + if g.activeEpoch.executorCommittee.Role != scheduler.Invalid && g.activeEpoch.txnSchedulerLeader.IsValid() { n := g.nodes.LookupByPeerID(peerID) if n != nil { authorized = authorized || g.activeEpoch.txnSchedulerLeader.Equal(n.ID) } } - // If we are in the merge committee, we accept messages from any executor committee member. 
- if g.activeEpoch.mergeCommittee.Role != scheduler.Invalid { - n := g.nodes.LookupByPeerID(peerID) - if n != nil { - authorized = authorized || g.activeEpoch.executorCommitteeMemberSet[n.ID] - } - } - if !authorized { err := fmt.Errorf("group: peer is not authorized") @@ -567,11 +466,7 @@ func (g *Group) HandlePeerMessage(unusedPeerID signature.PublicKey, msg *p2p.Mes return g.handler.HandlePeerMessage(ctx, msg) } -func (g *Group) publishLocked( - spanCtx opentracing.SpanContext, - ci *CommitteeInfo, - msg *p2p.Message, -) error { +func (g *Group) publishLocked(spanCtx opentracing.SpanContext, msg *p2p.Message) error { if g.p2p == nil { return fmt.Errorf("group: p2p transport is not enabled") } @@ -597,7 +492,6 @@ func (g *Group) publishLocked( // Returns the transaction scheduler's signature for this batch. func (g *Group) PublishScheduledBatch( spanCtx opentracing.SpanContext, - committeeID hash.Hash, ioRoot hash.Hash, storageSignatures []signature.Signature, hdr block.Header, @@ -609,13 +503,8 @@ func (g *Group) PublishScheduledBatch( return nil, fmt.Errorf("group: not leader of txn scheduler committee") } - xc := g.activeEpoch.executorCommittees[committeeID] - if xc == nil { - return nil, fmt.Errorf("group: invalid executor committee") - } - + // XXX: this should be moved to the txn scheduler and passed as argument dispatchMsg := &commitment.TxnSchedulerBatch{ - CommitteeID: committeeID, IORoot: ioRoot, StorageSignatures: storageSignatures, Header: hdr, @@ -628,16 +517,14 @@ func (g *Group) PublishScheduledBatch( return &signedDispatchMsg.Signature, g.publishLocked( spanCtx, - xc, &p2p.Message{ TxnSchedulerBatch: signedDispatchMsg, }, ) } -// PublishExecuteFinished publishes an execute commitment to all members in the merge -// committee. -func (g *Group) PublishExecuteFinished(spanCtx opentracing.SpanContext, c *commitment.ExecutorCommitment) error { +// PublishExecutorCommit publishes an execute commitment. +func (g *Group) PublishExecutorCommit(spanCtx opentracing.SpanContext, c *commitment.ExecutorCommitment) error { g.RLock() defer g.RUnlock() @@ -647,7 +534,6 @@ func (g *Group) PublishExecuteFinished(spanCtx opentracing.SpanContext, c *commi return g.publishLocked( spanCtx, - g.activeEpoch.mergeCommittee, &p2p.Message{ ExecutorCommit: c, }, diff --git a/go/worker/compute/executor/committee/fault.go b/go/worker/compute/executor/committee/fault.go deleted file mode 100644 index 109787609ef..00000000000 --- a/go/worker/compute/executor/committee/fault.go +++ /dev/null @@ -1,186 +0,0 @@ -package committee - -import ( - "context" - "time" - - "github.com/oasisprotocol/oasis-core/go/common/logging" - consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - runtimeRegistry "github.com/oasisprotocol/oasis-core/go/runtime/registry" -) - -type faultSubmitter interface { - // SubmitExecutorCommit submits an executor commitment when a fault is detected. - SubmitExecutorCommit(ctx context.Context, commit *commitment.ExecutorCommitment) error -} - -type nodeFaultSubmitter struct { - node *Node -} - -// Implements faultSubmitter. 
-func (nf *nodeFaultSubmitter) SubmitExecutorCommit(ctx context.Context, commit *commitment.ExecutorCommitment) error { - tx := roothash.NewExecutorCommitTx(0, nil, nf.node.commonNode.Runtime.ID(), []commitment.ExecutorCommitment{*commit}) - return consensus.SignAndSubmitTx(ctx, nf.node.commonNode.Consensus, nf.node.commonNode.Identity.NodeSigner, tx) -} - -func newNodeFaultSubmitter(node *Node) faultSubmitter { - return &nodeFaultSubmitter{node} -} - -type faultDetector struct { - runtime runtimeRegistry.Runtime - submitter faultSubmitter - commit *commitment.ExecutorCommitment - - quitCh chan struct{} - eventCh chan *roothash.Event - - logger *logging.Logger -} - -func (d *faultDetector) notify(ev *roothash.Event) { - select { - case <-d.quitCh: - // In case the worker has quit, prevent blocking on the event channel. - case d.eventCh <- ev: - } -} - -func (d *faultDetector) submit(ctx context.Context) { - d.logger.Warn("independently submitting executor commit") - - err := d.submitter.SubmitExecutorCommit(ctx, d.commit) - switch err { - case nil: - d.logger.Info("independently submitted executor commit") - default: - d.logger.Error("failed to submit executor commit independently", - "err", err, - ) - } -} - -func (d *faultDetector) worker(ctx context.Context) { - // We should submit the commitment immediately in case when: - // - // - We see a merge commit that does not have our commitment. - // - We see an executor commit. - // - RoundTimeout elapses without seeing our commitment. - // - defer close(d.quitCh) - - // Determine the round timeout and start a local timer. - rtDesc, err := d.runtime.RegistryDescriptor(ctx) - if err != nil { - d.logger.Error("failed to retrieve runtime registry descriptor", - "err", err, - ) - return - } - // Add a small amount to compensate for network latency. - timer := time.NewTimer(rtDesc.Executor.RoundTimeout + 1*time.Second) - - // Extract committee ID for easier comparison. - openCommit, err := d.commit.Open() - if err != nil { - // This should NEVER happen. - d.logger.Error("bad own commitment", - "err", err, - ) - return - } - - // TODO: Once we have P2P gossipsub also look at gossiped commitments in addition to consensus. - - for { - select { - case <-ctx.Done(): - return - case <-timer.C: - // Local round timeout expired. - d.logger.Warn("local round timeout expired without seeing our commitment") - go d.submit(ctx) - return - case ev := <-d.eventCh: - // Received a roothash event for our runtime. - switch { - case ev.ExecutorCommitted != nil: - // Executor committed independently, check if it is for our committee. - ec, err := ev.ExecutorCommitted.Commit.Open() - if err != nil { - // This should NEVER happen as the consensus backend verifies this. - d.logger.Error("bad executor commitment from consensus backend?", - "err", err, - ) - continue - } - - if !ec.Body.CommitteeID.Equal(&openCommit.Body.CommitteeID) { - continue - } - - // If this is our own commit (in theory anyone could submit it on our behalf), we - // don't need to do anything. - if ec.Equal(d.commit) { - d.logger.Info("our commitment has been submitted to consensus layer by an external party") - return - } - - // Executor committed independently, we should too as this means that so far we have - // not seen any separate commitments. - d.logger.Warn("seen another executor independently submit commitments, following", - "executor_node_id", ev.ExecutorCommitted.Commit.Signature.PublicKey, - ) - go d.submit(ctx) - return - case ev.MergeCommitted != nil: - // Merge node committed. 
If our commit is included, then we can stop as there is at - // least one honest merge node. - mc, err := ev.MergeCommitted.Commit.Open() - if err != nil { - // This should NEVER happen as the consensus backend verifies this. - d.logger.Error("bad merge commitment from consensus backend?", - "err", err, - ) - continue - } - - for _, ec := range mc.Body.ExecutorCommits { - if ec.Equal(d.commit) { - // Found our commitment, stop right here. - d.logger.Info("our commitment has been submitted to consensus layer by an honest merge node") - return - } - } - - // A merge node submitted commitments but didn't include ours. - d.logger.Warn("seen merge commitment without our commitment", - "merge_node_id", ev.MergeCommitted.Commit.Signature.PublicKey, - ) - go d.submit(ctx) - return - } - } - } -} - -func newFaultDetector( - ctx context.Context, - rt runtimeRegistry.Runtime, - commit *commitment.ExecutorCommitment, - submitter faultSubmitter, -) *faultDetector { - d := &faultDetector{ - runtime: rt, - submitter: submitter, - commit: commit, - quitCh: make(chan struct{}), - eventCh: make(chan *roothash.Event), - logger: logging.GetLogger("worker/executor/committee/fault").With("runtime_id", rt.ID()), - } - go d.worker(ctx) - return d -} diff --git a/go/worker/compute/executor/committee/fault_test.go b/go/worker/compute/executor/committee/fault_test.go deleted file mode 100644 index d039063b459..00000000000 --- a/go/worker/compute/executor/committee/fault_test.go +++ /dev/null @@ -1,247 +0,0 @@ -package committee - -import ( - "context" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - memorySigner "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/memory" - "github.com/oasisprotocol/oasis-core/go/common/pubsub" - genesisTestHelpers "github.com/oasisprotocol/oasis-core/go/genesis/tests" - registry "github.com/oasisprotocol/oasis-core/go/registry/api" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - "github.com/oasisprotocol/oasis-core/go/runtime/history" - "github.com/oasisprotocol/oasis-core/go/runtime/localstorage" - "github.com/oasisprotocol/oasis-core/go/runtime/tagindexer" - storage "github.com/oasisprotocol/oasis-core/go/storage/api" -) - -type testFaultSubmitter struct { - sync.Mutex - - faults []commitment.ExecutorCommitment -} - -// Implements faultSubmitter. -func (tf *testFaultSubmitter) SubmitExecutorCommit(ctx context.Context, commit *commitment.ExecutorCommitment) error { - tf.Lock() - defer tf.Unlock() - - tf.faults = append(tf.faults, *commit) - return nil -} - -func (tf *testFaultSubmitter) getFaults() []commitment.ExecutorCommitment { - tf.Lock() - defer tf.Unlock() - - return append([]commitment.ExecutorCommitment{}, tf.faults...) -} - -type testRuntime struct { -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) ID() common.Namespace { - return common.Namespace{} -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) RegistryDescriptor(ctx context.Context) (*registry.Runtime, error) { - return ®istry.Runtime{}, nil -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) WatchRegistryDescriptor() (<-chan *registry.Runtime, pubsub.ClosableSubscription, error) { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. 
-func (rt *testRuntime) History() history.History { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) TagIndexer() tagindexer.QueryableBackend { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) Storage() storage.Backend { - panic("not implemented") -} - -// Implements runtimeRegistry.Runtime. -func (rt *testRuntime) LocalStorage() localstorage.LocalStorage { - panic("not implemented") -} - -func TestFaultDetector(t *testing.T) { - require := require.New(t) - - genesisTestHelpers.SetTestChainContext() - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test") - commit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - rt := testRuntime{} - - for _, tc := range []struct { - name string - fn func(*testing.T, *faultDetector, *testFaultSubmitter, *commitment.ExecutorCommitment) - }{ - {"Timeout", testFaultDetectorTimeout}, - {"EarlyExecutor", testFaultDetectorEarlyExecutor}, - {"ExternalSubmission", testFaultDetectorExternalSubmission}, - {"FaultyMerge", testFaultDetectorFaultyMerge}, - {"HonestMerge", testFaultDetectorHonestMerge}, - } { - t.Run(tc.name, func(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - tf := testFaultSubmitter{} - fd := newFaultDetector(ctx, &rt, commit, &tf) - - tc.fn(t, fd, &tf, commit) - }) - } -} - -func testFaultDetectorTimeout(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - // The fault detector should timeout in one second even if we don't do any notifies. - time.Sleep(1200 * time.Millisecond) - - faults := tf.getFaults() - require.Len(faults, 1, "fault detector should submit commitment after timeout") - require.EqualValues(*commit, faults[0], "the submitted commitment should be the same") -} - -func testFaultDetectorEarlyExecutor(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test: EarlyExecutor") - earlyCommit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{ - CommitteeID: hash.NewFromBytes([]byte("EarlyExecutorBadCommitteeID")), - }) - require.NoError(err, "SignExecutorCommitment") - - // Nothing should happen if the commitee ID doesn't match. - fd.notify(&roothash.Event{ - ExecutorCommitted: &roothash.ExecutorCommittedEvent{ - Commit: *earlyCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should be no submissions. - faults := tf.getFaults() - require.Len(faults, 0, "fault detector should not submit anything in case of events for other committees") - - // Notify the detector of an early executor submitting their commitment. - earlyCommit, err = commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - fd.notify(&roothash.Event{ - ExecutorCommitted: &roothash.ExecutorCommittedEvent{ - Commit: *earlyCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should be a submission. 
- faults = tf.getFaults() - require.Len(faults, 1, "fault detector should submit commitment after early executor") - require.EqualValues(*commit, faults[0], "the submitted commitment should be the same") -} - -func testFaultDetectorExternalSubmission(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - // Notify the detector of an external process submitting our commitment. - fd.notify(&roothash.Event{ - ExecutorCommitted: &roothash.ExecutorCommittedEvent{ - Commit: *commit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should not be a submission. - faults := tf.getFaults() - require.Len(faults, 0, "fault detector should not submit commitment after seeing own commit") - - // The fault detector should stop after seeing an honest merge node, so even waiting for the - // timeout amount should not trigger it. - time.Sleep(1200 * time.Millisecond) - - faults = tf.getFaults() - require.Len(faults, 0, "fault detector should be stopped") -} - -func testFaultDetectorFaultyMerge(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test: FaultyMerge") - earlyCommit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - mergeCommit, err := commitment.SignMergeCommitment(signer, &commitment.MergeBody{ - ExecutorCommits: []commitment.ExecutorCommitment{*earlyCommit}, - }) - require.NoError(err, "SignMergeCommitment") - - // Notify the detector of a merge commit that does not include own commit. - fd.notify(&roothash.Event{ - MergeCommitted: &roothash.MergeCommittedEvent{ - Commit: *mergeCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should be a submission. - faults := tf.getFaults() - require.Len(faults, 1, "fault detector should submit commitment after merge without own commit") - require.EqualValues(*commit, faults[0], "the submitted commitment should be the same") -} - -func testFaultDetectorHonestMerge(t *testing.T, fd *faultDetector, tf *testFaultSubmitter, commit *commitment.ExecutorCommitment) { - require := require.New(t) - - signer := memorySigner.NewTestSigner("worker/compute/executor/committee/fault test: HonestMerge") - earlyCommit, err := commitment.SignExecutorCommitment(signer, &commitment.ComputeBody{}) - require.NoError(err, "SignExecutorCommitment") - - // Merge commit that includes our commit -- should not trigger a submission. - mergeCommit, err := commitment.SignMergeCommitment(signer, &commitment.MergeBody{ - ExecutorCommits: []commitment.ExecutorCommitment{*commit, *earlyCommit}, - }) - require.NoError(err, "SignMergeCommitment") - - // Notify the detector of a merge commit that includes own commit. - fd.notify(&roothash.Event{ - MergeCommitted: &roothash.MergeCommittedEvent{ - Commit: *mergeCommit, - }, - }) - // Give the fault detector some time to process requests. - time.Sleep(100 * time.Millisecond) - // There should not be a submission. - faults := tf.getFaults() - require.Len(faults, 0, "fault detector should not submit commitment after merge without own commit") - - // The fault detector should stop after seeing an honest merge node, so even waiting for the - // timeout amount should not trigger it. 
- time.Sleep(1200 * time.Millisecond) - - faults = tf.getFaults() - require.Len(faults, 0, "fault detector should be stopped") -} diff --git a/go/worker/compute/executor/committee/node.go b/go/worker/compute/executor/committee/node.go index a230ea2d7e3..a62b1ad5802 100644 --- a/go/worker/compute/executor/committee/node.go +++ b/go/worker/compute/executor/committee/node.go @@ -20,6 +20,7 @@ import ( "github.com/oasisprotocol/oasis-core/go/common/pubsub" "github.com/oasisprotocol/oasis-core/go/common/tracing" "github.com/oasisprotocol/oasis-core/go/common/version" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" "github.com/oasisprotocol/oasis-core/go/roothash/api/block" "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" @@ -32,7 +33,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" p2pError "github.com/oasisprotocol/oasis-core/go/worker/common/p2p/error" - mergeCommittee "github.com/oasisprotocol/oasis-core/go/worker/compute/merge/committee" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) @@ -114,7 +114,6 @@ type Node struct { *commonWorker.RuntimeHostNode commonNode *committee.Node - mergeNode *mergeCommittee.Node commonCfg commonWorker.Config roleProvider registration.RoleProvider @@ -138,9 +137,6 @@ type Node struct { // Bump this when we need to change what the worker selects over. reselect chan struct{} - // Guarded by .commonNode.CrossNode. - faultDetector *faultDetector - logger *logging.Logger } @@ -213,7 +209,7 @@ func (n *Node) HandlePeerMessage(ctx context.Context, message *p2p.Message) (boo return false, p2pError.Permanent(err) } - err := n.queueBatchBlocking(ctx, bd.CommitteeID, bd.IORoot, bd.StorageSignatures, bd.Header, sbd.Signature) + err := n.queueBatchBlocking(ctx, bd.IORoot, bd.StorageSignatures, bd.Header, sbd.Signature) if err != nil { return false, err } @@ -224,7 +220,6 @@ func (n *Node) HandlePeerMessage(ctx context.Context, message *p2p.Message) (boo func (n *Node) queueBatchBlocking( ctx context.Context, - committeeID hash.Hash, ioRootHash hash.Hash, storageSignatures []signature.Signature, hdr block.Header, @@ -292,25 +287,18 @@ func (n *Node) queueBatchBlocking( n.commonNode.CrossNode.Lock() defer n.commonNode.CrossNode.Unlock() - return n.handleExternalBatchLocked(committeeID, batch, hdr) + return n.handleExternalBatchLocked(batch, hdr) } // HandleBatchFromTransactionSchedulerLocked processes a batch from the transaction scheduler. // Guarded by n.commonNode.CrossNode. func (n *Node) HandleBatchFromTransactionSchedulerLocked( batchSpanCtx opentracing.SpanContext, - committeeID hash.Hash, ioRoot hash.Hash, batch transaction.RawBatch, txnSchedSig signature.Signature, inputStorageSigs []signature.Signature, ) { - epoch := n.commonNode.Group.GetEpochSnapshot() - expectedID := epoch.GetExecutorCommitteeID() - if !expectedID.Equal(&committeeID) { - return - } - n.maybeStartProcessingBatchLocked(&unresolvedBatch{ ioRoot: storage.Root{ Namespace: n.commonNode.CurrentBlock.Header.Namespace, @@ -602,7 +590,6 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { // Generate proposed compute results. 
proposedResults := &commitment.ComputeBody{ - CommitteeID: epoch.GetExecutorCommitteeID(), Header: batch.Header, RakSig: batch.RakSig, TxnSchedSig: state.batch.txnSchedSignature, @@ -664,7 +651,7 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { ) return err } - if err = proposedResults.VerifyStorageReceipt(lastHeader.Namespace, lastHeader.Round+1, &receiptBody); err != nil { + if err = proposedResults.VerifyStorageReceipt(lastHeader.Namespace, &receiptBody); err != nil { n.logger.Error("failed to validate receipt body", "receipt body", receiptBody, "err", err, @@ -690,7 +677,7 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { return } - // Commit. + // Sign the commitment and submit. commit, err := commitment.SignExecutorCommitment(n.commonNode.Identity.NodeSigner, proposedResults) if err != nil { n.logger.Error("failed to sign commitment", @@ -700,71 +687,39 @@ func (n *Node) proposeBatchLocked(batch *protocol.ComputedBatch) { return } - // Publish commitment to merge committee. - spanPublish := opentracing.StartSpan("PublishExecuteFinished(commitment)", - opentracing.ChildOf(state.batch.spanCtx), - ) - err = n.commonNode.Group.PublishExecuteFinished(state.batch.spanCtx, commit) - if err != nil { - spanPublish.Finish() - n.logger.Error("failed to publish results to committee", - "err", err, - ) - n.abortBatchLocked(err) - return - } - spanPublish.Finish() + // Publish commitment to the consensus layer. + tx := roothash.NewExecutorCommitTx(0, nil, n.commonNode.Runtime.ID(), []commitment.ExecutorCommitment{*commit}) + go func() { + commitErr := consensus.SignAndSubmitTx(n.roundCtx, n.commonNode.Consensus, n.commonNode.Identity.NodeSigner, tx) + switch commitErr { + case nil: + n.logger.Info("executor commit finalized") + default: + n.logger.Error("failed to submit executor commit", + "err", commitErr, + ) + } + }() // TODO: Add crash point. - // Set up the fault detector so that we can submit the commitment independently from any other - // merge nodes in case a fault is detected (which would indicate that the entire merge committee - // is faulty). - n.faultDetector = newFaultDetector(n.roundCtx, n.commonNode.Runtime, commit, newNodeFaultSubmitter(n)) - n.transitionLocked(StateWaitingForFinalize{ batchStartTime: state.batchStartTime, }) - if epoch.IsMergeMember() { - if n.mergeNode == nil { - n.logger.Error("scheduler says we are a merge worker, but we are not") - } else { - n.mergeNode.HandleResultsFromExecutorWorkerLocked(state.batch.spanCtx, commit) - } - } - crash.Here(crashPointBatchProposeAfter) } // HandleNewEventLocked implements NodeHooks. // Guarded by n.commonNode.CrossNode. func (n *Node) HandleNewEventLocked(ev *roothash.Event) { - // In case a fault detector exists, notify it of events. - if n.faultDetector != nil { - n.faultDetector.notify(ev) - } - dis := ev.ExecutionDiscrepancyDetected if dis == nil { // Ignore other events. return } - // Check if the discrepancy occurred in our committee. 
- epoch := n.commonNode.Group.GetEpochSnapshot() - expectedID := epoch.GetExecutorCommitteeID() - if !expectedID.Equal(&dis.CommitteeID) { - n.logger.Debug("ignoring discrepancy event for a different committee", - "expected_committee", expectedID, - "committee", dis.CommitteeID, - ) - return - } - - n.logger.Warn("execution discrepancy detected", - "committee_id", dis.CommitteeID, - ) + n.logger.Warn("execution discrepancy detected") crash.Here(crashPointDiscrepancyDetectedAfter) @@ -803,7 +758,7 @@ func (n *Node) HandleNodeUpdateLocked(update *runtimeCommittee.NodeUpdate, snaps } // Guarded by n.commonNode.CrossNode. -func (n *Node) handleExternalBatchLocked(committeeID hash.Hash, batch *unresolvedBatch, hdr block.Header) error { +func (n *Node) handleExternalBatchLocked(batch *unresolvedBatch, hdr block.Header) error { // If we are not waiting for a batch, don't do anything. if _, ok := n.state.(StateWaitingForBatch); !ok { return errIncorrectState @@ -817,16 +772,6 @@ func (n *Node) handleExternalBatchLocked(committeeID hash.Hash, batch *unresolve return errIncorrectRole } - // We only accept batches for our own committee. - expectedID := epoch.GetExecutorCommitteeID() - if !expectedID.Equal(&committeeID) { - n.logger.Error("got external batch for a different executor committee", - "expected_committee", expectedID, - "committee", committeeID, - ) - return nil - } - // Check if we have the correct block -- in this case, start processing the batch. if n.commonNode.CurrentBlock.Header.MostlyEqual(&hdr) { n.maybeStartProcessingBatchLocked(batch) @@ -989,7 +934,6 @@ func (n *Node) worker() { func NewNode( commonNode *committee.Node, - mergeNode *mergeCommittee.Node, commonCfg commonWorker.Config, roleProvider registration.RoleProvider, ) (*Node, error) { @@ -1008,7 +952,6 @@ func NewNode( n := &Node{ RuntimeHostNode: rhn, commonNode: commonNode, - mergeNode: mergeNode, commonCfg: commonCfg, roleProvider: roleProvider, ctx: ctx, diff --git a/go/worker/compute/executor/init.go b/go/worker/compute/executor/init.go index 06b5c96a5d7..aef900e93d1 100644 --- a/go/worker/compute/executor/init.go +++ b/go/worker/compute/executor/init.go @@ -3,7 +3,6 @@ package executor import ( workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" "github.com/oasisprotocol/oasis-core/go/worker/compute" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) @@ -11,8 +10,7 @@ import ( func New( dataDir string, commonWorker *workerCommon.Worker, - mergeWorker *merge.Worker, registration *registration.Worker, ) (*Worker, error) { - return newWorker(dataDir, compute.Enabled(), commonWorker, mergeWorker, registration) + return newWorker(dataDir, compute.Enabled(), commonWorker, registration) } diff --git a/go/worker/compute/executor/worker.go b/go/worker/compute/executor/worker.go index 911ab911133..9013eace4b8 100644 --- a/go/worker/compute/executor/worker.go +++ b/go/worker/compute/executor/worker.go @@ -10,7 +10,6 @@ import ( workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" "github.com/oasisprotocol/oasis-core/go/worker/compute/executor/committee" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge" "github.com/oasisprotocol/oasis-core/go/worker/registration" ) @@ -19,7 +18,6 @@ type Worker struct { enabled bool commonWorker *workerCommon.Worker - merge *merge.Worker registration *registration.Worker runtimes 
map[common.Namespace]*committee.Node @@ -141,16 +139,13 @@ func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { "runtime_id", id, ) - // Get other nodes from this runtime. - mergeNode := w.merge.GetRuntime(id) - rp, err := w.registration.NewRuntimeRoleProvider(node.RoleComputeWorker, id) if err != nil { return fmt.Errorf("failed to create role provider: %w", err) } // Create committee node for the given runtime. - node, err := committee.NewNode(commonNode, mergeNode, w.commonWorker.GetConfig(), rp) + node, err := committee.NewNode(commonNode, w.commonWorker.GetConfig(), rp) if err != nil { return err } @@ -169,7 +164,6 @@ func newWorker( dataDir string, enabled bool, commonWorker *workerCommon.Worker, - merge *merge.Worker, registration *registration.Worker, ) (*Worker, error) { ctx, cancelCtx := context.WithCancel(context.Background()) @@ -177,7 +171,6 @@ func newWorker( w := &Worker{ enabled: enabled, commonWorker: commonWorker, - merge: merge, registration: registration, runtimes: make(map[common.Namespace]*committee.Node), ctx: ctx, diff --git a/go/worker/compute/init.go b/go/worker/compute/init.go index 228fd029ec2..2cd78fa5b8c 100644 --- a/go/worker/compute/init.go +++ b/go/worker/compute/init.go @@ -6,7 +6,7 @@ import ( ) const ( - // CfgWorkerEnabled enables the compute worker, tx scheduler worker, and merge worker. + // CfgWorkerEnabled enables the compute worker and the tx scheduler worker. CfgWorkerEnabled = "worker.compute.enabled" ) diff --git a/go/worker/compute/merge/committee/node.go b/go/worker/compute/merge/committee/node.go deleted file mode 100644 index 26b2e888e84..00000000000 --- a/go/worker/compute/merge/committee/node.go +++ /dev/null @@ -1,748 +0,0 @@ -package committee - -import ( - "context" - "errors" - "fmt" - "math" - "sync" - "time" - - "github.com/cenkalti/backoff/v4" - "github.com/opentracing/opentracing-go" - "github.com/prometheus/client_golang/prometheus" - - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" - "github.com/oasisprotocol/oasis-core/go/common/logging" - "github.com/oasisprotocol/oasis-core/go/common/node" - "github.com/oasisprotocol/oasis-core/go/common/pubsub" - consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/block" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" - runtimeCommittee "github.com/oasisprotocol/oasis-core/go/runtime/committee" - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" - "github.com/oasisprotocol/oasis-core/go/worker/common/committee" - "github.com/oasisprotocol/oasis-core/go/worker/common/p2p" - "github.com/oasisprotocol/oasis-core/go/worker/registration" -) - -var ( - errIncorrectState = errors.New("merge: incorrect state") - errSeenNewerBlock = errors.New("merge: seen newer block") - errMergeFailed = errors.New("merge: failed to perform merge") -) - -var ( - discrepancyDetectedCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "oasis_worker_merge_discrepancy_detected_count", - Help: "Number of detected merge discrepancies.", - }, - []string{"runtime"}, - ) - roothashCommitLatency = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Name: "oasis_worker_roothash_merge_commit_latency", - Help: "Latency of roothash merge commit (seconds).", - }, - []string{"runtime"}, - ) - abortedMergeCount = prometheus.NewCounterVec( - 
prometheus.CounterOpts{ - Name: "oasis_worker_aborted_merge_count", - Help: "Number of aborted merges.", - }, - []string{"runtime"}, - ) - inconsistentMergeRootCount = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "oasis_worker_inconsistent_merge_root_count", - Help: "Number of inconsistent merge roots.", - }, - []string{"runtime"}, - ) - nodeCollectors = []prometheus.Collector{ - discrepancyDetectedCount, - roothashCommitLatency, - abortedMergeCount, - inconsistentMergeRootCount, - } - - metricsOnce sync.Once - - infiniteTimeout = time.Duration(math.MaxInt64) -) - -// Node is a committee node. -type Node struct { // nolint: maligned - commonNode *committee.Node - commonCfg workerCommon.Config - - roleProvider registration.RoleProvider - - ctx context.Context - cancelCtx context.CancelFunc - stopCh chan struct{} - stopOnce sync.Once - quitCh chan struct{} - initCh chan struct{} - - // Mutable and shared with common node's worker. - // Guarded by .commonNode.CrossNode. - state NodeState - // Context valid until the next round. - // Guarded by .commonNode.CrossNode. - roundCtx context.Context - roundCancelCtx context.CancelFunc - - stateTransitions *pubsub.Broker - // Bump this when we need to change what the worker selects over. - reselect chan struct{} - - logger *logging.Logger -} - -// Name returns the service name. -func (n *Node) Name() string { - return "committee node" -} - -// Start starts the service. -func (n *Node) Start() error { - go n.worker() - return nil -} - -// Stop halts the service. -func (n *Node) Stop() { - n.stopOnce.Do(func() { close(n.stopCh) }) -} - -// Quit returns a channel that will be closed when the service terminates. -func (n *Node) Quit() <-chan struct{} { - return n.quitCh -} - -// Cleanup performs the service specific post-termination cleanup. -func (n *Node) Cleanup() { -} - -// Initialized returns a channel that will be closed when the node is -// initialized and ready to service requests. -func (n *Node) Initialized() <-chan struct{} { - return n.initCh -} - -// WatchStateTransitions subscribes to the node's state transitions. -func (n *Node) WatchStateTransitions() (<-chan NodeState, *pubsub.Subscription) { - sub := n.stateTransitions.Subscribe() - ch := make(chan NodeState) - sub.Unwrap(ch) - - return ch, sub -} - -func (n *Node) getMetricLabels() prometheus.Labels { - return prometheus.Labels{ - "runtime": n.commonNode.Runtime.ID().String(), - } -} - -// HandlePeerMessage implements NodeHooks. -func (n *Node) HandlePeerMessage(ctx context.Context, message *p2p.Message) (bool, error) { - if message.ExecutorCommit != nil { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - err := n.handleResultsLocked(ctx, message.ExecutorCommit) - if err != nil { - return false, err - } - return true, nil - } - return false, nil -} - -func (n *Node) bumpReselect() { - select { - case n.reselect <- struct{}{}: - default: - // If there's one already queued, we don't need to do anything. - } -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) transitionLocked(state NodeState) { - n.logger.Info("state transition", - "current_state", n.state, - "new_state", state, - ) - - // Validate state transition. 
- dests := validStateTransitions[n.state.Name()] - - var valid bool - for _, dest := range dests[:] { - if dest == state.Name() { - valid = true - break - } - } - - if !valid { - panic(fmt.Sprintf("invalid state transition: %s -> %s", n.state, state)) - } - - n.state = state - n.stateTransitions.Broadcast(state) - // Restart our worker's select in case our state-specific channels have changed. - n.bumpReselect() -} - -func (n *Node) newStateWaitingForResultsLocked(epoch *committee.EpochSnapshot) StateWaitingForResults { - pool := &commitment.MultiPool{ - Committees: make(map[hash.Hash]*commitment.Pool), - } - - for cID, ci := range epoch.GetExecutorCommittees() { - pool.Committees[cID] = &commitment.Pool{ - Runtime: epoch.GetRuntime(), - Committee: ci.Committee, - } - } - - return StateWaitingForResults{ - pool: pool, - timer: time.NewTimer(infiniteTimeout), - consensusTimeout: make(map[hash.Hash]bool), - } -} - -// HandleEpochTransitionLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleEpochTransitionLocked(epoch *committee.EpochSnapshot) { - if epoch.IsMergeWorker() || epoch.IsMergeBackupWorker() { - n.transitionLocked(n.newStateWaitingForResultsLocked(epoch)) - } else { - n.transitionLocked(StateNotReady{}) - } -} - -// HandleNewBlockEarlyLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNewBlockEarlyLocked(blk *block.Block) { - // If we have seen a new block while waiting for results, we need to - // abort it no matter what as any processed state may be invalid. - n.abortMergeLocked(errSeenNewerBlock) -} - -// HandleNewBlockLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNewBlockLocked(blk *block.Block) { - epoch := n.commonNode.Group.GetEpochSnapshot() - - // Cancel old round context, start a new one. - if n.roundCancelCtx != nil { - (n.roundCancelCtx)() - } - n.roundCtx, n.roundCancelCtx = context.WithCancel(n.ctx) - - // Perform actions based on current state. - switch n.state.(type) { - case StateWaitingForEvent: - // Block finalized without the need for a backup worker. - n.logger.Info("considering the round finalized", - "round", blk.Header.Round, - "header_hash", blk.Header.EncodedHash(), - ) - n.transitionLocked(n.newStateWaitingForResultsLocked(epoch)) - case StateWaitingForFinalize: - // A new block means the round has been finalized. - n.logger.Info("considering the round finalized", - "round", blk.Header.Round, - "header_hash", blk.Header.EncodedHash(), - ) - n.transitionLocked(n.newStateWaitingForResultsLocked(epoch)) - } -} - -// HandleResultsFromExecutorWorkerLocked processes results from an executor worker. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleResultsFromExecutorWorkerLocked(spanCtx opentracing.SpanContext, commit *commitment.ExecutorCommitment) { - // Spawn retry in a goroutine to prevent blocking processing. - go func() { - call := func() error { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - return n.handleResultsLocked(n.roundCtx, commit) - } - bkoff := backoff.WithContext(backoff.NewExponentialBackOff(), n.roundCtx) - if err := backoff.Retry(call, bkoff); err != nil { - n.logger.Warn("failed to handle results from local executor worker", - "err", err, - ) - } - }() -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) handleResultsLocked(ctx context.Context, commit *commitment.ExecutorCommitment) error { - // If we are not waiting for results, don't do anything. 
- state, ok := n.state.(StateWaitingForResults) - if !ok { - return errIncorrectState - } - - n.logger.Debug("received new executor commitment", - "node_id", commit.Signature.PublicKey, - ) - - epoch := n.commonNode.Group.GetEpochSnapshot() - sp, err := state.pool.AddExecutorCommitment(ctx, n.commonNode.CurrentBlock, epoch, epoch, commit) - if err != nil { - return err - } - - // Attempt finalization. We defer this part in order to not block P2P relaying. - expectedRound := n.commonNode.CurrentBlock.Header.Round - go func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - // Ignore defered finalization attempt if state has changed. - if _, ok := n.state.(StateWaitingForResults); !ok { - return - } - - // Ignore defered finalization attempt if current block has changed. - if n.commonNode.CurrentBlock.Header.Round != expectedRound { - return - } - - n.tryFinalizeResultsLocked(sp, false) - }() - return nil -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) tryFinalizeResultsLocked(pool *commitment.Pool, didTimeout bool) { - state := n.state.(StateWaitingForResults) - now := time.Now() - - defer func() { - if !didTimeout && !state.timer.Stop() { - <-state.timer.C - } - - nextTimeout := state.pool.GetNextTimeout() - if nextTimeout.IsZero() { - // Disarm timer. - n.logger.Debug("disarming round timeout") - state.timer.Reset(infiniteTimeout) - } else { - // (Re-)arm timer. - n.logger.Debug("(re-)arming round timeout") - state.timer.Reset(nextTimeout.Sub(now)) - } - }() - - epoch := n.commonNode.Group.GetEpochSnapshot() - // The roothash backend will start counting its timeout on its own based on - // any received commits so in the worst case the actual timeout will be - // 2*roundTimeout. - // - // We have two kinds of timeouts -- the first is based on local monotonic time and - // starts counting as soon as the first commitment for a committee is received. It - // is used to trigger submission of executor commitments to the consensus layer for - // proof of timeout. The consensus layer starts its own timeout and this is the - // second timeout. - // - // The timeout is only considered authoritative once confirmed by consensus. In - // case of a local-only timeout, we will submit what executor commitments we have - // to consensus and not change the internal Discrepancy flag. - cid := pool.GetCommitteeID() - logger := n.logger.With("committee_id", cid) - consensusTimeout := state.consensusTimeout[cid] - rt, err := n.commonNode.Runtime.RegistryDescriptor(n.roundCtx) - if err != nil { - logger.Error("failed to retrieve runtime registry descriptor", - "err", err, - ) - return - } - runtimeTimeout := rt.Executor.RoundTimeout - - commit, err := pool.TryFinalize(now, runtimeTimeout, didTimeout, consensusTimeout) - switch err { - case nil: - case commitment.ErrStillWaiting: - // Not enough commitments. - logger.Debug("still waiting for commitments") - return - case commitment.ErrDiscrepancyDetected: - // We may also be able to already perform discrepancy resolution, check if - // this is possible. This may be the case if we receive commits from backup - // workers before receiving commits from regular workers. - commit, err = pool.TryFinalize(now, runtimeTimeout, false, false) - if err == nil { - // Discrepancy was already resolved, proceed with merge. - break - } - - // Discrepancy detected. - fallthrough - case commitment.ErrInsufficientVotes: - // Discrepancy resolution failed. 
- logger.Warn("insufficient votes, performing executor commit") - - // Submit executor commit to BFT. - ccs := pool.GetExecutorCommitments() - go func() { - tx := roothash.NewExecutorCommitTx(0, nil, n.commonNode.Runtime.ID(), ccs) - ccErr := consensus.SignAndSubmitTx(n.roundCtx, n.commonNode.Consensus, n.commonNode.Identity.NodeSigner, tx) - - switch ccErr { - case nil: - logger.Info("executor commit finalized") - default: - logger.Warn("failed to submit executor commit", - "err", ccErr, - ) - } - }() - return - default: - n.abortMergeLocked(err) - return - } - - // Check that we have everything from all committees. - result := commit.ToDDResult().(commitment.ComputeResultsHeader) - state.results = append(state.results, &result) - if len(state.results) < len(state.pool.Committees) { - n.logger.Debug("still waiting for other committees") - // State transition to store the updated results. - n.transitionLocked(state) - return - } - - n.logger.Info("have valid commitments from all committees, merging") - - commitments := state.pool.GetOpenExecutorCommitments() - - if epoch.IsMergeBackupWorker() && state.pendingEvent == nil { - // Backup workers only perform merge after receiving a discrepancy event. - n.transitionLocked(StateWaitingForEvent{commitments: commitments, results: state.results}) - return - } - - // No discrepancy, perform merge. - n.startMergeLocked(commitments, state.results) -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) startMergeLocked(commitments []commitment.OpenExecutorCommitment, results []*commitment.ComputeResultsHeader) { - doneCh := make(chan *commitment.MergeBody, 1) - ctx, cancel := context.WithCancel(n.roundCtx) - - // Create empty block based on previous block while we hold the lock. - prevBlk := n.commonNode.CurrentBlock - blk := block.NewEmptyBlock(prevBlk, 0, block.Normal) - - n.transitionLocked(StateProcessingMerge{doneCh: doneCh, cancel: cancel}) - - // Start processing merge in a separate goroutine. This is to make it possible - // to abort the merge if a newer block is seen while we are merging. - go func() { - defer close(doneCh) - - // Merge results to storage. - ctx, cancel = context.WithTimeout(ctx, n.commonCfg.StorageCommitTimeout) - defer cancel() - - var mergeBody commitment.MergeBody - switch len(results) { - case 1: - // Optimize the case where there is only a single committee -- there is nothing to merge - // so we can avoid a round trip to the storage nodes which already have the roots. - blk.Header.Messages = results[0].Messages - blk.Header.IORoot = results[0].IORoot - blk.Header.StateRoot = results[0].StateRoot - - // Collect all distinct storage signatures. - storageSigSet := make(map[signature.PublicKey]bool) - for _, ec := range commitments { - mergeBody.ExecutorCommits = append(mergeBody.ExecutorCommits, ec.ExecutorCommitment) - - for _, s := range ec.Body.StorageSignatures { - if storageSigSet[s.PublicKey] { - continue - } - storageSigSet[s.PublicKey] = true - blk.Header.StorageSignatures = append(blk.Header.StorageSignatures, s) - } - } - - mergeBody.Header = blk.Header - default: - // Multiple committees, we need to perform a storage merge operation. - n.logger.Error("merge from multiple committees not supported") - return - } - - // Submit the merge result. - doneCh <- &mergeBody - }() -} - -// Guarded by n.commonNode.CrossNode. 
-func (n *Node) proposeHeaderLocked(result *commitment.MergeBody) { - n.logger.Debug("proposing header", - "previous_hash", result.Header.PreviousHash, - "round", result.Header.Round, - ) - - // Submit MC-Commit to BFT for DD and finalization. - mc, err := commitment.SignMergeCommitment(n.commonNode.Identity.NodeSigner, result) - if err != nil { - n.logger.Error("failed to sign merge commitment", - "err", err, - ) - n.abortMergeLocked(err) - return - } - - n.transitionLocked(StateWaitingForFinalize{}) - - // TODO: Tracing. - // span := opentracing.StartSpan("roothash.MergeCommit", opentracing.ChildOf(state.batchSpanCtx)) - // defer span.Finish() - - // Submit merge commit to consensus. - mcs := []commitment.MergeCommitment{*mc} - mergeCommitStart := time.Now() - go func() { - tx := roothash.NewMergeCommitTx(0, nil, n.commonNode.Runtime.ID(), mcs) - mcErr := consensus.SignAndSubmitTx(n.roundCtx, n.commonNode.Consensus, n.commonNode.Identity.NodeSigner, tx) - // Record merge commit latency. - roothashCommitLatency.With(n.getMetricLabels()).Observe(time.Since(mergeCommitStart).Seconds()) - - switch mcErr { - case nil: - n.logger.Info("merge commit finalized") - default: - n.logger.Error("failed to submit merge commit", - "err", mcErr, - ) - } - }() -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) abortMergeLocked(reason error) { - switch state := n.state.(type) { - case StateWaitingForResults: - case StateProcessingMerge: - // Cancel merge processing. - state.cancel() - default: - return - } - - n.logger.Warn("aborting merge", - "reason", reason, - ) - - // TODO: Return transactions to transaction scheduler. - - abortedMergeCount.With(n.getMetricLabels()).Inc() - - // After the batch has been aborted, we must wait for the round to be - // finalized. - n.transitionLocked(StateWaitingForFinalize{}) -} - -// HandleNewEventLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNewEventLocked(ev *roothash.Event) { - switch { - case ev.MergeDiscrepancyDetected != nil: - n.handleMergeDiscrepancyLocked(ev.MergeDiscrepancyDetected) - case ev.ExecutionDiscrepancyDetected != nil: - n.handleExecutorDiscrepancyLocked(ev.ExecutionDiscrepancyDetected) - default: - // Ignore other events. - } -} - -// Guarded by n.commonNode.CrossNode. -func (n *Node) handleMergeDiscrepancyLocked(ev *roothash.MergeDiscrepancyDetectedEvent) { - n.logger.Warn("merge discrepancy detected") - - discrepancyDetectedCount.With(n.getMetricLabels()).Inc() - - if !n.commonNode.Group.GetEpochSnapshot().IsMergeBackupWorker() { - return - } - - var state StateWaitingForEvent - switch s := n.state.(type) { - case StateWaitingForResults: - // Discrepancy detected event received before the results. We need to - // record the received event and keep waiting for the results. - s.pendingEvent = ev - n.transitionLocked(s) - return - case StateWaitingForEvent: - state = s - default: - n.logger.Warn("ignoring received discrepancy event in incorrect state", - "state", s, - ) - return - } - - // Backup worker, start processing merge. - n.logger.Info("backup worker activating and processing merge") - n.startMergeLocked(state.commitments, state.results) -} - -// Guarded by n.commonNode.CrossNode. 
-func (n *Node) handleExecutorDiscrepancyLocked(ev *roothash.ExecutionDiscrepancyDetectedEvent) { - n.logger.Warn("execution discrepancy detected", - "committee_id", ev.CommitteeID, - "timeout", ev.Timeout, - ) - - switch s := n.state.(type) { - case StateWaitingForResults: - // If the discrepancy was due to a timeout, record it. - pool := s.pool.Committees[ev.CommitteeID] - if pool == nil { - n.logger.Error("execution discrepancy event for unknown committee", - "committee_id", ev.CommitteeID, - ) - return - } - - if ev.Timeout { - s.consensusTimeout[ev.CommitteeID] = true - n.tryFinalizeResultsLocked(pool, true) - } - default: - } -} - -// HandleNodeUpdateLocked implements NodeHooks. -// Guarded by n.commonNode.CrossNode. -func (n *Node) HandleNodeUpdateLocked(update *runtimeCommittee.NodeUpdate, snapshot *committee.EpochSnapshot) { - // Nothing to do here. -} - -func (n *Node) worker() { - defer close(n.quitCh) - defer (n.cancelCtx)() - - // Wait for the common node to be initialized. - select { - case <-n.commonNode.Initialized(): - case <-n.stopCh: - close(n.initCh) - return - } - - n.logger.Info("starting committee node") - - // We are initialized. - close(n.initCh) - - // We are now ready to service requests. - n.roleProvider.SetAvailable(func(*node.Node) error { return nil }) - - for { - // Select over some channels based on current state. - var timerCh <-chan time.Time - var mergeDoneCh <-chan *commitment.MergeBody - - func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - switch state := n.state.(type) { - case StateWaitingForResults: - timerCh = state.timer.C - case StateProcessingMerge: - mergeDoneCh = state.doneCh - default: - } - }() - - select { - case <-n.stopCh: - n.logger.Info("termination requested") - return - case <-timerCh: - n.logger.Warn("round timeout expired, forcing finalization") - - func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - state, ok := n.state.(StateWaitingForResults) - if !ok || state.timer.C != timerCh { - return - } - - for _, pool := range state.pool.GetTimeoutCommittees(time.Now()) { - n.tryFinalizeResultsLocked(pool, true) - } - }() - case result := <-mergeDoneCh: - func() { - n.commonNode.CrossNode.Lock() - defer n.commonNode.CrossNode.Unlock() - - if state, ok := n.state.(StateProcessingMerge); !ok || state.doneCh != mergeDoneCh { - return - } - - if result == nil { - n.logger.Warn("merge aborted") - n.abortMergeLocked(errMergeFailed) - } else { - n.logger.Info("merge completed, proposing header") - n.proposeHeaderLocked(result) - } - }() - case <-n.reselect: - // Recalculate select set. - } - } -} - -func NewNode(commonNode *committee.Node, commonCfg workerCommon.Config, roleProvider registration.RoleProvider) (*Node, error) { - metricsOnce.Do(func() { - prometheus.MustRegister(nodeCollectors...) 
- }) - - ctx, cancel := context.WithCancel(context.Background()) - - n := &Node{ - commonNode: commonNode, - commonCfg: commonCfg, - roleProvider: roleProvider, - ctx: ctx, - cancelCtx: cancel, - stopCh: make(chan struct{}), - quitCh: make(chan struct{}), - initCh: make(chan struct{}), - state: StateNotReady{}, - stateTransitions: pubsub.NewBroker(false), - reselect: make(chan struct{}, 1), - logger: logging.GetLogger("worker/merge/committee").With("runtime_id", commonNode.Runtime.ID()), - } - - return n, nil -} diff --git a/go/worker/compute/merge/committee/state.go b/go/worker/compute/merge/committee/state.go deleted file mode 100644 index 4df3b8707eb..00000000000 --- a/go/worker/compute/merge/committee/state.go +++ /dev/null @@ -1,160 +0,0 @@ -package committee - -import ( - "context" - "time" - - "github.com/oasisprotocol/oasis-core/go/common/crypto/hash" - roothash "github.com/oasisprotocol/oasis-core/go/roothash/api" - "github.com/oasisprotocol/oasis-core/go/roothash/api/commitment" -) - -// StateName is a symbolic state without the attached values. -type StateName string - -const ( - // NotReady is the name of StateNotReady. - NotReady = "NotReady" - // WaitingForResults is the name of StateWaitingForResults. - WaitingForResults = "WaitingForResults" - // WaitingForEvent is the name of StateWaitingForEvent. - WaitingForEvent = "WaitingForEvent" - // ProcessingMerge is the name of StateProcessingMerge. - ProcessingMerge = "ProcessingMerge" - // WaitingForFinalize is the name of StateWaitingForFinalize. - WaitingForFinalize = "WaitingForFinalize" -) - -// Valid state transitions. -var validStateTransitions = map[StateName][]StateName{ - // Transitions from NotReady state. - NotReady: { - // Epoch transition occurred and we are not in the committee. - NotReady, - // Epoch transition occurred and we are in the committee. - WaitingForResults, - }, - - // Transitions from WaitingForResults state. - WaitingForResults: { - // Abort: seen newer block while waiting for results. - WaitingForFinalize, - // We are waiting for more results. - WaitingForResults, - // Received results, waiting for disrepancy event. - WaitingForEvent, - // Got all results, merging. - ProcessingMerge, - }, - - // Transitions from WaitingForEvent state. - WaitingForEvent: { - // Abort: seen newer block while waiting for event. - WaitingForResults, - // Discrepancy event received. - ProcessingMerge, - // Epoch transition occurred and we are not in the committee. - NotReady, - }, - - // Transitions from ProcessingMerge state. - ProcessingMerge: { - // Merge completed (or abort due to newer block seen). - WaitingForFinalize, - }, - - // Transitions from WaitingForFinalize state. - WaitingForFinalize: { - // Round has been finalized. - WaitingForResults, - // Epoch transition occurred and we are no longer in the committee. - NotReady, - }, -} - -// NodeState is a node's state. -type NodeState interface { - // Name returns the name of the state. - Name() StateName -} - -// StateNotReady is the not ready state. -type StateNotReady struct { -} - -// Name returns the name of the state. -func (s StateNotReady) Name() StateName { - return NotReady -} - -// String returns a string representation of the state. -func (s StateNotReady) String() string { - return string(s.Name()) -} - -// StateWaitingForResults is the waiting for results state. 
-type StateWaitingForResults struct { - pool *commitment.MultiPool - timer *time.Timer - consensusTimeout map[hash.Hash]bool - results []*commitment.ComputeResultsHeader - // Pending merge discrepancy detected event in case the node is a - // backup worker and the event was received before the results. - pendingEvent *roothash.MergeDiscrepancyDetectedEvent -} - -// Name returns the name of the state. -func (s StateWaitingForResults) Name() StateName { - return WaitingForResults -} - -// String returns a string representation of the state. -func (s StateWaitingForResults) String() string { - return string(s.Name()) -} - -// StateWaitingForEvent is the waiting for event state. -type StateWaitingForEvent struct { - commitments []commitment.OpenExecutorCommitment - results []*commitment.ComputeResultsHeader -} - -// Name returns the name of the state. -func (s StateWaitingForEvent) Name() StateName { - return WaitingForEvent -} - -// String returns a string representation of the state. -func (s StateWaitingForEvent) String() string { - return string(s.Name()) -} - -// StateProcessingMerge is the processing merge state. -type StateProcessingMerge struct { - doneCh <-chan *commitment.MergeBody - cancel context.CancelFunc -} - -// Name returns the name of the state. -func (s StateProcessingMerge) Name() StateName { - return ProcessingMerge -} - -// String returns a string representation of the state. -func (s StateProcessingMerge) String() string { - return string(s.Name()) -} - -// StateWaitingForFinalize is the waiting for finalize state. -type StateWaitingForFinalize struct { -} - -// Name returns the name of the state. -func (s StateWaitingForFinalize) Name() StateName { - return WaitingForFinalize -} - -// String returns a string representation of the state. -func (s StateWaitingForFinalize) String() string { - return string(s.Name()) -} diff --git a/go/worker/compute/merge/init.go b/go/worker/compute/merge/init.go deleted file mode 100644 index e7ab946ad6f..00000000000 --- a/go/worker/compute/merge/init.go +++ /dev/null @@ -1,12 +0,0 @@ -package merge - -import ( - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" - "github.com/oasisprotocol/oasis-core/go/worker/compute" - "github.com/oasisprotocol/oasis-core/go/worker/registration" -) - -// New creates a new worker. -func New(commonWorker *workerCommon.Worker, registration *registration.Worker) (*Worker, error) { - return newWorker(compute.Enabled(), commonWorker, registration) -} diff --git a/go/worker/compute/merge/worker.go b/go/worker/compute/merge/worker.go deleted file mode 100644 index 1f5d6e499c7..00000000000 --- a/go/worker/compute/merge/worker.go +++ /dev/null @@ -1,190 +0,0 @@ -package merge - -import ( - "context" - "fmt" - - "github.com/oasisprotocol/oasis-core/go/common" - "github.com/oasisprotocol/oasis-core/go/common/logging" - "github.com/oasisprotocol/oasis-core/go/common/node" - workerCommon "github.com/oasisprotocol/oasis-core/go/worker/common" - committeeCommon "github.com/oasisprotocol/oasis-core/go/worker/common/committee" - "github.com/oasisprotocol/oasis-core/go/worker/compute/merge/committee" - "github.com/oasisprotocol/oasis-core/go/worker/registration" -) - -// Worker is a merge worker. 
-type Worker struct { - enabled bool - - commonWorker *workerCommon.Worker - registration *registration.Worker - - runtimes map[common.Namespace]*committee.Node - - ctx context.Context - cancelCtx context.CancelFunc - quitCh chan struct{} - initCh chan struct{} - - logger *logging.Logger -} - -// Name returns the service name. -func (w *Worker) Name() string { - return "merge worker" -} - -// Start starts the service. -func (w *Worker) Start() error { - if !w.enabled { - w.logger.Info("not starting merge worker as it is disabled") - - // In case the worker is not enabled, close the init channel immediately. - close(w.initCh) - - return nil - } - - // Wait for all runtimes to terminate. - go func() { - defer close(w.quitCh) - - for _, rt := range w.runtimes { - <-rt.Quit() - } - }() - - // Wait for all runtimes to be initialized and for the node - // to be registered for the current epoch. - go func() { - for _, rt := range w.runtimes { - <-rt.Initialized() - } - - <-w.registration.InitialRegistrationCh() - - close(w.initCh) - }() - - // Start runtime services. - for id, rt := range w.runtimes { - w.logger.Info("starting services for runtime", - "runtime_id", id, - ) - - if err := rt.Start(); err != nil { - return err - } - } - - return nil -} - -// Stop halts the service. -func (w *Worker) Stop() { - if !w.enabled { - close(w.quitCh) - return - } - - for id, rt := range w.runtimes { - w.logger.Info("stopping services for runtime", - "runtime_id", id, - ) - - rt.Stop() - } -} - -// Enabled returns if worker is enabled. -func (w *Worker) Enabled() bool { - return w.enabled -} - -// Quit returns a channel that will be closed when the service terminates. -func (w *Worker) Quit() <-chan struct{} { - return w.quitCh -} - -// Cleanup performs the service specific post-termination cleanup. -func (w *Worker) Cleanup() { - if !w.enabled { - return - } - - for _, rt := range w.runtimes { - rt.Cleanup() - } -} - -// Initialized returns a channel that will be closed when the merge worker -// is initialized and ready to service requests. -func (w *Worker) Initialized() <-chan struct{} { - return w.initCh -} - -// GetRuntime returns a registered runtime. -// -// In case the runtime with the specified id was not registered it -// returns nil. 
-func (w *Worker) GetRuntime(id common.Namespace) *committee.Node { - return w.runtimes[id] -} - -func (w *Worker) registerRuntime(commonNode *committeeCommon.Node) error { - id := commonNode.Runtime.ID() - w.logger.Info("registering new runtime", - "runtime_id", id, - ) - - rp, err := w.registration.NewRuntimeRoleProvider(node.RoleComputeWorker, id) - if err != nil { - return fmt.Errorf("failed to create role provider: %w", err) - } - - node, err := committee.NewNode(commonNode, w.commonWorker.GetConfig(), rp) - if err != nil { - return err - } - - commonNode.AddHooks(node) - w.runtimes[id] = node - - w.logger.Info("new runtime registered", - "runtime_id", id, - ) - - return nil -} - -func newWorker(enabled bool, commonWorker *workerCommon.Worker, registration *registration.Worker) (*Worker, error) { - ctx, cancelCtx := context.WithCancel(context.Background()) - - w := &Worker{ - enabled: enabled, - commonWorker: commonWorker, - registration: registration, - runtimes: make(map[common.Namespace]*committee.Node), - ctx: ctx, - cancelCtx: cancelCtx, - quitCh: make(chan struct{}), - initCh: make(chan struct{}), - logger: logging.GetLogger("worker/merge"), - } - - if enabled { - if !w.commonWorker.Enabled() { - panic("common worker should have been enabled for merge worker") - } - - // Register all configured runtimes. - for _, rt := range commonWorker.GetRuntimes() { - if err := w.registerRuntime(rt); err != nil { - return nil, err - } - } - } - - return w, nil -} diff --git a/go/worker/compute/txnscheduler/algorithm/api/api.go b/go/worker/compute/txnscheduler/algorithm/api/api.go index b6e981ec82b..343d4fa2a0f 100644 --- a/go/worker/compute/txnscheduler/algorithm/api/api.go +++ b/go/worker/compute/txnscheduler/algorithm/api/api.go @@ -44,5 +44,5 @@ type Algorithm interface { // TransactionDispatcher dispatches transactions to a scheduled executor committee. type TransactionDispatcher interface { // Dispatch attempts to dispatch a batch to a executor committee. - Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error + Dispatch(batch transaction.RawBatch) error } diff --git a/go/worker/compute/txnscheduler/algorithm/batching/batching.go b/go/worker/compute/txnscheduler/algorithm/batching/batching.go index b33c824e642..605a19c4df2 100644 --- a/go/worker/compute/txnscheduler/algorithm/batching/batching.go +++ b/go/worker/compute/txnscheduler/algorithm/batching/batching.go @@ -44,29 +44,6 @@ type config struct { } func (s *batchingState) scheduleBatch(force bool) error { - // The simple batching algorithm only supports a single executor committee. Use - // with multiple committees will currently cause the rounds to fail as all other - // committees will be idle. - var committeeID *hash.Hash - func() { - // Guarding against EpochTransition() modifying current epoch. - s.RLock() - defer s.RUnlock() - - // We cannot schedule anything until there is an epoch transition. - if s.epoch == nil { - return - } - - for id := range s.epoch.GetExecutorCommittees() { - committeeID = &id - break - } - }() - if committeeID == nil { - return nil - } - batch, err := s.incomingQueue.Take(force) if err != nil && err != errNoBatchAvailable { s.logger.Error("failed to get batch from the queue", @@ -76,8 +53,8 @@ func (s *batchingState) scheduleBatch(force bool) error { } if len(batch) > 0 { - // Try to dispatch batch to the first committee. - if err := s.dispatcher.Dispatch(*committeeID, batch); err != nil { + // Try to dispatch batch. 
+ if err := s.dispatcher.Dispatch(batch); err != nil { // Put the batch back into the incoming queue in case this failed. if errAB := s.incomingQueue.AddBatch(batch); errAB != nil { s.logger.Error("failed to add batch back into the incoming queue", diff --git a/go/worker/compute/txnscheduler/algorithm/tests/tester.go b/go/worker/compute/txnscheduler/algorithm/tests/tester.go index 4aadfa74bba..4d5971991a6 100644 --- a/go/worker/compute/txnscheduler/algorithm/tests/tester.go +++ b/go/worker/compute/txnscheduler/algorithm/tests/tester.go @@ -22,7 +22,7 @@ func (t *testDispatcher) Clear() { t.DispatchedBatches = []transaction.RawBatch{} } -func (t *testDispatcher) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error { +func (t *testDispatcher) Dispatch(batch transaction.RawBatch) error { if t.ShouldFail { return errors.New("dispatch failed") } diff --git a/go/worker/compute/txnscheduler/committee/node.go b/go/worker/compute/txnscheduler/committee/node.go index 3c751f5d283..184199e4cbc 100644 --- a/go/worker/compute/txnscheduler/committee/node.go +++ b/go/worker/compute/txnscheduler/committee/node.go @@ -351,7 +351,7 @@ func (n *Node) HandleNodeUpdateLocked(update *runtimeCommittee.NodeUpdate, snaps } // Dispatch dispatches a batch to the executor committee. -func (n *Node) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error { +func (n *Node) Dispatch(batch transaction.RawBatch) error { n.commonNode.CrossNode.Lock() defer n.commonNode.CrossNode.Unlock() @@ -438,7 +438,6 @@ func (n *Node) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error } txnSchedSig, err := n.commonNode.Group.PublishScheduledBatch( batchSpanCtx, - committeeID, ioRoot, ioReceiptSignatures, n.commonNode.CurrentBlock.Header, @@ -461,7 +460,6 @@ func (n *Node) Dispatch(committeeID hash.Hash, batch transaction.RawBatch) error } else { n.executorNode.HandleBatchFromTransactionSchedulerLocked( batchSpanCtx, - committeeID, ioRoot, batch, *txnSchedSig, diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index 6ade2be2675..5b23216f48e 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -659,10 +659,8 @@ func (crw *clientRuntimeWatcher) updateExternalServicePolicyLocked(snapshot *com policy := accessctl.NewPolicy() // Apply rules to current executor committee members. - for _, xc := range snapshot.GetExecutorCommittees() { - if xc != nil { - executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) - } + if xc := snapshot.GetExecutorCommittee(); xc != nil { + executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) } // Apply rules for configured sentry nodes. 
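The storage worker hunk that follows applies the same simplification as the key manager hunk above: with only a single executor committee per runtime, building the external service policy reduces to one nil-check instead of a loop over GetExecutorCommittees(). A minimal sketch of the shared shape, kept separate from the diff itself; the helper name, its parameters, and the package placement of AccessPolicy/EpochSnapshot are assumptions based on the imports visible in the surrounding hunks, not code from this change:

package policysketch

import (
	"github.com/oasisprotocol/oasis-core/go/common/accessctl"
	"github.com/oasisprotocol/oasis-core/go/worker/common/committee"
)

// executorPolicyFor is an illustrative helper (not part of the diff) showing the
// pattern both workers now use: the previous loop over multiple executor
// committees collapses to a single nil-check on the one committee.
func executorPolicyFor(snapshot *committee.EpochSnapshot, rules *committee.AccessPolicy) accessctl.Policy {
	policy := accessctl.NewPolicy()
	if xc := snapshot.GetExecutorCommittee(); xc != nil {
		// Same call shape as in the keymanager and storage hunks.
		rules.AddRulesForCommittee(&policy, xc, snapshot.Nodes())
	}
	return policy
}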
diff --git a/go/worker/storage/committee/node.go b/go/worker/storage/committee/node.go index cf5b2a3554b..86442853a6b 100644 --- a/go/worker/storage/committee/node.go +++ b/go/worker/storage/committee/node.go @@ -372,17 +372,12 @@ func (n *Node) updateExternalServicePolicyLocked(snapshot *committee.EpochSnapsh sentryNodesPolicy.AddPublicKeyPolicy(&policy, addr.PubKey) } - for _, xc := range snapshot.GetExecutorCommittees() { - if xc != nil { - executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) - } + if xc := snapshot.GetExecutorCommittee(); xc != nil { + executorCommitteePolicy.AddRulesForCommittee(&policy, xc, snapshot.Nodes()) } if tsc := snapshot.GetTransactionSchedulerCommittee(); tsc != nil { txnSchedulerCommitteePolicy.AddRulesForCommittee(&policy, tsc, snapshot.Nodes()) } - if mc := snapshot.GetMergeCommittee(); mc != nil { - mergeCommitteePolicy.AddRulesForCommittee(&policy, mc, snapshot.Nodes()) - } // TODO: Query registry only for storage nodes after // https://github.com/oasisprotocol/oasis-core/issues/1923 is implemented. nodes, err := n.commonNode.Consensus.Registry().GetNodes(context.Background(), snapshot.GetGroupVersion()) diff --git a/go/worker/storage/committee/policy.go b/go/worker/storage/committee/policy.go index 10285d413db..e4fd3816437 100644 --- a/go/worker/storage/committee/policy.go +++ b/go/worker/storage/committee/policy.go @@ -21,9 +21,6 @@ var ( accessctl.Action(api.MethodApplyBatch.FullName()), }, } - mergeCommitteePolicy = &committee.AccessPolicy{ - Actions: []accessctl.Action{}, - } // NOTE: GetDiff/GetCheckpoint* need to be accessible to all storage nodes, // not just the ones in the current storage committee so that new nodes can // sync-up. diff --git a/runtime/src/common/roothash.rs b/runtime/src/common/roothash.rs index 666dbde480b..f80e0025fe1 100644 --- a/runtime/src/common/roothash.rs +++ b/runtime/src/common/roothash.rs @@ -97,6 +97,8 @@ pub const COMPUTE_RESULTS_HEADER_CONTEXT: &'static [u8] = /// the actual results. #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct ComputeResultsHeader { + /// Round number. + pub round: u64, /// Hash of the previous block header this batch was computed against. pub previous_hash: Hash, /// The I/O merkle root. diff --git a/runtime/src/dispatcher.rs b/runtime/src/dispatcher.rs index 41a0658f596..d233b3c2b49 100644 --- a/runtime/src/dispatcher.rs +++ b/runtime/src/dispatcher.rs @@ -405,6 +405,7 @@ impl Dispatcher { .expect("io commit must succeed"); let header = ComputeResultsHeader { + round: block.header.round + 1, previous_hash: block.header.encoded_hash(), io_root, state_root: new_state_root,