diff --git a/.changelog/2880.breaking.md b/.changelog/2880.breaking.md new file mode 100644 index 00000000000..c9526480fd4 --- /dev/null +++ b/.changelog/2880.breaking.md @@ -0,0 +1,13 @@ +go/consensus: Enable periodic state checkpoints + +This adds the following consensus parameters which control how state +checkpointing is to be performed (currently not enforced): + +- `state_checkpoint_interval` is the interval (in blocks) on which state + checkpoints should be taken. + +- `state_checkpoint_num_kept` is the number of past state checkpoints to + keep. + +- `state_checkpoint_chunk_size` is the chunk size that should be used when + creating state checkpoints. diff --git a/go/consensus/genesis/genesis.go b/go/consensus/genesis/genesis.go index 0c123c6696d..73b6044c7a2 100644 --- a/go/consensus/genesis/genesis.go +++ b/go/consensus/genesis/genesis.go @@ -27,6 +27,13 @@ type Parameters struct { MaxEvidenceAgeBlocks uint64 `json:"max_evidence_age_blocks"` MaxEvidenceAgeTime time.Duration `json:"max_evidence_age_time"` + // StateCheckpointInterval is the expected state checkpoint interval (in blocks). + StateCheckpointInterval uint64 `json:"state_checkpoint_interval"` + // StateCheckpointNumKept is the expected minimum number of state checkpoints to keep. + StateCheckpointNumKept uint64 `json:"state_checkpoint_num_kept,omitempty"` + // StateCheckpointChunkSize is the chunk size parameter for checkpoint creation. + StateCheckpointChunkSize uint64 `json:"state_checkpoint_chunk_size,omitempty"` + // GasCosts are the base transaction gas costs. GasCosts transaction.Costs `json:"gas_costs,omitempty"` @@ -41,13 +48,29 @@ const ( // SanityCheck does basic sanity checking on the genesis state. func (g *Genesis) SanityCheck() error { - if g.Parameters.TimeoutCommit < 1*time.Millisecond && !g.Parameters.SkipTimeoutCommit { + params := g.Parameters + + if params.TimeoutCommit < 1*time.Millisecond && !params.SkipTimeoutCommit { return fmt.Errorf("consensus: sanity check failed: timeout commit must be >= 1ms") } + if params.StateCheckpointInterval > 0 { + if params.StateCheckpointInterval < 1000 { + return fmt.Errorf("consensus: sanity check failed: state checkpoint interval must be >= 1000") + } + + if params.StateCheckpointNumKept == 0 { + return fmt.Errorf("consensus: sanity check failed: number of kept state checkpoints must be > 0") + } + + if params.StateCheckpointChunkSize < 1024*1024 { + return fmt.Errorf("consensus: sanity check failed: state checkpoint chunk size must be >= 1 MiB") + } + } + // Check for duplicate entries in the pk blacklist. m := make(map[signature.PublicKey]bool) - for _, v := range g.Parameters.PublicKeyBlacklist { + for _, v := range params.PublicKeyBlacklist { if m[v] { return fmt.Errorf("consensus: sanity check failed: redundant blacklisted public key: '%s'", v) } diff --git a/go/consensus/tendermint/abci/state.go b/go/consensus/tendermint/abci/state.go index 1317a3de9fb..e5cf7d3f683 100644 --- a/go/consensus/tendermint/abci/state.go +++ b/go/consensus/tendermint/abci/state.go @@ -26,6 +26,7 @@ import ( storage "github.com/oasisprotocol/oasis-core/go/storage/api" storageDB "github.com/oasisprotocol/oasis-core/go/storage/database" "github.com/oasisprotocol/oasis-core/go/storage/mkvs" + "github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint" ) var _ api.ApplicationState = (*applicationState)(nil) @@ -48,6 +49,8 @@ type applicationState struct { // nolint: maligned prunerClosedCh chan struct{} prunerNotifyCh *channels.RingChannel + checkpointer checkpoint.Checkpointer + blockLock sync.RWMutex blockTime time.Time blockCtx *api.BlockContext @@ -269,10 +272,12 @@ func (s *applicationState) doCommit(now time.Time) (uint64, error) { s.checkTxTree.Close() s.checkTxTree = mkvs.NewWithRoot(nil, s.storage.NodeDB(), s.stateRoot, mkvs.WithoutWriteLog()) - // Notify pruner of a new block. + // Notify pruner and checkpointer of a new block. s.prunerNotifyCh.In() <- s.stateRoot.Version // Discover the version below which all versions can be discarded from block history. lastRetainedVersion := s.statePruner.GetLastRetainedVersion() + // Notify the checkpointer of the new version. + s.checkpointer.NotifyNewVersion(s.stateRoot.Version) return lastRetainedVersion, nil } @@ -499,6 +504,25 @@ func newApplicationState(ctx context.Context, cfg *ApplicationConfig) (*applicat } } + // Initialize the checkpointer. + checkpointerCfg := checkpoint.CheckpointerConfig{ + Name: "consensus", + CheckInterval: 1 * time.Minute, // XXX: Make this configurable. + RootsPerVersion: 1, + GetParameters: func(ctx context.Context) (*checkpoint.CreationParameters, error) { + params := s.ConsensusParameters() + return &checkpoint.CreationParameters{ + Interval: params.StateCheckpointInterval, + NumKept: params.StateCheckpointNumKept, + ChunkSize: params.StateCheckpointChunkSize, + }, nil + }, + } + s.checkpointer, err = checkpoint.NewCheckpointer(s.ctx, ndb, ldb.Checkpointer(), checkpointerCfg) + if err != nil { + return nil, fmt.Errorf("state: failed to create checkpointer: %w", err) + } + go s.metricsWorker() go s.pruneWorker() diff --git a/go/genesis/genesis_test.go b/go/genesis/genesis_test.go index a6ac07c5d89..fd3925daf19 100644 --- a/go/genesis/genesis_test.go +++ b/go/genesis/genesis_test.go @@ -128,7 +128,7 @@ func TestGenesisChainContext(t *testing.T) { // on each run. stableDoc.Staking = staking.Genesis{} - require.Equal(t, "1024b5ca04a34e17cab59fdae43c32c05e1a51875841b99ea49321a4ec83adb3", stableDoc.ChainContext()) + require.Equal(t, "935f38f4eba20a2391418c3c708f4a0359725e0c235821923edf5da43142e180", stableDoc.ChainContext()) } func TestGenesisSanityCheck(t *testing.T) { diff --git a/go/oasis-node/cmd/genesis/genesis.go b/go/oasis-node/cmd/genesis/genesis.go index 7f663fef448..f7a2d4ce614 100644 --- a/go/oasis-node/cmd/genesis/genesis.go +++ b/go/oasis-node/cmd/genesis/genesis.go @@ -79,16 +79,19 @@ const ( cfgRoothashDebugBypassStake = "roothash.debug.bypass_stake" // nolint: gosec // Tendermint config flags. - cfgConsensusTimeoutCommit = "consensus.tendermint.timeout_commit" - cfgConsensusSkipTimeoutCommit = "consensus.tendermint.skip_timeout_commit" - cfgConsensusEmptyBlockInterval = "consensus.tendermint.empty_block_interval" - cfgConsensusMaxTxSizeBytes = "consensus.tendermint.max_tx_size" - cfgConsensusMaxBlockSizeBytes = "consensus.tendermint.max_block_size" - cfgConsensusMaxBlockGas = "consensus.tendermint.max_block_gas" - cfgConsensusMaxEvidenceAgeBlocks = "consensus.tendermint.max_evidence_age_blocks" - cfgConsensusMaxEvidenceAgeTime = "consensus.tendermint.max_evidence_age_time" - CfgConsensusGasCostsTxByte = "consensus.gas_costs.tx_byte" - cfgConsensusBlacklistPublicKey = "consensus.blacklist_public_key" + cfgConsensusTimeoutCommit = "consensus.tendermint.timeout_commit" + cfgConsensusSkipTimeoutCommit = "consensus.tendermint.skip_timeout_commit" + cfgConsensusEmptyBlockInterval = "consensus.tendermint.empty_block_interval" + cfgConsensusMaxTxSizeBytes = "consensus.tendermint.max_tx_size" + cfgConsensusMaxBlockSizeBytes = "consensus.tendermint.max_block_size" + cfgConsensusMaxBlockGas = "consensus.tendermint.max_block_gas" + cfgConsensusMaxEvidenceAgeBlocks = "consensus.tendermint.max_evidence_age_blocks" + cfgConsensusMaxEvidenceAgeTime = "consensus.tendermint.max_evidence_age_time" + cfgConsensusStateCheckpointInterval = "consensus.state_checkpoint.interval" + cfgConsensusStateCheckpointNumKept = "consensus.state_checkpoint.num_kept" + cfgConsensusStateCheckpointChunkSize = "consensus.state_checkpoint.chunk_size" + CfgConsensusGasCostsTxByte = "consensus.gas_costs.tx_byte" + cfgConsensusBlacklistPublicKey = "consensus.blacklist_public_key" // Consensus backend config flag. cfgConsensusBackend = "consensus.backend" @@ -231,14 +234,17 @@ func doInitGenesis(cmd *cobra.Command, args []string) { doc.Consensus = consensusGenesis.Genesis{ Backend: viper.GetString(cfgConsensusBackend), Parameters: consensusGenesis.Parameters{ - TimeoutCommit: viper.GetDuration(cfgConsensusTimeoutCommit), - SkipTimeoutCommit: viper.GetBool(cfgConsensusSkipTimeoutCommit), - EmptyBlockInterval: viper.GetDuration(cfgConsensusEmptyBlockInterval), - MaxTxSize: uint64(viper.GetSizeInBytes(cfgConsensusMaxTxSizeBytes)), - MaxBlockSize: uint64(viper.GetSizeInBytes(cfgConsensusMaxBlockSizeBytes)), - MaxBlockGas: transaction.Gas(viper.GetUint64(cfgConsensusMaxBlockGas)), - MaxEvidenceAgeBlocks: viper.GetUint64(cfgConsensusMaxEvidenceAgeBlocks), - MaxEvidenceAgeTime: viper.GetDuration(cfgConsensusMaxEvidenceAgeTime), + TimeoutCommit: viper.GetDuration(cfgConsensusTimeoutCommit), + SkipTimeoutCommit: viper.GetBool(cfgConsensusSkipTimeoutCommit), + EmptyBlockInterval: viper.GetDuration(cfgConsensusEmptyBlockInterval), + MaxTxSize: uint64(viper.GetSizeInBytes(cfgConsensusMaxTxSizeBytes)), + MaxBlockSize: uint64(viper.GetSizeInBytes(cfgConsensusMaxBlockSizeBytes)), + MaxBlockGas: transaction.Gas(viper.GetUint64(cfgConsensusMaxBlockGas)), + MaxEvidenceAgeBlocks: viper.GetUint64(cfgConsensusMaxEvidenceAgeBlocks), + MaxEvidenceAgeTime: viper.GetDuration(cfgConsensusMaxEvidenceAgeTime), + StateCheckpointInterval: viper.GetUint64(cfgConsensusStateCheckpointInterval), + StateCheckpointNumKept: viper.GetUint64(cfgConsensusStateCheckpointNumKept), + StateCheckpointChunkSize: uint64(viper.GetSizeInBytes(cfgConsensusStateCheckpointChunkSize)), GasCosts: transaction.Costs{ consensusGenesis.GasOpTxByte: transaction.Gas(viper.GetUint64(CfgConsensusGasCostsTxByte)), }, @@ -742,6 +748,9 @@ func init() { initGenesisFlags.Uint64(cfgConsensusMaxBlockGas, 0, "tendermint max gas used per block") initGenesisFlags.Uint64(cfgConsensusMaxEvidenceAgeBlocks, 100000, "tendermint max evidence age (in blocks)") initGenesisFlags.Duration(cfgConsensusMaxEvidenceAgeTime, 48*time.Hour, "tendermint max evidence age (in time)") + initGenesisFlags.Uint64(cfgConsensusStateCheckpointInterval, 10000, "consensus state checkpoint interval (in blocks)") + initGenesisFlags.Uint64(cfgConsensusStateCheckpointNumKept, 2, "number of kept consensus state checkpoints") + initGenesisFlags.String(cfgConsensusStateCheckpointChunkSize, "8mb", "consensus state checkpoint chunk size (in bytes)") initGenesisFlags.Uint64(CfgConsensusGasCostsTxByte, 1, "consensus gas costs: each transaction byte") initGenesisFlags.StringSlice(cfgConsensusBlacklistPublicKey, nil, "blacklist public key") diff --git a/go/storage/mkvs/checkpoint/checkpointer.go b/go/storage/mkvs/checkpoint/checkpointer.go index cb7ef52ad68..251ff74c37d 100644 --- a/go/storage/mkvs/checkpoint/checkpointer.go +++ b/go/storage/mkvs/checkpoint/checkpointer.go @@ -17,6 +17,9 @@ import ( // CheckpointerConfig is a checkpointer configuration. type CheckpointerConfig struct { + // Name identifying this checkpointer in logs. + Name string + // Namespace is the storage namespace this checkpointer is for. Namespace common.Namespace @@ -266,7 +269,7 @@ func NewCheckpointer( ndb: ndb, creator: creator, notifyCh: channels.NewRingChannel(1), - logger: logging.GetLogger("storage/mkvs/checkpointer").With("namespace", cfg.Namespace), + logger: logging.GetLogger("storage/mkvs/checkpoint/"+cfg.Name).With("namespace", cfg.Namespace), } go c.worker(ctx) return c, nil diff --git a/go/worker/storage/committee/node.go b/go/worker/storage/committee/node.go index 76f5cbd3ee5..48321c10545 100644 --- a/go/worker/storage/committee/node.go +++ b/go/worker/storage/committee/node.go @@ -243,6 +243,7 @@ func NewNode( // Create a new checkpointer. checkpointerCfg = checkpoint.CheckpointerConfig{ + Name: "runtime", Namespace: commonNode.Runtime.ID(), CheckInterval: checkpointerCfg.CheckInterval, RootsPerVersion: 2, // State root and I/O root.