diff --git a/.changelog/3088.feature.md b/.changelog/3088.feature.md new file mode 100644 index 00000000000..be317b357ac --- /dev/null +++ b/.changelog/3088.feature.md @@ -0,0 +1 @@ +go/worker/storage: Added round sync metrics diff --git a/docs/oasis-node/metrics.md b/docs/oasis-node/metrics.md index bd11c772df9..e34374b9a18 100644 --- a/docs/oasis-node/metrics.md +++ b/docs/oasis-node/metrics.md @@ -89,6 +89,9 @@ oasis_worker_processed_block_count | Counter | Number of processed roothash bloc oasis_worker_processed_event_count | Counter | Number of processed roothash events. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go) oasis_worker_roothash_merge_commit_latency | Summary | Latency of roothash merge commit (seconds). | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go) oasis_worker_storage_commit_latency | Summary | Latency of storage commit calls (state + outputs) (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go) +oasis_worker_storage_full_round | Gauge | The last round that was fully synced and finalized. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) +oasis_worker_storage_pending_round | Gauge | The last round that is in-flight for syncing. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) +oasis_worker_storage_synced_round | Gauge | The last round that was synced but not yet finalized. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go) oasis_worker_txnscheduler_incoming_queue_size | Gauge | Size of the incoming queue (number of entries). | runtime | [worker/compute/txnscheduler/committee](../../go/worker/compute/txnscheduler/committee/node.go) diff --git a/go/worker/storage/committee/node.go b/go/worker/storage/committee/node.go index 48321c10545..3474f1ba9dd 100644 --- a/go/worker/storage/committee/node.go +++ b/go/worker/storage/committee/node.go @@ -10,6 +10,7 @@ import ( "sync" "github.com/eapache/channels" + "github.com/prometheus/client_golang/prometheus" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/accessctl" @@ -40,6 +41,38 @@ var ( // ErrNonLocalBackend is the error returned when the storage backend doesn't implement the LocalBackend interface. ErrNonLocalBackend = errors.New("storage: storage backend doesn't support local storage") + + storageWorkerLastFullRound = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_storage_full_round", + Help: "The last round that was fully synced and finalized.", + }, + []string{"runtime"}, + ) + + storageWorkerLastSyncedRound = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_storage_synced_round", + Help: "The last round that was synced but not yet finalized.", + }, + []string{"runtime"}, + ) + + storageWorkerLastPendingRound = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "oasis_worker_storage_pending_round", + Help: "The last round that is in-flight for syncing.", + }, + []string{"runtime"}, + ) + + storageWorkerCollectors = []prometheus.Collector{ + storageWorkerLastFullRound, + storageWorkerLastSyncedRound, + storageWorkerLastPendingRound, + } + + prometheusOnce sync.Once ) const ( @@ -282,6 +315,10 @@ func NewNode( node: node, }) + prometheusOnce.Do(func() { + prometheus.MustRegister(storageWorkerCollectors...) + }) + return node, nil } @@ -318,6 +355,12 @@ func (n *Node) Initialized() <-chan struct{} { return n.initCh } +func (n *Node) getMetricLabels() prometheus.Labels { + return prometheus.Labels{ + "runtime": n.commonNode.Runtime.ID().String(), + } +} + // NodeHooks implementation. func (n *Node) updateExternalServicePolicyLocked(snapshot *committee.EpochSnapshot) { @@ -668,6 +711,8 @@ mainLoop: summary := hashCache[lastDiff.round] delete(hashCache, lastDiff.round-1) + storageWorkerLastSyncedRound.With(n.getMetricLabels()).Set(float64(lastDiff.round)) + // Finalize storage for this round. This happens asynchronously // with respect to Apply operations for subsequent rounds. lastFullyAppliedRound = lastDiff.round @@ -752,6 +797,10 @@ mainLoop: awaitingRetry: maskAll, } syncingRounds[i] = syncing + + if i == blk.Header.Round { + storageWorkerLastPendingRound.With(n.getMetricLabels()).Set(float64(i)) + } } n.logger.Debug("preparing round sync", "round", i, @@ -817,6 +866,8 @@ mainLoop: n.logger.Error("can't store watcher state to database", "err", err) } + storageWorkerLastFullRound.With(n.getMetricLabels()).Set(float64(finalized.Round)) + // Notify the checkpointer that there is a new finalized round. n.checkpointer.NotifyNewVersion(finalized.Round)