Skip to content

Commit

Permalink
go/worker/storage: Add round sync metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jberci committed Jul 9, 2020
1 parent 0708e86 commit 713d77f
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 0 deletions.
1 change: 1 addition & 0 deletions .changelog/3088.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
go/worker/storage: Added round sync metrics
3 changes: 3 additions & 0 deletions docs/oasis-node/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ oasis_worker_processed_block_count | Counter | Number of processed roothash bloc
oasis_worker_processed_event_count | Counter | Number of processed roothash events. | runtime | [worker/common/committee](../../go/worker/common/committee/node.go)
oasis_worker_roothash_merge_commit_latency | Summary | Latency of roothash merge commit (seconds). | runtime | [worker/compute/merge/committee](../../go/worker/compute/merge/committee/node.go)
oasis_worker_storage_commit_latency | Summary | Latency of storage commit calls (state + outputs) (seconds). | runtime | [worker/compute/executor/committee](../../go/worker/compute/executor/committee/node.go)
oasis_worker_storage_full_round | Gauge | The last round that was fully synced and finalized. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go)
oasis_worker_storage_pending_round | Gauge | The last round that is in-flight for syncing. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go)
oasis_worker_storage_synced_round | Gauge | The last round that was synced but not yet finalized. | runtime | [worker/storage/committee](../../go/worker/storage/committee/node.go)
oasis_worker_txnscheduler_incoming_queue_size | Gauge | Size of the incoming queue (number of entries). | runtime | [worker/compute/txnscheduler/committee](../../go/worker/compute/txnscheduler/committee/node.go)

<!-- markdownlint-enable line-length -->
Expand Down
51 changes: 51 additions & 0 deletions go/worker/storage/committee/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"

"github.com/eapache/channels"
"github.com/prometheus/client_golang/prometheus"

"github.com/oasisprotocol/oasis-core/go/common"
"github.com/oasisprotocol/oasis-core/go/common/accessctl"
Expand Down Expand Up @@ -40,6 +41,38 @@ var (

// ErrNonLocalBackend is the error returned when the storage backend doesn't implement the LocalBackend interface.
ErrNonLocalBackend = errors.New("storage: storage backend doesn't support local storage")

storageWorkerLastFullRound = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_worker_storage_full_round",
Help: "The last round that was fully synced and finalized.",
},
[]string{"runtime"},
)

storageWorkerLastSyncedRound = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_worker_storage_synced_round",
Help: "The last round that was synced but not yet finalized.",
},
[]string{"runtime"},
)

storageWorkerLastPendingRound = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "oasis_worker_storage_pending_round",
Help: "The last round that is in-flight for syncing.",
},
[]string{"runtime"},
)

storageWorkerCollectors = []prometheus.Collector{
storageWorkerLastFullRound,
storageWorkerLastSyncedRound,
storageWorkerLastPendingRound,
}

prometheusOnce sync.Once
)

const (
Expand Down Expand Up @@ -282,6 +315,10 @@ func NewNode(
node: node,
})

prometheusOnce.Do(func() {
prometheus.MustRegister(storageWorkerCollectors...)
})

return node, nil
}

Expand Down Expand Up @@ -318,6 +355,12 @@ func (n *Node) Initialized() <-chan struct{} {
return n.initCh
}

// getMetricLabels builds the Prometheus label set attached to this node's
// storage worker gauges. The only label is "runtime", whose value is the
// string form of the ID of the common node's runtime.
func (n *Node) getMetricLabels() prometheus.Labels {
	runtimeID := n.commonNode.Runtime.ID().String()

	labels := prometheus.Labels{}
	labels["runtime"] = runtimeID
	return labels
}

// NodeHooks implementation.

func (n *Node) updateExternalServicePolicyLocked(snapshot *committee.EpochSnapshot) {
Expand Down Expand Up @@ -668,6 +711,8 @@ mainLoop:
summary := hashCache[lastDiff.round]
delete(hashCache, lastDiff.round-1)

storageWorkerLastSyncedRound.With(n.getMetricLabels()).Set(float64(lastDiff.round))

// Finalize storage for this round. This happens asynchronously
// with respect to Apply operations for subsequent rounds.
lastFullyAppliedRound = lastDiff.round
Expand Down Expand Up @@ -752,6 +797,10 @@ mainLoop:
awaitingRetry: maskAll,
}
syncingRounds[i] = syncing

if i == blk.Header.Round {
storageWorkerLastPendingRound.With(n.getMetricLabels()).Set(float64(i))
}
}
n.logger.Debug("preparing round sync",
"round", i,
Expand Down Expand Up @@ -817,6 +866,8 @@ mainLoop:
n.logger.Error("can't store watcher state to database", "err", err)
}

storageWorkerLastFullRound.With(n.getMetricLabels()).Set(float64(finalized.Round))

// Notify the checkpointer that there is a new finalized round.
n.checkpointer.NotifyNewVersion(finalized.Round)

Expand Down

0 comments on commit 713d77f

Please sign in to comment.