Skip to content

Commit

Permalink
Merge pull request #3161 from oasisprotocol/ptrus/fix/runtimes-metric
Browse files Browse the repository at this point in the history
go/registry/metrics: Runtimes metrics: query for runtimes
  • Loading branch information
ptrus authored Aug 3, 2020
2 parents 4b70d95 + ea4fedd commit 0a44403
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 27 deletions.
5 changes: 5 additions & 0 deletions .changelog/3161.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
go/registry/metrics: Fix `oasis_registry_runtimes` metric

Metric was counting runtime events, which does not correctly take into account
the case where runtime is suspended and resumed.
The metric is now computed by querying the registry.
24 changes: 12 additions & 12 deletions go/consensus/tendermint/full/full.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,15 +161,14 @@ type fullService struct { // nolint: maligned

stateDb tmdb.DB

beacon beaconAPI.Backend
epochtime epochtimeAPI.Backend
keymanager keymanagerAPI.Backend
registry registryAPI.Backend
registryMetrics *registry.MetricsUpdater
roothash roothashAPI.Backend
staking stakingAPI.Backend
scheduler schedulerAPI.Backend
submissionMgr consensusAPI.SubmissionManager
beacon beaconAPI.Backend
epochtime epochtimeAPI.Backend
keymanager keymanagerAPI.Backend
registry registryAPI.Backend
roothash roothashAPI.Backend
staking stakingAPI.Backend
scheduler schedulerAPI.Backend
submissionMgr consensusAPI.SubmissionManager

serviceClients []api.ServiceClient
serviceClientsWg sync.WaitGroup
Expand Down Expand Up @@ -229,7 +228,7 @@ func (t *fullService) Start() error {
// Start block notifier.
go t.blockNotifierWorker()
// Optionally start metrics updater.
if viper.GetString(cmmetrics.CfgMetricsMode) != cmmetrics.MetricsModeNone {
if cmmetrics.Enabled() {
go t.metrics()
}
case false:
Expand Down Expand Up @@ -922,10 +921,11 @@ func (t *fullService) initialize() error {
return err
}
t.registry = scRegistry
t.registryMetrics = registry.NewMetricsUpdater(t.ctx, t.registry)
if cmmetrics.Enabled() {
t.svcMgr.RegisterCleanupOnly(registry.NewMetricsUpdater(t.ctx, t.registry), "registry metrics updater")
}
t.serviceClients = append(t.serviceClients, scRegistry)
t.svcMgr.RegisterCleanupOnly(t.registry, "registry backend")
t.svcMgr.RegisterCleanupOnly(t.registryMetrics, "registry metrics updater")

var scStaking tmstaking.ServiceClient
if scStaking, err = tmstaking.New(t.ctx, t); err != nil {
Expand Down
5 changes: 5 additions & 0 deletions go/oasis-node/cmd/common/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ func New(ctx context.Context) (service.BackgroundService, error) {
}
}

// Enabled returns if metrics are enabled.
func Enabled() bool {
return viper.GetString(CfgMetricsMode) != MetricsModeNone
}

// EscapeLabelCharacters replaces invalid prometheus label name characters with "_".
func EscapeLabelCharacters(l string) string {
return strings.Replace(l, ".", "_", -1)
Expand Down
19 changes: 6 additions & 13 deletions go/registry/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"github.com/oasisprotocol/oasis-core/go/registry/api"
)

const metricsUpdateInterval = 10 * time.Second
const metricsUpdateInterval = 60 * time.Second

var (
registryNodes = prometheus.NewGauge(
Expand Down Expand Up @@ -67,22 +67,10 @@ func (m *MetricsUpdater) worker(ctx context.Context) {
t := time.NewTicker(metricsUpdateInterval)
defer t.Stop()

runtimeCh, sub, err := m.backend.WatchRuntimes(ctx)
if err != nil {
m.logger.Error("failed to watch runtimes, metrics will not be updated",
"err", err,
)
return
}
defer sub.Close()

for {
select {
case <-m.closeCh:
return
case <-runtimeCh:
registryRuntimes.Inc()
continue
case <-t.C:
}

Expand All @@ -100,6 +88,11 @@ func (m *MetricsUpdater) updatePeriodicMetrics(ctx context.Context) {
if err == nil {
registryEntities.Set(float64(len(entities)))
}

runtimes, err := m.backend.GetRuntimes(ctx, &api.GetRuntimesQuery{Height: consensus.HeightLatest, IncludeSuspended: false})
if err == nil {
registryRuntimes.Set(float64(len(runtimes)))
}
}

// NewMetricsUpdater creates a new registry metrics updater.
Expand Down
3 changes: 1 addition & 2 deletions go/runtime/host/protocol/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/opentracing/opentracing-go"
opentracingExt "github.com/opentracing/opentracing-go/ext"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/viper"

"github.com/oasisprotocol/oasis-core/go/common"
"github.com/oasisprotocol/oasis-core/go/common/cbor"
Expand Down Expand Up @@ -232,7 +231,7 @@ func (c *connection) Call(ctx context.Context, body *Body) (*Body, error) {
func (c *connection) call(ctx context.Context, body *Body) (result *Body, err error) {
start := time.Now()
defer func() {
if viper.GetString(metrics.CfgMetricsMode) != metrics.MetricsModeNone {
if metrics.Enabled() {
rhpLatency.With(prometheus.Labels{"call": body.Type()}).Observe(time.Since(start).Seconds())
if err != nil {
rhpCallFailures.With(prometheus.Labels{"call": body.Type()}).Inc()
Expand Down

0 comments on commit 0a44403

Please sign in to comment.