Skip to content

Commit

Permalink
go/registry/metrics: Fix oasis_registry_runtimes metric
Browse files Browse the repository at this point in the history
  • Loading branch information
ptrus committed Aug 3, 2020
1 parent 4b70d95 commit 6fb70a2
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 18 deletions.
5 changes: 5 additions & 0 deletions .changelog/3161.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
go/registry/metrics: Fix `oasis_registry_runtimes` metric

Metric was counting runtime events, which does not correctly take into account
the case where runtime is suspended and resumed.
The metric is now computed by querying the registry.
8 changes: 5 additions & 3 deletions go/consensus/tendermint/full/full.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (t *fullService) Start() error {
// Start block notifier.
go t.blockNotifierWorker()
// Optionally start metrics updater.
if viper.GetString(cmmetrics.CfgMetricsMode) != cmmetrics.MetricsModeNone {
if cmmetrics.Enabled() {
go t.metrics()
}
case false:
Expand Down Expand Up @@ -922,10 +922,12 @@ func (t *fullService) initialize() error {
return err
}
t.registry = scRegistry
t.registryMetrics = registry.NewMetricsUpdater(t.ctx, t.registry)
if cmmetrics.Enabled() {
t.registryMetrics = registry.NewMetricsUpdater(t.ctx, t.registry)
t.svcMgr.RegisterCleanupOnly(t.registryMetrics, "registry metrics updater")
}
t.serviceClients = append(t.serviceClients, scRegistry)
t.svcMgr.RegisterCleanupOnly(t.registry, "registry backend")
t.svcMgr.RegisterCleanupOnly(t.registryMetrics, "registry metrics updater")

var scStaking tmstaking.ServiceClient
if scStaking, err = tmstaking.New(t.ctx, t); err != nil {
Expand Down
5 changes: 5 additions & 0 deletions go/oasis-node/cmd/common/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ func New(ctx context.Context) (service.BackgroundService, error) {
}
}

// Enabled returns if metrics are enabled.
func Enabled() bool {
return viper.GetString(CfgMetricsMode) != MetricsModeNone
}

// EscapeLabelCharacters replaces invalid prometheus label name characters with "_".
func EscapeLabelCharacters(l string) string {
return strings.Replace(l, ".", "_", -1)
Expand Down
19 changes: 6 additions & 13 deletions go/registry/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"github.com/oasisprotocol/oasis-core/go/registry/api"
)

const metricsUpdateInterval = 10 * time.Second
const metricsUpdateInterval = 60 * time.Second

var (
registryNodes = prometheus.NewGauge(
Expand Down Expand Up @@ -67,22 +67,10 @@ func (m *MetricsUpdater) worker(ctx context.Context) {
t := time.NewTicker(metricsUpdateInterval)
defer t.Stop()

runtimeCh, sub, err := m.backend.WatchRuntimes(ctx)
if err != nil {
m.logger.Error("failed to watch runtimes, metrics will not be updated",
"err", err,
)
return
}
defer sub.Close()

for {
select {
case <-m.closeCh:
return
case <-runtimeCh:
registryRuntimes.Inc()
continue
case <-t.C:
}

Expand All @@ -100,6 +88,11 @@ func (m *MetricsUpdater) updatePeriodicMetrics(ctx context.Context) {
if err == nil {
registryEntities.Set(float64(len(entities)))
}

runtimes, err := m.backend.GetRuntimes(ctx, &api.GetRuntimesQuery{Height: consensus.HeightLatest, IncludeSuspended: false})
if err == nil {
registryRuntimes.Set(float64(len(runtimes)))
}
}

// NewMetricsUpdater creates a new registry metrics updater.
Expand Down
3 changes: 1 addition & 2 deletions go/runtime/host/protocol/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"github.com/opentracing/opentracing-go"
opentracingExt "github.com/opentracing/opentracing-go/ext"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/viper"

"github.com/oasisprotocol/oasis-core/go/common"
"github.com/oasisprotocol/oasis-core/go/common/cbor"
Expand Down Expand Up @@ -232,7 +231,7 @@ func (c *connection) Call(ctx context.Context, body *Body) (*Body, error) {
func (c *connection) call(ctx context.Context, body *Body) (result *Body, err error) {
start := time.Now()
defer func() {
if viper.GetString(metrics.CfgMetricsMode) != metrics.MetricsModeNone {
if metrics.Enabled() {
rhpLatency.With(prometheus.Labels{"call": body.Type()}).Observe(time.Since(start).Seconds())
if err != nil {
rhpCallFailures.With(prometheus.Labels{"call": body.Type()}).Inc()
Expand Down

0 comments on commit 6fb70a2

Please sign in to comment.