From a86b686610b64afe271bbe4cc391ad6a832835d2 Mon Sep 17 00:00:00 2001 From: Lidor Carmel Date: Fri, 19 Nov 2021 10:31:07 -0800 Subject: [PATCH] kv: add telemetry for node liveness Exporting the existing metrics 'HeartbeatFailures' and 'EpochIncrements' as telementry counters. These telemetry values can be seen in cockroach demo by decommissioning a node and then querying crdb_internal.feature_usage, and also in the 'Diagnostics Reporting Data' page (/_status/diagnostics/local -> "featureUsage"). Issue #71662 Release note: None --- pkg/kv/kvserver/liveness/BUILD.bazel | 1 + pkg/kv/kvserver/liveness/liveness.go | 13 +++++++------ pkg/kv/kvserver/node_liveness_test.go | 2 +- pkg/server/telemetry/BUILD.bazel | 1 + pkg/server/telemetry/features.go | 11 +++++++++++ pkg/server/telemetry/features_test.go | 13 +++++++++++++ 6 files changed, 34 insertions(+), 7 deletions(-) diff --git a/pkg/kv/kvserver/liveness/BUILD.bazel b/pkg/kv/kvserver/liveness/BUILD.bazel index 92e2a790a5b5..1fa6121d1d69 100644 --- a/pkg/kv/kvserver/liveness/BUILD.bazel +++ b/pkg/kv/kvserver/liveness/BUILD.bazel @@ -12,6 +12,7 @@ go_library( "//pkg/kv", "//pkg/kv/kvserver/liveness/livenesspb", "//pkg/roachpb:with-mocks", + "//pkg/server/telemetry", "//pkg/settings/cluster", "//pkg/storage", "//pkg/util/contextutil", diff --git a/pkg/kv/kvserver/liveness/liveness.go b/pkg/kv/kvserver/liveness/liveness.go index db755c8cb137..08226bb9b97d 100644 --- a/pkg/kv/kvserver/liveness/liveness.go +++ b/pkg/kv/kvserver/liveness/liveness.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/util/contextutil" @@ -139,8 +140,8 @@ type Metrics struct { LiveNodes *metric.Gauge HeartbeatsInFlight *metric.Gauge HeartbeatSuccesses *metric.Counter - HeartbeatFailures *metric.Counter - EpochIncrements *metric.Counter + HeartbeatFailures telemetry.CounterWithMetric + EpochIncrements telemetry.CounterWithMetric HeartbeatLatency *metric.Histogram } @@ -295,8 +296,8 @@ func NewNodeLiveness(opts NodeLivenessOptions) *NodeLiveness { LiveNodes: metric.NewFunctionalGauge(metaLiveNodes, nl.numLiveNodes), HeartbeatsInFlight: metric.NewGauge(metaHeartbeatsInFlight), HeartbeatSuccesses: metric.NewCounter(metaHeartbeatSuccesses), - HeartbeatFailures: metric.NewCounter(metaHeartbeatFailures), - EpochIncrements: metric.NewCounter(metaEpochIncrements), + HeartbeatFailures: telemetry.NewCounterWithMetric(metaHeartbeatFailures), + EpochIncrements: telemetry.NewCounterWithMetric(metaEpochIncrements), HeartbeatLatency: metric.NewLatency(metaHeartbeatLatency, opts.HistogramWindowInterval), } nl.mu.nodes = make(map[roachpb.NodeID]Record) @@ -978,7 +979,7 @@ func (nl *NodeLiveness) heartbeatInternal( nl.metrics.HeartbeatSuccesses.Inc(1) return nil } - nl.metrics.HeartbeatFailures.Inc(1) + nl.metrics.HeartbeatFailures.Inc() return err } @@ -1211,7 +1212,7 @@ func (nl *NodeLiveness) IncrementEpoch(ctx context.Context, liveness livenesspb. log.Infof(ctx, "incremented n%d liveness epoch to %d", written.NodeID, written.Epoch) nl.maybeUpdate(ctx, written) - nl.metrics.EpochIncrements.Inc(1) + nl.metrics.EpochIncrements.Inc() return nil } diff --git a/pkg/kv/kvserver/node_liveness_test.go b/pkg/kv/kvserver/node_liveness_test.go index 529410a96fde..260bae364732 100644 --- a/pkg/kv/kvserver/node_liveness_test.go +++ b/pkg/kv/kvserver/node_liveness_test.go @@ -467,7 +467,7 @@ func TestNodeLivenessEpochIncrement(t *testing.T) { }) // Verify epoch increment metric count. - if c := tc.Servers[0].NodeLiveness().(*liveness.NodeLiveness).Metrics().EpochIncrements.Count(); c != 1 { + if c := tc.Servers[0].NodeLiveness().(*liveness.NodeLiveness).Metrics().EpochIncrements.MetricValue(); c != 1 { t.Errorf("expected epoch increment == 1; got %d", c) } diff --git a/pkg/server/telemetry/BUILD.bazel b/pkg/server/telemetry/BUILD.bazel index ba02d72a9546..77474e81219d 100644 --- a/pkg/server/telemetry/BUILD.bazel +++ b/pkg/server/telemetry/BUILD.bazel @@ -23,6 +23,7 @@ go_test( srcs = ["features_test.go"], deps = [ ":telemetry", + "//pkg/util/metric", "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/server/telemetry/features.go b/pkg/server/telemetry/features.go index ce704965c902..5af6527ea160 100644 --- a/pkg/server/telemetry/features.go +++ b/pkg/server/telemetry/features.go @@ -128,6 +128,17 @@ func (c CounterWithMetric) Inc() { c.metric.Inc(1) } +// CounterValue returns the telemetry value. Note that this value can be +// different from MetricValue because telemetry may reset to zero occasionally. +func (c CounterWithMetric) CounterValue() int32 { + return Read(c.telemetry) +} + +// MetricValue returns the value of the metric, not the telemetry. +func (c CounterWithMetric) MetricValue() int64 { + return c.metric.Count() +} + // Forward the metric.Iterable interface to the metric counter. We // don't just embed the counter because our Inc() interface is a bit // different. diff --git a/pkg/server/telemetry/features_test.go b/pkg/server/telemetry/features_test.go index fe0f12787880..e8afe3bc60cf 100644 --- a/pkg/server/telemetry/features_test.go +++ b/pkg/server/telemetry/features_test.go @@ -16,6 +16,7 @@ import ( "testing" "github.com/cockroachdb/cockroach/pkg/server/telemetry" + "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/stretchr/testify/require" ) @@ -91,3 +92,15 @@ func TestBucket(t *testing.T) { } } } + +// TestCounterWithMetric verifies that only the telemetry is reset to zero when, +// for example, a report is created. +func TestCounterWithMetric(t *testing.T) { + cm := telemetry.NewCounterWithMetric(metric.Metadata{}) + cm.Inc() + require.Equal(t, int32(1), cm.CounterValue()) + require.Equal(t, int64(1), cm.MetricValue()) + telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts) + require.Equal(t, int32(0), cm.CounterValue()) + require.Equal(t, int64(1), cm.MetricValue()) +}