From a832333cf08e765c77c6536d79c6914cf9f30b5a Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:01:27 -0400 Subject: [PATCH 01/53] Start metrics revamp --- api/metrics/README.md | 15 ++ api/metrics/gatherer_test.go | 10 +- api/metrics/label_gatherer.go | 91 +++++++++++ api/metrics/label_gatherer_test.go | 182 ++++++++++++++++++++++ api/metrics/multi_gatherer.go | 75 ++------- api/metrics/multi_gatherer_test.go | 137 ---------------- api/metrics/prefix_gatherer.go | 90 +++++++++++ api/metrics/prefix_gatherer_test.go | 233 ++++++++++++++++++++++++++++ utils/metric/namespace.go | 7 +- 9 files changed, 628 insertions(+), 212 deletions(-) create mode 100644 api/metrics/README.md create mode 100644 api/metrics/label_gatherer.go create mode 100644 api/metrics/label_gatherer_test.go delete mode 100644 api/metrics/multi_gatherer_test.go create mode 100644 api/metrics/prefix_gatherer.go create mode 100644 api/metrics/prefix_gatherer_test.go diff --git a/api/metrics/README.md b/api/metrics/README.md new file mode 100644 index 00000000000..498a340804f --- /dev/null +++ b/api/metrics/README.md @@ -0,0 +1,15 @@ +# Metrics + +```mermaid +graph LR + A[P2P] --> B[Chain Router] + B --> C[Handler] + C --> D[Consensus Engine] + D --> E[Consensus] + D --> F[VM] + D --> G[DB] + D --> I[Sender] + F --> G + I --> A + I --> B +``` diff --git a/api/metrics/gatherer_test.go b/api/metrics/gatherer_test.go index 334c361ebcc..df6919cfb27 100644 --- a/api/metrics/gatherer_test.go +++ b/api/metrics/gatherer_test.go @@ -3,15 +3,7 @@ package metrics -import ( - dto "github.com/prometheus/client_model/go" -) - -var ( - hello = "hello" - world = "world" - helloWorld = "hello_world" -) +import dto "github.com/prometheus/client_model/go" type testGatherer struct { mfs []*dto.MetricFamily diff --git a/api/metrics/label_gatherer.go b/api/metrics/label_gatherer.go new file mode 100644 index 00000000000..e2f08279bb4 --- /dev/null +++ b/api/metrics/label_gatherer.go @@ -0,0 +1,91 @@ 
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package metrics
+
+import (
+	"cmp"
+	"errors"
+	"fmt"
+	"slices"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	dto "github.com/prometheus/client_model/go"
+)
+
+var (
+	_ MultiGatherer = (*labelGatherer)(nil)
+
+	errDuplicateGatherer = errors.New("attempt to register duplicate gatherer")
+)
+
+func NewLabelGatherer(labelName string) MultiGatherer {
+	return &labelGatherer{
+		labelName: labelName,
+	}
+}
+
+type labelGatherer struct {
+	multiGatherer
+
+	labelName string
+}
+
+func (g *labelGatherer) Register(labelValue string, gatherer prometheus.Gatherer) error {
+	g.lock.Lock()
+	defer g.lock.Unlock()
+
+	if slices.Contains(g.names, labelValue) {
+		return fmt.Errorf("%w: for %q with label %q",
+			errDuplicateGatherer,
+			g.labelName,
+			labelValue,
+		)
+	}
+
+	g.names = append(g.names, labelValue)
+	g.gatherers = append(g.gatherers, &labeledGatherer{
+		labelName:  g.labelName,
+		labelValue: labelValue,
+		gatherer:   gatherer,
+	})
+	return nil
+}
+
+type labeledGatherer struct {
+	labelName  string
+	labelValue string
+	gatherer   prometheus.Gatherer
+}
+
+func (g *labeledGatherer) Gather() ([]*dto.MetricFamily, error) {
+	gatheredMetricFamilies, err := g.gatherer.Gather()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, gatheredMetricFamily := range gatheredMetricFamilies {
+		if gatheredMetricFamily == nil {
+			continue
+		}
+
+		metrics := gatheredMetricFamily.Metric[:0]
+		for _, gatheredMetric := range gatheredMetricFamily.Metric {
+			if gatheredMetric == nil {
+				continue
+			}
+
+			gatheredMetric.Label = append(gatheredMetric.Label, &dto.LabelPair{
+				Name:  &g.labelName,
+				Value: &g.labelValue,
+			})
+			slices.SortFunc(gatheredMetric.Label, func(i, j *dto.LabelPair) int {
+				return cmp.Compare(i.GetName(), j.GetName())
+			})
+			metrics = append(metrics, gatheredMetric)
+		}
+		gatheredMetricFamily.Metric = metrics
+	}
+	return gatheredMetricFamilies, nil
+} diff --git a/api/metrics/label_gatherer_test.go b/api/metrics/label_gatherer_test.go new file mode 100644 index 00000000000..abd8b61e611 --- /dev/null +++ b/api/metrics/label_gatherer_test.go @@ -0,0 +1,182 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + + dto "github.com/prometheus/client_model/go" +) + +func TestLabelGatherer_Gather(t *testing.T) { + require := require.New(t) + + gatherer := NewLabelGatherer("smith") + require.NotNil(gatherer) + + registerA := prometheus.NewRegistry() + require.NoError(gatherer.Register("rick", registerA)) + + registerB := prometheus.NewRegistry() + require.NoError(gatherer.Register("morty", registerB)) + + counterA := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + require.NoError(registerA.Register(counterA)) + + counterB := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + counterB.Inc() + require.NoError(registerB.Register(counterB)) + + metrics, err := gatherer.Gather() + require.NoError(err) + require.Equal( + []*dto.MetricFamily{ + { + Name: proto.String("counter"), + Help: proto.String("help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("smith"), + Value: proto.String("morty"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("smith"), + Value: proto.String("rick"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(0), + }, + }, + }, + }, + }, + metrics, + ) +} + +func TestLabelGatherer_Register(t *testing.T) { + tests := []struct { + name string + labelGatherer *labelGatherer + labelValue string + gatherer prometheus.Gatherer + expectedErr 
error + expectedLabelGatherer *labelGatherer + }{ + { + name: "first registration", + labelGatherer: &labelGatherer{}, + labelValue: "first", + gatherer: &testGatherer{}, + expectedErr: nil, + expectedLabelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + { + name: "second registration", + labelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + labelValue: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: nil, + expectedLabelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first", "second"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + &labeledGatherer{ + labelValue: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + }, + }, + }, + }, + }, + { + name: "conflicts with previous registration", + labelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + labelValue: "first", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: errDuplicateGatherer, + expectedLabelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + err := test.labelGatherer.Register(test.labelValue, test.gatherer) + require.ErrorIs(err, test.expectedErr) + 
require.Equal(test.expectedLabelGatherer, test.labelGatherer) + }) + } +} diff --git a/api/metrics/multi_gatherer.go b/api/metrics/multi_gatherer.go index 4bd0900a022..04e35da6b1e 100644 --- a/api/metrics/multi_gatherer.go +++ b/api/metrics/multi_gatherer.go @@ -4,92 +4,37 @@ package metrics import ( - "cmp" - "errors" - "fmt" - "slices" "sync" "github.com/prometheus/client_golang/prometheus" - "github.com/ava-labs/avalanchego/utils/metric" - dto "github.com/prometheus/client_model/go" ) -var ( - _ MultiGatherer = (*multiGatherer)(nil) - - errReregisterGatherer = errors.New("attempt to register existing gatherer") -) - // MultiGatherer extends the Gatherer interface by allowing additional gatherers // to be registered. type MultiGatherer interface { prometheus.Gatherer // Register adds the outputs of [gatherer] to the results of future calls to - // Gather with the provided [namespace] added to the metrics. - Register(namespace string, gatherer prometheus.Gatherer) error + // Gather with the provided [name] added to the metrics. + Register(name string, gatherer prometheus.Gatherer) error } -type multiGatherer struct { - lock sync.RWMutex - gatherers map[string]prometheus.Gatherer +// Deprecated: Use NewPrefixGatherer instead. 
+func NewMultiGatherer() MultiGatherer { + return NewPrefixGatherer() } -func NewMultiGatherer() MultiGatherer { - return &multiGatherer{ - gatherers: make(map[string]prometheus.Gatherer), - } +type multiGatherer struct { + lock sync.RWMutex + names []string + gatherers prometheus.Gatherers } func (g *multiGatherer) Gather() ([]*dto.MetricFamily, error) { g.lock.RLock() defer g.lock.RUnlock() - var results []*dto.MetricFamily - for namespace, gatherer := range g.gatherers { - gatheredMetrics, err := gatherer.Gather() - if err != nil { - return nil, err - } - for _, gatheredMetric := range gatheredMetrics { - var name string - if gatheredMetric.Name != nil { - name = metric.AppendNamespace(namespace, *gatheredMetric.Name) - } else { - name = namespace - } - gatheredMetric.Name = &name - results = append(results, gatheredMetric) - } - } - // Because we overwrite every metric's name, we are guaranteed that there - // are no metrics with nil names. - sortMetrics(results) - return results, nil -} - -func (g *multiGatherer) Register(namespace string, gatherer prometheus.Gatherer) error { - g.lock.Lock() - defer g.lock.Unlock() - - if existingGatherer, exists := g.gatherers[namespace]; exists { - return fmt.Errorf("%w for namespace %q; existing: %#v; new: %#v", - errReregisterGatherer, - namespace, - existingGatherer, - gatherer, - ) - } - - g.gatherers[namespace] = gatherer - return nil -} - -func sortMetrics(m []*dto.MetricFamily) { - slices.SortFunc(m, func(i, j *dto.MetricFamily) int { - return cmp.Compare(*i.Name, *j.Name) - }) + return g.gatherers.Gather() } diff --git a/api/metrics/multi_gatherer_test.go b/api/metrics/multi_gatherer_test.go deleted file mode 100644 index 51b548d18a6..00000000000 --- a/api/metrics/multi_gatherer_test.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. 
- -package metrics - -import ( - "errors" - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - - dto "github.com/prometheus/client_model/go" -) - -func TestMultiGathererEmptyGather(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - mfs, err := g.Gather() - require.NoError(err) - require.Empty(mfs) -} - -func TestMultiGathererDuplicatedPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - og := prometheus.NewRegistry() - - require.NoError(g.Register("", og)) - - err := g.Register("", og) - require.ErrorIs(err, errReregisterGatherer) - - require.NoError(g.Register("lol", og)) -} - -func TestMultiGathererAddedError(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - errTest := errors.New("non-nil error") - tg := &testGatherer{ - err: errTest, - } - - require.NoError(g.Register("", tg)) - - mfs, err := g.Gather() - require.ErrorIs(err, errTest) - require.Empty(mfs) -} - -func TestMultiGathererNoAddedPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - tg := &testGatherer{ - mfs: []*dto.MetricFamily{{ - Name: &hello, - }}, - } - - require.NoError(g.Register("", tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 1) - require.Equal(&hello, mfs[0].Name) -} - -func TestMultiGathererAddedPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - tg := &testGatherer{ - mfs: []*dto.MetricFamily{{ - Name: &world, - }}, - } - - require.NoError(g.Register(hello, tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 1) - require.Equal(&helloWorld, mfs[0].Name) -} - -func TestMultiGathererJustPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - tg := &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - } - - require.NoError(g.Register(hello, tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 1) - 
require.Equal(&hello, mfs[0].Name) -} - -func TestMultiGathererSorted(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - name0 := "a" - name1 := "z" - tg := &testGatherer{ - mfs: []*dto.MetricFamily{ - { - Name: &name1, - }, - { - Name: &name0, - }, - }, - } - - require.NoError(g.Register("", tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 2) - require.Equal(&name0, mfs[0].Name) - require.Equal(&name1, mfs[1].Name) -} diff --git a/api/metrics/prefix_gatherer.go b/api/metrics/prefix_gatherer.go new file mode 100644 index 00000000000..24128ed9909 --- /dev/null +++ b/api/metrics/prefix_gatherer.go @@ -0,0 +1,90 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package metrics + +import ( + "errors" + "fmt" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/ava-labs/avalanchego/utils/metric" + + dto "github.com/prometheus/client_model/go" +) + +var ( + _ MultiGatherer = (*prefixGatherer)(nil) + + errOverlappingNamespaces = errors.New("prefix could create overlapping namespaces") +) + +func NewPrefixGatherer() MultiGatherer { + return &prefixGatherer{} +} + +type prefixGatherer struct { + multiGatherer +} + +func (g *prefixGatherer) Register(prefix string, gatherer prometheus.Gatherer) error { + g.lock.Lock() + defer g.lock.Unlock() + + for _, existingPrefix := range g.names { + if eitherIsPrefix(prefix, existingPrefix) { + return fmt.Errorf("%w: %q conflicts with %q", + errOverlappingNamespaces, + prefix, + existingPrefix, + ) + } + } + + g.names = append(g.names, prefix) + g.gatherers = append(g.gatherers, &prefixedGatherer{ + prefix: prefix, + gatherer: gatherer, + }) + return nil +} + +type prefixedGatherer struct { + prefix string + gatherer prometheus.Gatherer +} + +func (g *prefixedGatherer) Gather() ([]*dto.MetricFamily, error) { + gatheredMetricFamilies, err := g.gatherer.Gather() + if err != nil { + return nil, err + } + + 
metricFamilies := gatheredMetricFamilies[:0] + for _, gatheredMetricFamily := range gatheredMetricFamilies { + if gatheredMetricFamily == nil { + continue + } + + name := metric.AppendNamespace( + g.prefix, + gatheredMetricFamily.GetName(), + ) + gatheredMetricFamily.Name = &name + metricFamilies = append(metricFamilies, gatheredMetricFamily) + } + return metricFamilies, nil +} + +// eitherIsPrefix returns true if either [a] is a prefix of [b] or [b] is a +// prefix of [a] +func eitherIsPrefix(a, b string) bool { + if len(a) > len(b) { + a, b = b, a + } + return a == b[:len(a)] && // a is a prefix of b + (len(a) == 0 || // a is empty + len(a) == len(b) || // a is equal to b + b[len(a)] == metric.NamespaceSeparatorByte) // a ends at a namespace boundary of b +} diff --git a/api/metrics/prefix_gatherer_test.go b/api/metrics/prefix_gatherer_test.go new file mode 100644 index 00000000000..0a4dcd3ce5a --- /dev/null +++ b/api/metrics/prefix_gatherer_test.go @@ -0,0 +1,233 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + + dto "github.com/prometheus/client_model/go" +) + +func TestPrefixGatherer_Gather(t *testing.T) { + require := require.New(t) + + gatherer := NewPrefixGatherer() + require.NotNil(gatherer) + + registerA := prometheus.NewRegistry() + require.NoError(gatherer.Register("a", registerA)) + + registerB := prometheus.NewRegistry() + require.NoError(gatherer.Register("b", registerB)) + + counterA := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + require.NoError(registerA.Register(counterA)) + + counterB := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + counterB.Inc() + require.NoError(registerB.Register(counterB)) + + metrics, err := gatherer.Gather() + require.NoError(err) + require.Equal( + []*dto.MetricFamily{ + { + Name: proto.String("a_counter"), + Help: proto.String("help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{}, + Counter: &dto.Counter{ + Value: proto.Float64(0), + }, + }, + }, + }, + { + Name: proto.String("b_counter"), + Help: proto.String("help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{}, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + }, + }, + metrics, + ) +} + +func TestPrefixGatherer_Register(t *testing.T) { + tests := []struct { + name string + prefixGatherer *prefixGatherer + prefix string + gatherer prometheus.Gatherer + expectedErr error + expectedPrefixGatherer *prefixGatherer + }{ + { + name: "first registration", + prefixGatherer: &prefixGatherer{}, + prefix: "first", + gatherer: &testGatherer{}, + expectedErr: nil, + expectedPrefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + 
&prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + { + name: "second registration", + prefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + prefix: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: nil, + expectedPrefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first", "second"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + &prefixedGatherer{ + prefix: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + }, + }, + }, + }, + }, + { + name: "conflicts with previous registration", + prefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + prefix: "first", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: errOverlappingNamespaces, + expectedPrefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + err := test.prefixGatherer.Register(test.prefix, test.gatherer) + require.ErrorIs(err, test.expectedErr) + require.Equal(test.expectedPrefixGatherer, test.prefixGatherer) + }) + } +} + +func TestEitherIsPrefix(t *testing.T) { + tests := []struct { + name string + a string + b string + expected bool + }{ + { + name: "empty strings", + a: "", + b: "", + expected: true, + }, + { + name: "an empty string", + a: "", + b: "hello", + expected: true, + }, + { + name: 
"same strings", + a: "x", + b: "x", + expected: true, + }, + { + name: "different strings", + a: "x", + b: "y", + expected: false, + }, + { + name: "splits namespace", + a: "hello", + b: "hello_world", + expected: true, + }, + { + name: "is prefix before separator", + a: "hello", + b: "helloworld", + expected: false, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + require.Equal(test.expected, eitherIsPrefix(test.a, test.b)) + require.Equal(test.expected, eitherIsPrefix(test.b, test.a)) + }) + } +} diff --git a/utils/metric/namespace.go b/utils/metric/namespace.go index 4371bb1dc07..19f8dd37360 100644 --- a/utils/metric/namespace.go +++ b/utils/metric/namespace.go @@ -5,6 +5,11 @@ package metric import "strings" +const ( + NamespaceSeparator = "_" + NamespaceSeparatorByte = '_' +) + func AppendNamespace(prefix, suffix string) string { switch { case len(prefix) == 0: @@ -12,6 +17,6 @@ func AppendNamespace(prefix, suffix string) string { case len(suffix) == 0: return prefix default: - return strings.Join([]string{prefix, suffix}, "_") + return strings.Join([]string{prefix, suffix}, NamespaceSeparator) } } From d609481bcc749a0cfc143cd216a2105543c1c773 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:07:38 -0400 Subject: [PATCH 02/53] nit --- api/metrics/README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 498a340804f..db2ba41e884 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -2,14 +2,6 @@ ```mermaid graph LR - A[P2P] --> B[Chain Router] - B --> C[Handler] - C --> D[Consensus Engine] - D --> E[Consensus] - D --> F[VM] - D --> G[DB] - D --> I[Sender] - F --> G - I --> A - I --> B + A[avalanchego] --> B[chain] + A --> C[network] ``` From eeceb4afeace1086040fc6a4df7cfcbec7301738 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:12:03 -0400 Subject: [PATCH 
03/53] nit --- api/metrics/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index db2ba41e884..2c14f93f931 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -2,6 +2,13 @@ ```mermaid graph LR - A[avalanchego] --> B[chain] + A[avalanche] --> B[chain] A --> C[network] + A --> D[api] + A --> D[db] + A --> E[go] + A --> F[health] + A --> G[system_resources] + A --> H[resource_tracker] + A --> I[requests] ``` From eb1bac2e89e3728d5b543a4556825c3597e9e082 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:13:37 -0400 Subject: [PATCH 04/53] nit --- api/metrics/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 2c14f93f931..2258813a5c7 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -5,10 +5,10 @@ graph LR A[avalanche] --> B[chain] A --> C[network] A --> D[api] - A --> D[db] - A --> E[go] - A --> F[health] - A --> G[system_resources] - A --> H[resource_tracker] - A --> I[requests] + A --> E[db] + A --> F[go] + A --> G[health] + A --> H[system_resources] + A --> I[resource_tracker] + A --> J[requests] ``` From 76f88644b9cce5ad4d051e180235190c13bb4865 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:16:39 -0400 Subject: [PATCH 05/53] update readme --- api/metrics/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/api/metrics/README.md b/api/metrics/README.md index 2258813a5c7..50fd0c15965 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -11,4 +11,5 @@ graph LR A --> H[system_resources] A --> I[resource_tracker] A --> J[requests] + B --> |$chainID|K[$vmID] ``` From 73bb5106955fd95ae72a6b7c72be9f699dbf02a3 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:17:36 -0400 Subject: [PATCH 06/53] update readme --- api/metrics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/api/metrics/README.md b/api/metrics/README.md index 50fd0c15965..706b2f933dd 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -11,5 +11,5 @@ graph LR A --> H[system_resources] A --> I[resource_tracker] A --> J[requests] - B --> |$chainID|K[$vmID] + B -- |$chainID| --> K[$vmID] ``` From 5e8c0f745be4f77710e5c0c37bbb012b544f088a Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:17:55 -0400 Subject: [PATCH 07/53] update readme --- api/metrics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 706b2f933dd..ae2c62fb8b2 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -11,5 +11,5 @@ graph LR A --> H[system_resources] A --> I[resource_tracker] A --> J[requests] - B -- |$chainID| --> K[$vmID] + B -- $chainID --> K[$vmID] ``` From a90f1cfa6c1abd640cc5e7bb7a19af22f3d668a7 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:26:46 -0400 Subject: [PATCH 08/53] update readme --- api/metrics/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/metrics/README.md b/api/metrics/README.md index ae2c62fb8b2..26e0c6665d5 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -12,4 +12,7 @@ graph LR A --> I[resource_tracker] A --> J[requests] B -- $chainID --> K[$vmID] + B -- $chainID, $proposervmSide --> L[meterchainvm] + B -- $chainID --> M[meterdagvm] + B -- $chainID --> N[proposervm] ``` From 6d5c742dcada2600ae9e58e772da847600538d2b Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 12:27:26 -0400 Subject: [PATCH 09/53] update readme --- api/metrics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 26e0c6665d5..8100442ff81 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -12,7 +12,7 @@ graph LR A --> I[resource_tracker] A --> J[requests] B -- $chainID --> K[$vmID] - B -- 
$chainID, $proposervmSide --> L[meterchainvm] + B -- $chainID, $isProposerVM --> L[meterchainvm] B -- $chainID --> M[meterdagvm] B -- $chainID --> N[proposervm] ``` From f781967236140b0c87cfdaaaede9624e5d2822a2 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 13:15:07 -0400 Subject: [PATCH 10/53] update networking metrics --- message/creator.go | 5 +- network/config.go | 1 - network/ip_tracker.go | 15 +-- network/ip_tracker_test.go | 2 +- network/metrics.go | 91 ++++++++----------- network/network.go | 8 +- network/network_test.go | 2 - network/peer/metrics.go | 40 +++----- network/peer/peer_test.go | 2 - network/peer/test_peer.go | 2 - network/test_network.go | 1 - network/throttling/bandwidth_throttler.go | 8 +- .../throttling/bandwidth_throttler_test.go | 2 +- .../inbound_msg_buffer_throttler.go | 12 +-- .../inbound_msg_buffer_throttler_test.go | 4 +- .../throttling/inbound_msg_byte_throttler.go | 27 +++--- .../inbound_msg_byte_throttler_test.go | 5 - network/throttling/inbound_msg_throttler.go | 9 +- network/throttling/outbound_msg_throttler.go | 30 +++--- .../throttling/outbound_msg_throttler_test.go | 3 - node/node.go | 21 +++-- snow/networking/sender/sender_test.go | 3 - snow/snowtest/snowtest.go | 2 +- vms/platformvm/vm_test.go | 7 +- 24 files changed, 118 insertions(+), 184 deletions(-) diff --git a/message/creator.go b/message/creator.go index 8040bccb186..e4ef1d6efda 100644 --- a/message/creator.go +++ b/message/creator.go @@ -10,7 +10,6 @@ import ( "github.com/ava-labs/avalanchego/utils/compression" "github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/utils/metric" ) var _ Creator = (*creator)(nil) @@ -28,14 +27,12 @@ type creator struct { func NewCreator( log logging.Logger, metrics prometheus.Registerer, - parentNamespace string, compressionType compression.Type, maxMessageTimeout time.Duration, ) (Creator, error) { - namespace := metric.AppendNamespace(parentNamespace, "codec") builder, err := 
newMsgBuilder( log, - namespace, + "codec", metrics, maxMessageTimeout, ) diff --git a/network/config.go b/network/config.go index ed82ea507e8..3004a12bdc5 100644 --- a/network/config.go +++ b/network/config.go @@ -110,7 +110,6 @@ type Config struct { TLSKeyLogFile string `json:"tlsKeyLogFile"` - Namespace string `json:"namespace"` MyNodeID ids.NodeID `json:"myNodeID"` MyIPPort ips.DynamicIPPort `json:"myIP"` NetworkID uint32 `json:"networkID"` diff --git a/network/ip_tracker.go b/network/ip_tracker.go index 03040b15337..370c7d47da9 100644 --- a/network/ip_tracker.go +++ b/network/ip_tracker.go @@ -17,7 +17,6 @@ import ( "github.com/ava-labs/avalanchego/utils/crypto/bls" "github.com/ava-labs/avalanchego/utils/ips" "github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/utils/metric" "github.com/ava-labs/avalanchego/utils/sampler" "github.com/ava-labs/avalanchego/utils/set" ) @@ -42,25 +41,21 @@ var _ validators.SetCallbackListener = (*ipTracker)(nil) func newIPTracker( log logging.Logger, - namespace string, registerer prometheus.Registerer, ) (*ipTracker, error) { - bloomNamespace := metric.AppendNamespace(namespace, "ip_bloom") - bloomMetrics, err := bloom.NewMetrics(bloomNamespace, registerer) + bloomMetrics, err := bloom.NewMetrics("ip_bloom", registerer) if err != nil { return nil, err } tracker := &ipTracker{ log: log, numTrackedIPs: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "tracked_ips", - Help: "Number of IPs this node is willing to dial", + Name: "tracked_ips", + Help: "Number of IPs this node is willing to dial", }), numGossipableIPs: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "gossipable_ips", - Help: "Number of IPs this node is willing to gossip", + Name: "gossipable_ips", + Help: "Number of IPs this node is willing to gossip", }), bloomMetrics: bloomMetrics, mostRecentTrackedIPs: make(map[ids.NodeID]*ips.ClaimedIPPort), diff --git a/network/ip_tracker_test.go 
b/network/ip_tracker_test.go index be9ea59a51e..edae70de5b9 100644 --- a/network/ip_tracker_test.go +++ b/network/ip_tracker_test.go @@ -17,7 +17,7 @@ import ( ) func newTestIPTracker(t *testing.T) *ipTracker { - tracker, err := newIPTracker(logging.NoLog{}, "", prometheus.NewRegistry()) + tracker, err := newIPTracker(logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) return tracker } diff --git a/network/metrics.go b/network/metrics.go index c6b47a1360a..8cc5155ec10 100644 --- a/network/metrics.go +++ b/network/metrics.go @@ -44,111 +44,92 @@ type metrics struct { } func newMetrics( - namespace string, registerer prometheus.Registerer, trackedSubnets set.Set[ids.ID], ) (*metrics, error) { m := &metrics{ trackedSubnets: trackedSubnets, numPeers: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "peers", - Help: "Number of network peers", + Name: "peers", + Help: "Number of network peers", }), numTracked: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "tracked", - Help: "Number of currently tracked IPs attempting to be connected to", + Name: "tracked", + Help: "Number of currently tracked IPs attempting to be connected to", }), numSubnetPeers: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "peers_subnet", - Help: "Number of peers that are validating a particular subnet", + Name: "peers_subnet", + Help: "Number of peers that are validating a particular subnet", }, []string{"subnetID"}, ), timeSinceLastMsgReceived: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "time_since_last_msg_received", - Help: "Time (in ns) since the last msg was received", + Name: "time_since_last_msg_received", + Help: "Time (in ns) since the last msg was received", }), timeSinceLastMsgSent: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "time_since_last_msg_sent", - Help: "Time (in ns) since the last msg was sent", + Name: 
"time_since_last_msg_sent", + Help: "Time (in ns) since the last msg was sent", }), sendFailRate: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "send_fail_rate", - Help: "Portion of messages that recently failed to be sent over the network", + Name: "send_fail_rate", + Help: "Portion of messages that recently failed to be sent over the network", }), connected: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "times_connected", - Help: "Times this node successfully completed a handshake with a peer", + Name: "times_connected", + Help: "Times this node successfully completed a handshake with a peer", }), disconnected: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "times_disconnected", - Help: "Times this node disconnected from a peer it had completed a handshake with", + Name: "times_disconnected", + Help: "Times this node disconnected from a peer it had completed a handshake with", }), acceptFailed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "accept_failed", - Help: "Times this node's listener failed to accept an inbound connection", + Name: "accept_failed", + Help: "Times this node's listener failed to accept an inbound connection", }), inboundConnAllowed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "inbound_conn_throttler_allowed", - Help: "Times this node allowed (attempted to upgrade) an inbound connection", + Name: "inbound_conn_throttler_allowed", + Help: "Times this node allowed (attempted to upgrade) an inbound connection", }), tlsConnRejected: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tls_conn_rejected", - Help: "Times this node rejected a connection due to an unsupported TLS certificate", + Name: "tls_conn_rejected", + Help: "Times this node rejected a connection due to an unsupported TLS certificate", }), numUselessPeerListBytes: 
prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_useless_peerlist_bytes", - Help: "Amount of useless bytes (i.e. information about nodes we already knew/don't want to connect to) received in PeerList messages", + Name: "num_useless_peerlist_bytes", + Help: "Amount of useless bytes (i.e. information about nodes we already knew/don't want to connect to) received in PeerList messages", }), inboundConnRateLimited: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "inbound_conn_throttler_rate_limited", - Help: "Times this node rejected an inbound connection due to rate-limiting", + Name: "inbound_conn_throttler_rate_limited", + Help: "Times this node rejected an inbound connection due to rate-limiting", }), nodeUptimeWeightedAverage: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_uptime_weighted_average", - Help: "This node's uptime average weighted by observing peer stakes", + Name: "node_uptime_weighted_average", + Help: "This node's uptime average weighted by observing peer stakes", }), nodeUptimeRewardingStake: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_uptime_rewarding_stake", - Help: "The percentage of total stake which thinks this node is eligible for rewards", + Name: "node_uptime_rewarding_stake", + Help: "The percentage of total stake which thinks this node is eligible for rewards", }), nodeSubnetUptimeWeightedAverage: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_subnet_uptime_weighted_average", - Help: "This node's subnet uptime averages weighted by observing subnet peer stakes", + Name: "node_subnet_uptime_weighted_average", + Help: "This node's subnet uptime averages weighted by observing subnet peer stakes", }, []string{"subnetID"}, ), nodeSubnetUptimeRewardingStake: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_subnet_uptime_rewarding_stake", - Help: 
"The percentage of subnet's total stake which thinks this node is eligible for subnet's rewards", + Name: "node_subnet_uptime_rewarding_stake", + Help: "The percentage of subnet's total stake which thinks this node is eligible for subnet's rewards", }, []string{"subnetID"}, ), peerConnectedLifetimeAverage: prometheus.NewGauge( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "peer_connected_duration_average", - Help: "The average duration of all peer connections in nanoseconds", + Name: "peer_connected_duration_average", + Help: "The average duration of all peer connections in nanoseconds", }, ), peerConnectedStartTimes: make(map[ids.NodeID]float64), diff --git a/network/network.go b/network/network.go index 9963612c016..51ded9b8cf9 100644 --- a/network/network.go +++ b/network/network.go @@ -202,7 +202,6 @@ func NewNetwork( inboundMsgThrottler, err := throttling.NewInboundMsgThrottler( log, - config.Namespace, metricsRegisterer, config.Validators, config.ThrottlerConfig.InboundMsgThrottlerConfig, @@ -216,7 +215,6 @@ func NewNetwork( outboundMsgThrottler, err := throttling.NewSybilOutboundMsgThrottler( log, - config.Namespace, metricsRegisterer, config.Validators, config.ThrottlerConfig.OutboundMsgThrottlerConfig, @@ -225,17 +223,17 @@ func NewNetwork( return nil, fmt.Errorf("initializing outbound message throttler failed with: %w", err) } - peerMetrics, err := peer.NewMetrics(config.Namespace, metricsRegisterer) + peerMetrics, err := peer.NewMetrics(metricsRegisterer) if err != nil { return nil, fmt.Errorf("initializing peer metrics failed with: %w", err) } - metrics, err := newMetrics(config.Namespace, metricsRegisterer, config.TrackedSubnets) + metrics, err := newMetrics(metricsRegisterer, config.TrackedSubnets) if err != nil { return nil, fmt.Errorf("initializing network metrics failed with: %w", err) } - ipTracker, err := newIPTracker(log, config.Namespace, metricsRegisterer) + ipTracker, err := newIPTracker(log, metricsRegisterer) if err != nil { return 
nil, fmt.Errorf("initializing ip tracker failed with: %w", err) } diff --git a/network/network_test.go b/network/network_test.go index f8f7b56427f..5ae2cef5af3 100644 --- a/network/network_test.go +++ b/network/network_test.go @@ -104,7 +104,6 @@ var ( DialerConfig: defaultDialerConfig, - Namespace: "", NetworkID: 49463, MaxClockDifference: time.Minute, PingFrequency: constants.DefaultPingFrequency, @@ -196,7 +195,6 @@ func newMessageCreator(t *testing.T) message.Creator { mc, err := message.NewCreator( logging.NoLog{}, prometheus.NewRegistry(), - "", constants.DefaultNetworkCompressionType, 10*time.Second, ) diff --git a/network/peer/metrics.go b/network/peer/metrics.go index 94d46ac1e5f..7547d7a827d 100644 --- a/network/peer/metrics.go +++ b/network/peer/metrics.go @@ -39,55 +39,45 @@ type Metrics struct { BytesSaved *prometheus.GaugeVec // io + op } -func NewMetrics( - namespace string, - registerer prometheus.Registerer, -) (*Metrics, error) { +func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) { m := &Metrics{ ClockSkewCount: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "clock_skew_count", - Help: "number of handshake timestamps inspected (n)", + Name: "clock_skew_count", + Help: "number of handshake timestamps inspected (n)", }), ClockSkewSum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "clock_skew_sum", - Help: "sum of (peer timestamp - local timestamp) from handshake messages (s)", + Name: "clock_skew_sum", + Help: "sum of (peer timestamp - local timestamp) from handshake messages (s)", }), NumFailedToParse: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs_failed_to_parse", - Help: "number of received messages that could not be parsed", + Name: "msgs_failed_to_parse", + Help: "number of received messages that could not be parsed", }), NumSendFailed: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: 
"msgs_failed_to_send", - Help: "number of messages that failed to be sent", + Name: "msgs_failed_to_send", + Help: "number of messages that failed to be sent", }, opLabels, ), Messages: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs", - Help: "number of handled messages", + Name: "msgs", + Help: "number of handled messages", }, ioOpCompressedLabels, ), Bytes: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs_bytes", - Help: "number of message bytes", + Name: "msgs_bytes", + Help: "number of message bytes", }, ioOpLabels, ), BytesSaved: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "msgs_bytes_saved", - Help: "number of message bytes saved", + Name: "msgs_bytes_saved", + Help: "number of message bytes saved", }, ioOpLabels, ), diff --git a/network/peer/peer_test.go b/network/peer/peer_test.go index ffd5915aa2c..6605fee670b 100644 --- a/network/peer/peer_test.go +++ b/network/peer/peer_test.go @@ -50,7 +50,6 @@ func newMessageCreator(t *testing.T) message.Creator { mc, err := message.NewCreator( logging.NoLog{}, prometheus.NewRegistry(), - "", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -64,7 +63,6 @@ func newConfig(t *testing.T) Config { require := require.New(t) metrics, err := NewMetrics( - "", prometheus.NewRegistry(), ) require.NoError(err) diff --git a/network/peer/test_peer.go b/network/peer/test_peer.go index a8f633ccf65..ace7128bf52 100644 --- a/network/peer/test_peer.go +++ b/network/peer/test_peer.go @@ -76,7 +76,6 @@ func StartTestPeer( mc, err := message.NewCreator( logging.NoLog{}, prometheus.NewRegistry(), - "", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -85,7 +84,6 @@ func StartTestPeer( } metrics, err := NewMetrics( - "", prometheus.NewRegistry(), ) if err != nil { diff --git a/network/test_network.go b/network/test_network.go index 25039ad046b..6a6bcdfcc08 100644 --- a/network/test_network.go +++ 
b/network/test_network.go @@ -82,7 +82,6 @@ func NewTestNetwork( msgCreator, err := message.NewCreator( logging.NoLog{}, metrics, - "", constants.DefaultNetworkCompressionType, constants.DefaultNetworkMaximumInboundTimeout, ) diff --git a/network/throttling/bandwidth_throttler.go b/network/throttling/bandwidth_throttler.go index cde94b96124..12ca3ac9a84 100644 --- a/network/throttling/bandwidth_throttler.go +++ b/network/throttling/bandwidth_throttler.go @@ -58,7 +58,6 @@ type BandwidthThrottlerConfig struct { func newBandwidthThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, config BandwidthThrottlerConfig, ) (bandwidthThrottler, error) { @@ -69,16 +68,15 @@ func newBandwidthThrottler( limiters: make(map[ids.NodeID]*rate.Limiter), metrics: bandwidthThrottlerMetrics{ acquireLatency: metric.NewAveragerWithErrs( - namespace, + "", "bandwidth_throttler_inbound_acquire_latency", "average time (in ns) to acquire bytes from the inbound bandwidth throttler", registerer, &errs, ), awaitingAcquire: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "bandwidth_throttler_inbound_awaiting_acquire", - Help: "Number of inbound messages waiting to acquire bandwidth from the inbound bandwidth throttler", + Name: "bandwidth_throttler_inbound_awaiting_acquire", + Help: "Number of inbound messages waiting to acquire bandwidth from the inbound bandwidth throttler", }), }, } diff --git a/network/throttling/bandwidth_throttler_test.go b/network/throttling/bandwidth_throttler_test.go index 9f9195477b3..da9ac6ded28 100644 --- a/network/throttling/bandwidth_throttler_test.go +++ b/network/throttling/bandwidth_throttler_test.go @@ -22,7 +22,7 @@ func TestBandwidthThrottler(t *testing.T) { RefillRate: 8, MaxBurstSize: 10, } - throttlerIntf, err := newBandwidthThrottler(logging.NoLog{}, "", prometheus.NewRegistry(), config) + throttlerIntf, err := newBandwidthThrottler(logging.NoLog{}, prometheus.NewRegistry(), config) 
require.NoError(err) require.IsType(&bandwidthThrottlerImpl{}, throttlerIntf) throttler := throttlerIntf.(*bandwidthThrottlerImpl) diff --git a/network/throttling/inbound_msg_buffer_throttler.go b/network/throttling/inbound_msg_buffer_throttler.go index 65306eea7d5..395b6da1688 100644 --- a/network/throttling/inbound_msg_buffer_throttler.go +++ b/network/throttling/inbound_msg_buffer_throttler.go @@ -18,7 +18,6 @@ import ( // See inbound_msg_throttler.go func newInboundMsgBufferThrottler( - namespace string, registerer prometheus.Registerer, maxProcessingMsgsPerNode uint64, ) (*inboundMsgBufferThrottler, error) { @@ -27,7 +26,7 @@ func newInboundMsgBufferThrottler( awaitingAcquire: make(map[ids.NodeID]chan struct{}), nodeToNumProcessingMsgs: make(map[ids.NodeID]uint64), } - return t, t.metrics.initialize(namespace, registerer) + return t, t.metrics.initialize(registerer) } // Rate-limits inbound messages based on the number of @@ -130,19 +129,18 @@ type inboundMsgBufferThrottlerMetrics struct { awaitingAcquire prometheus.Gauge } -func (m *inboundMsgBufferThrottlerMetrics) initialize(namespace string, reg prometheus.Registerer) error { +func (m *inboundMsgBufferThrottlerMetrics) initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} m.acquireLatency = metric.NewAveragerWithErrs( - namespace, + "", "buffer_throttler_inbound_acquire_latency", "average time (in ns) to get space on the inbound message buffer", reg, &errs, ) m.awaitingAcquire = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "buffer_throttler_inbound_awaiting_acquire", - Help: "Number of inbound messages waiting to take space on the inbound message buffer", + Name: "buffer_throttler_inbound_awaiting_acquire", + Help: "Number of inbound messages waiting to take space on the inbound message buffer", }) errs.Add( reg.Register(m.awaitingAcquire), diff --git a/network/throttling/inbound_msg_buffer_throttler_test.go 
b/network/throttling/inbound_msg_buffer_throttler_test.go index 38e6d735097..d9f3e4d29bc 100644 --- a/network/throttling/inbound_msg_buffer_throttler_test.go +++ b/network/throttling/inbound_msg_buffer_throttler_test.go @@ -17,7 +17,7 @@ import ( // Test inboundMsgBufferThrottler func TestMsgBufferThrottler(t *testing.T) { require := require.New(t) - throttler, err := newInboundMsgBufferThrottler("", prometheus.NewRegistry(), 3) + throttler, err := newInboundMsgBufferThrottler(prometheus.NewRegistry(), 3) require.NoError(err) nodeID1, nodeID2 := ids.GenerateTestNodeID(), ids.GenerateTestNodeID() @@ -69,7 +69,7 @@ func TestMsgBufferThrottler(t *testing.T) { // Test inboundMsgBufferThrottler when an acquire is cancelled func TestMsgBufferThrottlerContextCancelled(t *testing.T) { require := require.New(t) - throttler, err := newInboundMsgBufferThrottler("", prometheus.NewRegistry(), 3) + throttler, err := newInboundMsgBufferThrottler(prometheus.NewRegistry(), 3) require.NoError(err) vdr1Context, vdr1ContextCancelFunc := context.WithCancel(context.Background()) diff --git a/network/throttling/inbound_msg_byte_throttler.go b/network/throttling/inbound_msg_byte_throttler.go index 6bdacb28092..3e20762f85e 100644 --- a/network/throttling/inbound_msg_byte_throttler.go +++ b/network/throttling/inbound_msg_byte_throttler.go @@ -23,7 +23,6 @@ import ( func newInboundMsgByteThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, vdrs validators.Manager, config MsgByteThrottlerConfig, @@ -42,7 +41,7 @@ func newInboundMsgByteThrottler( waitingToAcquire: linked.NewHashmap[uint64, *msgMetadata](), nodeToWaitingMsgID: make(map[ids.NodeID]uint64), } - return t, t.metrics.initialize(namespace, registerer) + return t, t.metrics.initialize(registerer) } // Information about a message waiting to be read. 
@@ -306,34 +305,30 @@ type inboundMsgByteThrottlerMetrics struct { awaitingRelease prometheus.Gauge } -func (m *inboundMsgByteThrottlerMetrics) initialize(namespace string, reg prometheus.Registerer) error { +func (m *inboundMsgByteThrottlerMetrics) initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} m.acquireLatency = metric.NewAveragerWithErrs( - namespace, + "", "byte_throttler_inbound_acquire_latency", "average time (in ns) to get space on the inbound message byte buffer", reg, &errs, ) m.remainingAtLargeBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_remaining_at_large_bytes", - Help: "Bytes remaining in the at-large byte buffer", + Name: "byte_throttler_inbound_remaining_at_large_bytes", + Help: "Bytes remaining in the at-large byte buffer", }) m.remainingVdrBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_remaining_validator_bytes", - Help: "Bytes remaining in the validator byte buffer", + Name: "byte_throttler_inbound_remaining_validator_bytes", + Help: "Bytes remaining in the validator byte buffer", }) m.awaitingAcquire = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_awaiting_acquire", - Help: "Number of inbound messages waiting to acquire space on the inbound message byte buffer", + Name: "byte_throttler_inbound_awaiting_acquire", + Help: "Number of inbound messages waiting to acquire space on the inbound message byte buffer", }) m.awaitingRelease = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_awaiting_release", - Help: "Number of messages currently being read/handled", + Name: "byte_throttler_inbound_awaiting_release", + Help: "Number of messages currently being read/handled", }) errs.Add( reg.Register(m.remainingAtLargeBytes), diff --git a/network/throttling/inbound_msg_byte_throttler_test.go 
b/network/throttling/inbound_msg_byte_throttler_test.go index 4fc931e3f37..72ca316de44 100644 --- a/network/throttling/inbound_msg_byte_throttler_test.go +++ b/network/throttling/inbound_msg_byte_throttler_test.go @@ -30,7 +30,6 @@ func TestInboundMsgByteThrottlerCancelContextDeadlock(t *testing.T) { throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -60,7 +59,6 @@ func TestInboundMsgByteThrottlerCancelContext(t *testing.T) { throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -118,7 +116,6 @@ func TestInboundMsgByteThrottler(t *testing.T) { throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -333,7 +330,6 @@ func TestSybilMsgThrottlerMaxNonVdr(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -384,7 +380,6 @@ func TestMsgThrottlerNextMsg(t *testing.T) { maxBytes := maxVdrBytes throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, diff --git a/network/throttling/inbound_msg_throttler.go b/network/throttling/inbound_msg_throttler.go index ea9167deca1..faf64ed083a 100644 --- a/network/throttling/inbound_msg_throttler.go +++ b/network/throttling/inbound_msg_throttler.go @@ -12,7 +12,6 @@ import ( "github.com/ava-labs/avalanchego/snow/networking/tracker" "github.com/ava-labs/avalanchego/snow/validators" "github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/utils/metric" ) var _ InboundMsgThrottler = (*inboundMsgThrottler)(nil) @@ -54,7 +53,6 @@ type InboundMsgThrottlerConfig struct { // Returns a new, sybil-safe inbound message throttler. 
func NewInboundMsgThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, vdrs validators.Manager, throttlerConfig InboundMsgThrottlerConfig, @@ -64,7 +62,6 @@ func NewInboundMsgThrottler( ) (InboundMsgThrottler, error) { byteThrottler, err := newInboundMsgByteThrottler( log, - namespace, registerer, vdrs, throttlerConfig.MsgByteThrottlerConfig, @@ -73,7 +70,6 @@ func NewInboundMsgThrottler( return nil, err } bufferThrottler, err := newInboundMsgBufferThrottler( - namespace, registerer, throttlerConfig.MaxProcessingMsgsPerNode, ) @@ -82,7 +78,6 @@ func NewInboundMsgThrottler( } bandwidthThrottler, err := newBandwidthThrottler( log, - namespace, registerer, throttlerConfig.BandwidthThrottlerConfig, ) @@ -90,7 +85,7 @@ func NewInboundMsgThrottler( return nil, err } cpuThrottler, err := NewSystemThrottler( - metric.AppendNamespace(namespace, "cpu"), + "cpu", registerer, throttlerConfig.CPUThrottlerConfig, resourceTracker.CPUTracker(), @@ -100,7 +95,7 @@ func NewInboundMsgThrottler( return nil, err } diskThrottler, err := NewSystemThrottler( - metric.AppendNamespace(namespace, "disk"), + "disk", registerer, throttlerConfig.DiskThrottlerConfig, resourceTracker.DiskTracker(), diff --git a/network/throttling/outbound_msg_throttler.go b/network/throttling/outbound_msg_throttler.go index d75c53f1548..b27fe01060d 100644 --- a/network/throttling/outbound_msg_throttler.go +++ b/network/throttling/outbound_msg_throttler.go @@ -42,7 +42,6 @@ type outboundMsgThrottler struct { func NewSybilOutboundMsgThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, vdrs validators.Manager, config MsgByteThrottlerConfig, @@ -59,7 +58,7 @@ func NewSybilOutboundMsgThrottler( nodeToAtLargeBytesUsed: make(map[ids.NodeID]uint64), }, } - return t, t.metrics.initialize(namespace, registerer) + return t, t.metrics.initialize(registerer) } func (t *outboundMsgThrottler) Acquire(msg message.OutboundMessage, nodeID ids.NodeID) bool { @@ -176,31 
+175,26 @@ type outboundMsgThrottlerMetrics struct { awaitingRelease prometheus.Gauge } -func (m *outboundMsgThrottlerMetrics) initialize(namespace string, registerer prometheus.Registerer) error { +func (m *outboundMsgThrottlerMetrics) initialize(registerer prometheus.Registerer) error { m.acquireSuccesses = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "throttler_outbound_acquire_successes", - Help: "Outbound messages not dropped due to rate-limiting", + Name: "throttler_outbound_acquire_successes", + Help: "Outbound messages not dropped due to rate-limiting", }) m.acquireFailures = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "throttler_outbound_acquire_failures", - Help: "Outbound messages dropped due to rate-limiting", + Name: "throttler_outbound_acquire_failures", + Help: "Outbound messages dropped due to rate-limiting", }) m.remainingAtLargeBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "throttler_outbound_remaining_at_large_bytes", - Help: "Bytes remaining in the at large byte allocation", + Name: "throttler_outbound_remaining_at_large_bytes", + Help: "Bytes remaining in the at large byte allocation", }) m.remainingVdrBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "throttler_outbound_remaining_validator_bytes", - Help: "Bytes remaining in the validator byte allocation", + Name: "throttler_outbound_remaining_validator_bytes", + Help: "Bytes remaining in the validator byte allocation", }) m.awaitingRelease = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "throttler_outbound_awaiting_release", - Help: "Number of messages waiting to be sent", + Name: "throttler_outbound_awaiting_release", + Help: "Number of messages waiting to be sent", }) return utils.Err( registerer.Register(m.acquireSuccesses), diff --git a/network/throttling/outbound_msg_throttler_test.go 
b/network/throttling/outbound_msg_throttler_test.go index 664449adadd..ab883b8fa4e 100644 --- a/network/throttling/outbound_msg_throttler_test.go +++ b/network/throttling/outbound_msg_throttler_test.go @@ -32,7 +32,6 @@ func TestSybilOutboundMsgThrottler(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr2ID, nil, ids.Empty, 1)) throttlerIntf, err := NewSybilOutboundMsgThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -174,7 +173,6 @@ func TestSybilOutboundMsgThrottlerMaxNonVdr(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) throttlerIntf, err := NewSybilOutboundMsgThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -221,7 +219,6 @@ func TestBypassThrottling(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) throttlerIntf, err := NewSybilOutboundMsgThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, diff --git a/node/node.go b/node/node.go index 3138f00c1c3..f4804f3a460 100644 --- a/node/node.go +++ b/node/node.go @@ -66,6 +66,7 @@ import ( "github.com/ava-labs/avalanchego/utils/ips" "github.com/ava-labs/avalanchego/utils/logging" "github.com/ava-labs/avalanchego/utils/math/meter" + "github.com/ava-labs/avalanchego/utils/metric" "github.com/ava-labs/avalanchego/utils/perms" "github.com/ava-labs/avalanchego/utils/profiler" "github.com/ava-labs/avalanchego/utils/resource" @@ -180,15 +181,22 @@ func New( n.initSharedMemory() // Initialize shared memory + n.networkRegisterer = prometheus.NewRegistry() + err = n.MetricsGatherer.Register( + metric.AppendNamespace(constants.PlatformName, "network"), + n.networkRegisterer, + ) + if err != nil { + return nil, fmt.Errorf("couldn't register network metrics: %w", err) + } + // message.Creator is shared between networking, chainManager and the engine. 
// It must be initiated before networking (initNetworking), chain manager (initChainManager) // and the engine (initChains) but after the metrics (initMetricsAPI) // message.Creator currently record metrics under network namespace - n.networkNamespace = "network" n.msgCreator, err = message.NewCreator( n.Log, - n.MetricsRegisterer, - n.networkNamespace, + n.networkRegisterer, n.Config.NetworkConfig.CompressionType, n.Config.NetworkConfig.MaximumInboundMessageTimeout, ) @@ -310,8 +318,8 @@ type Node struct { VertexAcceptorGroup snow.AcceptorGroup // Net runs the networking stack - networkNamespace string - Net network.Network + networkRegisterer *prometheus.Registry + Net network.Network // The staking address will optionally be written to a process context // file to enable other nodes to be configured to use this node as a @@ -584,7 +592,6 @@ func (n *Node) initNetworking() error { } // add node configs to network config - n.Config.NetworkConfig.Namespace = n.networkNamespace n.Config.NetworkConfig.MyNodeID = n.ID n.Config.NetworkConfig.MyIPPort = dynamicIP n.Config.NetworkConfig.NetworkID = n.Config.NetworkID @@ -603,7 +610,7 @@ func (n *Node) initNetworking() error { n.Net, err = network.NewNetwork( &n.Config.NetworkConfig, n.msgCreator, - n.MetricsRegisterer, + n.networkRegisterer, n.Log, listener, dialer.NewDialer(constants.NetworkType, n.Config.NetworkConfig.DialerConfig, n.Log), diff --git a/snow/networking/sender/sender_test.go b/snow/networking/sender/sender_test.go index 2005c25da2a..6bd2bc558c9 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -70,7 +70,6 @@ func TestTimeout(t *testing.T) { mc, err := message.NewCreator( logging.NoLog{}, metrics, - "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -347,7 +346,6 @@ func TestReliableMessages(t *testing.T) { mc, err := message.NewCreator( logging.NoLog{}, metrics, - "dummyNamespace", constants.DefaultNetworkCompressionType, 
10*time.Second, ) @@ -504,7 +502,6 @@ func TestReliableMessagesToMyself(t *testing.T) { mc, err := message.NewCreator( logging.NoLog{}, metrics, - "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second, ) diff --git a/snow/snowtest/snowtest.go b/snow/snowtest/snowtest.go index 0ddee75707a..a2e03249da1 100644 --- a/snow/snowtest/snowtest.go +++ b/snow/snowtest/snowtest.go @@ -90,7 +90,7 @@ func Context(tb testing.TB, chainID ids.ID) *snow.Context { Log: logging.NoLog{}, BCLookup: aliaser, - Metrics: metrics.NewMultiGatherer(), + Metrics: metrics.NewPrefixGatherer(), ValidatorState: validatorState, ChainDataDir: "", diff --git a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index 13802ad4dae..d6a3fc41a85 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1425,7 +1425,12 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { chainRouter := &router.ChainRouter{} metrics := prometheus.NewRegistry() - mc, err := message.NewCreator(logging.NoLog{}, metrics, "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second) + mc, err := message.NewCreator( + logging.NoLog{}, + metrics, + constants.DefaultNetworkCompressionType, + 10*time.Second, + ) require.NoError(err) require.NoError(chainRouter.Initialize( From 00630c6b1c37ae8c8d7e3fbf32018310efcaf194 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 14:31:51 -0400 Subject: [PATCH 11/53] update more metrics --- api/metrics/README.md | 19 ++++--- api/metrics/multi_gatherer.go | 9 +++ chains/manager.go | 26 ++++++++- database/meterdb/db.go | 16 ++---- database/meterdb/db_test.go | 4 +- node/node.go | 63 ++++++++++++++++----- snow/networking/tracker/resource_tracker.go | 29 ++++------ utils/resource/metrics.go | 27 ++++----- utils/resource/usage.go | 2 +- 9 files changed, 122 insertions(+), 73 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 8100442ff81..69e5a0dbcf6 100644 --- a/api/metrics/README.md +++ 
b/api/metrics/README.md @@ -6,13 +6,14 @@ graph LR A --> C[network] A --> D[api] A --> E[db] - A --> F[go] - A --> G[health] - A --> H[system_resources] - A --> I[resource_tracker] - A --> J[requests] - B -- $chainID --> K[$vmID] - B -- $chainID, $isProposerVM --> L[meterchainvm] - B -- $chainID --> M[meterdagvm] - B -- $chainID --> N[proposervm] + A --> F[db] + A --> G[go] + A --> H[health] + A --> I[system_resources] + A --> J[resource_tracker] + A --> K[requests] + B -- $chainID --> L[$vmID] + B -- $chainID, $isProposerVM --> M[meterchainvm] + B -- $chainID --> N[meterdagvm] + B -- $chainID --> O[proposervm] ``` diff --git a/api/metrics/multi_gatherer.go b/api/metrics/multi_gatherer.go index 04e35da6b1e..ef74cf87c80 100644 --- a/api/metrics/multi_gatherer.go +++ b/api/metrics/multi_gatherer.go @@ -4,6 +4,7 @@ package metrics import ( + "fmt" "sync" "github.com/prometheus/client_golang/prometheus" @@ -38,3 +39,11 @@ func (g *multiGatherer) Gather() ([]*dto.MetricFamily, error) { return g.gatherers.Gather() } + +func MakeAndRegister(gatherer MultiGatherer, name string) (*prometheus.Registry, error) { + reg := prometheus.NewRegistry() + if err := gatherer.Register(name, reg); err != nil { + return nil, fmt.Errorf("couldn't register %q metrics: %w", name, err) + } + return reg, nil +} diff --git a/chains/manager.go b/chains/manager.go index c5b79dd470e..ed5ea431cb9 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -207,7 +207,9 @@ type ManagerConfig struct { // ShutdownNodeFunc allows the chain manager to issue a request to shutdown the node ShutdownNodeFunc func(exitCode int) MeterVMEnabled bool // Should each VM be wrapped with a MeterVM - Metrics metrics.MultiGatherer + + Metrics metrics.MultiGatherer + MeterDBMetrics metrics.MultiGatherer FrontierPollFrequency time.Duration ConsensusAppConcurrency int @@ -561,10 +563,19 @@ func (m *manager) createAvalancheChain( State: snow.Initializing, }) - meterDB, err := meterdb.New("db", ctx.Registerer, m.DB) + 
meterDBReg, err := metrics.MakeAndRegister( + m.MeterDBMetrics, + m.PrimaryAliasOrDefault(ctx.ChainID), + ) if err != nil { return nil, err } + + meterDB, err := meterdb.New(meterDBReg, m.DB) + if err != nil { + return nil, err + } + prefixDB := prefixdb.New(ctx.ChainID[:], meterDB) vmDB := prefixdb.New(VMDBPrefix, prefixDB) vertexDB := prefixdb.New(VertexDBPrefix, prefixDB) @@ -949,10 +960,19 @@ func (m *manager) createSnowmanChain( State: snow.Initializing, }) - meterDB, err := meterdb.New("db", ctx.Registerer, m.DB) + meterDBReg, err := metrics.MakeAndRegister( + m.MeterDBMetrics, + m.PrimaryAliasOrDefault(ctx.ChainID), + ) + if err != nil { + return nil, err + } + + meterDB, err := meterdb.New(meterDBReg, m.DB) if err != nil { return nil, err } + prefixDB := prefixdb.New(ctx.ChainID[:], meterDB) vmDB := prefixdb.New(VMDBPrefix, prefixDB) bootstrappingDB := prefixdb.New(ChainBootstrappingDBPrefix, prefixDB) diff --git a/database/meterdb/db.go b/database/meterdb/db.go index 5f9ef51df16..af41746b32e 100644 --- a/database/meterdb/db.go +++ b/database/meterdb/db.go @@ -98,7 +98,6 @@ type Database struct { // New returns a new database with added metrics func New( - namespace string, reg prometheus.Registerer, db database.Database, ) (*Database, error) { @@ -106,25 +105,22 @@ func New( db: db, calls: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "calls", - Help: "number of calls to the database", + Name: "calls", + Help: "number of calls to the database", }, methodLabels, ), duration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "duration", - Help: "time spent in database calls (ns)", + Name: "duration", + Help: "time spent in database calls (ns)", }, methodLabels, ), size: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "size", - Help: "size of data passed in database calls", + Name: "size", + Help: "size of data passed in database calls", }, methodLabels, ), diff 
--git a/database/meterdb/db_test.go b/database/meterdb/db_test.go index 48a8966b277..57cedc18104 100644 --- a/database/meterdb/db_test.go +++ b/database/meterdb/db_test.go @@ -18,7 +18,7 @@ func TestInterface(t *testing.T) { for name, test := range database.Tests { t.Run(name, func(t *testing.T) { baseDB := memdb.New() - db, err := New("", prometheus.NewRegistry(), baseDB) + db, err := New(prometheus.NewRegistry(), baseDB) require.NoError(t, err) test(t, db) @@ -28,7 +28,7 @@ func TestInterface(t *testing.T) { func newDB(t testing.TB) database.Database { baseDB := memdb.New() - db, err := New("", prometheus.NewRegistry(), baseDB) + db, err := New(prometheus.NewRegistry(), baseDB) require.NoError(t, err) return db } diff --git a/node/node.go b/node/node.go index f4804f3a460..762ee01061c 100644 --- a/node/node.go +++ b/node/node.go @@ -161,7 +161,10 @@ func New( return nil, fmt.Errorf("couldn't initialize tracer: %w", err) } - n.initMetrics() + if err := n.initMetrics(); err != nil { + return nil, fmt.Errorf("couldn't initialize metrics: %w", err) + } + n.initNAT() if err := n.initAPIServer(); err != nil { // Start the API Server return nil, fmt.Errorf("couldn't initialize API server: %w", err) @@ -181,13 +184,12 @@ func New( n.initSharedMemory() // Initialize shared memory - n.networkRegisterer = prometheus.NewRegistry() - err = n.MetricsGatherer.Register( + n.networkRegisterer, err = metrics.MakeAndRegister( + n.MetricsGatherer, metric.AppendNamespace(constants.PlatformName, "network"), - n.networkRegisterer, ) if err != nil { - return nil, fmt.Errorf("couldn't register network metrics: %w", err) + return nil, err } // message.Creator is shared between networking, chainManager and the engine. 
@@ -209,7 +211,7 @@ func New( logger.Warn("sybil control is not enforced") n.vdrs = newOverriddenManager(constants.PrimaryNetworkID, n.vdrs) } - if err := n.initResourceManager(n.MetricsRegisterer); err != nil { + if err := n.initResourceManager(); err != nil { return nil, fmt.Errorf("problem initializing resource manager: %w", err) } n.initCPUTargeter(&config.CPUTargeterConfig) @@ -360,8 +362,9 @@ type Node struct { DoneShuttingDown sync.WaitGroup // Metrics Registerer - MetricsRegisterer *prometheus.Registry - MetricsGatherer metrics.MultiGatherer + MetricsRegisterer *prometheus.Registry + MetricsGatherer metrics.MultiGatherer + MeterDBMetricsGatherer metrics.MultiGatherer VMAliaser ids.Aliaser VMManager vms.Manager @@ -761,8 +764,15 @@ func (n *Node) initDatabase() error { n.DB = versiondb.New(n.DB) } - var err error - n.DB, err = meterdb.New("db", n.MetricsRegisterer, n.DB) + meterDBReg, err := metrics.MakeAndRegister( + n.MeterDBMetricsGatherer, + "all", + ) + if err != nil { + return err + } + + n.DB, err = meterdb.New(meterDBReg, n.DB) if err != nil { return err } @@ -883,9 +893,13 @@ func (n *Node) initChains(genesisBytes []byte) error { return n.chainManager.StartChainCreator(platformChain) } -func (n *Node) initMetrics() { +func (n *Node) initMetrics() error { + // TODO: Remove n.MetricsRegisterer = prometheus.NewRegistry() - n.MetricsGatherer = metrics.NewMultiGatherer() + + n.MetricsGatherer = metrics.NewPrefixGatherer() + n.MeterDBMetricsGatherer = metrics.NewLabelGatherer("chain") + return n.MetricsGatherer.Register("meterdb", n.MeterDBMetricsGatherer) } func (n *Node) initNAT() { @@ -1498,14 +1512,21 @@ func (n *Node) initAPIAliases(genesisBytes []byte) error { } // Initialize [n.resourceManager]. 
-func (n *Node) initResourceManager(reg prometheus.Registerer) error { +func (n *Node) initResourceManager() error { + systemResourcesRegisterer, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "system_resources"), + ) + if err != nil { + return err + } resourceManager, err := resource.NewManager( n.Log, n.Config.DatabaseConfig.Path, n.Config.SystemTrackerFrequency, n.Config.SystemTrackerCPUHalflife, n.Config.SystemTrackerDiskHalflife, - reg, + systemResourcesRegisterer, ) if err != nil { return err @@ -1513,7 +1534,19 @@ func (n *Node) initResourceManager(reg prometheus.Registerer) error { n.resourceManager = resourceManager n.resourceManager.TrackProcess(os.Getpid()) - n.resourceTracker, err = tracker.NewResourceTracker(reg, n.resourceManager, &meter.ContinuousFactory{}, n.Config.SystemTrackerProcessingHalflife) + resourceTrackerRegisterer, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "resource_tracker"), + ) + if err != nil { + return err + } + n.resourceTracker, err = tracker.NewResourceTracker( + resourceTrackerRegisterer, + n.resourceManager, + &meter.ContinuousFactory{}, + n.Config.SystemTrackerProcessingHalflife, + ) return err } diff --git a/snow/networking/tracker/resource_tracker.go b/snow/networking/tracker/resource_tracker.go index 7b480d24255..d8f5da99192 100644 --- a/snow/networking/tracker/resource_tracker.go +++ b/snow/networking/tracker/resource_tracker.go @@ -218,7 +218,7 @@ func NewResourceTracker( meters: linked.NewHashmap[ids.NodeID, meter.Meter](), } var err error - t.metrics, err = newCPUTrackerMetrics("resource_tracker", reg) + t.metrics, err = newCPUTrackerMetrics(reg) if err != nil { return nil, fmt.Errorf("initializing resourceTracker metrics errored with: %w", err) } @@ -293,32 +293,27 @@ type trackerMetrics struct { diskSpaceAvailable prometheus.Gauge } -func newCPUTrackerMetrics(namespace string, reg prometheus.Registerer) 
(*trackerMetrics, error) { +func newCPUTrackerMetrics(reg prometheus.Registerer) (*trackerMetrics, error) { m := &trackerMetrics{ processingTimeMetric: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "processing_time", - Help: "Tracked processing time over all nodes. Value expected to be in [0, number of CPU cores], but can go higher due to IO bound processes and thread multiplexing", + Name: "processing_time", + Help: "Tracked processing time over all nodes. Value expected to be in [0, number of CPU cores], but can go higher due to IO bound processes and thread multiplexing", }), cpuMetric: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "cpu_usage", - Help: "CPU usage tracked by the resource manager. Value should be in [0, number of CPU cores]", + Name: "cpu_usage", + Help: "CPU usage tracked by the resource manager. Value should be in [0, number of CPU cores]", }), diskReadsMetric: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "disk_reads", - Help: "Disk reads (bytes/sec) tracked by the resource manager", + Name: "disk_reads", + Help: "Disk reads (bytes/sec) tracked by the resource manager", }), diskWritesMetric: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "disk_writes", - Help: "Disk writes (bytes/sec) tracked by the resource manager", + Name: "disk_writes", + Help: "Disk writes (bytes/sec) tracked by the resource manager", }), diskSpaceAvailable: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "disk_available_space", - Help: "Available space remaining (bytes) on the database volume", + Name: "disk_available_space", + Help: "Available space remaining (bytes) on the database volume", }), } err := utils.Err( diff --git a/utils/resource/metrics.go b/utils/resource/metrics.go index 3ce87ade258..42d12f1ccc7 100644 --- a/utils/resource/metrics.go +++ b/utils/resource/metrics.go @@ -17,45 +17,40 @@ type metrics struct { 
numDiskWritesBytes *prometheus.GaugeVec } -func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, error) { +func newMetrics(registerer prometheus.Registerer) (*metrics, error) { m := &metrics{ numCPUCycles: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_cpu_cycles", - Help: "Total number of CPU cycles", + Name: "num_cpu_cycles", + Help: "Total number of CPU cycles", }, []string{"processID"}, ), numDiskReads: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_disk_reads", - Help: "Total number of disk reads", + Name: "num_disk_reads", + Help: "Total number of disk reads", }, []string{"processID"}, ), numDiskReadBytes: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_disk_read_bytes", - Help: "Total number of disk read bytes", + Name: "num_disk_read_bytes", + Help: "Total number of disk read bytes", }, []string{"processID"}, ), numDiskWrites: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_disk_writes", - Help: "Total number of disk writes", + Name: "num_disk_writes", + Help: "Total number of disk writes", }, []string{"processID"}, ), numDiskWritesBytes: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_disk_write_bytes", - Help: "Total number of disk write bytes", + Name: "num_disk_write_bytes", + Help: "Total number of disk write bytes", }, []string{"processID"}, ), diff --git a/utils/resource/usage.go b/utils/resource/usage.go index 32a9d1965c9..32ffbfe4aa8 100644 --- a/utils/resource/usage.go +++ b/utils/resource/usage.go @@ -94,7 +94,7 @@ func NewManager( diskHalflife time.Duration, metricsRegisterer prometheus.Registerer, ) (Manager, error) { - processMetrics, err := newMetrics("system_resources", metricsRegisterer) + processMetrics, err := newMetrics(metricsRegisterer) if err != nil { return nil, err } From 5fb44f55031f33ad4f7ac479d03e404e229573d4 Mon Sep 17 00:00:00 
2001 From: Stephen Buttolph Date: Tue, 28 May 2024 14:32:16 -0400 Subject: [PATCH 12/53] update more metrics --- api/metrics/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 69e5a0dbcf6..90bb69b7a4f 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -5,7 +5,7 @@ graph LR A[avalanche] --> B[chain] A --> C[network] A --> D[api] - A --> E[db] + A -- $chainID --> E[meterdb] A --> F[db] A --> G[go] A --> H[health] From e9a4b60100d64fbead10159b1d2893c231072d27 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 15:58:52 -0400 Subject: [PATCH 13/53] wip updating metrics --- api/server/metrics.go | 17 ++-- api/server/server.go | 3 +- chains/manager.go | 4 +- database/leveldb/db.go | 4 +- database/leveldb/db_test.go | 4 +- database/leveldb/metrics.go | 92 ++++++++----------- database/pebble/batch_test.go | 2 +- database/pebble/db.go | 2 +- database/pebble/db_test.go | 2 +- node/node.go | 82 ++++++++++++----- snow/networking/router/chain_router.go | 5 +- .../networking/router/chain_router_metrics.go | 17 ++-- snow/networking/router/chain_router_test.go | 16 ---- snow/networking/router/mock_router.go | 8 +- snow/networking/router/router.go | 3 +- snow/networking/router/traced_router.go | 6 +- snow/networking/sender/sender_test.go | 6 -- snow/networking/timeout/manager.go | 6 +- snow/networking/timeout/manager_test.go | 1 - utils/timer/adaptive_timeout_manager.go | 31 +++---- utils/timer/adaptive_timeout_manager_test.go | 3 +- .../validators/manager_benchmark_test.go | 1 - vms/platformvm/vm_test.go | 2 - 23 files changed, 143 insertions(+), 174 deletions(-) diff --git a/api/server/metrics.go b/api/server/metrics.go index e3b2d76c83e..9734f36eeaa 100644 --- a/api/server/metrics.go +++ b/api/server/metrics.go @@ -18,29 +18,26 @@ type metrics struct { totalDuration *prometheus.GaugeVec } -func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, 
error) { +func newMetrics(registerer prometheus.Registerer) (*metrics, error) { m := &metrics{ numProcessing: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "calls_processing", - Help: "The number of calls this API is currently processing", + Name: "calls_processing", + Help: "The number of calls this API is currently processing", }, []string{"base"}, ), numCalls: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "calls", - Help: "The number of calls this API has processed", + Name: "calls", + Help: "The number of calls this API has processed", }, []string{"base"}, ), totalDuration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "calls_duration", - Help: "The total amount of time, in nanoseconds, spent handling API calls", + Name: "calls_duration", + Help: "The total amount of time, in nanoseconds, spent handling API calls", }, []string{"base"}, ), diff --git a/api/server/server.go b/api/server/server.go index cd712ec88b9..8af570d09bd 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -108,12 +108,11 @@ func New( nodeID ids.NodeID, tracingEnabled bool, tracer trace.Tracer, - namespace string, registerer prometheus.Registerer, httpConfig HTTPConfig, allowedHosts []string, ) (Server, error) { - m, err := newMetrics(namespace, registerer) + m, err := newMetrics(registerer) if err != nil { return nil, err } diff --git a/chains/manager.go b/chains/manager.go index ed5ea431cb9..cafd46417e4 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -264,7 +264,7 @@ type manager struct { } // New returns a new Manager -func New(config *ManagerConfig) Manager { +func New(config *ManagerConfig) (Manager, error) { return &manager{ Aliaser: ids.NewAliaser(), ManagerConfig: *config, @@ -272,7 +272,7 @@ func New(config *ManagerConfig) Manager { chainsQueue: buffer.NewUnboundedBlockingDeque[ChainParameters](initialQueueSize), unblockChainCreatorCh: make(chan struct{}), 
chainCreatorShutdownCh: make(chan struct{}), - } + }, nil } // QueueChainCreation queues a chain creation request diff --git a/database/leveldb/db.go b/database/leveldb/db.go index 6c09606128d..7c54b1d86e3 100644 --- a/database/leveldb/db.go +++ b/database/leveldb/db.go @@ -186,7 +186,7 @@ type config struct { } // New returns a wrapped LevelDB object. -func New(file string, configBytes []byte, log logging.Logger, namespace string, reg prometheus.Registerer) (database.Database, error) { +func New(file string, configBytes []byte, log logging.Logger, reg prometheus.Registerer) (database.Database, error) { parsedConfig := config{ BlockCacheCapacity: DefaultBlockCacheSize, DisableSeeksCompaction: true, @@ -236,7 +236,7 @@ func New(file string, configBytes []byte, log logging.Logger, namespace string, closeCh: make(chan struct{}), } if parsedConfig.MetricUpdateFrequency > 0 { - metrics, err := newMetrics(namespace, reg) + metrics, err := newMetrics(reg) if err != nil { // Drop any close error to report the original error _ = db.Close() diff --git a/database/leveldb/db_test.go b/database/leveldb/db_test.go index 8352e53bd53..65214d08084 100644 --- a/database/leveldb/db_test.go +++ b/database/leveldb/db_test.go @@ -18,7 +18,7 @@ func TestInterface(t *testing.T) { for name, test := range database.Tests { t.Run(name, func(t *testing.T) { folder := t.TempDir() - db, err := New(folder, nil, logging.NoLog{}, "", prometheus.NewRegistry()) + db, err := New(folder, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) test(t, db) @@ -30,7 +30,7 @@ func TestInterface(t *testing.T) { func newDB(t testing.TB) database.Database { folder := t.TempDir() - db, err := New(folder, nil, logging.NoLog{}, "", prometheus.NewRegistry()) + db, err := New(folder, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) return db } diff --git a/database/leveldb/metrics.go b/database/leveldb/metrics.go index 5ad2e2b369d..d1edab6f98e 100644 --- 
a/database/leveldb/metrics.go +++ b/database/leveldb/metrics.go @@ -62,117 +62,99 @@ type metrics struct { priorStats, currentStats *leveldb.DBStats } -func newMetrics(namespace string, reg prometheus.Registerer) (metrics, error) { +func newMetrics(reg prometheus.Registerer) (metrics, error) { m := metrics{ writesDelayedCount: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "writes_delayed", - Help: "number of cumulative writes that have been delayed due to compaction", + Name: "writes_delayed", + Help: "number of cumulative writes that have been delayed due to compaction", }), writesDelayedDuration: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "writes_delayed_duration", - Help: "amount of time (in ns) that writes have been delayed due to compaction", + Name: "writes_delayed_duration", + Help: "amount of time (in ns) that writes have been delayed due to compaction", }), writeIsDelayed: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "write_delayed", - Help: "1 if there is currently a write that is being delayed due to compaction", + Name: "write_delayed", + Help: "1 if there is currently a write that is being delayed due to compaction", }), aliveSnapshots: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "alive_snapshots", - Help: "number of currently alive snapshots", + Name: "alive_snapshots", + Help: "number of currently alive snapshots", }), aliveIterators: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "alive_iterators", - Help: "number of currently alive iterators", + Name: "alive_iterators", + Help: "number of currently alive iterators", }), ioWrite: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "io_write", - Help: "cumulative amount of io write during compaction", + Name: "io_write", + Help: "cumulative amount of io write during compaction", }), ioRead: 
prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "io_read", - Help: "cumulative amount of io read during compaction", + Name: "io_read", + Help: "cumulative amount of io read during compaction", }), blockCacheSize: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "block_cache_size", - Help: "total size of cached blocks", + Name: "block_cache_size", + Help: "total size of cached blocks", }), openTables: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "open_tables", - Help: "number of currently opened tables", + Name: "open_tables", + Help: "number of currently opened tables", }), levelTableCount: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "table_count", - Help: "number of tables allocated by level", + Name: "table_count", + Help: "number of tables allocated by level", }, levelLabels, ), levelSize: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "size", - Help: "amount of bytes allocated by level", + Name: "size", + Help: "amount of bytes allocated by level", }, levelLabels, ), levelDuration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "duration", - Help: "amount of time (in ns) spent in compaction by level", + Name: "duration", + Help: "amount of time (in ns) spent in compaction by level", }, levelLabels, ), levelReads: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "reads", - Help: "amount of bytes read during compaction by level", + Name: "reads", + Help: "amount of bytes read during compaction by level", }, levelLabels, ), levelWrites: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "writes", - Help: "amount of bytes written during compaction by level", + Name: "writes", + Help: "amount of bytes written during compaction by level", }, levelLabels, ), memCompactions: prometheus.NewCounter(prometheus.CounterOpts{ - 
Namespace: namespace, - Name: "mem_comps", - Help: "total number of memory compactions performed", + Name: "mem_comps", + Help: "total number of memory compactions performed", }), level0Compactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "level_0_comps", - Help: "total number of level 0 compactions performed", + Name: "level_0_comps", + Help: "total number of level 0 compactions performed", }), nonLevel0Compactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "non_level_0_comps", - Help: "total number of non-level 0 compactions performed", + Name: "non_level_0_comps", + Help: "total number of non-level 0 compactions performed", }), seekCompactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "seek_comps", - Help: "total number of seek compactions performed", + Name: "seek_comps", + Help: "total number of seek compactions performed", }), priorStats: &leveldb.DBStats{}, diff --git a/database/pebble/batch_test.go b/database/pebble/batch_test.go index 4fcc537d1e8..1f995e2e851 100644 --- a/database/pebble/batch_test.go +++ b/database/pebble/batch_test.go @@ -17,7 +17,7 @@ func TestBatch(t *testing.T) { require := require.New(t) dirName := t.TempDir() - db, err := New(dirName, nil, logging.NoLog{}, "", prometheus.NewRegistry()) + db, err := New(dirName, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(err) batchIntf := db.NewBatch() diff --git a/database/pebble/db.go b/database/pebble/db.go index 0acb10d12c5..418d123a0dd 100644 --- a/database/pebble/db.go +++ b/database/pebble/db.go @@ -64,7 +64,7 @@ type Config struct { } // TODO: Add metrics -func New(file string, configBytes []byte, log logging.Logger, _ string, _ prometheus.Registerer) (database.Database, error) { +func New(file string, configBytes []byte, log logging.Logger, _ prometheus.Registerer) (database.Database, error) { cfg := DefaultConfig if len(configBytes) > 0 { if err := 
json.Unmarshal(configBytes, &cfg); err != nil { diff --git a/database/pebble/db_test.go b/database/pebble/db_test.go index 3b37d9362d9..61a78cdf339 100644 --- a/database/pebble/db_test.go +++ b/database/pebble/db_test.go @@ -16,7 +16,7 @@ import ( func newDB(t testing.TB) *Database { folder := t.TempDir() - db, err := New(folder, nil, logging.NoLog{}, "pebble", prometheus.NewRegistry()) + db, err := New(folder, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) return db.(*Database) } diff --git a/node/node.go b/node/node.go index 762ee01061c..43e7473176f 100644 --- a/node/node.go +++ b/node/node.go @@ -362,7 +362,6 @@ type Node struct { DoneShuttingDown sync.WaitGroup // Metrics Registerer - MetricsRegisterer *prometheus.Registry MetricsGatherer metrics.MultiGatherer MeterDBMetricsGatherer metrics.MultiGatherer @@ -730,6 +729,14 @@ func (n *Node) Dispatch() error { */ func (n *Node) initDatabase() error { + dbReg, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "db"), + ) + if err != nil { + return err + } + // start the db switch n.Config.DatabaseConfig.Name { case leveldb.Name: @@ -737,7 +744,7 @@ func (n *Node) initDatabase() error { // files went to [dbPath]/[networkID]/v1.4.5. 
dbPath := filepath.Join(n.Config.DatabaseConfig.Path, version.CurrentDatabase.String()) var err error - n.DB, err = leveldb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, "db_internal", n.MetricsRegisterer) + n.DB, err = leveldb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, dbReg) if err != nil { return fmt.Errorf("couldn't create leveldb at %s: %w", dbPath, err) } @@ -746,7 +753,7 @@ func (n *Node) initDatabase() error { case pebble.Name: dbPath := filepath.Join(n.Config.DatabaseConfig.Path, pebble.Name) var err error - n.DB, err = pebble.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, "db_internal", n.MetricsRegisterer) + n.DB, err = pebble.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, dbReg) if err != nil { return fmt.Errorf("couldn't create pebbledb at %s: %w", dbPath, err) } @@ -894,9 +901,6 @@ func (n *Node) initChains(genesisBytes []byte) error { } func (n *Node) initMetrics() error { - // TODO: Remove - n.MetricsRegisterer = prometheus.NewRegistry() - n.MetricsGatherer = metrics.NewPrefixGatherer() n.MeterDBMetricsGatherer = metrics.NewLabelGatherer("chain") return n.MetricsGatherer.Register("meterdb", n.MeterDBMetricsGatherer) @@ -988,6 +992,13 @@ func (n *Node) initAPIServer() error { } n.apiURI = fmt.Sprintf("%s://%s", protocol, listener.Addr()) + apiReg, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "api"), + ) + if err != nil { + return err + } n.APIServer, err = server.New( n.Log, n.LogFactory, @@ -997,8 +1008,7 @@ func (n *Node) initAPIServer() error { n.ID, n.Config.TraceConfig.Enabled, n.tracer, - "api", - n.MetricsRegisterer, + apiReg, n.Config.HTTPConfig.HTTPConfig, n.Config.HTTPAllowedHosts, ) @@ -1042,11 +1052,18 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { cChainID, ) + requestsReg, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "requests"), + ) + if err != nil { + return err + } + n.timeoutManager, 
err = timeout.NewManager( &n.Config.AdaptiveTimeoutConfig, n.benchlistManager, - "requests", - n.MetricsRegisterer, + requestsReg, ) if err != nil { return err @@ -1064,8 +1081,7 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { n.Config.TrackedSubnets, n.Shutdown, n.Config.RouterHealthConfig, - "requests", - n.MetricsRegisterer, + requestsReg, ) if err != nil { return fmt.Errorf("couldn't initialize chain router: %w", err) @@ -1075,7 +1091,8 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { if err != nil { return fmt.Errorf("failed to initialize subnets: %w", err) } - n.chainManager = chains.New( + + n.chainManager, err = chains.New( &chains.ManagerConfig{ SybilProtectionEnabled: n.Config.SybilProtectionEnabled, StakingTLSSigner: n.StakingTLSSigner, @@ -1107,6 +1124,7 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { ShutdownNodeFunc: n.Shutdown, MeterVMEnabled: n.Config.MeterVMEnabled, Metrics: n.MetricsGatherer, + MeterDBMetrics: n.MeterDBMetricsGatherer, SubnetConfigs: n.Config.SubnetConfigs, ChainConfigs: n.Config.ChainConfigs, FrontierPollFrequency: n.Config.FrontierPollFrequency, @@ -1124,6 +1142,9 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { Subnets: subnets, }, ) + if err != nil { + return err + } // Notify the API server when new chains are created n.chainManager.AddRegistrant(n.APIServer) @@ -1245,19 +1266,23 @@ func (n *Node) initMetricsAPI() error { return nil } - if err := n.MetricsGatherer.Register(constants.PlatformName, n.MetricsRegisterer); err != nil { + processReg, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "process"), + ) + if err != nil { return err } // Current state of process metrics. 
processCollector := collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}) - if err := n.MetricsRegisterer.Register(processCollector); err != nil { + if err := processReg.Register(processCollector); err != nil { return err } // Go process metrics using debug.GCStats. goCollector := collectors.NewGoCollector() - if err := n.MetricsRegisterer.Register(goCollector); err != nil { + if err := processReg.Register(goCollector); err != nil { return err } @@ -1374,11 +1399,18 @@ func (n *Node) initInfoAPI() error { // initHealthAPI initializes the Health API service // Assumes n.Log, n.Net, n.APIServer, n.HTTPLog already initialized func (n *Node) initHealthAPI() error { - healthChecker, err := health.New(n.Log, n.MetricsRegisterer) + healthReg, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, "health"), + ) + if err != nil { + return err + } + + n.health, err = health.New(n.Log, healthReg) if err != nil { return err } - n.health = healthChecker if !n.Config.HealthAPIEnabled { n.Log.Info("skipping health API initialization because it has been disabled") @@ -1386,18 +1418,18 @@ func (n *Node) initHealthAPI() error { } n.Log.Info("initializing Health API") - err = healthChecker.RegisterHealthCheck("network", n.Net, health.ApplicationTag) + err = n.health.RegisterHealthCheck("network", n.Net, health.ApplicationTag) if err != nil { return fmt.Errorf("couldn't register network health check: %w", err) } - err = healthChecker.RegisterHealthCheck("router", n.chainRouter, health.ApplicationTag) + err = n.health.RegisterHealthCheck("router", n.chainRouter, health.ApplicationTag) if err != nil { return fmt.Errorf("couldn't register router health check: %w", err) } // TODO: add database health to liveness check - err = healthChecker.RegisterHealthCheck("database", n.DB, health.ApplicationTag) + err = n.health.RegisterHealthCheck("database", n.DB, health.ApplicationTag) if err != nil { return fmt.Errorf("couldn't register 
database health check: %w", err) } @@ -1429,7 +1461,7 @@ func (n *Node) initHealthAPI() error { return fmt.Errorf("couldn't register resource health check: %w", err) } - handler, err := health.NewGetAndPostHandler(n.Log, healthChecker) + handler, err := health.NewGetAndPostHandler(n.Log, n.health) if err != nil { return err } @@ -1444,7 +1476,7 @@ func (n *Node) initHealthAPI() error { } err = n.APIServer.AddRoute( - health.NewGetHandler(healthChecker.Readiness), + health.NewGetHandler(n.health.Readiness), "health", "/readiness", ) @@ -1453,7 +1485,7 @@ func (n *Node) initHealthAPI() error { } err = n.APIServer.AddRoute( - health.NewGetHandler(healthChecker.Health), + health.NewGetHandler(n.health.Health), "health", "/health", ) @@ -1462,7 +1494,7 @@ func (n *Node) initHealthAPI() error { } return n.APIServer.AddRoute( - health.NewGetHandler(healthChecker.Liveness), + health.NewGetHandler(n.health.Liveness), "health", "/liveness", ) diff --git a/snow/networking/router/chain_router.go b/snow/networking/router/chain_router.go index 8d471fb768c..27bf891ab4f 100644 --- a/snow/networking/router/chain_router.go +++ b/snow/networking/router/chain_router.go @@ -101,8 +101,7 @@ func (cr *ChainRouter) Initialize( trackedSubnets set.Set[ids.ID], onFatal func(exitCode int), healthConfig HealthConfig, - metricsNamespace string, - metricsRegisterer prometheus.Registerer, + reg prometheus.Registerer, ) error { cr.log = log cr.chainHandlers = make(map[ids.ID]handler.Handler) @@ -126,7 +125,7 @@ func (cr *ChainRouter) Initialize( cr.peers[nodeID] = myself // Register metrics - rMetrics, err := newRouterMetrics(metricsNamespace, metricsRegisterer) + rMetrics, err := newRouterMetrics(reg) if err != nil { return err } diff --git a/snow/networking/router/chain_router_metrics.go b/snow/networking/router/chain_router_metrics.go index bc8f2622358..8855acc5ccd 100644 --- a/snow/networking/router/chain_router_metrics.go +++ b/snow/networking/router/chain_router_metrics.go @@ -16,27 +16,24 
@@ type routerMetrics struct { droppedRequests prometheus.Counter } -func newRouterMetrics(namespace string, registerer prometheus.Registerer) (*routerMetrics, error) { +func newRouterMetrics(registerer prometheus.Registerer) (*routerMetrics, error) { rMetrics := &routerMetrics{} rMetrics.outstandingRequests = prometheus.NewGauge( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "outstanding", - Help: "Number of outstanding requests (all types)", + Name: "outstanding", + Help: "Number of outstanding requests (all types)", }, ) rMetrics.longestRunningRequest = prometheus.NewGauge( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "longest_running", - Help: "Time (in ns) the longest request took", + Name: "longest_running", + Help: "Time (in ns) the longest request took", }, ) rMetrics.droppedRequests = prometheus.NewCounter( prometheus.CounterOpts{ - Namespace: namespace, - Name: "dropped", - Help: "Number of dropped requests (all types)", + Name: "dropped", + Help: "Number of dropped requests (all types)", }, ) diff --git a/snow/networking/router/chain_router_test.go b/snow/networking/router/chain_router_test.go index c17360f0248..91f2c67090c 100644 --- a/snow/networking/router/chain_router_test.go +++ b/snow/networking/router/chain_router_test.go @@ -61,7 +61,6 @@ func TestShutdown(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist, - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -80,7 +79,6 @@ func TestShutdown(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) @@ -211,7 +209,6 @@ func TestShutdownTimesOut(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist, - "", metrics, ) require.NoError(err) @@ -231,7 +228,6 @@ func TestShutdownTimesOut(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", metrics, )) @@ -360,7 +356,6 @@ func TestRouterTimeout(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist.NewNoBenchlist(), - "", prometheus.NewRegistry(), ) 
require.NoError(err) @@ -380,7 +375,6 @@ func TestRouterTimeout(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) defer chainRouter.Shutdown(context.Background()) @@ -729,7 +723,6 @@ func TestRouterHonorsRequestedEngine(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist.NewNoBenchlist(), - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -749,7 +742,6 @@ func TestRouterHonorsRequestedEngine(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) defer chainRouter.Shutdown(context.Background()) @@ -954,7 +946,6 @@ func TestValidatorOnlyMessageDrops(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist.NewNoBenchlist(), - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -974,7 +965,6 @@ func TestValidatorOnlyMessageDrops(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) defer chainRouter.Shutdown(context.Background()) @@ -1115,7 +1105,6 @@ func TestConnectedSubnet(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist.NewNoBenchlist(), - "timeoutManager", prometheus.NewRegistry(), ) require.NoError(err) @@ -1140,7 +1129,6 @@ func TestConnectedSubnet(t *testing.T) { trackedSubnets, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) @@ -1232,7 +1220,6 @@ func TestValidatorOnlyAllowedNodeMessageDrops(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist.NewNoBenchlist(), - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -1252,7 +1239,6 @@ func TestValidatorOnlyAllowedNodeMessageDrops(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) defer chainRouter.Shutdown(context.Background()) @@ -1582,7 +1568,6 @@ func newChainRouterTest(t *testing.T) (*ChainRouter, *common.EngineTest) { TimeoutHalflife: 5 * time.Minute, }, benchlist.NewNoBenchlist(), - "", prometheus.NewRegistry(), ) require.NoError(t, err) @@ -1601,7 +1586,6 @@ func newChainRouterTest(t 
*testing.T) (*ChainRouter, *common.EngineTest) { set.Set[ids.ID]{}, nil, HealthConfig{}, - "", prometheus.NewRegistry(), )) diff --git a/snow/networking/router/mock_router.go b/snow/networking/router/mock_router.go index c9146a77713..548b3211077 100644 --- a/snow/networking/router/mock_router.go +++ b/snow/networking/router/mock_router.go @@ -125,17 +125,17 @@ func (mr *MockRouterMockRecorder) HealthCheck(arg0 any) *gomock.Call { } // Initialize mocks base method. -func (m *MockRouter) Initialize(nodeID ids.NodeID, log logging.Logger, timeouts timeout.Manager, shutdownTimeout time.Duration, criticalChains set.Set[ids.ID], sybilProtectionEnabled bool, trackedSubnets set.Set[ids.ID], onFatal func(int), healthConfig HealthConfig, metricsNamespace string, metricsRegisterer prometheus.Registerer) error { +func (m *MockRouter) Initialize(nodeID ids.NodeID, log logging.Logger, timeouts timeout.Manager, shutdownTimeout time.Duration, criticalChains set.Set[ids.ID], sybilProtectionEnabled bool, trackedSubnets set.Set[ids.ID], onFatal func(int), healthConfig HealthConfig, reg prometheus.Registerer) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Initialize", nodeID, log, timeouts, shutdownTimeout, criticalChains, sybilProtectionEnabled, trackedSubnets, onFatal, healthConfig, metricsNamespace, metricsRegisterer) + ret := m.ctrl.Call(m, "Initialize", nodeID, log, timeouts, shutdownTimeout, criticalChains, sybilProtectionEnabled, trackedSubnets, onFatal, healthConfig, reg) ret0, _ := ret[0].(error) return ret0 } // Initialize indicates an expected call of Initialize. 
-func (mr *MockRouterMockRecorder) Initialize(nodeID, log, timeouts, shutdownTimeout, criticalChains, sybilProtectionEnabled, trackedSubnets, onFatal, healthConfig, metricsNamespace, metricsRegisterer any) *gomock.Call { +func (mr *MockRouterMockRecorder) Initialize(nodeID, log, timeouts, shutdownTimeout, criticalChains, sybilProtectionEnabled, trackedSubnets, onFatal, healthConfig, reg any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Initialize", reflect.TypeOf((*MockRouter)(nil).Initialize), nodeID, log, timeouts, shutdownTimeout, criticalChains, sybilProtectionEnabled, trackedSubnets, onFatal, healthConfig, metricsNamespace, metricsRegisterer) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Initialize", reflect.TypeOf((*MockRouter)(nil).Initialize), nodeID, log, timeouts, shutdownTimeout, criticalChains, sybilProtectionEnabled, trackedSubnets, onFatal, healthConfig, reg) } // RegisterRequest mocks base method. diff --git a/snow/networking/router/router.go b/snow/networking/router/router.go index 4df5614c25f..ef4765cb096 100644 --- a/snow/networking/router/router.go +++ b/snow/networking/router/router.go @@ -36,8 +36,7 @@ type Router interface { trackedSubnets set.Set[ids.ID], onFatal func(exitCode int), healthConfig HealthConfig, - metricsNamespace string, - metricsRegisterer prometheus.Registerer, + reg prometheus.Registerer, ) error Shutdown(context.Context) AddChain(ctx context.Context, chain handler.Handler) diff --git a/snow/networking/router/traced_router.go b/snow/networking/router/traced_router.go index 4c52bce0827..cbd2b6ed120 100644 --- a/snow/networking/router/traced_router.go +++ b/snow/networking/router/traced_router.go @@ -47,8 +47,7 @@ func (r *tracedRouter) Initialize( trackedSubnets set.Set[ids.ID], onFatal func(exitCode int), healthConfig HealthConfig, - metricsNamespace string, - metricsRegisterer prometheus.Registerer, + reg prometheus.Registerer, ) error { return 
r.router.Initialize( nodeID, @@ -60,8 +59,7 @@ func (r *tracedRouter) Initialize( trackedSubnets, onFatal, healthConfig, - metricsNamespace, - metricsRegisterer, + reg, ) } diff --git a/snow/networking/sender/sender_test.go b/snow/networking/sender/sender_test.go index 6bd2bc558c9..3a5bc93c155 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -58,7 +58,6 @@ func TestTimeout(t *testing.T) { TimeoutCoefficient: 1.25, }, benchlist, - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -85,7 +84,6 @@ func TestTimeout(t *testing.T) { set.Set[ids.ID]{}, nil, router.HealthConfig{}, - "", prometheus.NewRegistry(), )) @@ -333,7 +331,6 @@ func TestReliableMessages(t *testing.T) { TimeoutCoefficient: 1.25, }, benchlist, - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -361,7 +358,6 @@ func TestReliableMessages(t *testing.T) { set.Set[ids.ID]{}, nil, router.HealthConfig{}, - "", prometheus.NewRegistry(), )) @@ -489,7 +485,6 @@ func TestReliableMessagesToMyself(t *testing.T) { TimeoutCoefficient: 1.25, }, benchlist, - "", prometheus.NewRegistry(), ) require.NoError(err) @@ -517,7 +512,6 @@ func TestReliableMessagesToMyself(t *testing.T) { set.Set[ids.ID]{}, nil, router.HealthConfig{}, - "", prometheus.NewRegistry(), )) diff --git a/snow/networking/timeout/manager.go b/snow/networking/timeout/manager.go index 89a7cc56d86..85ea88abe52 100644 --- a/snow/networking/timeout/manager.go +++ b/snow/networking/timeout/manager.go @@ -71,13 +71,11 @@ type Manager interface { func NewManager( timeoutConfig *timer.AdaptiveTimeoutConfig, benchlistMgr benchlist.Manager, - metricsNamespace string, - metricsRegister prometheus.Registerer, + reg prometheus.Registerer, ) (Manager, error) { tm, err := timer.NewAdaptiveTimeoutManager( timeoutConfig, - metricsNamespace, - metricsRegister, + reg, ) if err != nil { return nil, fmt.Errorf("couldn't create timeout manager: %w", err) diff --git a/snow/networking/timeout/manager_test.go 
b/snow/networking/timeout/manager_test.go index 49a05f78d8d..5ed1aef7fae 100644 --- a/snow/networking/timeout/manager_test.go +++ b/snow/networking/timeout/manager_test.go @@ -27,7 +27,6 @@ func TestManagerFire(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist, - "", prometheus.NewRegistry(), ) require.NoError(t, err) diff --git a/utils/timer/adaptive_timeout_manager.go b/utils/timer/adaptive_timeout_manager.go index 493769018ba..5d8670bb56e 100644 --- a/utils/timer/adaptive_timeout_manager.go +++ b/utils/timer/adaptive_timeout_manager.go @@ -92,8 +92,7 @@ type adaptiveTimeoutManager struct { func NewAdaptiveTimeoutManager( config *AdaptiveTimeoutConfig, - metricsNamespace string, - metricsRegister prometheus.Registerer, + reg prometheus.Registerer, ) (AdaptiveTimeoutManager, error) { switch { case config.InitialTimeout > config.MaximumTimeout: @@ -108,24 +107,20 @@ func NewAdaptiveTimeoutManager( tm := &adaptiveTimeoutManager{ networkTimeoutMetric: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: metricsNamespace, - Name: "current_timeout", - Help: "Duration of current network timeout in nanoseconds", + Name: "current_timeout", + Help: "Duration of current network timeout in nanoseconds", }), avgLatency: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: metricsNamespace, - Name: "average_latency", - Help: "Average network latency in nanoseconds", + Name: "average_latency", + Help: "Average network latency in nanoseconds", }), numTimeouts: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: metricsNamespace, - Name: "timeouts", - Help: "Number of timed out requests", + Name: "timeouts", + Help: "Number of timed out requests", }), numPendingTimeouts: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: metricsNamespace, - Name: "pending_timeouts", - Help: "Number of pending timeouts", + Name: "pending_timeouts", + Help: "Number of pending timeouts", }), minimumTimeout: config.MinimumTimeout, maximumTimeout: config.MaximumTimeout, @@ 
-139,10 +134,10 @@ func NewAdaptiveTimeoutManager( tm.averager = math.NewAverager(float64(config.InitialTimeout), config.TimeoutHalflife, tm.clock.Time()) err := utils.Err( - metricsRegister.Register(tm.networkTimeoutMetric), - metricsRegister.Register(tm.avgLatency), - metricsRegister.Register(tm.numTimeouts), - metricsRegister.Register(tm.numPendingTimeouts), + reg.Register(tm.networkTimeoutMetric), + reg.Register(tm.avgLatency), + reg.Register(tm.numTimeouts), + reg.Register(tm.numPendingTimeouts), ) return tm, err } diff --git a/utils/timer/adaptive_timeout_manager_test.go b/utils/timer/adaptive_timeout_manager_test.go index 5b725303f38..e522b525272 100644 --- a/utils/timer/adaptive_timeout_manager_test.go +++ b/utils/timer/adaptive_timeout_manager_test.go @@ -83,7 +83,7 @@ func TestAdaptiveTimeoutManagerInit(t *testing.T) { } for _, test := range tests { - _, err := NewAdaptiveTimeoutManager(&test.config, "", prometheus.NewRegistry()) + _, err := NewAdaptiveTimeoutManager(&test.config, prometheus.NewRegistry()) require.ErrorIs(t, err, test.expectedErr) } } @@ -97,7 +97,6 @@ func TestAdaptiveTimeoutManager(t *testing.T) { TimeoutHalflife: 5 * time.Minute, TimeoutCoefficient: 1.25, }, - "", prometheus.NewRegistry(), ) require.NoError(t, err) diff --git a/vms/platformvm/validators/manager_benchmark_test.go b/vms/platformvm/validators/manager_benchmark_test.go index 912f3619e3e..ae8e919815d 100644 --- a/vms/platformvm/validators/manager_benchmark_test.go +++ b/vms/platformvm/validators/manager_benchmark_test.go @@ -49,7 +49,6 @@ func BenchmarkGetValidatorSet(b *testing.B) { b.TempDir(), nil, logging.NoLog{}, - "", prometheus.NewRegistry(), ) require.NoError(err) diff --git a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index d6a3fc41a85..9c9ec08fdbb 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1414,7 +1414,6 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { TimeoutCoefficient: 1.25, }, benchlist, - "", 
prometheus.NewRegistry(), ) require.NoError(err) @@ -1443,7 +1442,6 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { set.Set[ids.ID]{}, nil, router.HealthConfig{}, - "", prometheus.NewRegistry(), )) From 38c7bd675f6a4080d98838ee2ba729969f460001 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 18:08:40 -0400 Subject: [PATCH 14/53] seems to be working --- api/metrics/README.md | 2 + chains/linearizable_vm.go | 3 - chains/manager.go | 198 ++++++++++++++++++----- genesis/aliases.go | 4 +- genesis/genesis.go | 2 +- genesis/genesis_test.go | 6 +- node/node.go | 4 +- utils/constants/vm_ids.go | 3 +- vms/avm/block/builder/builder_test.go | 2 +- vms/avm/metrics/metrics.go | 30 ++-- vms/avm/metrics/tx_metrics.go | 12 +- vms/avm/vm.go | 12 +- vms/metervm/block_metrics.go | 55 ++++--- vms/metervm/block_vm.go | 23 +-- vms/metervm/metrics.go | 4 +- vms/metervm/vertex_metrics.go | 17 +- vms/metervm/vertex_vm.go | 22 +-- vms/platformvm/vm.go | 12 +- vms/proposervm/batched_vm_test.go | 2 + vms/proposervm/block_test.go | 3 + vms/proposervm/config.go | 4 + vms/proposervm/post_fork_option_test.go | 2 + vms/proposervm/state_syncable_vm_test.go | 2 + vms/proposervm/vm.go | 18 +-- vms/proposervm/vm_test.go | 12 ++ vms/rpcchainvm/vm_client.go | 17 +- vms/rpcchainvm/vm_server.go | 3 +- 27 files changed, 290 insertions(+), 184 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index 90bb69b7a4f..ea72015c93c 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -16,4 +16,6 @@ graph LR B -- $chainID, $isProposerVM --> M[meterchainvm] B -- $chainID --> N[meterdagvm] B -- $chainID --> O[proposervm] + B -- $chainID --> P[snowman] + B -- $chainID --> Q[avalanche] ``` diff --git a/chains/linearizable_vm.go b/chains/linearizable_vm.go index 0521e418667..e7e99b77cb9 100644 --- a/chains/linearizable_vm.go +++ b/chains/linearizable_vm.go @@ -6,7 +6,6 @@ package chains import ( "context" - "github.com/ava-labs/avalanchego/api/metrics" 
"github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/snow" @@ -29,7 +28,6 @@ type initializeOnLinearizeVM struct { vmToInitialize common.VM vmToLinearize *linearizeOnInitializeVM - registerer metrics.MultiGatherer ctx *snow.Context db database.Database genesisBytes []byte @@ -42,7 +40,6 @@ type initializeOnLinearizeVM struct { func (vm *initializeOnLinearizeVM) Linearize(ctx context.Context, stopVertexID ids.ID) error { vm.vmToLinearize.stopVertexID = stopVertexID - vm.ctx.Metrics = vm.registerer return vm.vmToInitialize.Initialize( ctx, vm.ctx, diff --git a/chains/manager.go b/chains/manager.go index cafd46417e4..558851099c8 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -13,7 +13,6 @@ import ( "sync" "time" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "github.com/ava-labs/avalanchego/api/health" @@ -53,6 +52,7 @@ import ( "github.com/ava-labs/avalanchego/utils/set" "github.com/ava-labs/avalanchego/version" "github.com/ava-labs/avalanchego/vms" + "github.com/ava-labs/avalanchego/vms/example/xsvm" "github.com/ava-labs/avalanchego/vms/fx" "github.com/ava-labs/avalanchego/vms/metervm" "github.com/ava-labs/avalanchego/vms/nftfx" @@ -76,6 +76,13 @@ import ( const ( defaultChannelSize = 1 initialQueueSize = 3 + + chainNamespace = constants.PlatformName + metric.NamespaceSeparator + "chain" + snowmanNamespace = chainNamespace + metric.NamespaceSeparator + "snowman" + avalancheNamespace = chainNamespace + metric.NamespaceSeparator + "avalanche" + proposervmNamespace = chainNamespace + metric.NamespaceSeparator + "proposervm" + meterchainvmNamespace = chainNamespace + metric.NamespaceSeparator + "meterchainvm" + meterdagvmNamespace = chainNamespace + metric.NamespaceSeparator + "meterdagvm" ) var ( @@ -261,10 +268,42 @@ type manager struct { // snowman++ related interface to allow validators retrieval validatorState validators.State + + snowmanGatherer metrics.MultiGatherer // 
chainID + avalancheGatherer metrics.MultiGatherer // chainID + proposervmGatherer metrics.MultiGatherer // chainID + meterChainVMGatherer metrics.MultiGatherer // chainID -> isProposervm + meterDAGVMGatherer metrics.MultiGatherer // chainID + vmGatherer map[ids.ID]metrics.MultiGatherer // vmID -> chainID } // New returns a new Manager func New(config *ManagerConfig) (Manager, error) { + snowmanGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(snowmanNamespace, snowmanGatherer); err != nil { + return nil, err + } + + avalancheGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(avalancheNamespace, avalancheGatherer); err != nil { + return nil, err + } + + proposervmGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(proposervmNamespace, proposervmGatherer); err != nil { + return nil, err + } + + meterChainVMGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(meterchainvmNamespace, meterChainVMGatherer); err != nil { + return nil, err + } + + meterDAGVMGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(meterdagvmNamespace, meterDAGVMGatherer); err != nil { + return nil, err + } + return &manager{ Aliaser: ids.NewAliaser(), ManagerConfig: *config, @@ -272,6 +311,13 @@ func New(config *ManagerConfig) (Manager, error) { chainsQueue: buffer.NewUnboundedBlockingDeque[ChainParameters](initialQueueSize), unblockChainCreatorCh: make(chan struct{}), chainCreatorShutdownCh: make(chan struct{}), + + snowmanGatherer: snowmanGatherer, + avalancheGatherer: avalancheGatherer, + proposervmGatherer: proposervmGatherer, + meterChainVMGatherer: meterChainVMGatherer, + meterDAGVMGatherer: meterDAGVMGatherer, + vmGatherer: make(map[ids.ID]metrics.MultiGatherer), }, nil } @@ -421,25 +467,25 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c return nil, fmt.Errorf("error while creating chain's log %w", err) 
} - consensusMetrics := prometheus.NewRegistry() - chainNamespace := metric.AppendNamespace(constants.PlatformName, primaryAlias) - if err := m.Metrics.Register(chainNamespace, consensusMetrics); err != nil { - return nil, fmt.Errorf("error while registering chain's metrics %w", err) + snowmanMetrics, err := metrics.MakeAndRegister( + m.snowmanGatherer, + primaryAlias, + ) + if err != nil { + return nil, err } - // This converts the prefix for all the Avalanche consensus metrics from - // `avalanche_{chainID}_` into `avalanche_{chainID}_avalanche_` so that - // there are no conflicts when registering the Snowman consensus metrics. - avalancheConsensusMetrics := prometheus.NewRegistry() - avalancheDAGNamespace := metric.AppendNamespace(chainNamespace, "avalanche") - if err := m.Metrics.Register(avalancheDAGNamespace, avalancheConsensusMetrics); err != nil { - return nil, fmt.Errorf("error while registering DAG metrics %w", err) + avalancheMetrics, err := metrics.MakeAndRegister( + m.avalancheGatherer, + primaryAlias, + ) + if err != nil { + return nil, err } - vmMetrics := metrics.NewMultiGatherer() - vmNamespace := metric.AppendNamespace(chainNamespace, "vm") - if err := m.Metrics.Register(vmNamespace, vmMetrics); err != nil { - return nil, fmt.Errorf("error while registering vm's metrics %w", err) + vmMetrics, err := m.getOrMakeVMRegisterer(chainParams.VMID, primaryAlias) + if err != nil { + return nil, err } ctx := &snow.ConsensusContext{ @@ -468,8 +514,8 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c BlockAcceptor: m.BlockAcceptorGroup, TxAcceptor: m.TxAcceptorGroup, VertexAcceptor: m.VertexAcceptorGroup, - Registerer: consensusMetrics, - AvalancheRegisterer: avalancheConsensusMetrics, + Registerer: snowmanMetrics, + AvalancheRegisterer: avalancheMetrics, } // Get a factory for the vm we want to use on our chain @@ -563,9 +609,10 @@ func (m *manager) createAvalancheChain( State: snow.Initializing, }) + primaryAlias := 
m.PrimaryAliasOrDefault(ctx.ChainID) meterDBReg, err := metrics.MakeAndRegister( m.MeterDBMetrics, - m.PrimaryAliasOrDefault(ctx.ChainID), + primaryAlias, ) if err != nil { return nil, err @@ -635,7 +682,15 @@ func (m *manager) createAvalancheChain( dagVM := vm if m.MeterVMEnabled { - dagVM = metervm.NewVertexVM(dagVM) + meterdagvmReg, err := metrics.MakeAndRegister( + m.meterDAGVMGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + dagVM = metervm.NewVertexVM(dagVM, meterdagvmReg) } if m.TracingEnabled { dagVM = tracedvm.NewVertexVM(dagVM, m.Tracer) @@ -653,17 +708,6 @@ func (m *manager) createAvalancheChain( }, ) - avalancheRegisterer := metrics.NewMultiGatherer() - snowmanRegisterer := metrics.NewMultiGatherer() - if err := ctx.Context.Metrics.Register("avalanche", avalancheRegisterer); err != nil { - return nil, err - } - if err := ctx.Context.Metrics.Register("", snowmanRegisterer); err != nil { - return nil, err - } - - ctx.Context.Metrics = avalancheRegisterer - // The channel through which a VM may send messages to the consensus engine // VM uses this channel to notify engine that a block is ready to be made msgChan := make(chan common.Message, defaultChannelSize) @@ -703,14 +747,20 @@ func (m *manager) createAvalancheChain( zap.Uint64("numHistoricalBlocks", numHistoricalBlocks), ) - chainAlias := m.PrimaryAliasOrDefault(ctx.ChainID) - // Note: this does not use [dagVM] to ensure we use the [vm]'s height index. 
untracedVMWrappedInsideProposerVM := NewLinearizeOnInitializeVM(vm) var vmWrappedInsideProposerVM block.ChainVM = untracedVMWrappedInsideProposerVM if m.TracingEnabled { - vmWrappedInsideProposerVM = tracedvm.NewBlockVM(vmWrappedInsideProposerVM, chainAlias, m.Tracer) + vmWrappedInsideProposerVM = tracedvm.NewBlockVM(vmWrappedInsideProposerVM, primaryAlias, m.Tracer) + } + + proposervmReg, err := metrics.MakeAndRegister( + m.proposervmGatherer, + primaryAlias, + ) + if err != nil { + return nil, err } // Note: vmWrappingProposerVM is the VM that the Snowman engines should be @@ -725,11 +775,20 @@ func (m *manager) createAvalancheChain( NumHistoricalBlocks: numHistoricalBlocks, StakingLeafSigner: m.StakingTLSSigner, StakingCertLeaf: m.StakingTLSCert, + Registerer: proposervmReg, }, ) if m.MeterVMEnabled { - vmWrappingProposerVM = metervm.NewBlockVM(vmWrappingProposerVM) + meterchainvmReg, err := metrics.MakeAndRegister( + m.meterChainVMGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + vmWrappingProposerVM = metervm.NewBlockVM(vmWrappingProposerVM, meterchainvmReg) } if m.TracingEnabled { vmWrappingProposerVM = tracedvm.NewBlockVM(vmWrappingProposerVM, "proposervm", m.Tracer) @@ -742,7 +801,6 @@ func (m *manager) createAvalancheChain( vmToInitialize: vmWrappingProposerVM, vmToLinearize: untracedVMWrappedInsideProposerVM, - registerer: snowmanRegisterer, ctx: ctx.Context, db: vmDB, genesisBytes: genesisData, @@ -930,12 +988,12 @@ func (m *manager) createAvalancheChain( }) // Register health check for this chain - if err := m.Health.RegisterHealthCheck(chainAlias, h, ctx.SubnetID.String()); err != nil { - return nil, fmt.Errorf("couldn't add health check for chain %s: %w", chainAlias, err) + if err := m.Health.RegisterHealthCheck(primaryAlias, h, ctx.SubnetID.String()); err != nil { + return nil, fmt.Errorf("couldn't add health check for chain %s: %w", primaryAlias, err) } return &chain{ - Name: chainAlias, + Name: primaryAlias, Context: ctx, 
VM: dagVM, Handler: h, @@ -960,9 +1018,10 @@ func (m *manager) createSnowmanChain( State: snow.Initializing, }) + primaryAlias := m.PrimaryAliasOrDefault(ctx.ChainID) meterDBReg, err := metrics.MakeAndRegister( m.MeterDBMetrics, - m.PrimaryAliasOrDefault(ctx.ChainID), + primaryAlias, ) if err != nil { return nil, err @@ -1069,6 +1128,14 @@ func (m *manager) createSnowmanChain( vm = tracedvm.NewBlockVM(vm, chainAlias, m.Tracer) } + proposervmReg, err := metrics.MakeAndRegister( + m.proposervmGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + vm = proposervm.New( vm, proposervm.Config{ @@ -1079,11 +1146,20 @@ func (m *manager) createSnowmanChain( NumHistoricalBlocks: numHistoricalBlocks, StakingLeafSigner: m.StakingTLSSigner, StakingCertLeaf: m.StakingTLSCert, + Registerer: proposervmReg, }, ) if m.MeterVMEnabled { - vm = metervm.NewBlockVM(vm) + meterchainvmReg, err := metrics.MakeAndRegister( + m.meterChainVMGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + vm = metervm.NewBlockVM(vm, meterchainvmReg) } if m.TracingEnabled { vm = tracedvm.NewBlockVM(vm, "proposervm", m.Tracer) @@ -1405,3 +1481,43 @@ func (m *manager) getChainConfig(id ids.ID) (ChainConfig, error) { return ChainConfig{}, nil } + +func (m *manager) getOrMakeVMRegisterer(vmID ids.ID, chainAlias string) (metrics.MultiGatherer, error) { + vmGatherer, ok := m.vmGatherer[vmID] + if !ok { + vmGatherer = metrics.NewLabelGatherer("chain") + + // TODO: Cleanup vm aliasing + var vmIDStr string + switch vmID { + case constants.PlatformVMID: + vmIDStr = "platformvm" + case constants.AVMID: + vmIDStr = "avm" + case constants.CorethID: + vmIDStr = "coreth" + case constants.SubnetEVMID: + vmIDStr = "subnetevm" + case xsvm.ID: + vmIDStr = "xsvm" + default: + vmIDStr = vmID.String() + } + + err := m.Metrics.Register( + metric.AppendNamespace(chainNamespace, vmIDStr), + vmGatherer, + ) + if err != nil { + return nil, err + } + m.vmGatherer[vmID] = vmGatherer + } + + 
chainReg := metrics.NewPrefixGatherer() + err := vmGatherer.Register( + chainAlias, + chainReg, + ) + return chainReg, err +} diff --git a/genesis/aliases.go b/genesis/aliases.go index 2c0407d1122..d2af6fbb0ed 100644 --- a/genesis/aliases.go +++ b/genesis/aliases.go @@ -22,7 +22,7 @@ var ( VMAliases = map[ids.ID][]string{ constants.PlatformVMID: {"platform"}, constants.AVMID: {"avm"}, - constants.EVMID: {"evm"}, + constants.CorethID: {"evm"}, secp256k1fx.ID: {"secp256k1fx"}, nftfx.ID: {"nftfx"}, propertyfx.ID: {"propertyfx"}, @@ -60,7 +60,7 @@ func Aliases(genesisBytes []byte) (map[string][]string, map[ids.ID][]string, err path.Join(constants.ChainAliasPrefix, "avm"), } chainAliases[chainID] = XChainAliases - case constants.EVMID: + case constants.CorethID: apiAliases[endpoint] = []string{ "C", "evm", diff --git a/genesis/genesis.go b/genesis/genesis.go index e25088a59a1..29657739860 100644 --- a/genesis/genesis.go +++ b/genesis/genesis.go @@ -455,7 +455,7 @@ func FromConfig(config *Config) ([]byte, ids.ID, error) { { GenesisData: genesisStr, SubnetID: constants.PrimaryNetworkID, - VMID: constants.EVMID, + VMID: constants.CorethID, Name: "C-Chain", }, } diff --git a/genesis/genesis_test.go b/genesis/genesis_test.go index 679fc05be91..24c39c36676 100644 --- a/genesis/genesis_test.go +++ b/genesis/genesis_test.go @@ -390,7 +390,7 @@ func TestVMGenesis(t *testing.T) { expectedID: "2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM", }, { - vmID: constants.EVMID, + vmID: constants.CorethID, expectedID: "2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5", }, }, @@ -403,7 +403,7 @@ func TestVMGenesis(t *testing.T) { expectedID: "2JVSBoinj9C2J33VntvzYtVJNZdN2NKiwwKjcumHUWEb5DbBrm", }, { - vmID: constants.EVMID, + vmID: constants.CorethID, expectedID: "yH8D7ThNJkxmtkuv2jgBa4P1Rn3Qpr4pPr7QYNfcdoS6k6HWp", }, }, @@ -416,7 +416,7 @@ func TestVMGenesis(t *testing.T) { expectedID: "2eNy1mUFdmaxXNj1eQHUe7Np4gju9sJsEtWQ4MX3ToiNKuADed", }, { - vmID: constants.EVMID, + vmID: 
constants.CorethID, expectedID: "2CA6j5zYzasynPsFeNoqWkmTCt3VScMvXUZHbfDJ8k3oGzAPtU", }, }, diff --git a/node/node.go b/node/node.go index 43e7473176f..5b4425f2ee5 100644 --- a/node/node.go +++ b/node/node.go @@ -1039,7 +1039,7 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { } xChainID := createAVMTx.ID() - createEVMTx, err := genesis.VMGenesis(n.Config.GenesisBytes, constants.EVMID) + createEVMTx, err := genesis.VMGenesis(n.Config.GenesisBytes, constants.CorethID) if err != nil { return err } @@ -1202,7 +1202,7 @@ func (n *Node) initVMs() error { EUpgradeTime: eUpgradeTime, }, }), - n.VMManager.RegisterFactory(context.TODO(), constants.EVMID, &coreth.Factory{}), + n.VMManager.RegisterFactory(context.TODO(), constants.CorethID, &coreth.Factory{}), ) if err != nil { return err diff --git a/utils/constants/vm_ids.go b/utils/constants/vm_ids.go index 9fda498f1f3..cbbee9a04e0 100644 --- a/utils/constants/vm_ids.go +++ b/utils/constants/vm_ids.go @@ -8,5 +8,6 @@ import "github.com/ava-labs/avalanchego/ids" var ( PlatformVMID = ids.ID{'p', 'l', 'a', 't', 'f', 'o', 'r', 'm', 'v', 'm'} AVMID = ids.ID{'a', 'v', 'm'} - EVMID = ids.ID{'e', 'v', 'm'} + CorethID = ids.ID{'e', 'v', 'm'} + SubnetEVMID = ids.ID{'s', 'u', 'b', 'n', 'e', 't', 'e', 'v', 'm'} ) diff --git a/vms/avm/block/builder/builder_test.go b/vms/avm/block/builder/builder_test.go index 89f043844b5..36159598b70 100644 --- a/vms/avm/block/builder/builder_test.go +++ b/vms/avm/block/builder/builder_test.go @@ -544,7 +544,7 @@ func TestBlockBuilderAddLocalTx(t *testing.T) { state.AddBlock(parentBlk) state.SetLastAccepted(parentBlk.ID()) - metrics, err := metrics.New("", registerer) + metrics, err := metrics.New(registerer) require.NoError(err) manager := blkexecutor.NewManager(mempool, metrics, state, backend, clk, onAccept) diff --git a/vms/avm/metrics/metrics.go b/vms/avm/metrics/metrics.go index 9e4053e1fcc..121c89a0086 100644 --- a/vms/avm/metrics/metrics.go +++ b/vms/avm/metrics/metrics.go @@ 
-66,38 +66,32 @@ func (m *metrics) MarkTxAccepted(tx *txs.Tx) error { return tx.Unsigned.Visit(m.txMetrics) } -func New( - namespace string, - registerer prometheus.Registerer, -) (Metrics, error) { - txMetrics, err := newTxMetrics(namespace, registerer) +func New(reg prometheus.Registerer) (Metrics, error) { + txMetrics, err := newTxMetrics(reg) errs := wrappers.Errs{Err: err} m := &metrics{txMetrics: txMetrics} m.numTxRefreshes = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tx_refreshes", - Help: "Number of times unique txs have been refreshed", + Name: "tx_refreshes", + Help: "Number of times unique txs have been refreshed", }) m.numTxRefreshHits = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tx_refresh_hits", - Help: "Number of times unique txs have not been unique, but were cached", + Name: "tx_refresh_hits", + Help: "Number of times unique txs have not been unique, but were cached", }) m.numTxRefreshMisses = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tx_refresh_misses", - Help: "Number of times unique txs have not been unique and weren't cached", + Name: "tx_refresh_misses", + Help: "Number of times unique txs have not been unique and weren't cached", }) - apiRequestMetric, err := metric.NewAPIInterceptor(namespace, registerer) + apiRequestMetric, err := metric.NewAPIInterceptor("", reg) m.APIInterceptor = apiRequestMetric errs.Add( err, - registerer.Register(m.numTxRefreshes), - registerer.Register(m.numTxRefreshHits), - registerer.Register(m.numTxRefreshMisses), + reg.Register(m.numTxRefreshes), + reg.Register(m.numTxRefreshHits), + reg.Register(m.numTxRefreshMisses), ) return m, errs.Err } diff --git a/vms/avm/metrics/tx_metrics.go b/vms/avm/metrics/tx_metrics.go index 8b9bf2c0ed4..f1551ad027d 100644 --- a/vms/avm/metrics/tx_metrics.go +++ b/vms/avm/metrics/tx_metrics.go @@ -21,21 +21,17 @@ type txMetrics struct { numTxs *prometheus.CounterVec } -func 
newTxMetrics( - namespace string, - registerer prometheus.Registerer, -) (*txMetrics, error) { +func newTxMetrics(reg prometheus.Registerer) (*txMetrics, error) { m := &txMetrics{ numTxs: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "txs_accepted", - Help: "number of transactions accepted", + Name: "txs_accepted", + Help: "number of transactions accepted", }, txLabels, ), } - return m, registerer.Register(m.numTxs) + return m, reg.Register(m.numTxs) } func (m *txMetrics) BaseTx(*txs.BaseTx) error { diff --git a/vms/avm/vm.go b/vms/avm/vm.go index b8fe322ef95..6a455132c1a 100644 --- a/vms/avm/vm.go +++ b/vms/avm/vm.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" + "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/cache" "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/database/versiondb" @@ -33,7 +34,6 @@ import ( "github.com/ava-labs/avalanchego/version" "github.com/ava-labs/avalanchego/vms/avm/block" "github.com/ava-labs/avalanchego/vms/avm/config" - "github.com/ava-labs/avalanchego/vms/avm/metrics" "github.com/ava-labs/avalanchego/vms/avm/network" "github.com/ava-labs/avalanchego/vms/avm/state" "github.com/ava-labs/avalanchego/vms/avm/txs" @@ -47,6 +47,7 @@ import ( blockbuilder "github.com/ava-labs/avalanchego/vms/avm/block/builder" blockexecutor "github.com/ava-labs/avalanchego/vms/avm/block/executor" extensions "github.com/ava-labs/avalanchego/vms/avm/fxs" + avmmetrics "github.com/ava-labs/avalanchego/vms/avm/metrics" txexecutor "github.com/ava-labs/avalanchego/vms/avm/txs/executor" xmempool "github.com/ava-labs/avalanchego/vms/avm/txs/mempool" ) @@ -66,7 +67,7 @@ type VM struct { config.Config - metrics metrics.Metrics + metrics avmmetrics.Metrics avax.AddressManager ids.Aliaser @@ -173,16 +174,15 @@ func (vm *VM) Initialize( zap.Reflect("config", avmConfig), ) - registerer := prometheus.NewRegistry() - if err := 
ctx.Metrics.Register("", registerer); err != nil { + vm.registerer, err = metrics.MakeAndRegister(ctx.Metrics, "") + if err != nil { return err } - vm.registerer = registerer vm.connectedPeers = make(map[ids.NodeID]*version.Application) // Initialize metrics as soon as possible - vm.metrics, err = metrics.New("", registerer) + vm.metrics, err = avmmetrics.New(vm.registerer) if err != nil { return fmt.Errorf("failed to initialize metrics: %w", err) } diff --git a/vms/metervm/block_metrics.go b/vms/metervm/block_metrics.go index 0a6473f617b..2bdc247c406 100644 --- a/vms/metervm/block_metrics.go +++ b/vms/metervm/block_metrics.go @@ -49,43 +49,42 @@ func (m *blockMetrics) Initialize( supportsBlockBuildingWithContext bool, supportsBatchedFetching bool, supportsStateSync bool, - namespace string, reg prometheus.Registerer, ) error { errs := wrappers.Errs{} - m.buildBlock = newAverager(namespace, "build_block", reg, &errs) - m.buildBlockErr = newAverager(namespace, "build_block_err", reg, &errs) - m.parseBlock = newAverager(namespace, "parse_block", reg, &errs) - m.parseBlockErr = newAverager(namespace, "parse_block_err", reg, &errs) - m.getBlock = newAverager(namespace, "get_block", reg, &errs) - m.getBlockErr = newAverager(namespace, "get_block_err", reg, &errs) - m.setPreference = newAverager(namespace, "set_preference", reg, &errs) - m.lastAccepted = newAverager(namespace, "last_accepted", reg, &errs) - m.verify = newAverager(namespace, "verify", reg, &errs) - m.verifyErr = newAverager(namespace, "verify_err", reg, &errs) - m.accept = newAverager(namespace, "accept", reg, &errs) - m.reject = newAverager(namespace, "reject", reg, &errs) - m.shouldVerifyWithContext = newAverager(namespace, "should_verify_with_context", reg, &errs) - m.verifyWithContext = newAverager(namespace, "verify_with_context", reg, &errs) - m.verifyWithContextErr = newAverager(namespace, "verify_with_context_err", reg, &errs) - m.getBlockIDAtHeight = newAverager(namespace, 
"get_block_id_at_height", reg, &errs) + m.buildBlock = newAverager("build_block", reg, &errs) + m.buildBlockErr = newAverager("build_block_err", reg, &errs) + m.parseBlock = newAverager("parse_block", reg, &errs) + m.parseBlockErr = newAverager("parse_block_err", reg, &errs) + m.getBlock = newAverager("get_block", reg, &errs) + m.getBlockErr = newAverager("get_block_err", reg, &errs) + m.setPreference = newAverager("set_preference", reg, &errs) + m.lastAccepted = newAverager("last_accepted", reg, &errs) + m.verify = newAverager("verify", reg, &errs) + m.verifyErr = newAverager("verify_err", reg, &errs) + m.accept = newAverager("accept", reg, &errs) + m.reject = newAverager("reject", reg, &errs) + m.shouldVerifyWithContext = newAverager("should_verify_with_context", reg, &errs) + m.verifyWithContext = newAverager("verify_with_context", reg, &errs) + m.verifyWithContextErr = newAverager("verify_with_context_err", reg, &errs) + m.getBlockIDAtHeight = newAverager("get_block_id_at_height", reg, &errs) if supportsBlockBuildingWithContext { - m.buildBlockWithContext = newAverager(namespace, "build_block_with_context", reg, &errs) - m.buildBlockWithContextErr = newAverager(namespace, "build_block_with_context_err", reg, &errs) + m.buildBlockWithContext = newAverager("build_block_with_context", reg, &errs) + m.buildBlockWithContextErr = newAverager("build_block_with_context_err", reg, &errs) } if supportsBatchedFetching { - m.getAncestors = newAverager(namespace, "get_ancestors", reg, &errs) - m.batchedParseBlock = newAverager(namespace, "batched_parse_block", reg, &errs) + m.getAncestors = newAverager("get_ancestors", reg, &errs) + m.batchedParseBlock = newAverager("batched_parse_block", reg, &errs) } if supportsStateSync { - m.stateSyncEnabled = newAverager(namespace, "state_sync_enabled", reg, &errs) - m.getOngoingSyncStateSummary = newAverager(namespace, "get_ongoing_state_sync_summary", reg, &errs) - m.getLastStateSummary = newAverager(namespace, 
"get_last_state_summary", reg, &errs) - m.parseStateSummary = newAverager(namespace, "parse_state_summary", reg, &errs) - m.parseStateSummaryErr = newAverager(namespace, "parse_state_summary_err", reg, &errs) - m.getStateSummary = newAverager(namespace, "get_state_summary", reg, &errs) - m.getStateSummaryErr = newAverager(namespace, "get_state_summary_err", reg, &errs) + m.stateSyncEnabled = newAverager("state_sync_enabled", reg, &errs) + m.getOngoingSyncStateSummary = newAverager("get_ongoing_state_sync_summary", reg, &errs) + m.getLastStateSummary = newAverager("get_last_state_summary", reg, &errs) + m.parseStateSummary = newAverager("parse_state_summary", reg, &errs) + m.parseStateSummaryErr = newAverager("parse_state_summary_err", reg, &errs) + m.getStateSummary = newAverager("get_state_summary", reg, &errs) + m.getStateSummaryErr = newAverager("get_state_summary_err", reg, &errs) } return errs.Err } diff --git a/vms/metervm/block_vm.go b/vms/metervm/block_vm.go index 0ecb982c474..3055d3b0279 100644 --- a/vms/metervm/block_vm.go +++ b/vms/metervm/block_vm.go @@ -8,7 +8,6 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/snow" @@ -32,10 +31,14 @@ type blockVM struct { ssVM block.StateSyncableVM blockMetrics - clock mockable.Clock + registry *prometheus.Registry + clock mockable.Clock } -func NewBlockVM(vm block.ChainVM) block.ChainVM { +func NewBlockVM( + vm block.ChainVM, + reg *prometheus.Registry, +) block.ChainVM { buildBlockVM, _ := vm.(block.BuildBlockWithContextChainVM) batchedVM, _ := vm.(block.BatchedChainVM) ssVM, _ := vm.(block.StateSyncableVM) @@ -44,6 +47,7 @@ func NewBlockVM(vm block.ChainVM) block.ChainVM { buildBlockVM: buildBlockVM, batchedVM: batchedVM, ssVM: ssVM, + registry: reg, } } @@ -58,27 +62,16 @@ func (vm *blockVM) Initialize( fxs []*common.Fx, appSender 
common.AppSender, ) error { - registerer := prometheus.NewRegistry() err := vm.blockMetrics.Initialize( vm.buildBlockVM != nil, vm.batchedVM != nil, vm.ssVM != nil, - "", - registerer, + vm.registry, ) if err != nil { return err } - multiGatherer := metrics.NewMultiGatherer() - if err := chainCtx.Metrics.Register("metervm", registerer); err != nil { - return err - } - if err := chainCtx.Metrics.Register("", multiGatherer); err != nil { - return err - } - chainCtx.Metrics = multiGatherer - return vm.ChainVM.Initialize(ctx, chainCtx, db, genesisBytes, upgradeBytes, configBytes, toEngine, fxs, appSender) } diff --git a/vms/metervm/metrics.go b/vms/metervm/metrics.go index d4c9304e769..09d85a77058 100644 --- a/vms/metervm/metrics.go +++ b/vms/metervm/metrics.go @@ -10,9 +10,9 @@ import ( "github.com/ava-labs/avalanchego/utils/wrappers" ) -func newAverager(namespace, name string, reg prometheus.Registerer, errs *wrappers.Errs) metric.Averager { +func newAverager(name string, reg prometheus.Registerer, errs *wrappers.Errs) metric.Averager { return metric.NewAveragerWithErrs( - namespace, + "", name, "time (in ns) of a "+name, reg, diff --git a/vms/metervm/vertex_metrics.go b/vms/metervm/vertex_metrics.go index 67caa50b610..04096f2ae03 100644 --- a/vms/metervm/vertex_metrics.go +++ b/vms/metervm/vertex_metrics.go @@ -19,16 +19,13 @@ type vertexMetrics struct { reject metric.Averager } -func (m *vertexMetrics) Initialize( - namespace string, - reg prometheus.Registerer, -) error { +func (m *vertexMetrics) Initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} - m.parse = newAverager(namespace, "parse_tx", reg, &errs) - m.parseErr = newAverager(namespace, "parse_tx_err", reg, &errs) - m.verify = newAverager(namespace, "verify_tx", reg, &errs) - m.verifyErr = newAverager(namespace, "verify_tx_err", reg, &errs) - m.accept = newAverager(namespace, "accept", reg, &errs) - m.reject = newAverager(namespace, "reject", reg, &errs) + m.parse = newAverager("parse_tx", 
reg, &errs) + m.parseErr = newAverager("parse_tx_err", reg, &errs) + m.verify = newAverager("verify_tx", reg, &errs) + m.verifyErr = newAverager("verify_tx_err", reg, &errs) + m.accept = newAverager("accept", reg, &errs) + m.reject = newAverager("reject", reg, &errs) return errs.Err } diff --git a/vms/metervm/vertex_vm.go b/vms/metervm/vertex_vm.go index 7cd112ffde2..5ecfe8f4457 100644 --- a/vms/metervm/vertex_vm.go +++ b/vms/metervm/vertex_vm.go @@ -8,7 +8,6 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/snow" "github.com/ava-labs/avalanchego/snow/consensus/snowstorm" @@ -22,16 +21,21 @@ var ( _ snowstorm.Tx = (*meterTx)(nil) ) -func NewVertexVM(vm vertex.LinearizableVMWithEngine) vertex.LinearizableVMWithEngine { +func NewVertexVM( + vm vertex.LinearizableVMWithEngine, + reg *prometheus.Registry, +) vertex.LinearizableVMWithEngine { return &vertexVM{ LinearizableVMWithEngine: vm, + registry: reg, } } type vertexVM struct { vertex.LinearizableVMWithEngine vertexMetrics - clock mockable.Clock + registry *prometheus.Registry + clock mockable.Clock } func (vm *vertexVM) Initialize( @@ -45,20 +49,10 @@ func (vm *vertexVM) Initialize( fxs []*common.Fx, appSender common.AppSender, ) error { - registerer := prometheus.NewRegistry() - if err := vm.vertexMetrics.Initialize("", registerer); err != nil { + if err := vm.vertexMetrics.Initialize(vm.registry); err != nil { return err } - multiGatherer := metrics.NewMultiGatherer() - if err := chainCtx.Metrics.Register("metervm", registerer); err != nil { - return err - } - if err := chainCtx.Metrics.Register("", multiGatherer); err != nil { - return err - } - chainCtx.Metrics = multiGatherer - return vm.LinearizableVMWithEngine.Initialize( ctx, chainCtx, diff --git a/vms/platformvm/vm.go b/vms/platformvm/vm.go index f33451b18d4..14f540676c2 100644 --- a/vms/platformvm/vm.go +++ 
b/vms/platformvm/vm.go @@ -12,9 +12,9 @@ import ( "time" "github.com/gorilla/rpc/v2" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" + "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/cache" "github.com/ava-labs/avalanchego/codec" "github.com/ava-labs/avalanchego/codec/linearcodec" @@ -35,7 +35,6 @@ import ( "github.com/ava-labs/avalanchego/vms/platformvm/block" "github.com/ava-labs/avalanchego/vms/platformvm/config" "github.com/ava-labs/avalanchego/vms/platformvm/fx" - "github.com/ava-labs/avalanchego/vms/platformvm/metrics" "github.com/ava-labs/avalanchego/vms/platformvm/network" "github.com/ava-labs/avalanchego/vms/platformvm/reward" "github.com/ava-labs/avalanchego/vms/platformvm/state" @@ -47,6 +46,7 @@ import ( snowmanblock "github.com/ava-labs/avalanchego/snow/engine/snowman/block" blockbuilder "github.com/ava-labs/avalanchego/vms/platformvm/block/builder" blockexecutor "github.com/ava-labs/avalanchego/vms/platformvm/block/executor" + platformvmmetrics "github.com/ava-labs/avalanchego/vms/platformvm/metrics" txexecutor "github.com/ava-labs/avalanchego/vms/platformvm/txs/executor" pmempool "github.com/ava-labs/avalanchego/vms/platformvm/txs/mempool" pvalidators "github.com/ava-labs/avalanchego/vms/platformvm/validators" @@ -65,7 +65,7 @@ type VM struct { *network.Network validators.State - metrics metrics.Metrics + metrics platformvmmetrics.Metrics // Used to get time. Useful for faking time during tests. 
clock mockable.Clock @@ -113,13 +113,13 @@ func (vm *VM) Initialize( } chainCtx.Log.Info("using VM execution config", zap.Reflect("config", execConfig)) - registerer := prometheus.NewRegistry() - if err := chainCtx.Metrics.Register("", registerer); err != nil { + registerer, err := metrics.MakeAndRegister(chainCtx.Metrics, "") + if err != nil { return err } // Initialize metrics as soon as possible - vm.metrics, err = metrics.New("", registerer) + vm.metrics, err = platformvmmetrics.New("", registerer) if err != nil { return fmt.Errorf("failed to initialize metrics: %w", err) } diff --git a/vms/proposervm/batched_vm_test.go b/vms/proposervm/batched_vm_test.go index a6e9ffb2b1d..be134823c89 100644 --- a/vms/proposervm/batched_vm_test.go +++ b/vms/proposervm/batched_vm_test.go @@ -9,6 +9,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/ava-labs/avalanchego/database" @@ -868,6 +869,7 @@ func initTestRemoteProposerVM( NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) diff --git a/vms/proposervm/block_test.go b/vms/proposervm/block_test.go index d8c867058f5..12b18a75d68 100644 --- a/vms/proposervm/block_test.go +++ b/vms/proposervm/block_test.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" @@ -74,6 +75,7 @@ func TestPostForkCommonComponents_buildChild(t *testing.T) { DurangoTime: time.Unix(0, 0), StakingCertLeaf: &staking.Certificate{}, StakingLeafSigner: pk, + Registerer: prometheus.NewRegistry(), }, ChainVM: innerVM, blockBuilderVM: innerBlockBuilderVM, @@ -386,6 +388,7 @@ func TestPostDurangoBuildChildResetScheduler(t *testing.T) { DurangoTime: time.Unix(0, 0), StakingCertLeaf: &staking.Certificate{}, StakingLeafSigner: pk, + Registerer: prometheus.NewRegistry(), }, 
ChainVM: block.NewMockChainVM(ctrl), ctx: &snow.Context{ diff --git a/vms/proposervm/config.go b/vms/proposervm/config.go index a7eb4ff0db9..7a2f2893f0a 100644 --- a/vms/proposervm/config.go +++ b/vms/proposervm/config.go @@ -7,6 +7,8 @@ import ( "crypto" "time" + "github.com/prometheus/client_golang/prometheus" + "github.com/ava-labs/avalanchego/staking" ) @@ -32,6 +34,8 @@ type Config struct { // Block certificate StakingCertLeaf *staking.Certificate + + Registerer *prometheus.Registry } func (c *Config) IsDurangoActivated(timestamp time.Time) bool { diff --git a/vms/proposervm/post_fork_option_test.go b/vms/proposervm/post_fork_option_test.go index 39c6434dddf..43b7d5f5b90 100644 --- a/vms/proposervm/post_fork_option_test.go +++ b/vms/proposervm/post_fork_option_test.go @@ -9,6 +9,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/ava-labs/avalanchego/database" @@ -548,6 +549,7 @@ func TestOptionTimestampValidity(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) diff --git a/vms/proposervm/state_syncable_vm_test.go b/vms/proposervm/state_syncable_vm_test.go index 4f44adc0bf7..479c311b5fe 100644 --- a/vms/proposervm/state_syncable_vm_test.go +++ b/vms/proposervm/state_syncable_vm_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/ava-labs/avalanchego/database" @@ -65,6 +66,7 @@ func helperBuildStateSyncTestObjects(t *testing.T) (*fullVM, *VM) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) diff --git a/vms/proposervm/vm.go b/vms/proposervm/vm.go index dfff407a03d..4442aca65a9 100644 --- a/vms/proposervm/vm.go +++ b/vms/proposervm/vm.go @@ -9,10 +9,8 @@ import ( 
"fmt" "time" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" - "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/cache" "github.com/ava-labs/avalanchego/cache/metercacher" "github.com/ava-labs/avalanchego/database" @@ -130,21 +128,9 @@ func (vm *VM) Initialize( fxs []*common.Fx, appSender common.AppSender, ) error { - // TODO: Add a helper for this metrics override, it is performed in multiple - // places. - registerer := prometheus.NewRegistry() - if err := chainCtx.Metrics.Register("proposervm", registerer); err != nil { - return err - } - multiGatherer := metrics.NewMultiGatherer() - if err := chainCtx.Metrics.Register("", multiGatherer); err != nil { - return err - } - chainCtx.Metrics = multiGatherer - vm.ctx = chainCtx vm.db = versiondb.New(prefixdb.New(dbPrefix, db)) - baseState, err := state.NewMetered(vm.db, "state", registerer) + baseState, err := state.NewMetered(vm.db, "state", vm.Config.Registerer) if err != nil { return err } @@ -153,7 +139,7 @@ func (vm *VM) Initialize( vm.Tree = tree.New() innerBlkCache, err := metercacher.New( "inner_block_cache", - registerer, + vm.Config.Registerer, cache.NewSizedLRU( innerBlkCacheSize, cachedBlockSize, diff --git a/vms/proposervm/vm_test.go b/vms/proposervm/vm_test.go index fb717c203f7..a2536375d48 100644 --- a/vms/proposervm/vm_test.go +++ b/vms/proposervm/vm_test.go @@ -12,6 +12,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" @@ -132,6 +133,7 @@ func initTestProposerVM( NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -812,6 +814,7 @@ func TestExpiredBuildBlock(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -1128,6 +1131,7 @@ func 
TestInnerVMRollback(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -1206,6 +1210,7 @@ func TestInnerVMRollback(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -1608,6 +1613,7 @@ func TestRejectedHeightNotIndexed(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -1779,6 +1785,7 @@ func TestRejectedOptionHeightNotIndexed(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -1913,6 +1920,7 @@ func TestVMInnerBlkCache(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -2123,6 +2131,7 @@ func TestVM_VerifyBlockWithContext(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -2324,6 +2333,7 @@ func TestHistoricalBlockDeletion(t *testing.T) { NumHistoricalBlocks: DefaultNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -2415,6 +2425,7 @@ func TestHistoricalBlockDeletion(t *testing.T) { NumHistoricalBlocks: numHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) @@ -2459,6 +2470,7 @@ func TestHistoricalBlockDeletion(t *testing.T) { NumHistoricalBlocks: newNumHistoricalBlocks, StakingLeafSigner: pTestSigner, StakingCertLeaf: pTestCert, + Registerer: prometheus.NewRegistry(), }, ) diff --git 
a/vms/rpcchainvm/vm_client.go b/vms/rpcchainvm/vm_client.go index 038a728c0ff..6e6417725f1 100644 --- a/vms/rpcchainvm/vm_client.go +++ b/vms/rpcchainvm/vm_client.go @@ -18,6 +18,7 @@ import ( "google.golang.org/protobuf/types/known/emptypb" "github.com/ava-labs/avalanchego/api/keystore/gkeystore" + "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/chains/atomic/gsharedmemory" "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/database/rpcdb" @@ -135,15 +136,19 @@ func (vm *VMClient) Initialize( } // Register metrics - registerer := prometheus.NewRegistry() - vm.grpcServerMetrics = grpc_prometheus.NewServerMetrics() - if err := registerer.Register(vm.grpcServerMetrics); err != nil { + serverReg, err := metrics.MakeAndRegister( + chainCtx.Metrics, + "rpcchainvm", + ) + if err != nil { return err } - if err := chainCtx.Metrics.Register("rpcchainvm", registerer); err != nil { + vm.grpcServerMetrics = grpc_prometheus.NewServerMetrics() + if err := serverReg.Register(vm.grpcServerMetrics); err != nil { return err } - if err := chainCtx.Metrics.Register("", vm); err != nil { + + if err := chainCtx.Metrics.Register("plugin", vm); err != nil { return err } @@ -225,7 +230,7 @@ func (vm *VMClient) Initialize( } vm.State, err = chain.NewMeteredState( - registerer, + serverReg, &chain.Config{ DecidedCacheSize: decidedCacheSize, MissingCacheSize: missingCacheSize, diff --git a/vms/rpcchainvm/vm_server.go b/vms/rpcchainvm/vm_server.go index 67a55187426..0068bc492f3 100644 --- a/vms/rpcchainvm/vm_server.go +++ b/vms/rpcchainvm/vm_server.go @@ -225,7 +225,7 @@ func (vm *VMServer) Initialize(ctx context.Context, req *vmpb.InitializeRequest) Keystore: keystoreClient, SharedMemory: sharedMemoryClient, BCLookup: bcLookupClient, - Metrics: metrics.NewMultiGatherer(), + Metrics: metrics.NewPrefixGatherer(), // Signs warp messages WarpSigner: warpSignerClient, @@ -566,6 +566,7 @@ func (vm *VMServer) AppGossip(ctx context.Context, req 
*vmpb.AppGossipMsg) (*emp return &emptypb.Empty{}, vm.vm.AppGossip(ctx, nodeID, req.Msg) } +// TODO: FIXME func (vm *VMServer) Gather(context.Context, *emptypb.Empty) (*vmpb.GatherResponse, error) { // Gather metrics registered to snow context Gatherer. These // metrics are defined by the underlying vm implementation. From a634e7fc014ecd2402de424e473e496c9e67476d Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 18:15:41 -0400 Subject: [PATCH 15/53] nit --- node/node.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/node/node.go b/node/node.go index 5b4425f2ee5..ef4aaf42c06 100644 --- a/node/node.go +++ b/node/node.go @@ -903,7 +903,10 @@ func (n *Node) initChains(genesisBytes []byte) error { func (n *Node) initMetrics() error { n.MetricsGatherer = metrics.NewPrefixGatherer() n.MeterDBMetricsGatherer = metrics.NewLabelGatherer("chain") - return n.MetricsGatherer.Register("meterdb", n.MeterDBMetricsGatherer) + return n.MetricsGatherer.Register( + metric.AppendNamespace(constants.PlatformName, "meterdb"), + n.MeterDBMetricsGatherer, + ) } func (n *Node) initNAT() { From 582f45b132c67b0a7c9e7fff2d6d82604b563444 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 18:36:51 -0400 Subject: [PATCH 16/53] finish rpcchainvm --- vms/rpcchainvm/vm_server.go | 58 ++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/vms/rpcchainvm/vm_server.go b/vms/rpcchainvm/vm_server.go index 0068bc492f3..b33fd3e5b5f 100644 --- a/vms/rpcchainvm/vm_server.go +++ b/vms/rpcchainvm/vm_server.go @@ -72,9 +72,9 @@ type VMServer struct { allowShutdown *utils.Atomic[bool] - processMetrics prometheus.Gatherer - db database.Database - log logging.Logger + metrics prometheus.Gatherer + db database.Database + log logging.Logger serverCloser grpcutils.ServerCloser connCloser wrappers.Closer @@ -125,28 +125,47 @@ func (vm *VMServer) Initialize(ctx context.Context, req *vmpb.InitializeRequest) 
return nil, err } - registerer := prometheus.NewRegistry() + pluginMetrics := metrics.NewPrefixGatherer() + vm.metrics = pluginMetrics + + processMetrics, err := metrics.MakeAndRegister( + pluginMetrics, + "process", + ) + if err != nil { + return nil, err + } // Current state of process metrics processCollector := collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}) - if err := registerer.Register(processCollector); err != nil { + if err := processMetrics.Register(processCollector); err != nil { return nil, err } // Go process metrics using debug.GCStats goCollector := collectors.NewGoCollector() - if err := registerer.Register(goCollector); err != nil { + if err := processMetrics.Register(goCollector); err != nil { + return nil, err + } + + grpcMetrics, err := metrics.MakeAndRegister( + pluginMetrics, + "grpc", + ) + if err != nil { return nil, err } // gRPC client metrics grpcClientMetrics := grpc_prometheus.NewClientMetrics() - if err := registerer.Register(grpcClientMetrics); err != nil { + if err := grpcMetrics.Register(grpcClientMetrics); err != nil { return nil, err } - // Register metrics for each Go plugin processes - vm.processMetrics = registerer + vmMetrics := metrics.NewPrefixGatherer() + if err := pluginMetrics.Register("vm", vmMetrics); err != nil { + return nil, err + } // Dial the database dbClientConn, err := grpcutils.Dial( @@ -225,7 +244,7 @@ func (vm *VMServer) Initialize(ctx context.Context, req *vmpb.InitializeRequest) Keystore: keystoreClient, SharedMemory: sharedMemoryClient, BCLookup: bcLookupClient, - Metrics: metrics.NewPrefixGatherer(), + Metrics: vmMetrics, // Signs warp messages WarpSigner: warpSignerClient, @@ -566,24 +585,9 @@ func (vm *VMServer) AppGossip(ctx context.Context, req *vmpb.AppGossipMsg) (*emp return &emptypb.Empty{}, vm.vm.AppGossip(ctx, nodeID, req.Msg) } -// TODO: FIXME func (vm *VMServer) Gather(context.Context, *emptypb.Empty) (*vmpb.GatherResponse, error) { - // Gather metrics registered to snow 
context Gatherer. These - // metrics are defined by the underlying vm implementation. - mfs, err := vm.ctx.Metrics.Gather() - if err != nil { - return nil, err - } - - // Gather metrics registered by rpcchainvm server Gatherer. These - // metrics are collected for each Go plugin process. - pluginMetrics, err := vm.processMetrics.Gather() - if err != nil { - return nil, err - } - mfs = append(mfs, pluginMetrics...) - - return &vmpb.GatherResponse{MetricFamilies: mfs}, err + metrics, err := vm.metrics.Gather() + return &vmpb.GatherResponse{MetricFamilies: metrics}, err } func (vm *VMServer) GetAncestors(ctx context.Context, req *vmpb.GetAncestorsRequest) (*vmpb.GetAncestorsResponse, error) { From ca30268095d3cdd5adea22f77c5bc50dfee02ec4 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 19:07:17 -0400 Subject: [PATCH 17/53] rename meterdb --- api/metrics/README.md | 21 +++++----- chains/manager.go | 42 +++++++++++++++---- node/node.go | 4 +- snow/networking/handler/handler.go | 7 ++-- snow/networking/handler/handler_test.go | 7 ++++ snow/networking/handler/health_test.go | 1 + snow/networking/handler/message_queue.go | 3 +- snow/networking/handler/message_queue_test.go | 3 +- snow/networking/handler/metrics.go | 22 ++++------ snow/networking/router/chain_router_test.go | 6 +++ snow/networking/sender/sender.go | 5 +-- snow/networking/sender/sender_test.go | 3 ++ vms/platformvm/vm_test.go | 1 + 13 files changed, 85 insertions(+), 40 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index ea72015c93c..ca5667e8b57 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -5,17 +5,18 @@ graph LR A[avalanche] --> B[chain] A --> C[network] A --> D[api] - A -- $chainID --> E[meterdb] - A --> F[db] - A --> G[go] - A --> H[health] - A --> I[system_resources] - A --> J[resource_tracker] - A --> K[requests] - B -- $chainID --> L[$vmID] - B -- $chainID, $isProposerVM --> M[meterchainvm] - B -- $chainID --> N[meterdagvm] + A --> 
E[db] + A --> F[go] + A --> G[health] + A --> H[system_resources] + A --> I[resource_tracker] + A --> J[requests] + B -- $chainID --> K[$vmID] + B -- $chainID --> L[meter_db] + B -- $chainID --> M[meter_chainvm] + B -- $chainID --> N[meter_dagvm] B -- $chainID --> O[proposervm] B -- $chainID --> P[snowman] B -- $chainID --> Q[avalanche] + B -- $chainID --> R[handler] ``` diff --git a/chains/manager.go b/chains/manager.go index 558851099c8..9d4b17263df 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -77,12 +77,13 @@ const ( defaultChannelSize = 1 initialQueueSize = 3 - chainNamespace = constants.PlatformName + metric.NamespaceSeparator + "chain" - snowmanNamespace = chainNamespace + metric.NamespaceSeparator + "snowman" - avalancheNamespace = chainNamespace + metric.NamespaceSeparator + "avalanche" - proposervmNamespace = chainNamespace + metric.NamespaceSeparator + "proposervm" - meterchainvmNamespace = chainNamespace + metric.NamespaceSeparator + "meterchainvm" - meterdagvmNamespace = chainNamespace + metric.NamespaceSeparator + "meterdagvm" + ChainNamespace = constants.PlatformName + metric.NamespaceSeparator + "chain" + handlerNamespace = ChainNamespace + metric.NamespaceSeparator + "handler" + snowmanNamespace = ChainNamespace + metric.NamespaceSeparator + "snowman" + avalancheNamespace = ChainNamespace + metric.NamespaceSeparator + "avalanche" + proposervmNamespace = ChainNamespace + metric.NamespaceSeparator + "proposervm" + meterchainvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meter_chainvm" + meterdagvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meter_dagvm" ) var ( @@ -269,16 +270,22 @@ type manager struct { // snowman++ related interface to allow validators retrieval validatorState validators.State + handlerGatherer metrics.MultiGatherer // chainID snowmanGatherer metrics.MultiGatherer // chainID avalancheGatherer metrics.MultiGatherer // chainID proposervmGatherer metrics.MultiGatherer // chainID - 
meterChainVMGatherer metrics.MultiGatherer // chainID -> isProposervm + meterChainVMGatherer metrics.MultiGatherer // chainID meterDAGVMGatherer metrics.MultiGatherer // chainID vmGatherer map[ids.ID]metrics.MultiGatherer // vmID -> chainID } // New returns a new Manager func New(config *ManagerConfig) (Manager, error) { + handlerGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(handlerNamespace, handlerGatherer); err != nil { + return nil, err + } + snowmanGatherer := metrics.NewLabelGatherer("chain") if err := config.Metrics.Register(snowmanNamespace, snowmanGatherer); err != nil { return nil, err @@ -312,6 +319,7 @@ func New(config *ManagerConfig) (Manager, error) { unblockChainCreatorCh: make(chan struct{}), chainCreatorShutdownCh: make(chan struct{}), + handlerGatherer: handlerGatherer, snowmanGatherer: snowmanGatherer, avalancheGatherer: avalancheGatherer, proposervmGatherer: proposervmGatherer, @@ -839,6 +847,14 @@ func (m *manager) createAvalancheChain( return nil, fmt.Errorf("error creating peer tracker: %w", err) } + handlerReg, err := metrics.MakeAndRegister( + m.handlerGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + // Asynchronously passes messages from the network to the consensus engine h, err := handler.New( ctx, @@ -851,6 +867,7 @@ func (m *manager) createAvalancheChain( sb, connectedValidators, peerTracker, + handlerReg, ) if err != nil { return nil, fmt.Errorf("error initializing network handler: %w", err) @@ -1211,6 +1228,14 @@ func (m *manager) createSnowmanChain( return nil, fmt.Errorf("error creating peer tracker: %w", err) } + handlerReg, err := metrics.MakeAndRegister( + m.handlerGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + // Asynchronously passes messages from the network to the consensus engine h, err := handler.New( ctx, @@ -1223,6 +1248,7 @@ func (m *manager) createSnowmanChain( sb, connectedValidators, peerTracker, + handlerReg, ) if err != nil { return 
nil, fmt.Errorf("couldn't initialize message handler: %w", err) @@ -1505,7 +1531,7 @@ func (m *manager) getOrMakeVMRegisterer(vmID ids.ID, chainAlias string) (metrics } err := m.Metrics.Register( - metric.AppendNamespace(chainNamespace, vmIDStr), + metric.AppendNamespace(ChainNamespace, vmIDStr), vmGatherer, ) if err != nil { diff --git a/node/node.go b/node/node.go index ef4aaf42c06..4a98e41e90c 100644 --- a/node/node.go +++ b/node/node.go @@ -90,6 +90,8 @@ const ( httpPortName = constants.AppName + "-http" ipResolutionTimeout = 30 * time.Second + + meterDBNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "handler" ) var ( @@ -904,7 +906,7 @@ func (n *Node) initMetrics() error { n.MetricsGatherer = metrics.NewPrefixGatherer() n.MeterDBMetricsGatherer = metrics.NewLabelGatherer("chain") return n.MetricsGatherer.Register( - metric.AppendNamespace(constants.PlatformName, "meterdb"), + meterDBNamespace, n.MeterDBMetricsGatherer, ) } diff --git a/snow/networking/handler/handler.go b/snow/networking/handler/handler.go index f1966adc4dc..4cb5e7741a0 100644 --- a/snow/networking/handler/handler.go +++ b/snow/networking/handler/handler.go @@ -140,6 +140,7 @@ func New( subnet subnets.Subnet, peerTracker commontracker.Peers, p2pTracker *p2p.PeerTracker, + reg prometheus.Registerer, ) (Handler, error) { h := &handler{ ctx: ctx, @@ -160,16 +161,16 @@ func New( var err error - h.metrics, err = newMetrics("handler", h.ctx.Registerer) + h.metrics, err = newMetrics(reg) if err != nil { return nil, fmt.Errorf("initializing handler metrics errored with: %w", err) } cpuTracker := resourceTracker.CPUTracker() - h.syncMessageQueue, err = NewMessageQueue(h.ctx, h.validators, cpuTracker, "handler") + h.syncMessageQueue, err = NewMessageQueue(h.ctx, h.validators, cpuTracker, "sync", reg) if err != nil { return nil, fmt.Errorf("initializing sync message queue errored with: %w", err) } - h.asyncMessageQueue, err = NewMessageQueue(h.ctx, h.validators, cpuTracker, 
"handler_async") + h.asyncMessageQueue, err = NewMessageQueue(h.ctx, h.validators, cpuTracker, "async", reg) if err != nil { return nil, fmt.Errorf("initializing async message queue errored with: %w", err) } diff --git a/snow/networking/handler/handler_test.go b/snow/networking/handler/handler_test.go index e8ab5f85ebb..cb24040643f 100644 --- a/snow/networking/handler/handler_test.go +++ b/snow/networking/handler/handler_test.go @@ -77,6 +77,7 @@ func TestHandlerDropsTimedOutMessages(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) handler := handlerIntf.(*handler) @@ -183,6 +184,7 @@ func TestHandlerClosesOnError(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) handler := handlerIntf.(*handler) @@ -285,6 +287,7 @@ func TestHandlerDropsGossipDuringBootstrapping(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) handler := handlerIntf.(*handler) @@ -375,6 +378,7 @@ func TestHandlerDispatchInternal(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -460,6 +464,7 @@ func TestHandlerSubnetConnector(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -641,6 +646,7 @@ func TestDynamicEngineTypeDispatch(t *testing.T) { subnets.New(ids.EmptyNodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -723,6 +729,7 @@ func TestHandlerStartError(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) diff --git 
a/snow/networking/handler/health_test.go b/snow/networking/handler/health_test.go index 163332735ea..789d3464187 100644 --- a/snow/networking/handler/health_test.go +++ b/snow/networking/handler/health_test.go @@ -93,6 +93,7 @@ func TestHealthCheckSubnet(t *testing.T) { sb, peerTracker, p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/snow/networking/handler/message_queue.go b/snow/networking/handler/message_queue.go index f17cfc1a2e9..8fcef984b3d 100644 --- a/snow/networking/handler/message_queue.go +++ b/snow/networking/handler/message_queue.go @@ -79,6 +79,7 @@ func NewMessageQueue( vdrs validators.Manager, cpuTracker tracker.Tracker, metricsNamespace string, + reg prometheus.Registerer, ) (MessageQueue, error) { m := &messageQueue{ ctx: ctx, @@ -88,7 +89,7 @@ func NewMessageQueue( nodeToUnprocessedMsgs: make(map[ids.NodeID]int), msgAndCtxs: buffer.NewUnboundedDeque[*msgAndContext](1 /*=initSize*/), } - return m, m.metrics.initialize(metricsNamespace, ctx.Registerer) + return m, m.metrics.initialize(metricsNamespace, reg) } func (m *messageQueue) Push(ctx context.Context, msg Message) { diff --git a/snow/networking/handler/message_queue_test.go b/snow/networking/handler/message_queue_test.go index 577a4686faa..e4cd439017c 100644 --- a/snow/networking/handler/message_queue_test.go +++ b/snow/networking/handler/message_queue_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" @@ -29,7 +30,7 @@ func TestQueue(t *testing.T) { vdr1ID, vdr2ID := ids.GenerateTestNodeID(), ids.GenerateTestNodeID() require.NoError(vdrs.AddStaker(ctx.SubnetID, vdr1ID, nil, ids.Empty, 1)) require.NoError(vdrs.AddStaker(ctx.SubnetID, vdr2ID, nil, ids.Empty, 1)) - mIntf, err := NewMessageQueue(ctx, vdrs, cpuTracker, "") + mIntf, err := NewMessageQueue(ctx, vdrs, cpuTracker, "", prometheus.NewRegistry()) require.NoError(err) u := mIntf.(*messageQueue) 
currentTime := time.Now() diff --git a/snow/networking/handler/metrics.go b/snow/networking/handler/metrics.go index 9cd6c9ec409..f3a21149f26 100644 --- a/snow/networking/handler/metrics.go +++ b/snow/networking/handler/metrics.go @@ -16,36 +16,32 @@ type metrics struct { messageHandlingTime *prometheus.GaugeVec // op } -func newMetrics(namespace string, reg prometheus.Registerer) (*metrics, error) { +func newMetrics(reg prometheus.Registerer) (*metrics, error) { m := &metrics{ expired: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "expired", - Help: "messages dropped because the deadline expired", + Name: "expired", + Help: "messages dropped because the deadline expired", }, opLabels, ), messages: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "messages", - Help: "messages handled", + Name: "messages", + Help: "messages handled", }, opLabels, ), messageHandlingTime: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "message_handling_time", - Help: "time spent handling messages", + Name: "message_handling_time", + Help: "time spent handling messages", }, opLabels, ), lockingTime: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "locking_time", - Help: "time spent acquiring the context lock", + Name: "locking_time", + Help: "time spent acquiring the context lock", }), } return m, utils.Err( diff --git a/snow/networking/router/chain_router_test.go b/snow/networking/router/chain_router_test.go index 91f2c67090c..7e889d43fc6 100644 --- a/snow/networking/router/chain_router_test.go +++ b/snow/networking/router/chain_router_test.go @@ -112,6 +112,7 @@ func TestShutdown(t *testing.T) { subnets.New(chainCtx.NodeID, subnets.Config{}), commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -259,6 +260,7 @@ func TestShutdownTimesOut(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), 
p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -427,6 +429,7 @@ func TestRouterTimeout(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -1007,6 +1010,7 @@ func TestValidatorOnlyMessageDrops(t *testing.T) { sb, commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -1285,6 +1289,7 @@ func TestValidatorOnlyAllowedNodeMessageDrops(t *testing.T) { sb, commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -1623,6 +1628,7 @@ func newChainRouterTest(t *testing.T) (*ChainRouter, *common.EngineTest) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(t, err) diff --git a/snow/networking/sender/sender.go b/snow/networking/sender/sender.go index c13b9e22682..37076972fe1 100644 --- a/snow/networking/sender/sender.go +++ b/snow/networking/sender/sender.go @@ -67,9 +67,8 @@ func New( timeouts: timeouts, failedDueToBench: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: "", - Name: "failed_benched", - Help: "requests dropped because a node was benched", + Name: "failed_benched", + Help: "requests dropped because a node was benched", }, opLabels, ), diff --git a/snow/networking/sender/sender_test.go b/snow/networking/sender/sender_test.go index 3a5bc93c155..cac97cd132b 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -130,6 +130,7 @@ func TestTimeout(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -404,6 +405,7 @@ func TestReliableMessages(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) @@ -558,6 +560,7 @@ func TestReliableMessagesToMyself(t 
*testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), commontracker.NewPeers(), p2pTracker, + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index 9c9ec08fdbb..ff5807ba2db 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1531,6 +1531,7 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { subnets.New(ctx.NodeID, subnets.Config{}), tracker.NewPeers(), peerTracker, + prometheus.NewRegistry(), ) require.NoError(err) From ca64267f294dd5d7b2879bbb810324398e066510 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 19:10:33 -0400 Subject: [PATCH 18/53] rename --- api/metrics/README.md | 6 +++--- chains/manager.go | 4 ++-- node/node.go | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index ca5667e8b57..cc63c7eece8 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -12,9 +12,9 @@ graph LR A --> I[resource_tracker] A --> J[requests] B -- $chainID --> K[$vmID] - B -- $chainID --> L[meter_db] - B -- $chainID --> M[meter_chainvm] - B -- $chainID --> N[meter_dagvm] + B -- $chainID --> L[meterdb] + B -- $chainID --> M[meterchainvm] + B -- $chainID --> N[meterdagvm] B -- $chainID --> O[proposervm] B -- $chainID --> P[snowman] B -- $chainID --> Q[avalanche] diff --git a/chains/manager.go b/chains/manager.go index 9d4b17263df..dd888a18e17 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -82,8 +82,8 @@ const ( snowmanNamespace = ChainNamespace + metric.NamespaceSeparator + "snowman" avalancheNamespace = ChainNamespace + metric.NamespaceSeparator + "avalanche" proposervmNamespace = ChainNamespace + metric.NamespaceSeparator + "proposervm" - meterchainvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meter_chainvm" - meterdagvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meter_dagvm" + meterchainvmNamespace = ChainNamespace + metric.NamespaceSeparator + 
"meterchainvm" + meterdagvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meterdagvm" ) var ( diff --git a/node/node.go b/node/node.go index 4a98e41e90c..bf3cc5132d7 100644 --- a/node/node.go +++ b/node/node.go @@ -91,7 +91,7 @@ const ( ipResolutionTimeout = 30 * time.Second - meterDBNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "handler" + meterDBNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "meterdb" ) var ( From 9098df02d493db63f641787eef663d5e9bcc5e55 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 19:24:21 -0400 Subject: [PATCH 19/53] wip --- chains/manager.go | 4 ++++ snow/networking/benchlist/benchlist.go | 1 + 2 files changed, 5 insertions(+) diff --git a/chains/manager.go b/chains/manager.go index dd888a18e17..63889b3485b 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -830,12 +830,14 @@ func (m *manager) createAvalancheChain( sampleK = int(bootstrapWeight) } + // TODO: FIXME connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } vdrs.RegisterSetCallbackListener(ctx.SubnetID, connectedValidators) + // TODO: FIXME peerTracker, err := p2p.NewPeerTracker( ctx.Log, "peer_tracker", @@ -1211,12 +1213,14 @@ func (m *manager) createSnowmanChain( sampleK = int(bootstrapWeight) } + // TODO: FIXME connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } vdrs.RegisterSetCallbackListener(ctx.SubnetID, connectedValidators) + // TODO: FIXME peerTracker, err := p2p.NewPeerTracker( ctx.Log, "peer_tracker", diff --git a/snow/networking/benchlist/benchlist.go b/snow/networking/benchlist/benchlist.go index 2bf68e04986..ff70dc808e9 100644 --- a/snow/networking/benchlist/benchlist.go +++ b/snow/networking/benchlist/benchlist.go @@ -117,6 +117,7 @@ func NewBenchlist( duration: duration, maxPortion: maxPortion, 
} + // TODO: FIXME if err := benchlist.metrics.Initialize(ctx.Registerer); err != nil { return nil, err } From fe4ffc8cfd151d54f57b7fbf60a25e43d991a6b4 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 19:35:28 -0400 Subject: [PATCH 20/53] wip --- chains/manager.go | 4 ++-- snow/networking/timeout/metrics.go | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 63889b3485b..b0065bc83d3 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -831,14 +831,14 @@ func (m *manager) createAvalancheChain( } // TODO: FIXME - connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) + connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) // stake? if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } vdrs.RegisterSetCallbackListener(ctx.SubnetID, connectedValidators) // TODO: FIXME - peerTracker, err := p2p.NewPeerTracker( + peerTracker, err := p2p.NewPeerTracker( // p2p? 
ctx.Log, "peer_tracker", ctx.Registerer, diff --git a/snow/networking/timeout/metrics.go b/snow/networking/timeout/metrics.go index 101bda85625..5ad61f8f33d 100644 --- a/snow/networking/timeout/metrics.go +++ b/snow/networking/timeout/metrics.go @@ -38,6 +38,7 @@ func (m *metrics) RegisterChain(ctx *snow.ConsensusContext) error { if _, exists := m.chainToMetrics[ctx.ChainID]; exists { return fmt.Errorf("chain %s has already been registered", ctx.ChainID) } + // TODO: FIXME cm, err := newChainMetrics(ctx.Registerer) if err != nil { return fmt.Errorf("couldn't create metrics for chain %s: %w", ctx.ChainID, err) From 90649ce354e32bb06574af9b840fd3518508f91c Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 20:54:20 -0400 Subject: [PATCH 21/53] maybe done --- chains/manager.go | 67 ++++++++++--- network/throttling/bandwidth_throttler.go | 1 - .../inbound_msg_buffer_throttler.go | 1 - .../throttling/inbound_msg_byte_throttler.go | 1 - node/node.go | 22 ++++- snow/consensus/snowman/metrics.go | 50 ++++------ snow/consensus/snowman/poll/set.go | 7 +- snow/consensus/snowman/poll/set_test.go | 30 ++---- snow/consensus/snowman/topological.go | 1 - snow/context.go | 4 + snow/engine/avalanche/getter/getter.go | 3 +- snow/engine/common/tracker/peers.go | 17 ++-- snow/engine/snowman/getter/getter.go | 3 +- snow/engine/snowman/metrics.go | 80 ++++++---------- snow/engine/snowman/transitive.go | 3 +- snow/engine/snowman/transitive_test.go | 2 +- snow/networking/benchlist/benchlist.go | 34 +++++-- snow/networking/benchlist/benchlist_test.go | 4 + snow/networking/benchlist/manager.go | 23 +++-- snow/networking/benchlist/metrics.go | 36 ------- snow/networking/router/chain_router_test.go | 13 ++- snow/networking/sender/sender_test.go | 3 + snow/networking/timeout/manager.go | 16 +++- snow/networking/timeout/manager_test.go | 1 + snow/networking/timeout/metrics.go | 94 +++++++------------ snow/snowtest/snowtest.go | 1 + utils/metric/averager.go | 16 ++-- 
vms/metervm/metrics.go | 1 - vms/platformvm/vm_test.go | 1 + 29 files changed, 269 insertions(+), 266 deletions(-) delete mode 100644 snow/networking/benchlist/metrics.go diff --git a/chains/manager.go b/chains/manager.go index b0065bc83d3..31b171608e6 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -79,6 +79,8 @@ const ( ChainNamespace = constants.PlatformName + metric.NamespaceSeparator + "chain" handlerNamespace = ChainNamespace + metric.NamespaceSeparator + "handler" + stakeNamespace = ChainNamespace + metric.NamespaceSeparator + "stake" + p2pNamespace = ChainNamespace + metric.NamespaceSeparator + "p2p" snowmanNamespace = ChainNamespace + metric.NamespaceSeparator + "snowman" avalancheNamespace = ChainNamespace + metric.NamespaceSeparator + "avalanche" proposervmNamespace = ChainNamespace + metric.NamespaceSeparator + "proposervm" @@ -271,6 +273,8 @@ type manager struct { validatorState validators.State handlerGatherer metrics.MultiGatherer // chainID + stakeGatherer metrics.MultiGatherer // chainID + p2pGatherer metrics.MultiGatherer // chainID snowmanGatherer metrics.MultiGatherer // chainID avalancheGatherer metrics.MultiGatherer // chainID proposervmGatherer metrics.MultiGatherer // chainID @@ -286,6 +290,16 @@ func New(config *ManagerConfig) (Manager, error) { return nil, err } + stakeGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(stakeNamespace, stakeGatherer); err != nil { + return nil, err + } + + p2pGatherer := metrics.NewLabelGatherer("chain") + if err := config.Metrics.Register(p2pNamespace, p2pGatherer); err != nil { + return nil, err + } + snowmanGatherer := metrics.NewLabelGatherer("chain") if err := config.Metrics.Register(snowmanNamespace, snowmanGatherer); err != nil { return nil, err @@ -320,6 +334,8 @@ func New(config *ManagerConfig) (Manager, error) { chainCreatorShutdownCh: make(chan struct{}), handlerGatherer: handlerGatherer, + stakeGatherer: stakeGatherer, + p2pGatherer: p2pGatherer, 
snowmanGatherer: snowmanGatherer, avalancheGatherer: avalancheGatherer, proposervmGatherer: proposervmGatherer, @@ -519,11 +535,12 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c ValidatorState: m.validatorState, ChainDataDir: chainDataDir, }, + PrimaryAlias: primaryAlias, + Registerer: snowmanMetrics, + AvalancheRegisterer: avalancheMetrics, BlockAcceptor: m.BlockAcceptorGroup, TxAcceptor: m.TxAcceptorGroup, VertexAcceptor: m.VertexAcceptorGroup, - Registerer: snowmanMetrics, - AvalancheRegisterer: avalancheMetrics, } // Get a factory for the vm we want to use on our chain @@ -830,18 +847,32 @@ func (m *manager) createAvalancheChain( sampleK = int(bootstrapWeight) } - // TODO: FIXME - connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) // stake? + stakeReg, err := metrics.MakeAndRegister( + m.stakeGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + connectedValidators, err := tracker.NewMeteredPeers(stakeReg) if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } vdrs.RegisterSetCallbackListener(ctx.SubnetID, connectedValidators) - // TODO: FIXME - peerTracker, err := p2p.NewPeerTracker( // p2p? 
+ p2pReg, err := metrics.MakeAndRegister( + m.p2pGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + peerTracker, err := p2p.NewPeerTracker( ctx.Log, "peer_tracker", - ctx.Registerer, + p2pReg, set.Of(ctx.NodeID), nil, ) @@ -1213,18 +1244,32 @@ func (m *manager) createSnowmanChain( sampleK = int(bootstrapWeight) } - // TODO: FIXME - connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) + stakeReg, err := metrics.MakeAndRegister( + m.stakeGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + connectedValidators, err := tracker.NewMeteredPeers(stakeReg) if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } vdrs.RegisterSetCallbackListener(ctx.SubnetID, connectedValidators) - // TODO: FIXME + p2pReg, err := metrics.MakeAndRegister( + m.p2pGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + peerTracker, err := p2p.NewPeerTracker( ctx.Log, "peer_tracker", - ctx.Registerer, + p2pReg, set.Of(ctx.NodeID), nil, ) diff --git a/network/throttling/bandwidth_throttler.go b/network/throttling/bandwidth_throttler.go index 12ca3ac9a84..58938f31c11 100644 --- a/network/throttling/bandwidth_throttler.go +++ b/network/throttling/bandwidth_throttler.go @@ -68,7 +68,6 @@ func newBandwidthThrottler( limiters: make(map[ids.NodeID]*rate.Limiter), metrics: bandwidthThrottlerMetrics{ acquireLatency: metric.NewAveragerWithErrs( - "", "bandwidth_throttler_inbound_acquire_latency", "average time (in ns) to acquire bytes from the inbound bandwidth throttler", registerer, diff --git a/network/throttling/inbound_msg_buffer_throttler.go b/network/throttling/inbound_msg_buffer_throttler.go index 395b6da1688..73ebc4ed977 100644 --- a/network/throttling/inbound_msg_buffer_throttler.go +++ b/network/throttling/inbound_msg_buffer_throttler.go @@ -132,7 +132,6 @@ type inboundMsgBufferThrottlerMetrics struct { func (m *inboundMsgBufferThrottlerMetrics) initialize(reg prometheus.Registerer) error { 
errs := wrappers.Errs{} m.acquireLatency = metric.NewAveragerWithErrs( - "", "buffer_throttler_inbound_acquire_latency", "average time (in ns) to get space on the inbound message buffer", reg, diff --git a/network/throttling/inbound_msg_byte_throttler.go b/network/throttling/inbound_msg_byte_throttler.go index 3e20762f85e..237041f00a0 100644 --- a/network/throttling/inbound_msg_byte_throttler.go +++ b/network/throttling/inbound_msg_byte_throttler.go @@ -308,7 +308,6 @@ type inboundMsgByteThrottlerMetrics struct { func (m *inboundMsgByteThrottlerMetrics) initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} m.acquireLatency = metric.NewAveragerWithErrs( - "", "byte_throttler_inbound_acquire_latency", "average time (in ns) to get space on the inbound message byte buffer", reg, diff --git a/node/node.go b/node/node.go index bf3cc5132d7..367d50dfc2b 100644 --- a/node/node.go +++ b/node/node.go @@ -91,7 +91,8 @@ const ( ipResolutionTimeout = 30 * time.Second - meterDBNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "meterdb" + meterDBNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "meterdb" + benchlistNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "benchlist" ) var ( @@ -532,6 +533,16 @@ func (n *Node) initNetworking() error { // Configure benchlist n.Config.BenchlistConfig.Validators = n.vdrs n.Config.BenchlistConfig.Benchable = n.chainRouter + n.Config.BenchlistConfig.BenchlistRegisterer = metrics.NewLabelGatherer("chain") + + err = n.MetricsGatherer.Register( + benchlistNamespace, + n.Config.BenchlistConfig.BenchlistRegisterer, + ) + if err != nil { + return err + } + n.benchlistManager = benchlist.NewManager(&n.Config.BenchlistConfig) n.uptimeCalculator = uptime.NewLockedCalculator() @@ -1065,10 +1076,19 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { return err } + responseReg, err := metrics.MakeAndRegister( + n.MetricsGatherer, + metric.AppendNamespace(constants.PlatformName, 
"responses"), + ) + if err != nil { + return err + } + n.timeoutManager, err = timeout.NewManager( &n.Config.AdaptiveTimeoutConfig, n.benchlistManager, requestsReg, + responseReg, ) if err != nil { return err diff --git a/snow/consensus/snowman/metrics.go b/snow/consensus/snowman/metrics.go index 6b48e868aaa..24065a8cf8c 100644 --- a/snow/consensus/snowman/metrics.go +++ b/snow/consensus/snowman/metrics.go @@ -65,7 +65,6 @@ type metrics struct { func newMetrics( log logging.Logger, - namespace string, reg prometheus.Registerer, lastAcceptedHeight uint64, lastAcceptedTime time.Time, @@ -75,19 +74,16 @@ func newMetrics( log: log, currentMaxVerifiedHeight: lastAcceptedHeight, maxVerifiedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "max_verified_height", - Help: "highest verified height", + Name: "max_verified_height", + Help: "highest verified height", }), lastAcceptedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "last_accepted_height", - Help: "last height accepted", + Name: "last_accepted_height", + Help: "last height accepted", }), lastAcceptedTimestamp: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "last_accepted_timestamp", - Help: "timestamp of the last accepted block in unix seconds", + Name: "last_accepted_timestamp", + Help: "timestamp of the last accepted block in unix seconds", }), processingBlocks: linked.NewHashmap[ids.ID, processingStart](), @@ -95,18 +91,15 @@ func newMetrics( // e.g., // "avalanche_X_blks_processing" reports how many blocks are currently processing numProcessing: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_processing", - Help: "number of currently processing blocks", + Name: "blks_processing", + Help: "number of currently processing blocks", }), blockSizeAcceptedSum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_accepted_container_size_sum", - Help: "cumulative size 
of all accepted blocks", + Name: "blks_accepted_container_size_sum", + Help: "cumulative size of all accepted blocks", }), pollsAccepted: metric.NewAveragerWithErrs( - namespace, "blks_polls_accepted", "number of polls from the issuance of a block to its acceptance", reg, @@ -119,25 +112,21 @@ func newMetrics( // "avalanche_C_blks_accepted_container_size_sum" reports the cumulative sum of all accepted blocks' sizes in bytes // "avalanche_C_blks_accepted_container_size_sum / avalanche_C_blks_accepted_count" is the average accepted block size in bytes latAccepted: metric.NewAveragerWithErrs( - namespace, "blks_accepted", "time (in ns) from the issuance of a block to its acceptance", reg, &errs, ), buildLatencyAccepted: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_build_accept_latency", - Help: "time (in ns) from the timestamp of a block to the time it was accepted", + Name: "blks_build_accept_latency", + Help: "time (in ns) from the timestamp of a block to the time it was accepted", }), blockSizeRejectedSum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_rejected_container_size_sum", - Help: "cumulative size of all rejected blocks", + Name: "blks_rejected_container_size_sum", + Help: "cumulative size of all rejected blocks", }), pollsRejected: metric.NewAveragerWithErrs( - namespace, "blks_polls_rejected", "number of polls from the issuance of a block to its rejection", reg, @@ -150,7 +139,6 @@ func newMetrics( // "avalanche_P_blks_rejected_container_size_sum" reports the cumulative sum of all rejected blocks' sizes in bytes // "avalanche_P_blks_rejected_container_size_sum / avalanche_P_blks_rejected_count" is the average rejected block size in bytes latRejected: metric.NewAveragerWithErrs( - namespace, "blks_rejected", "time (in ns) from the issuance of a block to its rejection", reg, @@ -158,14 +146,12 @@ func newMetrics( ), numSuccessfulPolls: prometheus.NewCounter(prometheus.CounterOpts{ - 
Namespace: namespace, - Name: "polls_successful", - Help: "number of successful polls", + Name: "polls_successful", + Help: "number of successful polls", }), numFailedPolls: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "polls_failed", - Help: "number of failed polls", + Name: "polls_failed", + Help: "number of failed polls", }), } diff --git a/snow/consensus/snowman/poll/set.go b/snow/consensus/snowman/poll/set.go index 87a751584c7..aa7e7342542 100644 --- a/snow/consensus/snowman/poll/set.go +++ b/snow/consensus/snowman/poll/set.go @@ -55,20 +55,17 @@ type set struct { func NewSet( factory Factory, log logging.Logger, - namespace string, reg prometheus.Registerer, ) (Set, error) { numPolls := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "polls", - Help: "Number of pending network polls", + Name: "polls", + Help: "Number of pending network polls", }) if err := reg.Register(numPolls); err != nil { return nil, fmt.Errorf("%w: %w", errFailedPollsMetric, err) } durPolls, err := metric.NewAverager( - namespace, "poll_duration", "time (in ns) this poll took to complete", reg, diff --git a/snow/consensus/snowman/poll/set_test.go b/snow/consensus/snowman/poll/set_test.go index 0717242060d..97166e0e937 100644 --- a/snow/consensus/snowman/poll/set_test.go +++ b/snow/consensus/snowman/poll/set_test.go @@ -32,15 +32,13 @@ func TestNewSetErrorOnPollsMetrics(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(1, 1) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() require.NoError(registerer.Register(prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "polls", + Name: "polls", }))) - _, err := NewSet(factory, log, namespace, registerer) + _, err := NewSet(factory, log, registerer) require.ErrorIs(err, errFailedPollsMetric) } @@ -49,15 +47,13 @@ func TestNewSetErrorOnPollDurationMetrics(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(1, 1) log := 
logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() require.NoError(registerer.Register(prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "poll_duration_count", + Name: "poll_duration_count", }))) - _, err := NewSet(factory, log, namespace, registerer) + _, err := NewSet(factory, log, registerer) require.ErrorIs(err, errFailedPollDurationMetrics) } @@ -69,9 +65,8 @@ func TestCreateAndFinishPollOutOfOrder_NewerFinishesFirst(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) // create two polls for the two blocks @@ -106,9 +101,8 @@ func TestCreateAndFinishPollOutOfOrder_OlderFinishesFirst(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) // create two polls for the two blocks @@ -143,9 +137,8 @@ func TestCreateAndFinishPollOutOfOrder_UnfinishedPollsGaps(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) // create three polls for the two blocks @@ -188,9 +181,8 @@ func TestCreateAndFinishSuccessfulPoll(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) require.Zero(s.Len()) @@ -221,9 +213,8 @@ func TestCreateAndFinishFailedPoll(t *testing.T) { 
factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) require.Zero(s.Len()) @@ -251,9 +242,8 @@ func TestSetString(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) expected := `current polls: (Size = 1) diff --git a/snow/consensus/snowman/topological.go b/snow/consensus/snowman/topological.go index 0eb8b55c1f6..f2ef015654c 100644 --- a/snow/consensus/snowman/topological.go +++ b/snow/consensus/snowman/topological.go @@ -111,7 +111,6 @@ func (ts *Topological) Initialize( ts.metrics, err = newMetrics( ctx.Log, - "", ctx.Registerer, lastAcceptedHeight, lastAcceptedTime, diff --git a/snow/context.go b/snow/context.go index f610adca999..d8748d0e1ae 100644 --- a/snow/context.go +++ b/snow/context.go @@ -65,6 +65,10 @@ type Registerer interface { type ConsensusContext struct { *Context + // PrimaryAlias is the primary alias of the chain this context exists + // within. + PrimaryAlias string + // Registers all common and snowman consensus metrics. Unlike the avalanche // consensus engine metrics, we do not prefix the name with the engine name, // as snowman is used for all chains by default. 
diff --git a/snow/engine/avalanche/getter/getter.go b/snow/engine/avalanche/getter/getter.go index 796cade92fa..1e1105c7675 100644 --- a/snow/engine/avalanche/getter/getter.go +++ b/snow/engine/avalanche/getter/getter.go @@ -44,8 +44,7 @@ func New( var err error gh.getAncestorsVtxs, err = metric.NewAverager( - "bs", - "get_ancestors_vtxs", + "bs_get_ancestors_vtxs", "vertices fetched in a call to GetAncestors", reg, ) diff --git a/snow/engine/common/tracker/peers.go b/snow/engine/common/tracker/peers.go index 1e76d42f426..94ed4676478 100644 --- a/snow/engine/common/tracker/peers.go +++ b/snow/engine/common/tracker/peers.go @@ -113,21 +113,18 @@ type meteredPeers struct { totalWeight prometheus.Gauge } -func NewMeteredPeers(namespace string, reg prometheus.Registerer) (Peers, error) { +func NewMeteredPeers(reg prometheus.Registerer) (Peers, error) { percentConnected := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "percent_connected", - Help: "Percent of connected stake", + Name: "percent_connected", + Help: "Percent of connected stake", }) totalWeight := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "total_weight", - Help: "Total stake", + Name: "total_weight", + Help: "Total stake", }) numValidators := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_validators", - Help: "Total number of validators", + Name: "num_validators", + Help: "Total number of validators", }) err := utils.Err( reg.Register(percentConnected), diff --git a/snow/engine/snowman/getter/getter.go b/snow/engine/snowman/getter/getter.go index b58d7eb8742..b501aeef268 100644 --- a/snow/engine/snowman/getter/getter.go +++ b/snow/engine/snowman/getter/getter.go @@ -43,8 +43,7 @@ func New( var err error gh.getAncestorsBlks, err = metric.NewAverager( - "bs", - "get_ancestors_blks", + "bs_get_ancestors_blks", "blocks fetched in a call to GetAncestors", reg, ) diff --git a/snow/engine/snowman/metrics.go 
b/snow/engine/snowman/metrics.go index 193b067a14c..922b18200d4 100644 --- a/snow/engine/snowman/metrics.go +++ b/snow/engine/snowman/metrics.go @@ -38,104 +38,86 @@ type metrics struct { issued *prometheus.CounterVec } -func newMetrics(namespace string, reg prometheus.Registerer) (*metrics, error) { +func newMetrics(reg prometheus.Registerer) (*metrics, error) { errs := wrappers.Errs{} m := &metrics{ bootstrapFinished: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "bootstrap_finished", - Help: "Whether or not bootstrap process has completed. 1 is success, 0 is fail or ongoing.", + Name: "bootstrap_finished", + Help: "Whether or not bootstrap process has completed. 1 is success, 0 is fail or ongoing.", }), numRequests: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "requests", - Help: "Number of outstanding block requests", + Name: "requests", + Help: "Number of outstanding block requests", }), numBlocked: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blocked", - Help: "Number of blocks that are pending issuance", + Name: "blocked", + Help: "Number of blocks that are pending issuance", }), numBlockers: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blockers", - Help: "Number of blocks that are blocking other blocks from being issued because they haven't been issued", + Name: "blockers", + Help: "Number of blocks that are blocking other blocks from being issued because they haven't been issued", }), numNonVerifieds: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "non_verified_blks", - Help: "Number of non-verified blocks in the memory", + Name: "non_verified_blks", + Help: "Number of non-verified blocks in the memory", }), numBuilt: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "blks_built", - Help: "Number of blocks that have been built locally", + Name: "blks_built", + Help: "Number of blocks 
that have been built locally", }), numBuildsFailed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "blk_builds_failed", - Help: "Number of BuildBlock calls that have failed", + Name: "blk_builds_failed", + Help: "Number of BuildBlock calls that have failed", }), numUselessPutBytes: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_useless_put_bytes", - Help: "Amount of useless bytes received in Put messages", + Name: "num_useless_put_bytes", + Help: "Amount of useless bytes received in Put messages", }), numUselessPushQueryBytes: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_useless_push_query_bytes", - Help: "Amount of useless bytes received in PushQuery messages", + Name: "num_useless_push_query_bytes", + Help: "Amount of useless bytes received in PushQuery messages", }), numMissingAcceptedBlocks: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_missing_accepted_blocks", - Help: "Number of times an accepted block height was referenced and it wasn't locally available", + Name: "num_missing_accepted_blocks", + Help: "Number of times an accepted block height was referenced and it wasn't locally available", }), numProcessingAncestorFetchesFailed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_failed", - Help: "Number of votes that were dropped due to unknown blocks", + Name: "num_processing_ancestor_fetches_failed", + Help: "Number of votes that were dropped due to unknown blocks", }), numProcessingAncestorFetchesDropped: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_dropped", - Help: "Number of votes that were dropped due to decided blocks", + Name: "num_processing_ancestor_fetches_dropped", + Help: "Number of votes that were dropped due to decided blocks", }), numProcessingAncestorFetchesSucceeded: 
prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_succeeded", - Help: "Number of votes that were applied to ancestor blocks", + Name: "num_processing_ancestor_fetches_succeeded", + Help: "Number of votes that were applied to ancestor blocks", }), numProcessingAncestorFetchesUnneeded: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_unneeded", - Help: "Number of votes that were directly applied to blocks", + Name: "num_processing_ancestor_fetches_unneeded", + Help: "Number of votes that were directly applied to blocks", }), getAncestorsBlks: metric.NewAveragerWithErrs( - namespace, "get_ancestors_blks", "blocks fetched in a call to GetAncestors", reg, &errs, ), selectedVoteIndex: metric.NewAveragerWithErrs( - namespace, "selected_vote_index", "index of the voteID that was passed into consensus", reg, &errs, ), issuerStake: metric.NewAveragerWithErrs( - namespace, "issuer_stake", "stake weight of the peer who provided a block that was issued into consensus", reg, &errs, ), issued: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Name: "blks_issued", - Help: "number of blocks that have been issued into consensus by discovery mechanism", + Name: "blks_issued", + Help: "number of blocks that have been issued into consensus by discovery mechanism", }, []string{"source"}), } diff --git a/snow/engine/snowman/transitive.go b/snow/engine/snowman/transitive.go index 32bf4ac5d5a..9e89fedd22b 100644 --- a/snow/engine/snowman/transitive.go +++ b/snow/engine/snowman/transitive.go @@ -118,14 +118,13 @@ func New(config Config) (*Transitive, error) { polls, err := poll.NewSet( factory, config.Ctx.Log, - "", config.Ctx.Registerer, ) if err != nil { return nil, err } - metrics, err := newMetrics("", config.Ctx.Registerer) + metrics, err := newMetrics(config.Ctx.Registerer) if err != nil { return nil, err } diff --git 
a/snow/engine/snowman/transitive_test.go b/snow/engine/snowman/transitive_test.go index 75040d9a1f5..2961b018c8c 100644 --- a/snow/engine/snowman/transitive_test.go +++ b/snow/engine/snowman/transitive_test.go @@ -2866,7 +2866,7 @@ func TestGetProcessingAncestor(t *testing.T) { unissuedBlock = snowmantest.BuildChild(issuedBlock) ) - metrics, err := newMetrics("", prometheus.NewRegistry()) + metrics, err := newMetrics(prometheus.NewRegistry()) require.NoError(t, err) c := &snowman.Topological{} diff --git a/snow/networking/benchlist/benchlist.go b/snow/networking/benchlist/benchlist.go index ff70dc808e9..45339537943 100644 --- a/snow/networking/benchlist/benchlist.go +++ b/snow/networking/benchlist/benchlist.go @@ -9,11 +9,13 @@ import ( "sync" "time" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/snow" "github.com/ava-labs/avalanchego/snow/validators" + "github.com/ava-labs/avalanchego/utils" "github.com/ava-labs/avalanchego/utils/heap" "github.com/ava-labs/avalanchego/utils/set" "github.com/ava-labs/avalanchego/utils/timer/mockable" @@ -50,8 +52,9 @@ type failureStreak struct { type benchlist struct { lock sync.RWMutex // Context of the chain this is the benchlist for - ctx *snow.ConsensusContext - metrics metrics + ctx *snow.ConsensusContext + + numBenched, weightBenched prometheus.Gauge // Used to notify the timer that it should recalculate when it should fire resetTimer chan struct{} @@ -99,13 +102,22 @@ func NewBenchlist( minimumFailingDuration, duration time.Duration, maxPortion float64, + reg prometheus.Registerer, ) (Benchlist, error) { if maxPortion < 0 || maxPortion >= 1 { return nil, fmt.Errorf("max portion of benched stake must be in [0,1) but got %f", maxPortion) } benchlist := &benchlist{ - ctx: ctx, + ctx: ctx, + numBenched: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "benched_num", + Help: "Number of currently benched validators", + }), + 
weightBenched: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "benched_weight", + Help: "Weight of currently benched validators", + }), resetTimer: make(chan struct{}, 1), failureStreaks: make(map[ids.NodeID]failureStreak), benchlistSet: set.Set[ids.NodeID]{}, @@ -117,8 +129,12 @@ func NewBenchlist( duration: duration, maxPortion: maxPortion, } - // TODO: FIXME - if err := benchlist.metrics.Initialize(ctx.Registerer); err != nil { + + err := utils.Err( + reg.Register(benchlist.numBenched), + reg.Register(benchlist.weightBenched), + ) + if err != nil { return nil, err } @@ -189,7 +205,7 @@ func (b *benchlist) removedExpiredNodes() { b.benchable.Unbenched(b.ctx.ChainID, nodeID) } - b.metrics.numBenched.Set(float64(b.benchedHeap.Len())) + b.numBenched.Set(float64(b.benchedHeap.Len())) benchedStake, err := b.vdrs.SubsetWeight(b.ctx.SubnetID, b.benchlistSet) if err != nil { b.ctx.Log.Error("error calculating benched stake", @@ -198,7 +214,7 @@ func (b *benchlist) removedExpiredNodes() { ) return } - b.metrics.weightBenched.Set(float64(benchedStake)) + b.weightBenched.Set(float64(benchedStake)) } func (b *benchlist) durationToSleep() time.Duration { @@ -339,6 +355,6 @@ func (b *benchlist) bench(nodeID ids.NodeID) { } // Update metrics - b.metrics.numBenched.Set(float64(b.benchedHeap.Len())) - b.metrics.weightBenched.Set(float64(newBenchedStake)) + b.numBenched.Set(float64(b.benchedHeap.Len())) + b.weightBenched.Set(float64(newBenchedStake)) } diff --git a/snow/networking/benchlist/benchlist_test.go b/snow/networking/benchlist/benchlist_test.go index 45568392297..3a52be818f7 100644 --- a/snow/networking/benchlist/benchlist_test.go +++ b/snow/networking/benchlist/benchlist_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/ava-labs/avalanchego/ids" @@ -49,6 +50,7 @@ func TestBenchlistAdd(t *testing.T) { minimumFailingDuration, duration, maxPortion, + 
prometheus.NewRegistry(), ) require.NoError(err) b := benchIntf.(*benchlist) @@ -173,6 +175,7 @@ func TestBenchlistMaxStake(t *testing.T) { minimumFailingDuration, duration, maxPortion, + prometheus.NewRegistry(), ) require.NoError(err) b := benchIntf.(*benchlist) @@ -295,6 +298,7 @@ func TestBenchlistRemove(t *testing.T) { minimumFailingDuration, duration, maxPortion, + prometheus.NewRegistry(), ) require.NoError(err) b := benchIntf.(*benchlist) diff --git a/snow/networking/benchlist/manager.go b/snow/networking/benchlist/manager.go index e6ac45da440..e19c5441044 100644 --- a/snow/networking/benchlist/manager.go +++ b/snow/networking/benchlist/manager.go @@ -7,6 +7,7 @@ import ( "sync" "time" + "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/snow" "github.com/ava-labs/avalanchego/snow/validators" @@ -39,12 +40,13 @@ type Manager interface { // Config defines the configuration for a benchlist type Config struct { - Benchable Benchable `json:"-"` - Validators validators.Manager `json:"-"` - Threshold int `json:"threshold"` - MinimumFailingDuration time.Duration `json:"minimumFailingDuration"` - Duration time.Duration `json:"duration"` - MaxPortion float64 `json:"maxPortion"` + Benchable Benchable `json:"-"` + Validators validators.Manager `json:"-"` + BenchlistRegisterer metrics.MultiGatherer `json:"-"` + Threshold int `json:"threshold"` + MinimumFailingDuration time.Duration `json:"minimumFailingDuration"` + Duration time.Duration `json:"duration"` + MaxPortion float64 `json:"maxPortion"` } type manager struct { @@ -108,6 +110,14 @@ func (m *manager) RegisterChain(ctx *snow.ConsensusContext) error { return nil } + reg, err := metrics.MakeAndRegister( + m.config.BenchlistRegisterer, + ctx.PrimaryAlias, + ) + if err != nil { + return err + } + benchlist, err := NewBenchlist( ctx, m.config.Benchable, @@ -116,6 +126,7 @@ func (m *manager) RegisterChain(ctx *snow.ConsensusContext) error { 
m.config.MinimumFailingDuration, m.config.Duration, m.config.MaxPortion, + reg, ) if err != nil { return err diff --git a/snow/networking/benchlist/metrics.go b/snow/networking/benchlist/metrics.go deleted file mode 100644 index 25f9e50f7da..00000000000 --- a/snow/networking/benchlist/metrics.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. - -package benchlist - -import ( - "fmt" - - "github.com/prometheus/client_golang/prometheus" -) - -type metrics struct { - numBenched, weightBenched prometheus.Gauge -} - -func (m *metrics) Initialize(registerer prometheus.Registerer) error { - m.numBenched = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: "benchlist", - Name: "benched_num", - Help: "Number of currently benched validators", - }) - if err := registerer.Register(m.numBenched); err != nil { - return fmt.Errorf("failed to register num benched statistics due to %w", err) - } - - m.weightBenched = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: "benchlist", - Name: "benched_weight", - Help: "Weight of currently benched validators", - }) - if err := registerer.Register(m.weightBenched); err != nil { - return fmt.Errorf("failed to register weight benched statistics due to %w", err) - } - - return nil -} diff --git a/snow/networking/router/chain_router_test.go b/snow/networking/router/chain_router_test.go index 7e889d43fc6..19b889cd2d9 100644 --- a/snow/networking/router/chain_router_test.go +++ b/snow/networking/router/chain_router_test.go @@ -62,6 +62,7 @@ func TestShutdown(t *testing.T) { }, benchlist, prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -199,7 +200,6 @@ func TestShutdownTimesOut(t *testing.T) { vdrs := validators.NewManager() require.NoError(vdrs.AddStaker(ctx.SubnetID, ids.GenerateTestNodeID(), nil, ids.Empty, 1)) benchlist := benchlist.NewNoBenchlist() - metrics := prometheus.NewRegistry() // Ensure that the 
Ancestors request does not timeout tm, err := timeout.NewManager( &timer.AdaptiveTimeoutConfig{ @@ -210,7 +210,8 @@ func TestShutdownTimesOut(t *testing.T) { TimeoutHalflife: 5 * time.Minute, }, benchlist, - metrics, + prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -229,7 +230,7 @@ func TestShutdownTimesOut(t *testing.T) { set.Set[ids.ID]{}, nil, HealthConfig{}, - metrics, + prometheus.NewRegistry(), )) resourceTracker, err := tracker.NewResourceTracker( @@ -359,6 +360,7 @@ func TestRouterTimeout(t *testing.T) { }, benchlist.NewNoBenchlist(), prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -727,6 +729,7 @@ func TestRouterHonorsRequestedEngine(t *testing.T) { }, benchlist.NewNoBenchlist(), prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -950,6 +953,7 @@ func TestValidatorOnlyMessageDrops(t *testing.T) { }, benchlist.NewNoBenchlist(), prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1110,6 +1114,7 @@ func TestConnectedSubnet(t *testing.T) { }, benchlist.NewNoBenchlist(), prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1225,6 +1230,7 @@ func TestValidatorOnlyAllowedNodeMessageDrops(t *testing.T) { }, benchlist.NewNoBenchlist(), prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1574,6 +1580,7 @@ func newChainRouterTest(t *testing.T) (*ChainRouter, *common.EngineTest) { }, benchlist.NewNoBenchlist(), prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(t, err) diff --git a/snow/networking/sender/sender_test.go b/snow/networking/sender/sender_test.go index cac97cd132b..5ea4cda4d95 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -59,6 +59,7 @@ func TestTimeout(t *testing.T) { }, benchlist, prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) go tm.Dispatch() @@ -333,6 +334,7 @@ func 
TestReliableMessages(t *testing.T) { }, benchlist, prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) @@ -488,6 +490,7 @@ func TestReliableMessagesToMyself(t *testing.T) { }, benchlist, prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/snow/networking/timeout/manager.go b/snow/networking/timeout/manager.go index 85ea88abe52..573dbe712bc 100644 --- a/snow/networking/timeout/manager.go +++ b/snow/networking/timeout/manager.go @@ -71,25 +71,33 @@ type Manager interface { func NewManager( timeoutConfig *timer.AdaptiveTimeoutConfig, benchlistMgr benchlist.Manager, - reg prometheus.Registerer, + requestReg prometheus.Registerer, + responseReg prometheus.Registerer, ) (Manager, error) { tm, err := timer.NewAdaptiveTimeoutManager( timeoutConfig, - reg, + requestReg, ) if err != nil { return nil, fmt.Errorf("couldn't create timeout manager: %w", err) } + + m, err := newTimeoutMetrics(responseReg) + if err != nil { + return nil, fmt.Errorf("couldn't create timeout metrics: %w", err) + } + return &manager{ - benchlistMgr: benchlistMgr, tm: tm, + benchlistMgr: benchlistMgr, + metrics: m, }, nil } type manager struct { tm timer.AdaptiveTimeoutManager benchlistMgr benchlist.Manager - metrics metrics + metrics *timeoutMetrics stopOnce sync.Once } diff --git a/snow/networking/timeout/manager_test.go b/snow/networking/timeout/manager_test.go index 5ed1aef7fae..d6109002f61 100644 --- a/snow/networking/timeout/manager_test.go +++ b/snow/networking/timeout/manager_test.go @@ -28,6 +28,7 @@ func TestManagerFire(t *testing.T) { }, benchlist, prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(t, err) go manager.Dispatch() diff --git a/snow/networking/timeout/metrics.go b/snow/networking/timeout/metrics.go index 5ad61f8f33d..3f217d5f7ad 100644 --- a/snow/networking/timeout/metrics.go +++ b/snow/networking/timeout/metrics.go @@ -4,7 +4,6 @@ package timeout import ( - "fmt" "sync" "time" @@ -17,84 +16,61 
@@ import ( ) const ( - responseNamespace = "response" - opLabel = "op" + chainLabel = "chain" + opLabel = "op" ) -var opLabels = []string{opLabel} +var opLabels = []string{chainLabel, opLabel} -type metrics struct { - lock sync.Mutex - chainToMetrics map[ids.ID]*chainMetrics -} - -func (m *metrics) RegisterChain(ctx *snow.ConsensusContext) error { - m.lock.Lock() - defer m.lock.Unlock() +type timeoutMetrics struct { + messages *prometheus.CounterVec // chain + op + messageLatencies *prometheus.GaugeVec // chain + op - if m.chainToMetrics == nil { - m.chainToMetrics = map[ids.ID]*chainMetrics{} - } - if _, exists := m.chainToMetrics[ctx.ChainID]; exists { - return fmt.Errorf("chain %s has already been registered", ctx.ChainID) - } - // TODO: FIXME - cm, err := newChainMetrics(ctx.Registerer) - if err != nil { - return fmt.Errorf("couldn't create metrics for chain %s: %w", ctx.ChainID, err) - } - m.chainToMetrics[ctx.ChainID] = cm - return nil + lock sync.RWMutex + chainIDToAlias map[ids.ID]string } -// Record that a response of type [op] took [latency] -func (m *metrics) Observe(chainID ids.ID, op message.Op, latency time.Duration) { - m.lock.Lock() - defer m.lock.Unlock() - - cm, exists := m.chainToMetrics[chainID] - if !exists { - // TODO should this log an error? 
- return - } - cm.observe(op, latency) -} - -// chainMetrics contains message response time metrics for a chain -type chainMetrics struct { - messages *prometheus.CounterVec // op - messageLatencies *prometheus.GaugeVec // op -} - -func newChainMetrics(reg prometheus.Registerer) (*chainMetrics, error) { - cm := &chainMetrics{ +func newTimeoutMetrics(reg prometheus.Registerer) (*timeoutMetrics, error) { + m := &timeoutMetrics{ messages: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: responseNamespace, - Name: "messages", - Help: "number of responses", + Name: "messages", + Help: "number of responses", }, opLabels, ), messageLatencies: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: responseNamespace, - Name: "message_latencies", - Help: "message latencies (ns)", + Name: "message_latencies", + Help: "message latencies (ns)", }, opLabels, ), + chainIDToAlias: make(map[ids.ID]string), } - return cm, utils.Err( - reg.Register(cm.messages), - reg.Register(cm.messageLatencies), + return m, utils.Err( + reg.Register(m.messages), + reg.Register(m.messageLatencies), ) } -func (cm *chainMetrics) observe(op message.Op, latency time.Duration) { +func (m *timeoutMetrics) RegisterChain(ctx *snow.ConsensusContext) error { + m.lock.Lock() + defer m.lock.Unlock() + + m.chainIDToAlias[ctx.ChainID] = ctx.PrimaryAlias + return nil +} + +// Record that a response of type [op] took [latency] +func (m *timeoutMetrics) Observe(chainID ids.ID, op message.Op, latency time.Duration) { + m.lock.RLock() + defer m.lock.RUnlock() + labels := prometheus.Labels{ - opLabel: op.String(), + chainLabel: m.chainIDToAlias[chainID], + opLabel: op.String(), } - cm.messages.With(labels).Inc() - cm.messageLatencies.With(labels).Add(float64(latency)) + m.messages.With(labels).Inc() + m.messageLatencies.With(labels).Add(float64(latency)) } diff --git a/snow/snowtest/snowtest.go b/snow/snowtest/snowtest.go index a2e03249da1..3dd21befc26 100644 --- a/snow/snowtest/snowtest.go +++ 
b/snow/snowtest/snowtest.go @@ -40,6 +40,7 @@ func (noOpAcceptor) Accept(*snow.ConsensusContext, ids.ID, []byte) error { func ConsensusContext(ctx *snow.Context) *snow.ConsensusContext { return &snow.ConsensusContext{ Context: ctx, + PrimaryAlias: ctx.ChainID.String(), Registerer: prometheus.NewRegistry(), AvalancheRegisterer: prometheus.NewRegistry(), BlockAcceptor: noOpAcceptor{}, diff --git a/utils/metric/averager.go b/utils/metric/averager.go index e63e0007c0b..d84e7875276 100644 --- a/utils/metric/averager.go +++ b/utils/metric/averager.go @@ -23,23 +23,21 @@ type averager struct { sum prometheus.Gauge } -func NewAverager(namespace, name, desc string, reg prometheus.Registerer) (Averager, error) { +func NewAverager(name, desc string, reg prometheus.Registerer) (Averager, error) { errs := wrappers.Errs{} - a := NewAveragerWithErrs(namespace, name, desc, reg, &errs) + a := NewAveragerWithErrs(name, desc, reg, &errs) return a, errs.Err } -func NewAveragerWithErrs(namespace, name, desc string, reg prometheus.Registerer, errs *wrappers.Errs) Averager { +func NewAveragerWithErrs(name, desc string, reg prometheus.Registerer, errs *wrappers.Errs) Averager { a := averager{ count: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: name + "_count", - Help: "Total # of observations of " + desc, + Name: AppendNamespace(name, "count"), + Help: "Total # of observations of " + desc, }), sum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: name + "_sum", - Help: "Sum of " + desc, + Name: AppendNamespace(name, "sum"), + Help: "Sum of " + desc, }), } diff --git a/vms/metervm/metrics.go b/vms/metervm/metrics.go index 09d85a77058..4cad7d153f8 100644 --- a/vms/metervm/metrics.go +++ b/vms/metervm/metrics.go @@ -12,7 +12,6 @@ import ( func newAverager(name string, reg prometheus.Registerer, errs *wrappers.Errs) metric.Averager { return metric.NewAveragerWithErrs( - "", name, "time (in ns) of a "+name, reg, diff --git 
a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index ff5807ba2db..fcba1a412a6 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1415,6 +1415,7 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { }, benchlist, prometheus.NewRegistry(), + prometheus.NewRegistry(), ) require.NoError(err) From cd44e3c0694488445e93b7ea6a4674c3f0295918 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 20:56:31 -0400 Subject: [PATCH 22/53] use interfaces --- node/node.go | 2 +- snow/consensus/snowman/consensus_test.go | 2 +- vms/metervm/block_vm.go | 4 ++-- vms/metervm/vertex_vm.go | 4 ++-- vms/proposervm/config.go | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/node/node.go b/node/node.go index 367d50dfc2b..8575075265b 100644 --- a/node/node.go +++ b/node/node.go @@ -323,7 +323,7 @@ type Node struct { VertexAcceptorGroup snow.AcceptorGroup // Net runs the networking stack - networkRegisterer *prometheus.Registry + networkRegisterer prometheus.Registerer Net network.Network // The staking address will optionally be written to a process context diff --git a/snow/consensus/snowman/consensus_test.go b/snow/consensus/snowman/consensus_test.go index d52e7675951..5a5ed07ec86 100644 --- a/snow/consensus/snowman/consensus_test.go +++ b/snow/consensus/snowman/consensus_test.go @@ -1415,7 +1415,7 @@ func ErrorOnAddDecidedBlockTest(t *testing.T, factory Factory) { require.ErrorIs(err, errDuplicateAdd) } -func gatherCounterGauge(t *testing.T, reg *prometheus.Registry) map[string]float64 { +func gatherCounterGauge(t *testing.T, reg prometheus.Gatherer) map[string]float64 { ms, err := reg.Gather() require.NoError(t, err) mss := make(map[string]float64) diff --git a/vms/metervm/block_vm.go b/vms/metervm/block_vm.go index 3055d3b0279..da64f9af01d 100644 --- a/vms/metervm/block_vm.go +++ b/vms/metervm/block_vm.go @@ -31,13 +31,13 @@ type blockVM struct { ssVM block.StateSyncableVM blockMetrics - registry *prometheus.Registry + 
registry prometheus.Registerer clock mockable.Clock } func NewBlockVM( vm block.ChainVM, - reg *prometheus.Registry, + reg prometheus.Registerer, ) block.ChainVM { buildBlockVM, _ := vm.(block.BuildBlockWithContextChainVM) batchedVM, _ := vm.(block.BatchedChainVM) diff --git a/vms/metervm/vertex_vm.go b/vms/metervm/vertex_vm.go index 5ecfe8f4457..936a688de99 100644 --- a/vms/metervm/vertex_vm.go +++ b/vms/metervm/vertex_vm.go @@ -23,7 +23,7 @@ var ( func NewVertexVM( vm vertex.LinearizableVMWithEngine, - reg *prometheus.Registry, + reg prometheus.Registerer, ) vertex.LinearizableVMWithEngine { return &vertexVM{ LinearizableVMWithEngine: vm, @@ -34,7 +34,7 @@ func NewVertexVM( type vertexVM struct { vertex.LinearizableVMWithEngine vertexMetrics - registry *prometheus.Registry + registry prometheus.Registerer clock mockable.Clock } diff --git a/vms/proposervm/config.go b/vms/proposervm/config.go index 7a2f2893f0a..493f549d246 100644 --- a/vms/proposervm/config.go +++ b/vms/proposervm/config.go @@ -35,7 +35,7 @@ type Config struct { // Block certificate StakingCertLeaf *staking.Certificate - Registerer *prometheus.Registry + Registerer prometheus.Registerer } func (c *Config) IsDurangoActivated(timestamp time.Time) bool { From 8abf9deada98b010ae8b299251a87c2b2ebe2ef3 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 21:03:47 -0400 Subject: [PATCH 23/53] nits --- api/metrics/README.md | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/api/metrics/README.md b/api/metrics/README.md index cc63c7eece8..392cbefb8af 100644 --- a/api/metrics/README.md +++ b/api/metrics/README.md @@ -2,21 +2,22 @@ ```mermaid graph LR - A[avalanche] --> B[chain] - A --> C[network] - A --> D[api] - A --> E[db] - A --> F[go] - A --> G[health] - A --> H[system_resources] + A[avalanche] --> B[api] + A --> C[chain] + A --> D[db] + A --> E[health] + A --> F[network] + A --> G[process] + A --> H[requests] A --> I[resource_tracker] 
- A --> J[requests] - B -- $chainID --> K[$vmID] - B -- $chainID --> L[meterdb] - B -- $chainID --> M[meterchainvm] - B -- $chainID --> N[meterdagvm] - B -- $chainID --> O[proposervm] - B -- $chainID --> P[snowman] - B -- $chainID --> Q[avalanche] - B -- $chainID --> R[handler] + A --> J[responses] + A --> K[system_resources] + C -- $chainID --> L[avalanche] + C -- $chainID --> M[handler] + C -- $chainID --> N[meterchainvm] + C -- $chainID --> O[meterdagvm] + C -- $chainID --> P[meterdb] + C -- $chainID --> Q[proposervm] + C -- $chainID --> R[snowman] + C -- $chainID --> S[$vmID] ``` From 744eb79b48b217f777f825e1f49210474f5381fa Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 21:09:15 -0400 Subject: [PATCH 24/53] comments --- api/metrics/label_gatherer.go | 2 ++ api/metrics/prefix_gatherer.go | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/api/metrics/label_gatherer.go b/api/metrics/label_gatherer.go index e2f08279bb4..d0ad899ac31 100644 --- a/api/metrics/label_gatherer.go +++ b/api/metrics/label_gatherer.go @@ -20,6 +20,8 @@ var ( errDuplicateGatherer = errors.New("attempt to register duplicate gatherer") ) +// NewLabelGatherer returns a new MultiGatherer that merges metrics by adding a +// new label. func NewLabelGatherer(labelName string) MultiGatherer { return &labelGatherer{ labelName: labelName, diff --git a/api/metrics/prefix_gatherer.go b/api/metrics/prefix_gatherer.go index 24128ed9909..2d7cd98e840 100644 --- a/api/metrics/prefix_gatherer.go +++ b/api/metrics/prefix_gatherer.go @@ -20,6 +20,8 @@ var ( errOverlappingNamespaces = errors.New("prefix could create overlapping namespaces") ) +// NewPrefixGatherer returns a new MultiGatherer that merges metrics by adding a +// prefix to their names. 
func NewPrefixGatherer() MultiGatherer { return &prefixGatherer{} } @@ -78,7 +80,11 @@ func (g *prefixedGatherer) Gather() ([]*dto.MetricFamily, error) { } // eitherIsPrefix returns true if either [a] is a prefix of [b] or [b] is a -// prefix of [a] +// prefix of [a]. +// +// This function accounts for the usage of the namespace boundary, so "hello" is +// not considered a prefix of "helloworld". However, "hello" is considered a +// prefix of "hello_world". func eitherIsPrefix(a, b string) bool { if len(a) > len(b) { a, b = b, a From 50e91724a1cf278f8043e8020ba9965b8fe444e9 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 21:15:03 -0400 Subject: [PATCH 25/53] nir --- api/metrics/multi_gatherer.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/metrics/multi_gatherer.go b/api/metrics/multi_gatherer.go index ef74cf87c80..b2fede55643 100644 --- a/api/metrics/multi_gatherer.go +++ b/api/metrics/multi_gatherer.go @@ -23,6 +23,8 @@ type MultiGatherer interface { } // Deprecated: Use NewPrefixGatherer instead. +// +// TODO: Remove once coreth is updated. 
func NewMultiGatherer() MultiGatherer { return NewPrefixGatherer() } From bb2eab12a30711e8f7ecd62280a21f6393c89ec1 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 21:25:59 -0400 Subject: [PATCH 26/53] make const --- chains/manager.go | 20 +++++++++++--------- node/node.go | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 31b171608e6..c40d89e7404 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -86,6 +86,8 @@ const ( proposervmNamespace = ChainNamespace + metric.NamespaceSeparator + "proposervm" meterchainvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meterchainvm" meterdagvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meterdagvm" + + ChainLabel = "chain" ) var ( @@ -285,42 +287,42 @@ type manager struct { // New returns a new Manager func New(config *ManagerConfig) (Manager, error) { - handlerGatherer := metrics.NewLabelGatherer("chain") + handlerGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(handlerNamespace, handlerGatherer); err != nil { return nil, err } - stakeGatherer := metrics.NewLabelGatherer("chain") + stakeGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(stakeNamespace, stakeGatherer); err != nil { return nil, err } - p2pGatherer := metrics.NewLabelGatherer("chain") + p2pGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(p2pNamespace, p2pGatherer); err != nil { return nil, err } - snowmanGatherer := metrics.NewLabelGatherer("chain") + snowmanGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(snowmanNamespace, snowmanGatherer); err != nil { return nil, err } - avalancheGatherer := metrics.NewLabelGatherer("chain") + avalancheGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(avalancheNamespace, avalancheGatherer); err != nil { return nil, err } - proposervmGatherer := 
metrics.NewLabelGatherer("chain") + proposervmGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(proposervmNamespace, proposervmGatherer); err != nil { return nil, err } - meterChainVMGatherer := metrics.NewLabelGatherer("chain") + meterChainVMGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(meterchainvmNamespace, meterChainVMGatherer); err != nil { return nil, err } - meterDAGVMGatherer := metrics.NewLabelGatherer("chain") + meterDAGVMGatherer := metrics.NewLabelGatherer(ChainLabel) if err := config.Metrics.Register(meterdagvmNamespace, meterDAGVMGatherer); err != nil { return nil, err } @@ -1560,7 +1562,7 @@ func (m *manager) getChainConfig(id ids.ID) (ChainConfig, error) { func (m *manager) getOrMakeVMRegisterer(vmID ids.ID, chainAlias string) (metrics.MultiGatherer, error) { vmGatherer, ok := m.vmGatherer[vmID] if !ok { - vmGatherer = metrics.NewLabelGatherer("chain") + vmGatherer = metrics.NewLabelGatherer(ChainLabel) // TODO: Cleanup vm aliasing var vmIDStr string diff --git a/node/node.go b/node/node.go index 8575075265b..bb151ff9400 100644 --- a/node/node.go +++ b/node/node.go @@ -533,7 +533,7 @@ func (n *Node) initNetworking() error { // Configure benchlist n.Config.BenchlistConfig.Validators = n.vdrs n.Config.BenchlistConfig.Benchable = n.chainRouter - n.Config.BenchlistConfig.BenchlistRegisterer = metrics.NewLabelGatherer("chain") + n.Config.BenchlistConfig.BenchlistRegisterer = metrics.NewLabelGatherer(chains.ChainLabel) err = n.MetricsGatherer.Register( benchlistNamespace, @@ -915,7 +915,7 @@ func (n *Node) initChains(genesisBytes []byte) error { func (n *Node) initMetrics() error { n.MetricsGatherer = metrics.NewPrefixGatherer() - n.MeterDBMetricsGatherer = metrics.NewLabelGatherer("chain") + n.MeterDBMetricsGatherer = metrics.NewLabelGatherer(chains.ChainLabel) return n.MetricsGatherer.Register( meterDBNamespace, n.MeterDBMetricsGatherer, From 6e41c15692347817cfcfcb72899e504bd4d5a6e9 
Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 28 May 2024 21:27:24 -0400 Subject: [PATCH 27/53] nit --- chains/manager.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index c40d89e7404..8dcfdc1e324 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -1175,9 +1175,8 @@ func (m *manager) createSnowmanChain( zap.Uint64("numHistoricalBlocks", numHistoricalBlocks), ) - chainAlias := m.PrimaryAliasOrDefault(ctx.ChainID) if m.TracingEnabled { - vm = tracedvm.NewBlockVM(vm, chainAlias, m.Tracer) + vm = tracedvm.NewBlockVM(vm, primaryAlias, m.Tracer) } proposervmReg, err := metrics.MakeAndRegister( @@ -1412,12 +1411,12 @@ func (m *manager) createSnowmanChain( }) // Register health checks - if err := m.Health.RegisterHealthCheck(chainAlias, h, ctx.SubnetID.String()); err != nil { - return nil, fmt.Errorf("couldn't add health check for chain %s: %w", chainAlias, err) + if err := m.Health.RegisterHealthCheck(primaryAlias, h, ctx.SubnetID.String()); err != nil { + return nil, fmt.Errorf("couldn't add health check for chain %s: %w", primaryAlias, err) } return &chain{ - Name: chainAlias, + Name: primaryAlias, Context: ctx, VM: vm, Handler: h, From 131bcc388cf0815f326671fe73cca54f727744dc Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 29 May 2024 17:03:03 -0400 Subject: [PATCH 28/53] nits --- chains/manager.go | 6 +-- snow/consensus/snowman/consensus_test.go | 8 ++-- snow/consensus/snowman/metrics.go | 15 ------- snow/consensus/snowman/topological.go | 2 +- snow/context.go | 11 ++--- snow/engine/snowman/bootstrap/bootstrapper.go | 2 +- .../snowman/bootstrap/bootstrapper_test.go | 6 +-- snow/engine/snowman/syncer/utils_test.go | 2 +- snow/engine/snowman/transitive.go | 6 +-- snow/engine/snowman/transitive_test.go | 2 +- snow/networking/sender/sender.go | 2 +- snow/networking/sender/sender_test.go | 6 +-- snow/snowtest/snowtest.go | 2 +- tests/e2e/x/transfer/virtuous.go | 19 +++++---- 
tests/http.go | 40 ++++++++++++++----- vms/platformvm/vm_test.go | 4 +- 16 files changed, 69 insertions(+), 64 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 8dcfdc1e324..ac3c75e382b 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -538,7 +538,7 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c ChainDataDir: chainDataDir, }, PrimaryAlias: primaryAlias, - Registerer: snowmanMetrics, + SnowmanRegisterer: snowmanMetrics, AvalancheRegisterer: avalancheMetrics, BlockAcceptor: m.BlockAcceptorGroup, TxAcceptor: m.TxAcceptorGroup, @@ -918,7 +918,7 @@ func (m *manager) createAvalancheChain( ctx.Log, m.BootstrapMaxTimeGetAncestors, m.BootstrapAncestorsMaxContainersSent, - ctx.Registerer, + ctx.SnowmanRegisterer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize snow base message handler: %w", err) @@ -1314,7 +1314,7 @@ func (m *manager) createSnowmanChain( ctx.Log, m.BootstrapMaxTimeGetAncestors, m.BootstrapAncestorsMaxContainersSent, - ctx.Registerer, + ctx.SnowmanRegisterer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize snow base message handler: %w", err) diff --git a/snow/consensus/snowman/consensus_test.go b/snow/consensus/snowman/consensus_test.go index 5a5ed07ec86..49d02064071 100644 --- a/snow/consensus/snowman/consensus_test.go +++ b/snow/consensus/snowman/consensus_test.go @@ -507,7 +507,7 @@ func RecordPollSplitVoteNoChangeTest(t *testing.T, factory Factory) { snowCtx := snowtest.Context(t, snowtest.CChainID) ctx := snowtest.ConsensusContext(snowCtx) registerer := prometheus.NewRegistry() - ctx.Registerer = registerer + ctx.SnowmanRegisterer = registerer params := snowball.Parameters{ K: 2, @@ -1115,7 +1115,7 @@ func MetricsProcessingErrorTest(t *testing.T, factory Factory) { Name: "blks_processing", }) - require.NoError(ctx.Registerer.Register(numProcessing)) + require.NoError(ctx.SnowmanRegisterer.Register(numProcessing)) err := sm.Initialize( ctx, @@ -1149,7 +1149,7 @@ 
func MetricsAcceptedErrorTest(t *testing.T, factory Factory) { Name: "blks_accepted_count", }) - require.NoError(ctx.Registerer.Register(numAccepted)) + require.NoError(ctx.SnowmanRegisterer.Register(numAccepted)) err := sm.Initialize( ctx, @@ -1183,7 +1183,7 @@ func MetricsRejectedErrorTest(t *testing.T, factory Factory) { Name: "blks_rejected_count", }) - require.NoError(ctx.Registerer.Register(numRejected)) + require.NoError(ctx.SnowmanRegisterer.Register(numRejected)) err := sm.Initialize( ctx, diff --git a/snow/consensus/snowman/metrics.go b/snow/consensus/snowman/metrics.go index 24065a8cf8c..575a29a81ea 100644 --- a/snow/consensus/snowman/metrics.go +++ b/snow/consensus/snowman/metrics.go @@ -87,9 +87,6 @@ func newMetrics( }), processingBlocks: linked.NewHashmap[ids.ID, processingStart](), - - // e.g., - // "avalanche_X_blks_processing" reports how many blocks are currently processing numProcessing: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "blks_processing", Help: "number of currently processing blocks", @@ -105,12 +102,6 @@ func newMetrics( reg, &errs, ), - // e.g., - // "avalanche_C_blks_accepted_count" reports how many times "Observe" has been called which is the total number of blocks accepted - // "avalanche_C_blks_accepted_sum" reports the cumulative sum of all block acceptance latencies in nanoseconds - // "avalanche_C_blks_accepted_sum / avalanche_C_blks_accepted_count" is the average block acceptance latency in nanoseconds - // "avalanche_C_blks_accepted_container_size_sum" reports the cumulative sum of all accepted blocks' sizes in bytes - // "avalanche_C_blks_accepted_container_size_sum / avalanche_C_blks_accepted_count" is the average accepted block size in bytes latAccepted: metric.NewAveragerWithErrs( "blks_accepted", "time (in ns) from the issuance of a block to its acceptance", @@ -132,12 +123,6 @@ func newMetrics( reg, &errs, ), - // e.g., - // "avalanche_P_blks_rejected_count" reports how many times "Observe" has been called which is 
the total number of blocks rejected - // "avalanche_P_blks_rejected_sum" reports the cumulative sum of all block rejection latencies in nanoseconds - // "avalanche_P_blks_rejected_sum / avalanche_P_blks_rejected_count" is the average block rejection latency in nanoseconds - // "avalanche_P_blks_rejected_container_size_sum" reports the cumulative sum of all rejected blocks' sizes in bytes - // "avalanche_P_blks_rejected_container_size_sum / avalanche_P_blks_rejected_count" is the average rejected block size in bytes latRejected: metric.NewAveragerWithErrs( "blks_rejected", "time (in ns) from the issuance of a block to its rejection", diff --git a/snow/consensus/snowman/topological.go b/snow/consensus/snowman/topological.go index f2ef015654c..79a817c1a19 100644 --- a/snow/consensus/snowman/topological.go +++ b/snow/consensus/snowman/topological.go @@ -111,7 +111,7 @@ func (ts *Topological) Initialize( ts.metrics, err = newMetrics( ctx.Log, - ctx.Registerer, + ctx.SnowmanRegisterer, lastAcceptedHeight, lastAcceptedTime, ) diff --git a/snow/context.go b/snow/context.go index d8748d0e1ae..1ef471b302a 100644 --- a/snow/context.go +++ b/snow/context.go @@ -69,14 +69,9 @@ type ConsensusContext struct { // within. PrimaryAlias string - // Registers all common and snowman consensus metrics. Unlike the avalanche - // consensus engine metrics, we do not prefix the name with the engine name, - // as snowman is used for all chains by default. - Registerer Registerer - // Only used to register Avalanche consensus metrics. Previously, all - // metrics were prefixed with "avalanche_{chainID}_". Now we add avalanche - // to the prefix, "avalanche_{chainID}_avalanche_", to differentiate - // consensus operations after the DAG linearization. + // Registers all snowman consensus metrics. + SnowmanRegisterer Registerer + // Registers all avalanche consensus metrics. 
AvalancheRegisterer Registerer // BlockAcceptor is the callback that will be fired whenever a VM is diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 095ba4e63b1..0fbeade5879 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -116,7 +116,7 @@ type Bootstrapper struct { } func New(config Config, onFinished func(ctx context.Context, lastReqID uint32) error) (*Bootstrapper, error) { - metrics, err := newMetrics("bs", config.Ctx.Registerer) + metrics, err := newMetrics("bs", config.Ctx.SnowmanRegisterer) return &Bootstrapper{ Config: config, metrics: metrics, diff --git a/snow/engine/snowman/bootstrap/bootstrapper_test.go b/snow/engine/snowman/bootstrap/bootstrapper_test.go index 5577f62fa81..85a971ebab2 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper_test.go +++ b/snow/engine/snowman/bootstrap/bootstrapper_test.go @@ -76,7 +76,7 @@ func newConfig(t *testing.T) (Config, ids.NodeID, *common.SenderTest, *block.Tes require.NoError(startupTracker.Connected(context.Background(), peer, version.CurrentApp)) - snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.Registerer) + snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.SnowmanRegisterer) require.NoError(err) peerTracker, err := p2p.NewPeerTracker( @@ -127,7 +127,7 @@ func TestBootstrapperStartsOnlyIfEnoughStakeIsConnected(t *testing.T) { startupTracker := tracker.NewStartup(tracker.NewPeers(), startupAlpha) peers.RegisterSetCallbackListener(ctx.SubnetID, startupTracker) - snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.Registerer) + snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.SnowmanRegisterer) require.NoError(err) peerTracker, err := p2p.NewPeerTracker( @@ -642,7 +642,7 @@ func TestBootstrapNoParseOnNew(t *testing.T) { peers.RegisterSetCallbackListener(ctx.SubnetID, 
startupTracker) require.NoError(startupTracker.Connected(context.Background(), peer, version.CurrentApp)) - snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.Registerer) + snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.SnowmanRegisterer) require.NoError(err) blk1 := snowmantest.BuildChild(snowmantest.Genesis) diff --git a/snow/engine/snowman/syncer/utils_test.go b/snow/engine/snowman/syncer/utils_test.go index a5217a4bf0d..72a5083e0ba 100644 --- a/snow/engine/snowman/syncer/utils_test.go +++ b/snow/engine/snowman/syncer/utils_test.go @@ -96,7 +96,7 @@ func buildTestsObjects( ctx.Log, time.Second, 2000, - ctx.Registerer, + ctx.SnowmanRegisterer, ) require.NoError(err) diff --git a/snow/engine/snowman/transitive.go b/snow/engine/snowman/transitive.go index 9e89fedd22b..d1e2c2aae2c 100644 --- a/snow/engine/snowman/transitive.go +++ b/snow/engine/snowman/transitive.go @@ -98,7 +98,7 @@ func New(config Config) (*Transitive, error) { nonVerifiedCache, err := metercacher.New[ids.ID, snowman.Block]( "non_verified_cache", - config.Ctx.Registerer, + config.Ctx.SnowmanRegisterer, cache.NewSizedLRU[ids.ID, snowman.Block]( nonVerifiedCacheSize, cachedBlockSize, @@ -118,13 +118,13 @@ func New(config Config) (*Transitive, error) { polls, err := poll.NewSet( factory, config.Ctx.Log, - config.Ctx.Registerer, + config.Ctx.SnowmanRegisterer, ) if err != nil { return nil, err } - metrics, err := newMetrics(config.Ctx.Registerer) + metrics, err := newMetrics(config.Ctx.SnowmanRegisterer) if err != nil { return nil, err } diff --git a/snow/engine/snowman/transitive_test.go b/snow/engine/snowman/transitive_test.go index 2961b018c8c..1ce546dd0e9 100644 --- a/snow/engine/snowman/transitive_test.go +++ b/snow/engine/snowman/transitive_test.go @@ -103,7 +103,7 @@ func setup(t *testing.T, config Config) (ids.NodeID, validators.Manager, *common config.Ctx.Log, time.Second, 2000, - config.Ctx.Registerer, + config.Ctx.SnowmanRegisterer, 
) require.NoError(err) config.AllGetsServer = snowGetHandler diff --git a/snow/networking/sender/sender.go b/snow/networking/sender/sender.go index 37076972fe1..317d2cc3a7f 100644 --- a/snow/networking/sender/sender.go +++ b/snow/networking/sender/sender.go @@ -79,7 +79,7 @@ func New( var reg prometheus.Registerer switch engineType { case p2p.EngineType_ENGINE_TYPE_SNOWMAN: - reg = ctx.Registerer + reg = ctx.SnowmanRegisterer case p2p.EngineType_ENGINE_TYPE_AVALANCHE: reg = ctx.AvalancheRegisterer default: diff --git a/snow/networking/sender/sender_test.go b/snow/networking/sender/sender_test.go index 5ea4cda4d95..1c7b1b22008 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -833,7 +833,7 @@ func TestSender_Bootstrap_Requests(t *testing.T) { // Instantiate new registerers to avoid duplicate metrics // registration - ctx.Registerer = prometheus.NewRegistry() + ctx.SnowmanRegisterer = prometheus.NewRegistry() sender, err := New( ctx, @@ -1051,7 +1051,7 @@ func TestSender_Bootstrap_Responses(t *testing.T) { // Instantiate new registerers to avoid duplicate metrics // registration - ctx.Registerer = prometheus.NewRegistry() + ctx.SnowmanRegisterer = prometheus.NewRegistry() ctx.AvalancheRegisterer = prometheus.NewRegistry() sender, err := New( @@ -1218,7 +1218,7 @@ func TestSender_Single_Request(t *testing.T) { // Instantiate new registerers to avoid duplicate metrics // registration - ctx.Registerer = prometheus.NewRegistry() + ctx.SnowmanRegisterer = prometheus.NewRegistry() sender, err := New( ctx, diff --git a/snow/snowtest/snowtest.go b/snow/snowtest/snowtest.go index 3dd21befc26..399ab5fac04 100644 --- a/snow/snowtest/snowtest.go +++ b/snow/snowtest/snowtest.go @@ -41,7 +41,7 @@ func ConsensusContext(ctx *snow.Context) *snow.ConsensusContext { return &snow.ConsensusContext{ Context: ctx, PrimaryAlias: ctx.ChainID.String(), - Registerer: prometheus.NewRegistry(), + SnowmanRegisterer: prometheus.NewRegistry(), 
AvalancheRegisterer: prometheus.NewRegistry(), BlockAcceptor: noOpAcceptor{}, TxAcceptor: noOpAcceptor{}, diff --git a/tests/e2e/x/transfer/virtuous.go b/tests/e2e/x/transfer/virtuous.go index 994c6a84505..253110e88ca 100644 --- a/tests/e2e/x/transfer/virtuous.go +++ b/tests/e2e/x/transfer/virtuous.go @@ -9,6 +9,7 @@ import ( "math/rand" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/ava-labs/avalanchego/ids" @@ -28,10 +29,14 @@ import ( const ( totalRounds = 50 - xBlksProcessingMetric = "avalanche_X_blks_processing" - xBlksAcceptedMetric = "avalanche_X_blks_accepted_count" + blksProcessingMetric = "avalanche_chain_snowman_blks_processing" + blksAcceptedMetric = "avalanche_chain_snowman_blks_accepted_count" ) +var xChainMetricLabels = prometheus.Labels{ + "chain": "X", +} + // This test requires that the network not have ongoing blocks and // cannot reliably be run in parallel. var _ = e2e.DescribeXChainSerial("[Virtuous Transfer Tx AVAX]", func() { @@ -55,7 +60,7 @@ var _ = e2e.DescribeXChainSerial("[Virtuous Transfer Tx AVAX]", func() { require.NoError(err) for _, metrics := range allNodeMetrics { - xBlksProcessing, ok := tests.GetFirstMetricValue(metrics, xBlksProcessingMetric) + xBlksProcessing, ok := tests.GetMetricValue(metrics, blksProcessingMetric, xChainMetricLabels) if !ok || xBlksProcessing > 0 { return false } @@ -248,13 +253,13 @@ RECEIVER NEW BALANCE (AFTER) : %21d AVAX // +0 since X-chain tx must have been processed and accepted // by now - currentXBlksProcessing, _ := tests.GetFirstMetricValue(mm, xBlksProcessingMetric) - previousXBlksProcessing, _ := tests.GetFirstMetricValue(prev, xBlksProcessingMetric) + currentXBlksProcessing, _ := tests.GetMetricValue(mm, blksProcessingMetric, xChainMetricLabels) + previousXBlksProcessing, _ := tests.GetMetricValue(prev, blksProcessingMetric, xChainMetricLabels) require.Equal(currentXBlksProcessing, previousXBlksProcessing) // +1 since X-chain tx must 
have been accepted by now - currentXBlksAccepted, _ := tests.GetFirstMetricValue(mm, xBlksAcceptedMetric) - previousXBlksAccepted, _ := tests.GetFirstMetricValue(prev, xBlksAcceptedMetric) + currentXBlksAccepted, _ := tests.GetMetricValue(mm, blksAcceptedMetric, xChainMetricLabels) + previousXBlksAccepted, _ := tests.GetMetricValue(prev, blksAcceptedMetric, xChainMetricLabels) require.Equal(currentXBlksAccepted, previousXBlksAccepted+1) metricsBeforeTx[u] = mm diff --git a/tests/http.go b/tests/http.go index b5a7b0ffe09..4a02a7481fd 100644 --- a/tests/http.go +++ b/tests/http.go @@ -9,6 +9,7 @@ import ( "github.com/ava-labs/avalanchego/api/metrics" + "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" ) @@ -37,19 +38,38 @@ func GetNodesMetrics(ctx context.Context, nodeURIs []string) (NodesMetrics, erro return metrics, nil } -func GetFirstMetricValue(metrics NodeMetrics, name string) (float64, bool) { +func GetMetricValue(metrics NodeMetrics, name string, labels prometheus.Labels) (float64, bool) { metricFamily, ok := metrics[name] - if !ok || len(metricFamily.Metric) < 1 { + if !ok { return 0, false } - metric := metricFamily.Metric[0] - switch { - case metric.Gauge != nil: - return metric.Gauge.GetValue(), true - case metric.Counter != nil: - return metric.Counter.GetValue(), true - default: - return 0, false + for _, metric := range metricFamily.Metric { + if !labelsMatch(metric, labels) { + continue + } + + switch { + case metric.Gauge != nil: + return metric.Gauge.GetValue(), true + case metric.Counter != nil: + return metric.Counter.GetValue(), true + } + } + return 0, false +} + +func labelsMatch(metric *dto.Metric, labels prometheus.Labels) bool { + var found int + for _, label := range metric.Label { + expectedValue, ok := labels[label.GetName()] + if !ok { + continue + } + if label.GetValue() != expectedValue { + return false + } + found++ } + return found == len(labels) } diff --git a/vms/platformvm/vm_test.go 
b/vms/platformvm/vm_test.go index fcba1a412a6..9bd9baa8836 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1485,14 +1485,14 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { consensusCtx.Log, time.Second, 2000, - consensusCtx.Registerer, + consensusCtx.SnowmanRegisterer, ) require.NoError(err) peerTracker, err := p2p.NewPeerTracker( ctx.Log, "peer_tracker", - consensusCtx.Registerer, + consensusCtx.SnowmanRegisterer, set.Of(ctx.NodeID), nil, ) From 43b00f7956cc1c1242d584dbacf7a46535f1e250 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 29 May 2024 17:09:10 -0400 Subject: [PATCH 29/53] nit --- tests/http.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/http.go b/tests/http.go index 4a02a7481fd..2249ef7bc1b 100644 --- a/tests/http.go +++ b/tests/http.go @@ -7,9 +7,10 @@ import ( "context" "fmt" + "github.com/prometheus/client_golang/prometheus" + "github.com/ava-labs/avalanchego/api/metrics" - "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" ) From 3fce9d913976dd119c7c72bd261a146f1833f253 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 29 May 2024 23:00:33 -0400 Subject: [PATCH 30/53] Add constants.VMName function --- tests/fixture/subnet/xsvm.go | 4 +-- utils/constants/vm_ids.go | 29 +++++++++++++++++++ vms/example/xsvm/cmd/chain/create/cmd.go | 4 +-- vms/example/xsvm/cmd/version/cmd.go | 5 ++-- vms/example/xsvm/constants.go | 21 ++++---------- vms/example/xsvm/vm.go | 3 +- .../primary/examples/create-chain/main.go | 4 +-- 7 files changed, 46 insertions(+), 24 deletions(-) diff --git a/tests/fixture/subnet/xsvm.go b/tests/fixture/subnet/xsvm.go index 28fb017da5a..c5bb03bc202 100644 --- a/tests/fixture/subnet/xsvm.go +++ b/tests/fixture/subnet/xsvm.go @@ -8,8 +8,8 @@ import ( "time" "github.com/ava-labs/avalanchego/tests/fixture/tmpnet" + "github.com/ava-labs/avalanchego/utils/constants" "github.com/ava-labs/avalanchego/utils/crypto/secp256k1" 
- "github.com/ava-labs/avalanchego/vms/example/xsvm" "github.com/ava-labs/avalanchego/vms/example/xsvm/genesis" ) @@ -35,7 +35,7 @@ func NewXSVMOrPanic(name string, key *secp256k1.PrivateKey, nodes ...*tmpnet.Nod Name: name, Chains: []*tmpnet.Chain{ { - VMID: xsvm.ID, + VMID: constants.XSVMID, Genesis: genesisBytes, PreFundedKey: key, }, diff --git a/utils/constants/vm_ids.go b/utils/constants/vm_ids.go index 9fda498f1f3..c0c4773590a 100644 --- a/utils/constants/vm_ids.go +++ b/utils/constants/vm_ids.go @@ -5,8 +5,37 @@ package constants import "github.com/ava-labs/avalanchego/ids" +const ( + PlatformVMName = "platformvm" + AVMName = "avm" + EVMName = "evm" + SubnetEVMName = "subnetevm" + XSVMName = "xsvm" +) + var ( PlatformVMID = ids.ID{'p', 'l', 'a', 't', 'f', 'o', 'r', 'm', 'v', 'm'} AVMID = ids.ID{'a', 'v', 'm'} EVMID = ids.ID{'e', 'v', 'm'} + SubnetEVMID = ids.ID{'s', 'u', 'b', 'n', 'e', 't', 'e', 'v', 'm'} + XSVMID = ids.ID{'x', 's', 'v', 'm'} ) + +// VMName returns the name of the VM with the provided ID. If a human readable +// name isn't known, then the formatted ID is returned. 
+func VMName(vmID ids.ID) string { + switch vmID { + case PlatformVMID: + return PlatformVMName + case AVMID: + return AVMName + case EVMID: + return EVMName + case SubnetEVMID: + return SubnetEVMName + case XSVMID: + return XSVMName + default: + return vmID.String() + } +} diff --git a/vms/example/xsvm/cmd/chain/create/cmd.go b/vms/example/xsvm/cmd/chain/create/cmd.go index 984ff45df8b..a08edf50711 100644 --- a/vms/example/xsvm/cmd/chain/create/cmd.go +++ b/vms/example/xsvm/cmd/chain/create/cmd.go @@ -9,8 +9,8 @@ import ( "github.com/spf13/cobra" + "github.com/ava-labs/avalanchego/utils/constants" "github.com/ava-labs/avalanchego/utils/set" - "github.com/ava-labs/avalanchego/vms/example/xsvm" "github.com/ava-labs/avalanchego/vms/example/xsvm/genesis" "github.com/ava-labs/avalanchego/vms/secp256k1fx" "github.com/ava-labs/avalanchego/wallet/subnet/primary" @@ -72,7 +72,7 @@ func createFunc(c *cobra.Command, args []string) error { createChainTxID, err := pWallet.IssueCreateChainTx( config.SubnetID, genesisBytes, - xsvm.ID, + constants.XSVMID, nil, config.Name, common.WithContext(ctx), diff --git a/vms/example/xsvm/cmd/version/cmd.go b/vms/example/xsvm/cmd/version/cmd.go index 1c956c6a9b0..471ccfd10aa 100644 --- a/vms/example/xsvm/cmd/version/cmd.go +++ b/vms/example/xsvm/cmd/version/cmd.go @@ -8,6 +8,7 @@ import ( "github.com/spf13/cobra" + "github.com/ava-labs/avalanchego/utils/constants" "github.com/ava-labs/avalanchego/version" "github.com/ava-labs/avalanchego/vms/example/xsvm" ) @@ -29,8 +30,8 @@ func Command() *cobra.Command { func versionFunc(*cobra.Command, []string) error { fmt.Printf( format, - xsvm.Name, - xsvm.ID, + constants.XSVMName, + constants.XSVMID, xsvm.Version, version.RPCChainVMProtocol, ) diff --git a/vms/example/xsvm/constants.go b/vms/example/xsvm/constants.go index eb2199211ef..7628cc56b17 100644 --- a/vms/example/xsvm/constants.go +++ b/vms/example/xsvm/constants.go @@ -3,19 +3,10 @@ package xsvm -import ( - 
"github.com/ava-labs/avalanchego/ids" - "github.com/ava-labs/avalanchego/version" -) +import "github.com/ava-labs/avalanchego/version" -const Name = "xsvm" - -var ( - ID = ids.ID{'x', 's', 'v', 'm'} - - Version = &version.Semantic{ - Major: 1, - Minor: 0, - Patch: 4, - } -) +var Version = &version.Semantic{ + Major: 1, + Minor: 0, + Patch: 4, +} diff --git a/vms/example/xsvm/vm.go b/vms/example/xsvm/vm.go index ef59e6c5165..526fc47c499 100644 --- a/vms/example/xsvm/vm.go +++ b/vms/example/xsvm/vm.go @@ -17,6 +17,7 @@ import ( "github.com/ava-labs/avalanchego/snow" "github.com/ava-labs/avalanchego/snow/consensus/snowman" "github.com/ava-labs/avalanchego/snow/engine/common" + "github.com/ava-labs/avalanchego/utils/constants" "github.com/ava-labs/avalanchego/utils/json" "github.com/ava-labs/avalanchego/version" "github.com/ava-labs/avalanchego/vms/example/xsvm/api" @@ -124,7 +125,7 @@ func (vm *VM) CreateHandlers(context.Context) (map[string]http.Handler, error) { ) return map[string]http.Handler{ "": server, - }, server.RegisterService(api, Name) + }, server.RegisterService(api, constants.XSVMName) } func (*VM) HealthCheck(context.Context) (interface{}, error) { diff --git a/wallet/subnet/primary/examples/create-chain/main.go b/wallet/subnet/primary/examples/create-chain/main.go index 0382b8bb815..c626086bce8 100644 --- a/wallet/subnet/primary/examples/create-chain/main.go +++ b/wallet/subnet/primary/examples/create-chain/main.go @@ -11,8 +11,8 @@ import ( "github.com/ava-labs/avalanchego/genesis" "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/utils/constants" "github.com/ava-labs/avalanchego/utils/set" - "github.com/ava-labs/avalanchego/vms/example/xsvm" "github.com/ava-labs/avalanchego/vms/secp256k1fx" "github.com/ava-labs/avalanchego/wallet/subnet/primary" @@ -33,7 +33,7 @@ func main() { }, }, } - vmID := xsvm.ID + vmID := constants.XSVMID name := "let there" subnetID, err := ids.FromString(subnetIDStr) From 
3532c18c47ab423c7075ac23a4796f6e269f7f99 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 29 May 2024 23:05:17 -0400 Subject: [PATCH 31/53] nit --- chains/manager.go | 2 +- genesis/aliases.go | 4 ++-- genesis/genesis.go | 2 +- genesis/genesis_test.go | 6 +++--- node/node.go | 4 ++-- utils/constants/vm_ids.go | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index ac3c75e382b..180f4988d50 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -1570,7 +1570,7 @@ func (m *manager) getOrMakeVMRegisterer(vmID ids.ID, chainAlias string) (metrics vmIDStr = "platformvm" case constants.AVMID: vmIDStr = "avm" - case constants.CorethID: + case constants.EVMID: vmIDStr = "coreth" case constants.SubnetEVMID: vmIDStr = "subnetevm" diff --git a/genesis/aliases.go b/genesis/aliases.go index d2af6fbb0ed..2c0407d1122 100644 --- a/genesis/aliases.go +++ b/genesis/aliases.go @@ -22,7 +22,7 @@ var ( VMAliases = map[ids.ID][]string{ constants.PlatformVMID: {"platform"}, constants.AVMID: {"avm"}, - constants.CorethID: {"evm"}, + constants.EVMID: {"evm"}, secp256k1fx.ID: {"secp256k1fx"}, nftfx.ID: {"nftfx"}, propertyfx.ID: {"propertyfx"}, @@ -60,7 +60,7 @@ func Aliases(genesisBytes []byte) (map[string][]string, map[ids.ID][]string, err path.Join(constants.ChainAliasPrefix, "avm"), } chainAliases[chainID] = XChainAliases - case constants.CorethID: + case constants.EVMID: apiAliases[endpoint] = []string{ "C", "evm", diff --git a/genesis/genesis.go b/genesis/genesis.go index 29657739860..e25088a59a1 100644 --- a/genesis/genesis.go +++ b/genesis/genesis.go @@ -455,7 +455,7 @@ func FromConfig(config *Config) ([]byte, ids.ID, error) { { GenesisData: genesisStr, SubnetID: constants.PrimaryNetworkID, - VMID: constants.CorethID, + VMID: constants.EVMID, Name: "C-Chain", }, } diff --git a/genesis/genesis_test.go b/genesis/genesis_test.go index 24c39c36676..679fc05be91 100644 --- a/genesis/genesis_test.go +++ 
b/genesis/genesis_test.go @@ -390,7 +390,7 @@ func TestVMGenesis(t *testing.T) { expectedID: "2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM", }, { - vmID: constants.CorethID, + vmID: constants.EVMID, expectedID: "2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5", }, }, @@ -403,7 +403,7 @@ func TestVMGenesis(t *testing.T) { expectedID: "2JVSBoinj9C2J33VntvzYtVJNZdN2NKiwwKjcumHUWEb5DbBrm", }, { - vmID: constants.CorethID, + vmID: constants.EVMID, expectedID: "yH8D7ThNJkxmtkuv2jgBa4P1Rn3Qpr4pPr7QYNfcdoS6k6HWp", }, }, @@ -416,7 +416,7 @@ func TestVMGenesis(t *testing.T) { expectedID: "2eNy1mUFdmaxXNj1eQHUe7Np4gju9sJsEtWQ4MX3ToiNKuADed", }, { - vmID: constants.CorethID, + vmID: constants.EVMID, expectedID: "2CA6j5zYzasynPsFeNoqWkmTCt3VScMvXUZHbfDJ8k3oGzAPtU", }, }, diff --git a/node/node.go b/node/node.go index bb151ff9400..492f3bf59e9 100644 --- a/node/node.go +++ b/node/node.go @@ -1055,7 +1055,7 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { } xChainID := createAVMTx.ID() - createEVMTx, err := genesis.VMGenesis(n.Config.GenesisBytes, constants.CorethID) + createEVMTx, err := genesis.VMGenesis(n.Config.GenesisBytes, constants.EVMID) if err != nil { return err } @@ -1227,7 +1227,7 @@ func (n *Node) initVMs() error { EUpgradeTime: eUpgradeTime, }, }), - n.VMManager.RegisterFactory(context.TODO(), constants.CorethID, &coreth.Factory{}), + n.VMManager.RegisterFactory(context.TODO(), constants.EVMID, &coreth.Factory{}), ) if err != nil { return err diff --git a/utils/constants/vm_ids.go b/utils/constants/vm_ids.go index cbbee9a04e0..2d8882c7b71 100644 --- a/utils/constants/vm_ids.go +++ b/utils/constants/vm_ids.go @@ -8,6 +8,6 @@ import "github.com/ava-labs/avalanchego/ids" var ( PlatformVMID = ids.ID{'p', 'l', 'a', 't', 'f', 'o', 'r', 'm', 'v', 'm'} AVMID = ids.ID{'a', 'v', 'm'} - CorethID = ids.ID{'e', 'v', 'm'} + EVMID = ids.ID{'e', 'v', 'm'} SubnetEVMID = ids.ID{'s', 'u', 'b', 'n', 'e', 't', 'e', 'v', 'm'} ) From 
393342c0c9433f1d88fcaa8cb6ec6a7b8e4baf20 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 30 May 2024 11:29:58 -0400 Subject: [PATCH 32/53] Remove unused metrics namespaces --- chains/manager.go | 4 +- snow/consensus/snowman/metrics.go | 68 +++++---------- snow/consensus/snowman/poll/set.go | 8 +- snow/consensus/snowman/poll/set_test.go | 30 +++---- snow/consensus/snowman/topological.go | 1 - .../avalanche/bootstrap/bootstrapper.go | 2 +- snow/engine/avalanche/bootstrap/metrics.go | 25 ++---- snow/engine/avalanche/getter/getter.go | 4 +- snow/engine/common/tracker/peers.go | 17 ++-- snow/engine/snowman/bootstrap/bootstrapper.go | 2 +- snow/engine/snowman/bootstrap/metrics.go | 12 ++- snow/engine/snowman/getter/getter.go | 4 +- snow/engine/snowman/metrics.go | 83 ++++++++----------- snow/engine/snowman/transitive.go | 3 +- snow/engine/snowman/transitive_test.go | 2 +- snow/networking/sender/sender.go | 5 +- vms/avm/block/builder/builder_test.go | 2 +- vms/avm/metrics/metrics.go | 24 ++---- vms/avm/metrics/tx_metrics.go | 10 +-- vms/avm/vm.go | 2 +- vms/metervm/block_metrics.go | 55 ++++++------ vms/metervm/block_vm.go | 1 - vms/metervm/metrics.go | 4 +- vms/metervm/vertex_metrics.go | 17 ++-- vms/metervm/vertex_vm.go | 2 +- vms/platformvm/metrics/block_metrics.go | 12 +-- vms/platformvm/metrics/metrics.go | 49 +++++------ vms/platformvm/metrics/tx_metrics.go | 10 +-- vms/platformvm/vm.go | 2 +- 29 files changed, 180 insertions(+), 280 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index c5b79dd470e..7fee70b8f81 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -753,7 +753,7 @@ func (m *manager) createAvalancheChain( sampleK = int(bootstrapWeight) } - connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) + connectedValidators, err := tracker.NewMeteredPeers(ctx.Registerer) if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } @@ -1098,7 +1098,7 @@ func (m *manager) createSnowmanChain( 
sampleK = int(bootstrapWeight) } - connectedValidators, err := tracker.NewMeteredPeers("", ctx.Registerer) + connectedValidators, err := tracker.NewMeteredPeers(ctx.Registerer) if err != nil { return nil, fmt.Errorf("error creating peer tracker: %w", err) } diff --git a/snow/consensus/snowman/metrics.go b/snow/consensus/snowman/metrics.go index 6b48e868aaa..1db1bdbc1c5 100644 --- a/snow/consensus/snowman/metrics.go +++ b/snow/consensus/snowman/metrics.go @@ -65,7 +65,6 @@ type metrics struct { func newMetrics( log logging.Logger, - namespace string, reg prometheus.Registerer, lastAcceptedHeight uint64, lastAcceptedTime time.Time, @@ -75,82 +74,61 @@ func newMetrics( log: log, currentMaxVerifiedHeight: lastAcceptedHeight, maxVerifiedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "max_verified_height", - Help: "highest verified height", + Name: "max_verified_height", + Help: "highest verified height", }), lastAcceptedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "last_accepted_height", - Help: "last height accepted", + Name: "last_accepted_height", + Help: "last height accepted", }), lastAcceptedTimestamp: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "last_accepted_timestamp", - Help: "timestamp of the last accepted block in unix seconds", + Name: "last_accepted_timestamp", + Help: "timestamp of the last accepted block in unix seconds", }), processingBlocks: linked.NewHashmap[ids.ID, processingStart](), - // e.g., - // "avalanche_X_blks_processing" reports how many blocks are currently processing numProcessing: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_processing", - Help: "number of currently processing blocks", + Name: "blks_processing", + Help: "number of currently processing blocks", }), blockSizeAcceptedSum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_accepted_container_size_sum", - Help: 
"cumulative size of all accepted blocks", + Name: "blks_accepted_container_size_sum", + Help: "cumulative size of all accepted blocks", }), pollsAccepted: metric.NewAveragerWithErrs( - namespace, + "", "blks_polls_accepted", "number of polls from the issuance of a block to its acceptance", reg, &errs, ), - // e.g., - // "avalanche_C_blks_accepted_count" reports how many times "Observe" has been called which is the total number of blocks accepted - // "avalanche_C_blks_accepted_sum" reports the cumulative sum of all block acceptance latencies in nanoseconds - // "avalanche_C_blks_accepted_sum / avalanche_C_blks_accepted_count" is the average block acceptance latency in nanoseconds - // "avalanche_C_blks_accepted_container_size_sum" reports the cumulative sum of all accepted blocks' sizes in bytes - // "avalanche_C_blks_accepted_container_size_sum / avalanche_C_blks_accepted_count" is the average accepted block size in bytes latAccepted: metric.NewAveragerWithErrs( - namespace, + "", "blks_accepted", "time (in ns) from the issuance of a block to its acceptance", reg, &errs, ), buildLatencyAccepted: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_build_accept_latency", - Help: "time (in ns) from the timestamp of a block to the time it was accepted", + Name: "blks_build_accept_latency", + Help: "time (in ns) from the timestamp of a block to the time it was accepted", }), blockSizeRejectedSum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blks_rejected_container_size_sum", - Help: "cumulative size of all rejected blocks", + Name: "blks_rejected_container_size_sum", + Help: "cumulative size of all rejected blocks", }), pollsRejected: metric.NewAveragerWithErrs( - namespace, + "", "blks_polls_rejected", "number of polls from the issuance of a block to its rejection", reg, &errs, ), - // e.g., - // "avalanche_P_blks_rejected_count" reports how many times "Observe" has been called which is the total number of 
blocks rejected - // "avalanche_P_blks_rejected_sum" reports the cumulative sum of all block rejection latencies in nanoseconds - // "avalanche_P_blks_rejected_sum / avalanche_P_blks_rejected_count" is the average block rejection latency in nanoseconds - // "avalanche_P_blks_rejected_container_size_sum" reports the cumulative sum of all rejected blocks' sizes in bytes - // "avalanche_P_blks_rejected_container_size_sum / avalanche_P_blks_rejected_count" is the average rejected block size in bytes latRejected: metric.NewAveragerWithErrs( - namespace, + "", "blks_rejected", "time (in ns) from the issuance of a block to its rejection", reg, @@ -158,14 +136,12 @@ func newMetrics( ), numSuccessfulPolls: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "polls_successful", - Help: "number of successful polls", + Name: "polls_successful", + Help: "number of successful polls", }), numFailedPolls: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "polls_failed", - Help: "number of failed polls", + Name: "polls_failed", + Help: "number of failed polls", }), } diff --git a/snow/consensus/snowman/poll/set.go b/snow/consensus/snowman/poll/set.go index 87a751584c7..7ef519ea7f5 100644 --- a/snow/consensus/snowman/poll/set.go +++ b/snow/consensus/snowman/poll/set.go @@ -55,20 +55,18 @@ type set struct { func NewSet( factory Factory, log logging.Logger, - namespace string, reg prometheus.Registerer, ) (Set, error) { numPolls := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "polls", - Help: "Number of pending network polls", + Name: "polls", + Help: "Number of pending network polls", }) if err := reg.Register(numPolls); err != nil { return nil, fmt.Errorf("%w: %w", errFailedPollsMetric, err) } durPolls, err := metric.NewAverager( - namespace, + "", "poll_duration", "time (in ns) this poll took to complete", reg, diff --git a/snow/consensus/snowman/poll/set_test.go 
b/snow/consensus/snowman/poll/set_test.go index 0717242060d..97166e0e937 100644 --- a/snow/consensus/snowman/poll/set_test.go +++ b/snow/consensus/snowman/poll/set_test.go @@ -32,15 +32,13 @@ func TestNewSetErrorOnPollsMetrics(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(1, 1) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() require.NoError(registerer.Register(prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "polls", + Name: "polls", }))) - _, err := NewSet(factory, log, namespace, registerer) + _, err := NewSet(factory, log, registerer) require.ErrorIs(err, errFailedPollsMetric) } @@ -49,15 +47,13 @@ func TestNewSetErrorOnPollDurationMetrics(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(1, 1) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() require.NoError(registerer.Register(prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "poll_duration_count", + Name: "poll_duration_count", }))) - _, err := NewSet(factory, log, namespace, registerer) + _, err := NewSet(factory, log, registerer) require.ErrorIs(err, errFailedPollDurationMetrics) } @@ -69,9 +65,8 @@ func TestCreateAndFinishPollOutOfOrder_NewerFinishesFirst(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) // create two polls for the two blocks @@ -106,9 +101,8 @@ func TestCreateAndFinishPollOutOfOrder_OlderFinishesFirst(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) // create two polls for the two blocks @@ -143,9 +137,8 @@ func 
TestCreateAndFinishPollOutOfOrder_UnfinishedPollsGaps(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) // create three polls for the two blocks @@ -188,9 +181,8 @@ func TestCreateAndFinishSuccessfulPoll(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) require.Zero(s.Len()) @@ -221,9 +213,8 @@ func TestCreateAndFinishFailedPoll(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) require.Zero(s.Len()) @@ -251,9 +242,8 @@ func TestSetString(t *testing.T) { factory := NewEarlyTermNoTraversalFactory(alpha, alpha) log := logging.NoLog{} - namespace := "" registerer := prometheus.NewRegistry() - s, err := NewSet(factory, log, namespace, registerer) + s, err := NewSet(factory, log, registerer) require.NoError(err) expected := `current polls: (Size = 1) diff --git a/snow/consensus/snowman/topological.go b/snow/consensus/snowman/topological.go index 0eb8b55c1f6..f2ef015654c 100644 --- a/snow/consensus/snowman/topological.go +++ b/snow/consensus/snowman/topological.go @@ -111,7 +111,6 @@ func (ts *Topological) Initialize( ts.metrics, err = newMetrics( ctx.Log, - "", ctx.Registerer, lastAcceptedHeight, lastAcceptedTime, diff --git a/snow/engine/avalanche/bootstrap/bootstrapper.go b/snow/engine/avalanche/bootstrap/bootstrapper.go index b79c9c8cb5f..55e3307e933 100644 --- 
a/snow/engine/avalanche/bootstrap/bootstrapper.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper.go @@ -66,7 +66,7 @@ func New( processedCache: &cache.LRU[ids.ID, struct{}]{Size: cacheSize}, onFinished: onFinished, } - return b, b.metrics.Initialize("bs", config.Ctx.AvalancheRegisterer) + return b, b.metrics.Initialize(config.Ctx.AvalancheRegisterer) } // Note: To align with the Snowman invariant, it should be guaranteed the VM is diff --git a/snow/engine/avalanche/bootstrap/metrics.go b/snow/engine/avalanche/bootstrap/metrics.go index 5ad1b371364..fdf68f5ecff 100644 --- a/snow/engine/avalanche/bootstrap/metrics.go +++ b/snow/engine/avalanche/bootstrap/metrics.go @@ -14,30 +14,23 @@ type metrics struct { numFetchedTxs, numAcceptedTxs prometheus.Counter } -func (m *metrics) Initialize( - namespace string, - registerer prometheus.Registerer, -) error { +func (m *metrics) Initialize(registerer prometheus.Registerer) error { m.numFetchedVts = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "fetched_vts", - Help: "Number of vertices fetched during bootstrapping", + Name: "bs_fetched_vts", + Help: "Number of vertices fetched during bootstrapping", }) m.numAcceptedVts = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "accepted_vts", - Help: "Number of vertices accepted during bootstrapping", + Name: "bs_accepted_vts", + Help: "Number of vertices accepted during bootstrapping", }) m.numFetchedTxs = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "fetched_txs", - Help: "Number of transactions fetched during bootstrapping", + Name: "bs_fetched_txs", + Help: "Number of transactions fetched during bootstrapping", }) m.numAcceptedTxs = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "accepted_txs", - Help: "Number of transactions accepted during bootstrapping", + Name: "bs_accepted_txs", + Help: "Number of transactions accepted during bootstrapping", }) 
return utils.Err( diff --git a/snow/engine/avalanche/getter/getter.go b/snow/engine/avalanche/getter/getter.go index 796cade92fa..6866e7a54ba 100644 --- a/snow/engine/avalanche/getter/getter.go +++ b/snow/engine/avalanche/getter/getter.go @@ -44,8 +44,8 @@ func New( var err error gh.getAncestorsVtxs, err = metric.NewAverager( - "bs", - "get_ancestors_vtxs", + "", + "bs_get_ancestors_vtxs", "vertices fetched in a call to GetAncestors", reg, ) diff --git a/snow/engine/common/tracker/peers.go b/snow/engine/common/tracker/peers.go index 1e76d42f426..94ed4676478 100644 --- a/snow/engine/common/tracker/peers.go +++ b/snow/engine/common/tracker/peers.go @@ -113,21 +113,18 @@ type meteredPeers struct { totalWeight prometheus.Gauge } -func NewMeteredPeers(namespace string, reg prometheus.Registerer) (Peers, error) { +func NewMeteredPeers(reg prometheus.Registerer) (Peers, error) { percentConnected := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "percent_connected", - Help: "Percent of connected stake", + Name: "percent_connected", + Help: "Percent of connected stake", }) totalWeight := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "total_weight", - Help: "Total stake", + Name: "total_weight", + Help: "Total stake", }) numValidators := prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "num_validators", - Help: "Total number of validators", + Name: "num_validators", + Help: "Total number of validators", }) err := utils.Err( reg.Register(percentConnected), diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 095ba4e63b1..6b8462f83f6 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -116,7 +116,7 @@ type Bootstrapper struct { } func New(config Config, onFinished func(ctx context.Context, lastReqID uint32) error) (*Bootstrapper, error) { - metrics, err := newMetrics("bs", 
config.Ctx.Registerer) + metrics, err := newMetrics(config.Ctx.Registerer) return &Bootstrapper{ Config: config, metrics: metrics, diff --git a/snow/engine/snowman/bootstrap/metrics.go b/snow/engine/snowman/bootstrap/metrics.go index 311ed05f136..7b28b8b969b 100644 --- a/snow/engine/snowman/bootstrap/metrics.go +++ b/snow/engine/snowman/bootstrap/metrics.go @@ -13,17 +13,15 @@ type metrics struct { numFetched, numAccepted prometheus.Counter } -func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, error) { +func newMetrics(registerer prometheus.Registerer) (*metrics, error) { m := &metrics{ numFetched: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "fetched", - Help: "Number of blocks fetched during bootstrapping", + Name: "bs_fetched", + Help: "Number of blocks fetched during bootstrapping", }), numAccepted: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "accepted", - Help: "Number of blocks accepted during bootstrapping", + Name: "bs_accepted", + Help: "Number of blocks accepted during bootstrapping", }), } diff --git a/snow/engine/snowman/getter/getter.go b/snow/engine/snowman/getter/getter.go index b58d7eb8742..aed51298cd2 100644 --- a/snow/engine/snowman/getter/getter.go +++ b/snow/engine/snowman/getter/getter.go @@ -43,8 +43,8 @@ func New( var err error gh.getAncestorsBlks, err = metric.NewAverager( - "bs", - "get_ancestors_blks", + "", + "bs_get_ancestors_blks", "blocks fetched in a call to GetAncestors", reg, ) diff --git a/snow/engine/snowman/metrics.go b/snow/engine/snowman/metrics.go index 193b067a14c..bd46eb002fc 100644 --- a/snow/engine/snowman/metrics.go +++ b/snow/engine/snowman/metrics.go @@ -38,104 +38,89 @@ type metrics struct { issued *prometheus.CounterVec } -func newMetrics(namespace string, reg prometheus.Registerer) (*metrics, error) { +func newMetrics(reg prometheus.Registerer) (*metrics, error) { errs := wrappers.Errs{} m := &metrics{ bootstrapFinished: 
prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "bootstrap_finished", - Help: "Whether or not bootstrap process has completed. 1 is success, 0 is fail or ongoing.", + Name: "bootstrap_finished", + Help: "Whether or not bootstrap process has completed. 1 is success, 0 is fail or ongoing.", }), numRequests: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "requests", - Help: "Number of outstanding block requests", + Name: "requests", + Help: "Number of outstanding block requests", }), numBlocked: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blocked", - Help: "Number of blocks that are pending issuance", + Name: "blocked", + Help: "Number of blocks that are pending issuance", }), numBlockers: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "blockers", - Help: "Number of blocks that are blocking other blocks from being issued because they haven't been issued", + Name: "blockers", + Help: "Number of blocks that are blocking other blocks from being issued because they haven't been issued", }), numNonVerifieds: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "non_verified_blks", - Help: "Number of non-verified blocks in the memory", + Name: "non_verified_blks", + Help: "Number of non-verified blocks in the memory", }), numBuilt: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "blks_built", - Help: "Number of blocks that have been built locally", + Name: "blks_built", + Help: "Number of blocks that have been built locally", }), numBuildsFailed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "blk_builds_failed", - Help: "Number of BuildBlock calls that have failed", + Name: "blk_builds_failed", + Help: "Number of BuildBlock calls that have failed", }), numUselessPutBytes: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_useless_put_bytes", - Help: 
"Amount of useless bytes received in Put messages", + Name: "num_useless_put_bytes", + Help: "Amount of useless bytes received in Put messages", }), numUselessPushQueryBytes: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_useless_push_query_bytes", - Help: "Amount of useless bytes received in PushQuery messages", + Name: "num_useless_push_query_bytes", + Help: "Amount of useless bytes received in PushQuery messages", }), numMissingAcceptedBlocks: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_missing_accepted_blocks", - Help: "Number of times an accepted block height was referenced and it wasn't locally available", + Name: "num_missing_accepted_blocks", + Help: "Number of times an accepted block height was referenced and it wasn't locally available", }), numProcessingAncestorFetchesFailed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_failed", - Help: "Number of votes that were dropped due to unknown blocks", + Name: "num_processing_ancestor_fetches_failed", + Help: "Number of votes that were dropped due to unknown blocks", }), numProcessingAncestorFetchesDropped: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_dropped", - Help: "Number of votes that were dropped due to decided blocks", + Name: "num_processing_ancestor_fetches_dropped", + Help: "Number of votes that were dropped due to decided blocks", }), numProcessingAncestorFetchesSucceeded: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_processing_ancestor_fetches_succeeded", - Help: "Number of votes that were applied to ancestor blocks", + Name: "num_processing_ancestor_fetches_succeeded", + Help: "Number of votes that were applied to ancestor blocks", }), numProcessingAncestorFetchesUnneeded: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: 
"num_processing_ancestor_fetches_unneeded", - Help: "Number of votes that were directly applied to blocks", + Name: "num_processing_ancestor_fetches_unneeded", + Help: "Number of votes that were directly applied to blocks", }), getAncestorsBlks: metric.NewAveragerWithErrs( - namespace, + "", "get_ancestors_blks", "blocks fetched in a call to GetAncestors", reg, &errs, ), selectedVoteIndex: metric.NewAveragerWithErrs( - namespace, + "", "selected_vote_index", "index of the voteID that was passed into consensus", reg, &errs, ), issuerStake: metric.NewAveragerWithErrs( - namespace, + "", "issuer_stake", "stake weight of the peer who provided a block that was issued into consensus", reg, &errs, ), issued: prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: namespace, - Name: "blks_issued", - Help: "number of blocks that have been issued into consensus by discovery mechanism", + Name: "blks_issued", + Help: "number of blocks that have been issued into consensus by discovery mechanism", }, []string{"source"}), } diff --git a/snow/engine/snowman/transitive.go b/snow/engine/snowman/transitive.go index 32bf4ac5d5a..9e89fedd22b 100644 --- a/snow/engine/snowman/transitive.go +++ b/snow/engine/snowman/transitive.go @@ -118,14 +118,13 @@ func New(config Config) (*Transitive, error) { polls, err := poll.NewSet( factory, config.Ctx.Log, - "", config.Ctx.Registerer, ) if err != nil { return nil, err } - metrics, err := newMetrics("", config.Ctx.Registerer) + metrics, err := newMetrics(config.Ctx.Registerer) if err != nil { return nil, err } diff --git a/snow/engine/snowman/transitive_test.go b/snow/engine/snowman/transitive_test.go index 75040d9a1f5..2961b018c8c 100644 --- a/snow/engine/snowman/transitive_test.go +++ b/snow/engine/snowman/transitive_test.go @@ -2866,7 +2866,7 @@ func TestGetProcessingAncestor(t *testing.T) { unissuedBlock = snowmantest.BuildChild(issuedBlock) ) - metrics, err := newMetrics("", prometheus.NewRegistry()) + metrics, err := 
newMetrics(prometheus.NewRegistry()) require.NoError(t, err) c := &snowman.Topological{} diff --git a/snow/networking/sender/sender.go b/snow/networking/sender/sender.go index c13b9e22682..37076972fe1 100644 --- a/snow/networking/sender/sender.go +++ b/snow/networking/sender/sender.go @@ -67,9 +67,8 @@ func New( timeouts: timeouts, failedDueToBench: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: "", - Name: "failed_benched", - Help: "requests dropped because a node was benched", + Name: "failed_benched", + Help: "requests dropped because a node was benched", }, opLabels, ), diff --git a/vms/avm/block/builder/builder_test.go b/vms/avm/block/builder/builder_test.go index 89f043844b5..36159598b70 100644 --- a/vms/avm/block/builder/builder_test.go +++ b/vms/avm/block/builder/builder_test.go @@ -544,7 +544,7 @@ func TestBlockBuilderAddLocalTx(t *testing.T) { state.AddBlock(parentBlk) state.SetLastAccepted(parentBlk.ID()) - metrics, err := metrics.New("", registerer) + metrics, err := metrics.New(registerer) require.NoError(err) manager := blkexecutor.NewManager(mempool, metrics, state, backend, clk, onAccept) diff --git a/vms/avm/metrics/metrics.go b/vms/avm/metrics/metrics.go index 9e4053e1fcc..345ad2f3710 100644 --- a/vms/avm/metrics/metrics.go +++ b/vms/avm/metrics/metrics.go @@ -66,32 +66,26 @@ func (m *metrics) MarkTxAccepted(tx *txs.Tx) error { return tx.Unsigned.Visit(m.txMetrics) } -func New( - namespace string, - registerer prometheus.Registerer, -) (Metrics, error) { - txMetrics, err := newTxMetrics(namespace, registerer) +func New(registerer prometheus.Registerer) (Metrics, error) { + txMetrics, err := newTxMetrics(registerer) errs := wrappers.Errs{Err: err} m := &metrics{txMetrics: txMetrics} m.numTxRefreshes = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tx_refreshes", - Help: "Number of times unique txs have been refreshed", + Name: "tx_refreshes", + Help: "Number of times unique txs have been refreshed", 
}) m.numTxRefreshHits = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tx_refresh_hits", - Help: "Number of times unique txs have not been unique, but were cached", + Name: "tx_refresh_hits", + Help: "Number of times unique txs have not been unique, but were cached", }) m.numTxRefreshMisses = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tx_refresh_misses", - Help: "Number of times unique txs have not been unique and weren't cached", + Name: "tx_refresh_misses", + Help: "Number of times unique txs have not been unique and weren't cached", }) - apiRequestMetric, err := metric.NewAPIInterceptor(namespace, registerer) + apiRequestMetric, err := metric.NewAPIInterceptor("", registerer) m.APIInterceptor = apiRequestMetric errs.Add( err, diff --git a/vms/avm/metrics/tx_metrics.go b/vms/avm/metrics/tx_metrics.go index 8b9bf2c0ed4..3c8d1bac79a 100644 --- a/vms/avm/metrics/tx_metrics.go +++ b/vms/avm/metrics/tx_metrics.go @@ -21,16 +21,12 @@ type txMetrics struct { numTxs *prometheus.CounterVec } -func newTxMetrics( - namespace string, - registerer prometheus.Registerer, -) (*txMetrics, error) { +func newTxMetrics(registerer prometheus.Registerer) (*txMetrics, error) { m := &txMetrics{ numTxs: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "txs_accepted", - Help: "number of transactions accepted", + Name: "txs_accepted", + Help: "number of transactions accepted", }, txLabels, ), diff --git a/vms/avm/vm.go b/vms/avm/vm.go index b8fe322ef95..ab05b053b39 100644 --- a/vms/avm/vm.go +++ b/vms/avm/vm.go @@ -182,7 +182,7 @@ func (vm *VM) Initialize( vm.connectedPeers = make(map[ids.NodeID]*version.Application) // Initialize metrics as soon as possible - vm.metrics, err = metrics.New("", registerer) + vm.metrics, err = metrics.New(registerer) if err != nil { return fmt.Errorf("failed to initialize metrics: %w", err) } diff --git a/vms/metervm/block_metrics.go 
b/vms/metervm/block_metrics.go index 0a6473f617b..2bdc247c406 100644 --- a/vms/metervm/block_metrics.go +++ b/vms/metervm/block_metrics.go @@ -49,43 +49,42 @@ func (m *blockMetrics) Initialize( supportsBlockBuildingWithContext bool, supportsBatchedFetching bool, supportsStateSync bool, - namespace string, reg prometheus.Registerer, ) error { errs := wrappers.Errs{} - m.buildBlock = newAverager(namespace, "build_block", reg, &errs) - m.buildBlockErr = newAverager(namespace, "build_block_err", reg, &errs) - m.parseBlock = newAverager(namespace, "parse_block", reg, &errs) - m.parseBlockErr = newAverager(namespace, "parse_block_err", reg, &errs) - m.getBlock = newAverager(namespace, "get_block", reg, &errs) - m.getBlockErr = newAverager(namespace, "get_block_err", reg, &errs) - m.setPreference = newAverager(namespace, "set_preference", reg, &errs) - m.lastAccepted = newAverager(namespace, "last_accepted", reg, &errs) - m.verify = newAverager(namespace, "verify", reg, &errs) - m.verifyErr = newAverager(namespace, "verify_err", reg, &errs) - m.accept = newAverager(namespace, "accept", reg, &errs) - m.reject = newAverager(namespace, "reject", reg, &errs) - m.shouldVerifyWithContext = newAverager(namespace, "should_verify_with_context", reg, &errs) - m.verifyWithContext = newAverager(namespace, "verify_with_context", reg, &errs) - m.verifyWithContextErr = newAverager(namespace, "verify_with_context_err", reg, &errs) - m.getBlockIDAtHeight = newAverager(namespace, "get_block_id_at_height", reg, &errs) + m.buildBlock = newAverager("build_block", reg, &errs) + m.buildBlockErr = newAverager("build_block_err", reg, &errs) + m.parseBlock = newAverager("parse_block", reg, &errs) + m.parseBlockErr = newAverager("parse_block_err", reg, &errs) + m.getBlock = newAverager("get_block", reg, &errs) + m.getBlockErr = newAverager("get_block_err", reg, &errs) + m.setPreference = newAverager("set_preference", reg, &errs) + m.lastAccepted = newAverager("last_accepted", reg, &errs) + m.verify 
= newAverager("verify", reg, &errs) + m.verifyErr = newAverager("verify_err", reg, &errs) + m.accept = newAverager("accept", reg, &errs) + m.reject = newAverager("reject", reg, &errs) + m.shouldVerifyWithContext = newAverager("should_verify_with_context", reg, &errs) + m.verifyWithContext = newAverager("verify_with_context", reg, &errs) + m.verifyWithContextErr = newAverager("verify_with_context_err", reg, &errs) + m.getBlockIDAtHeight = newAverager("get_block_id_at_height", reg, &errs) if supportsBlockBuildingWithContext { - m.buildBlockWithContext = newAverager(namespace, "build_block_with_context", reg, &errs) - m.buildBlockWithContextErr = newAverager(namespace, "build_block_with_context_err", reg, &errs) + m.buildBlockWithContext = newAverager("build_block_with_context", reg, &errs) + m.buildBlockWithContextErr = newAverager("build_block_with_context_err", reg, &errs) } if supportsBatchedFetching { - m.getAncestors = newAverager(namespace, "get_ancestors", reg, &errs) - m.batchedParseBlock = newAverager(namespace, "batched_parse_block", reg, &errs) + m.getAncestors = newAverager("get_ancestors", reg, &errs) + m.batchedParseBlock = newAverager("batched_parse_block", reg, &errs) } if supportsStateSync { - m.stateSyncEnabled = newAverager(namespace, "state_sync_enabled", reg, &errs) - m.getOngoingSyncStateSummary = newAverager(namespace, "get_ongoing_state_sync_summary", reg, &errs) - m.getLastStateSummary = newAverager(namespace, "get_last_state_summary", reg, &errs) - m.parseStateSummary = newAverager(namespace, "parse_state_summary", reg, &errs) - m.parseStateSummaryErr = newAverager(namespace, "parse_state_summary_err", reg, &errs) - m.getStateSummary = newAverager(namespace, "get_state_summary", reg, &errs) - m.getStateSummaryErr = newAverager(namespace, "get_state_summary_err", reg, &errs) + m.stateSyncEnabled = newAverager("state_sync_enabled", reg, &errs) + m.getOngoingSyncStateSummary = newAverager("get_ongoing_state_sync_summary", reg, &errs) + 
m.getLastStateSummary = newAverager("get_last_state_summary", reg, &errs) + m.parseStateSummary = newAverager("parse_state_summary", reg, &errs) + m.parseStateSummaryErr = newAverager("parse_state_summary_err", reg, &errs) + m.getStateSummary = newAverager("get_state_summary", reg, &errs) + m.getStateSummaryErr = newAverager("get_state_summary_err", reg, &errs) } return errs.Err } diff --git a/vms/metervm/block_vm.go b/vms/metervm/block_vm.go index 0ecb982c474..6d951f344b2 100644 --- a/vms/metervm/block_vm.go +++ b/vms/metervm/block_vm.go @@ -63,7 +63,6 @@ func (vm *blockVM) Initialize( vm.buildBlockVM != nil, vm.batchedVM != nil, vm.ssVM != nil, - "", registerer, ) if err != nil { diff --git a/vms/metervm/metrics.go b/vms/metervm/metrics.go index d4c9304e769..09d85a77058 100644 --- a/vms/metervm/metrics.go +++ b/vms/metervm/metrics.go @@ -10,9 +10,9 @@ import ( "github.com/ava-labs/avalanchego/utils/wrappers" ) -func newAverager(namespace, name string, reg prometheus.Registerer, errs *wrappers.Errs) metric.Averager { +func newAverager(name string, reg prometheus.Registerer, errs *wrappers.Errs) metric.Averager { return metric.NewAveragerWithErrs( - namespace, + "", name, "time (in ns) of a "+name, reg, diff --git a/vms/metervm/vertex_metrics.go b/vms/metervm/vertex_metrics.go index 67caa50b610..04096f2ae03 100644 --- a/vms/metervm/vertex_metrics.go +++ b/vms/metervm/vertex_metrics.go @@ -19,16 +19,13 @@ type vertexMetrics struct { reject metric.Averager } -func (m *vertexMetrics) Initialize( - namespace string, - reg prometheus.Registerer, -) error { +func (m *vertexMetrics) Initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} - m.parse = newAverager(namespace, "parse_tx", reg, &errs) - m.parseErr = newAverager(namespace, "parse_tx_err", reg, &errs) - m.verify = newAverager(namespace, "verify_tx", reg, &errs) - m.verifyErr = newAverager(namespace, "verify_tx_err", reg, &errs) - m.accept = newAverager(namespace, "accept", reg, &errs) - m.reject = 
newAverager(namespace, "reject", reg, &errs) + m.parse = newAverager("parse_tx", reg, &errs) + m.parseErr = newAverager("parse_tx_err", reg, &errs) + m.verify = newAverager("verify_tx", reg, &errs) + m.verifyErr = newAverager("verify_tx_err", reg, &errs) + m.accept = newAverager("accept", reg, &errs) + m.reject = newAverager("reject", reg, &errs) return errs.Err } diff --git a/vms/metervm/vertex_vm.go b/vms/metervm/vertex_vm.go index 7cd112ffde2..7cbd47a6747 100644 --- a/vms/metervm/vertex_vm.go +++ b/vms/metervm/vertex_vm.go @@ -46,7 +46,7 @@ func (vm *vertexVM) Initialize( appSender common.AppSender, ) error { registerer := prometheus.NewRegistry() - if err := vm.vertexMetrics.Initialize("", registerer); err != nil { + if err := vm.vertexMetrics.Initialize(registerer); err != nil { return err } diff --git a/vms/platformvm/metrics/block_metrics.go b/vms/platformvm/metrics/block_metrics.go index cc1acd7eb86..bf05de8e8dd 100644 --- a/vms/platformvm/metrics/block_metrics.go +++ b/vms/platformvm/metrics/block_metrics.go @@ -22,11 +22,8 @@ type blockMetrics struct { numBlocks *prometheus.CounterVec } -func newBlockMetrics( - namespace string, - registerer prometheus.Registerer, -) (*blockMetrics, error) { - txMetrics, err := newTxMetrics(namespace, registerer) +func newBlockMetrics(registerer prometheus.Registerer) (*blockMetrics, error) { + txMetrics, err := newTxMetrics(registerer) if err != nil { return nil, err } @@ -35,9 +32,8 @@ func newBlockMetrics( txMetrics: txMetrics, numBlocks: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "blks_accepted", - Help: "number of blocks accepted", + Name: "blks_accepted", + Help: "number of blocks accepted", }, blkLabels, ), diff --git a/vms/platformvm/metrics/metrics.go b/vms/platformvm/metrics/metrics.go index 98b611a017e..26ecfcf1b5a 100644 --- a/vms/platformvm/metrics/metrics.go +++ b/vms/platformvm/metrics/metrics.go @@ -40,61 +40,50 @@ type Metrics interface { 
SetTimeUntilSubnetUnstake(subnetID ids.ID, timeUntilUnstake time.Duration) } -func New( - namespace string, - registerer prometheus.Registerer, -) (Metrics, error) { - blockMetrics, err := newBlockMetrics(namespace, registerer) +func New(registerer prometheus.Registerer) (Metrics, error) { + blockMetrics, err := newBlockMetrics(registerer) m := &metrics{ blockMetrics: blockMetrics, timeUntilUnstake: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "time_until_unstake", - Help: "Time (in ns) until this node leaves the Primary Network's validator set", + Name: "time_until_unstake", + Help: "Time (in ns) until this node leaves the Primary Network's validator set", }), timeUntilSubnetUnstake: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "time_until_unstake_subnet", - Help: "Time (in ns) until this node leaves the subnet's validator set", + Name: "time_until_unstake_subnet", + Help: "Time (in ns) until this node leaves the subnet's validator set", }, []string{"subnetID"}, ), localStake: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "local_staked", - Help: "Amount (in nAVAX) of AVAX staked on this node", + Name: "local_staked", + Help: "Amount (in nAVAX) of AVAX staked on this node", }), totalStake: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "total_staked", - Help: "Amount (in nAVAX) of AVAX staked on the Primary Network", + Name: "total_staked", + Help: "Amount (in nAVAX) of AVAX staked on the Primary Network", }), validatorSetsCached: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "validator_sets_cached", - Help: "Total number of validator sets cached", + Name: "validator_sets_cached", + Help: "Total number of validator sets cached", }), validatorSetsCreated: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "validator_sets_created", - Help: "Total number of validator sets created from applying 
difflayers", + Name: "validator_sets_created", + Help: "Total number of validator sets created from applying difflayers", }), validatorSetsHeightDiff: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "validator_sets_height_diff_sum", - Help: "Total number of validator sets diffs applied for generating validator sets", + Name: "validator_sets_height_diff_sum", + Help: "Total number of validator sets diffs applied for generating validator sets", }), validatorSetsDuration: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "validator_sets_duration_sum", - Help: "Total amount of time generating validator sets in nanoseconds", + Name: "validator_sets_duration_sum", + Help: "Total amount of time generating validator sets in nanoseconds", }), } errs := wrappers.Errs{Err: err} - apiRequestMetrics, err := metric.NewAPIInterceptor(namespace, registerer) + apiRequestMetrics, err := metric.NewAPIInterceptor("", registerer) errs.Add(err) m.APIInterceptor = apiRequestMetrics errs.Add( diff --git a/vms/platformvm/metrics/tx_metrics.go b/vms/platformvm/metrics/tx_metrics.go index 5526a6a0be5..02f45f01162 100644 --- a/vms/platformvm/metrics/tx_metrics.go +++ b/vms/platformvm/metrics/tx_metrics.go @@ -21,16 +21,12 @@ type txMetrics struct { numTxs *prometheus.CounterVec } -func newTxMetrics( - namespace string, - registerer prometheus.Registerer, -) (*txMetrics, error) { +func newTxMetrics(registerer prometheus.Registerer) (*txMetrics, error) { m := &txMetrics{ numTxs: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "txs_accepted", - Help: "number of transactions accepted", + Name: "txs_accepted", + Help: "number of transactions accepted", }, txLabels, ), diff --git a/vms/platformvm/vm.go b/vms/platformvm/vm.go index b6417bbd0c4..565960cff59 100644 --- a/vms/platformvm/vm.go +++ b/vms/platformvm/vm.go @@ -119,7 +119,7 @@ func (vm *VM) Initialize( } // Initialize metrics as soon as possible - 
vm.metrics, err = metrics.New("", registerer) + vm.metrics, err = metrics.New(registerer) if err != nil { return fmt.Errorf("failed to initialize metrics: %w", err) } From 86a50c3e2e401a949f61bb567aa10a04dc625895 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 30 May 2024 15:47:06 -0400 Subject: [PATCH 33/53] nit --- vms/platformvm/block/builder/helpers_test.go | 2 +- vms/platformvm/validators/manager_benchmark_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vms/platformvm/block/builder/helpers_test.go b/vms/platformvm/block/builder/helpers_test.go index 0108162649d..909d7adefe5 100644 --- a/vms/platformvm/block/builder/helpers_test.go +++ b/vms/platformvm/block/builder/helpers_test.go @@ -177,7 +177,7 @@ func newEnvironment(t *testing.T, f fork) *environment { //nolint:unparam return nil } - metrics, err := metrics.New("", registerer) + metrics, err := metrics.New(registerer) require.NoError(err) res.mempool, err = mempool.New("mempool", registerer, nil) diff --git a/vms/platformvm/validators/manager_benchmark_test.go b/vms/platformvm/validators/manager_benchmark_test.go index 912f3619e3e..3a756f73c6a 100644 --- a/vms/platformvm/validators/manager_benchmark_test.go +++ b/vms/platformvm/validators/manager_benchmark_test.go @@ -104,7 +104,7 @@ func BenchmarkGetValidatorSet(b *testing.B) { execConfig, err := config.GetExecutionConfig(nil) require.NoError(err) - metrics, err := metrics.New("", prometheus.NewRegistry()) + metrics, err := metrics.New(prometheus.NewRegistry()) require.NoError(err) s, err := state.New( From d6425d72f8195b1be052afad1fbb5806703ac93d Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 30 May 2024 16:38:56 -0400 Subject: [PATCH 34/53] remove moar --- utils/metric/api_interceptor.go | 15 ++++++--------- vms/avm/metrics/metrics.go | 2 +- vms/platformvm/metrics/metrics.go | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/utils/metric/api_interceptor.go 
b/utils/metric/api_interceptor.go index 7d970b22b83..50027fde147 100644 --- a/utils/metric/api_interceptor.go +++ b/utils/metric/api_interceptor.go @@ -29,27 +29,24 @@ type apiInterceptor struct { requestErrors *prometheus.CounterVec } -func NewAPIInterceptor(namespace string, registerer prometheus.Registerer) (APIInterceptor, error) { +func NewAPIInterceptor(registerer prometheus.Registerer) (APIInterceptor, error) { requestDurationCount := prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "request_duration_count", - Help: "Number of times this type of request was made", + Name: "request_duration_count", + Help: "Number of times this type of request was made", }, []string{"method"}, ) requestDurationSum := prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "request_duration_sum", - Help: "Amount of time in nanoseconds that has been spent handling this type of request", + Name: "request_duration_sum", + Help: "Amount of time in nanoseconds that has been spent handling this type of request", }, []string{"method"}, ) requestErrors := prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "request_error_count", + Name: "request_error_count", }, []string{"method"}, ) diff --git a/vms/avm/metrics/metrics.go b/vms/avm/metrics/metrics.go index 345ad2f3710..7d122ce6e42 100644 --- a/vms/avm/metrics/metrics.go +++ b/vms/avm/metrics/metrics.go @@ -85,7 +85,7 @@ func New(registerer prometheus.Registerer) (Metrics, error) { Help: "Number of times unique txs have not been unique and weren't cached", }) - apiRequestMetric, err := metric.NewAPIInterceptor("", registerer) + apiRequestMetric, err := metric.NewAPIInterceptor(registerer) m.APIInterceptor = apiRequestMetric errs.Add( err, diff --git a/vms/platformvm/metrics/metrics.go b/vms/platformvm/metrics/metrics.go index 26ecfcf1b5a..82b51dc8c34 100644 --- a/vms/platformvm/metrics/metrics.go +++ b/vms/platformvm/metrics/metrics.go @@ -83,7 +83,7 
@@ func New(registerer prometheus.Registerer) (Metrics, error) { } errs := wrappers.Errs{Err: err} - apiRequestMetrics, err := metric.NewAPIInterceptor("", registerer) + apiRequestMetrics, err := metric.NewAPIInterceptor(registerer) errs.Add(err) m.APIInterceptor = apiRequestMetrics errs.Add( From d1214fc4b03ae88f695c25a0ff93a80fe156085b Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 30 May 2024 17:36:33 -0400 Subject: [PATCH 35/53] Remove api namespace --- api/metrics/multi_gatherer.go | 8 ++++++++ api/server/metrics.go | 17 +++++++---------- api/server/server.go | 3 +-- node/node.go | 14 ++++++++++++-- utils/metric/namespace.go | 7 ++++++- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/api/metrics/multi_gatherer.go b/api/metrics/multi_gatherer.go index 4bd0900a022..d8d4d93d2d7 100644 --- a/api/metrics/multi_gatherer.go +++ b/api/metrics/multi_gatherer.go @@ -93,3 +93,11 @@ func sortMetrics(m []*dto.MetricFamily) { return cmp.Compare(*i.Name, *j.Name) }) } + +func MakeAndRegister(gatherer MultiGatherer, name string) (*prometheus.Registry, error) { + reg := prometheus.NewRegistry() + if err := gatherer.Register(name, reg); err != nil { + return nil, fmt.Errorf("couldn't register %q metrics: %w", name, err) + } + return reg, nil +} diff --git a/api/server/metrics.go b/api/server/metrics.go index e3b2d76c83e..9734f36eeaa 100644 --- a/api/server/metrics.go +++ b/api/server/metrics.go @@ -18,29 +18,26 @@ type metrics struct { totalDuration *prometheus.GaugeVec } -func newMetrics(namespace string, registerer prometheus.Registerer) (*metrics, error) { +func newMetrics(registerer prometheus.Registerer) (*metrics, error) { m := &metrics{ numProcessing: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "calls_processing", - Help: "The number of calls this API is currently processing", + Name: "calls_processing", + Help: "The number of calls this API is currently processing", }, []string{"base"}, ), numCalls: 
prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "calls", - Help: "The number of calls this API has processed", + Name: "calls", + Help: "The number of calls this API has processed", }, []string{"base"}, ), totalDuration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "calls_duration", - Help: "The total amount of time, in nanoseconds, spent handling API calls", + Name: "calls_duration", + Help: "The total amount of time, in nanoseconds, spent handling API calls", }, []string{"base"}, ), diff --git a/api/server/server.go b/api/server/server.go index cd712ec88b9..8af570d09bd 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -108,12 +108,11 @@ func New( nodeID ids.NodeID, tracingEnabled bool, tracer trace.Tracer, - namespace string, registerer prometheus.Registerer, httpConfig HTTPConfig, allowedHosts []string, ) (Server, error) { - m, err := newMetrics(namespace, registerer) + m, err := newMetrics(registerer) if err != nil { return nil, err } diff --git a/node/node.go b/node/node.go index 2103762f658..5445c626d1d 100644 --- a/node/node.go +++ b/node/node.go @@ -66,6 +66,7 @@ import ( "github.com/ava-labs/avalanchego/utils/ips" "github.com/ava-labs/avalanchego/utils/logging" "github.com/ava-labs/avalanchego/utils/math/meter" + "github.com/ava-labs/avalanchego/utils/metric" "github.com/ava-labs/avalanchego/utils/perms" "github.com/ava-labs/avalanchego/utils/profiler" "github.com/ava-labs/avalanchego/utils/resource" @@ -89,6 +90,8 @@ const ( httpPortName = constants.AppName + "-http" ipResolutionTimeout = 30 * time.Second + + apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" ) var ( @@ -967,6 +970,14 @@ func (n *Node) initAPIServer() error { } n.apiURI = fmt.Sprintf("%s://%s", protocol, listener.Addr()) + apiRegisterer, err := metrics.MakeAndRegister( + n.MetricsGatherer, + apiNamespace, + ) + if err != nil { + return err + } + n.APIServer, err = server.New( n.Log, 
n.LogFactory, @@ -976,8 +987,7 @@ func (n *Node) initAPIServer() error { n.ID, n.Config.TraceConfig.Enabled, n.tracer, - "api", - n.MetricsRegisterer, + apiRegisterer, n.Config.HTTPConfig.HTTPConfig, n.Config.HTTPAllowedHosts, ) diff --git a/utils/metric/namespace.go b/utils/metric/namespace.go index 4371bb1dc07..8d80a86266f 100644 --- a/utils/metric/namespace.go +++ b/utils/metric/namespace.go @@ -5,6 +5,11 @@ package metric import "strings" +const ( + NamespaceSeparatorByte = '_' + NamespaceSeparator = string(NamespaceSeparatorByte) +) + func AppendNamespace(prefix, suffix string) string { switch { case len(prefix) == 0: @@ -12,6 +17,6 @@ func AppendNamespace(prefix, suffix string) string { case len(suffix) == 0: return prefix default: - return strings.Join([]string{prefix, suffix}, "_") + return strings.Join([]string{prefix, suffix}, NamespaceSeparator) } } From 073ac5c2726ee42c0dd1e3b015584de676247953 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 30 May 2024 20:56:47 -0400 Subject: [PATCH 36/53] Remove network namespace --- message/creator.go | 4 - message/inbound_msg_builder_test.go | 2 - message/messages.go | 11 +-- message/messages_benchmark_test.go | 4 +- message/messages_test.go | 4 - message/outbound_msg_builder_test.go | 1 - network/config.go | 1 - network/ip_tracker.go | 15 +-- network/ip_tracker_test.go | 2 +- network/metrics.go | 91 ++++++++----------- network/network.go | 8 +- network/network_test.go | 2 - network/peer/metrics.go | 40 +++----- network/peer/peer_test.go | 6 +- network/peer/test_peer.go | 6 +- network/test_network.go | 1 - network/throttling/bandwidth_throttler.go | 8 +- .../throttling/bandwidth_throttler_test.go | 2 +- .../inbound_msg_buffer_throttler.go | 12 +-- .../inbound_msg_buffer_throttler_test.go | 4 +- .../throttling/inbound_msg_byte_throttler.go | 27 +++--- .../inbound_msg_byte_throttler_test.go | 5 - network/throttling/inbound_msg_throttler.go | 9 +- network/throttling/outbound_msg_throttler.go | 30 +++--- 
.../throttling/outbound_msg_throttler_test.go | 3 - node/node.go | 26 ++++-- snow/networking/sender/sender_test.go | 3 - vms/platformvm/vm_test.go | 2 +- 28 files changed, 121 insertions(+), 208 deletions(-) diff --git a/message/creator.go b/message/creator.go index 8040bccb186..86c9af1f107 100644 --- a/message/creator.go +++ b/message/creator.go @@ -10,7 +10,6 @@ import ( "github.com/ava-labs/avalanchego/utils/compression" "github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/utils/metric" ) var _ Creator = (*creator)(nil) @@ -28,14 +27,11 @@ type creator struct { func NewCreator( log logging.Logger, metrics prometheus.Registerer, - parentNamespace string, compressionType compression.Type, maxMessageTimeout time.Duration, ) (Creator, error) { - namespace := metric.AppendNamespace(parentNamespace, "codec") builder, err := newMsgBuilder( log, - namespace, metrics, maxMessageTimeout, ) diff --git a/message/inbound_msg_builder_test.go b/message/inbound_msg_builder_test.go index 09269d1e2ad..92d18b6836b 100644 --- a/message/inbound_msg_builder_test.go +++ b/message/inbound_msg_builder_test.go @@ -23,7 +23,6 @@ func Test_newMsgBuilder(t *testing.T) { mb, err := newMsgBuilder( logging.NoLog{}, - "test", prometheus.NewRegistry(), 10*time.Second, ) @@ -393,7 +392,6 @@ func TestAppError(t *testing.T) { mb, err := newMsgBuilder( logging.NoLog{}, - "", prometheus.NewRegistry(), time.Second, ) diff --git a/message/messages.go b/message/messages.go index 0362ba070e4..06ef3125d69 100644 --- a/message/messages.go +++ b/message/messages.go @@ -149,7 +149,6 @@ type msgBuilder struct { func newMsgBuilder( log logging.Logger, - namespace string, metrics prometheus.Registerer, maxMessageTimeout time.Duration, ) (*msgBuilder, error) { @@ -164,17 +163,15 @@ func newMsgBuilder( zstdCompressor: zstdCompressor, count: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "compressed_count", - Help: "number of compressed messages", + 
Name: "codec_compressed_count", + Help: "number of compressed messages", }, metricLabels, ), duration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "compressed_duration", - Help: "time spent handling compressed messages", + Name: "codec_compressed_duration", + Help: "time spent handling compressed messages", }, metricLabels, ), diff --git a/message/messages_benchmark_test.go b/message/messages_benchmark_test.go index 6abb80b9235..9a96f1f4191 100644 --- a/message/messages_benchmark_test.go +++ b/message/messages_benchmark_test.go @@ -62,7 +62,7 @@ func BenchmarkMarshalHandshake(b *testing.B) { useBuilder := os.Getenv("USE_BUILDER") != "" - codec, err := newMsgBuilder(logging.NoLog{}, "", prometheus.NewRegistry(), 10*time.Second) + codec, err := newMsgBuilder(logging.NoLog{}, prometheus.NewRegistry(), 10*time.Second) require.NoError(err) b.Logf("proto length %d-byte (use builder %v)", msgLen, useBuilder) @@ -119,7 +119,7 @@ func BenchmarkUnmarshalHandshake(b *testing.B) { require.NoError(err) useBuilder := os.Getenv("USE_BUILDER") != "" - codec, err := newMsgBuilder(logging.NoLog{}, "", prometheus.NewRegistry(), 10*time.Second) + codec, err := newMsgBuilder(logging.NoLog{}, prometheus.NewRegistry(), 10*time.Second) require.NoError(err) b.StartTimer() diff --git a/message/messages_test.go b/message/messages_test.go index bbe8f2377ac..583f26533d1 100644 --- a/message/messages_test.go +++ b/message/messages_test.go @@ -25,7 +25,6 @@ func TestMessage(t *testing.T) { mb, err := newMsgBuilder( logging.NoLog{}, - "test", prometheus.NewRegistry(), 5*time.Second, ) @@ -668,7 +667,6 @@ func TestInboundMessageToString(t *testing.T) { mb, err := newMsgBuilder( logging.NoLog{}, - "test", prometheus.NewRegistry(), 5*time.Second, ) @@ -699,7 +697,6 @@ func TestEmptyInboundMessage(t *testing.T) { mb, err := newMsgBuilder( logging.NoLog{}, - "test", prometheus.NewRegistry(), 5*time.Second, ) @@ -720,7 +717,6 @@ func TestNilInboundMessage(t *testing.T) 
{ mb, err := newMsgBuilder( logging.NoLog{}, - "test", prometheus.NewRegistry(), 5*time.Second, ) diff --git a/message/outbound_msg_builder_test.go b/message/outbound_msg_builder_test.go index 02e46ef166a..1f7187cdd43 100644 --- a/message/outbound_msg_builder_test.go +++ b/message/outbound_msg_builder_test.go @@ -20,7 +20,6 @@ func Test_newOutboundBuilder(t *testing.T) { mb, err := newMsgBuilder( logging.NoLog{}, - "test", prometheus.NewRegistry(), 10*time.Second, ) diff --git a/network/config.go b/network/config.go index ed82ea507e8..3004a12bdc5 100644 --- a/network/config.go +++ b/network/config.go @@ -110,7 +110,6 @@ type Config struct { TLSKeyLogFile string `json:"tlsKeyLogFile"` - Namespace string `json:"namespace"` MyNodeID ids.NodeID `json:"myNodeID"` MyIPPort ips.DynamicIPPort `json:"myIP"` NetworkID uint32 `json:"networkID"` diff --git a/network/ip_tracker.go b/network/ip_tracker.go index 03040b15337..370c7d47da9 100644 --- a/network/ip_tracker.go +++ b/network/ip_tracker.go @@ -17,7 +17,6 @@ import ( "github.com/ava-labs/avalanchego/utils/crypto/bls" "github.com/ava-labs/avalanchego/utils/ips" "github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/utils/metric" "github.com/ava-labs/avalanchego/utils/sampler" "github.com/ava-labs/avalanchego/utils/set" ) @@ -42,25 +41,21 @@ var _ validators.SetCallbackListener = (*ipTracker)(nil) func newIPTracker( log logging.Logger, - namespace string, registerer prometheus.Registerer, ) (*ipTracker, error) { - bloomNamespace := metric.AppendNamespace(namespace, "ip_bloom") - bloomMetrics, err := bloom.NewMetrics(bloomNamespace, registerer) + bloomMetrics, err := bloom.NewMetrics("ip_bloom", registerer) if err != nil { return nil, err } tracker := &ipTracker{ log: log, numTrackedIPs: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "tracked_ips", - Help: "Number of IPs this node is willing to dial", + Name: "tracked_ips", + Help: "Number of IPs this node is willing to 
dial", }), numGossipableIPs: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "gossipable_ips", - Help: "Number of IPs this node is willing to gossip", + Name: "gossipable_ips", + Help: "Number of IPs this node is willing to gossip", }), bloomMetrics: bloomMetrics, mostRecentTrackedIPs: make(map[ids.NodeID]*ips.ClaimedIPPort), diff --git a/network/ip_tracker_test.go b/network/ip_tracker_test.go index be9ea59a51e..edae70de5b9 100644 --- a/network/ip_tracker_test.go +++ b/network/ip_tracker_test.go @@ -17,7 +17,7 @@ import ( ) func newTestIPTracker(t *testing.T) *ipTracker { - tracker, err := newIPTracker(logging.NoLog{}, "", prometheus.NewRegistry()) + tracker, err := newIPTracker(logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) return tracker } diff --git a/network/metrics.go b/network/metrics.go index c6b47a1360a..8cc5155ec10 100644 --- a/network/metrics.go +++ b/network/metrics.go @@ -44,111 +44,92 @@ type metrics struct { } func newMetrics( - namespace string, registerer prometheus.Registerer, trackedSubnets set.Set[ids.ID], ) (*metrics, error) { m := &metrics{ trackedSubnets: trackedSubnets, numPeers: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "peers", - Help: "Number of network peers", + Name: "peers", + Help: "Number of network peers", }), numTracked: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "tracked", - Help: "Number of currently tracked IPs attempting to be connected to", + Name: "tracked", + Help: "Number of currently tracked IPs attempting to be connected to", }), numSubnetPeers: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "peers_subnet", - Help: "Number of peers that are validating a particular subnet", + Name: "peers_subnet", + Help: "Number of peers that are validating a particular subnet", }, []string{"subnetID"}, ), timeSinceLastMsgReceived: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - 
Name: "time_since_last_msg_received", - Help: "Time (in ns) since the last msg was received", + Name: "time_since_last_msg_received", + Help: "Time (in ns) since the last msg was received", }), timeSinceLastMsgSent: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "time_since_last_msg_sent", - Help: "Time (in ns) since the last msg was sent", + Name: "time_since_last_msg_sent", + Help: "Time (in ns) since the last msg was sent", }), sendFailRate: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "send_fail_rate", - Help: "Portion of messages that recently failed to be sent over the network", + Name: "send_fail_rate", + Help: "Portion of messages that recently failed to be sent over the network", }), connected: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "times_connected", - Help: "Times this node successfully completed a handshake with a peer", + Name: "times_connected", + Help: "Times this node successfully completed a handshake with a peer", }), disconnected: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "times_disconnected", - Help: "Times this node disconnected from a peer it had completed a handshake with", + Name: "times_disconnected", + Help: "Times this node disconnected from a peer it had completed a handshake with", }), acceptFailed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "accept_failed", - Help: "Times this node's listener failed to accept an inbound connection", + Name: "accept_failed", + Help: "Times this node's listener failed to accept an inbound connection", }), inboundConnAllowed: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "inbound_conn_throttler_allowed", - Help: "Times this node allowed (attempted to upgrade) an inbound connection", + Name: "inbound_conn_throttler_allowed", + Help: "Times this node allowed (attempted to upgrade) an inbound connection", }), 
tlsConnRejected: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "tls_conn_rejected", - Help: "Times this node rejected a connection due to an unsupported TLS certificate", + Name: "tls_conn_rejected", + Help: "Times this node rejected a connection due to an unsupported TLS certificate", }), numUselessPeerListBytes: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "num_useless_peerlist_bytes", - Help: "Amount of useless bytes (i.e. information about nodes we already knew/don't want to connect to) received in PeerList messages", + Name: "num_useless_peerlist_bytes", + Help: "Amount of useless bytes (i.e. information about nodes we already knew/don't want to connect to) received in PeerList messages", }), inboundConnRateLimited: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "inbound_conn_throttler_rate_limited", - Help: "Times this node rejected an inbound connection due to rate-limiting", + Name: "inbound_conn_throttler_rate_limited", + Help: "Times this node rejected an inbound connection due to rate-limiting", }), nodeUptimeWeightedAverage: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_uptime_weighted_average", - Help: "This node's uptime average weighted by observing peer stakes", + Name: "node_uptime_weighted_average", + Help: "This node's uptime average weighted by observing peer stakes", }), nodeUptimeRewardingStake: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_uptime_rewarding_stake", - Help: "The percentage of total stake which thinks this node is eligible for rewards", + Name: "node_uptime_rewarding_stake", + Help: "The percentage of total stake which thinks this node is eligible for rewards", }), nodeSubnetUptimeWeightedAverage: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_subnet_uptime_weighted_average", - Help: "This node's subnet uptime averages weighted by 
observing subnet peer stakes", + Name: "node_subnet_uptime_weighted_average", + Help: "This node's subnet uptime averages weighted by observing subnet peer stakes", }, []string{"subnetID"}, ), nodeSubnetUptimeRewardingStake: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "node_subnet_uptime_rewarding_stake", - Help: "The percentage of subnet's total stake which thinks this node is eligible for subnet's rewards", + Name: "node_subnet_uptime_rewarding_stake", + Help: "The percentage of subnet's total stake which thinks this node is eligible for subnet's rewards", }, []string{"subnetID"}, ), peerConnectedLifetimeAverage: prometheus.NewGauge( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "peer_connected_duration_average", - Help: "The average duration of all peer connections in nanoseconds", + Name: "peer_connected_duration_average", + Help: "The average duration of all peer connections in nanoseconds", }, ), peerConnectedStartTimes: make(map[ids.NodeID]float64), diff --git a/network/network.go b/network/network.go index 9963612c016..51ded9b8cf9 100644 --- a/network/network.go +++ b/network/network.go @@ -202,7 +202,6 @@ func NewNetwork( inboundMsgThrottler, err := throttling.NewInboundMsgThrottler( log, - config.Namespace, metricsRegisterer, config.Validators, config.ThrottlerConfig.InboundMsgThrottlerConfig, @@ -216,7 +215,6 @@ func NewNetwork( outboundMsgThrottler, err := throttling.NewSybilOutboundMsgThrottler( log, - config.Namespace, metricsRegisterer, config.Validators, config.ThrottlerConfig.OutboundMsgThrottlerConfig, @@ -225,17 +223,17 @@ func NewNetwork( return nil, fmt.Errorf("initializing outbound message throttler failed with: %w", err) } - peerMetrics, err := peer.NewMetrics(config.Namespace, metricsRegisterer) + peerMetrics, err := peer.NewMetrics(metricsRegisterer) if err != nil { return nil, fmt.Errorf("initializing peer metrics failed with: %w", err) } - metrics, err := newMetrics(config.Namespace, 
metricsRegisterer, config.TrackedSubnets) + metrics, err := newMetrics(metricsRegisterer, config.TrackedSubnets) if err != nil { return nil, fmt.Errorf("initializing network metrics failed with: %w", err) } - ipTracker, err := newIPTracker(log, config.Namespace, metricsRegisterer) + ipTracker, err := newIPTracker(log, metricsRegisterer) if err != nil { return nil, fmt.Errorf("initializing ip tracker failed with: %w", err) } diff --git a/network/network_test.go b/network/network_test.go index f8f7b56427f..5ae2cef5af3 100644 --- a/network/network_test.go +++ b/network/network_test.go @@ -104,7 +104,6 @@ var ( DialerConfig: defaultDialerConfig, - Namespace: "", NetworkID: 49463, MaxClockDifference: time.Minute, PingFrequency: constants.DefaultPingFrequency, @@ -196,7 +195,6 @@ func newMessageCreator(t *testing.T) message.Creator { mc, err := message.NewCreator( logging.NoLog{}, prometheus.NewRegistry(), - "", constants.DefaultNetworkCompressionType, 10*time.Second, ) diff --git a/network/peer/metrics.go b/network/peer/metrics.go index 94d46ac1e5f..7547d7a827d 100644 --- a/network/peer/metrics.go +++ b/network/peer/metrics.go @@ -39,55 +39,45 @@ type Metrics struct { BytesSaved *prometheus.GaugeVec // io + op } -func NewMetrics( - namespace string, - registerer prometheus.Registerer, -) (*Metrics, error) { +func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) { m := &Metrics{ ClockSkewCount: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "clock_skew_count", - Help: "number of handshake timestamps inspected (n)", + Name: "clock_skew_count", + Help: "number of handshake timestamps inspected (n)", }), ClockSkewSum: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "clock_skew_sum", - Help: "sum of (peer timestamp - local timestamp) from handshake messages (s)", + Name: "clock_skew_sum", + Help: "sum of (peer timestamp - local timestamp) from handshake messages (s)", }), NumFailedToParse: 
prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs_failed_to_parse", - Help: "number of received messages that could not be parsed", + Name: "msgs_failed_to_parse", + Help: "number of received messages that could not be parsed", }), NumSendFailed: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs_failed_to_send", - Help: "number of messages that failed to be sent", + Name: "msgs_failed_to_send", + Help: "number of messages that failed to be sent", }, opLabels, ), Messages: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs", - Help: "number of handled messages", + Name: "msgs", + Help: "number of handled messages", }, ioOpCompressedLabels, ), Bytes: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "msgs_bytes", - Help: "number of message bytes", + Name: "msgs_bytes", + Help: "number of message bytes", }, ioOpLabels, ), BytesSaved: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "msgs_bytes_saved", - Help: "number of message bytes saved", + Name: "msgs_bytes_saved", + Help: "number of message bytes saved", }, ioOpLabels, ), diff --git a/network/peer/peer_test.go b/network/peer/peer_test.go index ffd5915aa2c..4a0399bc3a1 100644 --- a/network/peer/peer_test.go +++ b/network/peer/peer_test.go @@ -50,7 +50,6 @@ func newMessageCreator(t *testing.T) message.Creator { mc, err := message.NewCreator( logging.NoLog{}, prometheus.NewRegistry(), - "", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -63,10 +62,7 @@ func newConfig(t *testing.T) Config { t.Helper() require := require.New(t) - metrics, err := NewMetrics( - "", - prometheus.NewRegistry(), - ) + metrics, err := NewMetrics(prometheus.NewRegistry()) require.NoError(err) resourceTracker, err := tracker.NewResourceTracker( diff --git a/network/peer/test_peer.go b/network/peer/test_peer.go index a8f633ccf65..a4df06b72ee 100644 --- 
a/network/peer/test_peer.go +++ b/network/peer/test_peer.go @@ -76,7 +76,6 @@ func StartTestPeer( mc, err := message.NewCreator( logging.NoLog{}, prometheus.NewRegistry(), - "", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -84,10 +83,7 @@ func StartTestPeer( return nil, err } - metrics, err := NewMetrics( - "", - prometheus.NewRegistry(), - ) + metrics, err := NewMetrics(prometheus.NewRegistry()) if err != nil { return nil, err } diff --git a/network/test_network.go b/network/test_network.go index 25039ad046b..6a6bcdfcc08 100644 --- a/network/test_network.go +++ b/network/test_network.go @@ -82,7 +82,6 @@ func NewTestNetwork( msgCreator, err := message.NewCreator( logging.NoLog{}, metrics, - "", constants.DefaultNetworkCompressionType, constants.DefaultNetworkMaximumInboundTimeout, ) diff --git a/network/throttling/bandwidth_throttler.go b/network/throttling/bandwidth_throttler.go index cde94b96124..12ca3ac9a84 100644 --- a/network/throttling/bandwidth_throttler.go +++ b/network/throttling/bandwidth_throttler.go @@ -58,7 +58,6 @@ type BandwidthThrottlerConfig struct { func newBandwidthThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, config BandwidthThrottlerConfig, ) (bandwidthThrottler, error) { @@ -69,16 +68,15 @@ func newBandwidthThrottler( limiters: make(map[ids.NodeID]*rate.Limiter), metrics: bandwidthThrottlerMetrics{ acquireLatency: metric.NewAveragerWithErrs( - namespace, + "", "bandwidth_throttler_inbound_acquire_latency", "average time (in ns) to acquire bytes from the inbound bandwidth throttler", registerer, &errs, ), awaitingAcquire: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "bandwidth_throttler_inbound_awaiting_acquire", - Help: "Number of inbound messages waiting to acquire bandwidth from the inbound bandwidth throttler", + Name: "bandwidth_throttler_inbound_awaiting_acquire", + Help: "Number of inbound messages waiting to acquire bandwidth from the inbound bandwidth 
throttler", }), }, } diff --git a/network/throttling/bandwidth_throttler_test.go b/network/throttling/bandwidth_throttler_test.go index 9f9195477b3..da9ac6ded28 100644 --- a/network/throttling/bandwidth_throttler_test.go +++ b/network/throttling/bandwidth_throttler_test.go @@ -22,7 +22,7 @@ func TestBandwidthThrottler(t *testing.T) { RefillRate: 8, MaxBurstSize: 10, } - throttlerIntf, err := newBandwidthThrottler(logging.NoLog{}, "", prometheus.NewRegistry(), config) + throttlerIntf, err := newBandwidthThrottler(logging.NoLog{}, prometheus.NewRegistry(), config) require.NoError(err) require.IsType(&bandwidthThrottlerImpl{}, throttlerIntf) throttler := throttlerIntf.(*bandwidthThrottlerImpl) diff --git a/network/throttling/inbound_msg_buffer_throttler.go b/network/throttling/inbound_msg_buffer_throttler.go index 65306eea7d5..395b6da1688 100644 --- a/network/throttling/inbound_msg_buffer_throttler.go +++ b/network/throttling/inbound_msg_buffer_throttler.go @@ -18,7 +18,6 @@ import ( // See inbound_msg_throttler.go func newInboundMsgBufferThrottler( - namespace string, registerer prometheus.Registerer, maxProcessingMsgsPerNode uint64, ) (*inboundMsgBufferThrottler, error) { @@ -27,7 +26,7 @@ func newInboundMsgBufferThrottler( awaitingAcquire: make(map[ids.NodeID]chan struct{}), nodeToNumProcessingMsgs: make(map[ids.NodeID]uint64), } - return t, t.metrics.initialize(namespace, registerer) + return t, t.metrics.initialize(registerer) } // Rate-limits inbound messages based on the number of @@ -130,19 +129,18 @@ type inboundMsgBufferThrottlerMetrics struct { awaitingAcquire prometheus.Gauge } -func (m *inboundMsgBufferThrottlerMetrics) initialize(namespace string, reg prometheus.Registerer) error { +func (m *inboundMsgBufferThrottlerMetrics) initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} m.acquireLatency = metric.NewAveragerWithErrs( - namespace, + "", "buffer_throttler_inbound_acquire_latency", "average time (in ns) to get space on the inbound 
message buffer", reg, &errs, ) m.awaitingAcquire = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "buffer_throttler_inbound_awaiting_acquire", - Help: "Number of inbound messages waiting to take space on the inbound message buffer", + Name: "buffer_throttler_inbound_awaiting_acquire", + Help: "Number of inbound messages waiting to take space on the inbound message buffer", }) errs.Add( reg.Register(m.awaitingAcquire), diff --git a/network/throttling/inbound_msg_buffer_throttler_test.go b/network/throttling/inbound_msg_buffer_throttler_test.go index 38e6d735097..d9f3e4d29bc 100644 --- a/network/throttling/inbound_msg_buffer_throttler_test.go +++ b/network/throttling/inbound_msg_buffer_throttler_test.go @@ -17,7 +17,7 @@ import ( // Test inboundMsgBufferThrottler func TestMsgBufferThrottler(t *testing.T) { require := require.New(t) - throttler, err := newInboundMsgBufferThrottler("", prometheus.NewRegistry(), 3) + throttler, err := newInboundMsgBufferThrottler(prometheus.NewRegistry(), 3) require.NoError(err) nodeID1, nodeID2 := ids.GenerateTestNodeID(), ids.GenerateTestNodeID() @@ -69,7 +69,7 @@ func TestMsgBufferThrottler(t *testing.T) { // Test inboundMsgBufferThrottler when an acquire is cancelled func TestMsgBufferThrottlerContextCancelled(t *testing.T) { require := require.New(t) - throttler, err := newInboundMsgBufferThrottler("", prometheus.NewRegistry(), 3) + throttler, err := newInboundMsgBufferThrottler(prometheus.NewRegistry(), 3) require.NoError(err) vdr1Context, vdr1ContextCancelFunc := context.WithCancel(context.Background()) diff --git a/network/throttling/inbound_msg_byte_throttler.go b/network/throttling/inbound_msg_byte_throttler.go index 6bdacb28092..3e20762f85e 100644 --- a/network/throttling/inbound_msg_byte_throttler.go +++ b/network/throttling/inbound_msg_byte_throttler.go @@ -23,7 +23,6 @@ import ( func newInboundMsgByteThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, vdrs 
validators.Manager, config MsgByteThrottlerConfig, @@ -42,7 +41,7 @@ func newInboundMsgByteThrottler( waitingToAcquire: linked.NewHashmap[uint64, *msgMetadata](), nodeToWaitingMsgID: make(map[ids.NodeID]uint64), } - return t, t.metrics.initialize(namespace, registerer) + return t, t.metrics.initialize(registerer) } // Information about a message waiting to be read. @@ -306,34 +305,30 @@ type inboundMsgByteThrottlerMetrics struct { awaitingRelease prometheus.Gauge } -func (m *inboundMsgByteThrottlerMetrics) initialize(namespace string, reg prometheus.Registerer) error { +func (m *inboundMsgByteThrottlerMetrics) initialize(reg prometheus.Registerer) error { errs := wrappers.Errs{} m.acquireLatency = metric.NewAveragerWithErrs( - namespace, + "", "byte_throttler_inbound_acquire_latency", "average time (in ns) to get space on the inbound message byte buffer", reg, &errs, ) m.remainingAtLargeBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_remaining_at_large_bytes", - Help: "Bytes remaining in the at-large byte buffer", + Name: "byte_throttler_inbound_remaining_at_large_bytes", + Help: "Bytes remaining in the at-large byte buffer", }) m.remainingVdrBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_remaining_validator_bytes", - Help: "Bytes remaining in the validator byte buffer", + Name: "byte_throttler_inbound_remaining_validator_bytes", + Help: "Bytes remaining in the validator byte buffer", }) m.awaitingAcquire = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "byte_throttler_inbound_awaiting_acquire", - Help: "Number of inbound messages waiting to acquire space on the inbound message byte buffer", + Name: "byte_throttler_inbound_awaiting_acquire", + Help: "Number of inbound messages waiting to acquire space on the inbound message byte buffer", }) m.awaitingRelease = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: 
namespace, - Name: "byte_throttler_inbound_awaiting_release", - Help: "Number of messages currently being read/handled", + Name: "byte_throttler_inbound_awaiting_release", + Help: "Number of messages currently being read/handled", }) errs.Add( reg.Register(m.remainingAtLargeBytes), diff --git a/network/throttling/inbound_msg_byte_throttler_test.go b/network/throttling/inbound_msg_byte_throttler_test.go index 4fc931e3f37..72ca316de44 100644 --- a/network/throttling/inbound_msg_byte_throttler_test.go +++ b/network/throttling/inbound_msg_byte_throttler_test.go @@ -30,7 +30,6 @@ func TestInboundMsgByteThrottlerCancelContextDeadlock(t *testing.T) { throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -60,7 +59,6 @@ func TestInboundMsgByteThrottlerCancelContext(t *testing.T) { throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -118,7 +116,6 @@ func TestInboundMsgByteThrottler(t *testing.T) { throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -333,7 +330,6 @@ func TestSybilMsgThrottlerMaxNonVdr(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -384,7 +380,6 @@ func TestMsgThrottlerNextMsg(t *testing.T) { maxBytes := maxVdrBytes throttler, err := newInboundMsgByteThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, diff --git a/network/throttling/inbound_msg_throttler.go b/network/throttling/inbound_msg_throttler.go index ea9167deca1..faf64ed083a 100644 --- a/network/throttling/inbound_msg_throttler.go +++ b/network/throttling/inbound_msg_throttler.go @@ -12,7 +12,6 @@ import ( "github.com/ava-labs/avalanchego/snow/networking/tracker" "github.com/ava-labs/avalanchego/snow/validators" 
"github.com/ava-labs/avalanchego/utils/logging" - "github.com/ava-labs/avalanchego/utils/metric" ) var _ InboundMsgThrottler = (*inboundMsgThrottler)(nil) @@ -54,7 +53,6 @@ type InboundMsgThrottlerConfig struct { // Returns a new, sybil-safe inbound message throttler. func NewInboundMsgThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, vdrs validators.Manager, throttlerConfig InboundMsgThrottlerConfig, @@ -64,7 +62,6 @@ func NewInboundMsgThrottler( ) (InboundMsgThrottler, error) { byteThrottler, err := newInboundMsgByteThrottler( log, - namespace, registerer, vdrs, throttlerConfig.MsgByteThrottlerConfig, @@ -73,7 +70,6 @@ func NewInboundMsgThrottler( return nil, err } bufferThrottler, err := newInboundMsgBufferThrottler( - namespace, registerer, throttlerConfig.MaxProcessingMsgsPerNode, ) @@ -82,7 +78,6 @@ func NewInboundMsgThrottler( } bandwidthThrottler, err := newBandwidthThrottler( log, - namespace, registerer, throttlerConfig.BandwidthThrottlerConfig, ) @@ -90,7 +85,7 @@ func NewInboundMsgThrottler( return nil, err } cpuThrottler, err := NewSystemThrottler( - metric.AppendNamespace(namespace, "cpu"), + "cpu", registerer, throttlerConfig.CPUThrottlerConfig, resourceTracker.CPUTracker(), @@ -100,7 +95,7 @@ func NewInboundMsgThrottler( return nil, err } diskThrottler, err := NewSystemThrottler( - metric.AppendNamespace(namespace, "disk"), + "disk", registerer, throttlerConfig.DiskThrottlerConfig, resourceTracker.DiskTracker(), diff --git a/network/throttling/outbound_msg_throttler.go b/network/throttling/outbound_msg_throttler.go index d75c53f1548..b27fe01060d 100644 --- a/network/throttling/outbound_msg_throttler.go +++ b/network/throttling/outbound_msg_throttler.go @@ -42,7 +42,6 @@ type outboundMsgThrottler struct { func NewSybilOutboundMsgThrottler( log logging.Logger, - namespace string, registerer prometheus.Registerer, vdrs validators.Manager, config MsgByteThrottlerConfig, @@ -59,7 +58,7 @@ func 
NewSybilOutboundMsgThrottler( nodeToAtLargeBytesUsed: make(map[ids.NodeID]uint64), }, } - return t, t.metrics.initialize(namespace, registerer) + return t, t.metrics.initialize(registerer) } func (t *outboundMsgThrottler) Acquire(msg message.OutboundMessage, nodeID ids.NodeID) bool { @@ -176,31 +175,26 @@ type outboundMsgThrottlerMetrics struct { awaitingRelease prometheus.Gauge } -func (m *outboundMsgThrottlerMetrics) initialize(namespace string, registerer prometheus.Registerer) error { +func (m *outboundMsgThrottlerMetrics) initialize(registerer prometheus.Registerer) error { m.acquireSuccesses = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "throttler_outbound_acquire_successes", - Help: "Outbound messages not dropped due to rate-limiting", + Name: "throttler_outbound_acquire_successes", + Help: "Outbound messages not dropped due to rate-limiting", }) m.acquireFailures = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "throttler_outbound_acquire_failures", - Help: "Outbound messages dropped due to rate-limiting", + Name: "throttler_outbound_acquire_failures", + Help: "Outbound messages dropped due to rate-limiting", }) m.remainingAtLargeBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "throttler_outbound_remaining_at_large_bytes", - Help: "Bytes remaining in the at large byte allocation", + Name: "throttler_outbound_remaining_at_large_bytes", + Help: "Bytes remaining in the at large byte allocation", }) m.remainingVdrBytes = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "throttler_outbound_remaining_validator_bytes", - Help: "Bytes remaining in the validator byte allocation", + Name: "throttler_outbound_remaining_validator_bytes", + Help: "Bytes remaining in the validator byte allocation", }) m.awaitingRelease = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "throttler_outbound_awaiting_release", - Help: "Number 
of messages waiting to be sent", + Name: "throttler_outbound_awaiting_release", + Help: "Number of messages waiting to be sent", }) return utils.Err( registerer.Register(m.acquireSuccesses), diff --git a/network/throttling/outbound_msg_throttler_test.go b/network/throttling/outbound_msg_throttler_test.go index 664449adadd..ab883b8fa4e 100644 --- a/network/throttling/outbound_msg_throttler_test.go +++ b/network/throttling/outbound_msg_throttler_test.go @@ -32,7 +32,6 @@ func TestSybilOutboundMsgThrottler(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr2ID, nil, ids.Empty, 1)) throttlerIntf, err := NewSybilOutboundMsgThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -174,7 +173,6 @@ func TestSybilOutboundMsgThrottlerMaxNonVdr(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) throttlerIntf, err := NewSybilOutboundMsgThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, @@ -221,7 +219,6 @@ func TestBypassThrottling(t *testing.T) { require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) throttlerIntf, err := NewSybilOutboundMsgThrottler( logging.NoLog{}, - "", prometheus.NewRegistry(), vdrs, config, diff --git a/node/node.go b/node/node.go index 5445c626d1d..bdbe89d0db5 100644 --- a/node/node.go +++ b/node/node.go @@ -91,7 +91,8 @@ const ( ipResolutionTimeout = 30 * time.Second - apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" + apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" + networkNamespace = constants.PlatformName + metric.NamespaceSeparator + "network" ) var ( @@ -187,11 +188,18 @@ func New( // It must be initiated before networking (initNetworking), chain manager (initChainManager) // and the engine (initChains) but after the metrics (initMetricsAPI) // message.Creator currently record metrics under network namespace - n.networkNamespace = "network" + + 
networkRegisterer, err := metrics.MakeAndRegister( + n.MetricsGatherer, + networkNamespace, + ) + if err != nil { + return nil, err + } + n.msgCreator, err = message.NewCreator( n.Log, - n.MetricsRegisterer, - n.networkNamespace, + networkRegisterer, n.Config.NetworkConfig.CompressionType, n.Config.NetworkConfig.MaximumInboundMessageTimeout, ) @@ -209,7 +217,7 @@ func New( } n.initCPUTargeter(&config.CPUTargeterConfig) n.initDiskTargeter(&config.DiskTargeterConfig) - if err := n.initNetworking(); err != nil { // Set up networking layer. + if err := n.initNetworking(networkRegisterer); err != nil { // Set up networking layer. return nil, fmt.Errorf("problem initializing networking: %w", err) } @@ -313,8 +321,7 @@ type Node struct { VertexAcceptorGroup snow.AcceptorGroup // Net runs the networking stack - networkNamespace string - Net network.Network + Net network.Network // The staking address will optionally be written to a process context // file to enable other nodes to be configured to use this node as a @@ -390,7 +397,7 @@ type Node struct { // Initialize the networking layer. // Assumes [n.vdrs], [n.CPUTracker], and [n.CPUTargeter] have been initialized. 
-func (n *Node) initNetworking() error { +func (n *Node) initNetworking(reg prometheus.Registerer) error { // Providing either loopback address - `::1` for ipv6 and `127.0.0.1` for ipv4 - as the listen // host will avoid the need for a firewall exception on recent MacOS: // @@ -587,7 +594,6 @@ func (n *Node) initNetworking() error { } // add node configs to network config - n.Config.NetworkConfig.Namespace = n.networkNamespace n.Config.NetworkConfig.MyNodeID = n.ID n.Config.NetworkConfig.MyIPPort = dynamicIP n.Config.NetworkConfig.NetworkID = n.Config.NetworkID @@ -606,7 +612,7 @@ func (n *Node) initNetworking() error { n.Net, err = network.NewNetwork( &n.Config.NetworkConfig, n.msgCreator, - n.MetricsRegisterer, + reg, n.Log, listener, dialer.NewDialer(constants.NetworkType, n.Config.NetworkConfig.DialerConfig, n.Log), diff --git a/snow/networking/sender/sender_test.go b/snow/networking/sender/sender_test.go index 2005c25da2a..6bd2bc558c9 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -70,7 +70,6 @@ func TestTimeout(t *testing.T) { mc, err := message.NewCreator( logging.NoLog{}, metrics, - "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -347,7 +346,6 @@ func TestReliableMessages(t *testing.T) { mc, err := message.NewCreator( logging.NoLog{}, metrics, - "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second, ) @@ -504,7 +502,6 @@ func TestReliableMessagesToMyself(t *testing.T) { mc, err := message.NewCreator( logging.NoLog{}, metrics, - "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second, ) diff --git a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index dbd766b2a6e..34b7d54b76e 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1418,7 +1418,7 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { chainRouter := &router.ChainRouter{} metrics := prometheus.NewRegistry() - mc, err := 
message.NewCreator(logging.NoLog{}, metrics, "dummyNamespace", constants.DefaultNetworkCompressionType, 10*time.Second) + mc, err := message.NewCreator(logging.NoLog{}, metrics, constants.DefaultNetworkCompressionType, 10*time.Second) require.NoError(err) require.NoError(chainRouter.Initialize( From d19acc6085bcf6945e293cbd3a99f6896408cfee Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Thu, 30 May 2024 21:30:15 -0400 Subject: [PATCH 37/53] Remove db namespace --- database/leveldb/db.go | 4 +- database/leveldb/db_test.go | 4 +- database/leveldb/metrics.go | 92 +++++++++++++-------------------- database/pebbledb/batch_test.go | 2 +- database/pebbledb/db.go | 2 +- database/pebbledb/db_test.go | 2 +- node/node.go | 16 ++++-- 7 files changed, 55 insertions(+), 67 deletions(-) diff --git a/database/leveldb/db.go b/database/leveldb/db.go index 6c09606128d..7c54b1d86e3 100644 --- a/database/leveldb/db.go +++ b/database/leveldb/db.go @@ -186,7 +186,7 @@ type config struct { } // New returns a wrapped LevelDB object. 
-func New(file string, configBytes []byte, log logging.Logger, namespace string, reg prometheus.Registerer) (database.Database, error) { +func New(file string, configBytes []byte, log logging.Logger, reg prometheus.Registerer) (database.Database, error) { parsedConfig := config{ BlockCacheCapacity: DefaultBlockCacheSize, DisableSeeksCompaction: true, @@ -236,7 +236,7 @@ func New(file string, configBytes []byte, log logging.Logger, namespace string, closeCh: make(chan struct{}), } if parsedConfig.MetricUpdateFrequency > 0 { - metrics, err := newMetrics(namespace, reg) + metrics, err := newMetrics(reg) if err != nil { // Drop any close error to report the original error _ = db.Close() diff --git a/database/leveldb/db_test.go b/database/leveldb/db_test.go index 8352e53bd53..65214d08084 100644 --- a/database/leveldb/db_test.go +++ b/database/leveldb/db_test.go @@ -18,7 +18,7 @@ func TestInterface(t *testing.T) { for name, test := range database.Tests { t.Run(name, func(t *testing.T) { folder := t.TempDir() - db, err := New(folder, nil, logging.NoLog{}, "", prometheus.NewRegistry()) + db, err := New(folder, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) test(t, db) @@ -30,7 +30,7 @@ func TestInterface(t *testing.T) { func newDB(t testing.TB) database.Database { folder := t.TempDir() - db, err := New(folder, nil, logging.NoLog{}, "", prometheus.NewRegistry()) + db, err := New(folder, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) return db } diff --git a/database/leveldb/metrics.go b/database/leveldb/metrics.go index 5ad2e2b369d..d1edab6f98e 100644 --- a/database/leveldb/metrics.go +++ b/database/leveldb/metrics.go @@ -62,117 +62,99 @@ type metrics struct { priorStats, currentStats *leveldb.DBStats } -func newMetrics(namespace string, reg prometheus.Registerer) (metrics, error) { +func newMetrics(reg prometheus.Registerer) (metrics, error) { m := metrics{ writesDelayedCount: prometheus.NewCounter(prometheus.CounterOpts{ - 
Namespace: namespace, - Name: "writes_delayed", - Help: "number of cumulative writes that have been delayed due to compaction", + Name: "writes_delayed", + Help: "number of cumulative writes that have been delayed due to compaction", }), writesDelayedDuration: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "writes_delayed_duration", - Help: "amount of time (in ns) that writes have been delayed due to compaction", + Name: "writes_delayed_duration", + Help: "amount of time (in ns) that writes have been delayed due to compaction", }), writeIsDelayed: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "write_delayed", - Help: "1 if there is currently a write that is being delayed due to compaction", + Name: "write_delayed", + Help: "1 if there is currently a write that is being delayed due to compaction", }), aliveSnapshots: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "alive_snapshots", - Help: "number of currently alive snapshots", + Name: "alive_snapshots", + Help: "number of currently alive snapshots", }), aliveIterators: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "alive_iterators", - Help: "number of currently alive iterators", + Name: "alive_iterators", + Help: "number of currently alive iterators", }), ioWrite: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "io_write", - Help: "cumulative amount of io write during compaction", + Name: "io_write", + Help: "cumulative amount of io write during compaction", }), ioRead: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "io_read", - Help: "cumulative amount of io read during compaction", + Name: "io_read", + Help: "cumulative amount of io read during compaction", }), blockCacheSize: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "block_cache_size", - Help: "total size of cached blocks", + Name: "block_cache_size", + Help: 
"total size of cached blocks", }), openTables: prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "open_tables", - Help: "number of currently opened tables", + Name: "open_tables", + Help: "number of currently opened tables", }), levelTableCount: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "table_count", - Help: "number of tables allocated by level", + Name: "table_count", + Help: "number of tables allocated by level", }, levelLabels, ), levelSize: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "size", - Help: "amount of bytes allocated by level", + Name: "size", + Help: "amount of bytes allocated by level", }, levelLabels, ), levelDuration: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "duration", - Help: "amount of time (in ns) spent in compaction by level", + Name: "duration", + Help: "amount of time (in ns) spent in compaction by level", }, levelLabels, ), levelReads: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "reads", - Help: "amount of bytes read during compaction by level", + Name: "reads", + Help: "amount of bytes read during compaction by level", }, levelLabels, ), levelWrites: prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "writes", - Help: "amount of bytes written during compaction by level", + Name: "writes", + Help: "amount of bytes written during compaction by level", }, levelLabels, ), memCompactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "mem_comps", - Help: "total number of memory compactions performed", + Name: "mem_comps", + Help: "total number of memory compactions performed", }), level0Compactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "level_0_comps", - Help: "total number of level 0 compactions performed", + Name: "level_0_comps", + Help: "total number of level 0 
compactions performed", }), nonLevel0Compactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "non_level_0_comps", - Help: "total number of non-level 0 compactions performed", + Name: "non_level_0_comps", + Help: "total number of non-level 0 compactions performed", }), seekCompactions: prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "seek_comps", - Help: "total number of seek compactions performed", + Name: "seek_comps", + Help: "total number of seek compactions performed", }), priorStats: &leveldb.DBStats{}, diff --git a/database/pebbledb/batch_test.go b/database/pebbledb/batch_test.go index 3a0ad63b7e4..98ab0e28eb3 100644 --- a/database/pebbledb/batch_test.go +++ b/database/pebbledb/batch_test.go @@ -17,7 +17,7 @@ func TestBatch(t *testing.T) { require := require.New(t) dirName := t.TempDir() - db, err := New(dirName, nil, logging.NoLog{}, "", prometheus.NewRegistry()) + db, err := New(dirName, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(err) batchIntf := db.NewBatch() diff --git a/database/pebbledb/db.go b/database/pebbledb/db.go index ca048ec5f63..1de9c2ce5de 100644 --- a/database/pebbledb/db.go +++ b/database/pebbledb/db.go @@ -64,7 +64,7 @@ type Config struct { } // TODO: Add metrics -func New(file string, configBytes []byte, log logging.Logger, _ string, _ prometheus.Registerer) (database.Database, error) { +func New(file string, configBytes []byte, log logging.Logger, _ prometheus.Registerer) (database.Database, error) { cfg := DefaultConfig if len(configBytes) > 0 { if err := json.Unmarshal(configBytes, &cfg); err != nil { diff --git a/database/pebbledb/db_test.go b/database/pebbledb/db_test.go index 506221dce5c..7d48a00c627 100644 --- a/database/pebbledb/db_test.go +++ b/database/pebbledb/db_test.go @@ -16,7 +16,7 @@ import ( func newDB(t testing.TB) *Database { folder := t.TempDir() - db, err := New(folder, nil, logging.NoLog{}, "pebble", prometheus.NewRegistry()) + db, 
err := New(folder, nil, logging.NoLog{}, prometheus.NewRegistry()) require.NoError(t, err) return db.(*Database) } diff --git a/node/node.go b/node/node.go index 5445c626d1d..ab7dcb02b91 100644 --- a/node/node.go +++ b/node/node.go @@ -92,6 +92,7 @@ const ( ipResolutionTimeout = 30 * time.Second apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" + dbNamespace = constants.PlatformName + metric.NamespaceSeparator + "db_internal" ) var ( @@ -723,14 +724,21 @@ func (n *Node) Dispatch() error { */ func (n *Node) initDatabase() error { + dbRegisterer, err := metrics.MakeAndRegister( + n.MetricsGatherer, + dbNamespace, + ) + if err != nil { + return err + } + // start the db switch n.Config.DatabaseConfig.Name { case leveldb.Name: // Prior to v1.10.15, the only on-disk database was leveldb, and its // files went to [dbPath]/[networkID]/v1.4.5. dbPath := filepath.Join(n.Config.DatabaseConfig.Path, version.CurrentDatabase.String()) - var err error - n.DB, err = leveldb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, "db_internal", n.MetricsRegisterer) + n.DB, err = leveldb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, dbRegisterer) if err != nil { return fmt.Errorf("couldn't create %s at %s: %w", leveldb.Name, dbPath, err) } @@ -738,8 +746,7 @@ func (n *Node) initDatabase() error { n.DB = memdb.New() case pebbledb.Name: dbPath := filepath.Join(n.Config.DatabaseConfig.Path, "pebble") - var err error - n.DB, err = pebbledb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, "db_internal", n.MetricsRegisterer) + n.DB, err = pebbledb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, dbRegisterer) if err != nil { return fmt.Errorf("couldn't create %s at %s: %w", pebbledb.Name, dbPath, err) } @@ -757,7 +764,6 @@ func (n *Node) initDatabase() error { n.DB = versiondb.New(n.DB) } - var err error n.DB, err = meterdb.New("db", n.MetricsRegisterer, n.DB) if err != nil { return err From 06e7c7e58986b940cf46be387d48d6bfd4d5e958 Mon Sep 17 00:00:00 2001 
From: Stephen Buttolph Date: Fri, 31 May 2024 14:19:59 -0400 Subject: [PATCH 38/53] Remove _chain_ separator in metrics --- chains/manager.go | 19 +++++++++---------- node/node.go | 11 +++++------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 00663da25b1..719ee76e3f7 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -76,15 +76,14 @@ const ( defaultChannelSize = 1 initialQueueSize = 3 - ChainNamespace = constants.PlatformName + metric.NamespaceSeparator + "chain" - handlerNamespace = ChainNamespace + metric.NamespaceSeparator + "handler" - stakeNamespace = ChainNamespace + metric.NamespaceSeparator + "stake" - p2pNamespace = ChainNamespace + metric.NamespaceSeparator + "p2p" - snowmanNamespace = ChainNamespace + metric.NamespaceSeparator + "snowman" - avalancheNamespace = ChainNamespace + metric.NamespaceSeparator + "avalanche" - proposervmNamespace = ChainNamespace + metric.NamespaceSeparator + "proposervm" - meterchainvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meterchainvm" - meterdagvmNamespace = ChainNamespace + metric.NamespaceSeparator + "meterdagvm" + handlerNamespace = constants.PlatformName + metric.NamespaceSeparator + "handler" + stakeNamespace = constants.PlatformName + metric.NamespaceSeparator + "stake" + p2pNamespace = constants.PlatformName + metric.NamespaceSeparator + "p2p" + snowmanNamespace = constants.PlatformName + metric.NamespaceSeparator + "snowman" + avalancheNamespace = constants.PlatformName + metric.NamespaceSeparator + "avalanche" + proposervmNamespace = constants.PlatformName + metric.NamespaceSeparator + "proposervm" + meterchainvmNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterchainvm" + meterdagvmNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterdagvm" ChainLabel = "chain" ) @@ -1563,7 +1562,7 @@ func (m *manager) getOrMakeVMRegisterer(vmID ids.ID, chainAlias string) (metrics vmName := 
constants.VMName(vmID) vmGatherer = metrics.NewLabelGatherer(ChainLabel) err := m.Metrics.Register( - metric.AppendNamespace(ChainNamespace, vmName), + vmName, vmGatherer, ) if err != nil { diff --git a/node/node.go b/node/node.go index 9b614c9bf19..5c2284edeed 100644 --- a/node/node.go +++ b/node/node.go @@ -91,12 +91,11 @@ const ( ipResolutionTimeout = 30 * time.Second - apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" - dbNamespace = constants.PlatformName + metric.NamespaceSeparator + "db" - networkNamespace = constants.PlatformName + metric.NamespaceSeparator + "network" - - meterDBNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "meterdb" - benchlistNamespace = chains.ChainNamespace + metric.NamespaceSeparator + "benchlist" + apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" + benchlistNamespace = constants.PlatformName + metric.NamespaceSeparator + "benchlist" + dbNamespace = constants.PlatformName + metric.NamespaceSeparator + "db" + meterDBNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterdb" + networkNamespace = constants.PlatformName + metric.NamespaceSeparator + "network" ) var ( From 4ddc2c5cd38981427938b4e0526ad447589c317f Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Fri, 31 May 2024 15:26:21 -0400 Subject: [PATCH 39/53] Implement label gatherer --- api/metrics/gatherer_test.go | 10 +- api/metrics/label_gatherer.go | 93 ++++++++++++++ api/metrics/label_gatherer_test.go | 182 ++++++++++++++++++++++++++++ api/metrics/multi_gatherer.go | 76 ++---------- api/metrics/multi_gatherer_test.go | 137 --------------------- api/metrics/prefix_gatherer.go | 74 +++++++++++ api/metrics/prefix_gatherer_test.go | 179 +++++++++++++++++++++++++++ 7 files changed, 541 insertions(+), 210 deletions(-) create mode 100644 api/metrics/label_gatherer.go create mode 100644 api/metrics/label_gatherer_test.go delete mode 100644 api/metrics/multi_gatherer_test.go create mode 100644 
api/metrics/prefix_gatherer.go create mode 100644 api/metrics/prefix_gatherer_test.go diff --git a/api/metrics/gatherer_test.go b/api/metrics/gatherer_test.go index 334c361ebcc..df6919cfb27 100644 --- a/api/metrics/gatherer_test.go +++ b/api/metrics/gatherer_test.go @@ -3,15 +3,7 @@ package metrics -import ( - dto "github.com/prometheus/client_model/go" -) - -var ( - hello = "hello" - world = "world" - helloWorld = "hello_world" -) +import dto "github.com/prometheus/client_model/go" type testGatherer struct { mfs []*dto.MetricFamily diff --git a/api/metrics/label_gatherer.go b/api/metrics/label_gatherer.go new file mode 100644 index 00000000000..d0ad899ac31 --- /dev/null +++ b/api/metrics/label_gatherer.go @@ -0,0 +1,93 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package metrics + +import ( + "cmp" + "errors" + "fmt" + "slices" + + "github.com/prometheus/client_golang/prometheus" + + dto "github.com/prometheus/client_model/go" +) + +var ( + _ MultiGatherer = (*prefixGatherer)(nil) + + errDuplicateGatherer = errors.New("attempt to register duplicate gatherer") +) + +// NewLabelGatherer returns a new MultiGatherer that merges metrics by adding a +// new label. 
+func NewLabelGatherer(labelName string) MultiGatherer { + return &labelGatherer{ + labelName: labelName, + } +} + +type labelGatherer struct { + multiGatherer + + labelName string +} + +func (g *labelGatherer) Register(labelValue string, gatherer prometheus.Gatherer) error { + g.lock.Lock() + defer g.lock.Unlock() + + if slices.Contains(g.names, labelValue) { + return fmt.Errorf("%w: for %q with label %q", + errDuplicateGatherer, + g.labelName, + labelValue, + ) + } + + g.names = append(g.names, labelValue) + g.gatherers = append(g.gatherers, &labeledGatherer{ + labelName: g.labelName, + labelValue: labelValue, + gatherer: gatherer, + }) + return nil +} + +type labeledGatherer struct { + labelName string + labelValue string + gatherer prometheus.Gatherer +} + +func (g *labeledGatherer) Gather() ([]*dto.MetricFamily, error) { + gatheredMetricFamilies, err := g.gatherer.Gather() + if err != nil { + return nil, err + } + + for _, gatheredMetricFamily := range gatheredMetricFamilies { + if gatheredMetricFamily == nil { + continue + } + + metrics := gatheredMetricFamily.Metric[:0] + for _, gatheredMetric := range gatheredMetricFamily.Metric { + if gatheredMetric == nil { + continue + } + + gatheredMetric.Label = append(gatheredMetric.Label, &dto.LabelPair{ + Name: &g.labelName, + Value: &g.labelValue, + }) + slices.SortFunc(gatheredMetric.Label, func(i, j *dto.LabelPair) int { + return cmp.Compare(i.GetName(), j.GetName()) + }) + metrics = append(metrics, gatheredMetric) + } + gatheredMetricFamily.Metric = metrics + } + return gatheredMetricFamilies, nil +} diff --git a/api/metrics/label_gatherer_test.go b/api/metrics/label_gatherer_test.go new file mode 100644 index 00000000000..abd8b61e611 --- /dev/null +++ b/api/metrics/label_gatherer_test.go @@ -0,0 +1,182 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + + dto "github.com/prometheus/client_model/go" +) + +func TestLabelGatherer_Gather(t *testing.T) { + require := require.New(t) + + gatherer := NewLabelGatherer("smith") + require.NotNil(gatherer) + + registerA := prometheus.NewRegistry() + require.NoError(gatherer.Register("rick", registerA)) + + registerB := prometheus.NewRegistry() + require.NoError(gatherer.Register("morty", registerB)) + + counterA := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + require.NoError(registerA.Register(counterA)) + + counterB := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + counterB.Inc() + require.NoError(registerB.Register(counterB)) + + metrics, err := gatherer.Gather() + require.NoError(err) + require.Equal( + []*dto.MetricFamily{ + { + Name: proto.String("counter"), + Help: proto.String("help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("smith"), + Value: proto.String("morty"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("smith"), + Value: proto.String("rick"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(0), + }, + }, + }, + }, + }, + metrics, + ) +} + +func TestLabelGatherer_Register(t *testing.T) { + tests := []struct { + name string + labelGatherer *labelGatherer + labelValue string + gatherer prometheus.Gatherer + expectedErr error + expectedLabelGatherer *labelGatherer + }{ + { + name: "first registration", + labelGatherer: &labelGatherer{}, + labelValue: "first", + gatherer: &testGatherer{}, + expectedErr: nil, + expectedLabelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + 
&labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + { + name: "second registration", + labelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + labelValue: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: nil, + expectedLabelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first", "second"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + &labeledGatherer{ + labelValue: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + }, + }, + }, + }, + }, + { + name: "conflicts with previous registration", + labelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + labelValue: "first", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: errDuplicateGatherer, + expectedLabelGatherer: &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + err := test.labelGatherer.Register(test.labelValue, test.gatherer) + require.ErrorIs(err, test.expectedErr) + require.Equal(test.expectedLabelGatherer, test.labelGatherer) + }) + } +} diff --git a/api/metrics/multi_gatherer.go b/api/metrics/multi_gatherer.go index d8d4d93d2d7..b2fede55643 100644 --- a/api/metrics/multi_gatherer.go +++ b/api/metrics/multi_gatherer.go @@ -4,94 +4,42 @@ package metrics import ( - "cmp" - "errors" "fmt" - "slices" 
"sync" "github.com/prometheus/client_golang/prometheus" - "github.com/ava-labs/avalanchego/utils/metric" - dto "github.com/prometheus/client_model/go" ) -var ( - _ MultiGatherer = (*multiGatherer)(nil) - - errReregisterGatherer = errors.New("attempt to register existing gatherer") -) - // MultiGatherer extends the Gatherer interface by allowing additional gatherers // to be registered. type MultiGatherer interface { prometheus.Gatherer // Register adds the outputs of [gatherer] to the results of future calls to - // Gather with the provided [namespace] added to the metrics. - Register(namespace string, gatherer prometheus.Gatherer) error + // Gather with the provided [name] added to the metrics. + Register(name string, gatherer prometheus.Gatherer) error } -type multiGatherer struct { - lock sync.RWMutex - gatherers map[string]prometheus.Gatherer +// Deprecated: Use NewPrefixGatherer instead. +// +// TODO: Remove once coreth is updated. +func NewMultiGatherer() MultiGatherer { + return NewPrefixGatherer() } -func NewMultiGatherer() MultiGatherer { - return &multiGatherer{ - gatherers: make(map[string]prometheus.Gatherer), - } +type multiGatherer struct { + lock sync.RWMutex + names []string + gatherers prometheus.Gatherers } func (g *multiGatherer) Gather() ([]*dto.MetricFamily, error) { g.lock.RLock() defer g.lock.RUnlock() - var results []*dto.MetricFamily - for namespace, gatherer := range g.gatherers { - gatheredMetrics, err := gatherer.Gather() - if err != nil { - return nil, err - } - for _, gatheredMetric := range gatheredMetrics { - var name string - if gatheredMetric.Name != nil { - name = metric.AppendNamespace(namespace, *gatheredMetric.Name) - } else { - name = namespace - } - gatheredMetric.Name = &name - results = append(results, gatheredMetric) - } - } - // Because we overwrite every metric's name, we are guaranteed that there - // are no metrics with nil names. 
- sortMetrics(results) - return results, nil -} - -func (g *multiGatherer) Register(namespace string, gatherer prometheus.Gatherer) error { - g.lock.Lock() - defer g.lock.Unlock() - - if existingGatherer, exists := g.gatherers[namespace]; exists { - return fmt.Errorf("%w for namespace %q; existing: %#v; new: %#v", - errReregisterGatherer, - namespace, - existingGatherer, - gatherer, - ) - } - - g.gatherers[namespace] = gatherer - return nil -} - -func sortMetrics(m []*dto.MetricFamily) { - slices.SortFunc(m, func(i, j *dto.MetricFamily) int { - return cmp.Compare(*i.Name, *j.Name) - }) + return g.gatherers.Gather() } func MakeAndRegister(gatherer MultiGatherer, name string) (*prometheus.Registry, error) { diff --git a/api/metrics/multi_gatherer_test.go b/api/metrics/multi_gatherer_test.go deleted file mode 100644 index 51b548d18a6..00000000000 --- a/api/metrics/multi_gatherer_test.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. -// See the file LICENSE for licensing terms. 
- -package metrics - -import ( - "errors" - "testing" - - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - - dto "github.com/prometheus/client_model/go" -) - -func TestMultiGathererEmptyGather(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - mfs, err := g.Gather() - require.NoError(err) - require.Empty(mfs) -} - -func TestMultiGathererDuplicatedPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - og := prometheus.NewRegistry() - - require.NoError(g.Register("", og)) - - err := g.Register("", og) - require.ErrorIs(err, errReregisterGatherer) - - require.NoError(g.Register("lol", og)) -} - -func TestMultiGathererAddedError(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - errTest := errors.New("non-nil error") - tg := &testGatherer{ - err: errTest, - } - - require.NoError(g.Register("", tg)) - - mfs, err := g.Gather() - require.ErrorIs(err, errTest) - require.Empty(mfs) -} - -func TestMultiGathererNoAddedPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - tg := &testGatherer{ - mfs: []*dto.MetricFamily{{ - Name: &hello, - }}, - } - - require.NoError(g.Register("", tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 1) - require.Equal(&hello, mfs[0].Name) -} - -func TestMultiGathererAddedPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - tg := &testGatherer{ - mfs: []*dto.MetricFamily{{ - Name: &world, - }}, - } - - require.NoError(g.Register(hello, tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 1) - require.Equal(&helloWorld, mfs[0].Name) -} - -func TestMultiGathererJustPrefix(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - tg := &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - } - - require.NoError(g.Register(hello, tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 1) - 
require.Equal(&hello, mfs[0].Name) -} - -func TestMultiGathererSorted(t *testing.T) { - require := require.New(t) - - g := NewMultiGatherer() - - name0 := "a" - name1 := "z" - tg := &testGatherer{ - mfs: []*dto.MetricFamily{ - { - Name: &name1, - }, - { - Name: &name0, - }, - }, - } - - require.NoError(g.Register("", tg)) - - mfs, err := g.Gather() - require.NoError(err) - require.Len(mfs, 2) - require.Equal(&name0, mfs[0].Name) - require.Equal(&name1, mfs[1].Name) -} diff --git a/api/metrics/prefix_gatherer.go b/api/metrics/prefix_gatherer.go new file mode 100644 index 00000000000..1c76d53de8b --- /dev/null +++ b/api/metrics/prefix_gatherer.go @@ -0,0 +1,74 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package metrics + +import ( + "fmt" + "slices" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/ava-labs/avalanchego/utils/metric" + + dto "github.com/prometheus/client_model/go" +) + +var _ MultiGatherer = (*prefixGatherer)(nil) + +// NewPrefixGatherer returns a new MultiGatherer that merges metrics by adding a +// prefix to their names. 
+func NewPrefixGatherer() MultiGatherer { + return &prefixGatherer{} +} + +type prefixGatherer struct { + multiGatherer +} + +func (g *prefixGatherer) Register(prefix string, gatherer prometheus.Gatherer) error { + g.lock.Lock() + defer g.lock.Unlock() + + // TODO: Restrict prefixes to avoid potential conflicts + if slices.Contains(g.names, prefix) { + return fmt.Errorf("%w: %q", + errDuplicateGatherer, + prefix, + ) + } + + g.names = append(g.names, prefix) + g.gatherers = append(g.gatherers, &prefixedGatherer{ + prefix: prefix, + gatherer: gatherer, + }) + return nil +} + +type prefixedGatherer struct { + prefix string + gatherer prometheus.Gatherer +} + +func (g *prefixedGatherer) Gather() ([]*dto.MetricFamily, error) { + gatheredMetricFamilies, err := g.gatherer.Gather() + if err != nil { + return nil, err + } + + metricFamilies := gatheredMetricFamilies[:0] + for _, gatheredMetricFamily := range gatheredMetricFamilies { + if gatheredMetricFamily == nil { + continue + } + + name := metric.AppendNamespace( + g.prefix, + gatheredMetricFamily.GetName(), + ) + gatheredMetricFamily.Name = &name + metricFamilies = append(metricFamilies, gatheredMetricFamily) + } + return metricFamilies, nil +} diff --git a/api/metrics/prefix_gatherer_test.go b/api/metrics/prefix_gatherer_test.go new file mode 100644 index 00000000000..7733922b1cb --- /dev/null +++ b/api/metrics/prefix_gatherer_test.go @@ -0,0 +1,179 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + + dto "github.com/prometheus/client_model/go" +) + +func TestPrefixGatherer_Gather(t *testing.T) { + require := require.New(t) + + gatherer := NewPrefixGatherer() + require.NotNil(gatherer) + + registerA := prometheus.NewRegistry() + require.NoError(gatherer.Register("a", registerA)) + + registerB := prometheus.NewRegistry() + require.NoError(gatherer.Register("b", registerB)) + + counterA := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + require.NoError(registerA.Register(counterA)) + + counterB := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "help", + }) + counterB.Inc() + require.NoError(registerB.Register(counterB)) + + metrics, err := gatherer.Gather() + require.NoError(err) + require.Equal( + []*dto.MetricFamily{ + { + Name: proto.String("a_counter"), + Help: proto.String("help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{}, + Counter: &dto.Counter{ + Value: proto.Float64(0), + }, + }, + }, + }, + { + Name: proto.String("b_counter"), + Help: proto.String("help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{}, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + }, + }, + metrics, + ) +} + +func TestPrefixGatherer_Register(t *testing.T) { + tests := []struct { + name string + prefixGatherer *prefixGatherer + prefix string + gatherer prometheus.Gatherer + expectedErr error + expectedPrefixGatherer *prefixGatherer + }{ + { + name: "first registration", + prefixGatherer: &prefixGatherer{}, + prefix: "first", + gatherer: &testGatherer{}, + expectedErr: nil, + expectedPrefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + 
&prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + { + name: "second registration", + prefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + prefix: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: nil, + expectedPrefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first", "second"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + &prefixedGatherer{ + prefix: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + }, + }, + }, + }, + }, + { + name: "conflicts with previous registration", + prefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + prefix: "first", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + expectedErr: errDuplicateGatherer, + expectedPrefixGatherer: &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{"first"}, + gatherers: prometheus.Gatherers{ + &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + }, + }, + }, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + err := test.prefixGatherer.Register(test.prefix, test.gatherer) + require.ErrorIs(err, test.expectedErr) + require.Equal(test.expectedPrefixGatherer, test.prefixGatherer) + }) + } +} From b5d98550e8ce5ef074c0d2e3948e7214e9a99cd9 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 3 Jun 2024 12:04:40 -0400 Subject: [PATCH 40/53] cleanup --- api/metrics/gatherer_test.go | 11 +- api/metrics/label_gatherer.go | 29 +-- api/metrics/label_gatherer_test.go | 293 
++++++++++++++++------------ api/metrics/prefix_gatherer.go | 24 +-- api/metrics/prefix_gatherer_test.go | 157 ++++++--------- 5 files changed, 250 insertions(+), 264 deletions(-) diff --git a/api/metrics/gatherer_test.go b/api/metrics/gatherer_test.go index df6919cfb27..83a438867fb 100644 --- a/api/metrics/gatherer_test.go +++ b/api/metrics/gatherer_test.go @@ -3,7 +3,16 @@ package metrics -import dto "github.com/prometheus/client_model/go" +import ( + "github.com/prometheus/client_golang/prometheus" + + dto "github.com/prometheus/client_model/go" +) + +var counterOpts = prometheus.CounterOpts{ + Name: "counter", + Help: "help", +} type testGatherer struct { mfs []*dto.MetricFamily diff --git a/api/metrics/label_gatherer.go b/api/metrics/label_gatherer.go index d0ad899ac31..c608cc7badf 100644 --- a/api/metrics/label_gatherer.go +++ b/api/metrics/label_gatherer.go @@ -4,7 +4,6 @@ package metrics import ( - "cmp" "errors" "fmt" "slices" @@ -62,32 +61,14 @@ type labeledGatherer struct { } func (g *labeledGatherer) Gather() ([]*dto.MetricFamily, error) { - gatheredMetricFamilies, err := g.gatherer.Gather() - if err != nil { - return nil, err - } - - for _, gatheredMetricFamily := range gatheredMetricFamilies { - if gatheredMetricFamily == nil { - continue - } - - metrics := gatheredMetricFamily.Metric[:0] - for _, gatheredMetric := range gatheredMetricFamily.Metric { - if gatheredMetric == nil { - continue - } - - gatheredMetric.Label = append(gatheredMetric.Label, &dto.LabelPair{ + metricFamilies, err := g.gatherer.Gather() + for _, metricFamily := range metricFamilies { + for _, metric := range metricFamily.Metric { + metric.Label = append(metric.Label, &dto.LabelPair{ Name: &g.labelName, Value: &g.labelValue, }) - slices.SortFunc(gatheredMetric.Label, func(i, j *dto.LabelPair) int { - return cmp.Compare(i.GetName(), j.GetName()) - }) - metrics = append(metrics, gatheredMetric) } - gatheredMetricFamily.Metric = metrics } - return gatheredMetricFamilies, nil + return 
metricFamilies, err } diff --git a/api/metrics/label_gatherer_test.go b/api/metrics/label_gatherer_test.go index abd8b61e611..d5f30fd6529 100644 --- a/api/metrics/label_gatherer_test.go +++ b/api/metrics/label_gatherer_test.go @@ -14,69 +14,164 @@ import ( ) func TestLabelGatherer_Gather(t *testing.T) { - require := require.New(t) - - gatherer := NewLabelGatherer("smith") - require.NotNil(gatherer) - - registerA := prometheus.NewRegistry() - require.NoError(gatherer.Register("rick", registerA)) - - registerB := prometheus.NewRegistry() - require.NoError(gatherer.Register("morty", registerB)) - - counterA := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "counter", - Help: "help", - }) - require.NoError(registerA.Register(counterA)) - - counterB := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "counter", - Help: "help", - }) - counterB.Inc() - require.NoError(registerB.Register(counterB)) - - metrics, err := gatherer.Gather() - require.NoError(err) - require.Equal( - []*dto.MetricFamily{ - { - Name: proto.String("counter"), - Help: proto.String("help"), - Type: dto.MetricType_COUNTER.Enum(), - Metric: []*dto.Metric{ - { - Label: []*dto.LabelPair{ - { - Name: proto.String("smith"), - Value: proto.String("morty"), - }, + const ( + labelName = "smith" + labelValueA = "rick" + labelValueB = "morty" + customLabelName = "tag" + customLabelValueA = "a" + customLabelValueB = "b" + ) + tests := []struct { + name string + labelName string + expectedMetrics []*dto.Metric + expectErr bool + }{ + { + name: "no overlap", + labelName: customLabelName, + expectedMetrics: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String(labelName), + Value: proto.String(labelValueB), }, - Counter: &dto.Counter{ - Value: proto.Float64(1), + { + Name: proto.String(customLabelName), + Value: proto.String(customLabelValueB), }, }, - { - Label: []*dto.LabelPair{ - { - Name: proto.String("smith"), - Value: proto.String("rick"), - }, + Counter: &dto.Counter{ + Value: 
proto.Float64(1), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String(labelName), + Value: proto.String(labelValueA), }, - Counter: &dto.Counter{ - Value: proto.Float64(0), + { + Name: proto.String(customLabelName), + Value: proto.String(customLabelValueA), }, }, + Counter: &dto.Counter{ + Value: proto.Float64(0), + }, }, }, + expectErr: false, }, - metrics, - ) + { + name: "has overlap", + labelName: labelName, + expectedMetrics: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String(labelName), + Value: proto.String(labelValueB), + }, + { + Name: proto.String(customLabelName), + Value: proto.String(customLabelValueB), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(1), + }, + }, + }, + expectErr: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + gatherer := NewLabelGatherer(labelName) + require.NotNil(gatherer) + + registerA := prometheus.NewRegistry() + require.NoError(gatherer.Register(labelValueA, registerA)) + { + counterA := prometheus.NewCounterVec( + counterOpts, + []string{test.labelName}, + ) + counterA.With(prometheus.Labels{test.labelName: customLabelValueA}) + require.NoError(registerA.Register(counterA)) + } + + registerB := prometheus.NewRegistry() + require.NoError(gatherer.Register(labelValueB, registerB)) + { + counterB := prometheus.NewCounterVec( + counterOpts, + []string{customLabelName}, + ) + counterB.With(prometheus.Labels{customLabelName: customLabelValueB}).Inc() + require.NoError(registerB.Register(counterB)) + } + + metrics, err := gatherer.Gather() + if test.expectErr { + require.Error(err) //nolint:forbidigo // the error is not exported + } else { + require.NoError(err) + } + require.Equal( + []*dto.MetricFamily{ + { + Name: proto.String(counterOpts.Name), + Help: proto.String(counterOpts.Help), + Type: dto.MetricType_COUNTER.Enum(), + Metric: test.expectedMetrics, + }, + }, + metrics, + ) + }) + } } func 
TestLabelGatherer_Register(t *testing.T) { + firstLabeledGatherer := &labeledGatherer{ + labelValue: "first", + gatherer: &testGatherer{}, + } + firstLabelGatherer := func() *labelGatherer { + return &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{firstLabeledGatherer.labelValue}, + gatherers: prometheus.Gatherers{ + firstLabeledGatherer, + }, + }, + } + } + secondLabeledGatherer := &labeledGatherer{ + labelValue: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + } + secondLabelGatherer := &labelGatherer{ + multiGatherer: multiGatherer{ + names: []string{ + firstLabeledGatherer.labelValue, + secondLabeledGatherer.labelValue, + }, + gatherers: prometheus.Gatherers{ + firstLabeledGatherer, + secondLabeledGatherer, + }, + }, + } + tests := []struct { name string labelGatherer *labelGatherer @@ -86,88 +181,28 @@ func TestLabelGatherer_Register(t *testing.T) { expectedLabelGatherer *labelGatherer }{ { - name: "first registration", - labelGatherer: &labelGatherer{}, - labelValue: "first", - gatherer: &testGatherer{}, - expectedErr: nil, - expectedLabelGatherer: &labelGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &labeledGatherer{ - labelValue: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, + name: "first registration", + labelGatherer: &labelGatherer{}, + labelValue: "first", + gatherer: firstLabeledGatherer.gatherer, + expectedErr: nil, + expectedLabelGatherer: firstLabelGatherer(), }, { - name: "second registration", - labelGatherer: &labelGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &labeledGatherer{ - labelValue: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, - labelValue: "second", - gatherer: &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - }, - expectedErr: nil, - expectedLabelGatherer: &labelGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first", "second"}, - 
gatherers: prometheus.Gatherers{ - &labeledGatherer{ - labelValue: "first", - gatherer: &testGatherer{}, - }, - &labeledGatherer{ - labelValue: "second", - gatherer: &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - }, - }, - }, - }, - }, + name: "second registration", + labelGatherer: firstLabelGatherer(), + labelValue: "second", + gatherer: secondLabeledGatherer.gatherer, + expectedErr: nil, + expectedLabelGatherer: secondLabelGatherer, }, { - name: "conflicts with previous registration", - labelGatherer: &labelGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &labeledGatherer{ - labelValue: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, - labelValue: "first", - gatherer: &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - }, - expectedErr: errDuplicateGatherer, - expectedLabelGatherer: &labelGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &labeledGatherer{ - labelValue: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, + name: "conflicts with previous registration", + labelGatherer: firstLabelGatherer(), + labelValue: "first", + gatherer: secondLabeledGatherer.gatherer, + expectedErr: errDuplicateGatherer, + expectedLabelGatherer: firstLabelGatherer(), }, } for _, test := range tests { diff --git a/api/metrics/prefix_gatherer.go b/api/metrics/prefix_gatherer.go index 1c76d53de8b..6e88fe8e550 100644 --- a/api/metrics/prefix_gatherer.go +++ b/api/metrics/prefix_gatherer.go @@ -8,6 +8,7 @@ import ( "slices" "github.com/prometheus/client_golang/prometheus" + "google.golang.org/protobuf/proto" "github.com/ava-labs/avalanchego/utils/metric" @@ -52,23 +53,12 @@ type prefixedGatherer struct { } func (g *prefixedGatherer) Gather() ([]*dto.MetricFamily, error) { - gatheredMetricFamilies, err := g.gatherer.Gather() - if err != nil { - return nil, err - } - - metricFamilies := gatheredMetricFamilies[:0] - for _, gatheredMetricFamily := range 
gatheredMetricFamilies { - if gatheredMetricFamily == nil { - continue - } - - name := metric.AppendNamespace( + metricFamilies, err := g.gatherer.Gather() + for _, metricFamily := range metricFamilies { + metricFamily.Name = proto.String(metric.AppendNamespace( g.prefix, - gatheredMetricFamily.GetName(), - ) - gatheredMetricFamily.Name = &name - metricFamilies = append(metricFamilies, gatheredMetricFamily) + metricFamily.GetName(), + )) } - return metricFamilies, nil + return metricFamilies, err } diff --git a/api/metrics/prefix_gatherer_test.go b/api/metrics/prefix_gatherer_test.go index 7733922b1cb..ba37540b01e 100644 --- a/api/metrics/prefix_gatherer_test.go +++ b/api/metrics/prefix_gatherer_test.go @@ -21,22 +21,18 @@ func TestPrefixGatherer_Gather(t *testing.T) { registerA := prometheus.NewRegistry() require.NoError(gatherer.Register("a", registerA)) + { + counterA := prometheus.NewCounter(counterOpts) + require.NoError(registerA.Register(counterA)) + } registerB := prometheus.NewRegistry() require.NoError(gatherer.Register("b", registerB)) - - counterA := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "counter", - Help: "help", - }) - require.NoError(registerA.Register(counterA)) - - counterB := prometheus.NewCounter(prometheus.CounterOpts{ - Name: "counter", - Help: "help", - }) - counterB.Inc() - require.NoError(registerB.Register(counterB)) + { + counterB := prometheus.NewCounter(counterOpts) + counterB.Inc() + require.NoError(registerB.Register(counterB)) + } metrics, err := gatherer.Gather() require.NoError(err) @@ -44,7 +40,7 @@ func TestPrefixGatherer_Gather(t *testing.T) { []*dto.MetricFamily{ { Name: proto.String("a_counter"), - Help: proto.String("help"), + Help: proto.String(counterOpts.Help), Type: dto.MetricType_COUNTER.Enum(), Metric: []*dto.Metric{ { @@ -57,7 +53,7 @@ func TestPrefixGatherer_Gather(t *testing.T) { }, { Name: proto.String("b_counter"), - Help: proto.String("help"), + Help: proto.String(counterOpts.Help), Type: 
dto.MetricType_COUNTER.Enum(), Metric: []*dto.Metric{ { @@ -74,6 +70,41 @@ func TestPrefixGatherer_Gather(t *testing.T) { } func TestPrefixGatherer_Register(t *testing.T) { + firstPrefixedGatherer := &prefixedGatherer{ + prefix: "first", + gatherer: &testGatherer{}, + } + firstPrefixGatherer := func() *prefixGatherer { + return &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{ + firstPrefixedGatherer.prefix, + }, + gatherers: prometheus.Gatherers{ + firstPrefixedGatherer, + }, + }, + } + } + secondPrefixedGatherer := &prefixedGatherer{ + prefix: "second", + gatherer: &testGatherer{ + mfs: []*dto.MetricFamily{{}}, + }, + } + secondPrefixGatherer := &prefixGatherer{ + multiGatherer: multiGatherer{ + names: []string{ + firstPrefixedGatherer.prefix, + secondPrefixedGatherer.prefix, + }, + gatherers: prometheus.Gatherers{ + firstPrefixedGatherer, + secondPrefixedGatherer, + }, + }, + } + tests := []struct { name string prefixGatherer *prefixGatherer @@ -83,88 +114,28 @@ func TestPrefixGatherer_Register(t *testing.T) { expectedPrefixGatherer *prefixGatherer }{ { - name: "first registration", - prefixGatherer: &prefixGatherer{}, - prefix: "first", - gatherer: &testGatherer{}, - expectedErr: nil, - expectedPrefixGatherer: &prefixGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &prefixedGatherer{ - prefix: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, + name: "first registration", + prefixGatherer: &prefixGatherer{}, + prefix: firstPrefixedGatherer.prefix, + gatherer: firstPrefixedGatherer.gatherer, + expectedErr: nil, + expectedPrefixGatherer: firstPrefixGatherer(), }, { - name: "second registration", - prefixGatherer: &prefixGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &prefixedGatherer{ - prefix: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, - prefix: "second", - gatherer: &testGatherer{ - mfs: 
[]*dto.MetricFamily{{}}, - }, - expectedErr: nil, - expectedPrefixGatherer: &prefixGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first", "second"}, - gatherers: prometheus.Gatherers{ - &prefixedGatherer{ - prefix: "first", - gatherer: &testGatherer{}, - }, - &prefixedGatherer{ - prefix: "second", - gatherer: &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - }, - }, - }, - }, - }, + name: "second registration", + prefixGatherer: firstPrefixGatherer(), + prefix: secondPrefixedGatherer.prefix, + gatherer: secondPrefixedGatherer.gatherer, + expectedErr: nil, + expectedPrefixGatherer: secondPrefixGatherer, }, { - name: "conflicts with previous registration", - prefixGatherer: &prefixGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &prefixedGatherer{ - prefix: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, - prefix: "first", - gatherer: &testGatherer{ - mfs: []*dto.MetricFamily{{}}, - }, - expectedErr: errDuplicateGatherer, - expectedPrefixGatherer: &prefixGatherer{ - multiGatherer: multiGatherer{ - names: []string{"first"}, - gatherers: prometheus.Gatherers{ - &prefixedGatherer{ - prefix: "first", - gatherer: &testGatherer{}, - }, - }, - }, - }, + name: "conflicts with previous registration", + prefixGatherer: firstPrefixGatherer(), + prefix: firstPrefixedGatherer.prefix, + gatherer: secondPrefixedGatherer.gatherer, + expectedErr: errDuplicateGatherer, + expectedPrefixGatherer: firstPrefixGatherer(), }, } for _, test := range tests { From 66a95ef32376046a3097811195e843454d4bc16e Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 3 Jun 2024 12:19:18 -0400 Subject: [PATCH 41/53] add comment --- api/metrics/label_gatherer.go | 3 +++ api/metrics/prefix_gatherer.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/api/metrics/label_gatherer.go b/api/metrics/label_gatherer.go index c608cc7badf..cf69dd300eb 100644 --- a/api/metrics/label_gatherer.go +++ 
b/api/metrics/label_gatherer.go @@ -61,6 +61,9 @@ type labeledGatherer struct { } func (g *labeledGatherer) Gather() ([]*dto.MetricFamily, error) { + // Gather returns partially filled metrics in the case of an error. So, it + // is expected to still return the metrics in the case and error is + // returned. metricFamilies, err := g.gatherer.Gather() for _, metricFamily := range metricFamilies { for _, metric := range metricFamily.Metric { diff --git a/api/metrics/prefix_gatherer.go b/api/metrics/prefix_gatherer.go index 6e88fe8e550..31f81914e0b 100644 --- a/api/metrics/prefix_gatherer.go +++ b/api/metrics/prefix_gatherer.go @@ -53,6 +53,9 @@ type prefixedGatherer struct { } func (g *prefixedGatherer) Gather() ([]*dto.MetricFamily, error) { + // Gather returns partially filled metrics in the case of an error. So, it + // is expected to still return the metrics in the case and error is + // returned. metricFamilies, err := g.gatherer.Gather() for _, metricFamily := range metricFamilies { metricFamily.Name = proto.String(metric.AppendNamespace( From d518ac525975accc1368c2585974ef8a3b0cb005 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 3 Jun 2024 12:43:56 -0400 Subject: [PATCH 42/53] nit --- api/metrics/label_gatherer.go | 3 +-- api/metrics/prefix_gatherer.go | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/api/metrics/label_gatherer.go b/api/metrics/label_gatherer.go index cf69dd300eb..3b8951a75b7 100644 --- a/api/metrics/label_gatherer.go +++ b/api/metrics/label_gatherer.go @@ -62,8 +62,7 @@ type labeledGatherer struct { func (g *labeledGatherer) Gather() ([]*dto.MetricFamily, error) { // Gather returns partially filled metrics in the case of an error. So, it - // is expected to still return the metrics in the case and error is - // returned. + // is expected to still return the metrics in the case an error is returned. 
metricFamilies, err := g.gatherer.Gather() for _, metricFamily := range metricFamilies { for _, metric := range metricFamily.Metric { diff --git a/api/metrics/prefix_gatherer.go b/api/metrics/prefix_gatherer.go index 31f81914e0b..1f0b78a2438 100644 --- a/api/metrics/prefix_gatherer.go +++ b/api/metrics/prefix_gatherer.go @@ -54,8 +54,7 @@ type prefixedGatherer struct { func (g *prefixedGatherer) Gather() ([]*dto.MetricFamily, error) { // Gather returns partially filled metrics in the case of an error. So, it - // is expected to still return the metrics in the case and error is - // returned. + // is expected to still return the metrics in the case an error is returned. metricFamilies, err := g.gatherer.Gather() for _, metricFamily := range metricFamilies { metricFamily.Name = proto.String(metric.AppendNamespace( From 487f6fbbce6ba76a45ea1529311721bb3ba9b7ee Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Mon, 3 Jun 2024 16:32:22 -0400 Subject: [PATCH 43/53] merged --- tests/e2e/x/transfer/virtuous.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/e2e/x/transfer/virtuous.go b/tests/e2e/x/transfer/virtuous.go index 951a0cb9de4..253110e88ca 100644 --- a/tests/e2e/x/transfer/virtuous.go +++ b/tests/e2e/x/transfer/virtuous.go @@ -60,11 +60,7 @@ var _ = e2e.DescribeXChainSerial("[Virtuous Transfer Tx AVAX]", func() { require.NoError(err) for _, metrics := range allNodeMetrics { -<<<<<<< HEAD xBlksProcessing, ok := tests.GetMetricValue(metrics, blksProcessingMetric, xChainMetricLabels) -======= - xBlksProcessing, ok := tests.GetMetricValue(metrics, xBlksProcessingMetric, nil) ->>>>>>> master if !ok || xBlksProcessing > 0 { return false } From 4728e5206f1ff71d5d5473a119878b8b1e640bff Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Tue, 4 Jun 2024 12:35:27 -0400 Subject: [PATCH 44/53] fix vm namespace --- chains/manager.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chains/manager.go b/chains/manager.go index 
719ee76e3f7..0ff43a67701 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -1560,9 +1560,10 @@ func (m *manager) getOrMakeVMRegisterer(vmID ids.ID, chainAlias string) (metrics vmGatherer, ok := m.vmGatherer[vmID] if !ok { vmName := constants.VMName(vmID) + vmNamespace := metric.AppendNamespace(constants.PlatformName, vmName) vmGatherer = metrics.NewLabelGatherer(ChainLabel) err := m.Metrics.Register( - vmName, + vmNamespace, vmGatherer, ) if err != nil { From d564c9a93390358fdc4f28d4a56f753d9e4b72c3 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 17:04:16 -0400 Subject: [PATCH 45/53] remove readme --- api/metrics/README.md | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 api/metrics/README.md diff --git a/api/metrics/README.md b/api/metrics/README.md deleted file mode 100644 index 392cbefb8af..00000000000 --- a/api/metrics/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# Metrics - -```mermaid -graph LR - A[avalanche] --> B[api] - A --> C[chain] - A --> D[db] - A --> E[health] - A --> F[network] - A --> G[process] - A --> H[requests] - A --> I[resource_tracker] - A --> J[responses] - A --> K[system_resources] - C -- $chainID --> L[avalanche] - C -- $chainID --> M[handler] - C -- $chainID --> N[meterchainvm] - C -- $chainID --> O[meterdagvm] - C -- $chainID --> P[meterdb] - C -- $chainID --> Q[proposervm] - C -- $chainID --> R[snowman] - C -- $chainID --> S[$vmID] -``` From 1411f1b9a2489353d8ac9e8443b2d77de38f9778 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 18:33:38 -0400 Subject: [PATCH 46/53] use constants --- node/node.go | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/node/node.go b/node/node.go index 5c2284edeed..09fb05d06e8 100644 --- a/node/node.go +++ b/node/node.go @@ -91,11 +91,17 @@ const ( ipResolutionTimeout = 30 * time.Second - apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" - benchlistNamespace = 
constants.PlatformName + metric.NamespaceSeparator + "benchlist" - dbNamespace = constants.PlatformName + metric.NamespaceSeparator + "db" - meterDBNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterdb" - networkNamespace = constants.PlatformName + metric.NamespaceSeparator + "network" + apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" + benchlistNamespace = constants.PlatformName + metric.NamespaceSeparator + "benchlist" + dbNamespace = constants.PlatformName + metric.NamespaceSeparator + "db" + healthNamespace = constants.PlatformName + metric.NamespaceSeparator + "health" + meterDBNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterdb" + networkNamespace = constants.PlatformName + metric.NamespaceSeparator + "network" + processNamespace = constants.PlatformName + metric.NamespaceSeparator + "process" + requestsNamespace = constants.PlatformName + metric.NamespaceSeparator + "requests" + resourceTrackerNamespace = constants.PlatformName + metric.NamespaceSeparator + "resource_tracker" + responsesNamespace = constants.PlatformName + metric.NamespaceSeparator + "responses" + systemResourcesNamespace = constants.PlatformName + metric.NamespaceSeparator + "system_resources" ) var ( @@ -1072,7 +1078,7 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { requestsReg, err := metrics.MakeAndRegister( n.MetricsGatherer, - metric.AppendNamespace(constants.PlatformName, "requests"), + requestsNamespace, ) if err != nil { return err @@ -1080,7 +1086,7 @@ func (n *Node) initChainManager(avaxAssetID ids.ID) error { responseReg, err := metrics.MakeAndRegister( n.MetricsGatherer, - metric.AppendNamespace(constants.PlatformName, "responses"), + responsesNamespace, ) if err != nil { return err @@ -1295,7 +1301,7 @@ func (n *Node) initMetricsAPI() error { processReg, err := metrics.MakeAndRegister( n.MetricsGatherer, - metric.AppendNamespace(constants.PlatformName, "process"), + processNamespace, ) if err != 
nil { return err @@ -1428,7 +1434,7 @@ func (n *Node) initInfoAPI() error { func (n *Node) initHealthAPI() error { healthReg, err := metrics.MakeAndRegister( n.MetricsGatherer, - metric.AppendNamespace(constants.PlatformName, "health"), + healthNamespace, ) if err != nil { return err @@ -1574,7 +1580,7 @@ func (n *Node) initAPIAliases(genesisBytes []byte) error { func (n *Node) initResourceManager() error { systemResourcesRegisterer, err := metrics.MakeAndRegister( n.MetricsGatherer, - metric.AppendNamespace(constants.PlatformName, "system_resources"), + systemResourcesNamespace, ) if err != nil { return err @@ -1595,7 +1601,7 @@ func (n *Node) initResourceManager() error { resourceTrackerRegisterer, err := metrics.MakeAndRegister( n.MetricsGatherer, - metric.AppendNamespace(constants.PlatformName, "resource_tracker"), + resourceTrackerNamespace, ) if err != nil { return err From ee318aa7505ccc0d6e495483fd87c511d94a713b Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 19:43:23 -0400 Subject: [PATCH 47/53] remove avalanche registerer from context --- chains/manager.go | 41 ++++++++++--------- snow/consensus/snowman/consensus_test.go | 8 ++-- snow/consensus/snowman/topological.go | 2 +- snow/context.go | 6 +-- .../avalanche/bootstrap/bootstrapper.go | 4 +- .../avalanche/bootstrap/bootstrapper_test.go | 10 +++-- snow/engine/snowman/bootstrap/bootstrapper.go | 2 +- .../snowman/bootstrap/bootstrapper_test.go | 6 +-- snow/engine/snowman/syncer/utils_test.go | 2 +- snow/engine/snowman/transitive.go | 6 +-- snow/engine/snowman/transitive_test.go | 2 +- snow/networking/sender/sender.go | 12 +----- snow/networking/sender/sender_test.go | 15 +++---- snow/snowtest/snowtest.go | 13 +++--- vms/platformvm/vm_test.go | 5 ++- 15 files changed, 66 insertions(+), 68 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 0ff43a67701..91195ab77a3 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -499,14 +499,6 @@ func (m *manager) 
buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c return nil, err } - avalancheMetrics, err := metrics.MakeAndRegister( - m.avalancheGatherer, - primaryAlias, - ) - if err != nil { - return nil, err - } - vmMetrics, err := m.getOrMakeVMRegisterer(chainParams.VMID, primaryAlias) if err != nil { return nil, err @@ -535,12 +527,11 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c ValidatorState: m.validatorState, ChainDataDir: chainDataDir, }, - PrimaryAlias: primaryAlias, - SnowmanRegisterer: snowmanMetrics, - AvalancheRegisterer: avalancheMetrics, - BlockAcceptor: m.BlockAcceptorGroup, - TxAcceptor: m.TxAcceptorGroup, - VertexAcceptor: m.VertexAcceptorGroup, + PrimaryAlias: primaryAlias, + Registerer: snowmanMetrics, + BlockAcceptor: m.BlockAcceptorGroup, + TxAcceptor: m.TxAcceptorGroup, + VertexAcceptor: m.VertexAcceptorGroup, } // Get a factory for the vm we want to use on our chain @@ -655,11 +646,19 @@ func (m *manager) createAvalancheChain( txBootstrappingDB := prefixdb.New(TxBootstrappingDBPrefix, prefixDB) blockBootstrappingDB := prefixdb.New(BlockBootstrappingDBPrefix, prefixDB) - vtxBlocker, err := queue.NewWithMissing(vertexBootstrappingDB, "vtx", ctx.AvalancheRegisterer) + avalancheMetrics, err := metrics.MakeAndRegister( + m.avalancheGatherer, + primaryAlias, + ) + if err != nil { + return nil, err + } + + vtxBlocker, err := queue.NewWithMissing(vertexBootstrappingDB, "vtx", avalancheMetrics) if err != nil { return nil, err } - txBlocker, err := queue.New(txBootstrappingDB, "tx", ctx.AvalancheRegisterer) + txBlocker, err := queue.New(txBootstrappingDB, "tx", avalancheMetrics) if err != nil { return nil, err } @@ -673,6 +672,7 @@ func (m *manager) createAvalancheChain( m.TimeoutManager, p2ppb.EngineType_ENGINE_TYPE_AVALANCHE, sb, + avalancheMetrics, ) if err != nil { return nil, fmt.Errorf("couldn't initialize avalanche sender: %w", err) @@ -691,6 +691,7 @@ func (m *manager) createAvalancheChain( 
m.TimeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, sb, + ctx.Registerer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize avalanche sender: %w", err) @@ -916,7 +917,7 @@ func (m *manager) createAvalancheChain( ctx.Log, m.BootstrapMaxTimeGetAncestors, m.BootstrapAncestorsMaxContainersSent, - ctx.SnowmanRegisterer, + ctx.Registerer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize snow base message handler: %w", err) @@ -983,7 +984,7 @@ func (m *manager) createAvalancheChain( ctx.Log, m.BootstrapMaxTimeGetAncestors, m.BootstrapAncestorsMaxContainersSent, - ctx.AvalancheRegisterer, + avalancheMetrics, ) if err != nil { return nil, fmt.Errorf("couldn't initialize avalanche base message handler: %w", err) @@ -1015,6 +1016,7 @@ func (m *manager) createAvalancheChain( avalancheBootstrapper, err := avbootstrap.New( avalancheBootstrapperConfig, snowmanBootstrapper.Start, + avalancheMetrics, ) if err != nil { return nil, fmt.Errorf("error initializing avalanche bootstrapper: %w", err) @@ -1095,6 +1097,7 @@ func (m *manager) createSnowmanChain( m.TimeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, sb, + ctx.Registerer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize sender: %w", err) @@ -1312,7 +1315,7 @@ func (m *manager) createSnowmanChain( ctx.Log, m.BootstrapMaxTimeGetAncestors, m.BootstrapAncestorsMaxContainersSent, - ctx.SnowmanRegisterer, + ctx.Registerer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize snow base message handler: %w", err) diff --git a/snow/consensus/snowman/consensus_test.go b/snow/consensus/snowman/consensus_test.go index 49d02064071..5a5ed07ec86 100644 --- a/snow/consensus/snowman/consensus_test.go +++ b/snow/consensus/snowman/consensus_test.go @@ -507,7 +507,7 @@ func RecordPollSplitVoteNoChangeTest(t *testing.T, factory Factory) { snowCtx := snowtest.Context(t, snowtest.CChainID) ctx := snowtest.ConsensusContext(snowCtx) registerer := prometheus.NewRegistry() - ctx.SnowmanRegisterer = 
registerer + ctx.Registerer = registerer params := snowball.Parameters{ K: 2, @@ -1115,7 +1115,7 @@ func MetricsProcessingErrorTest(t *testing.T, factory Factory) { Name: "blks_processing", }) - require.NoError(ctx.SnowmanRegisterer.Register(numProcessing)) + require.NoError(ctx.Registerer.Register(numProcessing)) err := sm.Initialize( ctx, @@ -1149,7 +1149,7 @@ func MetricsAcceptedErrorTest(t *testing.T, factory Factory) { Name: "blks_accepted_count", }) - require.NoError(ctx.SnowmanRegisterer.Register(numAccepted)) + require.NoError(ctx.Registerer.Register(numAccepted)) err := sm.Initialize( ctx, @@ -1183,7 +1183,7 @@ func MetricsRejectedErrorTest(t *testing.T, factory Factory) { Name: "blks_rejected_count", }) - require.NoError(ctx.SnowmanRegisterer.Register(numRejected)) + require.NoError(ctx.Registerer.Register(numRejected)) err := sm.Initialize( ctx, diff --git a/snow/consensus/snowman/topological.go b/snow/consensus/snowman/topological.go index 79a817c1a19..f2ef015654c 100644 --- a/snow/consensus/snowman/topological.go +++ b/snow/consensus/snowman/topological.go @@ -111,7 +111,7 @@ func (ts *Topological) Initialize( ts.metrics, err = newMetrics( ctx.Log, - ctx.SnowmanRegisterer, + ctx.Registerer, lastAcceptedHeight, lastAcceptedTime, ) diff --git a/snow/context.go b/snow/context.go index 1ef471b302a..26fc67f213a 100644 --- a/snow/context.go +++ b/snow/context.go @@ -69,10 +69,8 @@ type ConsensusContext struct { // within. PrimaryAlias string - // Registers all snowman consensus metrics. - SnowmanRegisterer Registerer - // Registers all avalanche consensus metrics. - AvalancheRegisterer Registerer + // Registers all consensus metrics. + Registerer Registerer // BlockAcceptor is the callback that will be fired whenever a VM is // notified that their block was accepted. 
diff --git a/snow/engine/avalanche/bootstrap/bootstrapper.go b/snow/engine/avalanche/bootstrap/bootstrapper.go index 55e3307e933..00f9ab64a45 100644 --- a/snow/engine/avalanche/bootstrap/bootstrapper.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper.go @@ -8,6 +8,7 @@ import ( "fmt" "time" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "github.com/ava-labs/avalanchego/cache" @@ -47,6 +48,7 @@ var _ common.BootstrapableEngine = (*bootstrapper)(nil) func New( config Config, onFinished func(ctx context.Context, lastReqID uint32) error, + reg prometheus.Registerer, ) (common.BootstrapableEngine, error) { b := &bootstrapper{ Config: config, @@ -66,7 +68,7 @@ func New( processedCache: &cache.LRU[ids.ID, struct{}]{Size: cacheSize}, onFinished: onFinished, } - return b, b.metrics.Initialize(config.Ctx.AvalancheRegisterer) + return b, b.metrics.Initialize(reg) } // Note: To align with the Snowman invariant, it should be guaranteed the VM is diff --git a/snow/engine/avalanche/bootstrap/bootstrapper_test.go b/snow/engine/avalanche/bootstrap/bootstrapper_test.go index 47f92057552..2792e8682f6 100644 --- a/snow/engine/avalanche/bootstrap/bootstrapper_test.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper_test.go @@ -76,10 +76,10 @@ func newConfig(t *testing.T) (Config, ids.NodeID, *common.SenderTest, *vertex.Te peer := ids.GenerateTestNodeID() require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, peer, nil, ids.Empty, 1)) - vtxBlocker, err := queue.NewWithMissing(prefixdb.New([]byte("vtx"), db), "vtx", ctx.AvalancheRegisterer) + vtxBlocker, err := queue.NewWithMissing(prefixdb.New([]byte("vtx"), db), "vtx", prometheus.NewRegistry()) require.NoError(err) - txBlocker, err := queue.New(prefixdb.New([]byte("tx"), db), "tx", ctx.AvalancheRegisterer) + txBlocker, err := queue.New(prefixdb.New([]byte("tx"), db), "tx", prometheus.NewRegistry()) require.NoError(err) peerTracker := tracker.NewPeers() @@ -88,7 +88,7 @@ func newConfig(t *testing.T) 
(Config, ids.NodeID, *common.SenderTest, *vertex.Te startupTracker := tracker.NewStartup(peerTracker, totalWeight/2+1) vdrs.RegisterSetCallbackListener(constants.PrimaryNetworkID, startupTracker) - avaGetHandler, err := getter.New(manager, sender, ctx.Log, time.Second, 2000, ctx.AvalancheRegisterer) + avaGetHandler, err := getter.New(manager, sender, ctx.Log, time.Second, 2000, prometheus.NewRegistry()) require.NoError(err) p2pTracker, err := p2p.NewPeerTracker( @@ -172,6 +172,7 @@ func TestBootstrapperSingleFrontier(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) @@ -278,6 +279,7 @@ func TestBootstrapperByzantineResponses(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) @@ -444,6 +446,7 @@ func TestBootstrapperTxDependencies(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) @@ -567,6 +570,7 @@ func TestBootstrapperIncompleteAncestors(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/snow/engine/snowman/bootstrap/bootstrapper.go b/snow/engine/snowman/bootstrap/bootstrapper.go index 94df37a4019..6b8462f83f6 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper.go +++ b/snow/engine/snowman/bootstrap/bootstrapper.go @@ -116,7 +116,7 @@ type Bootstrapper struct { } func New(config Config, onFinished func(ctx context.Context, lastReqID uint32) error) (*Bootstrapper, error) { - metrics, err := newMetrics(config.Ctx.SnowmanRegisterer) + metrics, err := newMetrics(config.Ctx.Registerer) return &Bootstrapper{ Config: config, metrics: metrics, diff --git a/snow/engine/snowman/bootstrap/bootstrapper_test.go b/snow/engine/snowman/bootstrap/bootstrapper_test.go index 85a971ebab2..5577f62fa81 100644 --- a/snow/engine/snowman/bootstrap/bootstrapper_test.go +++ b/snow/engine/snowman/bootstrap/bootstrapper_test.go @@ -76,7 +76,7 @@ func newConfig(t *testing.T) (Config, ids.NodeID, *common.SenderTest, *block.Tes 
require.NoError(startupTracker.Connected(context.Background(), peer, version.CurrentApp)) - snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.SnowmanRegisterer) + snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.Registerer) require.NoError(err) peerTracker, err := p2p.NewPeerTracker( @@ -127,7 +127,7 @@ func TestBootstrapperStartsOnlyIfEnoughStakeIsConnected(t *testing.T) { startupTracker := tracker.NewStartup(tracker.NewPeers(), startupAlpha) peers.RegisterSetCallbackListener(ctx.SubnetID, startupTracker) - snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.SnowmanRegisterer) + snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.Registerer) require.NoError(err) peerTracker, err := p2p.NewPeerTracker( @@ -642,7 +642,7 @@ func TestBootstrapNoParseOnNew(t *testing.T) { peers.RegisterSetCallbackListener(ctx.SubnetID, startupTracker) require.NoError(startupTracker.Connected(context.Background(), peer, version.CurrentApp)) - snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.SnowmanRegisterer) + snowGetHandler, err := getter.New(vm, sender, ctx.Log, time.Second, 2000, ctx.Registerer) require.NoError(err) blk1 := snowmantest.BuildChild(snowmantest.Genesis) diff --git a/snow/engine/snowman/syncer/utils_test.go b/snow/engine/snowman/syncer/utils_test.go index 72a5083e0ba..a5217a4bf0d 100644 --- a/snow/engine/snowman/syncer/utils_test.go +++ b/snow/engine/snowman/syncer/utils_test.go @@ -96,7 +96,7 @@ func buildTestsObjects( ctx.Log, time.Second, 2000, - ctx.SnowmanRegisterer, + ctx.Registerer, ) require.NoError(err) diff --git a/snow/engine/snowman/transitive.go b/snow/engine/snowman/transitive.go index d1e2c2aae2c..9e89fedd22b 100644 --- a/snow/engine/snowman/transitive.go +++ b/snow/engine/snowman/transitive.go @@ -98,7 +98,7 @@ func New(config Config) (*Transitive, error) { nonVerifiedCache, err := metercacher.New[ids.ID, 
snowman.Block]( "non_verified_cache", - config.Ctx.SnowmanRegisterer, + config.Ctx.Registerer, cache.NewSizedLRU[ids.ID, snowman.Block]( nonVerifiedCacheSize, cachedBlockSize, @@ -118,13 +118,13 @@ func New(config Config) (*Transitive, error) { polls, err := poll.NewSet( factory, config.Ctx.Log, - config.Ctx.SnowmanRegisterer, + config.Ctx.Registerer, ) if err != nil { return nil, err } - metrics, err := newMetrics(config.Ctx.SnowmanRegisterer) + metrics, err := newMetrics(config.Ctx.Registerer) if err != nil { return nil, err } diff --git a/snow/engine/snowman/transitive_test.go b/snow/engine/snowman/transitive_test.go index 1ce546dd0e9..2961b018c8c 100644 --- a/snow/engine/snowman/transitive_test.go +++ b/snow/engine/snowman/transitive_test.go @@ -103,7 +103,7 @@ func setup(t *testing.T, config Config) (ids.NodeID, validators.Manager, *common config.Ctx.Log, time.Second, 2000, - config.Ctx.SnowmanRegisterer, + config.Ctx.Registerer, ) require.NoError(err) config.AllGetsServer = snowGetHandler diff --git a/snow/networking/sender/sender.go b/snow/networking/sender/sender.go index 317d2cc3a7f..e4e36bd3ebb 100644 --- a/snow/networking/sender/sender.go +++ b/snow/networking/sender/sender.go @@ -5,7 +5,6 @@ package sender import ( "context" - "fmt" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" @@ -58,6 +57,7 @@ func New( timeouts timeout.Manager, engineType p2p.EngineType, subnet subnets.Subnet, + reg prometheus.Registerer, ) (common.Sender, error) { s := &sender{ ctx: ctx, @@ -75,16 +75,6 @@ func New( engineType: engineType, subnet: subnet, } - - var reg prometheus.Registerer - switch engineType { - case p2p.EngineType_ENGINE_TYPE_SNOWMAN: - reg = ctx.SnowmanRegisterer - case p2p.EngineType_ENGINE_TYPE_AVALANCHE: - reg = ctx.AvalancheRegisterer - default: - return nil, fmt.Errorf("unknown engine type %s", engineType) - } return s, reg.Register(s.failedDueToBench) } diff --git a/snow/networking/sender/sender_test.go 
b/snow/networking/sender/sender_test.go index 1c7b1b22008..34f138f6db2 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -99,6 +99,7 @@ func TestTimeout(t *testing.T) { tm, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -375,6 +376,7 @@ func TestReliableMessages(t *testing.T) { tm, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -531,6 +533,7 @@ func TestReliableMessagesToMyself(t *testing.T) { tm, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -833,7 +836,7 @@ func TestSender_Bootstrap_Requests(t *testing.T) { // Instantiate new registerers to avoid duplicate metrics // registration - ctx.SnowmanRegisterer = prometheus.NewRegistry() + ctx.Registerer = prometheus.NewRegistry() sender, err := New( ctx, @@ -843,6 +846,7 @@ func TestSender_Bootstrap_Requests(t *testing.T) { timeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1049,11 +1053,6 @@ func TestSender_Bootstrap_Responses(t *testing.T) { router = router.NewMockRouter(ctrl) ) - // Instantiate new registerers to avoid duplicate metrics - // registration - ctx.SnowmanRegisterer = prometheus.NewRegistry() - ctx.AvalancheRegisterer = prometheus.NewRegistry() - sender, err := New( ctx, msgCreator, @@ -1062,6 +1061,7 @@ func TestSender_Bootstrap_Responses(t *testing.T) { timeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1218,7 +1218,7 @@ func TestSender_Single_Request(t *testing.T) { // Instantiate new registerers to avoid duplicate metrics // registration - ctx.SnowmanRegisterer = prometheus.NewRegistry() + 
ctx.Registerer = prometheus.NewRegistry() sender, err := New( ctx, @@ -1228,6 +1228,7 @@ func TestSender_Single_Request(t *testing.T) { timeoutManager, engineType, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/snow/snowtest/snowtest.go b/snow/snowtest/snowtest.go index 399ab5fac04..3cacc8e873b 100644 --- a/snow/snowtest/snowtest.go +++ b/snow/snowtest/snowtest.go @@ -39,13 +39,12 @@ func (noOpAcceptor) Accept(*snow.ConsensusContext, ids.ID, []byte) error { func ConsensusContext(ctx *snow.Context) *snow.ConsensusContext { return &snow.ConsensusContext{ - Context: ctx, - PrimaryAlias: ctx.ChainID.String(), - SnowmanRegisterer: prometheus.NewRegistry(), - AvalancheRegisterer: prometheus.NewRegistry(), - BlockAcceptor: noOpAcceptor{}, - TxAcceptor: noOpAcceptor{}, - VertexAcceptor: noOpAcceptor{}, + Context: ctx, + PrimaryAlias: ctx.ChainID.String(), + Registerer: prometheus.NewRegistry(), + BlockAcceptor: noOpAcceptor{}, + TxAcceptor: noOpAcceptor{}, + VertexAcceptor: noOpAcceptor{}, } } diff --git a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index f9feb19b8f1..963186e98b0 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1446,6 +1446,7 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { timeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(consensusCtx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1473,14 +1474,14 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { consensusCtx.Log, time.Second, 2000, - consensusCtx.SnowmanRegisterer, + consensusCtx.Registerer, ) require.NoError(err) peerTracker, err := p2p.NewPeerTracker( ctx.Log, "peer_tracker", - consensusCtx.SnowmanRegisterer, + consensusCtx.Registerer, set.Of(ctx.NodeID), nil, ) From 22f652cfba5384bf5a58ad097eb6c31e0e8081a1 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 20:08:29 -0400 Subject: [PATCH 48/53] Remove avalanche metrics 
registerer from consensus context --- chains/manager.go | 39 +++++++++++-------- snow/context.go | 9 +---- .../avalanche/bootstrap/bootstrapper.go | 4 +- .../avalanche/bootstrap/bootstrapper_test.go | 10 +++-- snow/networking/sender/sender.go | 12 +----- snow/networking/sender/sender_test.go | 11 +++--- snow/snowtest/snowtest.go | 11 +++--- vms/platformvm/vm_test.go | 1 + 8 files changed, 46 insertions(+), 51 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 7fee70b8f81..8548954e1c5 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -425,15 +425,6 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c return nil, fmt.Errorf("error while registering chain's metrics %w", err) } - // This converts the prefix for all the Avalanche consensus metrics from - // `avalanche_{chainID}_` into `avalanche_{chainID}_avalanche_` so that - // there are no conflicts when registering the Snowman consensus metrics. - avalancheConsensusMetrics := prometheus.NewRegistry() - avalancheDAGNamespace := metric.AppendNamespace(chainNamespace, "avalanche") - if err := m.Metrics.Register(avalancheDAGNamespace, avalancheConsensusMetrics); err != nil { - return nil, fmt.Errorf("error while registering DAG metrics %w", err) - } - vmMetrics := metrics.NewMultiGatherer() vmNamespace := metric.AppendNamespace(chainNamespace, "vm") if err := m.Metrics.Register(vmNamespace, vmMetrics); err != nil { @@ -463,11 +454,10 @@ func (m *manager) buildChain(chainParams ChainParameters, sb subnets.Subnet) (*c ValidatorState: m.validatorState, ChainDataDir: chainDataDir, }, - BlockAcceptor: m.BlockAcceptorGroup, - TxAcceptor: m.TxAcceptorGroup, - VertexAcceptor: m.VertexAcceptorGroup, - Registerer: consensusMetrics, - AvalancheRegisterer: avalancheConsensusMetrics, + BlockAcceptor: m.BlockAcceptorGroup, + TxAcceptor: m.TxAcceptorGroup, + VertexAcceptor: m.VertexAcceptorGroup, + Registerer: consensusMetrics, } // Get a factory for the vm we want to use on 
our chain @@ -572,11 +562,22 @@ func (m *manager) createAvalancheChain( txBootstrappingDB := prefixdb.New(TxBootstrappingDBPrefix, prefixDB) blockBootstrappingDB := prefixdb.New(BlockBootstrappingDBPrefix, prefixDB) - vtxBlocker, err := queue.NewWithMissing(vertexBootstrappingDB, "vtx", ctx.AvalancheRegisterer) + // This converts the prefix for all the Avalanche consensus metrics from + // `avalanche_{chainID}_` into `avalanche_{chainID}_avalanche_` so that + // there are no conflicts when registering the Snowman consensus metrics. + avalancheConsensusMetrics := prometheus.NewRegistry() + primaryAlias := m.PrimaryAliasOrDefault(ctx.ChainID) + chainNamespace := metric.AppendNamespace(constants.PlatformName, primaryAlias) + avalancheDAGNamespace := metric.AppendNamespace(chainNamespace, "avalanche") + if err := m.Metrics.Register(avalancheDAGNamespace, avalancheConsensusMetrics); err != nil { + return nil, fmt.Errorf("error while registering DAG metrics %w", err) + } + + vtxBlocker, err := queue.NewWithMissing(vertexBootstrappingDB, "vtx", avalancheConsensusMetrics) if err != nil { return nil, err } - txBlocker, err := queue.New(txBootstrappingDB, "tx", ctx.AvalancheRegisterer) + txBlocker, err := queue.New(txBootstrappingDB, "tx", avalancheConsensusMetrics) if err != nil { return nil, err } @@ -590,6 +591,7 @@ func (m *manager) createAvalancheChain( m.TimeoutManager, p2ppb.EngineType_ENGINE_TYPE_AVALANCHE, sb, + avalancheConsensusMetrics, ) if err != nil { return nil, fmt.Errorf("couldn't initialize avalanche sender: %w", err) @@ -608,6 +610,7 @@ func (m *manager) createAvalancheChain( m.TimeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, sb, + ctx.Registerer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize avalanche sender: %w", err) @@ -864,7 +867,7 @@ func (m *manager) createAvalancheChain( ctx.Log, m.BootstrapMaxTimeGetAncestors, m.BootstrapAncestorsMaxContainersSent, - ctx.AvalancheRegisterer, + avalancheConsensusMetrics, ) if err != nil { 
return nil, fmt.Errorf("couldn't initialize avalanche base message handler: %w", err) @@ -896,6 +899,7 @@ func (m *manager) createAvalancheChain( avalancheBootstrapper, err := avbootstrap.New( avalancheBootstrapperConfig, snowmanBootstrapper.Start, + avalancheConsensusMetrics, ) if err != nil { return nil, fmt.Errorf("error initializing avalanche bootstrapper: %w", err) @@ -966,6 +970,7 @@ func (m *manager) createSnowmanChain( m.TimeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, sb, + ctx.Registerer, ) if err != nil { return nil, fmt.Errorf("couldn't initialize sender: %w", err) diff --git a/snow/context.go b/snow/context.go index f610adca999..2fa50157189 100644 --- a/snow/context.go +++ b/snow/context.go @@ -65,15 +65,8 @@ type Registerer interface { type ConsensusContext struct { *Context - // Registers all common and snowman consensus metrics. Unlike the avalanche - // consensus engine metrics, we do not prefix the name with the engine name, - // as snowman is used for all chains by default. + // Registers all consensus metrics. Registerer Registerer - // Only used to register Avalanche consensus metrics. Previously, all - // metrics were prefixed with "avalanche_{chainID}_". Now we add avalanche - // to the prefix, "avalanche_{chainID}_avalanche_", to differentiate - // consensus operations after the DAG linearization. - AvalancheRegisterer Registerer // BlockAcceptor is the callback that will be fired whenever a VM is // notified that their block was accepted. 
diff --git a/snow/engine/avalanche/bootstrap/bootstrapper.go b/snow/engine/avalanche/bootstrap/bootstrapper.go index 55e3307e933..00f9ab64a45 100644 --- a/snow/engine/avalanche/bootstrap/bootstrapper.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper.go @@ -8,6 +8,7 @@ import ( "fmt" "time" + "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" "github.com/ava-labs/avalanchego/cache" @@ -47,6 +48,7 @@ var _ common.BootstrapableEngine = (*bootstrapper)(nil) func New( config Config, onFinished func(ctx context.Context, lastReqID uint32) error, + reg prometheus.Registerer, ) (common.BootstrapableEngine, error) { b := &bootstrapper{ Config: config, @@ -66,7 +68,7 @@ func New( processedCache: &cache.LRU[ids.ID, struct{}]{Size: cacheSize}, onFinished: onFinished, } - return b, b.metrics.Initialize(config.Ctx.AvalancheRegisterer) + return b, b.metrics.Initialize(reg) } // Note: To align with the Snowman invariant, it should be guaranteed the VM is diff --git a/snow/engine/avalanche/bootstrap/bootstrapper_test.go b/snow/engine/avalanche/bootstrap/bootstrapper_test.go index 47f92057552..2792e8682f6 100644 --- a/snow/engine/avalanche/bootstrap/bootstrapper_test.go +++ b/snow/engine/avalanche/bootstrap/bootstrapper_test.go @@ -76,10 +76,10 @@ func newConfig(t *testing.T) (Config, ids.NodeID, *common.SenderTest, *vertex.Te peer := ids.GenerateTestNodeID() require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, peer, nil, ids.Empty, 1)) - vtxBlocker, err := queue.NewWithMissing(prefixdb.New([]byte("vtx"), db), "vtx", ctx.AvalancheRegisterer) + vtxBlocker, err := queue.NewWithMissing(prefixdb.New([]byte("vtx"), db), "vtx", prometheus.NewRegistry()) require.NoError(err) - txBlocker, err := queue.New(prefixdb.New([]byte("tx"), db), "tx", ctx.AvalancheRegisterer) + txBlocker, err := queue.New(prefixdb.New([]byte("tx"), db), "tx", prometheus.NewRegistry()) require.NoError(err) peerTracker := tracker.NewPeers() @@ -88,7 +88,7 @@ func newConfig(t *testing.T) 
(Config, ids.NodeID, *common.SenderTest, *vertex.Te startupTracker := tracker.NewStartup(peerTracker, totalWeight/2+1) vdrs.RegisterSetCallbackListener(constants.PrimaryNetworkID, startupTracker) - avaGetHandler, err := getter.New(manager, sender, ctx.Log, time.Second, 2000, ctx.AvalancheRegisterer) + avaGetHandler, err := getter.New(manager, sender, ctx.Log, time.Second, 2000, prometheus.NewRegistry()) require.NoError(err) p2pTracker, err := p2p.NewPeerTracker( @@ -172,6 +172,7 @@ func TestBootstrapperSingleFrontier(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) @@ -278,6 +279,7 @@ func TestBootstrapperByzantineResponses(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) @@ -444,6 +446,7 @@ func TestBootstrapperTxDependencies(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) @@ -567,6 +570,7 @@ func TestBootstrapperIncompleteAncestors(t *testing.T) { }) return nil }, + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/snow/networking/sender/sender.go b/snow/networking/sender/sender.go index 37076972fe1..e4e36bd3ebb 100644 --- a/snow/networking/sender/sender.go +++ b/snow/networking/sender/sender.go @@ -5,7 +5,6 @@ package sender import ( "context" - "fmt" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" @@ -58,6 +57,7 @@ func New( timeouts timeout.Manager, engineType p2p.EngineType, subnet subnets.Subnet, + reg prometheus.Registerer, ) (common.Sender, error) { s := &sender{ ctx: ctx, @@ -75,16 +75,6 @@ func New( engineType: engineType, subnet: subnet, } - - var reg prometheus.Registerer - switch engineType { - case p2p.EngineType_ENGINE_TYPE_SNOWMAN: - reg = ctx.Registerer - case p2p.EngineType_ENGINE_TYPE_AVALANCHE: - reg = ctx.AvalancheRegisterer - default: - return nil, fmt.Errorf("unknown engine type %s", engineType) - } return s, reg.Register(s.failedDueToBench) } diff --git a/snow/networking/sender/sender_test.go 
b/snow/networking/sender/sender_test.go index 6bd2bc558c9..9453f43e4fa 100644 --- a/snow/networking/sender/sender_test.go +++ b/snow/networking/sender/sender_test.go @@ -100,6 +100,7 @@ func TestTimeout(t *testing.T) { tm, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -376,6 +377,7 @@ func TestReliableMessages(t *testing.T) { tm, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -532,6 +534,7 @@ func TestReliableMessagesToMyself(t *testing.T) { tm, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -843,6 +846,7 @@ func TestSender_Bootstrap_Requests(t *testing.T) { timeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1049,11 +1053,6 @@ func TestSender_Bootstrap_Responses(t *testing.T) { router = router.NewMockRouter(ctrl) ) - // Instantiate new registerers to avoid duplicate metrics - // registration - ctx.Registerer = prometheus.NewRegistry() - ctx.AvalancheRegisterer = prometheus.NewRegistry() - sender, err := New( ctx, msgCreator, @@ -1062,6 +1061,7 @@ func TestSender_Bootstrap_Responses(t *testing.T) { timeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) @@ -1228,6 +1228,7 @@ func TestSender_Single_Request(t *testing.T) { timeoutManager, engineType, subnets.New(ctx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) diff --git a/snow/snowtest/snowtest.go b/snow/snowtest/snowtest.go index 0ddee75707a..86374f76651 100644 --- a/snow/snowtest/snowtest.go +++ b/snow/snowtest/snowtest.go @@ -39,12 +39,11 @@ func (noOpAcceptor) Accept(*snow.ConsensusContext, ids.ID, []byte) error { func 
ConsensusContext(ctx *snow.Context) *snow.ConsensusContext { return &snow.ConsensusContext{ - Context: ctx, - Registerer: prometheus.NewRegistry(), - AvalancheRegisterer: prometheus.NewRegistry(), - BlockAcceptor: noOpAcceptor{}, - TxAcceptor: noOpAcceptor{}, - VertexAcceptor: noOpAcceptor{}, + Context: ctx, + Registerer: prometheus.NewRegistry(), + BlockAcceptor: noOpAcceptor{}, + TxAcceptor: noOpAcceptor{}, + VertexAcceptor: noOpAcceptor{}, } } diff --git a/vms/platformvm/vm_test.go b/vms/platformvm/vm_test.go index 34b7d54b76e..50c67768871 100644 --- a/vms/platformvm/vm_test.go +++ b/vms/platformvm/vm_test.go @@ -1447,6 +1447,7 @@ func TestBootstrapPartiallyAccepted(t *testing.T) { timeoutManager, p2ppb.EngineType_ENGINE_TYPE_SNOWMAN, subnets.New(consensusCtx.NodeID, subnets.Config{}), + prometheus.NewRegistry(), ) require.NoError(err) From 7dffd6cea6004fc978fb95361bc05e91538bde44 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 20:51:04 -0400 Subject: [PATCH 49/53] nit --- chains/manager.go | 66 +++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/chains/manager.go b/chains/manager.go index 91195ab77a3..bdc6d0ef018 100644 --- a/chains/manager.go +++ b/chains/manager.go @@ -73,19 +73,19 @@ import ( ) const ( + ChainLabel = "chain" + defaultChannelSize = 1 initialQueueSize = 3 - handlerNamespace = constants.PlatformName + metric.NamespaceSeparator + "handler" - stakeNamespace = constants.PlatformName + metric.NamespaceSeparator + "stake" - p2pNamespace = constants.PlatformName + metric.NamespaceSeparator + "p2p" - snowmanNamespace = constants.PlatformName + metric.NamespaceSeparator + "snowman" avalancheNamespace = constants.PlatformName + metric.NamespaceSeparator + "avalanche" - proposervmNamespace = constants.PlatformName + metric.NamespaceSeparator + "proposervm" + handlerNamespace = constants.PlatformName + metric.NamespaceSeparator + "handler" meterchainvmNamespace = 
constants.PlatformName + metric.NamespaceSeparator + "meterchainvm" meterdagvmNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterdagvm" - - ChainLabel = "chain" + proposervmNamespace = constants.PlatformName + metric.NamespaceSeparator + "proposervm" + p2pNamespace = constants.PlatformName + metric.NamespaceSeparator + "p2p" + snowmanNamespace = constants.PlatformName + metric.NamespaceSeparator + "snowman" + stakeNamespace = constants.PlatformName + metric.NamespaceSeparator + "stake" ) var ( @@ -272,56 +272,56 @@ type manager struct { // snowman++ related interface to allow validators retrieval validatorState validators.State - handlerGatherer metrics.MultiGatherer // chainID - stakeGatherer metrics.MultiGatherer // chainID - p2pGatherer metrics.MultiGatherer // chainID - snowmanGatherer metrics.MultiGatherer // chainID avalancheGatherer metrics.MultiGatherer // chainID - proposervmGatherer metrics.MultiGatherer // chainID + handlerGatherer metrics.MultiGatherer // chainID meterChainVMGatherer metrics.MultiGatherer // chainID meterDAGVMGatherer metrics.MultiGatherer // chainID + proposervmGatherer metrics.MultiGatherer // chainID + p2pGatherer metrics.MultiGatherer // chainID + snowmanGatherer metrics.MultiGatherer // chainID + stakeGatherer metrics.MultiGatherer // chainID vmGatherer map[ids.ID]metrics.MultiGatherer // vmID -> chainID } // New returns a new Manager func New(config *ManagerConfig) (Manager, error) { - handlerGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(handlerNamespace, handlerGatherer); err != nil { + avalancheGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(avalancheNamespace, avalancheGatherer); err != nil { return nil, err } - stakeGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(stakeNamespace, stakeGatherer); err != nil { + handlerGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := 
config.Metrics.Register(handlerNamespace, handlerGatherer); err != nil { return nil, err } - p2pGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(p2pNamespace, p2pGatherer); err != nil { + meterChainVMGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(meterchainvmNamespace, meterChainVMGatherer); err != nil { return nil, err } - snowmanGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(snowmanNamespace, snowmanGatherer); err != nil { + meterDAGVMGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(meterdagvmNamespace, meterDAGVMGatherer); err != nil { return nil, err } - avalancheGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(avalancheNamespace, avalancheGatherer); err != nil { + proposervmGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(proposervmNamespace, proposervmGatherer); err != nil { return nil, err } - proposervmGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(proposervmNamespace, proposervmGatherer); err != nil { + p2pGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(p2pNamespace, p2pGatherer); err != nil { return nil, err } - meterChainVMGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(meterchainvmNamespace, meterChainVMGatherer); err != nil { + snowmanGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(snowmanNamespace, snowmanGatherer); err != nil { return nil, err } - meterDAGVMGatherer := metrics.NewLabelGatherer(ChainLabel) - if err := config.Metrics.Register(meterdagvmNamespace, meterDAGVMGatherer); err != nil { + stakeGatherer := metrics.NewLabelGatherer(ChainLabel) + if err := config.Metrics.Register(stakeNamespace, stakeGatherer); err != nil { return nil, err } @@ -333,14 +333,14 @@ func New(config 
*ManagerConfig) (Manager, error) { unblockChainCreatorCh: make(chan struct{}), chainCreatorShutdownCh: make(chan struct{}), - handlerGatherer: handlerGatherer, - stakeGatherer: stakeGatherer, - p2pGatherer: p2pGatherer, - snowmanGatherer: snowmanGatherer, avalancheGatherer: avalancheGatherer, - proposervmGatherer: proposervmGatherer, + handlerGatherer: handlerGatherer, meterChainVMGatherer: meterChainVMGatherer, meterDAGVMGatherer: meterDAGVMGatherer, + proposervmGatherer: proposervmGatherer, + p2pGatherer: p2pGatherer, + snowmanGatherer: snowmanGatherer, + stakeGatherer: stakeGatherer, vmGatherer: make(map[ids.ID]metrics.MultiGatherer), }, nil } From 409b8a32b76a52897be10704da1a763ce4ed52dc Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 20:55:28 -0400 Subject: [PATCH 50/53] nit --- tests/e2e/x/transfer/virtuous.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/x/transfer/virtuous.go b/tests/e2e/x/transfer/virtuous.go index 253110e88ca..3bb22adebc1 100644 --- a/tests/e2e/x/transfer/virtuous.go +++ b/tests/e2e/x/transfer/virtuous.go @@ -12,6 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" + "github.com/ava-labs/avalanchego/chains" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/snow/choices" "github.com/ava-labs/avalanchego/tests" @@ -34,7 +35,7 @@ const ( ) var xChainMetricLabels = prometheus.Labels{ - "chain": "X", + chains.ChainLabel: "X", } // This test requires that the network not have ongoing blocks and From 7e548640821995509ce022069e6b3a9bd33a0396 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 21:03:43 -0400 Subject: [PATCH 51/53] nit --- tests/e2e/x/transfer/virtuous.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/e2e/x/transfer/virtuous.go b/tests/e2e/x/transfer/virtuous.go index 3bb22adebc1..10a2359e7f9 100644 --- a/tests/e2e/x/transfer/virtuous.go +++ 
b/tests/e2e/x/transfer/virtuous.go @@ -30,8 +30,8 @@ import ( const ( totalRounds = 50 - blksProcessingMetric = "avalanche_chain_snowman_blks_processing" - blksAcceptedMetric = "avalanche_chain_snowman_blks_accepted_count" + blksProcessingMetric = "avalanche_snowman_blks_processing" + blksAcceptedMetric = "avalanche_snowman_blks_accepted_count" ) var xChainMetricLabels = prometheus.Labels{ From 577fdf0e2c2ec4eac8ce4cf115e5a332eeee7852 Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 21:26:55 -0400 Subject: [PATCH 52/53] Small metrics cleanup --- snow/networking/handler/handler.go | 18 ++++++++++-- snow/networking/handler/message_queue.go | 28 +++++++++++-------- snow/networking/handler/message_queue_test.go | 19 +++++++++---- vms/avm/vm.go | 12 ++++---- vms/platformvm/vm.go | 12 ++++---- 5 files changed, 57 insertions(+), 32 deletions(-) diff --git a/snow/networking/handler/handler.go b/snow/networking/handler/handler.go index f1966adc4dc..9388d2d66be 100644 --- a/snow/networking/handler/handler.go +++ b/snow/networking/handler/handler.go @@ -165,11 +165,25 @@ func New( return nil, fmt.Errorf("initializing handler metrics errored with: %w", err) } cpuTracker := resourceTracker.CPUTracker() - h.syncMessageQueue, err = NewMessageQueue(h.ctx, h.validators, cpuTracker, "handler") + h.syncMessageQueue, err = NewMessageQueue( + h.ctx.Log, + h.ctx.SubnetID, + h.validators, + cpuTracker, + "handler", + h.ctx.Registerer, + ) if err != nil { return nil, fmt.Errorf("initializing sync message queue errored with: %w", err) } - h.asyncMessageQueue, err = NewMessageQueue(h.ctx, h.validators, cpuTracker, "handler_async") + h.asyncMessageQueue, err = NewMessageQueue( + h.ctx.Log, + h.ctx.SubnetID, + h.validators, + cpuTracker, + "handler_async", + h.ctx.Registerer, + ) if err != nil { return nil, fmt.Errorf("initializing async message queue errored with: %w", err) } diff --git a/snow/networking/handler/message_queue.go b/snow/networking/handler/message_queue.go 
index f17cfc1a2e9..4d632c62d77 100644 --- a/snow/networking/handler/message_queue.go +++ b/snow/networking/handler/message_queue.go @@ -13,10 +13,10 @@ import ( "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/message" "github.com/ava-labs/avalanchego/proto/pb/p2p" - "github.com/ava-labs/avalanchego/snow" "github.com/ava-labs/avalanchego/snow/networking/tracker" "github.com/ava-labs/avalanchego/snow/validators" "github.com/ava-labs/avalanchego/utils/buffer" + "github.com/ava-labs/avalanchego/utils/logging" "github.com/ava-labs/avalanchego/utils/timer/mockable" ) @@ -60,7 +60,8 @@ type messageQueue struct { clock mockable.Clock metrics messageQueueMetrics - ctx *snow.ConsensusContext + log logging.Logger + subnetID ids.ID // Validator set for the chain associated with this vdrs validators.Manager // Tracks CPU utilization of each node @@ -75,20 +76,23 @@ type messageQueue struct { } func NewMessageQueue( - ctx *snow.ConsensusContext, + log logging.Logger, + subnetID ids.ID, vdrs validators.Manager, cpuTracker tracker.Tracker, metricsNamespace string, + reg prometheus.Registerer, ) (MessageQueue, error) { m := &messageQueue{ - ctx: ctx, + log: log, + subnetID: subnetID, vdrs: vdrs, cpuTracker: cpuTracker, cond: sync.NewCond(&sync.Mutex{}), nodeToUnprocessedMsgs: make(map[ids.NodeID]int), msgAndCtxs: buffer.NewUnboundedDeque[*msgAndContext](1 /*=initSize*/), } - return m, m.metrics.initialize(metricsNamespace, ctx.Registerer) + return m, m.metrics.initialize(metricsNamespace, reg) } func (m *messageQueue) Push(ctx context.Context, msg Message) { @@ -137,7 +141,7 @@ func (m *messageQueue) Pop() (context.Context, Message, bool) { i := 0 for { if i == n { - m.ctx.Log.Debug("canPop is false for all unprocessed messages", + m.log.Debug("canPop is false for all unprocessed messages", zap.Int("numMessages", n), ) } @@ -212,21 +216,21 @@ func (m *messageQueue) canPop(msg message.InboundMessage) bool { // the number of nodes with unprocessed messages. 
baseMaxCPU := 1 / float64(len(m.nodeToUnprocessedMsgs)) nodeID := msg.NodeID() - weight := m.vdrs.GetWeight(m.ctx.SubnetID, nodeID) + weight := m.vdrs.GetWeight(m.subnetID, nodeID) var portionWeight float64 - if totalVdrsWeight, err := m.vdrs.TotalWeight(m.ctx.SubnetID); err != nil { + if totalVdrsWeight, err := m.vdrs.TotalWeight(m.subnetID); err != nil { // The sum of validator weights should never overflow, but if they do, // we treat portionWeight as 0. - m.ctx.Log.Error("failed to get total weight of validators", - zap.Stringer("subnetID", m.ctx.SubnetID), + m.log.Error("failed to get total weight of validators", + zap.Stringer("subnetID", m.subnetID), zap.Error(err), ) } else if totalVdrsWeight == 0 { // The sum of validator weights should never be 0, but handle that case // for completeness here to avoid divide by 0. - m.ctx.Log.Warn("validator set is empty", - zap.Stringer("subnetID", m.ctx.SubnetID), + m.log.Warn("validator set is empty", + zap.Stringer("subnetID", m.subnetID), ) } else { portionWeight = float64(weight) / float64(totalVdrsWeight) diff --git a/snow/networking/handler/message_queue_test.go b/snow/networking/handler/message_queue_test.go index 577a4686faa..a74ffcfb446 100644 --- a/snow/networking/handler/message_queue_test.go +++ b/snow/networking/handler/message_queue_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" @@ -15,21 +16,27 @@ import ( "github.com/ava-labs/avalanchego/message" "github.com/ava-labs/avalanchego/proto/pb/p2p" "github.com/ava-labs/avalanchego/snow/networking/tracker" - "github.com/ava-labs/avalanchego/snow/snowtest" "github.com/ava-labs/avalanchego/snow/validators" + "github.com/ava-labs/avalanchego/utils/constants" + "github.com/ava-labs/avalanchego/utils/logging" ) func TestQueue(t *testing.T) { ctrl := gomock.NewController(t) require := require.New(t) cpuTracker := tracker.NewMockTracker(ctrl) - snowCtx 
:= snowtest.Context(t, snowtest.CChainID) - ctx := snowtest.ConsensusContext(snowCtx) vdrs := validators.NewManager() vdr1ID, vdr2ID := ids.GenerateTestNodeID(), ids.GenerateTestNodeID() - require.NoError(vdrs.AddStaker(ctx.SubnetID, vdr1ID, nil, ids.Empty, 1)) - require.NoError(vdrs.AddStaker(ctx.SubnetID, vdr2ID, nil, ids.Empty, 1)) - mIntf, err := NewMessageQueue(ctx, vdrs, cpuTracker, "") + require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr1ID, nil, ids.Empty, 1)) + require.NoError(vdrs.AddStaker(constants.PrimaryNetworkID, vdr2ID, nil, ids.Empty, 1)) + mIntf, err := NewMessageQueue( + logging.NoLog{}, + constants.PrimaryNetworkID, + vdrs, + cpuTracker, + "", + prometheus.NewRegistry(), + ) require.NoError(err) u := mIntf.(*messageQueue) currentTime := time.Now() diff --git a/vms/avm/vm.go b/vms/avm/vm.go index ab05b053b39..6a455132c1a 100644 --- a/vms/avm/vm.go +++ b/vms/avm/vm.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" + "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/cache" "github.com/ava-labs/avalanchego/database" "github.com/ava-labs/avalanchego/database/versiondb" @@ -33,7 +34,6 @@ import ( "github.com/ava-labs/avalanchego/version" "github.com/ava-labs/avalanchego/vms/avm/block" "github.com/ava-labs/avalanchego/vms/avm/config" - "github.com/ava-labs/avalanchego/vms/avm/metrics" "github.com/ava-labs/avalanchego/vms/avm/network" "github.com/ava-labs/avalanchego/vms/avm/state" "github.com/ava-labs/avalanchego/vms/avm/txs" @@ -47,6 +47,7 @@ import ( blockbuilder "github.com/ava-labs/avalanchego/vms/avm/block/builder" blockexecutor "github.com/ava-labs/avalanchego/vms/avm/block/executor" extensions "github.com/ava-labs/avalanchego/vms/avm/fxs" + avmmetrics "github.com/ava-labs/avalanchego/vms/avm/metrics" txexecutor "github.com/ava-labs/avalanchego/vms/avm/txs/executor" xmempool "github.com/ava-labs/avalanchego/vms/avm/txs/mempool" ) @@ -66,7 +67,7 @@ type VM 
struct { config.Config - metrics metrics.Metrics + metrics avmmetrics.Metrics avax.AddressManager ids.Aliaser @@ -173,16 +174,15 @@ func (vm *VM) Initialize( zap.Reflect("config", avmConfig), ) - registerer := prometheus.NewRegistry() - if err := ctx.Metrics.Register("", registerer); err != nil { + vm.registerer, err = metrics.MakeAndRegister(ctx.Metrics, "") + if err != nil { return err } - vm.registerer = registerer vm.connectedPeers = make(map[ids.NodeID]*version.Application) // Initialize metrics as soon as possible - vm.metrics, err = metrics.New(registerer) + vm.metrics, err = avmmetrics.New(vm.registerer) if err != nil { return fmt.Errorf("failed to initialize metrics: %w", err) } diff --git a/vms/platformvm/vm.go b/vms/platformvm/vm.go index 565960cff59..efbfe0fa545 100644 --- a/vms/platformvm/vm.go +++ b/vms/platformvm/vm.go @@ -12,9 +12,9 @@ import ( "time" "github.com/gorilla/rpc/v2" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" + "github.com/ava-labs/avalanchego/api/metrics" "github.com/ava-labs/avalanchego/cache" "github.com/ava-labs/avalanchego/codec" "github.com/ava-labs/avalanchego/codec/linearcodec" @@ -35,7 +35,6 @@ import ( "github.com/ava-labs/avalanchego/vms/platformvm/block" "github.com/ava-labs/avalanchego/vms/platformvm/config" "github.com/ava-labs/avalanchego/vms/platformvm/fx" - "github.com/ava-labs/avalanchego/vms/platformvm/metrics" "github.com/ava-labs/avalanchego/vms/platformvm/network" "github.com/ava-labs/avalanchego/vms/platformvm/reward" "github.com/ava-labs/avalanchego/vms/platformvm/state" @@ -47,6 +46,7 @@ import ( snowmanblock "github.com/ava-labs/avalanchego/snow/engine/snowman/block" blockbuilder "github.com/ava-labs/avalanchego/vms/platformvm/block/builder" blockexecutor "github.com/ava-labs/avalanchego/vms/platformvm/block/executor" + platformvmmetrics "github.com/ava-labs/avalanchego/vms/platformvm/metrics" txexecutor "github.com/ava-labs/avalanchego/vms/platformvm/txs/executor" pmempool 
"github.com/ava-labs/avalanchego/vms/platformvm/txs/mempool" pvalidators "github.com/ava-labs/avalanchego/vms/platformvm/validators" @@ -65,7 +65,7 @@ type VM struct { *network.Network validators.State - metrics metrics.Metrics + metrics platformvmmetrics.Metrics // Used to get time. Useful for faking time during tests. clock mockable.Clock @@ -113,13 +113,13 @@ func (vm *VM) Initialize( } chainCtx.Log.Info("using VM execution config", zap.Reflect("config", execConfig)) - registerer := prometheus.NewRegistry() - if err := chainCtx.Metrics.Register("", registerer); err != nil { + registerer, err := metrics.MakeAndRegister(chainCtx.Metrics, "") + if err != nil { return err } // Initialize metrics as soon as possible - vm.metrics, err = metrics.New(registerer) + vm.metrics, err = platformvmmetrics.New(registerer) if err != nil { return fmt.Errorf("failed to initialize metrics: %w", err) } From 7f25cf4b6e4ab42256fab1af5767d2fae4a48e9e Mon Sep 17 00:00:00 2001 From: Stephen Buttolph Date: Wed, 5 Jun 2024 21:41:40 -0400 Subject: [PATCH 53/53] nit --- vms/proposervm/config.go | 1 + 1 file changed, 1 insertion(+) diff --git a/vms/proposervm/config.go b/vms/proposervm/config.go index 493f549d246..296f6a60520 100644 --- a/vms/proposervm/config.go +++ b/vms/proposervm/config.go @@ -35,6 +35,7 @@ type Config struct { // Block certificate StakingCertLeaf *staking.Certificate + // Registerer for prometheus metrics Registerer prometheus.Registerer }