From 0da35aef76252a71f049fb9511a5ce8292c92024 Mon Sep 17 00:00:00 2001 From: corver Date: Tue, 11 Apr 2023 19:46:12 +0200 Subject: [PATCH] core/tracker: improve ignoring of unsupported duties (#2087) Improves ignoring of unsupported duties, so ignore for both failure and participation logs and metrics. Also fix edge case mentioned in #1348 where aggregation duties succeeded and then later fails. category: bug ticket: #1348 cherry-pick: #2086 --- core/tracker/tracker.go | 65 +++++++++++++++------ core/tracker/tracker_internal_test.go | 81 +++++++++++++++++++++++++++ testutil/random.go | 5 ++ 3 files changed, 133 insertions(+), 18 deletions(-) diff --git a/core/tracker/tracker.go b/core/tracker/tracker.go index 68279c164..7ce305e97 100644 --- a/core/tracker/tracker.go +++ b/core/tracker/tracker.go @@ -262,6 +262,8 @@ func (t *Tracker) Run(ctx context.Context) error { ctx = log.WithTopic(ctx, "tracker") defer close(t.quit) + ignoreUnsupported := newUnsupportedIgnorer() + for { select { case <-ctx.Done(): @@ -283,6 +285,10 @@ func (t *Tracker) Run(ctx context.Context) error { // Analyse failed duties failed, failedStep, failedMsg, failedErr := analyseDutyFailed(duty, t.events, parsigs.MsgRootsConsistent()) + if ignoreUnsupported(ctx, duty, failed, failedStep, failedMsg) { + continue // Ignore unsupported duties + } + t.failedDutyReporter(ctx, duty, failed, failedStep, failedMsg, failedErr) // Analyse peer participation @@ -566,8 +572,6 @@ func extractParSigs(ctx context.Context, events []event) parsigsByMsg { // newFailedDutyReporter returns failed duty reporter which instruments failed duties. func newFailedDutyReporter() func(ctx context.Context, duty core.Duty, failed bool, step step, reason string, err error) { - var loggedNoSelections bool - // Initialise counters to 0 to avoid non-existent metrics issues when querying prometheus. for _, dutyType := range core.AllDutyTypes() { dutyFailed.WithLabelValues(dutyType.String()).Add(0) @@ -582,37 +586,62 @@ func newFailedDutyReporter() func(ctx context.Context, duty core.Duty, failed bo return } - dutySuccess.WithLabelValues(duty.Type.String()).Inc() dutyExpect.WithLabelValues(duty.Type.String()).Inc() + dutySuccess.WithLabelValues(duty.Type.String()).Inc() return } - if duty.Type == core.DutyAggregator && step == fetcher && reason == msgFetcherAggregatorZeroPrepares { - if !loggedNoSelections { + log.Warn(ctx, "Duty failed", err, + z.Any("step", step), + z.Str("reason", reason), + ) + + dutyExpect.WithLabelValues(duty.Type.String()).Inc() + dutyFailed.WithLabelValues(duty.Type.String()).Inc() + } +} + +// newUnsupportedIgnorer returns a filter that ignores duties that are not supported by the node. +func newUnsupportedIgnorer() func(ctx context.Context, duty core.Duty, failed bool, step step, reason string) bool { + var ( + loggedNoAggregator bool + loggedNoContribution bool + aggregationSupported bool + contributionSupported bool + ) + + return func(ctx context.Context, duty core.Duty, failed bool, step step, reason string) bool { + if !failed { + if duty.Type == core.DutyAggregator { + aggregationSupported = true + } + if duty.Type == core.DutySyncContribution { + contributionSupported = true + } + + return false + } + + if !aggregationSupported && duty.Type == core.DutyAggregator && step == fetcher && reason == msgFetcherAggregatorZeroPrepares { + if !loggedNoAggregator { log.Warn(ctx, "Ignoring attestation aggregation failures since VCs do not seem to support beacon committee selection aggregation", nil) } - loggedNoSelections = true + loggedNoAggregator = true - return + return true } - if duty.Type == core.DutySyncContribution && step == fetcher && reason == msgFetcherSyncContributionZeroPrepares { - if !loggedNoSelections { + if !contributionSupported && duty.Type == core.DutySyncContribution && step == fetcher && reason == msgFetcherSyncContributionZeroPrepares { + if !loggedNoContribution { log.Warn(ctx, "Ignoring sync contribution failures since VCs do not seem to support sync committee selection aggregation", nil) } - loggedNoSelections = true + loggedNoContribution = true - return + return true } - log.Warn(ctx, "Duty failed", err, - z.Any("step", step), - z.Str("reason", reason), - ) - - dutyFailed.WithLabelValues(duty.Type.String()).Inc() - dutyExpect.WithLabelValues(duty.Type.String()).Inc() + return false } } diff --git a/core/tracker/tracker_internal_test.go b/core/tracker/tracker_internal_test.go index d42206aed..a4ca3c26e 100644 --- a/core/tracker/tracker_internal_test.go +++ b/core/tracker/tracker_internal_test.go @@ -5,6 +5,7 @@ package tracker import ( "context" "fmt" + "math/rand" "net/http" "reflect" "testing" @@ -1176,3 +1177,83 @@ func TestUnexpectedFailures(t *testing.T) { } } } + +func TestIgnoreUnsupported(t *testing.T) { + // Note these tests are stateful, so order is important. + tests := []struct { + name string + duty core.Duty + failed bool + step step + reason string + result bool + }{ + { + name: "Attester not ignored", + duty: core.NewAttesterDuty(123), + failed: testutil.RandomBool(), + step: randomStep(), + reason: msgAggSigDB, + result: false, + }, + { + name: "Aggregator failed and ignored", + duty: core.NewAggregatorDuty(123), + failed: true, + step: fetcher, + reason: msgFetcherAggregatorZeroPrepares, + result: true, + }, + { + name: "Aggregator success", + duty: core.NewAggregatorDuty(123), + failed: false, + step: fetcher, + reason: msgFetcherAggregatorZeroPrepares, + result: false, + }, + { + name: "Aggregator failed but not ignored", + duty: core.NewAggregatorDuty(123), + failed: true, + step: fetcher, + reason: msgFetcherAggregatorZeroPrepares, + result: false, + }, + { + name: "SyncContrib failed and ignored", + duty: core.NewSyncContributionDuty(123), + failed: true, + step: fetcher, + reason: msgFetcherSyncContributionZeroPrepares, + result: true, + }, + { + name: "SyncContrib success", + duty: core.NewSyncContributionDuty(123), + failed: false, + step: fetcher, + reason: msgFetcherSyncContributionZeroPrepares, + result: false, + }, + { + name: "SyncContrib failed but not ignored", + duty: core.NewSyncContributionDuty(123), + failed: true, + step: fetcher, + reason: msgFetcherSyncContributionZeroPrepares, + result: false, + }, + } + + ignorer := newUnsupportedIgnorer() + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require.Equal(t, test.result, ignorer(context.Background(), test.duty, test.failed, test.step, test.reason)) + }) + } +} + +func randomStep() step { + return step(rand.Intn(int(sentinel))) +} diff --git a/testutil/random.go b/testutil/random.go index 024647f04..5427f4a3b 100644 --- a/testutil/random.go +++ b/testutil/random.go @@ -900,3 +900,8 @@ func GenerateInsecureK1Key(t *testing.T, seed int) *k1.PrivateKey { return k1.PrivKeyFromBytes(k.D.Bytes()) } + +// RandomBool returns a random boolean. +func RandomBool() bool { + return rand.Intn(2) == 0 +}