From fd22d7801d91bf9007c20c593aaec2a68c40149d Mon Sep 17 00:00:00 2001 From: Dylan Marriner Date: Mon, 27 Feb 2023 16:40:17 -0800 Subject: [PATCH] Bugfix: Specifying multiple tags/labels with -a causes sporadic incorrect metric calculations This fixes a bug that caused diff calculations to sporadically break when multiple labels were added via the -a flag. Labels were stored as key/values in a map and returned in a potentially different order each time since map iteration order is random. This caused the translator to emit the cumulative value instead of the diffed value, because it would seem as though the metric was new when in fact it had been previously cached with a different label ordering. For example, the translator might look for "counter-key2:value2-key1:value1" when it had been previously cached as "counter-key1:value1-key2:value2" --- CHANGELOG.md | 1 + cmd/veneur-prometheus/main_test.go | 45 +++++++++++++++++++++++++ cmd/veneur-prometheus/translate.go | 13 +++++-- cmd/veneur-prometheus/translate_test.go | 2 +- 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ebda43ed..529505449 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ * A fix for forwarding metrics with gRPC using the kubernetes discoverer. Thanks, [androohan](https://github.com/androohan)! * Regenerate testing certs/CA that have expired and have broken tests. Thanks, [randallm](https://github.com/randallm) * The config field `trace_lightstep_access_token` is redacted if printed. Thanks [arnavdugar](https://github.com/arnavdugar)! +* A fix for the -a flag in veneur-prometheus. Thanks, [dmarriner](https://github.com/dmarriner) # 14.1.0, 2021-03-16 diff --git a/cmd/veneur-prometheus/main_test.go b/cmd/veneur-prometheus/main_test.go index d792f4e9d..9caf390ef 100644 --- a/cmd/veneur-prometheus/main_test.go +++ b/cmd/veneur-prometheus/main_test.go @@ -301,6 +301,51 @@ func TestHistogramsNewBucketsAreTranslatedToDiffs(t *testing.T) { assert.False(t, hasbucket5) } +// This test verifies a fix for a previous bug that caused diff calculations to sporadically break when multiple labels +// were added via the -a flag. Labels were stored as key/values in a map and returned in a potentially different order +// each time since map iteration order is random. This caused the translator to emit the cumulative value instead of +// the diffed value, because it would seem as though the metric was new when in fact it had been previously cached +// with a different label ordering. For example, the translator might look for "counter-key2:value2-key1:value1" when +// it had been previously cached as "counter-key1:value1-key2:value2" +func TestCountsWithAddingLabels(t *testing.T) { + counter := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "counter", + Help: "A typical counter.", + }) + + ts, err := testPrometheusEndpoint( + counter, + ) + require.NoError(t, err) + defer ts.Close() + + cache := new(countCache) + cfg := testConfig(t, ts.URL) + cfg.addedLabels = map[string]string{ + "key1": "value1", + "key2": "value2", + } + statsClient, err := statsd.New("localhost:8125", statsd.WithoutTelemetry()) + assert.NoError(t, err) + + stats, err := collect(context.Background(), cfg, statsClient, cache) + assert.NoError(t, err) + splitStats(stats) + + // Run 100 time to ensure we aren't just passing the test we got lucky + for i := 0; i < 100; i++ { + counter.Inc() + counter.Inc() + counter.Inc() + + stats, err = collect(context.Background(), cfg, statsClient, cache) + assert.NoError(t, err) + counts, _ := splitStats(stats) + count, _ := countValue(counts, "counter", "key1:value1", "key2:value2") + assert.Equal(t, 3, count) + } +} + func TestHistogramsAreTranslatedToDiffs(t *testing.T) { histogram := prometheus.NewHistogram(prometheus.HistogramOpts{ Name: "histogram", diff --git a/cmd/veneur-prometheus/translate.go b/cmd/veneur-prometheus/translate.go index 0bd7257d6..798522927 100644 --- a/cmd/veneur-prometheus/translate.go +++ b/cmd/veneur-prometheus/translate.go @@ -4,6 +4,7 @@ import ( "fmt" "math" "regexp" + "sort" dto "github.com/prometheus/client_model/go" "github.com/stripe/veneur/v14/sources/openmetrics" @@ -201,8 +202,16 @@ func (t translator) Tags(labels []*dto.LabelPair) []string { tags = append(tags, fmt.Sprintf("%s:%s", labelName, labelValue)) } - for name, value := range t.added { - tags = append(tags, fmt.Sprintf("%s:%s", name, value)) + // Tags need to be returned in sorted order so that metric cache keys are consistent across metric observation + // sweeps + names := make([]string, 0, len(t.added)) + for k := range t.added { + names = append(names, k) + } + sort.Strings(names) + + for _, name := range names { + tags = append(tags, fmt.Sprintf("%s:%s", name, t.added[name])) } return tags diff --git a/cmd/veneur-prometheus/translate_test.go b/cmd/veneur-prometheus/translate_test.go index 10419c5d5..682a41013 100644 --- a/cmd/veneur-prometheus/translate_test.go +++ b/cmd/veneur-prometheus/translate_test.go @@ -70,7 +70,7 @@ func TestAddTags(t *testing.T) { "so:good", } - assert.ElementsMatch(t, expectedTags, tags) + assert.Equal(t, expectedTags, tags) } func TestReplaceTags(t *testing.T) {