From 14391588209789acbb8f4b9c19f92c929efca2d3 Mon Sep 17 00:00:00 2001 From: Mia Epner Date: Thu, 29 Feb 2024 14:38:33 +0100 Subject: [PATCH 1/5] add metrics for secret sync clients --- helper/metricsutil/wrapped_metrics.go | 1 + vault/activity_log.go | 33 ++++++-- vault/activity_log_test.go | 116 ++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 6 deletions(-) diff --git a/helper/metricsutil/wrapped_metrics.go b/helper/metricsutil/wrapped_metrics.go index 0cf453123304..8b33c8802003 100644 --- a/helper/metricsutil/wrapped_metrics.go +++ b/helper/metricsutil/wrapped_metrics.go @@ -44,6 +44,7 @@ type TelemetryConstConfig struct { } type Metrics interface { + SetGauge(key []string, val float32) SetGaugeWithLabels(key []string, val float32, labels []Label) IncrCounterWithLabels(key []string, val float32, labels []Label) AddSampleWithLabels(key []string, val float32, labels []Label) diff --git a/vault/activity_log.go b/vault/activity_log.go index 55a38d738729..f145aa77e7e3 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -2247,7 +2247,6 @@ func (a *ActivityLog) writePrecomputedQuery(ctx context.Context, segmentTime tim // the byNamespace map also needs to be transformed into a protobuf pq.Namespaces = a.transformALNamespaceBreakdowns(opts.byNamespace) - err := a.queryStore.Put(ctx, pq) if err != nil { a.logger.Warn("failed to store precomputed query", "error", err) @@ -2316,11 +2315,27 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime a.breakdownTokenSegment(token, opts.byNamespace) } - // write metrics + // handle metrics reporting + a.reportPrecomputedQueryMetrics(ctx, segmentTime, opts) + + // convert the maps to the proper format and write them as precomputed queries + return a.writePrecomputedQuery(ctx, segmentTime, opts) +} + +func (a *ActivityLog) reportPrecomputedQueryMetrics(ctx context.Context, segmentTime time.Time, opts pqOptions) { + if segmentTime != opts.activePeriodEnd && segmentTime != opts.activePeriodStart { + return + } + // we don't want to introduce any new namespaced metrics. For secret sync + // (and all newer client types) we'll instead keep maps of the total + summedMetricsMonthly := make(map[string]int) + summedMetricsReporting := make(map[string]int) + for nsID, entry := range opts.byNamespace { // If this is the most recent month, or the start of the reporting period, output // a metric for each namespace. - if segmentTime == opts.activePeriodEnd { + switch segmentTime { + case opts.activePeriodEnd: a.metrics.SetGaugeWithLabels( []string{"identity", "entity", "active", "monthly"}, float32(entry.Counts.countByType(entityActivityType)), @@ -2335,7 +2350,8 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, }, ) - } else if segmentTime == opts.activePeriodStart { + summedMetricsMonthly[secretSyncActivityType] += entry.Counts.countByType(secretSyncActivityType) + case opts.activePeriodStart: a.metrics.SetGaugeWithLabels( []string{"identity", "entity", "active", "reporting_period"}, float32(entry.Counts.countByType(entityActivityType)), @@ -2350,11 +2366,16 @@ func (a *ActivityLog) segmentToPrecomputedQuery(ctx context.Context, segmentTime {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, }, ) + summedMetricsReporting[secretSyncActivityType] += entry.Counts.countByType(secretSyncActivityType) } } - // convert the maps to the proper format and write them as precomputed queries - return a.writePrecomputedQuery(ctx, segmentTime, opts) + for clientType, count := range summedMetricsMonthly { + a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", ""), "active", "monthly"}, float32(count)) + } + for clientType, count := range summedMetricsReporting { + a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", ""), "active", "reporting_period"}, float32(count)) + } } // goroutine to process the request in the intent log, creating precomputed queries. diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index dc02fed1c8ee..c3ea3fab044f 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/armon/go-metrics" "github.com/axiomhq/hyperloglog" "github.com/go-test/deep" "github.com/golang/protobuf/proto" @@ -4841,3 +4842,118 @@ func TestAddActivityToFragment(t *testing.T) { }) } } + +// TestActivityLog_reportPrecomputedQueryMetrics creates 3 clients per type and +// calls reportPrecomputedQueryMetrics. The test verifies that the metric sink +// gets metrics reported correctly, based on the segment time matching the +// active period start or end +func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) { + core, _, _, metricsSink := TestCoreUnsealedWithMetrics(t) + a := core.activityLog + byMonth := make(summaryByMonth) + byNS := make(summaryByNamespace) + segmentTime := time.Now() + + // for each client type, make 3 clients in their own namespaces + for i := 0; i < 3; i++ { + for _, clientType := range []string{secretSyncActivityType, nonEntityTokenActivityType, entityActivityType} { + client := &activity.EntityRecord{ + ClientID: fmt.Sprintf("%s-%d", clientType, i), + NamespaceID: fmt.Sprintf("ns-%d", i), + MountAccessor: fmt.Sprintf("mnt-%d", i), + ClientType: clientType, + NonEntity: clientType == nonEntityTokenActivityType, + } + processClientRecord(client, byNS, byMonth, segmentTime) + } + } + endTime := timeutil.EndOfMonth(segmentTime) + opts := pqOptions{ + byNamespace: byNS, + byMonth: byMonth, + endTime: endTime, + } + + otherTime := segmentTime.Add(time.Hour) + + hasNoMetric := func(t *testing.T, gauges map[string]metrics.GaugeValue, name string) { + t.Helper() + for _, metric := range gauges { + if metric.Name == name { + require.Fail(t, "metric found", name) + } + } + } + hasMetric := func(t *testing.T, gauges map[string]metrics.GaugeValue, name string, value float32, namespaceLabel *string) { + t.Helper() + fullMetric := fmt.Sprintf("%s;cluster=test-cluster", name) + if namespaceLabel != nil { + fullMetric = fmt.Sprintf("%s;namespace=%s;cluster=test-cluster", name, *namespaceLabel) + } + require.Contains(t, gauges, fullMetric) + metric := gauges[fullMetric] + require.Equal(t, value, metric.Value) + } + + testCases := []struct { + name string + activePeriodEnd time.Time + activePeriodStart time.Time + }{ + { + name: "monthly metric", + activePeriodEnd: segmentTime, + activePeriodStart: otherTime, + }, + { + name: "reporting period metric", + activePeriodEnd: otherTime, + activePeriodStart: segmentTime, + }, + { + name: "no metric", + activePeriodEnd: otherTime, + activePeriodStart: otherTime, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + opts.activePeriodStart = tc.activePeriodStart + opts.activePeriodEnd = tc.activePeriodEnd + + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + gauges := data[len(data)-1].Gauges + + switch segmentTime { + case opts.activePeriodEnd: + // expect the metrics ending with "monthly" + // the namespace was never registered in core, so it'll be + // reported with a "deleted-" prefix + for i := 0; i < 3; i++ { + ns := fmt.Sprintf("deleted-ns-%d", i) + hasMetric(t, gauges, "identity.entity.active.monthly", 1, &ns) + hasMetric(t, gauges, "identity.nonentity.active.monthly", 1, &ns) + } + // secret sync metrics should be the sum of clients across all + // namespaces + hasMetric(t, gauges, "identity.secretsync.active.monthly", 3, nil) + case opts.activePeriodStart: + for i := 0; i < 3; i++ { + ns := fmt.Sprintf("deleted-ns-%d", i) + hasMetric(t, gauges, "identity.entity.active.reporting_period", 1, &ns) + hasMetric(t, gauges, "identity.nonentity.active.reporting_period", 1, &ns) + } + hasMetric(t, gauges, "identity.secretsync.active.reporting_period", 3, nil) + default: + hasNoMetric(t, gauges, "identity.entity.active.monthly") + hasNoMetric(t, gauges, "identity.nonentity.active.monthly") + hasNoMetric(t, gauges, "identity.secretsync.active.monthly") + hasNoMetric(t, gauges, "identity.entity.active.reporting_period") + hasNoMetric(t, gauges, "identity.entity.active.reporting_period") + hasNoMetric(t, gauges, "identity.secretsync.active.reporting_period") + } + }) + } +} From c26d79f233c3c13456a1d4497a70c8850f35ffe5 Mon Sep 17 00:00:00 2001 From: Mia Epner Date: Thu, 29 Feb 2024 14:44:19 +0100 Subject: [PATCH 2/5] changelog --- changelog/25713.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog/25713.txt diff --git a/changelog/25713.txt b/changelog/25713.txt new file mode 100644 index 000000000000..fdcb7d8cc62e --- /dev/null +++ b/changelog/25713.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core/metrics (enterprise): add metrics for secret sync client count +``` From 7bc95b4a1180d02574be9e5b6b058bd4165acc50 Mon Sep 17 00:00:00 2001 From: Mia Epner Date: Thu, 29 Feb 2024 14:45:05 +0100 Subject: [PATCH 3/5] remove enterprise tag from changelog --- changelog/25713.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/25713.txt b/changelog/25713.txt index fdcb7d8cc62e..250045059ec5 100644 --- a/changelog/25713.txt +++ b/changelog/25713.txt @@ -1,3 +1,3 @@ ```release-note:improvement -core/metrics (enterprise): add metrics for secret sync client count +core/metrics: add metrics for secret sync client count ``` From 1d51d59b0ccb15071d42882334cf24d9d08982d2 Mon Sep 17 00:00:00 2001 From: Mia Epner Date: Thu, 29 Feb 2024 15:19:48 +0100 Subject: [PATCH 4/5] fix test and make clearer what it's testing --- vault/activity_log_test.go | 123 ++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 63 deletions(-) diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index c3ea3fab044f..d38b5874ccb0 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -4876,84 +4876,81 @@ func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) { otherTime := segmentTime.Add(time.Hour) - hasNoMetric := func(t *testing.T, gauges map[string]metrics.GaugeValue, name string) { + hasNoMetric := func(t *testing.T, intervals []*metrics.IntervalMetrics, name string) { t.Helper() + gauges := intervals[len(intervals)-1].Gauges for _, metric := range gauges { if metric.Name == name { require.Fail(t, "metric found", name) } } } - hasMetric := func(t *testing.T, gauges map[string]metrics.GaugeValue, name string, value float32, namespaceLabel *string) { + hasMetric := func(t *testing.T, intervals []*metrics.IntervalMetrics, name string, value float32, namespaceLabel *string) { t.Helper() fullMetric := fmt.Sprintf("%s;cluster=test-cluster", name) if namespaceLabel != nil { fullMetric = fmt.Sprintf("%s;namespace=%s;cluster=test-cluster", name, *namespaceLabel) } + gauges := intervals[len(intervals)-1].Gauges require.Contains(t, gauges, fullMetric) metric := gauges[fullMetric] require.Equal(t, value, metric.Value) } - testCases := []struct { - name string - activePeriodEnd time.Time - activePeriodStart time.Time - }{ - { - name: "monthly metric", - activePeriodEnd: segmentTime, - activePeriodStart: otherTime, - }, - { - name: "reporting period metric", - activePeriodEnd: otherTime, - activePeriodStart: segmentTime, - }, - { - name: "no metric", - activePeriodEnd: otherTime, - activePeriodStart: otherTime, - }, - } - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - opts.activePeriodStart = tc.activePeriodStart - opts.activePeriodEnd = tc.activePeriodEnd - - a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) - - data := metricsSink.Data() - gauges := data[len(data)-1].Gauges - - switch segmentTime { - case opts.activePeriodEnd: - // expect the metrics ending with "monthly" - // the namespace was never registered in core, so it'll be - // reported with a "deleted-" prefix - for i := 0; i < 3; i++ { - ns := fmt.Sprintf("deleted-ns-%d", i) - hasMetric(t, gauges, "identity.entity.active.monthly", 1, &ns) - hasMetric(t, gauges, "identity.nonentity.active.monthly", 1, &ns) - } - // secret sync metrics should be the sum of clients across all - // namespaces - hasMetric(t, gauges, "identity.secretsync.active.monthly", 3, nil) - case opts.activePeriodStart: - for i := 0; i < 3; i++ { - ns := fmt.Sprintf("deleted-ns-%d", i) - hasMetric(t, gauges, "identity.entity.active.reporting_period", 1, &ns) - hasMetric(t, gauges, "identity.nonentity.active.reporting_period", 1, &ns) - } - hasMetric(t, gauges, "identity.secretsync.active.reporting_period", 3, nil) - default: - hasNoMetric(t, gauges, "identity.entity.active.monthly") - hasNoMetric(t, gauges, "identity.nonentity.active.monthly") - hasNoMetric(t, gauges, "identity.secretsync.active.monthly") - hasNoMetric(t, gauges, "identity.entity.active.reporting_period") - hasNoMetric(t, gauges, "identity.entity.active.reporting_period") - hasNoMetric(t, gauges, "identity.secretsync.active.reporting_period") - } - }) - } + t.Run("no metrics", func(t *testing.T) { + // neither option is equal to the segment time, so no metrics should be + // reported + opts.activePeriodStart = otherTime + opts.activePeriodEnd = otherTime + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + hasNoMetric(t, data, "identity.entity.active.monthly") + hasNoMetric(t, data, "identity.nonentity.active.monthly") + hasNoMetric(t, data, "identity.secretsync.active.monthly") + hasNoMetric(t, data, "identity.entity.active.reporting_period") + hasNoMetric(t, data, "identity.entity.active.reporting_period") + hasNoMetric(t, data, "identity.secretsync.active.reporting_period") + }) + t.Run("monthly metric", func(t *testing.T) { + // activePeriodEnd is equal to the segment time, indicating that monthly + // metrics should be reported + opts.activePeriodEnd = segmentTime + opts.activePeriodStart = otherTime + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + // expect the metrics ending with "monthly" + // the namespace was never registered in core, so it'll be + // reported with a "deleted-" prefix + for i := 0; i < 3; i++ { + ns := fmt.Sprintf("deleted-ns-%d", i) + hasMetric(t, data, "identity.entity.active.monthly", 1, &ns) + hasMetric(t, data, "identity.nonentity.active.monthly", 1, &ns) + } + // secret sync metrics should be the sum of clients across all + // namespaces + hasMetric(t, data, "identity.secretsync.active.monthly", 3, nil) + }) + t.Run("reporting period metric", func(t *testing.T) { + // activePeriodEnd is not equal to the segment time but activePeriodStart + // is, which indicates that metrics for the reporting period should be + // reported + opts.activePeriodEnd = otherTime + opts.activePeriodStart = segmentTime + a.reportPrecomputedQueryMetrics(context.Background(), segmentTime, opts) + + data := metricsSink.Data() + // expect the metrics ending with "reporting_period" + // the namespace was never registered in core, so it'll be + // reported with a "deleted-" prefix + for i := 0; i < 3; i++ { + ns := fmt.Sprintf("deleted-ns-%d", i) + hasMetric(t, data, "identity.entity.active.reporting_period", 1, &ns) + hasMetric(t, data, "identity.nonentity.active.reporting_period", 1, &ns) + } + // secret sync metrics should be the sum of clients across all + // namespaces + hasMetric(t, data, "identity.secretsync.active.reporting_period", 3, nil) + }) } From bf34197aa1af8aaba516d0c84d30df7495003a82 Mon Sep 17 00:00:00 2001 From: Mia Epner Date: Thu, 29 Feb 2024 16:44:16 +0100 Subject: [PATCH 5/5] replace with underscores --- vault/activity_log.go | 4 ++-- vault/activity_log_test.go | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vault/activity_log.go b/vault/activity_log.go index f145aa77e7e3..1ec03e4d5bd6 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -2371,10 +2371,10 @@ func (a *ActivityLog) reportPrecomputedQueryMetrics(ctx context.Context, segment } for clientType, count := range summedMetricsMonthly { - a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", ""), "active", "monthly"}, float32(count)) + a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", "_"), "active", "monthly"}, float32(count)) } for clientType, count := range summedMetricsReporting { - a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", ""), "active", "reporting_period"}, float32(count)) + a.metrics.SetGauge([]string{"identity", strings.ReplaceAll(clientType, "-", "_"), "active", "reporting_period"}, float32(count)) } } diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index d38b5874ccb0..e1eb91a85523 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -4907,10 +4907,10 @@ func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) { data := metricsSink.Data() hasNoMetric(t, data, "identity.entity.active.monthly") hasNoMetric(t, data, "identity.nonentity.active.monthly") - hasNoMetric(t, data, "identity.secretsync.active.monthly") + hasNoMetric(t, data, "identity.secret_sync.active.monthly") hasNoMetric(t, data, "identity.entity.active.reporting_period") hasNoMetric(t, data, "identity.entity.active.reporting_period") - hasNoMetric(t, data, "identity.secretsync.active.reporting_period") + hasNoMetric(t, data, "identity.secret_sync.active.reporting_period") }) t.Run("monthly metric", func(t *testing.T) { // activePeriodEnd is equal to the segment time, indicating that monthly @@ -4930,7 +4930,7 @@ func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) { } // secret sync metrics should be the sum of clients across all // namespaces - hasMetric(t, data, "identity.secretsync.active.monthly", 3, nil) + hasMetric(t, data, "identity.secret_sync.active.monthly", 3, nil) }) t.Run("reporting period metric", func(t *testing.T) { // activePeriodEnd is not equal to the segment time but activePeriodStart @@ -4951,6 +4951,6 @@ func TestActivityLog_reportPrecomputedQueryMetrics(t *testing.T) { } // secret sync metrics should be the sum of clients across all // namespaces - hasMetric(t, data, "identity.secretsync.active.reporting_period", 3, nil) + hasMetric(t, data, "identity.secret_sync.active.reporting_period", 3, nil) }) }