From c50f63ad3570ec56b1e2870bae787be545f39211 Mon Sep 17 00:00:00 2001 From: Adel Haj Hassan <41540817+adel121@users.noreply.github.com> Date: Wed, 8 Nov 2023 16:43:12 +0100 Subject: [PATCH] [pkg/util/kubernetes][CONTINT-477] add rate_limit_queries_remaining_min telemetry in cluster agent external metrics server (#20497) * add rate_limit_queries_remaining_min telemetry in cluster agent external metrics server * added release note * add mutex lock to mrr struct * improve unit test * add a get() method to the minTracker struct * add lock on the get method --- .../kubernetes/autoscalers/datadogexternal.go | 29 ++++++++++++ .../autoscalers/datadogexternal_util.go | 47 +++++++++++++++++++ .../autoscalers/datadogexternal_util_test.go | 47 +++++++++++++++++++ ...aining_min_telemetry-233fcfdbc0fe3822.yaml | 11 +++++ 4 files changed, 134 insertions(+) create mode 100644 pkg/util/kubernetes/autoscalers/datadogexternal_util.go create mode 100644 pkg/util/kubernetes/autoscalers/datadogexternal_util_test.go create mode 100644 releasenotes-dca/notes/add-rate_limit_queries_remaining_min_telemetry-233fcfdbc0fe3822.yaml diff --git a/pkg/util/kubernetes/autoscalers/datadogexternal.go b/pkg/util/kubernetes/autoscalers/datadogexternal.go index d2c313dc01143..c00e8d59ad79e 100644 --- a/pkg/util/kubernetes/autoscalers/datadogexternal.go +++ b/pkg/util/kubernetes/autoscalers/datadogexternal.go @@ -13,11 +13,13 @@ import ( "math" "strconv" "strings" + "sync" "time" "gopkg.in/zorkian/go-datadog-api.v2" utilserror "k8s.io/apimachinery/pkg/util/errors" + "github.com/DataDog/datadog-agent/pkg/config" "github.com/DataDog/datadog-agent/pkg/telemetry" le "github.com/DataDog/datadog-agent/pkg/util/kubernetes/apiserver/leaderelection/metrics" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -36,6 +38,9 @@ var ( rateLimitsRemaining = telemetry.NewGaugeWithOpts("", "rate_limit_queries_remaining", []string{"endpoint", le.JoinLeaderLabel}, "number of queries remaining before next reset", 
telemetry.Options{NoDoubleUnderscoreSep: true}) + rateLimitsRemainingMin = telemetry.NewGaugeWithOpts("", "rate_limit_queries_remaining_min", + []string{"endpoint", le.JoinLeaderLabel}, "minimum number of queries remaining before next reset observed during an expiration interval of 2*refresh period", + telemetry.Options{NoDoubleUnderscoreSep: true}) rateLimitsReset = telemetry.NewGaugeWithOpts("", "rate_limit_queries_reset", []string{"endpoint", le.JoinLeaderLabel}, "number of seconds before next reset", telemetry.Options{NoDoubleUnderscoreSep: true}) @@ -61,6 +66,21 @@ const ( queryEndpoint = "/api/v1/query" ) +var ( + minRemainingRequestsTracker *minTracker + once sync.Once +) + +func getMinRemainingRequestsTracker() *minTracker { + once.Do(func() { + refreshPeriod := config.Datadog.GetInt("external_metrics_provider.refresh_period") + expiryDuration := 2 * refreshPeriod + minRemainingRequestsTracker = newMinTracker(time.Duration(time.Duration(expiryDuration) * time.Second)) + }) + + return minRemainingRequestsTracker +} + // isRateLimitError is a helper function that checks if the received error is a rate limit error func isRateLimitError(err error) bool { if err == nil { @@ -171,6 +191,15 @@ func (p *Processor) queryDatadogExternal(ddQueries []string, timeWindow time.Dur } } + // Update rateLimitsRemainingMin metric + updateMap := p.datadogClient.GetRateLimitStats() + queryLimits := updateMap[queryEndpoint] + newVal, err := strconv.Atoi(queryLimits.Remaining) + if err == nil { + getMinRemainingRequestsTracker().update(newVal) + rateLimitsRemainingMin.Set(float64(minRemainingRequestsTracker.get()), queryEndpoint, le.JoinLeaderLabel) + } + return processedMetrics, nil } diff --git a/pkg/util/kubernetes/autoscalers/datadogexternal_util.go b/pkg/util/kubernetes/autoscalers/datadogexternal_util.go new file mode 100644 index 0000000000000..b888a386c6432 --- /dev/null +++ b/pkg/util/kubernetes/autoscalers/datadogexternal_util.go @@ -0,0 +1,47 @@ +// Unless explicitly 
// minTracker remembers the smallest value handed to update within a sliding
// expiry window. update and get hold the embedded mutex while touching the
// fields.
type minTracker struct {
	sync.Mutex
	// val is the current minimum; it is -1 until the first update.
	val int
	// timestamp is when val was last written.
	timestamp time.Time
	// expiryDuration is how long val stays authoritative before any new
	// observation replaces it.
	expiryDuration time.Duration
	// initialized records whether update has stored a value yet, so that
	// "unset" is not confused with a legitimately negative observation.
	initialized bool
}

// newMinTracker returns a tracker with no recorded value (get reports -1)
// whose stored minimum expires after expiryDuration.
func newMinTracker(expiryDuration time.Duration) *minTracker {
	return &minTracker{
		val:            -1,
		timestamp:      time.Now(),
		expiryDuration: expiryDuration,
	}
}

// update records newVal when any of the following holds:
//   - no value has been stored yet,
//   - the stored value is older than expiryDuration,
//   - newVal is less than or equal to the stored value (an equal value
//     refreshes the timestamp and thereby extends the window).
//
// Otherwise the stored minimum is kept untouched.
func (mt *minTracker) update(newVal int) {
	mt.Lock()
	defer mt.Unlock()

	// Read the clock once so the expiry check and the stored timestamp agree.
	now := time.Now()
	expired := now.Sub(mt.timestamp) > mt.expiryDuration

	if !mt.initialized || expired || newVal <= mt.val {
		mt.val = newVal
		mt.timestamp = now
		mt.initialized = true
	}
}

// get returns the currently stored minimum, or -1 when update has never been
// called. It does not itself check for expiry.
func (mt *minTracker) get() int {
	mt.Lock()
	defer mt.Unlock()
	return mt.val
}
+ +//go:build kubeapiserver + +package autoscalers + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestUpdateMinTracker(t *testing.T) { + expiryDuration := 60 * time.Second + + mt := newMinTracker(expiryDuration) + + // Should update + mt.update(10) + assert.Equal(t, mt.get(), 10) + + // Should not update, since value didn't expire yet + mt.update(11) + assert.Equal(t, mt.get(), 10) + + // simulate waiting half the expirationDuration + mt.timestamp = time.Now().Add(-expiryDuration / 2) + + // Should not update + mt.update(199) + assert.Equal(t, mt.get(), 10) + + // Shoud update, even if value didn't expire because new value is lower + mt.update(5) + assert.Equal(t, mt.get(), 5) + + // Change timestamp to simulate expiration + mt.timestamp = time.Now().Add(-2 * expiryDuration) + + // Shoud update because current value has expired + mt.update(100) + assert.Equal(t, mt.get(), 100) +} diff --git a/releasenotes-dca/notes/add-rate_limit_queries_remaining_min_telemetry-233fcfdbc0fe3822.yaml b/releasenotes-dca/notes/add-rate_limit_queries_remaining_min_telemetry-233fcfdbc0fe3822.yaml new file mode 100644 index 0000000000000..d76c197d2e94d --- /dev/null +++ b/releasenotes-dca/notes/add-rate_limit_queries_remaining_min_telemetry-233fcfdbc0fe3822.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG-DCA.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +features: + - | + Report `rate_limit_queries_remaining_min` telemetry from `external-metrics` server.