diff --git a/CHANGELOG.md b/CHANGELOG.md index 81078b32e3..3de810c69b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Added +- [#6437](https://github.com/thanos-io/thanos/pull/6437) Receive: make tenant stats limit configurable - [#6369](https://github.com/thanos-io/thanos/pull/6369) Receive: add az-aware replication support for Ketama algorithm - [#6185](https://github.com/thanos-io/thanos/pull/6185) Tracing: tracing in OTLP support configuring service_name. - [#6192](https://github.com/thanos-io/thanos/pull/6192) Store: add flag `bucket-web-label` to select the label to use as timeline title in web UI diff --git a/docs/components/receive.md b/docs/components/receive.md index 2e2a74e1f5..8ec28f9af6 100644 --- a/docs/components/receive.md +++ b/docs/components/receive.md @@ -32,7 +32,7 @@ The [Thanos Receive Controller](https://github.com/observatorium/thanos-receive- ## TSDB stats -Thanos Receive supports getting TSDB stats using the `/api/v1/status/tsdb` endpoint. Use the `THANOS-TENANT` HTTP header to get stats for individual Tenants. The output format of the endpoint is compatible with [Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats). +Thanos Receive supports getting TSDB stats using the `/api/v1/status/tsdb` endpoint. Use the `THANOS-TENANT` HTTP header to get stats for individual Tenants. Use the `limit` query parameter to tweak the number of stats to return (the default is 10). The output format of the endpoint is compatible with [Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats). Note that each Thanos Receive will only expose local stats and replicated series will not be included in the response. diff --git a/pkg/receive/handler.go b/pkg/receive/handler.go index 3cce0552ab..78836162ba 100644 --- a/pkg/receive/handler.go +++ b/pkg/receive/handler.go @@ -10,6 +10,7 @@ import ( "fmt" "io" stdlog "log" + "math" "net" "net/http" "path" @@ -54,12 +55,16 @@ const ( DefaultTenantHeader = "THANOS-TENANT" // DefaultTenant is the default value used for when no tenant is passed via the tenant header. DefaultTenant = "default-tenant" + // DefaultStatsLimit is the default value used for limiting tenant stats. + DefaultStatsLimit = 10 // DefaultTenantLabel is the default label-name used for when no tenant is passed via the tenant header. DefaultTenantLabel = "tenant_id" // DefaultReplicaHeader is the default header used to designate the replica count of a write request. DefaultReplicaHeader = "THANOS-REPLICA" // AllTenantsQueryParam is the query parameter for getting TSDB stats for all tenants. AllTenantsQueryParam = "all_tenants" + // LimitStatsQueryParam is the query parameter for limiting the amount of returned TSDB stats. + LimitStatsQueryParam = "limit" // Labels for metrics. labelSuccess = "success" labelError = "error" @@ -280,6 +285,21 @@ func (h *Handler) testReady(f http.HandlerFunc) http.HandlerFunc { } } +func getStatsLimitParameter(r *http.Request) (int, error) { + statsLimitStr := r.URL.Query().Get(LimitStatsQueryParam) + if statsLimitStr == "" { + return DefaultStatsLimit, nil + } + statsLimit, err := strconv.ParseInt(statsLimitStr, 10, 0) + if err != nil { + return 0, fmt.Errorf("unable to parse '%s' parameter: %w", LimitStatsQueryParam, err) + } + if statsLimit > math.MaxInt { + return 0, fmt.Errorf("'%s' parameter is larger than %d", LimitStatsQueryParam, math.MaxInt) + } + return int(statsLimit), nil +} + func (h *Handler) getStats(r *http.Request, statsByLabelName string) ([]statusapi.TenantStats, *api.ApiError) { if !h.isReady() { return nil, &api.ApiError{Typ: api.ErrorInternal, Err: fmt.Errorf("service unavailable")} @@ -292,15 +312,20 @@ func (h *Handler) getStats(r *http.Request, statsByLabelName string) ([]statusap return nil, &api.ApiError{Typ: api.ErrorBadData, Err: err} } + statsLimit, err := getStatsLimitParameter(r) + if err != nil { + return nil, &api.ApiError{Typ: api.ErrorBadData, Err: err} + } + if getAllTenantStats { - return h.options.TSDBStats.TenantStats(statsByLabelName), nil + return h.options.TSDBStats.TenantStats(statsLimit, statsByLabelName), nil } if tenantID == "" { tenantID = h.options.DefaultTenantID } - return h.options.TSDBStats.TenantStats(statsByLabelName, tenantID), nil + return h.options.TSDBStats.TenantStats(statsLimit, statsByLabelName, tenantID), nil } // Close stops the Handler. diff --git a/pkg/receive/handler_test.go b/pkg/receive/handler_test.go index ae4ec9b985..2e6cdb4cc7 100644 --- a/pkg/receive/handler_test.go +++ b/pkg/receive/handler_test.go @@ -16,6 +16,7 @@ import ( "path/filepath" "runtime" "runtime/pprof" + "strconv" "strings" "sync" "testing" @@ -1520,3 +1521,50 @@ func TestRelabel(t *testing.T) { }) } } + +func TestGetStatsLimitParameter(t *testing.T) { + t.Run("invalid limit parameter, not integer", func(t *testing.T) { + r, err := http.NewRequest(http.MethodGet, "http://0:0", nil) + testutil.Ok(t, err) + + q := r.URL.Query() + q.Add(LimitStatsQueryParam, "abc") + r.URL.RawQuery = q.Encode() + + _, err = getStatsLimitParameter(r) + testutil.NotOk(t, err) + }) + t.Run("invalid limit parameter, too large", func(t *testing.T) { + r, err := http.NewRequest(http.MethodGet, "http://0:0", nil) + testutil.Ok(t, err) + + q := r.URL.Query() + q.Add(LimitStatsQueryParam, strconv.FormatUint(math.MaxInt+1, 10)) + r.URL.RawQuery = q.Encode() + + _, err = getStatsLimitParameter(r) + testutil.NotOk(t, err) + }) + t.Run("not present returns default", func(t *testing.T) { + r, err := http.NewRequest(http.MethodGet, "http://0:0", nil) + testutil.Ok(t, err) + + limit, err := getStatsLimitParameter(r) + testutil.Ok(t, err) + testutil.Equals(t, limit, DefaultStatsLimit) + }) + t.Run("if present and valid, the parameter is returned", func(t *testing.T) { + r, err := http.NewRequest(http.MethodGet, "http://0:0", nil) + testutil.Ok(t, err) + + const givenLimit = 20 + + q := r.URL.Query() + q.Add(LimitStatsQueryParam, strconv.FormatUint(givenLimit, 10)) + r.URL.RawQuery = q.Encode() + + limit, err := getStatsLimitParameter(r) + testutil.Ok(t, err) + testutil.Equals(t, limit, givenLimit) + }) +} diff --git a/pkg/receive/multitsdb.go b/pkg/receive/multitsdb.go index 2fceb1ac77..349ceb98ef 100644 --- a/pkg/receive/multitsdb.go +++ b/pkg/receive/multitsdb.go @@ -44,7 +44,7 @@ import ( type TSDBStats interface { // TenantStats returns TSDB head stats for the given tenants. // If no tenantIDs are provided, stats for all tenants are returned. - TenantStats(statsByLabelName string, tenantIDs ...string) []status.TenantStats + TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats } type MultiTSDB struct { @@ -518,7 +518,7 @@ func (t *MultiTSDB) TSDBExemplars() map[string]*exemplars.TSDB { return res } -func (t *MultiTSDB) TenantStats(statsByLabelName string, tenantIDs ...string) []status.TenantStats { +func (t *MultiTSDB) TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats { t.mtx.RLock() defer t.mtx.RUnlock() if len(tenantIDs) == 0 { @@ -545,7 +545,7 @@ func (t *MultiTSDB) TenantStats(statsByLabelName string, tenantIDs ...string) [] if db == nil { return } - stats := db.Head().Stats(statsByLabelName, 10) + stats := db.Head().Stats(statsByLabelName, limit) mu.Lock() defer mu.Unlock() diff --git a/pkg/receive/multitsdb_test.go b/pkg/receive/multitsdb_test.go index 9d5b0ac26d..58241cd5a0 100644 --- a/pkg/receive/multitsdb_test.go +++ b/pkg/receive/multitsdb_test.go @@ -568,7 +568,7 @@ func TestMultiTSDBStats(t *testing.T) { testutil.Ok(t, appendSample(m, "baz", time.Now())) testutil.Equals(t, 3, len(m.TSDBLocalClients())) - stats := m.TenantStats(labels.MetricName, test.tenants...) + stats := m.TenantStats(10, labels.MetricName, test.tenants...) testutil.Equals(t, test.expectedStats, len(stats)) }) }