From 1bbd9851bc55bd95f9720e793331cb6a99b391b8 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Fri, 29 Jul 2022 16:32:32 +0200 Subject: [PATCH 1/2] Fix multi-tenant exemplar matchers The exemplar proxy synthesizes a query based on PromQL expression matchers and individual store's label sets. When a store has multiple label sets with same label names but different values (e.g. multitenant Receivers), each exemplar matcher will be repeated once for each label set. Because of this, a receiver hosting 200 tenants can get the same exemplar matcher 200 times. This leads to the underlying stores slowing down and timing out when asked for exemplars. This commit modifies the exemplar proxy to deduplicate matchers and only send a matcher once to an underlying store. Signed-off-by: Filip Petkovski --- CHANGELOG.md | 3 +- pkg/exemplars/proxy.go | 32 +++++++++--------- pkg/exemplars/proxy_test.go | 66 +++++++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4b2cde109..711fdb69ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed - [#5502](https://github.com/thanos-io/thanos/pull/5502) Receive: Handle exemplar storage errors as conflict error. -- [#5534](https://github.com/thanos-io/thanos/pull/5534) Query: Set struct return by query api alerts same as prometheus api +- [#5534](https://github.com/thanos-io/thanos/pull/5534) Query: Set struct return by query api alerts same as prometheus api. +- [#5554](https://github.com/thanos-io/thanos/pull/5534) Query/Receiver: Fix querying exemplars from multi-tenant receivers. ### Added diff --git a/pkg/exemplars/proxy.go b/pkg/exemplars/proxy.go index 5218605922..0b9cfc45e1 100644 --- a/pkg/exemplars/proxy.go +++ b/pkg/exemplars/proxy.go @@ -6,6 +6,7 @@ package exemplars import ( "context" "io" + "strings" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -80,10 +81,11 @@ func (s *Proxy) Exemplars(req *exemplarspb.ExemplarsRequest, srv exemplarspb.Exe ) for _, st := range s.exemplars() { - query := "" + var queryParts []string + Matchers: for _, matchers := range selectors { - metricsSelector := "" + matcherSet := make(map[string]struct{}) for _, m := range matchers { for _, ls := range st.LabelSets { if lv := ls.Get(m.Name); lv != "" { @@ -96,27 +98,27 @@ func (s *Proxy) Exemplars(req *exemplarspb.ExemplarsRequest, srv exemplarspb.Exe continue } } - if metricsSelector == "" { - metricsSelector += m.String() - } else { - metricsSelector += ", " + m.String() - } + matcherSet[m.String()] = struct{}{} } } - // Construct the query by concatenating metric selectors with '+'. - // We cannot preserve the original query info, but the returned - // results are the same. - if query == "" { - query += "{" + metricsSelector + "}" - } else { - query += " + {" + metricsSelector + "}" + + matchers := make([]string, 0, len(matcherSet)) + for m := range matcherSet { + matchers = append(matchers, m) } + + queryParts = append(queryParts, "{"+strings.Join(matchers, ", ")+"}") } // No matchers match this store. - if query == "" { + if len(queryParts) == 0 { continue } + + // Construct the query by concatenating metric selectors with '+'. + // We cannot preserve the original query info, but the returned + // results are the same. + query := strings.Join(queryParts, "+") r := &exemplarspb.ExemplarsRequest{ Start: req.Start, End: req.End, diff --git a/pkg/exemplars/proxy_test.go b/pkg/exemplars/proxy_test.go index 4daac7a28d..08b18d9f02 100644 --- a/pkg/exemplars/proxy_test.go +++ b/pkg/exemplars/proxy_test.go @@ -5,12 +5,17 @@ package exemplars import ( "context" + "fmt" "io" "os" "reflect" "sync" "testing" + "github.com/prometheus/prometheus/promql/parser" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "github.com/go-kit/log" "github.com/pkg/errors" "github.com/prometheus/prometheus/model/labels" @@ -49,9 +54,33 @@ func (t *testExemplarClient) Recv() (*exemplarspb.ExemplarsResponse, error) { } func (t *testExemplarClient) Exemplars(ctx context.Context, in *exemplarspb.ExemplarsRequest, opts ...grpc.CallOption) (exemplarspb.Exemplars_ExemplarsClient, error) { + expr, err := parser.ParseExpr(in.Query) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + if err := t.assertUniqueMatchers(expr); err != nil { + return nil, err + } + return t, t.exemplarErr } +func (t *testExemplarClient) assertUniqueMatchers(expr parser.Expr) error { + matchersList := parser.ExtractSelectors(expr) + for _, matchers := range matchersList { + matcherSet := make(map[string]struct{}) + for _, matcher := range matchers { + if _, ok := matcherSet[matcher.String()]; ok { + return status.Error(codes.Internal, fmt.Sprintf("duplicate matcher set found %s", matcher)) + } + matcherSet[matcher.String()] = struct{}{} + } + } + + return nil +} + var _ exemplarspb.ExemplarsClient = &testExemplarClient{} type testExemplarServer struct { @@ -94,7 +123,7 @@ func TestProxy(t *testing.T) { { name: "proxy success", request: &exemplarspb.ExemplarsRequest{ - Query: "http_request_duration_bucket", + Query: `http_request_duration_bucket`, PartialResponseStrategy: storepb.PartialResponseStrategy_WARN, }, clients: []*exemplarspb.ExemplarStore{ @@ -105,7 +134,38 @@ func TestProxy(t *testing.T) { Exemplars: []*exemplarspb.Exemplar{{Value: 1}}, }), }, - LabelSets: []labels.Labels{labels.FromMap(map[string]string{"cluster": "A"})}, + LabelSets: []labels.Labels{ + labels.FromMap(map[string]string{"cluster": "A"}), + labels.FromMap(map[string]string{"cluster": "B"}), + }, + }, + }, + server: &testExemplarServer{}, + wantResponses: []*exemplarspb.ExemplarsResponse{ + exemplarspb.NewExemplarsResponse(&exemplarspb.ExemplarData{ + SeriesLabels: labelpb.ZLabelSet{Labels: labelpb.ZLabelsFromPromLabels(labels.FromMap(map[string]string{"__name__": "http_request_duration_bucket"}))}, + Exemplars: []*exemplarspb.Exemplar{{Value: 1}}, + }), + }, + }, + { + name: "proxy success with multiple selectors", + request: &exemplarspb.ExemplarsRequest{ + Query: `http_request_duration_bucket{region="us-east1"} / on (region) group_left() http_request_duration_bucket`, + PartialResponseStrategy: storepb.PartialResponseStrategy_WARN, + }, + clients: []*exemplarspb.ExemplarStore{ + { + ExemplarsClient: &testExemplarClient{ + response: exemplarspb.NewExemplarsResponse(&exemplarspb.ExemplarData{ + SeriesLabels: labelpb.ZLabelSet{Labels: labelpb.ZLabelsFromPromLabels(labels.FromMap(map[string]string{"__name__": "http_request_duration_bucket"}))}, + Exemplars: []*exemplarspb.Exemplar{{Value: 1}}, + }), + }, + LabelSets: []labels.Labels{ + labels.FromMap(map[string]string{"cluster": "A"}), + labels.FromMap(map[string]string{"cluster": "B"}), + }, }, }, server: &testExemplarServer{}, @@ -119,7 +179,7 @@ func TestProxy(t *testing.T) { { name: "warning proxy success", request: &exemplarspb.ExemplarsRequest{ - Query: "http_request_duration_bucket", + Query: `http_request_duration_bucket`, PartialResponseStrategy: storepb.PartialResponseStrategy_WARN, }, clients: []*exemplarspb.ExemplarStore{ From ba20cf097863cf7b56e724fa2a1222935629a384 Mon Sep 17 00:00:00 2001 From: Filip Petkovski Date: Mon, 1 Aug 2022 07:27:14 +0200 Subject: [PATCH 2/2] Address CR comments Signed-off-by: Filip Petkovski --- CHANGELOG.md | 2 +- pkg/exemplars/proxy.go | 10 ++++++---- pkg/exemplars/proxy_test.go | 7 +++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 711fdb69ae..0d23dba25f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed - [#5502](https://github.com/thanos-io/thanos/pull/5502) Receive: Handle exemplar storage errors as conflict error. - [#5534](https://github.com/thanos-io/thanos/pull/5534) Query: Set struct return by query api alerts same as prometheus api. -- [#5554](https://github.com/thanos-io/thanos/pull/5534) Query/Receiver: Fix querying exemplars from multi-tenant receivers. +- [#5554](https://github.com/thanos-io/thanos/pull/5554) Query/Receiver: Fix querying exemplars from multi-tenant receivers. ### Added diff --git a/pkg/exemplars/proxy.go b/pkg/exemplars/proxy.go index 0b9cfc45e1..0e4903ebae 100644 --- a/pkg/exemplars/proxy.go +++ b/pkg/exemplars/proxy.go @@ -80,8 +80,10 @@ func (s *Proxy) Exemplars(req *exemplarspb.ExemplarsRequest, srv exemplarspb.Exe exemplars []*exemplarspb.ExemplarData ) + queryParts := make([]string, 0) + labelMatchers := make([]string, 0) for _, st := range s.exemplars() { - var queryParts []string + queryParts = queryParts[:0] Matchers: for _, matchers := range selectors { @@ -102,12 +104,12 @@ func (s *Proxy) Exemplars(req *exemplarspb.ExemplarsRequest, srv exemplarspb.Exe } } - matchers := make([]string, 0, len(matcherSet)) + labelMatchers = labelMatchers[:0] for m := range matcherSet { - matchers = append(matchers, m) + labelMatchers = append(labelMatchers, m) } - queryParts = append(queryParts, "{"+strings.Join(matchers, ", ")+"}") + queryParts = append(queryParts, "{"+strings.Join(labelMatchers, ", ")+"}") } // No matchers match this store. diff --git a/pkg/exemplars/proxy_test.go b/pkg/exemplars/proxy_test.go index 08b18d9f02..cf91a078a9 100644 --- a/pkg/exemplars/proxy_test.go +++ b/pkg/exemplars/proxy_test.go @@ -12,15 +12,14 @@ import ( "sync" "testing" - "github.com/prometheus/prometheus/promql/parser" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - "github.com/go-kit/log" "github.com/pkg/errors" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" "go.uber.org/atomic" "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" "github.com/thanos-io/thanos/pkg/store/labelpb"