From 2c86371f99eba308ceac1404e0463bb253d9fd28 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 18 May 2023 09:24:49 -0700 Subject: [PATCH] Implementing Regex optimization on the MatchNotRegexp matcher type --- pkg/store/bucket.go | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/pkg/store/bucket.go b/pkg/store/bucket.go index c2648ed4001..8f711a67c15 100644 --- a/pkg/store/bucket.go +++ b/pkg/store/bucket.go @@ -2282,15 +2282,7 @@ func checkNilPosting(l labels.Label, p index.Postings) index.Postings { func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, error) { if m.Type == labels.MatchRegexp { if vals := findSetMatches(m.Value); len(vals) > 0 { - // Sorting will improve the performance dramatically if the dataset is relatively large - // since entries in the postings offset table was sorted by label name and value, - // the sequential reading is much faster. - sort.Strings(vals) - toAdd := make([]labels.Label, 0, len(vals)) - for _, val := range vals { - toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val}) - } - return newPostingGroup(false, toAdd, nil), nil + return newPostingGroup(false, labelsFromSetMatchers(m.Name, vals), nil), nil } } @@ -2298,12 +2290,22 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er // have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575 // and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555. if m.Matches("") { + var toRemove []labels.Label + + // Inverse of a MatchNotRegexp is MatchRegexp (double negation). + // Fast-path for set matching. 
+ if m.Type == labels.MatchNotRegexp { + if vals := findSetMatches(m.Value); len(vals) > 0 { + toRemove = labelsFromSetMatchers(m.Name, vals) + return newPostingGroup(true, nil, toRemove), nil + } + } + vals, err := lvalsFn(m.Name) if err != nil { return nil, err } - var toRemove []labels.Label for _, val := range vals { if ctx.Err() != nil { return nil, ctx.Err() @@ -2339,6 +2341,18 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er return newPostingGroup(false, toAdd, nil), nil } +func labelsFromSetMatchers(name string, vals []string) []labels.Label { + // Sorting will improve the performance dramatically if the dataset is relatively large + // since entries in the postings offset table are sorted by label name and value, + // so sequential reading is much faster. + sort.Strings(vals) + toAdd := make([]labels.Label, 0, len(vals)) + for _, val := range vals { + toAdd = append(toAdd, labels.Label{Name: name, Value: val}) + } + return toAdd +} + type postingPtr struct { keyID int ptr index.Range