Skip to content

Commit

Permalink
Implementing Regex optimization on the MatchNotRegexp and `MatchNot…
Browse files Browse the repository at this point in the history
…Equal` matcher type (thanos-io#6379)

* Implementing Regex optimization on the MatchNotRegexp matcher type

Signed-off-by: Alan Protasio <[email protected]>

* Optimizing MatchNotEqual

Signed-off-by: Alan Protasio <[email protected]>

---------

Signed-off-by: Alan Protasio <[email protected]>
  • Loading branch information
alanprot authored and HC Zhu committed Jun 27, 2023
1 parent 1160a88 commit 8b54757
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 10 deletions.
41 changes: 31 additions & 10 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -2282,28 +2282,37 @@ func checkNilPosting(l labels.Label, p index.Postings) index.Postings {
func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, error) {
if m.Type == labels.MatchRegexp {
if vals := findSetMatches(m.Value); len(vals) > 0 {
// Sorting will improve the performance dramatically if the dataset is relatively large
// since entries in the postings offset table was sorted by label name and value,
// the sequential reading is much faster.
sort.Strings(vals)
toAdd := make([]labels.Label, 0, len(vals))
for _, val := range vals {
toAdd = append(toAdd, labels.Label{Name: m.Name, Value: val})
}
return newPostingGroup(false, toAdd, nil), nil
return newPostingGroup(false, labelsFromSetMatchers(m.Name, vals), nil), nil
}
}

// If the matcher selects an empty value, it selects all the series which don't
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555.
if m.Matches("") {
var toRemove []labels.Label

// Fast-path for MatchNotRegexp matching.
// Inverse of a MatchNotRegexp is MatchRegexp (double negation).
// Fast-path for set matching.
if m.Type == labels.MatchNotRegexp {
if vals := findSetMatches(m.Value); len(vals) > 0 {
toRemove = labelsFromSetMatchers(m.Name, vals)
return newPostingGroup(true, nil, toRemove), nil
}
}

// Fast-path for MatchNotEqual matching.
// Inverse of a MatchNotEqual is MatchEqual (double negation).
if m.Type == labels.MatchNotEqual {
return newPostingGroup(true, nil, []labels.Label{{Name: m.Name, Value: m.Value}}), nil
}

vals, err := lvalsFn(m.Name)
if err != nil {
return nil, err
}

var toRemove []labels.Label
for _, val := range vals {
if ctx.Err() != nil {
return nil, ctx.Err()
Expand Down Expand Up @@ -2339,6 +2348,18 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er
return newPostingGroup(false, toAdd, nil), nil
}

// labelsFromSetMatchers builds one labels.Label per entry of vals, each carrying
// the given label name. vals is sorted in place first, so the caller's slice is
// reordered and the returned labels come back in ascending value order.
func labelsFromSetMatchers(name string, vals []string) []labels.Label {
	// Entries in the postings offset table are sorted by label name and value, so
	// visiting values in sorted order yields sequential reads, which are much
	// faster when the dataset is relatively large.
	sort.Strings(vals)

	lbls := make([]labels.Label, len(vals))
	for i := range vals {
		lbls[i] = labels.Label{Name: name, Value: vals[i]}
	}
	return lbls
}

type postingPtr struct {
keyID int
ptr index.Range
Expand Down
2 changes: 2 additions & 0 deletions pkg/store/bucket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,8 @@ func benchBucketSeries(t testutil.TB, sampleType chunkenc.ValueType, skipChunk b
matchersCase := []*labels.Matcher{
labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"),
labels.MustNewMatcher(labels.MatchNotEqual, "foo", "bar"),
labels.MustNewMatcher(labels.MatchEqual, "j", "0"),
labels.MustNewMatcher(labels.MatchNotEqual, "j", "0"),
labels.MustNewMatcher(labels.MatchRegexp, "j", "(0|1)"),
labels.MustNewMatcher(labels.MatchRegexp, "j", "0|1"),
labels.MustNewMatcher(labels.MatchNotRegexp, "j", "(0|1)"),
Expand Down

0 comments on commit 8b54757

Please sign in to comment.