diff --git a/pkg/logql/filter.go b/pkg/logql/filter.go index a6a0f5046a97..d4a2e9760a3d 100644 --- a/pkg/logql/filter.go +++ b/pkg/logql/filter.go @@ -116,21 +116,33 @@ func (r regexpFilter) Filter(line []byte) bool { return r.Match(line) } -type containsFilter []byte +type containsFilter struct { + match []byte + caseInsensitive bool +} func (l containsFilter) Filter(line []byte) bool { - return bytes.Contains(line, l) + if l.caseInsensitive { + line = bytes.ToLower(line) + } + return bytes.Contains(line, l.match) } func (l containsFilter) String() string { - return string(l) + return string(l.match) } -func newContainsFilter(match string) LineFilter { - if match == "" { +func newContainsFilter(match []byte, caseInsensitive bool) LineFilter { + if len(match) == 0 { return TrueFilter } - return containsFilter(match) + if caseInsensitive { + match = bytes.ToLower(match) + } + return containsFilter{ + match: match, + caseInsensitive: caseInsensitive, + } } // newFilter creates a new line filter from a match string and type. @@ -141,9 +153,9 @@ func newFilter(match string, mt labels.MatchType) (LineFilter, error) { case labels.MatchNotRegexp: return parseRegexpFilter(match, false) case labels.MatchEqual: - return newContainsFilter(match), nil + return newContainsFilter([]byte(match), false), nil case labels.MatchNotEqual: - return newNotFilter(newContainsFilter(match)), nil + return newNotFilter(newContainsFilter([]byte(match), false)), nil default: return nil, fmt.Errorf("unknown matcher: %v", match) } @@ -181,7 +193,7 @@ func simplify(reg *syntax.Regexp) (LineFilter, bool) { clearCapture(reg) return simplify(reg) case syntax.OpLiteral: - return containsFilter(string(reg.Rune)), true + return newContainsFilter([]byte(string((reg.Rune))), isCaseInsensitive(reg)), true case syntax.OpStar: if reg.Sub[0].Op == syntax.OpAnyCharNotNL { return TrueFilter, true @@ -192,6 +204,10 @@ func simplify(reg *syntax.Regexp) (LineFilter, bool) { return nil, false } +func isCaseInsensitive(reg *syntax.Regexp) bool { + return (reg.Flags & syntax.FoldCase) != 0 +} + // clearCapture removes capture operation as they are not used for filtering. func clearCapture(regs ...*syntax.Regexp) { for _, r := range regs { @@ -267,8 +283,7 @@ func simplifyConcat(reg *syntax.Regexp, baseLiteral []byte) (LineFilter, bool) { // if we have only a concat with literals. if baseLiteral != nil { - return containsFilter(baseLiteral), true - + return newContainsFilter(baseLiteral, isCaseInsensitive(reg)), true } return nil, false @@ -282,14 +297,14 @@ func simplifyConcatAlternate(reg *syntax.Regexp, literal []byte, curr LineFilter for _, alt := range reg.Sub { switch alt.Op { case syntax.OpEmptyMatch: - curr = chainOrFilter(curr, containsFilter(literal)) + curr = chainOrFilter(curr, newContainsFilter(literal, isCaseInsensitive(reg))) case syntax.OpLiteral: // concat the root literal with the alternate one. altBytes := []byte(string(alt.Rune)) altLiteral := make([]byte, 0, len(literal)+len(altBytes)) altLiteral = append(altLiteral, literal...) altLiteral = append(altLiteral, altBytes...) - curr = chainOrFilter(curr, containsFilter(altLiteral)) + curr = chainOrFilter(curr, newContainsFilter(altLiteral, isCaseInsensitive(reg))) case syntax.OpConcat: f, ok := simplifyConcat(alt, literal) if !ok { @@ -300,7 +315,7 @@ func simplifyConcatAlternate(reg *syntax.Regexp, literal []byte, curr LineFilter if alt.Sub[0].Op != syntax.OpAnyCharNotNL { return nil, false } - curr = chainOrFilter(curr, containsFilter(literal)) + curr = chainOrFilter(curr, newContainsFilter(literal, isCaseInsensitive(reg))) default: return nil, false } diff --git a/pkg/logql/filter_test.go b/pkg/logql/filter_test.go index d25a5729c19d..89005eb2ceef 100644 --- a/pkg/logql/filter_test.go +++ b/pkg/logql/filter_test.go @@ -9,7 +9,7 @@ import ( func Test_SimplifiedRegex(t *testing.T) { fixtures := []string{ - "foo", "foobar", "bar", "foobuzz", "buzz", "f", " ", "fba", "foofoofoo", "b", "foob", "bfoo", + "foo", "foobar", "bar", "foobuzz", "buzz", "f", " ", "fba", "foofoofoo", "b", "foob", "bfoo", "FoO", } for _, test := range []struct { re string @@ -18,40 +18,41 @@ func Test_SimplifiedRegex(t *testing.T) { match bool }{ // regex we intend to support. - {"foo", true, containsFilter("foo"), true}, - {"not", true, newNotFilter(containsFilter("not")), false}, - {"(foo)", true, containsFilter("foo"), true}, - {"(foo|ba)", true, newOrFilter(containsFilter("foo"), containsFilter("ba")), true}, - {"(foo|ba|ar)", true, newOrFilter(newOrFilter(containsFilter("foo"), containsFilter("ba")), containsFilter("ar")), true}, - {"(foo|(ba|ar))", true, newOrFilter(containsFilter("foo"), newOrFilter(containsFilter("ba"), containsFilter("ar"))), true}, - {"foo.*", true, containsFilter("foo"), true}, - {".*foo", true, newNotFilter(containsFilter("foo")), false}, - {".*foo.*", true, containsFilter("foo"), true}, - {"(.*)(foo).*", true, containsFilter("foo"), true}, - {"(foo.*|.*ba)", true, newOrFilter(containsFilter("foo"), containsFilter("ba")), true}, - {"(foo.*|.*bar.*)", true, newNotFilter(newOrFilter(containsFilter("foo"), containsFilter("bar"))), false}, - {".*foo.*|bar", true, newNotFilter(newOrFilter(containsFilter("foo"), containsFilter("bar"))), false}, - {".*foo|bar", true, newNotFilter(newOrFilter(containsFilter("foo"), containsFilter("bar"))), false}, + {"foo", true, newContainsFilter([]byte("foo"), false), true}, + {"not", true, newNotFilter(newContainsFilter([]byte("not"), false)), false}, + {"(foo)", true, newContainsFilter([]byte("foo"), false), true}, + {"(foo|ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, + {"(foo|ba|ar)", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), newContainsFilter([]byte("ar"), false)), true}, + {"(foo|(ba|ar))", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("ba"), false), newContainsFilter([]byte("ar"), false))), true}, + {"foo.*", true, newContainsFilter([]byte("foo"), false), true}, + {".*foo", true, newNotFilter(newContainsFilter([]byte("foo"), false)), false}, + {".*foo.*", true, newContainsFilter([]byte("foo"), false), true}, + {"(.*)(foo).*", true, newContainsFilter([]byte("foo"), false), true}, + {"(foo.*|.*ba)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("ba"), false)), true}, + {"(foo.*|.*bar.*)", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + {".*foo.*|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, + {".*foo|bar", true, newNotFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false))), false}, // This construct is similar to (...), but won't create a capture group. - {"(?:.*foo.*|bar)", true, newOrFilter(containsFilter("foo"), containsFilter("bar")), true}, + {"(?:.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, // named capture group - {"(?P.*foo.*|bar)", true, newOrFilter(containsFilter("foo"), containsFilter("bar")), true}, + {"(?P.*foo.*|bar)", true, newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), true}, // parsed as (?-s:.)*foo(?-s:.)*|b(?:ar|uzz) - {".*foo.*|bar|buzz", true, newOrFilter(containsFilter("foo"), newOrFilter(containsFilter("bar"), containsFilter("buzz"))), true}, + {".*foo.*|bar|buzz", true, newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("buzz"), false))), true}, // parsed as (?-s:.)*foo(?-s:.)*|bar|uzz - {".*foo.*|bar|uzz", true, newOrFilter(newOrFilter(containsFilter("foo"), containsFilter("bar")), containsFilter("uzz")), true}, + {".*foo.*|bar|uzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("bar"), false)), newContainsFilter([]byte("uzz"), false)), true}, // parsed as foo|b(?:ar|(?:)|uzz)|zz - {"foo|bar|b|buzz|zz", true, newOrFilter(newOrFilter(containsFilter("foo"), newOrFilter(newOrFilter(containsFilter("bar"), containsFilter("b")), containsFilter("buzz"))), containsFilter("zz")), true}, + {"foo|bar|b|buzz|zz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("foo"), false), newOrFilter(newOrFilter(newContainsFilter([]byte("bar"), false), newContainsFilter([]byte("b"), false)), newContainsFilter([]byte("buzz"), false))), newContainsFilter([]byte("zz"), false)), true}, // parsed as f(?:(?:)|oo(?:(?:)|bar)) - {"f|foo|foobar", true, newOrFilter(containsFilter("f"), newOrFilter(containsFilter("foo"), containsFilter("foobar"))), true}, + {"f|foo|foobar", true, newOrFilter(newContainsFilter([]byte("f"), false), newOrFilter(newContainsFilter([]byte("foo"), false), newContainsFilter([]byte("foobar"), false))), true}, // parsed as f(?:(?-s:.)*|oobar(?-s:.)*)|(?-s:.)*buzz - {"f.*|foobar.*|.*buzz", true, newOrFilter(newOrFilter(containsFilter("f"), containsFilter("foobar")), containsFilter("buzz")), true}, + {"f.*|foobar.*|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, // parsed as ((f(?-s:.)*)|foobar(?-s:.)*)|(?-s:.)*buzz - {"((f.*)|foobar.*)|.*buzz", true, newOrFilter(newOrFilter(containsFilter("f"), containsFilter("foobar")), containsFilter("buzz")), true}, + {"((f.*)|foobar.*)|.*buzz", true, newOrFilter(newOrFilter(newContainsFilter([]byte("f"), false), newContainsFilter([]byte("foobar"), false)), newContainsFilter([]byte("buzz"), false)), true}, {".*", true, TrueFilter, true}, {".*|.*", true, TrueFilter, true}, {".*||||", true, TrueFilter, true}, {"", true, TrueFilter, true}, + {"(?i)foo", true, newContainsFilter([]byte("foo"), true), true}, // regex we are not supporting. {"[a-z]+foo", false, nil, false}, @@ -93,26 +94,27 @@ func Test_SimplifiedRegex(t *testing.T) { } func Test_TrueFilter(t *testing.T) { + empty := []byte("") for _, test := range []struct { name string f LineFilter expectTrue bool }{ - {"empty match", newContainsFilter(""), true}, - {"not empty match", newNotFilter(newContainsFilter("")), false}, - {"match", newContainsFilter("foo"), false}, - {"empty match and", newAndFilter(newContainsFilter(""), newContainsFilter("")), true}, - {"empty match or", newOrFilter(newContainsFilter(""), newContainsFilter("")), true}, - {"nil right and", newAndFilter(newContainsFilter(""), nil), true}, - {"nil left or", newOrFilter(nil, newContainsFilter("")), true}, - {"nil right and not empty", newAndFilter(newContainsFilter("foo"), nil), false}, - {"nil left or not empty", newOrFilter(nil, newContainsFilter("foo")), false}, + {"empty match", newContainsFilter(empty, false), true}, + {"not empty match", newNotFilter(newContainsFilter(empty, true)), false}, + {"match", newContainsFilter([]byte("foo"), false), false}, + {"empty match and", newAndFilter(newContainsFilter(empty, false), newContainsFilter(empty, false)), true}, + {"empty match or", newOrFilter(newContainsFilter(empty, false), newContainsFilter(empty, false)), true}, + {"nil right and", newAndFilter(newContainsFilter(empty, false), nil), true}, + {"nil left or", newOrFilter(nil, newContainsFilter(empty, false)), true}, + {"nil right and not empty", newAndFilter(newContainsFilter([]byte("foo"), false), nil), false}, + {"nil left or not empty", newOrFilter(nil, newContainsFilter([]byte("foo"), false)), false}, {"nil both and", newAndFilter(nil, nil), true}, {"nil both or", newOrFilter(nil, nil), true}, - {"empty match and chained", newAndFilter(newContainsFilter(""), newAndFilter(newContainsFilter(""), newAndFilter(newContainsFilter(""), newContainsFilter("")))), true}, - {"empty match or chained", newOrFilter(newContainsFilter(""), newOrFilter(newContainsFilter(""), newOrFilter(newContainsFilter(""), newContainsFilter("")))), true}, - {"empty match and", newNotFilter(newAndFilter(newContainsFilter(""), newContainsFilter(""))), false}, - {"empty match or", newNotFilter(newOrFilter(newContainsFilter(""), newContainsFilter(""))), false}, + {"empty match and chained", newAndFilter(newContainsFilter(empty, false), newAndFilter(newContainsFilter(empty, false), newAndFilter(newContainsFilter(empty, false), newContainsFilter(empty, false)))), true}, + {"empty match or chained", newOrFilter(newContainsFilter(empty, false), newOrFilter(newContainsFilter(empty, true), newOrFilter(newContainsFilter(empty, false), newContainsFilter(empty, false)))), true}, + {"empty match and", newNotFilter(newAndFilter(newContainsFilter(empty, false), newContainsFilter(empty, false))), false}, + {"empty match or", newNotFilter(newOrFilter(newContainsFilter(empty, false), newContainsFilter(empty, false))), false}, } { t.Run(test.name, func(t *testing.T) { if test.expectTrue { @@ -131,6 +133,7 @@ func Benchmark_LineFilter(b *testing.B) { re string }{ {"foo.*"}, + {"(?i)foo"}, {".*foo.*"}, {".*foo"}, {"foo|bar"}, @@ -147,7 +150,7 @@ func Benchmark_LineFilter(b *testing.B) { } // see https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go -// A note on compiler optimisations +// A note on compiler optimizations var res bool func benchmarkRegex(b *testing.B, re, line string, match bool) {