forked from gane5hvarma/panther
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add gork and fastmatch packages for parsing text into key/value pairs…
… (#1877) * initial commit * Handle unquoting * Add gork * return pair matches * Add tests for patterns and validate field/pattern names * Add comments and fmt * mage gen fmt * Fix test * fmt lint fix * add comments * simplify lookup * Fix GRGW * comments on struct fields and remove unused slice * Restore 'fields' and associated methods, these will be needed Co-authored-by: panther-bot <[email protected]>
- Loading branch information
1 parent
d5f6ef6
commit 8629bca
Showing
10 changed files
with
980 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
package fastmatch | ||
|
||
/** | ||
* Panther is a Cloud-Native SIEM for the Modern Security Team. | ||
* Copyright (C) 2020 Panther Labs Inc | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as | ||
* published by the Free Software Foundation, either version 3 of the | ||
* License, or (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
import ( | ||
"errors" | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
) | ||
|
||
// Pattern matches a string and extracts key/value pairs. | ||
type Pattern struct { | ||
// text to match at start of input | ||
prefix string | ||
// the rest of the fields | ||
delimiters []delimiter | ||
// non-empty field names | ||
fields []string | ||
// reusable buffer for unquoting stings | ||
scratch []rune | ||
} | ||
|
||
// delimiter describes a single field of a pattern and the text that ends it.
type delimiter struct {
	// match is the literal text that terminates the field.
	match string
	// name is the name of the field; it may be empty.
	name string
	// quote is `'` or `"` when the field is enclosed in quotes, in which case
	// escaped quotes inside the value need to be handled. It is zero otherwise.
	quote byte
}
|
||
// splitFields matches a single `%{name}` field tag inside a pattern.
// The `tag` group captures the field name without surrounding whitespace.
// The name is matched lazily so that whitespace before the closing `}` is
// consumed by the trailing `\s*` instead of becoming part of the name.
var splitFields = regexp.MustCompile(`%{\s*(?P<tag>[^}]*?)\s*}`)
|
||
// Compile compiles a pattern. | ||
// Patterns use `%{` and `}` delimiters to define the placing of fields in a string. | ||
// Two consecutive fields *must* have some delimiter text between them for the pattern to be valid. | ||
// For example: | ||
// `%{foo} %{bar}` is valid | ||
// `%{foo}%{bar}` is not valid | ||
// Pattern names currently have no restrictions apart from that they cannot contain `}`. | ||
// Please be conservative with your field names as that might change in the future... | ||
func Compile(pattern string) (*Pattern, error) { | ||
tags := splitFields.FindAllStringSubmatch(pattern, -1) | ||
if tags == nil { | ||
// pattern contains no fields | ||
return nil, errInvalidPattern | ||
} | ||
matchDelimiters := splitFields.Split(pattern, -1) | ||
// First delimiter is a prefix at the start of text. | ||
prefix, matchDelimiters := matchDelimiters[0], matchDelimiters[1:] | ||
delimiters := make([]delimiter, 0, len(tags)) | ||
fields := make([]string, 0, len(tags)) | ||
last := len(matchDelimiters) - 1 | ||
// Keep not of the previous delimiter for auto detecting quotes | ||
prev := prefix | ||
for i, m := range matchDelimiters { | ||
// Do not allow empty delimiters unless it's the last field | ||
if i < last && m == "" { | ||
return nil, errInvalidPattern | ||
} | ||
tag := tags[i][1] | ||
d := delimiter{} | ||
// Autodetects quotes | ||
d.reset(tag, m, prev) | ||
prev = m | ||
delimiters = append(delimiters, d) | ||
if d.name != "" { | ||
fields = append(fields, d.name) | ||
} | ||
} | ||
return &Pattern{ | ||
prefix: prefix, | ||
delimiters: delimiters, | ||
fields: fields, | ||
}, nil | ||
} | ||
|
||
func (d *delimiter) reset(tag, match, prev string) { | ||
quote := prevQuote(prev) | ||
if quote != nextQuote(match) { | ||
quote = 0 | ||
} | ||
d.name = tag | ||
d.quote = quote | ||
d.match = match | ||
} | ||
|
||
// prevQuote returns the last byte of s if it is a single or double quote.
// It returns zero for any other byte and for an empty string.
func prevQuote(s string) byte {
	if s == "" {
		return 0
	}
	if last := s[len(s)-1]; last == '"' || last == '\'' {
		return last
	}
	return 0
}
|
||
// nextQuote returns the first byte of s if it is a single or double quote.
// It returns zero for any other byte and for an empty string.
func nextQuote(s string) byte {
	if s == "" {
		return 0
	}
	first := s[0]
	if first == '"' || first == '\'' {
		return first
	}
	return 0
}
|
||
// Returns the number of non-empty field names | ||
func (p *Pattern) NumFields() int { | ||
return len(p.fields) | ||
} | ||
|
||
// Returns a non-empty field name by index. | ||
// Panics if index is out of range. | ||
// Use in conjunction with NumFields to check the range | ||
func (p *Pattern) FieldName(i int) string { | ||
return p.fields[i] | ||
} | ||
|
||
// Errors used by the package.
var (
	// errMatch reports that the input does not match the pattern.
	errMatch = errors.New("match failed")
	// errInvalidPattern reports that a pattern could not be compiled.
	errInvalidPattern = errors.New("invalid pattern")
)
|
||
// MatchString matches src and appends key/value pairs to dst. | ||
// Note that if an error occurs the original slice is returned. | ||
func (p *Pattern) MatchString(dst []string, src string) ([]string, error) { | ||
tail := src | ||
if prefix := p.prefix; len(prefix) <= len(tail) && tail[:len(prefix)] == prefix { | ||
tail = tail[len(prefix):] | ||
} else { | ||
return dst, errMatch | ||
} | ||
matches := dst | ||
delimiters := p.delimiters | ||
for i := range delimiters { | ||
d := &delimiters[i] | ||
switch seek := d.match; seek { | ||
case "": | ||
if name := d.name; name != "" { | ||
matches = append(matches, name, tail) | ||
} | ||
return matches, nil | ||
default: | ||
match, ss, err := p.match(tail, seek, d.quote) | ||
if err != nil { | ||
return dst, err | ||
} | ||
if name := d.name; name != "" { | ||
matches = append(matches, name, match) | ||
} | ||
tail = ss | ||
} | ||
} | ||
return matches, nil | ||
} | ||
|
||
func (p *Pattern) match(src, delim string, quote byte) (match, tail string, err error) { | ||
if (quote == '"' || quote == '\'') && strings.IndexByte(src, '\\') != -1 { | ||
// Only trigger quoted match if there is an escaping slash (`\\`) somewhere ahead | ||
return p.matchQuoted(src, delim, quote) | ||
} | ||
// Fast match case | ||
if pos := strings.Index(src, delim); 0 <= pos && pos < len(src) { | ||
// Split match part from rest of text | ||
match, tail = src[:pos], src[pos:] | ||
// Consume the delimiter | ||
tail = tail[len(delim):] | ||
return match, tail, nil | ||
} | ||
return "", src, errMatch | ||
} | ||
|
||
// matchQuoted matches fields while escaping quotes in a single pass. | ||
// It properly handles unicode multibytes so it is much slower than non-quoted match. | ||
func (p *Pattern) matchQuoted(src, delim string, quote byte) (match, tail string, err error) { | ||
tail = src | ||
// Copy and reset scratch slice header to stack | ||
scratch := p.scratch[:0] | ||
// Go over each unicode character in src until we reach the quote | ||
for len(tail) > 0 && tail[0] != quote { | ||
// This reads a unicode character properly handling `\\` escapes | ||
c, _, ss, err := strconv.UnquoteChar(tail, quote) | ||
if err != nil { | ||
p.scratch = scratch // Restore scratch buffer | ||
return "", src, err | ||
} | ||
// Gather all characters | ||
scratch = append(scratch, c) | ||
// Advance the loop | ||
tail = ss | ||
} | ||
p.scratch = scratch // Restore scratch buffer | ||
// Check that the rest for the text starts with delimiter | ||
if strings.HasPrefix(tail, delim) { | ||
// Match found, consume the delimiter and return | ||
return string(scratch), strings.TrimPrefix(tail, delim), nil | ||
} | ||
return "", src, errMatch | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package fastmatch_test | ||
|
||
/** | ||
* Panther is a Cloud-Native SIEM for the Modern Security Team. | ||
* Copyright (C) 2020 Panther Labs Inc | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as | ||
* published by the Free Software Foundation, either version 3 of the | ||
* License, or (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/panther-labs/panther/pkg/x/fastmatch" | ||
) | ||
|
||
func BenchmarkPattern_MatchString(b *testing.B) { | ||
input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326" | ||
pattern := `%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}` | ||
pat, err := fastmatch.Compile(pattern) | ||
if err != nil { | ||
b.Fatal(err) | ||
} | ||
b.ReportAllocs() | ||
matches := make([]string, 10) | ||
for i := 0; i < b.N; i++ { | ||
matches, err = pat.MatchString(matches[:0], input) | ||
if err != nil { | ||
b.Fatal(err) | ||
} | ||
if len(matches) != 18 { | ||
b.Fatal(matches) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
package fastmatch | ||
|
||
/** | ||
* Panther is a Cloud-Native SIEM for the Modern Security Team. | ||
* Copyright (C) 2020 Panther Labs Inc | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Affero General Public License as | ||
* published by the Free Software Foundation, either version 3 of the | ||
* License, or (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Affero General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Affero General Public License | ||
* along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestMatchString(t *testing.T) { | ||
type testCase struct { | ||
Name string | ||
Input string | ||
Pattern string | ||
Matches []string | ||
} | ||
for _, tc := range []testCase{ | ||
{"two fields", "foo bar", "%{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}}, | ||
{"two fields prefix", "LOG: foo bar", "LOG: %{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}}, | ||
{"no match", "foo", "%{foo} %{bar}", nil}, | ||
{"two fields empty last", "foo ", "%{foo} %{bar}", []string{"foo", "foo", "bar", ""}}, | ||
{"two fields empty first", " bar", "%{foo} %{bar}", []string{"foo", "", "bar", "bar"}}, | ||
{"two fields quoted first", `"\"foo\" bar" baz`, `"%{foo}" %{bar}`, []string{"foo", `"foo" bar`, "bar", "baz"}}, | ||
{"two fields quoted last", `foo "\"bar\"baz"`, `%{foo} "%{bar}"`, []string{"foo", `foo`, "bar", `"bar"baz`}}, | ||
{"two fields one empty", "foo bar", "%{foo} %{}", []string{"foo", "foo"}}, | ||
{"common log", | ||
"127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326", | ||
`%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}`, | ||
[]string{ | ||
"remote_ip", "127.0.0.1", | ||
"identity", "-", | ||
"user", "frank", | ||
"timestamp", "10/Oct/2000:13:55:36 -0700", | ||
"method", "GET", | ||
"request_uri", "/apache_pb.gif", | ||
"protocol", "HTTP/1.0", | ||
"status", "200", | ||
"bytes_sent", "2326", | ||
}, | ||
}, | ||
} { | ||
tc := tc | ||
t.Run(tc.Name, func(t *testing.T) { | ||
assert := require.New(t) | ||
p, err := Compile(tc.Pattern) | ||
assert.NoError(err) | ||
match, err := p.MatchString(nil, tc.Input) | ||
assert.Equal(tc.Matches != nil, err == nil) | ||
assert.Equal(tc.Matches, match, "invalid match\nexpect: %v\nactual: %v", tc.Matches, match) | ||
}) | ||
} | ||
} | ||
|
||
func TestPattern_match(t *testing.T) { | ||
// nolint:maligned | ||
type testCase struct { | ||
Name string | ||
Input string | ||
Delimiter string | ||
Quote byte | ||
Tail string | ||
Match string | ||
WantErr bool | ||
} | ||
for _, tc := range []testCase{ | ||
{"simple", "foo ", " ", 0, "", "foo", false}, | ||
{"double quote", `foo \"bar\"" `, "\" ", '"', "", `foo "bar"`, false}, | ||
{"single quote", `foo \'bar\'' `, "' ", '\'', "", `foo 'bar'`, false}, | ||
} { | ||
tc := tc | ||
t.Run(tc.Name, func(t *testing.T) { | ||
assert := require.New(t) | ||
p := Pattern{} | ||
match, tail, err := p.match(tc.Input, tc.Delimiter, tc.Quote) | ||
if tc.WantErr { | ||
assert.Error(err) | ||
assert.Empty(match) | ||
assert.Equal(tc.Input, tail) | ||
return | ||
} | ||
assert.NoError(err) | ||
assert.Equal(tc.Match, match) | ||
assert.Equal(tc.Tail, tail) | ||
}) | ||
} | ||
} |
Oops, something went wrong.