Skip to content

Commit

Permalink
Add gork and fastmatch packages for parsing text into key/value pairs…
Browse files Browse the repository at this point in the history
… (#1877)

* initial commit

* Handle unquoting

* Add gork

* return pair matches

* Add tests for patterns and validate field/pattern names

* Add comments and fmt

* mage gen fmt

* Fix test

* fmt lint fix

* add comments

* simplify lookup

* Fix GRGW

* comments on struct fields and remove unused slice

* Restore 'fields' and associated methods, these will be needed

Co-authored-by: panther-bot <[email protected]>
  • Loading branch information
alxarch and panther-bot authored Oct 29, 2020
1 parent d5f6ef6 commit 8629bca
Show file tree
Hide file tree
Showing 10 changed files with 980 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.6.1
github.com/tidwall/gjson v1.6.1
github.com/valyala/fasttemplate v1.2.1
go.uber.org/multierr v1.5.0
go.uber.org/zap v1.16.0
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhV
github.com/tidwall/pretty v1.0.2 h1:Z7S3cePv9Jwm1KwS0513MRaoUe3S01WPbLNV40pwWZU=
github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4=
github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/vektah/gqlparser v1.1.2/go.mod h1:1ycwN7Ij5njmMkPPAOaRFY4rET2Enx7IkVv3vaXspKw=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I=
github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
Expand Down
217 changes: 217 additions & 0 deletions pkg/x/fastmatch/fastmatch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
package fastmatch

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"errors"
"regexp"
"strconv"
"strings"
)

// Pattern matches a string and extracts key/value pairs.
// Use Compile to build a Pattern from its textual form.
// NOTE(review): matching a quoted field writes to the scratch buffer, so a
// single Pattern presumably must not be shared between goroutines without
// synchronization — confirm with callers.
type Pattern struct {
// literal text that must appear at the start of the input
prefix string
// one entry per field of the pattern, in order of appearance
delimiters []delimiter
// non-empty field names, in order of appearance
fields []string
// reusable buffer for unquoting strings
scratch []rune
}

// delimiter describes a single field of a compiled pattern together with the
// literal text that terminates it.
type delimiter struct {
// literal text to match at the end of the field;
// when empty the field consumes the rest of the input
match string
// name of the field; values of fields with an empty name are not reported
name string
// if set to `'` or `"` we should look out for escaping quotes
quote byte
}

// splitFields locates `%{name}` field placeholders inside a pattern.
// The `tag` capture group holds the field name with surrounding whitespace removed.
// The capture is deliberately lazy (`*?`): a greedy `[^}]*` would also swallow any
// whitespace before the closing brace, leaving the trailing `\s*` nothing to trim
// (i.e. `%{ foo }` would produce the name "foo " instead of "foo").
var splitFields = regexp.MustCompile(`%{\s*(?P<tag>[^}]*?)\s*}`)

// Compile parses pattern and returns the Pattern that matches it.
//
// Fields are written as `%{name}`. Everything outside of a field is literal text:
// the text before the first field is a prefix the input must start with, and the
// text after each field is the delimiter that terminates that field's value.
//
// Two consecutive fields *must* be separated by some delimiter text:
// `%{foo} %{bar}` is valid
// `%{foo}%{bar}` is not valid
// Only the last field may have an empty delimiter.
//
// Field names currently have no restrictions apart from that they cannot contain `}`.
// Please be conservative with your field names as that might change in the future...
//
// An error is returned if the pattern contains no fields at all, or if two
// fields are adjacent with no delimiter text between them.
func Compile(pattern string) (*Pattern, error) {
	tags := splitFields.FindAllStringSubmatch(pattern, -1)
	if tags == nil {
		// Not a single field in the pattern
		return nil, errInvalidPattern
	}
	// Splitting on the fields leaves the literal parts: the leading prefix
	// followed by exactly one delimiter per field.
	parts := splitFields.Split(pattern, -1)
	separators := parts[1:]
	compiled := &Pattern{
		prefix:     parts[0],
		delimiters: make([]delimiter, 0, len(tags)),
		fields:     make([]string, 0, len(tags)),
	}
	// Keep note of the text preceding each field, it is needed for auto-detecting quotes
	before := compiled.prefix
	for i, sep := range separators {
		isLast := i == len(separators)-1
		if sep == "" && !isLast {
			// Only the final field is allowed to have no delimiter
			return nil, errInvalidPattern
		}
		var d delimiter
		// reset also auto-detects whether the field is enclosed in quotes
		d.reset(tags[i][1], sep, before)
		before = sep
		compiled.delimiters = append(compiled.delimiters, d)
		if d.name != "" {
			compiled.fields = append(compiled.fields, d.name)
		}
	}
	return compiled, nil
}

// reset overwrites d with the given field name (tag) and closing delimiter (match).
// prev is the literal text that precedes the field in the pattern.
// The field is considered quoted only when prev ends with the same quote
// character (`'` or `"`) that match starts with.
func (d *delimiter) reset(tag, match, prev string) {
	d.name, d.match = tag, match
	d.quote = 0
	if q := prevQuote(prev); q == nextQuote(match) {
		d.quote = q
	}
}

// prevQuote returns the last byte of s if it is a single or double quote.
// It returns 0 for an empty string or when s ends with any other byte.
func prevQuote(s string) byte {
	if s == "" {
		return 0
	}
	if last := s[len(s)-1]; last == '"' || last == '\'' {
		return last
	}
	return 0
}

// nextQuote returns the first byte of s if it is a single or double quote.
// It returns 0 for an empty string or when s starts with any other byte.
func nextQuote(s string) byte {
	if s == "" {
		return 0
	}
	first := s[0]
	if first != '"' && first != '\'' {
		return 0
	}
	return first
}

// NumFields returns the number of non-empty field names in the pattern.
func (p *Pattern) NumFields() int {
	names := p.fields
	return len(names)
}

// FieldName returns the i-th non-empty field name of the pattern.
// It panics if i is out of range; use NumFields to find the valid range.
func (p *Pattern) FieldName(i int) string {
	names := p.fields
	return names[i]
}

var (
	// errInvalidPattern is returned by Compile when a pattern cannot be compiled.
	errInvalidPattern = errors.New("invalid pattern")
	// errMatch is returned when the input text does not match the pattern.
	errMatch = errors.New("match failed")
)

// MatchString matches src against the pattern and appends the extracted
// name/value pairs to dst, returning the extended slice.
// Pairs are appended in pattern order as `name, value, name, value, ...`;
// fields with an empty name are matched but not reported.
// Note that if an error occurs the original slice is returned.
func (p *Pattern) MatchString(dst []string, src string) ([]string, error) {
	// The input must start with the literal prefix of the pattern
	if !strings.HasPrefix(src, p.prefix) {
		return dst, errMatch
	}
	rest := src[len(p.prefix):]
	pairs := dst
	for i := range p.delimiters {
		field := &p.delimiters[i]
		if field.match == "" {
			// No delimiter: the field takes whatever is left of the input
			if field.name != "" {
				pairs = append(pairs, field.name, rest)
			}
			return pairs, nil
		}
		value, remaining, err := p.match(rest, field.match, field.quote)
		if err != nil {
			return dst, err
		}
		if field.name != "" {
			pairs = append(pairs, field.name, value)
		}
		rest = remaining
	}
	return pairs, nil
}

// match extracts a single field value from the start of src.
// The value ends where delim first occurs; tail is the text after delim.
// For quoted fields (quote is `'` or `"`) the slower, escape-aware matchQuoted
// is used, but only if src contains a backslash somewhere ahead.
// If delim cannot be found it returns an empty match, src untouched and errMatch.
func (p *Pattern) match(src, delim string, quote byte) (match, tail string, err error) {
	quoted := quote == '"' || quote == '\''
	if quoted && strings.IndexByte(src, '\\') >= 0 {
		// There is an escaping slash (`\\`) somewhere ahead, unquote while matching
		return p.matchQuoted(src, delim, quote)
	}
	// Fast path: plain substring search
	pos := strings.Index(src, delim)
	if pos < 0 || pos >= len(src) {
		return "", src, errMatch
	}
	// Value is everything before the delimiter, tail is everything after it
	return src[:pos], src[pos+len(delim):], nil
}

// matchQuoted matches a quoted field while resolving `\` escapes in a single pass.
// It decodes src one unicode character at a time so it is much slower than the
// non-quoted match.
// Characters are read until the first unescaped quote; the text that follows
// must start with delim for the match to succeed.
// On failure it returns an empty match, src untouched and either the error
// from strconv.UnquoteChar or errMatch.
func (p *Pattern) matchQuoted(src, delim string, quote byte) (match, tail string, err error) {
	// Work on a local copy of the scratch slice header, reusing its storage
	buf := p.scratch[:0]
	rest := src
	for {
		if rest == "" || rest[0] == quote {
			// Reached end of input or the closing quote
			break
		}
		// Read one (possibly escaped) unicode character
		r, _, next, unquoteErr := strconv.UnquoteChar(rest, quote)
		if unquoteErr != nil {
			p.scratch = buf // Hand the buffer back for reuse
			return "", src, unquoteErr
		}
		buf = append(buf, r)
		rest = next
	}
	p.scratch = buf // Hand the buffer back for reuse
	// The rest of the text must start with the delimiter
	if !strings.HasPrefix(rest, delim) {
		return "", src, errMatch
	}
	// Match found, consume the delimiter and return
	return string(buf), rest[len(delim):], nil
}
45 changes: 45 additions & 0 deletions pkg/x/fastmatch/fastmatch_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package fastmatch_test

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"testing"

"github.com/panther-labs/panther/pkg/x/fastmatch"
)

// BenchmarkPattern_MatchString measures matching a log line in Apache common log
// format, reusing the destination slice between iterations.
func BenchmarkPattern_MatchString(b *testing.B) {
	const (
		line   = "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326"
		layout = `%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}`
	)
	compiled, err := fastmatch.Compile(layout)
	if err != nil {
		b.Fatal(err)
	}
	b.ReportAllocs()
	pairs := make([]string, 10)
	for n := 0; n < b.N; n++ {
		// Truncate instead of reallocating so the backing array is reused
		pairs, err = compiled.MatchString(pairs[:0], line)
		if err != nil {
			b.Fatal(err)
		}
		// 9 fields, each reported as a name/value pair
		if len(pairs) != 18 {
			b.Fatal(pairs)
		}
	}
}
103 changes: 103 additions & 0 deletions pkg/x/fastmatch/fastmatch_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package fastmatch

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestMatchString compiles each pattern and verifies the key/value pairs that
// MatchString extracts from the input.
// A nil Matches value means the input is expected NOT to match the pattern.
func TestMatchString(t *testing.T) {
type testCase struct {
Name string
Input string
Pattern string
// Expected name/value pairs, or nil if the match should fail
Matches []string
}
for _, tc := range []testCase{
{"two fields", "foo bar", "%{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}},
{"two fields prefix", "LOG: foo bar", "LOG: %{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}},
{"no match", "foo", "%{foo} %{bar}", nil},
{"two fields empty last", "foo ", "%{foo} %{bar}", []string{"foo", "foo", "bar", ""}},
{"two fields empty first", " bar", "%{foo} %{bar}", []string{"foo", "", "bar", "bar"}},
// Quoted fields have their escaped quotes unescaped in the result
{"two fields quoted first", `"\"foo\" bar" baz`, `"%{foo}" %{bar}`, []string{"foo", `"foo" bar`, "bar", "baz"}},
{"two fields quoted last", `foo "\"bar\"baz"`, `%{foo} "%{bar}"`, []string{"foo", `foo`, "bar", `"bar"baz`}},
// Fields with an empty name are matched but not reported
{"two fields one empty", "foo bar", "%{foo} %{}", []string{"foo", "foo"}},
{"common log",
"127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326",
`%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}`,
[]string{
"remote_ip", "127.0.0.1",
"identity", "-",
"user", "frank",
"timestamp", "10/Oct/2000:13:55:36 -0700",
"method", "GET",
"request_uri", "/apache_pb.gif",
"protocol", "HTTP/1.0",
"status", "200",
"bytes_sent", "2326",
},
},
} {
// Capture the loop variable for the closure below
tc := tc
t.Run(tc.Name, func(t *testing.T) {
assert := require.New(t)
p, err := Compile(tc.Pattern)
assert.NoError(err)
match, err := p.MatchString(nil, tc.Input)
// An error is expected if and only if no matches are expected
assert.Equal(tc.Matches != nil, err == nil)
assert.Equal(tc.Matches, match, "invalid match\nexpect: %v\nactual: %v", tc.Matches, match)
})
}
}

// TestPattern_match exercises the unexported match method directly on a zero
// Pattern, covering both the plain and the quoted (escape-aware) code paths.
func TestPattern_match(t *testing.T) {
// nolint:maligned
type testCase struct {
Name string
Input string
// Text that terminates the field
Delimiter string
// Quote character of the field, or 0 if the field is not quoted
Quote byte
// Expected remaining text after the delimiter is consumed
Tail string
// Expected field value
Match string
// Whether match is expected to fail
WantErr bool
}
for _, tc := range []testCase{
{"simple", "foo ", " ", 0, "", "foo", false},
{"double quote", `foo \"bar\"" `, "\" ", '"', "", `foo "bar"`, false},
{"single quote", `foo \'bar\'' `, "' ", '\'', "", `foo 'bar'`, false},
} {
// Capture the loop variable for the closure below
tc := tc
t.Run(tc.Name, func(t *testing.T) {
assert := require.New(t)
p := Pattern{}
match, tail, err := p.match(tc.Input, tc.Delimiter, tc.Quote)
if tc.WantErr {
// On failure match must be empty and the input returned untouched
assert.Error(err)
assert.Empty(match)
assert.Equal(tc.Input, tail)
return
}
assert.NoError(err)
assert.Equal(tc.Match, match)
assert.Equal(tc.Tail, tail)
})
}
}
Loading

0 comments on commit 8629bca

Please sign in to comment.