From 8629bca85d81ec8dc97cf4be24e9fafcfe40807b Mon Sep 17 00:00:00 2001 From: Alexandros Sigalas Date: Thu, 29 Oct 2020 15:10:36 +0200 Subject: [PATCH] Add gork and fastmatch packages for parsing text into key/value pairs (#1877) * initial commit * Handle unquoting * Add gork * return pair matches * Add tests for patterns and validate field/pattern names * Add comments and fmt * mage gen fmt * Fix test * fmt lint fix * add comments * simplify lookup * Fix GRGW * comments on struct fields and remove unused slice * Restore 'fields' and associated methods, these will be needed Co-authored-by: panther-bot --- go.mod | 1 + go.sum | 4 + pkg/x/fastmatch/fastmatch.go | 217 +++++++++++++++ pkg/x/fastmatch/fastmatch_benchmark_test.go | 45 ++++ pkg/x/fastmatch/fastmatch_test.go | 103 +++++++ pkg/x/gork/builtin.go | 102 +++++++ pkg/x/gork/builtin_test.go | 111 ++++++++ pkg/x/gork/gork.go | 281 ++++++++++++++++++++ pkg/x/gork/gork_benchmark_test.go | 47 ++++ pkg/x/gork/gork_test.go | 69 +++++ 10 files changed, 980 insertions(+) create mode 100644 pkg/x/fastmatch/fastmatch.go create mode 100644 pkg/x/fastmatch/fastmatch_benchmark_test.go create mode 100644 pkg/x/fastmatch/fastmatch_test.go create mode 100644 pkg/x/gork/builtin.go create mode 100644 pkg/x/gork/builtin_test.go create mode 100644 pkg/x/gork/gork.go create mode 100644 pkg/x/gork/gork_benchmark_test.go create mode 100644 pkg/x/gork/gork_test.go diff --git a/go.mod b/go.mod index 4b36348ff1..bc928899b7 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.6.1 github.com/tidwall/gjson v1.6.1 + github.com/valyala/fasttemplate v1.2.1 go.uber.org/multierr v1.5.0 go.uber.org/zap v1.16.0 golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 diff --git a/go.sum b/go.sum index 09ed812dec..110c1cdb0b 100644 --- a/go.sum +++ b/go.sum @@ -233,6 +233,10 @@ github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhV github.com/tidwall/pretty v1.0.2 h1:Z7S3cePv9Jwm1KwS0513MRaoUe3S01WPbLNV40pwWZU= github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/vektah/gqlparser v1.1.2/go.mod h1:1ycwN7Ij5njmMkPPAOaRFY4rET2Enx7IkVv3vaXspKw= github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I= github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y= diff --git a/pkg/x/fastmatch/fastmatch.go b/pkg/x/fastmatch/fastmatch.go new file mode 100644 index 0000000000..782f1b0ff3 --- /dev/null +++ b/pkg/x/fastmatch/fastmatch.go @@ -0,0 +1,217 @@ +package fastmatch + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "errors" + "regexp" + "strconv" + "strings" +) + +// Pattern matches a string and extracts key/value pairs. +type Pattern struct { + // text to match at start of input + prefix string + // the rest of the fields + delimiters []delimiter + // non-empty field names + fields []string + // reusable buffer for unquoting stings + scratch []rune +} + +type delimiter struct { + // delimiter to match at end of field + match string + // name of the field + name string + // if set to `'` or `"` we should look out for escaping quotes + quote byte +} + +var splitFields = regexp.MustCompile(`%{\s*(?P[^}]*)\s*}`) + +// Compile compiles a pattern. +// Patterns use `%{` and `}` delimiters to define the placing of fields in a string. +// Two consecutive fields *must* have some delimiter text between them for the pattern to be valid. +// For example: +// `%{foo} %{bar}` is valid +// `%{foo}%{bar}` is not valid +// Pattern names currently have no restrictions apart from that they cannot contain `}`. +// Please be conservative with your field names as that might change in the future... +func Compile(pattern string) (*Pattern, error) { + tags := splitFields.FindAllStringSubmatch(pattern, -1) + if tags == nil { + // pattern contains no fields + return nil, errInvalidPattern + } + matchDelimiters := splitFields.Split(pattern, -1) + // First delimiter is a prefix at the start of text. + prefix, matchDelimiters := matchDelimiters[0], matchDelimiters[1:] + delimiters := make([]delimiter, 0, len(tags)) + fields := make([]string, 0, len(tags)) + last := len(matchDelimiters) - 1 + // Keep not of the previous delimiter for auto detecting quotes + prev := prefix + for i, m := range matchDelimiters { + // Do not allow empty delimiters unless it's the last field + if i < last && m == "" { + return nil, errInvalidPattern + } + tag := tags[i][1] + d := delimiter{} + // Autodetects quotes + d.reset(tag, m, prev) + prev = m + delimiters = append(delimiters, d) + if d.name != "" { + fields = append(fields, d.name) + } + } + return &Pattern{ + prefix: prefix, + delimiters: delimiters, + fields: fields, + }, nil +} + +func (d *delimiter) reset(tag, match, prev string) { + quote := prevQuote(prev) + if quote != nextQuote(match) { + quote = 0 + } + d.name = tag + d.quote = quote + d.match = match +} + +func prevQuote(s string) byte { + if n := len(s) - 1; 0 <= n && n < len(s) { + switch q := s[n]; q { + case '"', '\'': + return q + } + } + return 0 +} + +func nextQuote(s string) byte { + if len(s) > 0 { + switch q := s[0]; q { + case '"', '\'': + return q + } + } + return 0 +} + +// Returns the number of non-empty field names +func (p *Pattern) NumFields() int { + return len(p.fields) +} + +// Returns a non-empty field name by index. +// Panics if index is out of range. +// Use in conjunction with NumFields to check the range +func (p *Pattern) FieldName(i int) string { + return p.fields[i] +} + +var ( + errMatch = errors.New("match failed") + errInvalidPattern = errors.New("invalid pattern") +) + +// MatchString matches src and appends key/value pairs to dst. +// Note that if an error occurs the original slice is returned. +func (p *Pattern) MatchString(dst []string, src string) ([]string, error) { + tail := src + if prefix := p.prefix; len(prefix) <= len(tail) && tail[:len(prefix)] == prefix { + tail = tail[len(prefix):] + } else { + return dst, errMatch + } + matches := dst + delimiters := p.delimiters + for i := range delimiters { + d := &delimiters[i] + switch seek := d.match; seek { + case "": + if name := d.name; name != "" { + matches = append(matches, name, tail) + } + return matches, nil + default: + match, ss, err := p.match(tail, seek, d.quote) + if err != nil { + return dst, err + } + if name := d.name; name != "" { + matches = append(matches, name, match) + } + tail = ss + } + } + return matches, nil +} + +func (p *Pattern) match(src, delim string, quote byte) (match, tail string, err error) { + if (quote == '"' || quote == '\'') && strings.IndexByte(src, '\\') != -1 { + // Only trigger quoted match if there is an escaping slash (`\\`) somewhere ahead + return p.matchQuoted(src, delim, quote) + } + // Fast match case + if pos := strings.Index(src, delim); 0 <= pos && pos < len(src) { + // Split match part from rest of text + match, tail = src[:pos], src[pos:] + // Consume the delimiter + tail = tail[len(delim):] + return match, tail, nil + } + return "", src, errMatch +} + +// matchQuoted matches fields while escaping quotes in a single pass. +// It properly handles unicode multibytes so it is much slower than non-quoted match. +func (p *Pattern) matchQuoted(src, delim string, quote byte) (match, tail string, err error) { + tail = src + // Copy and reset scratch slice header to stack + scratch := p.scratch[:0] + // Go over each unicode character in src until we reach the quote + for len(tail) > 0 && tail[0] != quote { + // This reads a unicode character properly handling `\\` escapes + c, _, ss, err := strconv.UnquoteChar(tail, quote) + if err != nil { + p.scratch = scratch // Restore scratch buffer + return "", src, err + } + // Gather all characters + scratch = append(scratch, c) + // Advance the loop + tail = ss + } + p.scratch = scratch // Restore scratch buffer + // Check that the rest for the text starts with delimiter + if strings.HasPrefix(tail, delim) { + // Match found, consume the delimiter and return + return string(scratch), strings.TrimPrefix(tail, delim), nil + } + return "", src, errMatch +} diff --git a/pkg/x/fastmatch/fastmatch_benchmark_test.go b/pkg/x/fastmatch/fastmatch_benchmark_test.go new file mode 100644 index 0000000000..4cc7bbb85f --- /dev/null +++ b/pkg/x/fastmatch/fastmatch_benchmark_test.go @@ -0,0 +1,45 @@ +package fastmatch_test + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "testing" + + "github.com/panther-labs/panther/pkg/x/fastmatch" +) + +func BenchmarkPattern_MatchString(b *testing.B) { + input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326" + pattern := `%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}` + pat, err := fastmatch.Compile(pattern) + if err != nil { + b.Fatal(err) + } + b.ReportAllocs() + matches := make([]string, 10) + for i := 0; i < b.N; i++ { + matches, err = pat.MatchString(matches[:0], input) + if err != nil { + b.Fatal(err) + } + if len(matches) != 18 { + b.Fatal(matches) + } + } +} diff --git a/pkg/x/fastmatch/fastmatch_test.go b/pkg/x/fastmatch/fastmatch_test.go new file mode 100644 index 0000000000..8eccd2ff10 --- /dev/null +++ b/pkg/x/fastmatch/fastmatch_test.go @@ -0,0 +1,103 @@ +package fastmatch + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMatchString(t *testing.T) { + type testCase struct { + Name string + Input string + Pattern string + Matches []string + } + for _, tc := range []testCase{ + {"two fields", "foo bar", "%{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}}, + {"two fields prefix", "LOG: foo bar", "LOG: %{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}}, + {"no match", "foo", "%{foo} %{bar}", nil}, + {"two fields empty last", "foo ", "%{foo} %{bar}", []string{"foo", "foo", "bar", ""}}, + {"two fields empty first", " bar", "%{foo} %{bar}", []string{"foo", "", "bar", "bar"}}, + {"two fields quoted first", `"\"foo\" bar" baz`, `"%{foo}" %{bar}`, []string{"foo", `"foo" bar`, "bar", "baz"}}, + {"two fields quoted last", `foo "\"bar\"baz"`, `%{foo} "%{bar}"`, []string{"foo", `foo`, "bar", `"bar"baz`}}, + {"two fields one empty", "foo bar", "%{foo} %{}", []string{"foo", "foo"}}, + {"common log", + "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326", + `%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}`, + []string{ + "remote_ip", "127.0.0.1", + "identity", "-", + "user", "frank", + "timestamp", "10/Oct/2000:13:55:36 -0700", + "method", "GET", + "request_uri", "/apache_pb.gif", + "protocol", "HTTP/1.0", + "status", "200", + "bytes_sent", "2326", + }, + }, + } { + tc := tc + t.Run(tc.Name, func(t *testing.T) { + assert := require.New(t) + p, err := Compile(tc.Pattern) + assert.NoError(err) + match, err := p.MatchString(nil, tc.Input) + assert.Equal(tc.Matches != nil, err == nil) + assert.Equal(tc.Matches, match, "invalid match\nexpect: %v\nactual: %v", tc.Matches, match) + }) + } +} + +func TestPattern_match(t *testing.T) { + // nolint:maligned + type testCase struct { + Name string + Input string + Delimiter string + Quote byte + Tail string + Match string + WantErr bool + } + for _, tc := range []testCase{ + {"simple", "foo ", " ", 0, "", "foo", false}, + {"double quote", `foo \"bar\"" `, "\" ", '"', "", `foo "bar"`, false}, + {"single quote", `foo \'bar\'' `, "' ", '\'', "", `foo 'bar'`, false}, + } { + tc := tc + t.Run(tc.Name, func(t *testing.T) { + assert := require.New(t) + p := Pattern{} + match, tail, err := p.match(tc.Input, tc.Delimiter, tc.Quote) + if tc.WantErr { + assert.Error(err) + assert.Empty(match) + assert.Equal(tc.Input, tail) + return + } + assert.NoError(err) + assert.Equal(tc.Match, match) + assert.Equal(tc.Tail, tail) + }) + } +} diff --git a/pkg/x/gork/builtin.go b/pkg/x/gork/builtin.go new file mode 100644 index 0000000000..5349b3fb25 --- /dev/null +++ b/pkg/x/gork/builtin.go @@ -0,0 +1,102 @@ +package gork + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +// Patterns based on https://github.com/logrusorgru/grokky +// nolint: lll +const BuiltinPatterns = ` +DATA .*? +GREEDYDATA .* +NOTSPACE \S+ +SPACE \s* +WORD \b\w+\b +QUOTEDSTRING "(?:\\.|[^\\"]+)+"|""|'(?:\\.|[^\\']+)+'|'' +HEXDIGIT [0-9a-fAF] +UUID %{HEXDIGIT}{8}-(?:%{HEXDIGIT}{4}-){3}%{HEXDIGIT}{12} + +# Numbers +INT [+-]?(?:[0-9]+) +BASE10NUM [+-]?(?:[0-9]+(?:\.[0-9]+)?)|\.[0-9]+ +NUMBER %{BASE10NUM} +BASE16NUM (?:0[xX])?%{HEXDIGIT}+ +POSINT \b[1-9][0-9]*\b +NONNEGINT \b[0-9]+\b + +# Network +CISCOMAC (?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4} +WINDOWSMAC (?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2} +COMMONMAC (?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2} +MAC %{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC} +IPV6 \b(?:(?:(?:%{HEXDIGIT}{1,4}:){7}(?:%{HEXDIGIT}{1,4}|:))|(?:(?:%{HEXDIGIT}{1,4}:){6}(?::%{HEXDIGIT}{1,4}|(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:%{HEXDIGIT}{1,4}:){5}(?:(?:(?::%{HEXDIGIT}{1,4}){1,2})|:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|((%{HEXDIGIT}{1,4}:){4}(((:%{HEXDIGIT}{1,4}){1,3})|((:%{HEXDIGIT}{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|((%{HEXDIGIT}{1,4}:){3}(((:%{HEXDIGIT}{1,4}){1,4})|((:%{HEXDIGIT}{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|((%{HEXDIGIT}{1,4}:){2}(((:%{HEXDIGIT}{1,4}){1,5})|((:%{HEXDIGIT}{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|((%{HEXDIGIT}{1,4}:){1}(((:%{HEXDIGIT}{1,4}){1,6})|((:%{HEXDIGIT}{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:%{HEXDIGIT}{1,4}){1,7})|((:%{HEXDIGIT}{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\b +IPV4INT 25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9] +IPV4 \b(?:(?:%{IPV4INT})\.){3}(?:%{IPV4INT})\b +IP %{IPV6}|%{IPV4} +HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b) +IPORHOST %{IP}|%{HOSTNAME} +HOSTPORT %{IPORHOST}:%{POSINT} + +# URI + +USERNAME [a-zA-Z0-9._-]+ +UNIXPATH (?:/[\w_%!$@:.,-]?/?)(\S+)? +WINPATH (?:[A-Za-z]:|\\)(?:\\[^\\?*]*)+ +PATH (?:%{UNIXPATH}|%{WINPATH}) +TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) +URIPROTO [A-Za-z]+(?:\+[A-Za-z+]+)? +URIHOST %{IPORHOST}(?::%{POSINT})? +URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+ +URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]* +URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? +URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? + +# Timestamps +MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b +MONTHNUM 0?[1-9]|1[0-2] +MONTHNUM2 0[1-9]|1[0-2] +MONTHDAY (?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9] +DAY \b(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)\b +YEAR (?:\d\d){1,2} +HOUR 2[0123]|[01]?[0-9] +MINUTE [0-5][0-9] +SECOND (?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)? +KITCHEN %{HOUR}:%{MINUTE} +TIME %{HOUR}:%{MINUTE}:%{SECOND} +DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} +DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} +ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) +ISO8601_SECOND (?:%{SECOND}|60) +TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? +DATE %{DATE_US}|%{DATE_EU} +DATETIME %{DATE}[- ]%{TIME} +TZ [A-Z]{3} +TZOFFSET [+-]\d{4} +TIMESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} +TIMESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} +TIMESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} +TIMESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} +SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} +HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{TZOFFSET} + +# Aliases +NS %{NOTSPACE} +QS %{QUOTEDSTRING} +HOST %{HOSTNAME} +PID %{POSINT} +USER %{USERNAME} +` diff --git a/pkg/x/gork/builtin_test.go b/pkg/x/gork/builtin_test.go new file mode 100644 index 0000000000..cb91c8798f --- /dev/null +++ b/pkg/x/gork/builtin_test.go @@ -0,0 +1,111 @@ +package gork + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +var patternTests = [][]string{ + {"DATA", "", ""}, + {"WORD", " foo_bar.", "foo_bar"}, + {"WORD", " "}, + {"NOTSPACE", "foo ", "foo"}, + {"NOTSPACE", " foo", "foo"}, + {"NOTSPACE", "foo\t", "foo"}, + {"NOTSPACE", "\tfoo", "foo"}, + {"NOTSPACE", "\t foo", "foo"}, + {"QUOTEDSTRING", `"foo"`, `"foo"`}, + {"QS", `"foo"`, `"foo"`}, + {"QS", `"foo" "`, `"foo"`}, + {"QS", `"foo \"bar\""`, `"foo \"bar\""`}, + {"QUOTEDSTRING", `'foo'`, `'foo'`}, + {"QS", `'foo'`, `'foo'`}, + {"QS", `'foo' '`, `'foo'`}, + {"QS", `'foo \'bar\''`, `'foo \'bar\''`}, + {"SPACE", " foo", " "}, + {"SPACE", "\tfoo", "\t"}, + {"SPACE", ".foo", ""}, + {"INT", "42", "42"}, + {"INT", "+42", "+42"}, + {"INT", "-42", "-42"}, + {"INT", "-42.0", "-42"}, + {"INT", "0", "0"}, + {"INT", "01", "01"}, + {"INT", "001", "001"}, + {"IP", "127.0.0.1", "127.0.0.1"}, + {"IP", "0.0.0.0", "0.0.0.0"}, + {"IP", "300.0.0.0"}, + {"IP", "255.0.0.0", "255.0.0.0"}, + {"IP", "255.255.255.255", "255.255.255.255"}, + {"IP", "255.2555.255.255"}, + {"IP", "300.0"}, + {"IP", "2001:0db8:0000:0000:0000:8a2e:0370:7334", "2001:0db8:0000:0000:0000:8a2e:0370:7334"}, + {"IP", "2001:db8::8a2e:370:7334", "2001:db8::8a2e:370:7334"}, + {"MONTHDAY", "01", "01"}, + {"MONTHDAY", "31", "31"}, + {"MONTHDAY", "10", "10"}, + {"MONTH", "/Oct", "Oct"}, + {"YEAR", "2000", "2000"}, + {"TIME", "13:55:36", "13:55:36"}, + {"TZOFFSET", "-0700", "-0700"}, + {"HTTPDATE", "10/Oct/2000:13:55:36 -0700", "10/Oct/2000:13:55:36 -0700"}, +} + +func TestBuiltinPatterns(t *testing.T) { + assert := require.New(t) + env := Env{} + patterns, err := ReadPatterns(strings.NewReader(BuiltinPatterns)) + assert.NoError(err) + assert.NoError(env.SetMap(patterns)) + numTests := map[string]int{} + for _, tc := range patternTests { + name, input, expect := tc[0], tc[1], tc[2:] + t.Run(name+"_"+input, func(t *testing.T) { + assert := require.New(t) + src := name + if !strings.Contains(src, "%{") { + src = "%{" + src + ":actual}" + if len(expect) == 1 { + expect = []string{"actual", expect[0]} + } + } + pattern, err := env.Compile(src) + assert.NoError(err) + matches, err := pattern.MatchString(nil, input) + + if len(expect) == 0 { + assert.Error(err) + assert.Nil(matches) + } else { + assert.NoError(err) + assert.Equal(expect, matches, "match %q failed", name) + numTests[name]++ + } + }) + } + for name := range patterns { + if numTests[name] == 0 { + t.Logf("no tests for pattern %q", name) + } + } +} diff --git a/pkg/x/gork/gork.go b/pkg/x/gork/gork.go new file mode 100644 index 0000000000..9e42e7b176 --- /dev/null +++ b/pkg/x/gork/gork.go @@ -0,0 +1,281 @@ +package gork + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "bufio" + "fmt" + "io" + "regexp" + "strings" + + "github.com/pkg/errors" + "github.com/valyala/fasttemplate" +) + +const ( + startDelimiter = "%{" + endDelimiter = "}" +) + +// Pattern can match strings to extract key/value pairs +type Pattern struct { + src string + expr *regexp.Regexp + names []string +} + +// Regexp returns the full regular expression for this pattern +func (p *Pattern) Regexp() string { + return p.expr.String() +} + +// String returns the pattern +func (p *Pattern) String() string { + return p.src +} + +// MatchString matches src appending key/value pairs to dst. +// If the text does not match an error is return +func (p *Pattern) MatchString(dst []string, src string) ([]string, error) { + matches := p.expr.FindStringSubmatchIndex(src) + if matches == nil { + return dst, errors.New("No match") + } + if len(matches) > 2 { + // Regexp always sets first match to full string + matches = matches[2:] + var start, end int + for i := 0; 0 <= i && i < len(p.names) && len(matches) >= 2; i++ { + name := p.names[i] + // We skip unnamed groups + if name == "" { + continue + } + start, end, matches = matches[0], matches[1], matches[2:] + dst = append(dst, name, src[start:end]) + } + } + return dst, nil +} + +// Env is a collection of named patterns +type Env struct { + patterns map[string]*Pattern +} + +// New returns an environment containing basic patterns +func New() *Env { + return defaultEnv.Clone() +} + +var defaultEnv = mustDefaultEnv() + +func mustDefaultEnv() *Env { + env := Env{} + r := strings.NewReader(BuiltinPatterns) + if err := env.ReadPatterns(r); err != nil { + panic(err) + } + return &env +} + +// ReadPatterns reads, compiles and adds named patterns to an environment from an io.Reader +func (e *Env) ReadPatterns(r io.Reader) error { + patterns, err := ReadPatterns(r) + if err != nil { + return err + } + if err := e.SetMap(patterns); err != nil { + return err + } + return nil +} + +// ReadPatterns reads named patterns from an io.Reader +func ReadPatterns(r io.Reader) (map[string]string, error) { + patterns := make(map[string]string) + scanner := bufio.NewScanner(r) + numLines := 0 + for scanner.Scan() { + numLines++ + line := scanner.Text() + if line == "" || strings.HasPrefix(line, "#") { + continue + } + match := patternDef.FindStringSubmatch(line) + if match == nil { + return nil, errors.Errorf("invalid pattern definition at line #%d", numLines) + } + name, src := match[1], match[2] + patterns[name] = src + } + if err := scanner.Err(); err != nil { + return nil, err + } + return patterns, nil +} + +var patternDef = regexp.MustCompile(`^(\w+)\s+(.*)`) + +// SetMap adds multiple patterns to an environment. +func (e *Env) SetMap(patterns map[string]string) error { + child := e.Clone() + for name, pattern := range patterns { + // We check for duplicate only in the parent environment. + if err := e.checkDuplicate(name); err != nil { + return err + } + // Compilation is recursive so we might have compiled this already + if _, skip := child.patterns[name]; skip { + continue + } + expr, err := child.compile(name, pattern, patterns, nil) + if err != nil { + return err + } + e.set(name, expr) + } + for name, pattern := range child.patterns { + e.set(name, pattern) + } + return nil +} + +// Clone clones an environment +func (e *Env) Clone() *Env { + patterns := make(map[string]*Pattern, len(e.patterns)) + for name, pattern := range e.patterns { + patterns[name] = pattern + } + return &Env{ + patterns: patterns, + } +} + +// MustSet compiles and stores a named pattern or panics if the pattern is invalid or exists already. +func (e *Env) MustSet(name string, pattern string) { + if err := e.Set(name, pattern); err != nil { + panic(err) + } +} + +// MustSet compiles and stores a named pattern or fails if the pattern is invalid or exists already. +func (e *Env) Set(name string, pattern string) error { + if err := e.checkDuplicate(name); err != nil { + return err + } + expr, err := e.compile(name, pattern, nil, nil) + if err != nil { + return err + } + e.set(name, expr) + return nil +} + +// Compile compiles a pattern expanding named patterns. +func (e *Env) Compile(pattern string) (*Pattern, error) { + return e.compile(pattern, pattern, nil, nil) +} + +var ( + validPatternName = regexp.MustCompile(`^[A-Z][A-Z0-9_]*$`) + validFieldName = regexp.MustCompile(`[A-Za-z_][A-Za-z0-9_]*`) +) + +func (e *Env) compile(root, src string, patterns map[string]string, visited []string) (*Pattern, error) { + tpl := fasttemplate.New(src, startDelimiter, endDelimiter) + s := strings.Builder{} + _, err := tpl.ExecuteFunc(&s, func(w io.Writer, tag string) (int, error) { + // TODO: Allow arbitrary field names by switching named groups with auto-incrementing name + // To achieve this we need to build the 'names' slice as we render the template + name, field := splitTag(tag) + if !validPatternName.MatchString(name) { + return 0, errors.Errorf("invalid pattern name %q in tag %q of pattern %q", name, tag, root) + } + if field != "" && !validFieldName.MatchString(field) { + return 0, errors.Errorf("invalid field name %q in tag %q of pattern %q", field, tag, root) + } + for _, visited := range visited { + if visited == name { + return 0, errors.Errorf("recursive pattern %q %v", root, visited) + } + } + expr := e.lookup(name) + if expr == nil { + // Try to compile the pattern + if src, ok := patterns[name]; ok { + subexpr, err := e.compile(name, src, patterns, append(visited, name)) + if err != nil { + return 0, err + } + // Avoid duplicate compilations + e.set(name, subexpr) + expr = subexpr + } else { + return 0, errors.Errorf("unresolved pattern %q", name) + } + } + var group string + if field == "" { + group = fmt.Sprintf("(?:%s)", expr.Regexp()) + } else { + group = fmt.Sprintf("(?P<%s>%s)", field, expr.Regexp()) + } + return w.Write([]byte(group)) + }) + if err != nil { + return nil, errors.Wrapf(err, "failed to expand pattern %q", root) + } + + expr, err := regexp.Compile(s.String()) + if err != nil { + return nil, errors.Wrapf(err, "failed to compile pattern %q", root) + } + return &Pattern{ + src: src, + expr: expr, + names: expr.SubexpNames()[1:], + }, nil +} + +func (e *Env) lookup(name string) *Pattern { + return e.patterns[name] +} + +func (e *Env) set(name string, expr *Pattern) { + if e.patterns == nil { + e.patterns = make(map[string]*Pattern) + } + e.patterns[name] = expr +} +func (e *Env) checkDuplicate(name string) error { + if duplicate := e.lookup(name); duplicate != nil { + return errors.Errorf("expresion %q already defined as %q", name, duplicate.String()) + } + return nil +} + +func splitTag(tag string) (pattern, field string) { + tag = strings.TrimSpace(tag) + if pos := strings.IndexByte(tag, ':'); 0 <= pos && pos < len(tag) { + return tag[:pos], tag[pos+1:] + } + return tag, "" +} diff --git a/pkg/x/gork/gork_benchmark_test.go b/pkg/x/gork/gork_benchmark_test.go new file mode 100644 index 0000000000..0c524cb56e --- /dev/null +++ b/pkg/x/gork/gork_benchmark_test.go @@ -0,0 +1,47 @@ +package gork_test + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "testing" + + "github.com/panther-labs/panther/pkg/x/gork" +) + +//nolint:lll +func BenchmarkMatchString(b *testing.B) { + env := gork.New() + pattern := `%{NS:remote_ip} %{NS:identity} %{NS:user} \[%{HTTPDATE:timestamp}\] "%{NS:method} %{NS:request_uri} %{NS:protocol}" %{NS:status} %{NS:bytes_sent}` + expr, err := env.Compile(pattern) + if err != nil { + b.Fatal(err) + } + input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326" + matches := make([]string, 10) + b.ReportAllocs() + for i := 0; i < b.N; i++ { + matches, err = expr.MatchString(matches[:0], input) + if err != nil { + b.Fatal(err) + } + if len(matches) != 18 { + b.Error(matches) + } + } +} diff --git a/pkg/x/gork/gork_test.go b/pkg/x/gork/gork_test.go new file mode 100644 index 0000000000..3a024708a9 --- /dev/null +++ b/pkg/x/gork/gork_test.go @@ -0,0 +1,69 @@ +package gork + +/** + * Panther is a Cloud-Native SIEM for the Modern Security Team. + * Copyright (C) 2020 Panther Labs Inc + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +// nolint:lll +func TestMatchString(t *testing.T) { + assert := require.New(t) + env := New() + src := `%{DATA:remote_ip} %{DATA:identity} %{DATA:user} \[%{HTTPDATE:timestamp}\] "%{DATA:method} %{DATA:request_uri} %{DATA:protocol}" %{DATA:status} %{DATA:bytes_sent}$` + pattern, err := env.Compile(src) + assert.NoError(err) + input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326" + matches, err := pattern.MatchString(nil, input) + assert.NoError(err) + assert.Equal([]string{ + "remote_ip", "127.0.0.1", + "identity", "-", + "user", "frank", + "timestamp", "10/Oct/2000:13:55:36 -0700", + "method", "GET", + "request_uri", "/apache_pb.gif", + "protocol", "HTTP/1.0", + "status", "200", + "bytes_sent", "2326", + }, matches) +} + +func TestRecursive(t *testing.T) { + assert := require.New(t) + { + env := Env{} + patterns := `FOO %{FOO}` + err := env.ReadPatterns(strings.NewReader(patterns)) + assert.Error(err) + assert.Contains(err.Error(), "recursive") + } + { + env := Env{} + patterns := ` +FOO %{BAR} +BAR %{FOO}` + err := env.ReadPatterns(strings.NewReader(patterns)) + assert.Error(err) + assert.Contains(err.Error(), "recursive") + } +}