diff --git a/go.mod b/go.mod
index 4b36348ff1..bc928899b7 100644
--- a/go.mod
+++ b/go.mod
@@ -28,6 +28,7 @@ require (
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.6.1
github.com/tidwall/gjson v1.6.1
+ github.com/valyala/fasttemplate v1.2.1
go.uber.org/multierr v1.5.0
go.uber.org/zap v1.16.0
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208
diff --git a/go.sum b/go.sum
index 09ed812dec..110c1cdb0b 100644
--- a/go.sum
+++ b/go.sum
@@ -233,6 +233,10 @@ github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhV
github.com/tidwall/pretty v1.0.2 h1:Z7S3cePv9Jwm1KwS0513MRaoUe3S01WPbLNV40pwWZU=
github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ=
+github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
+github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
+github.com/valyala/fasttemplate v1.2.1 h1:TVEnxayobAdVkhQfrfes2IzOB6o+z4roRkPF52WA1u4=
+github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/vektah/gqlparser v1.1.2/go.mod h1:1ycwN7Ij5njmMkPPAOaRFY4rET2Enx7IkVv3vaXspKw=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I=
github.com/xdg/stringprep v0.0.0-20180714160509-73f8eece6fdc/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
diff --git a/pkg/x/fastmatch/fastmatch.go b/pkg/x/fastmatch/fastmatch.go
new file mode 100644
index 0000000000..782f1b0ff3
--- /dev/null
+++ b/pkg/x/fastmatch/fastmatch.go
@@ -0,0 +1,217 @@
+package fastmatch
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "errors"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// Pattern matches a string and extracts key/value pairs.
+// It is the compiled form produced by Compile: a literal prefix followed by
+// a sequence of fields, each one terminated by a literal delimiter.
+// NOTE(review): matchQuoted writes to the scratch buffer, so sharing one
+// Pattern between goroutines looks unsafe - confirm before doing so.
+type Pattern struct {
+ // text to match at start of input
+ prefix string
+ // the rest of the fields
+ delimiters []delimiter
+ // non-empty field names
+ fields []string
+ // reusable buffer for unquoting strings
+ scratch []rune
+}
+
+// delimiter describes a single field of a pattern together with the literal
+// text that terminates it.
+type delimiter struct {
+ // delimiter to match at end of field
+ match string
+ // name of the field
+ name string
+ // if set to `'` or `"` we should look out for escaping quotes
+ quote byte
+}
+
+// splitFields matches a single `%{name}` field tag inside a pattern.
+// Capture group 1 holds the field name. Whitespace after `%{` is consumed by
+// the leading `\s*`; because `[^}]*` is greedy, whitespace before the closing
+// `}` remains part of the captured name.
+var splitFields = regexp.MustCompile(`%{\s*(?P<tag>[^}]*)\s*}`)
+
+// Compile compiles a pattern.
+// Patterns use `%{` and `}` delimiters to define the placing of fields in a string.
+// Two consecutive fields *must* have some delimiter text between them for the pattern to be valid.
+// For example:
+// `%{foo} %{bar}` is valid
+// `%{foo}%{bar}` is not valid
+// Pattern names currently have no restrictions apart from that they cannot contain `}`.
+// Whitespace around a name is ignored, so `%{ foo }` defines the field `foo`.
+// Please be conservative with your field names as that might change in the future...
+//
+// It returns errInvalidPattern if the pattern contains no fields or if two
+// fields are not separated by delimiter text.
+func Compile(pattern string) (*Pattern, error) {
+	tags := splitFields.FindAllStringSubmatch(pattern, -1)
+	if tags == nil {
+		// pattern contains no fields
+		return nil, errInvalidPattern
+	}
+	// Splitting on the tags yields the literal text around the fields:
+	// the first part is the prefix at the start of text, every following
+	// part is the delimiter that ends the corresponding field.
+	parts := splitFields.Split(pattern, -1)
+	prefix, matchDelimiters := parts[0], parts[1:]
+	delimiters := make([]delimiter, 0, len(tags))
+	fields := make([]string, 0, len(tags))
+	last := len(matchDelimiters) - 1
+	// Keep note of the previous delimiter for auto detecting quotes
+	prev := prefix
+	for i, m := range matchDelimiters {
+		// Do not allow empty delimiters unless it's the last field
+		if i < last && m == "" {
+			return nil, errInvalidPattern
+		}
+		// The tag expression is greedy, so whitespace before the closing
+		// brace ends up in the captured name. Strip it here.
+		tag := strings.TrimSpace(tags[i][1])
+		d := delimiter{}
+		// Autodetects quotes
+		d.reset(tag, m, prev)
+		prev = m
+		delimiters = append(delimiters, d)
+		if d.name != "" {
+			fields = append(fields, d.name)
+		}
+	}
+	return &Pattern{
+		prefix:     prefix,
+		delimiters: delimiters,
+		fields:     fields,
+	}, nil
+}
+
+// reset initializes d with the field name and the delimiter text that ends
+// the field. The field is treated as quoted only when the text before it
+// (prev) ends with the same quote character that the delimiter starts with.
+func (d *delimiter) reset(tag, match, prev string) {
+	d.name, d.match = tag, match
+	d.quote = 0
+	if q := prevQuote(prev); q == nextQuote(match) {
+		d.quote = q
+	}
+}
+
+// prevQuote returns the last byte of s if it is a single or double quote.
+// It returns 0 for an empty string or any other trailing byte.
+func prevQuote(s string) byte {
+	if s == "" {
+		return 0
+	}
+	last := s[len(s)-1]
+	if last == '"' || last == '\'' {
+		return last
+	}
+	return 0
+}
+
+// nextQuote returns the first byte of s if it is a single or double quote.
+// It returns 0 for an empty string or any other leading byte.
+func nextQuote(s string) byte {
+	if s == "" {
+		return 0
+	}
+	first := s[0]
+	if first == '"' || first == '\'' {
+		return first
+	}
+	return 0
+}
+
+// NumFields returns the number of non-empty field names in the pattern.
+func (p *Pattern) NumFields() int {
+ return len(p.fields)
+}
+
+// FieldName returns a non-empty field name by index.
+// Panics if index is out of range.
+// Use in conjunction with NumFields to check the range
+func (p *Pattern) FieldName(i int) string {
+ return p.fields[i]
+}
+
+var (
+ // errMatch is returned when the input does not match the pattern
+ errMatch = errors.New("match failed")
+ // errInvalidPattern is returned by Compile when a pattern cannot be compiled
+ errInvalidPattern = errors.New("invalid pattern")
+)
+
+// MatchString matches src against the pattern and appends the extracted
+// key/value pairs to dst, returning the extended slice.
+// Fields without a name are matched but not reported.
+// Note that if an error occurs the original slice is returned.
+func (p *Pattern) MatchString(dst []string, src string) ([]string, error) {
+	// The input must start with the literal prefix of the pattern
+	if !strings.HasPrefix(src, p.prefix) {
+		return dst, errMatch
+	}
+	rest := src[len(p.prefix):]
+	out := dst
+	for i := range p.delimiters {
+		d := &p.delimiters[i]
+		if d.match == "" {
+			// A field without a delimiter takes all of the remaining input
+			if d.name != "" {
+				out = append(out, d.name, rest)
+			}
+			return out, nil
+		}
+		value, remaining, err := p.match(rest, d.match, d.quote)
+		if err != nil {
+			return dst, err
+		}
+		if d.name != "" {
+			out = append(out, d.name, value)
+		}
+		rest = remaining
+	}
+	return out, nil
+}
+
+// match extracts a single field from the start of src.
+// The field ends at the first occurrence of delim. On success it returns the
+// field value and the text following the delimiter. On failure it returns an
+// empty match, the unmodified src and an error.
+func (p *Pattern) match(src, delim string, quote byte) (match, tail string, err error) {
+	switch quote {
+	case '"', '\'':
+		// Only trigger quoted match if there is an escaping slash (`\\`) somewhere ahead
+		if strings.IndexByte(src, '\\') != -1 {
+			return p.matchQuoted(src, delim, quote)
+		}
+	}
+	// Fast match case
+	pos := strings.Index(src, delim)
+	if pos < 0 || pos >= len(src) {
+		return "", src, errMatch
+	}
+	// Everything before the delimiter is the match, everything after it is the tail
+	return src[:pos], src[pos+len(delim):], nil
+}
+
+// matchQuoted matches a field while resolving backslash escapes in a single pass.
+// It decodes src one character at a time with strconv.UnquoteChar until it
+// reaches an unescaped quote, so it is much slower than the non-quoted match.
+// The text at that position must start with delim for the match to succeed.
+// The decoded characters are collected in the pattern's scratch buffer.
+func (p *Pattern) matchQuoted(src, delim string, quote byte) (match, tail string, err error) {
+	// Work on a local copy of the slice header, reusing the buffer's storage
+	buf := p.scratch[:0]
+	rest := src
+	for rest != "" && rest[0] != quote {
+		// Decode the next character, properly handling `\\` escapes
+		r, _, next, uerr := strconv.UnquoteChar(rest, quote)
+		if uerr != nil {
+			p.scratch = buf // Keep the (possibly grown) buffer for reuse
+			return "", src, uerr
+		}
+		buf = append(buf, r)
+		rest = next
+	}
+	p.scratch = buf // Keep the (possibly grown) buffer for reuse
+	// The text following the field must start with the delimiter
+	if !strings.HasPrefix(rest, delim) {
+		return "", src, errMatch
+	}
+	// Match found, consume the delimiter and return
+	return string(buf), rest[len(delim):], nil
+}
diff --git a/pkg/x/fastmatch/fastmatch_benchmark_test.go b/pkg/x/fastmatch/fastmatch_benchmark_test.go
new file mode 100644
index 0000000000..4cc7bbb85f
--- /dev/null
+++ b/pkg/x/fastmatch/fastmatch_benchmark_test.go
@@ -0,0 +1,45 @@
+package fastmatch_test
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "testing"
+
+ "github.com/panther-labs/panther/pkg/x/fastmatch"
+)
+
+// BenchmarkPattern_MatchString measures matching a common log format line
+// against a compiled pattern while reusing the result slice between runs.
+func BenchmarkPattern_MatchString(b *testing.B) {
+	input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326"
+	pattern := `%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}`
+	pat, err := fastmatch.Compile(pattern)
+	if err != nil {
+		b.Fatal(err)
+	}
+	// The pattern has 9 named fields, each producing a key and a value.
+	// Sizing the buffer up front keeps slice growth out of the measurement.
+	matches := make([]string, 0, 18)
+	b.ReportAllocs()
+	// Exclude pattern compilation from the timed region
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		matches, err = pat.MatchString(matches[:0], input)
+		if err != nil {
+			b.Fatal(err)
+		}
+		if len(matches) != 18 {
+			b.Fatal(matches)
+		}
+	}
+}
diff --git a/pkg/x/fastmatch/fastmatch_test.go b/pkg/x/fastmatch/fastmatch_test.go
new file mode 100644
index 0000000000..8eccd2ff10
--- /dev/null
+++ b/pkg/x/fastmatch/fastmatch_test.go
@@ -0,0 +1,103 @@
+package fastmatch
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestMatchString compiles each test pattern and checks the key/value pairs
+// that MatchString extracts from the input.
+func TestMatchString(t *testing.T) {
+ type testCase struct {
+ Name string
+ Input string
+ Pattern string
+ // Expected key/value pairs; nil means the input must fail to match
+ Matches []string
+ }
+ for _, tc := range []testCase{
+ {"two fields", "foo bar", "%{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}},
+ {"two fields prefix", "LOG: foo bar", "LOG: %{foo} %{bar}", []string{"foo", "foo", "bar", "bar"}},
+ {"no match", "foo", "%{foo} %{bar}", nil},
+ {"two fields empty last", "foo ", "%{foo} %{bar}", []string{"foo", "foo", "bar", ""}},
+ {"two fields empty first", " bar", "%{foo} %{bar}", []string{"foo", "", "bar", "bar"}},
+ {"two fields quoted first", `"\"foo\" bar" baz`, `"%{foo}" %{bar}`, []string{"foo", `"foo" bar`, "bar", "baz"}},
+ {"two fields quoted last", `foo "\"bar\"baz"`, `%{foo} "%{bar}"`, []string{"foo", `foo`, "bar", `"bar"baz`}},
+ {"two fields one empty", "foo bar", "%{foo} %{}", []string{"foo", "foo"}},
+ {"common log",
+ "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326",
+ `%{remote_ip} %{identity} %{user} [%{timestamp}] "%{method} %{request_uri} %{protocol}" %{status} %{bytes_sent}`,
+ []string{
+ "remote_ip", "127.0.0.1",
+ "identity", "-",
+ "user", "frank",
+ "timestamp", "10/Oct/2000:13:55:36 -0700",
+ "method", "GET",
+ "request_uri", "/apache_pb.gif",
+ "protocol", "HTTP/1.0",
+ "status", "200",
+ "bytes_sent", "2326",
+ },
+ },
+ } {
+ // Capture the loop variable for the subtest closure
+ tc := tc
+ t.Run(tc.Name, func(t *testing.T) {
+ assert := require.New(t)
+ p, err := Compile(tc.Pattern)
+ assert.NoError(err)
+ match, err := p.MatchString(nil, tc.Input)
+ // An error is expected exactly when no matches are expected
+ assert.Equal(tc.Matches != nil, err == nil)
+ assert.Equal(tc.Matches, match, "invalid match\nexpect: %v\nactual: %v", tc.Matches, match)
+ })
+ }
+}
+
+// TestPattern_match exercises the low-level match method directly with
+// unquoted and quoted fields.
+func TestPattern_match(t *testing.T) {
+ // nolint:maligned
+ type testCase struct {
+ Name string
+ Input string
+ Delimiter string
+ // Quote character passed to match; 0 means the field is not quoted
+ Quote byte
+ // Expected text left after the delimiter is consumed
+ Tail string
+ // Expected field value
+ Match string
+ WantErr bool
+ }
+ for _, tc := range []testCase{
+ {"simple", "foo ", " ", 0, "", "foo", false},
+ {"double quote", `foo \"bar\"" `, "\" ", '"', "", `foo "bar"`, false},
+ {"single quote", `foo \'bar\'' `, "' ", '\'', "", `foo 'bar'`, false},
+ } {
+ // Capture the loop variable for the subtest closure
+ tc := tc
+ t.Run(tc.Name, func(t *testing.T) {
+ assert := require.New(t)
+ p := Pattern{}
+ match, tail, err := p.match(tc.Input, tc.Delimiter, tc.Quote)
+ if tc.WantErr {
+ // On failure the input must be returned untouched
+ assert.Error(err)
+ assert.Empty(match)
+ assert.Equal(tc.Input, tail)
+ return
+ }
+ assert.NoError(err)
+ assert.Equal(tc.Match, match)
+ assert.Equal(tc.Tail, tail)
+ })
+ }
+}
diff --git a/pkg/x/gork/builtin.go b/pkg/x/gork/builtin.go
new file mode 100644
index 0000000000..5349b3fb25
--- /dev/null
+++ b/pkg/x/gork/builtin.go
@@ -0,0 +1,102 @@
+package gork
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+// BuiltinPatterns holds the definitions of the built-in named patterns.
+// Each non-empty line is a `NAME expression` pair where the expression is a
+// regular expression that can reference other patterns with `%{NAME}`.
+// Lines starting with `#` are comments.
+// Patterns based on https://github.com/logrusorgru/grokky
+// nolint: lll
+const BuiltinPatterns = `
+DATA .*?
+GREEDYDATA .*
+NOTSPACE \S+
+SPACE \s*
+WORD \b\w+\b
+QUOTEDSTRING "(?:\\.|[^\\"]+)+"|""|'(?:\\.|[^\\']+)+'|''
+HEXDIGIT [0-9a-fA-F]
+UUID %{HEXDIGIT}{8}-(?:%{HEXDIGIT}{4}-){3}%{HEXDIGIT}{12}
+
+# Numbers
+INT [+-]?(?:[0-9]+)
+BASE10NUM [+-]?(?:[0-9]+(?:\.[0-9]+)?)|\.[0-9]+
+NUMBER %{BASE10NUM}
+BASE16NUM (?:0[xX])?%{HEXDIGIT}+
+POSINT \b[1-9][0-9]*\b
+NONNEGINT \b[0-9]+\b
+
+# Network
+CISCOMAC (?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}
+WINDOWSMAC (?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}
+COMMONMAC (?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}
+MAC %{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}
+IPV6 \b(?:(?:(?:%{HEXDIGIT}{1,4}:){7}(?:%{HEXDIGIT}{1,4}|:))|(?:(?:%{HEXDIGIT}{1,4}:){6}(?::%{HEXDIGIT}{1,4}|(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(?:(?:%{HEXDIGIT}{1,4}:){5}(?:(?:(?::%{HEXDIGIT}{1,4}){1,2})|:(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|((%{HEXDIGIT}{1,4}:){4}(((:%{HEXDIGIT}{1,4}){1,3})|((:%{HEXDIGIT}{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|((%{HEXDIGIT}{1,4}:){3}(((:%{HEXDIGIT}{1,4}){1,4})|((:%{HEXDIGIT}{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|((%{HEXDIGIT}{1,4}:){2}(((:%{HEXDIGIT}{1,4}){1,5})|((:%{HEXDIGIT}{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|((%{HEXDIGIT}{1,4}:){1}(((:%{HEXDIGIT}{1,4}){1,6})|((:%{HEXDIGIT}{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:%{HEXDIGIT}{1,4}){1,7})|((:%{HEXDIGIT}{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\b
+IPV4INT 25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9]
+IPV4 \b(?:(?:%{IPV4INT})\.){3}(?:%{IPV4INT})\b
+IP %{IPV6}|%{IPV4}
+HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
+IPORHOST %{IP}|%{HOSTNAME}
+HOSTPORT %{IPORHOST}:%{POSINT}
+
+# URI
+
+USERNAME [a-zA-Z0-9._-]+
+UNIXPATH (?:/[\w_%!$@:.,-]?/?)(\S+)?
+WINPATH (?:[A-Za-z]:|\\)(?:\\[^\\?*]*)+
+PATH (?:%{UNIXPATH}|%{WINPATH})
+TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
+URIPROTO [A-Za-z]+(?:\+[A-Za-z+]+)?
+URIHOST %{IPORHOST}(?::%{POSINT})?
+URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+
+URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
+URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
+URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
+
+# Timestamps
+MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b
+MONTHNUM 0?[1-9]|1[0-2]
+MONTHNUM2 0[1-9]|1[0-2]
+MONTHDAY (?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]
+DAY \b(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)\b
+YEAR (?:\d\d){1,2}
+HOUR 2[0123]|[01]?[0-9]
+MINUTE [0-5][0-9]
+SECOND (?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?
+KITCHEN %{HOUR}:%{MINUTE}
+TIME %{HOUR}:%{MINUTE}:%{SECOND}
+DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
+DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
+ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
+ISO8601_SECOND (?:%{SECOND}|60)
+TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
+DATE %{DATE_US}|%{DATE_EU}
+DATETIME %{DATE}[- ]%{TIME}
+TZ [A-Z]{3}
+TZOFFSET [+-]\d{4}
+TIMESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
+TIMESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
+TIMESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
+TIMESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
+SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
+HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{TZOFFSET}
+
+# Aliases
+NS %{NOTSPACE}
+QS %{QUOTEDSTRING}
+HOST %{HOSTNAME}
+PID %{POSINT}
+USER %{USERNAME}
+`
diff --git a/pkg/x/gork/builtin_test.go b/pkg/x/gork/builtin_test.go
new file mode 100644
index 0000000000..cb91c8798f
--- /dev/null
+++ b/pkg/x/gork/builtin_test.go
@@ -0,0 +1,111 @@
+package gork
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// patternTests lists test cases for the builtin patterns.
+// Each entry is {pattern name, input, expected match}.
+// An entry without a third element expects the input to fail to match.
+var patternTests = [][]string{
+ {"DATA", "", ""},
+ {"WORD", " foo_bar.", "foo_bar"},
+ {"WORD", " "},
+ {"NOTSPACE", "foo ", "foo"},
+ {"NOTSPACE", " foo", "foo"},
+ {"NOTSPACE", "foo\t", "foo"},
+ {"NOTSPACE", "\tfoo", "foo"},
+ {"NOTSPACE", "\t foo", "foo"},
+ {"QUOTEDSTRING", `"foo"`, `"foo"`},
+ {"QS", `"foo"`, `"foo"`},
+ {"QS", `"foo" "`, `"foo"`},
+ {"QS", `"foo \"bar\""`, `"foo \"bar\""`},
+ {"QUOTEDSTRING", `'foo'`, `'foo'`},
+ {"QS", `'foo'`, `'foo'`},
+ {"QS", `'foo' '`, `'foo'`},
+ {"QS", `'foo \'bar\''`, `'foo \'bar\''`},
+ {"SPACE", " foo", " "},
+ {"SPACE", "\tfoo", "\t"},
+ {"SPACE", ".foo", ""},
+ {"INT", "42", "42"},
+ {"INT", "+42", "+42"},
+ {"INT", "-42", "-42"},
+ {"INT", "-42.0", "-42"},
+ {"INT", "0", "0"},
+ {"INT", "01", "01"},
+ {"INT", "001", "001"},
+ {"IP", "127.0.0.1", "127.0.0.1"},
+ {"IP", "0.0.0.0", "0.0.0.0"},
+ {"IP", "300.0.0.0"},
+ {"IP", "255.0.0.0", "255.0.0.0"},
+ {"IP", "255.255.255.255", "255.255.255.255"},
+ {"IP", "255.2555.255.255"},
+ {"IP", "300.0"},
+ {"IP", "2001:0db8:0000:0000:0000:8a2e:0370:7334", "2001:0db8:0000:0000:0000:8a2e:0370:7334"},
+ {"IP", "2001:db8::8a2e:370:7334", "2001:db8::8a2e:370:7334"},
+ {"MONTHDAY", "01", "01"},
+ {"MONTHDAY", "31", "31"},
+ {"MONTHDAY", "10", "10"},
+ {"MONTH", "/Oct", "Oct"},
+ {"YEAR", "2000", "2000"},
+ {"TIME", "13:55:36", "13:55:36"},
+ {"TZOFFSET", "-0700", "-0700"},
+ {"HTTPDATE", "10/Oct/2000:13:55:36 -0700", "10/Oct/2000:13:55:36 -0700"},
+}
+
+// TestBuiltinPatterns loads the builtin patterns into an empty environment
+// and runs every entry of patternTests against it.
+// Patterns that have no passing test case are logged at the end.
+func TestBuiltinPatterns(t *testing.T) {
+ assert := require.New(t)
+ env := Env{}
+ patterns, err := ReadPatterns(strings.NewReader(BuiltinPatterns))
+ assert.NoError(err)
+ assert.NoError(env.SetMap(patterns))
+ // Counts the successful match tests per pattern name
+ numTests := map[string]int{}
+ for _, tc := range patternTests {
+ name, input, expect := tc[0], tc[1], tc[2:]
+ t.Run(name+"_"+input, func(t *testing.T) {
+ assert := require.New(t)
+ src := name
+ // A bare pattern name is wrapped in a tag that captures the match as field "actual"
+ if !strings.Contains(src, "%{") {
+ src = "%{" + src + ":actual}"
+ if len(expect) == 1 {
+ expect = []string{"actual", expect[0]}
+ }
+ }
+ pattern, err := env.Compile(src)
+ assert.NoError(err)
+ matches, err := pattern.MatchString(nil, input)
+
+ // No expected values means the input must not match
+ if len(expect) == 0 {
+ assert.Error(err)
+ assert.Nil(matches)
+ } else {
+ assert.NoError(err)
+ assert.Equal(expect, matches, "match %q failed", name)
+ numTests[name]++
+ }
+ })
+ }
+ for name := range patterns {
+ if numTests[name] == 0 {
+ t.Logf("no tests for pattern %q", name)
+ }
+ }
+}
diff --git a/pkg/x/gork/gork.go b/pkg/x/gork/gork.go
new file mode 100644
index 0000000000..9e42e7b176
--- /dev/null
+++ b/pkg/x/gork/gork.go
@@ -0,0 +1,281 @@
+package gork
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "regexp"
+ "strings"
+
+ "github.com/pkg/errors"
+ "github.com/valyala/fasttemplate"
+)
+
+// Delimiters that enclose a pattern reference tag such as `%{NAME:field}`
+const (
+ startDelimiter = "%{"
+ endDelimiter = "}"
+)
+
+// Pattern can match strings to extract key/value pairs
+type Pattern struct {
+	// src is the source text the pattern was compiled from
+	src string
+	// expr is the fully expanded regular expression
+	expr *regexp.Regexp
+	// names holds the name of every capture group of expr in order.
+	// Unnamed groups have an empty name.
+	names []string
+}
+
+// Regexp returns the full regular expression for this pattern
+func (p *Pattern) Regexp() string {
+	return p.expr.String()
+}
+
+// String returns the pattern
+func (p *Pattern) String() string {
+	return p.src
+}
+
+// MatchString matches src appending key/value pairs to dst.
+// One pair is appended for every named capture group that took part in the
+// match, in the order the groups appear in the expression. Unnamed groups and
+// named groups inside an alternative that did not match are skipped.
+// If the text does not match an error is returned together with the unmodified dst.
+func (p *Pattern) MatchString(dst []string, src string) ([]string, error) {
+	matches := p.expr.FindStringSubmatchIndex(src)
+	if matches == nil {
+		return dst, errors.New("No match")
+	}
+	for i, name := range p.names {
+		// We skip unnamed groups
+		if name == "" {
+			continue
+		}
+		// The first index pair belongs to the full match, so the pair for
+		// capture group i+1 starts at offset 2*(i+1). Indexing by position
+		// keeps names and offsets aligned when unnamed groups are skipped.
+		pos := 2 * (i + 1)
+		if pos+1 >= len(matches) {
+			break
+		}
+		start, end := matches[pos], matches[pos+1]
+		// Groups that did not take part in the match have negative offsets
+		if start < 0 || end < 0 {
+			continue
+		}
+		dst = append(dst, name, src[start:end])
+	}
+	return dst, nil
+}
+
+// Env is a collection of named patterns
+type Env struct {
+ // compiled patterns by name; nil until the first pattern is stored
+ patterns map[string]*Pattern
+}
+
+// New returns an environment containing basic patterns.
+// The result is a clone of the default environment, so adding patterns to it
+// does not change the environments returned by other calls to New.
+func New() *Env {
+ return defaultEnv.Clone()
+}
+
+// defaultEnv is the environment holding the builtin patterns.
+var defaultEnv = mustDefaultEnv()
+
+// mustDefaultEnv builds an environment from BuiltinPatterns.
+// It panics if the builtin patterns cannot be read or compiled.
+func mustDefaultEnv() *Env {
+	env := &Env{}
+	if err := env.ReadPatterns(strings.NewReader(BuiltinPatterns)); err != nil {
+		panic(err)
+	}
+	return env
+}
+
+// ReadPatterns reads pattern definitions from r, compiles them and adds them
+// to the environment. It fails if the definitions cannot be read or if
+// SetMap rejects them.
+func (e *Env) ReadPatterns(r io.Reader) error {
+	patterns, err := ReadPatterns(r)
+	if err != nil {
+		return err
+	}
+	return e.SetMap(patterns)
+}
+
+// ReadPatterns reads named patterns from an io.Reader.
+// Every line holds a single `NAME expression` definition. Empty lines,
+// whitespace-only lines and lines whose first non-blank character is `#` are
+// skipped. Leading whitespace before a definition is ignored; whitespace at
+// the end of a definition stays part of the expression.
+// If a name is defined more than once the last definition wins.
+// It returns the expression source of each pattern by name, or an error if a
+// line is not a valid definition or reading fails.
+func ReadPatterns(r io.Reader) (map[string]string, error) {
+	patterns := make(map[string]string)
+	scanner := bufio.NewScanner(r)
+	numLines := 0
+	for scanner.Scan() {
+		numLines++
+		// Tolerate indented definitions and comments
+		line := strings.TrimLeft(scanner.Text(), " \t")
+		if strings.TrimSpace(line) == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		match := patternDef.FindStringSubmatch(line)
+		if match == nil {
+			return nil, errors.Errorf("invalid pattern definition at line #%d", numLines)
+		}
+		name, src := match[1], match[2]
+		patterns[name] = src
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+	return patterns, nil
+}
+
+// patternDef matches a pattern definition line: a name made of word
+// characters, some whitespace and the expression taking the rest of the line.
+var patternDef = regexp.MustCompile(`^(\w+)\s+(.*)`)
+
+// SetMap adds multiple patterns to an environment.
+// The patterns may reference each other in any order.
+// It fails if a name is already defined in the environment or if a pattern
+// cannot be compiled. The environment is only modified if all patterns are
+// compiled successfully.
+func (e *Env) SetMap(patterns map[string]string) error {
+	// All work happens on a clone so that a failure leaves e untouched
+	child := e.Clone()
+	for name, pattern := range patterns {
+		// We check for duplicate only in the parent environment.
+		if err := e.checkDuplicate(name); err != nil {
+			return err
+		}
+		// Compilation is recursive so we might have compiled this already
+		if child.lookup(name) != nil {
+			continue
+		}
+		expr, err := child.compile(name, pattern, patterns, nil)
+		if err != nil {
+			return err
+		}
+		// Store the result in the clone so that patterns referencing this
+		// one later on reuse it instead of compiling it again
+		child.set(name, expr)
+	}
+	// Everything compiled, commit the results to the environment
+	for name, pattern := range child.patterns {
+		e.set(name, pattern)
+	}
+	return nil
+}
+
+// Clone returns a new environment holding the same named patterns.
+// The map is copied but the compiled patterns themselves are shared.
+func (e *Env) Clone() *Env {
+	clone := &Env{
+		patterns: make(map[string]*Pattern, len(e.patterns)),
+	}
+	for name := range e.patterns {
+		clone.patterns[name] = e.patterns[name]
+	}
+	return clone
+}
+
+// MustSet compiles and stores a named pattern or panics if the pattern is invalid or exists already.
+func (e *Env) MustSet(name string, pattern string) {
+ if err := e.Set(name, pattern); err != nil {
+ panic(err)
+ }
+}
+
+// Set compiles and stores a named pattern or fails if the pattern is invalid or exists already.
+func (e *Env) Set(name string, pattern string) error {
+ if err := e.checkDuplicate(name); err != nil {
+ return err
+ }
+ // The pattern can only reference patterns already stored in the environment
+ expr, err := e.compile(name, pattern, nil, nil)
+ if err != nil {
+ return err
+ }
+ e.set(name, expr)
+ return nil
+}
+
+// Compile compiles a pattern expanding named patterns.
+// The compiled pattern is returned without being stored in the environment.
+func (e *Env) Compile(pattern string) (*Pattern, error) {
+ return e.compile(pattern, pattern, nil, nil)
+}
+
+var (
+	// validPatternName matches names of patterns: upper case letters, digits
+	// and underscores, starting with a letter.
+	validPatternName = regexp.MustCompile(`^[A-Z][A-Z0-9_]*$`)
+	// validFieldName matches names of fields: letters, digits and
+	// underscores, not starting with a digit. The expression is anchored so
+	// that the whole name is validated and not just a part of it.
+	validFieldName = regexp.MustCompile(`^[A-Za-z_][A-Za-z0-9_]*$`)
+)
+
+// compile expands all `%{NAME}` and `%{NAME:field}` tags in src and compiles
+// the result to a regular expression.
+// - root is the name used for the pattern in error messages.
+// - patterns holds sources of patterns that are compiled on demand when a
+// referenced name is not found in the environment. Patterns compiled this way
+// are stored in the environment.
+// - visited holds the names of the patterns currently being compiled and is
+// used to detect recursive definitions.
+// It fails if src is malformed, a name is invalid, unresolved or recursive,
+// or the expanded expression is not a valid regular expression.
+func (e *Env) compile(root, src string, patterns map[string]string, visited []string) (*Pattern, error) {
+	// NewTemplate reports a malformed template (e.g. an unclosed tag) as an
+	// error whereas fasttemplate.New panics
+	tpl, err := fasttemplate.NewTemplate(src, startDelimiter, endDelimiter)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to parse pattern %q", root)
+	}
+	s := strings.Builder{}
+	_, err = tpl.ExecuteFunc(&s, func(w io.Writer, tag string) (int, error) {
+		// TODO: Allow arbitrary field names by switching named groups with auto-incrementing name
+		// To achieve this we need to build the 'names' slice as we render the template
+		name, field := splitTag(tag)
+		if !validPatternName.MatchString(name) {
+			return 0, errors.Errorf("invalid pattern name %q in tag %q of pattern %q", name, tag, root)
+		}
+		if field != "" && !validFieldName.MatchString(field) {
+			return 0, errors.Errorf("invalid field name %q in tag %q of pattern %q", field, tag, root)
+		}
+		for _, seen := range visited {
+			if seen == name {
+				// Report the whole chain of patterns that led to the recursion
+				return 0, errors.Errorf("recursive pattern %q %v", root, visited)
+			}
+		}
+		expr := e.lookup(name)
+		if expr == nil {
+			// Try to compile the pattern
+			subsrc, ok := patterns[name]
+			if !ok {
+				return 0, errors.Errorf("unresolved pattern %q", name)
+			}
+			subexpr, err := e.compile(name, subsrc, patterns, append(visited, name))
+			if err != nil {
+				return 0, err
+			}
+			// Avoid duplicate compilations
+			e.set(name, subexpr)
+			expr = subexpr
+		}
+		// Tags with a field name become named groups so that the match can be extracted
+		var group string
+		if field == "" {
+			group = fmt.Sprintf("(?:%s)", expr.Regexp())
+		} else {
+			group = fmt.Sprintf("(?P<%s>%s)", field, expr.Regexp())
+		}
+		return w.Write([]byte(group))
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to expand pattern %q", root)
+	}
+
+	expr, err := regexp.Compile(s.String())
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to compile pattern %q", root)
+	}
+	return &Pattern{
+		src:  src,
+		expr: expr,
+		// The first name belongs to the full match and is always empty
+		names: expr.SubexpNames()[1:],
+	}, nil
+}
+
+// lookup returns the pattern stored under name or nil if there is none.
+func (e *Env) lookup(name string) *Pattern {
+ return e.patterns[name]
+}
+
+// set stores a pattern under name replacing any previous pattern with that name.
+func (e *Env) set(name string, expr *Pattern) {
+ // The map is allocated on first use so that a zero Env is usable
+ if e.patterns == nil {
+ e.patterns = make(map[string]*Pattern)
+ }
+ e.patterns[name] = expr
+}
+
+// checkDuplicate returns an error if a pattern is already stored under name.
+func (e *Env) checkDuplicate(name string) error {
+	if duplicate := e.lookup(name); duplicate != nil {
+		return errors.Errorf("expression %q already defined as %q", name, duplicate.String())
+	}
+	return nil
+}
+
+// splitTag splits a tag of the form `PATTERN:field` at the first colon.
+// Whitespace around the tag is removed before splitting.
+// A tag without a colon has an empty field.
+func splitTag(tag string) (pattern, field string) {
+	trimmed := strings.TrimSpace(tag)
+	colon := strings.IndexByte(trimmed, ':')
+	if colon < 0 {
+		return trimmed, ""
+	}
+	return trimmed[:colon], trimmed[colon+1:]
+}
diff --git a/pkg/x/gork/gork_benchmark_test.go b/pkg/x/gork/gork_benchmark_test.go
new file mode 100644
index 0000000000..0c524cb56e
--- /dev/null
+++ b/pkg/x/gork/gork_benchmark_test.go
@@ -0,0 +1,47 @@
+package gork_test
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "testing"
+
+ "github.com/panther-labs/panther/pkg/x/gork"
+)
+
+// BenchmarkMatchString measures matching a common log format line against a
+// compiled pattern while reusing the result slice between runs.
+//nolint:lll
+func BenchmarkMatchString(b *testing.B) {
+	env := gork.New()
+	pattern := `%{NS:remote_ip} %{NS:identity} %{NS:user} \[%{HTTPDATE:timestamp}\] "%{NS:method} %{NS:request_uri} %{NS:protocol}" %{NS:status} %{NS:bytes_sent}`
+	expr, err := env.Compile(pattern)
+	if err != nil {
+		b.Fatal(err)
+	}
+	input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326"
+	// The pattern has 9 named fields, each producing a key and a value.
+	// Sizing the buffer up front keeps slice growth out of the measurement.
+	matches := make([]string, 0, 18)
+	b.ReportAllocs()
+	// Exclude environment setup and pattern compilation from the timed region
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		matches, err = expr.MatchString(matches[:0], input)
+		if err != nil {
+			b.Fatal(err)
+		}
+		// Stop at the first bad result instead of reporting it on every iteration
+		if len(matches) != 18 {
+			b.Fatal(matches)
+		}
+	}
+}
diff --git a/pkg/x/gork/gork_test.go b/pkg/x/gork/gork_test.go
new file mode 100644
index 0000000000..3a024708a9
--- /dev/null
+++ b/pkg/x/gork/gork_test.go
@@ -0,0 +1,69 @@
+package gork
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+// TestMatchString compiles a pattern for the common log format in the
+// default environment and checks the extracted key/value pairs.
+// nolint:lll
+func TestMatchString(t *testing.T) {
+ assert := require.New(t)
+ env := New()
+ src := `%{DATA:remote_ip} %{DATA:identity} %{DATA:user} \[%{HTTPDATE:timestamp}\] "%{DATA:method} %{DATA:request_uri} %{DATA:protocol}" %{DATA:status} %{DATA:bytes_sent}$`
+ pattern, err := env.Compile(src)
+ assert.NoError(err)
+ input := "127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326"
+ matches, err := pattern.MatchString(nil, input)
+ assert.NoError(err)
+ assert.Equal([]string{
+ "remote_ip", "127.0.0.1",
+ "identity", "-",
+ "user", "frank",
+ "timestamp", "10/Oct/2000:13:55:36 -0700",
+ "method", "GET",
+ "request_uri", "/apache_pb.gif",
+ "protocol", "HTTP/1.0",
+ "status", "200",
+ "bytes_sent", "2326",
+ }, matches)
+}
+
+// TestRecursive checks that reading recursive pattern definitions fails with
+// an error mentioning the recursion.
+func TestRecursive(t *testing.T) {
+ assert := require.New(t)
+ // A pattern referencing itself
+ {
+ env := Env{}
+ patterns := `FOO %{FOO}`
+ err := env.ReadPatterns(strings.NewReader(patterns))
+ assert.Error(err)
+ assert.Contains(err.Error(), "recursive")
+ }
+ // Two patterns referencing each other
+ {
+ env := Env{}
+ patterns := `
+FOO %{BAR}
+BAR %{FOO}`
+ err := env.ReadPatterns(strings.NewReader(patterns))
+ assert.Error(err)
+ assert.Contains(err.Error(), "recursive")
+ }
+}