Skip to content

Commit

Permalink
Adds YAML-based tests for log parsers (#1455)
Browse files Browse the repository at this point in the history
* Adds YAML-based testing for log parsers

* Remove duplicate code in testutil package

* Add comments and remove unused helper

* Remove checks

* Remove commented line

* Update cloudflarelogs to YAML tests

Co-authored-by: Kostas Papageorgiou <[email protected]>
  • Loading branch information
alxarch and Kostas Papageorgiou authored Sep 30, 2020
1 parent a0702a2 commit b373c60
Show file tree
Hide file tree
Showing 10 changed files with 974 additions and 979 deletions.
236 changes: 236 additions & 0 deletions internal/log_analysis/log_processor/logtypes/logtesting/logtesting.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
package logtesting

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"io"
"os"
"reflect"
"strings"
"testing"
"text/template"
"time"

jsoniter "github.com/json-iterator/go"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"

"github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog/omitempty"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog/tcodec"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/registry"
)

// RunTestsFromYAML reads all test cases in a YAML file and runs them.
func RunTestsFromYAML(t *testing.T, resolve *logtypes.Registry, filename string) {
t.Helper()
f, err := os.Open(filename)
if err != nil {
t.Errorf("failed to open %q: %s", filename, err)
return
}
dec := yaml.NewDecoder(f)
dec.SetStrict(true)
for {
testCase := TestCase{
Resolve: resolve,
}
if err := dec.Decode(&testCase); err != nil {
if err == io.EOF {
return
}
t.Fatalf("failed to read YAML test case: %s", err)
return
}
t.Run(testCase.Name, testCase.Run)
}
}

// RunTests is a helper that runs all test cases in sequence
func RunTests(t *testing.T, tests ...TestCase) {
t.Helper()
for _, tc := range tests {
t.Run(tc.Name, tc.Run)
}
}

// TestCase is a test case validating the input and output for a parser
// TODO: add fields to allow test cases to test that a parser produces errors
type TestCase struct {
Name string `json:"name" yaml:"name"`
Input string `json:"input" yaml:"input"`
Result string `json:"result" yaml:"result"`
Results []string `json:"results" yaml:"results"`
LogType string `json:"logType" yaml:"logType"`
Resolve *logtypes.Registry `json:"-" yaml:"-"`
}

// Run runs a test case
func (c *TestCase) Run(t *testing.T) {
TestRegisteredParser(t, c.Resolve, c.LogType, c.Input, append([]string{c.Result}, c.Results...)...)
}

// TestRegisteredParser is a helper to run a test for a registered log parser
func TestRegisteredParser(t *testing.T, resolve *logtypes.Registry, logType, input string, expect ...string) {
t.Helper()
assert := require.New(t)
if resolve == nil {
resolve = registry.Default()
}
entry := resolve.Get(logType)
assert.NotNil(entry, "unregistered parser %q", logType)
p, err := entry.NewParser(nil)
assert.NoError(err, "failed to create log parser")
results, err := p.ParseLog(input)
assert.NoError(err)
if len(expect) == 0 {
require.Nil(t, results)
return
}
schema := entry.Schema()
indicators := pantherlog.FieldSetFromType(reflect.TypeOf(schema))
assert.NotNil(results)
assert.Equal(len(expect), len(results), "Invalid number of patherlog results produced by parser")
for i, result := range results {
expect := expect[i]
expect = mustRenderExpect(expect, logType)
TestResult(t, expect, result, indicators...)
}
}

// JSON returns a jsoniter.API to be used for parser tests.
// The returned API forces omitempty to all fields and relies on global tcodec registration.
// It does not include conversion of output timestamp formats to make it easier to write test cases.
func JSON() jsoniter.API {
api := jsoniter.Config{
EscapeHTML: true,
SortMapKeys: true,
ValidateJsonRawMessage: true,
}.Froze()
api.RegisterExtension(omitempty.New("json"))
return api
}

// Checks that `actual` is a parser result matching `expect`
// If expect.RowID is empty it checks if actual has non-empty RowID
// If expect.EventTime is zero it checks if actual.EventTime equals actual.ParseTime
// If expect.ParseTime is zero it checks if actual.ParseTime is non-zero
// Otherwise equality is checked strictly
func TestResult(t *testing.T, expect string, actual *pantherlog.Result, indicators ...pantherlog.FieldID) {
t.Helper()
logType := jsoniter.Get([]byte(expect), pantherlog.FieldLogTypeJSON).ToString()
require.Equal(t, logType, actual.PantherLogType)
expectResult := pantherlog.Result{}
if indicators == nil {
indicators = pantherlog.FieldSetFromJSON([]byte(expect))
}
require.NoError(t, unmarshalResultJSON([]byte(expect), &expectResult, indicators))
var expectAny map[string]interface{}
require.NoError(t, jsoniter.UnmarshalFromString(expect, &expectAny))
var actualAny map[string]interface{}
data, err := JSON().Marshal(actual)
require.NoError(t, err)
require.NoError(t, jsoniter.Unmarshal(data, &actualAny))
require.False(t, actual.PantherParseTime.IsZero(), "zero parse time")
if expectResult.PantherEventTime.IsZero() {
EqualTimestamp(t, actual.PantherParseTime, actual.PantherEventTime, "event time not equal to parse time")
} else {
EqualTimestamp(t, expectResult.PantherEventTime, actual.PantherEventTime, "invalid event time")
}
require.NotEmpty(t, actual.PantherRowID)
// The following dance ensures that produced JSON matches values from `actual` result
require.Equal(t, actual.PantherEventTime.UTC().Format(time.RFC3339Nano), actualAny["p_event_time"], "Invalid JSON event time")
require.Equal(t, actual.PantherParseTime.UTC().Format(time.RFC3339Nano), actualAny["p_parse_time"], "Invalid JSON parse time")
require.Equal(t, actual.PantherRowID, actualAny["p_row_id"], "Invalid JSON row id")
// Since these values are checked to be valid we assign them to expect to check the rest of the JSON values
expectAny["p_event_time"] = actualAny["p_event_time"]
expectAny["p_parse_time"] = actualAny["p_parse_time"]
expectAny["p_row_id"] = actualAny["p_row_id"]
// By now expect JSON and actual JSON must be equal
expectJSON, err := jsoniter.MarshalToString(expectAny)
require.NoError(t, err)
actualJSON, err := jsoniter.MarshalToString(actualAny)
require.NoError(t, err)
require.JSONEq(t, expectJSON, actualJSON)
}

// EqualTimestamp is a helper that checks timestamps for equality with human readable message
func EqualTimestamp(t *testing.T, expect, actual time.Time, msgAndArgs ...interface{}) {
t.Helper()
require.False(t, actual.IsZero(), "zero timestamp")
require.Equal(t, expect.UTC().Format(time.RFC3339Nano), actual.UTC().Format(time.RFC3339Nano), msgAndArgs...)
}

// unmarshalResultJSON unmarshals a result from JSON
// The parsing is inefficient. It's purpose is to be used in tests to verify output results.
func unmarshalResultJSON(data []byte, r *pantherlog.Result, indicators pantherlog.FieldSet) error {
tmp := struct {
LogType string `json:"p_log_type"`
EventTime tcodec.Time `json:"p_event_time" tcodec:"rfc3339"`
ParseTime tcodec.Time `json:"p_parse_time" tcodec:"rfc3339"`
RowID string `json:"p_row_id"`
SourceID string `json:"p_source_id"`
SourceLabel string `json:"p_source_label"`
}{}
if err := jsoniter.Unmarshal(data, &tmp); err != nil {
return err
}
values := pantherlog.BlankValueBuffer()
for _, kind := range indicators {
fieldName := pantherlog.FieldNameJSON(kind)
any := jsoniter.Get(data, fieldName)
if any == nil || any.ValueType() == jsoniter.InvalidValue {
continue
}
var v []string
any.ToVal(&v)
if v != nil {
values.WriteValues(kind, v...)
}
}
*r = pantherlog.Result{
CoreFields: pantherlog.CoreFields{
PantherLogType: tmp.LogType,
PantherRowID: tmp.RowID,
PantherEventTime: tmp.EventTime,
PantherParseTime: tmp.ParseTime,
PantherSourceID: tmp.SourceID,
PantherSourceLabel: tmp.SourceLabel,
},
}
values.WriteValuesTo(r)
values.Recycle()
return nil
}

func mustRenderExpect(expect, logType string) string {
tpl := template.Must(template.New(logType).Parse(expect))
s := strings.Builder{}
data := &struct {
LogType string
}{
LogType: logType,
}
if err := tpl.Execute(&s, &data); err != nil {
panic(err)
}

return s.String()
}
6 changes: 3 additions & 3 deletions internal/log_analysis/log_processor/pantherlog/result_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ import (
"github.com/stretchr/testify/require"

"github.com/panther-labs/panther/internal/log_analysis/awsglue"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes/logtesting"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog/null"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog/omitempty"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog/tcodec"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/parsers"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/parsers/testutil"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/parsers/timestamp"
"github.com/panther-labs/panther/pkg/box"
"github.com/panther-labs/panther/pkg/unbox"
Expand Down Expand Up @@ -88,7 +88,7 @@ func TestNewResultBuilder(t *testing.T) {
result, err := b.BuildResult("TestEvent", &event)
require.NoError(t, err)
require.Equal(t, "TestEvent", result.PantherLogType)
testutil.EqualTimestamp(t, now, result.PantherParseTime)
logtesting.EqualTimestamp(t, now, result.PantherParseTime)
// Ensure event time is zero time
require.Equal(t, time.Time{}, result.PantherEventTime)
require.Equal(t, rowID, result.PantherRowID)
Expand Down Expand Up @@ -136,7 +136,7 @@ func TestOldResults(t *testing.T) {
api := buildAPI()
result := event.Result()
require.Equal(t, "Foo", result.PantherLogType)
testutil.EqualTimestamp(t, now, result.PantherParseTime)
logtesting.EqualTimestamp(t, now, result.PantherParseTime)
// Ensure event time is zero time
//require.Equal(t, time.Time{}, result.PantherEventTime)
require.Equal(t, rowID, result.PantherRowID)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package cloudflarelogs_test

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"testing"

"github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes/logtesting"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/registry"
)

func TestCloudflareLogParsers(t *testing.T) {
logtesting.RunTestsFromYAML(t, registry.Default(), "./testdata/cloudflare_tests.yml")
}
Loading

0 comments on commit b373c60

Please sign in to comment.