diff --git a/cmd/devtools/customlogs/customlogs/infer.go b/cmd/devtools/customlogs/customlogs/infer.go
new file mode 100644
index 0000000000..ff11aadcb1
--- /dev/null
+++ b/cmd/devtools/customlogs/customlogs/infer.go
@@ -0,0 +1,174 @@
+package customlogs
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+import (
+ "bufio"
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+
+ jsoniter "github.com/json-iterator/go"
+ "github.com/pkg/errors"
+ "go.uber.org/zap"
+ "gopkg.in/yaml.v2"
+
+ "github.com/panther-labs/panther/internal/log_analysis/log_processor/customlogs"
+ "github.com/panther-labs/panther/internal/log_analysis/log_processor/logschema"
+ "github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes"
+)
+
+type InferOpts struct {
+ SkipTest *bool
+}
+
+var inferJsoniter = jsoniter.Config{
+ UseNumber: true,
+}.Froze()
+
+// Infers a schema given a sample of logs
+func Infer(logger *zap.Logger, opts *InferOpts) {
+ inputFiles := flag.Args()
+ if len(inputFiles) == 0 {
+ logger.Fatal("You need to specify at least one file")
+ flag.Usage()
+ }
+
+ var valueSchema *logschema.ValueSchema
+ var err error
+ for _, file := range inputFiles {
+ valueSchema, err = inferFromFile(valueSchema, file)
+ if err != nil {
+ logger.Fatal("failed to generate schema", zap.Error(err))
+ }
+ }
+
+ // Remove empty objects
+ valueSchema = valueSchema.NonEmpty()
+
+ if !*opts.SkipTest {
+ // In order to validate that the schema generated is correct,
+ // run the parser against the logs, fail in case of error
+ for _, file := range inputFiles {
+ if err = validateSchema(valueSchema, file); err != nil {
+ logger.Fatal("failed while testing schema with file. You can specify '-skip-test' argument to skip this step", zap.Error(err))
+ }
+ }
+ }
+
+ schema, err := yaml.Marshal(logschema.Schema{Version: 0, Fields: valueSchema.Fields})
+ if err != nil {
+ logger.Fatal("failed to marshal schema", zap.Error(err))
+ }
+ fmt.Println(string(schema))
+}
+
+func inferFromFile(root *logschema.ValueSchema, file string) (*logschema.ValueSchema, error) {
+ f, err := os.Open(file)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close() // nolint: errcheck
+
+ reader := bufio.NewReader(f)
+ lineNum := 0
+ run := true
+ for run {
+ lineNum++
+ line, err := reader.ReadBytes('\n')
+ if err != nil {
+ if err == io.EOF {
+ // Don't go through more lines, but make sure to process existing line
+ run = false
+ } else {
+ return root, errors.Wrap(err, "failed while reading file")
+ }
+ }
+ line = bytes.TrimSpace(line)
+ if len(line) == 0 {
+ continue
+ }
+
+ var data map[string]interface{}
+ if err = inferJsoniter.Unmarshal(line, &data); err != nil {
+ return nil, errors.Wrapf(err, "failed to parse line [%d] as JSON", lineNum)
+ }
+ lineObject := logschema.InferJSONValueSchema(data)
+ if lineObject.Type != logschema.TypeObject {
+ return nil, errors.New("invalid schema")
+ }
+ root = logschema.Merge(root, lineObject)
+ }
+
+ return root, nil
+}
+
+// Validates the schema. It generates a parser of the provided schema
+// and tries to parse the contents of the file.
+func validateSchema(valueSchema *logschema.ValueSchema, file string) error {
+ desc := logtypes.Desc{
+ Name: "Custom.Test",
+ Description: "Custom log test schema",
+ ReferenceURL: "-",
+ }
+ schema := &logschema.Schema{Version: 0, Fields: valueSchema.Fields}
+ entry, err := customlogs.Build(desc, schema)
+ if err != nil {
+ validationErrors := logschema.ValidationErrors(err)
+ if len(validationErrors) > 0 {
+ return errors.New(validationErrors[0].String())
+ }
+ return err
+ }
+ parser, err := entry.NewParser(nil)
+ if err != nil {
+ return err
+ }
+
+ fd, err := os.Open(file)
+ if err != nil {
+ return err
+ }
+
+ reader := bufio.NewReader(fd)
+ run := true
+ for run {
+ line, err := reader.ReadString('\n')
+ if err != nil {
+ if err == io.EOF {
+ // Don't go through more lines, but make sure to process existing line
+ run = false
+ } else {
+ return errors.Wrap(err, "failed while reading file")
+ }
+ }
+ line = strings.TrimSpace(line)
+ if len(line) == 0 {
+ continue
+ }
+
+ if _, err = parser.ParseLog(line); err != nil {
+ return err
+ }
+ }
+ return nil
+}
diff --git a/cmd/devtools/customlogs/customlogs/infer_test.go b/cmd/devtools/customlogs/customlogs/infer_test.go
new file mode 100644
index 0000000000..b38455fda5
--- /dev/null
+++ b/cmd/devtools/customlogs/customlogs/infer_test.go
@@ -0,0 +1,41 @@
+package customlogs
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+import (
+ "fmt"
+ "io/ioutil"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "gopkg.in/yaml.v2"
+)
+
+func TestProcessLine(t *testing.T) {
+ schema, err := inferFromFile(nil, "./testdata/sample_1.jsonl")
+ schema = schema.NonEmpty()
+ assert.NoError(t, err)
+ fd, err := ioutil.ReadFile("./testdata/schema_1.yml")
+ assert.NoError(t, err)
+
+ marshalled, err := yaml.Marshal(schema)
+ assert.NoError(t, err)
+ fmt.Println(string(marshalled))
+ assert.YAMLEq(t, string(fd), string(marshalled))
+}
diff --git a/cmd/devtools/customlogs/customlogs/main.go b/cmd/devtools/customlogs/customlogs/test.go
similarity index 85%
rename from cmd/devtools/customlogs/customlogs/main.go
rename to cmd/devtools/customlogs/customlogs/test.go
index bc25080212..da990c9d19 100644
--- a/cmd/devtools/customlogs/customlogs/main.go
+++ b/cmd/devtools/customlogs/customlogs/test.go
@@ -1,4 +1,4 @@
-package main
+package customlogs
/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
@@ -24,14 +24,12 @@ import (
"fmt"
"io"
"io/ioutil"
- "log"
"os"
"strings"
"go.uber.org/zap"
"gopkg.in/yaml.v2"
- "github.com/panther-labs/panther/cmd/opstools"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/customlogs"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logschema"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes"
@@ -39,30 +37,17 @@ import (
"github.com/panther-labs/panther/internal/log_analysis/log_processor/parsers"
)
-// CLI flags
-var opts = struct {
+type TestOpts struct {
Schema *string
Output *string
-}{
- Schema: flag.String("s", "", "Schema file"),
- Output: flag.String("o", "", "Write parsed results to file (defaults to stdout)"),
}
-func main() {
- opstools.SetUsage(`-s SCHEMA_FILE [-o OUTPUT_FILE] [INPUT_FILES...]`)
- flag.Parse()
- loggerConfig := zap.NewDevelopmentConfig()
- loggerConfig.DisableStacktrace = true
- loggerConfig.DisableCaller = true
- z, err := loggerConfig.Build()
- if err != nil {
- log.Fatalln("failed to start logger: ", err.Error())
- }
- logger := z.Sugar()
+// Test validates a log schema against a sample of logs
+func Test(logger *zap.SugaredLogger, opts *TestOpts) {
schemaFile := *opts.Schema
if schemaFile == "" {
flag.Usage()
- log.Fatal("no schema file provided")
+ logger.Fatal("no schema file provided")
}
schemaData, err := ioutil.ReadFile(schemaFile)
if err != nil {
@@ -81,7 +66,7 @@ func main() {
if err != nil {
validationErrors := logschema.ValidationErrors(err)
if len(validationErrors) > 0 {
- logger.Error("Schema validation failed:")
+ logger.Error("File validation failed:")
for _, e := range validationErrors {
logger.Errorf(" - %s", e.String())
}
diff --git a/cmd/devtools/customlogs/customlogs/testdata/sample_1.jsonl b/cmd/devtools/customlogs/customlogs/testdata/sample_1.jsonl
new file mode 100644
index 0000000000..4ef5b1c801
--- /dev/null
+++ b/cmd/devtools/customlogs/customlogs/testdata/sample_1.jsonl
@@ -0,0 +1,3 @@
+{"string" : "value", "required_string": "value", "boolean": true, "boolean_as_string": "true", "float": 0.0, "time": "2020-01-01T00:00:00Z", "float_as_string": "0.0", "int_as_string": "0", "not_int_as_string": "0"}
+{"required_string": "value", "boolean_as_string": "false", "int": 0, "object": {"string": ""}, "array": [1,2]}
+{"required_string": "value", "not_int_as_string": "value", "string" : null, "object": {}, "empty_object": {"empty": {"more_empty": {}}}}
diff --git a/cmd/devtools/customlogs/customlogs/testdata/schema_1.yml b/cmd/devtools/customlogs/customlogs/testdata/schema_1.yml
new file mode 100644
index 0000000000..3ad4ee4384
--- /dev/null
+++ b/cmd/devtools/customlogs/customlogs/testdata/schema_1.yml
@@ -0,0 +1,49 @@
+# Panther is a Cloud-Native SIEM for the Modern Security Team.
+# Copyright (C) 2020 Panther Labs Inc
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see .
+
+type: object
+fields:
+ - name: boolean
+ type: boolean
+ - name: float
+ type: float
+ - name: float_as_string
+ type: float
+ - name: int_as_string
+ type: bigint
+ - name: string
+ type: string
+ - name: time
+ type: timestamp
+ timeFormat: rfc3339
+ - name: array
+ type: array
+ element:
+ type: bigint
+ - name: int
+ type: bigint
+ - name: boolean_as_string
+ type: boolean
+ - name: not_int_as_string
+ type: string
+ - name: object
+ type: object
+ fields:
+ - name: string
+ type: string
+ - name: required_string
+ type: string
+ required: true
diff --git a/cmd/devtools/customlogs/main.go b/cmd/devtools/customlogs/main.go
new file mode 100644
index 0000000000..dae6c58dec
--- /dev/null
+++ b/cmd/devtools/customlogs/main.go
@@ -0,0 +1,80 @@
+package main
+
+/**
+ * Panther is a Cloud-Native SIEM for the Modern Security Team.
+ * Copyright (C) 2020 Panther Labs Inc
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+import (
+ "flag"
+ "log"
+ "os"
+ "strings"
+
+ "go.uber.org/zap"
+
+ "github.com/panther-labs/panther/cmd/devtools/customlogs/customlogs"
+ "github.com/panther-labs/panther/cmd/opstools"
+)
+
+// CLI commands
+const testCmd = "test"
+const inferCmd = "infer"
+
+func main() {
+ opstools.SetUsage(strings.Join([]string{testCmd, inferCmd}, ","))
+
+ loggerConfig := zap.NewDevelopmentConfig()
+ loggerConfig.DisableStacktrace = true
+ loggerConfig.DisableCaller = true
+ z, err := loggerConfig.Build()
+ if err != nil {
+ log.Fatalln("failed to start logger: ", err.Error())
+ }
+ logger := z.Sugar()
+
+ if len(os.Args) < 2 {
+ flag.Usage()
+ logger.Fatalf("Need to provide one command")
+ }
+
+ switch cmd := os.Args[1]; cmd {
+ case testCmd:
+ opstools.SetUsage(`-s SCHEMA_FILE [-o OUTPUT_FILE] [INPUT_FILES...]`)
+ opts := &customlogs.TestOpts{
+ Schema: flag.String("s", "", "Schema file"),
+ Output: flag.String("o", "", "Write parsed results to file (defaults to stdout)"),
+ }
+ if err := flag.CommandLine.Parse(os.Args[2:]); err != nil {
+ logger.Fatalf("failed to parse command line arguments")
+ flag.Usage()
+ }
+ customlogs.Test(logger, opts)
+ case inferCmd:
+ opstools.SetUsage(`[INPUT_FILES...]`)
+ opts := &customlogs.InferOpts{
+ SkipTest: flag.Bool("skip-test", false, "Skips testing the schema against the logs"),
+ }
+ if err := flag.CommandLine.Parse(os.Args[2:]); err != nil {
+ logger.Fatalf("failed to parse command line arguments")
+ flag.Usage()
+ }
+ customlogs.Infer(logger.Desugar(), opts)
+ default:
+ logger.Fatalf("Invalid command [%s]", cmd)
+ flag.Usage()
+ }
+}
diff --git a/internal/log_analysis/log_processor/logschema/infer.go b/internal/log_analysis/log_processor/logschema/infer.go
index 8c6fe2e4af..c321956c3d 100644
--- a/internal/log_analysis/log_processor/logschema/infer.go
+++ b/internal/log_analysis/log_processor/logschema/infer.go
@@ -22,6 +22,7 @@ import (
"encoding/json"
"net"
"net/url"
+ "sort"
"strconv"
"time"
@@ -51,6 +52,9 @@ func InferJSONValueSchema(x interface{}) *ValueSchema {
ValueSchema: *vs,
})
}
+ sort.Slice(fields, func(i, j int) bool {
+ return fields[i].Name < fields[j].Name
+ })
return &ValueSchema{
Type: TypeObject,
Fields: fields,
@@ -86,22 +90,22 @@ func InferJSONValueSchema(x interface{}) *ValueSchema {
}
func inferString(s string) *ValueSchema {
- if _, err := json.Number(s).Int64(); err != nil {
+ if _, err := json.Number(s).Int64(); err == nil {
return &ValueSchema{
Type: TypeBigInt,
}
}
- if _, err := json.Number(s).Float64(); err != nil {
+ if _, err := json.Number(s).Float64(); err == nil {
return &ValueSchema{
Type: TypeFloat,
}
}
- if _, err := strconv.ParseBool(s); err != nil {
+ if _, err := strconv.ParseBool(s); err == nil {
return &ValueSchema{
Type: TypeBoolean,
}
}
- if _, err := time.Parse(time.RFC3339, s); err != nil {
+ if _, err := time.Parse(time.RFC3339, s); err == nil {
return &ValueSchema{
Type: TypeTimestamp,
TimeFormat: "rfc3339",
@@ -117,7 +121,7 @@ func inferIndicators(s string) []string {
if ip := net.ParseIP(s); ip != nil {
return []string{"ip"}
}
- if _, err := url.Parse(s); err == nil {
+ if u, err := url.Parse(s); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
return []string{"url"}
}
if _, err := arn.Parse(s); err == nil {
@@ -133,9 +137,6 @@ func (v *ValueSchema) NonEmpty() *ValueSchema {
}
switch v.Type {
case TypeObject:
- if v.Fields == nil {
- return nil
- }
fields := make([]FieldSchema, 0, len(v.Fields))
for _, f := range v.Fields {
if v := f.ValueSchema.NonEmpty(); v != nil {
@@ -143,6 +144,9 @@ func (v *ValueSchema) NonEmpty() *ValueSchema {
fields = append(fields, f)
}
}
+ if len(fields) == 0 {
+ return nil
+ }
return &ValueSchema{
Type: TypeObject,
Fields: fields,