Skip to content

Commit

Permalink
CLI tool that infers Custom Logs schema out of samples (#2194)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kostas Papageorgiou authored Dec 11, 2020
1 parent 961e60f commit 784fac6
Show file tree
Hide file tree
Showing 7 changed files with 365 additions and 29 deletions.
174 changes: 174 additions & 0 deletions cmd/devtools/customlogs/customlogs/infer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
package customlogs

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"bufio"
"bytes"
"flag"
"fmt"
"io"
"os"
"strings"

jsoniter "github.com/json-iterator/go"
"github.com/pkg/errors"
"go.uber.org/zap"
"gopkg.in/yaml.v2"

"github.com/panther-labs/panther/internal/log_analysis/log_processor/customlogs"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logschema"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes"
)

// InferOpts holds the command-line options of the Infer command.
type InferOpts struct {
// SkipTest, when it points to true, makes Infer skip the step that
// validates the inferred schema against the input files.
SkipTest *bool
}

// inferJsoniter is the JSON API used to decode the sample log lines.
// UseNumber is enabled; per the jsoniter documentation this decodes numbers
// stored in interface{} values as json.Number instead of float64 — presumably
// so that schema inference can tell integers from floats (NOTE(review):
// confirm against logschema.InferJSONValueSchema).
var inferJsoniter = jsoniter.Config{
UseNumber: true,
}.Froze()

// Infer infers a schema from the sample log files passed as positional
// command-line arguments (flag.Args()) and prints it to stdout as YAML.
//
// The schemas inferred from each file are merged into a single one. Unless
// opts.SkipTest points to true, the resulting schema is then validated by
// building a parser from it and parsing every input file again. A nil opts or
// a nil opts.SkipTest is treated as "do not skip the test".
//
// Every failure is reported through logger.Fatal.
func Infer(logger *zap.Logger, opts *InferOpts) {
	inputFiles := flag.Args()
	if len(inputFiles) == 0 {
		// Print the usage first: logger.Fatal exits the process, so anything
		// placed after it would never run.
		flag.Usage()
		logger.Fatal("You need to specify at least one file")
	}

	var valueSchema *logschema.ValueSchema
	var err error
	for _, file := range inputFiles {
		valueSchema, err = inferFromFile(valueSchema, file)
		if err != nil {
			logger.Fatal("failed to generate schema", zap.Error(err))
		}
	}

	// Remove empty objects
	if valueSchema != nil {
		valueSchema = valueSchema.NonEmpty()
	}
	// The schema is still nil when no input file contained a log line; fail
	// with a clear message instead of dereferencing a nil pointer below.
	if valueSchema == nil {
		logger.Fatal("could not infer any fields from the provided files")
	}

	skipTest := opts != nil && opts.SkipTest != nil && *opts.SkipTest
	if !skipTest {
		// In order to validate that the schema generated is correct,
		// run the parser against the logs, fail in case of error
		for _, file := range inputFiles {
			if err = validateSchema(valueSchema, file); err != nil {
				logger.Fatal("failed while testing schema with file. You can specify '-skip-test' argument to skip this step", zap.Error(err))
			}
		}
	}

	schema, err := yaml.Marshal(logschema.Schema{Version: 0, Fields: valueSchema.Fields})
	if err != nil {
		logger.Fatal("failed to marshal schema", zap.Error(err))
	}
	fmt.Println(string(schema))
}

// inferFromFile reads file line by line, treats every non-blank line as a
// JSON object, infers a schema for it and merges that schema into root.
//
// root may be nil, in which case the merge starts from nothing. It returns
// the merged schema, or a nil schema and an error when the file cannot be
// opened or read, a line is not valid JSON, or the schema inferred for a line
// is not an object.
func inferFromFile(root *logschema.ValueSchema, file string) (*logschema.ValueSchema, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	defer f.Close() // nolint: errcheck

	reader := bufio.NewReader(f)
	lineNum := 0
	run := true
	for run {
		lineNum++
		line, err := reader.ReadBytes('\n')
		if err != nil {
			if err != io.EOF {
				// Never hand back a partially merged schema alongside an error.
				return nil, errors.Wrap(err, "failed while reading file")
			}
			// Don't go through more lines, but make sure to process existing line
			run = false
		}
		line = bytes.TrimSpace(line)
		if len(line) == 0 {
			continue
		}

		var data map[string]interface{}
		if err = inferJsoniter.Unmarshal(line, &data); err != nil {
			return nil, errors.Wrapf(err, "failed to parse line [%d] as JSON", lineNum)
		}
		lineObject := logschema.InferJSONValueSchema(data)
		if lineObject.Type != logschema.TypeObject {
			return nil, errors.Errorf("invalid schema at line [%d]", lineNum)
		}
		root = logschema.Merge(root, lineObject)
	}

	return root, nil
}

// validateSchema checks that the given schema can parse the contents of file.
//
// It builds a custom log entry from valueSchema's fields, creates a parser
// out of it and runs that parser against every non-blank line of the file.
// It returns the first error encountered: the first schema validation error
// (or the raw build error), a parser creation error, a file open/read error,
// or the parse error of the first failing line together with its line number.
func validateSchema(valueSchema *logschema.ValueSchema, file string) error {
	desc := logtypes.Desc{
		Name:         "Custom.Test",
		Description:  "Custom log test schema",
		ReferenceURL: "-",
	}
	schema := &logschema.Schema{Version: 0, Fields: valueSchema.Fields}
	entry, err := customlogs.Build(desc, schema)
	if err != nil {
		// Prefer the first schema validation error, when there is one.
		if validationErrors := logschema.ValidationErrors(err); len(validationErrors) > 0 {
			return errors.New(validationErrors[0].String())
		}
		return err
	}
	parser, err := entry.NewParser(nil)
	if err != nil {
		return err
	}

	fd, err := os.Open(file)
	if err != nil {
		return err
	}
	// Release the file descriptor on every return path.
	defer fd.Close() // nolint: errcheck

	reader := bufio.NewReader(fd)
	lineNum := 0
	run := true
	for run {
		lineNum++
		line, err := reader.ReadString('\n')
		if err != nil {
			if err != io.EOF {
				return errors.Wrap(err, "failed while reading file")
			}
			// Don't go through more lines, but make sure to process existing line
			run = false
		}
		line = strings.TrimSpace(line)
		if len(line) == 0 {
			continue
		}

		if _, err = parser.ParseLog(line); err != nil {
			return errors.Wrapf(err, "failed to parse line [%d]", lineNum)
		}
	}
	return nil
}
41 changes: 41 additions & 0 deletions cmd/devtools/customlogs/customlogs/infer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package customlogs

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
* Copyright (C) 2020 Panther Labs Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import (
"fmt"
"io/ioutil"
"testing"

"github.com/stretchr/testify/assert"
"gopkg.in/yaml.v2"
)

// TestProcessLine infers a schema from testdata/sample_1.jsonl and checks
// that, once empty objects are removed, its YAML form is equivalent to
// testdata/schema_1.yml.
func TestProcessLine(t *testing.T) {
	schema, err := inferFromFile(nil, "./testdata/sample_1.jsonl")
	// Stop at the first failure: assert alone does not abort the test, and
	// the steps below are meaningless (schema is nil) once inference failed.
	if !assert.NoError(t, err) || !assert.NotNil(t, schema) {
		return
	}
	schema = schema.NonEmpty()

	expected, err := ioutil.ReadFile("./testdata/schema_1.yml")
	if !assert.NoError(t, err) {
		return
	}

	marshalled, err := yaml.Marshal(schema)
	if !assert.NoError(t, err) {
		return
	}
	// Print the generated schema so it shows up in the test output.
	fmt.Println(string(marshalled))
	assert.YAMLEq(t, string(expected), string(marshalled))
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package customlogs

/**
* Panther is a Cloud-Native SIEM for the Modern Security Team.
Expand All @@ -24,45 +24,30 @@ import (
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"

"go.uber.org/zap"
"gopkg.in/yaml.v2"

"github.com/panther-labs/panther/cmd/opstools"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/customlogs"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logschema"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/logtypes"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/pantherlog"
"github.com/panther-labs/panther/internal/log_analysis/log_processor/parsers"
)

// CLI flags
var opts = struct {
type TestOpts struct {
Schema *string
Output *string
}{
Schema: flag.String("s", "", "Schema file"),
Output: flag.String("o", "", "Write parsed results to file (defaults to stdout)"),
}

func main() {
opstools.SetUsage(`-s SCHEMA_FILE [-o OUTPUT_FILE] [INPUT_FILES...]`)
flag.Parse()
loggerConfig := zap.NewDevelopmentConfig()
loggerConfig.DisableStacktrace = true
loggerConfig.DisableCaller = true
z, err := loggerConfig.Build()
if err != nil {
log.Fatalln("failed to start logger: ", err.Error())
}
logger := z.Sugar()
// Test validates a log schema against a sample of logs
func Test(logger *zap.SugaredLogger, opts *TestOpts) {
schemaFile := *opts.Schema
if schemaFile == "" {
flag.Usage()
log.Fatal("no schema file provided")
logger.Fatal("no schema file provided")
}
schemaData, err := ioutil.ReadFile(schemaFile)
if err != nil {
Expand All @@ -81,7 +66,7 @@ func main() {
if err != nil {
validationErrors := logschema.ValidationErrors(err)
if len(validationErrors) > 0 {
logger.Error("Schema validation failed:")
logger.Error("File validation failed:")
for _, e := range validationErrors {
logger.Errorf(" - %s", e.String())
}
Expand Down
3 changes: 3 additions & 0 deletions cmd/devtools/customlogs/customlogs/testdata/sample_1.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"string" : "value", "required_string": "value", "boolean": true, "boolean_as_string": "true", "float": 0.0, "time": "2020-01-01T00:00:00Z", "float_as_string": "0.0", "int_as_string": "0", "not_int_as_string": "0"}
{"required_string": "value", "boolean_as_string": "false", "int": 0, "object": {"string": ""}, "array": [1,2]}
{"required_string": "value", "not_int_as_string": "value", "string" : null, "object": {}, "empty_object": {"empty": {"more_empty": {}}}}
49 changes: 49 additions & 0 deletions cmd/devtools/customlogs/customlogs/testdata/schema_1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Panther is a Cloud-Native SIEM for the Modern Security Team.
# Copyright (C) 2020 Panther Labs Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

type: object
fields:
- name: boolean
type: boolean
- name: float
type: float
- name: float_as_string
type: float
- name: int_as_string
type: bigint
- name: string
type: string
- name: time
type: timestamp
timeFormat: rfc3339
- name: array
type: array
element:
type: bigint
- name: int
type: bigint
- name: boolean_as_string
type: boolean
- name: not_int_as_string
type: string
- name: object
type: object
fields:
- name: string
type: string
- name: required_string
type: string
required: true
Loading

0 comments on commit 784fac6

Please sign in to comment.