From 653cc0cf6b353165475260ab8c7e049e1c6a2202 Mon Sep 17 00:00:00 2001 From: Andrew Kroh Date: Thu, 15 Jun 2017 19:27:21 -0400 Subject: [PATCH] Add kernel.failure_mode config option This makes the kernel's failure mode configurable from Metricbeat (see the docs changes for more details). Another change was made to disable kernel auditing when Metricbeat stops since it will no longer be receiving messages. --- CHANGELOG.asciidoc | 2 +- NOTICE | 2 +- metricbeat/metricbeat.full.yml | 1 + metricbeat/module/audit/_meta/config.full.yml | 1 + .../module/audit/kernel/_meta/docs.asciidoc | 9 +++++++ metricbeat/module/audit/kernel/audit_linux.go | 12 ++++++--- .../module/audit/kernel/audit_linux_test.go | 5 ++-- metricbeat/module/audit/kernel/config.go | 25 ++++++++++++++++++- .../module/audit/kernel/config_linux_test.go | 8 ++++++ .../elastic/go-libaudit/CHANGELOG.md | 1 + .../github.com/elastic/go-libaudit/audit.go | 22 ++++++++++++++++ vendor/vendor.json | 22 ++++++++-------- 12 files changed, 91 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index da19405c2bd..72a281a7a26 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -63,8 +63,8 @@ https://github.com/elastic/beats/compare/v6.0.0-alpha1...master[Check the HEAD d *Metricbeat* - Add random startup delay to each metricset to avoid the thundering herd problem. {issue}4010[4010] - - Add the ability to configure audit rules to the kernel module. {pull}4482[4482] +- Add the ability to configure the kernel's audit failure mode. {pull}4516[4516] *Packetbeat* diff --git a/NOTICE b/NOTICE index 6fd30f22e4e..8d537ed8fa8 100644 --- a/NOTICE +++ b/NOTICE @@ -270,7 +270,7 @@ SOFTWARE. 
-------------------------------------------------------------------- Dependency: github.com/elastic/go-libaudit Version: v0.0.3 -Revision: b2d37f9d37d8e2a81bf69b5fae3c1f49bf6a14bf +Revision: 01c7fd0956dbe3fea61047c330de4c1e06b6427d License type (autodetected): Apache License 2.0 ./vendor/github.com/elastic/go-libaudit/LICENSE: -------------------------------------------------------------------- diff --git a/metricbeat/metricbeat.full.yml b/metricbeat/metricbeat.full.yml index b749023ebfb..e27e676f0be 100644 --- a/metricbeat/metricbeat.full.yml +++ b/metricbeat/metricbeat.full.yml @@ -126,6 +126,7 @@ metricbeat.modules: enabled: false metricsets: ["kernel"] kernel.resolve_ids: true + kernel.failure_mode: silent kernel.backlog_limit: 8196 kernel.rate_limit: 0 kernel.include_raw_message: false diff --git a/metricbeat/module/audit/_meta/config.full.yml b/metricbeat/module/audit/_meta/config.full.yml index 549835bc425..ef0e6b19a68 100644 --- a/metricbeat/module/audit/_meta/config.full.yml +++ b/metricbeat/module/audit/_meta/config.full.yml @@ -2,6 +2,7 @@ enabled: false metricsets: ["kernel"] kernel.resolve_ids: true + kernel.failure_mode: silent kernel.backlog_limit: 8196 kernel.rate_limit: 0 kernel.include_raw_message: false diff --git a/metricbeat/module/audit/kernel/_meta/docs.asciidoc b/metricbeat/module/audit/kernel/_meta/docs.asciidoc index 38b7fb85aba..1eafd38c8e9 100644 --- a/metricbeat/module/audit/kernel/_meta/docs.asciidoc +++ b/metricbeat/module/audit/kernel/_meta/docs.asciidoc @@ -43,6 +43,7 @@ is example showing all configuration options with their default values. - module: audit metricsets: ["kernel"] kernel.resolve_ids: true + kernel.failure_mode: silent kernel.backlog_limit: 8196 kernel.rate_limit: 0 kernel.include_raw_message: false @@ -52,6 +53,14 @@ is example showing all configuration options with their default values. *`kernel.resolve_ids`*:: This boolean setting enables the resolution of UIDs and GIDs to their associated names. 
The default value is true. +*`kernel.failure_mode`*:: This determines the kernel's behavior on critical +failures such as errors sending events to Metricbeat, the backlog limit being +exceeded, the kernel running out of memory, or the rate limit being exceeded. The +options are `silent`, `log`, or `panic`. `silent` makes the kernel +ignore the errors, `log` makes the kernel write the audit messages using +`printk` so they show up in the system's syslog, and `panic` causes the kernel to +panic to prevent use of the machine. Metricbeat's default is `silent`. + *`kernel.backlog_limit`*:: This controls the maximum number of audit messages that will be buffered by kernel. diff --git a/metricbeat/module/audit/kernel/audit_linux.go b/metricbeat/module/audit/kernel/audit_linux.go index 37735553dd5..975be4b8920 100644 --- a/metricbeat/module/audit/kernel/audit_linux.go +++ b/metricbeat/module/audit/kernel/audit_linux.go @@ -145,9 +145,9 @@ func (ms *MetricSet) initClient() error { } debugf("%v audit status from kernel at start: status=%+v", logPrefix, status) - if status.Enabled == 0 { - if err = ms.client.SetEnabled(true, libaudit.NoWait); err != nil { - return errors.Wrap(err, "failed to enable auditing in the kernel") + if fm, _ := ms.config.failureMode(); status.Failure != fm { + if err = ms.client.SetFailure(libaudit.FailureMode(fm), libaudit.NoWait); err != nil { + return errors.Wrap(err, "failed to set audit failure mode in kernel") } } @@ -163,6 +163,12 @@ func (ms *MetricSet) initClient() error { } } + if status.Enabled == 0 { + if err = ms.client.SetEnabled(true, libaudit.NoWait); err != nil { + return errors.Wrap(err, "failed to enable auditing in the kernel") + } + } + if err := ms.client.SetPID(libaudit.NoWait); err != nil { return errors.Wrap(err, "failed to set audit PID") } diff --git a/metricbeat/module/audit/kernel/audit_linux_test.go b/metricbeat/module/audit/kernel/audit_linux_test.go index 603bc7f9c50..72ce6999fc1 100644 --- 
a/metricbeat/module/audit/kernel/audit_linux_test.go +++ b/metricbeat/module/audit/kernel/audit_linux_test.go @@ -44,8 +44,9 @@ func TestData(t *testing.T) { func getConfig() map[string]interface{} { return map[string]interface{}{ - "module": "audit", - "metricsets": []string{"kernel"}, + "module": "audit", + "metricsets": []string{"kernel"}, + "kernel.failure_mode": "log", "kernel.audit_rules": ` -w /etc/passwd -p wa -k auth -a always,exit -F arch=b64 -S execve -k exec diff --git a/metricbeat/module/audit/kernel/config.go b/metricbeat/module/audit/kernel/config.go index e0cbe835e0c..16cd88a4710 100644 --- a/metricbeat/module/audit/kernel/config.go +++ b/metricbeat/module/audit/kernel/config.go @@ -15,6 +15,7 @@ import ( // Config defines the kernel metricset's possible configuration options. type Config struct { ResolveIDs bool `config:"kernel.resolve_ids"` // Resolve UID/GIDs to names. + FailureMode string `config:"kernel.failure_mode"` // Failure mode for the kernel (silent, log, panic). BacklogLimit uint32 `config:"kernel.backlog_limit"` // Max number of message to buffer in the kernel. RateLimit uint32 `config:"kernel.rate_limit"` // Rate limit in messages/sec of messages from kernel. RawMessage bool `config:"kernel.include_raw_message"` // Include the list of raw audit messages in the event. @@ -29,8 +30,16 @@ type auditRule struct { // Validate validates the rules specified in the config. func (c Config) Validate() error { + var errs multierror.Errors _, err := c.rules() - return err + if err != nil { + errs = append(errs, err) + } + _, err = c.failureMode() + if err != nil { + errs = append(errs, err) + } + return errs.Err() } // Rules returns a list of rules specified in the config. 
@@ -77,8 +86,22 @@ func (c Config) rules() ([]auditRule, error) { return auditRules, nil } +func (c Config) failureMode() (uint32, error) { + switch strings.ToLower(c.FailureMode) { + case "silent": + return 0, nil + case "log": + return 1, nil + case "panic": + return 2, nil + default: + return 0, errors.Errorf("invalid kernel.failure_mode '%v' (use silent, log, or panic)", c.FailureMode) + } +} + var defaultConfig = Config{ ResolveIDs: true, + FailureMode: "silent", BacklogLimit: 8192, RateLimit: 0, RawMessage: false, diff --git a/metricbeat/module/audit/kernel/config_linux_test.go b/metricbeat/module/audit/kernel/config_linux_test.go index aaf1250db8c..c62765b0b1f 100644 --- a/metricbeat/module/audit/kernel/config_linux_test.go +++ b/metricbeat/module/audit/kernel/config_linux_test.go @@ -56,6 +56,14 @@ kernel.audit_rules: | t.Log(err) } +func TestConfigValidateFailureMode(t *testing.T) { + config := defaultConfig + config.FailureMode = "boom" + err := config.Validate() + assert.Error(t, err) + t.Log(err) +} + func parseConfig(t testing.TB, yaml string) (Config, error) { c, err := common.NewConfigWithYAML([]byte(yaml), "") if err != nil { diff --git a/vendor/github.com/elastic/go-libaudit/CHANGELOG.md b/vendor/github.com/elastic/go-libaudit/CHANGELOG.md index 551b9a66405..8aec18b3551 100644 --- a/vendor/github.com/elastic/go-libaudit/CHANGELOG.md +++ b/vendor/github.com/elastic/go-libaudit/CHANGELOG.md @@ -8,6 +8,7 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Add a package for building audit rules that can be added to the kernel. - Add GetRules, DeleteRules, DeleteRule, and AddRule methods to AuditClient. - auparse - Add conversion of POSIX exit code values to their name. +- Add SetFailure to AuditClient. 
#8 ### Changed diff --git a/vendor/github.com/elastic/go-libaudit/audit.go b/vendor/github.com/elastic/go-libaudit/audit.go index 0dfc8a2a670..db427324510 100644 --- a/vendor/github.com/elastic/go-libaudit/audit.go +++ b/vendor/github.com/elastic/go-libaudit/audit.go @@ -54,6 +54,18 @@ const ( NoWait ) +// FailureMode defines the kernel's behavior on critical errors. +type FailureMode uint32 + +const ( + // SilentOnFailure ignores errors. + SilentOnFailure FailureMode = 0 + // LogOnFailure logs errors using printk. + LogOnFailure + // PanicOnFailure causes a kernel panic on error. + PanicOnFailure +) + // AuditClient is a client for communicating with the Linux kernels audit // interface over netlink. type AuditClient struct { @@ -303,6 +315,16 @@ func (c *AuditClient) SetEnabled(enabled bool, wm WaitMode) error { return c.set(status, wm) } +// SetFailure sets the action that the kernel will perform when the backlog +// limit is reached or when it encounters an error and cannot proceed. +func (c *AuditClient) SetFailure(fm FailureMode, wm WaitMode) error { + status := AuditStatus{ + Mask: AuditStatusFailure, + Failure: uint32(fm), + } + return c.set(status, wm) +} + // RawAuditMessage is a raw audit message received from the kernel. 
type RawAuditMessage struct { Type auparse.AuditMessageType diff --git a/vendor/vendor.json b/vendor/vendor.json index 78c0acc10a5..f368d784fba 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -290,40 +290,40 @@ "revisionTime": "2016-08-05T00:47:13Z" }, { - "checksumSHA1": "MfmdLoBh0b2JUUyS2QLu2fcGYIQ=", + "checksumSHA1": "5cppdt4ZlFpURXuZ7QeBQvdhmKQ=", "path": "github.com/elastic/go-libaudit", - "revision": "b2d37f9d37d8e2a81bf69b5fae3c1f49bf6a14bf", - "revisionTime": "2017-06-08T23:02:06Z", + "revision": "01c7fd0956dbe3fea61047c330de4c1e06b6427d", + "revisionTime": "2017-06-15T21:41:34Z", "version": "v0.0.3", "versionExact": "v0.0.3" }, { "checksumSHA1": "Ca2OhodWbbke1v+cctf9y/HdwZM=", "path": "github.com/elastic/go-libaudit/aucoalesce", - "revision": "b2d37f9d37d8e2a81bf69b5fae3c1f49bf6a14bf", - "revisionTime": "2017-06-08T23:02:06Z", + "revision": "01c7fd0956dbe3fea61047c330de4c1e06b6427d", + "revisionTime": "2017-06-15T21:41:34Z", "version": "v0.0.3", "versionExact": "v0.0.3" }, { "checksumSHA1": "9hp7icHi638Ye1yXSgmoyy/HvW4=", "path": "github.com/elastic/go-libaudit/auparse", - "revision": "b2d37f9d37d8e2a81bf69b5fae3c1f49bf6a14bf", - "revisionTime": "2017-06-08T23:02:06Z", + "revision": "01c7fd0956dbe3fea61047c330de4c1e06b6427d", + "revisionTime": "2017-06-15T21:41:34Z", "version": "v0.0.3", "versionExact": "v0.0.3" }, { "checksumSHA1": "H0rnscnKHbkjmXc4whC3gtIPR0c=", "path": "github.com/elastic/go-libaudit/rule", - "revision": "b2d37f9d37d8e2a81bf69b5fae3c1f49bf6a14bf", - "revisionTime": "2017-06-08T23:02:06Z" + "revision": "01c7fd0956dbe3fea61047c330de4c1e06b6427d", + "revisionTime": "2017-06-15T21:41:34Z" }, { "checksumSHA1": "36UaYid29Kyhrsa5D8N6BoM8dVw=", "path": "github.com/elastic/go-libaudit/rule/flags", - "revision": "b2d37f9d37d8e2a81bf69b5fae3c1f49bf6a14bf", - "revisionTime": "2017-06-08T23:02:06Z" + "revision": "01c7fd0956dbe3fea61047c330de4c1e06b6427d", + "revisionTime": "2017-06-15T21:41:34Z" }, { "checksumSHA1": 
"3jizmlZPCyo6FAZY8Trk9jA8NH4=",