From 6a30867ecd2d6aa81ed09d7dc51308e0ebaaca63 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Tue, 4 Feb 2020 10:07:18 +0100 Subject: [PATCH] [Auditbeat] Reduce system/socket logging noise (#15890) * Make ThreadEnter errors less noisy The socket dataset uses ThreadEnter(event) / ThreadLeave(event) as a single-event per-thread state tracking, useful for correlating function calls and their return values. However, in some cases functions are stacked, like sys_execve() calling itself recursively or inet6_create() calling inet_create(). This results in an existing stored event to be evicted, which is not a problem but currently is causing a warning to be printed to the logs. This patch makes two changes to this situation: - Only print warnings from the state machine when socketdetailed selector is enabled. The state machine currently only generates warnings for ThreadEnter/ThreadExit issues. - Change ThreadEnter errors to be constructed on demand by their Error() method, so that the somewhat expensive fmt.Sprintf() / event.String() is only invoked if the error is going to be printed to the log. This is a huge CPU saving in systems where this benign eviction is happening frequently. * socketdetailed selector has to be enabled explicitly This selector is extremely noisy. This change excludes it from being enabled when debug is enabled with `-d '*'` and requires it to be explicitly defined: `-d '*,socketdetailed'`. * Print template variables when socket debug is enabled It makes no sense to print guesses' progress in regular debug (socket) and the resulting template variables only when socketdetailed is set. * Fix syscall arguments usage in guesses The syscall parameter templates (`SYS_Pn`) were not valid until guess_syscall_args runs. As the variables `SYS_Pn` already existed, another guess using them could use the wrong values because the dependency mechanism only checks if the variable exists. The fix for this is to have temporary variables (`_SYS_Pn`) and have guess_syscall_args create the definitive ones. This didn't cause any bug as the only guess that used syscall args is the new guess_deref which is enabled on demand via an environment variable for diagnostic purposes. (cherry picked from commit 32a3cdee1a8c853c05e810f593840576e19eb025) --- .../module/system/socket/arch_amd64.go | 15 +++++----- .../module/system/socket/guess/deref.go | 3 +- .../module/system/socket/guess/syscallargs.go | 29 ++++++++----------- .../module/system/socket/socket_linux.go | 13 +++++---- .../auditbeat/module/system/socket/state.go | 15 +++++++++- 5 files changed, 44 insertions(+), 31 deletions(-) diff --git a/x-pack/auditbeat/module/system/socket/arch_amd64.go b/x-pack/auditbeat/module/system/socket/arch_amd64.go index 9ec425e4634..4f7c578f8ee 100644 --- a/x-pack/auditbeat/module/system/socket/arch_amd64.go +++ b/x-pack/auditbeat/module/system/socket/arch_amd64.go @@ -17,13 +17,14 @@ var archVariables = common.MapStr{ "P5": "%r8", "P6": "%r9", - // System call parameters - "SYS_P1": "%di", - "SYS_P2": "%si", - "SYS_P3": "%dx", - "SYS_P4": "%cx", // This already translated from r10 by syscall handler - "SYS_P5": "%r8", - "SYS_P6": "%r9", + // System call parameters. These are temporary, the definitive SYS_Px args + // will be determined by guess/syscallargs.go. + "_SYS_P1": "%di", + "_SYS_P2": "%si", + "_SYS_P3": "%dx", + "_SYS_P4": "%cx", // This already translated from r10 by syscall handler + "_SYS_P5": "%r8", + "_SYS_P6": "%r9", "RET": "%ax", } diff --git a/x-pack/auditbeat/module/system/socket/guess/deref.go b/x-pack/auditbeat/module/system/socket/guess/deref.go index ae7804bb06d..a019b7cf7b9 100644 --- a/x-pack/auditbeat/module/system/socket/guess/deref.go +++ b/x-pack/auditbeat/module/system/socket/guess/deref.go @@ -73,6 +73,7 @@ func (g *guessDeref) Provides() []string { func (g *guessDeref) Requires() []string { return []string{ "SYS_UNAME", + "SYS_P1", } } @@ -85,7 +86,7 @@ func (g *guessDeref) Probes() ([]helper.ProbeDef, error) { Type: tracing.TypeKProbe, Name: "guess_null_ptr_deref", Address: "{{.SYS_UNAME}}", - Fetchargs: helper.MakeMemoryDump("{{.P1}}", 0, credDumpBytes), + Fetchargs: helper.MakeMemoryDump("{{.SYS_P1}}", 0, credDumpBytes), }, Decoder: tracing.NewDumpDecoder, }, diff --git a/x-pack/auditbeat/module/system/socket/guess/syscallargs.go b/x-pack/auditbeat/module/system/socket/guess/syscallargs.go index 833aa794de3..563ec9e1355 100644 --- a/x-pack/auditbeat/module/system/socket/guess/syscallargs.go +++ b/x-pack/auditbeat/module/system/socket/guess/syscallargs.go @@ -71,8 +71,8 @@ func (g *guessSyscallArgs) Provides() []string { func (g *guessSyscallArgs) Requires() []string { return []string{ "SYS_GETTIMEOFDAY", - "SYS_P1", - "SYS_P2", + "_SYS_P1", + "_SYS_P2", } } @@ -84,7 +84,7 @@ func (g *guessSyscallArgs) Probes() ([]helper.ProbeDef, error) { Probe: tracing.Probe{ Name: "syscall_args_guess", Address: "{{.SYS_GETTIMEOFDAY}}", - Fetchargs: "p1reg={{.SYS_P1}} p2reg={{.SYS_P2}} p1pt=+0x70({{.SYS_P1}}) p2pt=+0x68({{(.SYS_P1)}})", + Fetchargs: "p1reg={{._SYS_P1}} p2reg={{._SYS_P2}} p1pt=+0x70({{._SYS_P1}}) p2pt=+0x68({{(._SYS_P1)}})", }, Decoder: helper.NewStructDecoder(func() interface{} { return new(syscallGuess) }), }, @@ -116,20 +116,15 @@ func (g *guessSyscallArgs) Extract(ev interface{}) (common.MapStr, bool) { } if args.RegP1 == g.expected[0] && args.RegP2 == g.expected[1] { - // Current kernel uses the old calling convention. There's no need to - // adjust the SYS_Pn variables. - // However, a Guesser always has to return the variables it declares - // in Provides(). - dummyResult := common.MapStr{} - for _, name := range g.Provides() { - value, found := g.ctx.Vars[name] - if value == nil || !found { - g.ctx.Log.Errorf("Expected variable %s is not available. value=%s found=%v", name, value, found) - return nil, false - } - dummyResult[name] = value - } - return dummyResult, true + // Current kernel uses the old calling convention. + return common.MapStr{ + "SYS_P1": g.ctx.Vars["_SYS_P1"], + "SYS_P2": g.ctx.Vars["_SYS_P2"], + "SYS_P3": g.ctx.Vars["_SYS_P3"], + "SYS_P4": g.ctx.Vars["_SYS_P4"], + "SYS_P5": g.ctx.Vars["_SYS_P5"], + "SYS_P6": g.ctx.Vars["_SYS_P6"], + }, true } // New calling convention detected. Adjust syscall arguments to read // well known offsets of the pt_regs structure at position 1. diff --git a/x-pack/auditbeat/module/system/socket/socket_linux.go b/x-pack/auditbeat/module/system/socket/socket_linux.go index 9a56c66e44b..d92f2e7ba19 100644 --- a/x-pack/auditbeat/module/system/socket/socket_linux.go +++ b/x-pack/auditbeat/module/system/socket/socket_linux.go @@ -103,7 +103,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { log: logger, isDebug: logp.IsDebug(metricsetName), detailLog: logp.NewLogger(detailSelector), - isDetailed: logp.IsDebug(detailSelector), + isDetailed: logp.HasSelector(detailSelector), sniffer: sniffer, } @@ -199,8 +199,11 @@ func (m *MetricSet) Run(r mb.PushReporterV2) { if m.isDetailed { m.detailLog.Debug(v.String()) } - if err := v.Update(st); err != nil { - m.log.Infof("error processing event '%s': %v", v.String(), err) + if err := v.Update(st); err != nil && m.isDetailed { + // These errors are seldom interesting, as the flow state engine + // doesn't have many error conditions and all benign enough to + // not be worth logging them by default. + m.detailLog.Warnf("Issue while processing event '%s': %v", v.String(), err) } atomic.AddUint64(&eventCount, 1) @@ -356,7 +359,7 @@ func (m *MetricSet) Setup() (err error) { return errors.Wrap(err, "unable to guess one or more required parameters") } - if m.isDetailed { + if m.isDebug { names := make([]string, 0, len(m.templateVars)) for name := range m.templateVars { names = append(names, name) @@ -364,7 +367,7 @@ func (m *MetricSet) Setup() (err error) { sort.Strings(names) m.log.Debugf("%d template variables in use:", len(m.templateVars)) for _, key := range names { - m.detailLog.Debugf(" %s = %v", key, m.templateVars[key]) + m.log.Debugf(" %s = %v", key, m.templateVars[key]) } } diff --git a/x-pack/auditbeat/module/system/socket/state.go b/x-pack/auditbeat/module/system/socket/state.go index 78558fd71d7..9f1bd8964f7 100644 --- a/x-pack/auditbeat/module/system/socket/state.go +++ b/x-pack/auditbeat/module/system/socket/state.go @@ -541,13 +541,26 @@ func (s *state) getProcess(pid uint32) *process { return s.processes[pid] } +type threadEnterError struct { + tid uint32 + existing event +} + +// Error is the error message string. +func (t threadEnterError) Error() string { + return fmt.Sprintf("thread already had an event. tid=%d existing=%v", t.tid, t.existing) +} + func (s *state) ThreadEnter(tid uint32, ev event) error { s.Lock() prev, hasPrev := s.threads[tid] s.threads[tid] = ev s.Unlock() if hasPrev { - return fmt.Errorf("thread already had an event. tid=%d existing=%v", tid, prev) + return threadEnterError{ + tid: tid, + existing: prev, + } } return nil }