Skip to content

Commit

Permalink
[Auditbeat] Reduce system/socket logging noise (elastic#15890)
Browse files Browse the repository at this point in the history
* Make ThreadEnter errors less noisy

The socket dataset uses ThreadEnter(event) / ThreadLeave(event) as a
single-event per-thread state tracking, useful for correlating function
calls and their return values.

However, in some cases functions are stacked, like sys_execve() calling
itself recursively or inet6_create() calling inet_create(). This results
in an existing stored event to be evicted, which is not a problem but
currently is causing a warning to be printed to the logs.

This patch makes two changes to this situation:
- Only print warnings from the state machine when socketdetailed selector
  is enabled. The state machine currently only generates warnings for
  ThreadEnter/ThreadExit issues.

- Change ThreadEnter errors to be constructed on demand by their Error()
  method, so that the somewhat expensive fmt.Sprintf() / event.String()
  is only invoked if the error is going to be printed to the log.

This is a huge CPU saving in systems where this benign eviction is
happening frequently.

* socketdetailed selector has to be enabled explicitly

This selector is extremely noisy. This change excludes it from being
enabled when debug is enabled with `-d '*'` and requires it to be
explicitly defined: `-d '*,socketdetailed'`.

* Print template variables when socket debug is enabled

It makes no sense to print guesses' progress in regular debug (socket)
and the resulting template variables only when socketdetailed is set.

* Fix syscall arguments usage in guesses

The syscall parameter templates (`SYS_Pn`) were not valid until
guess_syscall_args runs. As the variables `SYS_Pn` already existed,
another guess using them could use the wrong values because the
dependency mechanism only checks if the variable exists.

The fix for this is to have temporary variables (`_SYS_Pn`) and have
guess_syscall_args create the definitive ones.

This didn't cause any bug as the only guess that used syscall
args is the new guess_deref which is enabled on demand via an
environment variable for diagnostic purposes.

(cherry picked from commit 32a3cde)
  • Loading branch information
adriansr committed Mar 19, 2020
1 parent bacc249 commit 6a30867
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 31 deletions.
15 changes: 8 additions & 7 deletions x-pack/auditbeat/module/system/socket/arch_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ var archVariables = common.MapStr{
"P5": "%r8",
"P6": "%r9",

// System call parameters
"SYS_P1": "%di",
"SYS_P2": "%si",
"SYS_P3": "%dx",
"SYS_P4": "%cx", // This already translated from r10 by syscall handler
"SYS_P5": "%r8",
"SYS_P6": "%r9",
// System call parameters. These are temporary, the definitive SYS_Px args
// will be determined by guess/syscallargs.go.
"_SYS_P1": "%di",
"_SYS_P2": "%si",
"_SYS_P3": "%dx",
"_SYS_P4": "%cx", // This already translated from r10 by syscall handler
"_SYS_P5": "%r8",
"_SYS_P6": "%r9",

"RET": "%ax",
}
3 changes: 2 additions & 1 deletion x-pack/auditbeat/module/system/socket/guess/deref.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ func (g *guessDeref) Provides() []string {
func (g *guessDeref) Requires() []string {
return []string{
"SYS_UNAME",
"SYS_P1",
}
}

Expand All @@ -85,7 +86,7 @@ func (g *guessDeref) Probes() ([]helper.ProbeDef, error) {
Type: tracing.TypeKProbe,
Name: "guess_null_ptr_deref",
Address: "{{.SYS_UNAME}}",
Fetchargs: helper.MakeMemoryDump("{{.P1}}", 0, credDumpBytes),
Fetchargs: helper.MakeMemoryDump("{{.SYS_P1}}", 0, credDumpBytes),
},
Decoder: tracing.NewDumpDecoder,
},
Expand Down
29 changes: 12 additions & 17 deletions x-pack/auditbeat/module/system/socket/guess/syscallargs.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ func (g *guessSyscallArgs) Provides() []string {
func (g *guessSyscallArgs) Requires() []string {
return []string{
"SYS_GETTIMEOFDAY",
"SYS_P1",
"SYS_P2",
"_SYS_P1",
"_SYS_P2",
}
}

Expand All @@ -84,7 +84,7 @@ func (g *guessSyscallArgs) Probes() ([]helper.ProbeDef, error) {
Probe: tracing.Probe{
Name: "syscall_args_guess",
Address: "{{.SYS_GETTIMEOFDAY}}",
Fetchargs: "p1reg={{.SYS_P1}} p2reg={{.SYS_P2}} p1pt=+0x70({{.SYS_P1}}) p2pt=+0x68({{(.SYS_P1)}})",
Fetchargs: "p1reg={{._SYS_P1}} p2reg={{._SYS_P2}} p1pt=+0x70({{._SYS_P1}}) p2pt=+0x68({{(._SYS_P1)}})",
},
Decoder: helper.NewStructDecoder(func() interface{} { return new(syscallGuess) }),
},
Expand Down Expand Up @@ -116,20 +116,15 @@ func (g *guessSyscallArgs) Extract(ev interface{}) (common.MapStr, bool) {
}

if args.RegP1 == g.expected[0] && args.RegP2 == g.expected[1] {
// Current kernel uses the old calling convention. There's no need to
// adjust the SYS_Pn variables.
// However, a Guesser always has to return the variables it declares
// in Provides().
dummyResult := common.MapStr{}
for _, name := range g.Provides() {
value, found := g.ctx.Vars[name]
if value == nil || !found {
g.ctx.Log.Errorf("Expected variable %s is not available. value=%s found=%v", name, value, found)
return nil, false
}
dummyResult[name] = value
}
return dummyResult, true
// Current kernel uses the old calling convention.
return common.MapStr{
"SYS_P1": g.ctx.Vars["_SYS_P1"],
"SYS_P2": g.ctx.Vars["_SYS_P2"],
"SYS_P3": g.ctx.Vars["_SYS_P3"],
"SYS_P4": g.ctx.Vars["_SYS_P4"],
"SYS_P5": g.ctx.Vars["_SYS_P5"],
"SYS_P6": g.ctx.Vars["_SYS_P6"],
}, true
}
// New calling convention detected. Adjust syscall arguments to read
// well known offsets of the pt_regs structure at position 1.
Expand Down
13 changes: 8 additions & 5 deletions x-pack/auditbeat/module/system/socket/socket_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) {
log: logger,
isDebug: logp.IsDebug(metricsetName),
detailLog: logp.NewLogger(detailSelector),
isDetailed: logp.IsDebug(detailSelector),
isDetailed: logp.HasSelector(detailSelector),
sniffer: sniffer,
}

Expand Down Expand Up @@ -199,8 +199,11 @@ func (m *MetricSet) Run(r mb.PushReporterV2) {
if m.isDetailed {
m.detailLog.Debug(v.String())
}
if err := v.Update(st); err != nil {
m.log.Infof("error processing event '%s': %v", v.String(), err)
if err := v.Update(st); err != nil && m.isDetailed {
// These errors are seldom interesting, as the flow state engine
// doesn't have many error conditions and all benign enough to
// not be worth logging them by default.
m.detailLog.Warnf("Issue while processing event '%s': %v", v.String(), err)
}
atomic.AddUint64(&eventCount, 1)

Expand Down Expand Up @@ -356,15 +359,15 @@ func (m *MetricSet) Setup() (err error) {
return errors.Wrap(err, "unable to guess one or more required parameters")
}

if m.isDetailed {
if m.isDebug {
names := make([]string, 0, len(m.templateVars))
for name := range m.templateVars {
names = append(names, name)
}
sort.Strings(names)
m.log.Debugf("%d template variables in use:", len(m.templateVars))
for _, key := range names {
m.detailLog.Debugf(" %s = %v", key, m.templateVars[key])
m.log.Debugf(" %s = %v", key, m.templateVars[key])
}
}

Expand Down
15 changes: 14 additions & 1 deletion x-pack/auditbeat/module/system/socket/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,13 +541,26 @@ func (s *state) getProcess(pid uint32) *process {
return s.processes[pid]
}

type threadEnterError struct {
tid uint32
existing event
}

// Error is the error message string.
func (t threadEnterError) Error() string {
return fmt.Sprintf("thread already had an event. tid=%d existing=%v", t.tid, t.existing)
}

func (s *state) ThreadEnter(tid uint32, ev event) error {
s.Lock()
prev, hasPrev := s.threads[tid]
s.threads[tid] = ev
s.Unlock()
if hasPrev {
return fmt.Errorf("thread already had an event. tid=%d existing=%v", tid, prev)
return threadEnterError{
tid: tid,
existing: prev,
}
}
return nil
}
Expand Down

0 comments on commit 6a30867

Please sign in to comment.