Skip to content

Commit

Permalink
[usm] service discovery add bypassed processes (#30278)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuri-lipnesh authored Oct 23, 2024
1 parent db780eb commit 46a89d2
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 56 deletions.
63 changes: 63 additions & 0 deletions pkg/collector/corechecks/servicediscovery/module/comm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build linux

package module

import (
"bytes"
"os"
"strconv"
"strings"

"github.com/shirou/gopsutil/v3/process"

"github.com/DataDog/datadog-agent/pkg/util/kernel"
)

// ignoreComms is a list of process names that should not be reported as a service.
var ignoreComms = map[string]struct{}{
"systemd": {}, // manages system and service components
"dhclient": {}, // utility that uses the DHCP to configure network interfaces
"local-volume-pr": {}, // 'local-volume-provisioner' manages the lifecycle of Persistent Volumes
"sshd": {}, // a daemon that handles secure communication
"cilium-agent": {}, // accepts configuration for networking, load balancing etc. (like cilium-agent)
"kubelet": {}, // Kubernetes agent
"chronyd": {}, // a daemon that implements the Network Time Protocol (NTP)
"containerd": {}, // engine to run containers
"dockerd": {}, // engine to run containers and 'docker-proxy'
"livenessprobe": {}, // Kubernetes tool that monitors a container's health
}

// ignoreFamily list of commands that should not be reported as a service.
var ignoreFamily = map[string]struct{}{
"systemd": {}, // 'systemd-networkd', 'systemd-resolved' etc
"datadog": {}, // datadog processes
"containerd": {}, // 'containerd-shim...'
"docker": {}, // 'docker-proxy'
}

// shouldIgnoreComm returns true if process should be ignored
func shouldIgnoreComm(proc *process.Process) bool {
commPath := kernel.HostProc(strconv.Itoa(int(proc.Pid)), "comm")
contents, err := os.ReadFile(commPath)
if err != nil {
return true
}

dash := bytes.IndexByte(contents, '-')
if dash > 0 {
_, found := ignoreFamily[string(contents[:dash])]
if found {
return true
}
}

comm := strings.TrimSuffix(string(contents), "\n")
_, found := ignoreComms[comm]

return found
}
207 changes: 207 additions & 0 deletions pkg/collector/corechecks/servicediscovery/module/comm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build test && linux_bpf

package module

import (
"context"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

httptestutil "github.com/DataDog/datadog-agent/pkg/network/protocols/http/testutil"
"github.com/DataDog/datadog-agent/pkg/network/usm/testutil"
)

const (
longComm = "this-is_command-name-longer-than-sixteen-bytes"
)

// TestIgnoreComm checks that the 'sshd' command is ignored and the 'node' command is not
func TestIgnoreComm(t *testing.T) {
serverDir := buildFakeServer(t)
url := setupDiscoveryModule(t)

ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(func() { cancel() })

badBin := filepath.Join(serverDir, "sshd")
badCmd := exec.CommandContext(ctx, badBin)
require.NoError(t, badCmd.Start())

// Also run a non-ignored server so that we can use it in the eventually
// loop below so that we don't have to wait a long time to be sure that we
// really ignored badBin and just didn't miss it because of a race.
goodBin := filepath.Join(serverDir, "node")
goodCmd := exec.CommandContext(ctx, goodBin)
require.NoError(t, goodCmd.Start())

goodPid := goodCmd.Process.Pid
badPid := badCmd.Process.Pid

require.EventuallyWithT(t, func(collect *assert.CollectT) {
svcMap := getServicesMap(t, url)
assert.Contains(collect, svcMap, goodPid)
require.NotContains(t, svcMap, badPid)
}, 30*time.Second, 100*time.Millisecond)
}

// TestIgnoreCommsLengths checks that the map contains names no longer than 15 bytes.
func TestIgnoreCommsLengths(t *testing.T) {
for comm := range ignoreComms {
assert.LessOrEqual(t, len(comm), 15, "Process name %q too big", comm)
}
}

// buildTestBin returns the path to a binary file
func buildTestBin(t *testing.T) string {
curDir, err := httptestutil.CurDir()
require.NoError(t, err)

serverBin, err := testutil.BuildGoBinaryWrapper(filepath.Join(curDir, "testutil"), "fake_server")
require.NoError(t, err)

return serverBin
}

// TestShouldIgnoreComm check cases of ignored and non-ignored processes
func TestShouldIgnoreComm(t *testing.T) {
testCases := []struct {
name string
comm string
ignore bool
}{
{
name: "should ignore command docker-proxy",
comm: "docker-proxy",
ignore: true,
},
{
name: "should ignore command containerd",
comm: "containerd",
ignore: true,
},
{
name: "should ignore command local-volume-provisioner",
comm: "local-volume-provisioner",
ignore: true,
},
{
name: "should not ignore command java-some",
comm: "java-some",
ignore: false,
},
{
name: "should not ignore command long command",
comm: longComm,
ignore: false,
},
}

serverBin := buildTestBin(t)
serverDir := filepath.Dir(serverBin)

for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(func() { cancel() })

makeAlias(t, test.comm, serverBin)
bin := filepath.Join(serverDir, test.comm)
cmd := exec.CommandContext(ctx, bin)
err := cmd.Start()
require.NoError(t, err)

require.EventuallyWithT(t, func(collect *assert.CollectT) {
proc, err := customNewProcess(int32(cmd.Process.Pid))
require.NoError(t, err)
ignore := shouldIgnoreComm(proc)
require.Equal(t, test.ignore, ignore)
}, 30*time.Second, 100*time.Millisecond)
})
}
}

func makeSymLink(b *testing.B, src string, dst string) {
target, err := os.Readlink(dst)
if err == nil && target == dst {
return
}

err = os.Symlink(src, dst)
if err != nil {
b.Fatal(err)
}
}

// startProcessLongComm starts a process with a long command name, used to benchmark command name extraction.
func startProcessLongComm(b *testing.B) *exec.Cmd {
b.Helper()

dstPath := filepath.Join(b.TempDir(), longComm)
ctx, cancel := context.WithCancel(context.Background())
b.Cleanup(func() {
cancel()
os.Remove(dstPath)
})
makeSymLink(b, "/bin/sleep", dstPath)

cmd := exec.CommandContext(ctx, dstPath, "49")
err := cmd.Start()
if err != nil {
b.Fatal(err)
}

return cmd
}

// BenchmarkProcName benchmarks reading of entire command name from /proc.
func BenchmarkProcName(b *testing.B) {
cmd := startProcessLongComm(b)

b.ResetTimer()
b.ReportAllocs()

for i := 0; i < b.N; i++ {
// create a new process on each iteration to eliminate name caching from the calculation
proc, err := customNewProcess(int32(cmd.Process.Pid))
if err != nil {
b.Fatal(err)
}
comm, err := proc.Name()
if err != nil {
b.Fatal(err)
}
if len(comm) != len(longComm) {
b.Fatalf("wrong comm length, expected: %d, got: %d (%s)", len(longComm), len(comm), comm)
}
}
}

// BenchmarkShouldIgnoreComm benchmarks reading of command name from /proc/<pid>/comm.
func BenchmarkShouldIgnoreComm(b *testing.B) {
cmd := startProcessLongComm(b)

b.ResetTimer()
b.ReportAllocs()

for i := 0; i < b.N; i++ {
proc, err := customNewProcess(int32(cmd.Process.Pid))
if err != nil {
b.Fatal(err)
}
ok := shouldIgnoreComm(proc)
if ok {
b.Fatalf("process should not have been ignored")
}
}
}
20 changes: 1 addition & 19 deletions pkg/collector/corechecks/servicediscovery/module/impl_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,19 +391,6 @@ func customNewProcess(pid int32) (*process.Process, error) {
return p, nil
}

// ignoreComms is a list of process names (matched against /proc/PID/comm) to
// never report as a service. Note that comm is limited to 16 characters.
var ignoreComms = map[string]struct{}{
"sshd": {},
"dhclient": {},
"systemd": {},
"systemd-resolved": {},
"systemd-networkd": {},
"datadog-agent": {},
"livenessprobe": {},
"docker-proxy": {},
}

// maxNumberOfPorts is the maximum number of listening ports which we report per
// service.
const maxNumberOfPorts = 50
Expand All @@ -415,12 +402,7 @@ func (s *discovery) getService(context parsingContext, pid int32) *model.Service
return nil
}

comm, err := proc.Name()
if err != nil {
return nil
}

if _, found := ignoreComms[comm]; found {
if shouldIgnoreComm(proc) {
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

// This doesn't need BPF but it's built with this tag to only run with
// This doesn't need BPF, but it's built with this tag to only run with
// system-probe tests.
//go:build linux_bpf
//go:build test && linux_bpf

package module

Expand Down Expand Up @@ -624,41 +624,6 @@ func TestCommandLineSanitization(t *testing.T) {
}, 30*time.Second, 100*time.Millisecond)
}

func TestIgnore(t *testing.T) {
serverDir := buildFakeServer(t)
url := setupDiscoveryModule(t)

ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(func() { cancel() })

badBin := filepath.Join(serverDir, "sshd")
badCmd := exec.CommandContext(ctx, badBin)
require.NoError(t, badCmd.Start())

// Also run a non-ignored server so that we can use it in the eventually
// loop below so that we don't have to wait a long time to be sure that we
// really ignored badBin and just didn't miss it because of a race.
goodBin := filepath.Join(serverDir, "node")
goodCmd := exec.CommandContext(ctx, goodBin)
require.NoError(t, goodCmd.Start())

goodPid := goodCmd.Process.Pid
badPid := badCmd.Process.Pid

require.EventuallyWithT(t, func(collect *assert.CollectT) {
svcMap := getServicesMap(t, url)
assert.Contains(collect, svcMap, goodPid)
require.NotContains(t, svcMap, badPid)
}, 30*time.Second, 100*time.Millisecond)
}

func TestIgnoreCommsLengths(t *testing.T) {
for comm := range ignoreComms {
// /proc/PID/comm is limited to 16 characters.
assert.LessOrEqual(t, len(comm), 16, "Process name %q too big", comm)
}
}

func TestNodeDocker(t *testing.T) {
cert, key, err := testutil.GetCertsPaths()
require.NoError(t, err)
Expand Down

0 comments on commit 46a89d2

Please sign in to comment.