From 053e6654cbc90038a8c851dd81d4cae3d6bea82a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guillermo=20Julia=CC=81n?= Date: Thu, 12 Sep 2024 16:57:35 +0200 Subject: [PATCH] Refactor istio monitor --- pkg/network/usm/istio.go | 179 ++++++---------------------------- pkg/network/usm/istio_test.go | 133 +++++++++++++------------ 2 files changed, 103 insertions(+), 209 deletions(-) diff --git a/pkg/network/usm/istio.go b/pkg/network/usm/istio.go index 12bcc0df940466..91cce852ff7bef 100644 --- a/pkg/network/usm/istio.go +++ b/pkg/network/usm/istio.go @@ -8,22 +8,20 @@ package usm import ( - "fmt" "strings" - "sync" - "time" + manager "github.com/DataDog/ebpf-manager" + + "github.com/DataDog/datadog-agent/pkg/ebpf/uprobes" "github.com/DataDog/datadog-agent/pkg/network/config" - "github.com/DataDog/datadog-agent/pkg/network/usm/utils" - "github.com/DataDog/datadog-agent/pkg/process/monitor" - "github.com/DataDog/datadog-agent/pkg/util/kernel" "github.com/DataDog/datadog-agent/pkg/util/log" - manager "github.com/DataDog/ebpf-manager" ) const ( istioSslReadRetprobe = "istio_uretprobe__SSL_read" istioSslWriteRetprobe = "istio_uretprobe__SSL_write" + + IstioAttacherName = "istio" ) var istioProbes = []manager.ProbesSelector{ @@ -80,64 +78,39 @@ var istioProbes = []manager.ProbesSelector{ // because the Envoy binary embedded in the Istio containers have debug symbols // whereas the "vanilla" Envoy images are distributed without them. type istioMonitor struct { - registry *utils.FileRegistry - procRoot string + attacher *uprobes.UprobeAttacher envoyCmd string - - // `utils.FileRegistry` callbacks - registerCB func(utils.FilePath) error - unregisterCB func(utils.FilePath) error - - // Termination - wg sync.WaitGroup - done chan struct{} } -// Validate that istioMonitor implements the Attacher interface. -var _ utils.Attacher = &istioMonitor{} - func newIstioMonitor(c *config.Config, mgr *manager.Manager) *istioMonitor { if !c.EnableIstioMonitoring { return nil } - procRoot := kernel.ProcFSRoot() - return &istioMonitor{ - registry: utils.NewFileRegistry("istio"), - procRoot: procRoot, + monitor := &istioMonitor{ envoyCmd: c.EnvoyPath, - done: make(chan struct{}), - - // Callbacks - registerCB: addHooks(mgr, procRoot, istioProbes), - unregisterCB: removeHooks(mgr, istioProbes), + attacher: nil, } -} -// DetachPID detaches a given pid from the eBPF program -func (m *istioMonitor) DetachPID(pid uint32) error { - return m.registry.Unregister(pid) -} - -var ( - // ErrNoEnvoyPath is returned when no envoy path is found for a given PID - ErrNoEnvoyPath = fmt.Errorf("no envoy path found for PID") -) + attachCfg := uprobes.AttacherConfig{ + ProcRoot: c.ProcRoot, + Rules: []*uprobes.AttachRule{{ + Targets: uprobes.AttachToExecutable, + ProbesSelector: nodeJSProbes, + ExecutableFilter: monitor.isIstioBinary, + }}, + EbpfConfig: &c.Config, + ExcludeTargets: uprobes.ExcludeSelf | uprobes.ExcludeInternal | uprobes.ExcludeBuildkit | uprobes.ExcludeContainerdTmp, + EnablePeriodicScanNewProcesses: true, + } -// AttachPID attaches a given pid to the eBPF program -func (m *istioMonitor) AttachPID(pid uint32) error { - path := m.getEnvoyPath(pid) - if path == "" { - return ErrNoEnvoyPath + attacher, err := uprobes.NewUprobeAttacher(IstioAttacherName, attachCfg, mgr, nil, &uprobes.NativeBinaryInspector{}) + if err != nil { + log.Errorf("Cannot create uprobe attacher: %v", err) } - return m.registry.Register( - path, - pid, - m.registerCB, - m.unregisterCB, - utils.IgnoreCB, - ) + monitor.attacher = attacher + return monitor } // Start the istioMonitor @@ -146,48 +119,7 @@ func (m *istioMonitor) Start() { return } - processMonitor := monitor.GetProcessMonitor() - - // Subscribe to process events - doneExec := processMonitor.SubscribeExec(m.handleProcessExec) - doneExit := processMonitor.SubscribeExit(m.handleProcessExit) - - // Attach to existing processes - m.sync() - - m.wg.Add(1) - go func() { - // This ticker is responsible for controlling the rate at which - // we scrape the whole procFS again in order to ensure that we - // terminate any dangling uprobes and register new processes - // missed by the process monitor stream - processSync := time.NewTicker(scanTerminatedProcessesInterval) - - defer func() { - processSync.Stop() - // Execute process monitor callback termination functions - doneExec() - doneExit() - // Stopping the process monitor (if we're the last instance) - processMonitor.Stop() - // Cleaning up all active hooks - m.registry.Clear() - // marking we're finished. - m.wg.Done() - }() - - for { - select { - case <-m.done: - return - case <-processSync.C: - m.sync() - m.registry.Log() - } - } - }() - - utils.AddAttacher("istio", m) + _ = m.attacher.Start() log.Info("Istio monitoring enabled") } @@ -197,62 +129,11 @@ func (m *istioMonitor) Stop() { return } - close(m.done) - m.wg.Wait() -} - -// sync state of istioMonitor with the current state of procFS -// the purpose of this method is two-fold: -// 1) register processes for which we missed exec events (targeted mostly at startup) -// 2) unregister processes for which we missed exit events -func (m *istioMonitor) sync() { - deletionCandidates := m.registry.GetRegisteredProcesses() - - _ = kernel.WithAllProcs(m.procRoot, func(pid int) error { - if _, ok := deletionCandidates[uint32(pid)]; ok { - // We have previously hooked into this process and it remains active, - // so we remove it from the deletionCandidates list, and move on to the next PID - delete(deletionCandidates, uint32(pid)) - return nil - } - - // This is a new PID so we attempt to attach SSL probes to it - _ = m.AttachPID(uint32(pid)) - return nil - }) - - // At this point all entries from deletionCandidates are no longer alive, so - // we should detach our SSL probes from them - for pid := range deletionCandidates { - m.handleProcessExit(pid) - } + m.attacher.Stop() } -func (m *istioMonitor) handleProcessExit(pid uint32) { - // We avoid filtering PIDs here because it's cheaper to simply do a registry lookup - // instead of fetching a process name in order to determine whether it is an - // envoy process or not (which at the very minimum involves syscalls) - _ = m.DetachPID(pid) -} - -func (m *istioMonitor) handleProcessExec(pid uint32) { - _ = m.AttachPID(pid) -} - -// getEnvoyPath returns the executable path of the envoy binary for a given PID. -// It constructs the path to the symbolic link for the executable file of the process with the given PID, -// then resolves this symlink to determine the actual path of the binary. -// -// If the resolved path contains the expected envoy command substring (as defined by m.envoyCmd), -// the function returns this path. If the PID does not correspond to an envoy process or if an error -// occurs during resolution, it returns an empty string. -func (m *istioMonitor) getEnvoyPath(pid uint32) string { - exePath := fmt.Sprintf("%s/%d/exe", m.procRoot, pid) - - envoyPath, err := utils.ResolveSymlink(exePath) - if err != nil || !strings.Contains(envoyPath, m.envoyCmd) { - return "" - } - - return envoyPath +// isIstioBinary checks whether the given file is an istioBinary, based on the expected envoy +// command substring (as defined by m.envoyCmd). +func (m *istioMonitor) isIstioBinary(path string, _ *uprobes.ProcInfo) bool { + return strings.Contains(path, m.envoyCmd) } diff --git a/pkg/network/usm/istio_test.go b/pkg/network/usm/istio_test.go index e61c3372ccc77a..8eb92a2fc14f52 100644 --- a/pkg/network/usm/istio_test.go +++ b/pkg/network/usm/istio_test.go @@ -8,32 +8,31 @@ package usm import ( - "os/exec" + "os" "path/filepath" - "strings" "testing" - "github.com/DataDog/datadog-agent/pkg/network/config" - "github.com/DataDog/datadog-agent/pkg/network/usm/utils" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + + "github.com/DataDog/datadog-agent/pkg/ebpf/uprobes" + "github.com/DataDog/datadog-agent/pkg/network/config" ) const ( defaultEnvoyName = "/bin/envoy" ) -func TestGetEnvoyPath(t *testing.T) { - _, pid := createFakeProcess(t, defaultEnvoyName) - monitor := newIstioTestMonitor(t) +func TestIsIstioBinary(t *testing.T) { + procRoot := uprobes.CreateFakeProcFS(t, []uprobes.FakeProcFSEntry{}) + m := newIstioTestMonitor(t, procRoot) t.Run("an actual envoy process", func(t *testing.T) { - path := monitor.getEnvoyPath(uint32(pid)) - assert.True(t, strings.HasSuffix(path, defaultEnvoyName)) + assert.True(t, m.isIstioBinary(defaultEnvoyName, uprobes.NewProcInfo(procRoot, 1))) }) t.Run("something else", func(t *testing.T) { - path := monitor.getEnvoyPath(uint32(2)) - assert.Empty(t, "", path) + assert.False(t, m.isIstioBinary("", uprobes.NewProcInfo(procRoot, 2))) }) } @@ -43,65 +42,78 @@ func TestGetEnvoyPathWithConfig(t *testing.T) { cfg.EnvoyPath = "/test/envoy" monitor := newIstioTestMonitorWithCFG(t, cfg) - _, pid := createFakeProcess(t, cfg.EnvoyPath) - - path := monitor.getEnvoyPath(uint32(pid)) - assert.True(t, strings.HasSuffix(path, cfg.EnvoyPath)) + assert.True(t, monitor.isIstioBinary(cfg.EnvoyPath, uprobes.NewProcInfo("", 0))) + assert.False(t, monitor.isIstioBinary("something/else/", uprobes.NewProcInfo("", 0))) } func TestIstioSync(t *testing.T) { - t.Run("calling sync multiple times", func(t *testing.T) { - procRoot1, _ := createFakeProcess(t, filepath.Join("test1", defaultEnvoyName)) - procRoot2, _ := createFakeProcess(t, filepath.Join("test2", defaultEnvoyName)) - monitor := newIstioTestMonitor(t) - registerRecorder := new(utils.CallbackRecorder) - - // Setup test callbacks - monitor.registerCB = registerRecorder.Callback() - monitor.unregisterCB = utils.IgnoreCB - - // Calling sync multiple times shouldn't matter. - // Once all envoy process are registered, calling it again shouldn't - // trigger additional callback executions - monitor.sync() - monitor.sync() - - pathID1, err := utils.NewPathIdentifier(procRoot1) - require.NoError(t, err) - - pathID2, err := utils.NewPathIdentifier(procRoot2) - require.NoError(t, err) - - // Each PathID should have triggered a callback exactly once - assert.Equal(t, 2, registerRecorder.TotalCalls()) - assert.Equal(t, 1, registerRecorder.CallsForPathID(pathID1)) - assert.Equal(t, 1, registerRecorder.CallsForPathID(pathID2)) + t.Run("calling sync for the first time", func(tt *testing.T) { + procRoot := uprobes.CreateFakeProcFS(tt, []uprobes.FakeProcFSEntry{ + {Pid: 1, Exe: defaultEnvoyName}, + {Pid: 2, Exe: "/bin/bash"}, + {Pid: 3, Exe: defaultEnvoyName}, + }) + monitor := newIstioTestMonitor(tt, procRoot) + + mockRegistry := &uprobes.MockFileRegistry{} + monitor.attacher.SetRegistry(mockRegistry) + mockRegistry.On("GetRegisteredProcesses").Return(map[uint32]struct{}{}) + mockRegistry.On("Register", defaultEnvoyName, uint32(1), mock.Anything, mock.Anything).Return(nil) + mockRegistry.On("Register", defaultEnvoyName, uint32(3), mock.Anything, mock.Anything).Return(nil) + + // Calling sync should detect the two envoy processes + monitor.attacher.Sync(true, true) + + mockRegistry.AssertExpectations(tt) }) -} -// createFakeProcess creates a fake process in a temporary location. -// returns the full path of the temporary process and the PID of the fake process. -func createFakeProcess(t *testing.T, processName string) (procRoot string, pid int) { - fakePath := filepath.Join(t.TempDir(), processName) - require.NoError(t, exec.Command("mkdir", "-p", filepath.Dir(fakePath)).Run()) - - // we are using the `yes` command as a fake envoy binary. - require.NoError(t, exec.Command("cp", "/usr/bin/yes", fakePath).Run()) - - cmd := exec.Command(fakePath) - require.NoError(t, cmd.Start()) - - // Schedule process termination after the test - t.Cleanup(func() { - _ = cmd.Process.Kill() + t.Run("detecting a dangling process", func(tt *testing.T) { + procRoot := uprobes.CreateFakeProcFS(tt, []uprobes.FakeProcFSEntry{ + {Pid: 1, Exe: defaultEnvoyName}, + {Pid: 2, Exe: "/bin/bash"}, + {Pid: 3, Exe: defaultEnvoyName}, + }) + monitor := newIstioTestMonitor(tt, procRoot) + + mockRegistry := &uprobes.MockFileRegistry{} + monitor.attacher.SetRegistry(mockRegistry) + mockRegistry.On("GetRegisteredProcesses").Return(map[uint32]struct{}{}) + mockRegistry.On("Register", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) // Tell the mock to just say ok to everything, we'll validate later + + monitor.attacher.Sync(true, true) + + mockRegistry.AssertCalled(tt, "Register", defaultEnvoyName, uint32(1), mock.Anything, mock.Anything) + mockRegistry.AssertCalled(tt, "Register", defaultEnvoyName, uint32(3), mock.Anything, mock.Anything) + mockRegistry.AssertCalled(tt, "GetRegisteredProcesses") + + // At this point we should have received: + // * 2 register calls + // * 1 GetRegisteredProcesses call + // * 0 unregister calls + require.Equal(tt, 3, len(mockRegistry.Calls), "calls made: %v", mockRegistry.Calls) + mockRegistry.AssertNotCalled(t, "Unregister", mock.Anything) + + // Now we emulate a process termination for PID 3 by removing it from the fake + // procFS tree + require.NoError(tt, os.RemoveAll(filepath.Join(procRoot, "3"))) + + // Now clear the mock registry expected calls and make it return the state as if the two PIDs were registered + mockRegistry.ExpectedCalls = nil + mockRegistry.On("Register", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) // Tell the mock to just say ok to everything, we'll validate later + mockRegistry.On("GetRegisteredProcesses").Return(map[uint32]struct{}{1: {}, 3: {}}) + mockRegistry.On("Unregister", mock.Anything).Return(nil) + + // Once we call sync() again, PID 3 termination should be detected + // and the unregister callback should be executed + monitor.attacher.Sync(true, true) + mockRegistry.AssertCalled(tt, "Unregister", uint32(3)) }) - - return fakePath, cmd.Process.Pid } -func newIstioTestMonitor(t *testing.T) *istioMonitor { +func newIstioTestMonitor(t *testing.T, procRoot string) *istioMonitor { cfg := config.New() cfg.EnableIstioMonitoring = true + cfg.ProcRoot = procRoot return newIstioTestMonitorWithCFG(t, cfg) } @@ -109,5 +121,6 @@ func newIstioTestMonitor(t *testing.T) *istioMonitor { func newIstioTestMonitorWithCFG(t *testing.T, cfg *config.Config) *istioMonitor { monitor := newIstioMonitor(cfg, nil) require.NotNil(t, monitor) + return monitor }