From ff9f37a7335f6c04d3387b3998067cfa07620f4e Mon Sep 17 00:00:00 2001 From: Pierre Gimalac Date: Mon, 5 Aug 2024 18:46:28 +0200 Subject: [PATCH 01/19] [revive] allow unused variables starting with underscore (#28162) --- .golangci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.golangci.yml b/.golangci.yml index 054fde646c0b69..d23f863c2aa7b5 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -149,6 +149,8 @@ linters-settings: - name: unexported-return - name: unreachable-code - name: unused-parameter + arguments: + - allowRegex: "^_" - name: var-declaration - name: var-naming # non-default rules: From ca8c80b7c199ef543e14a907a10b0cd7c0e34dee Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 5 Aug 2024 10:45:33 -0700 Subject: [PATCH 02/19] fix system-probe log_level runtime setting (#28106) --- cmd/system-probe/subcommands/run/command.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/system-probe/subcommands/run/command.go b/cmd/system-probe/subcommands/run/command.go index a3bf15fc97394b..752ee507af0a41 100644 --- a/cmd/system-probe/subcommands/run/command.go +++ b/cmd/system-probe/subcommands/run/command.go @@ -114,7 +114,7 @@ func Commands(globalParams *command.GlobalParams) []*cobra.Command { return settings.Params{ Settings: map[string]settings.RuntimeSetting{ - "log_level": &commonsettings.LogLevelRuntimeSetting{ConfigKey: configPrefix + "log_level"}, + "log_level": commonsettings.NewLogLevelRuntimeSetting(), "runtime_mutex_profile_fraction": &commonsettings.RuntimeMutexProfileFraction{ConfigPrefix: configPrefix}, "runtime_block_profile_rate": &commonsettings.RuntimeBlockProfileRate{ConfigPrefix: configPrefix}, "internal_profiling_goroutines": profilingGoRoutines, @@ -270,7 +270,7 @@ func runSystemProbe(ctxChan <-chan context.Context, errChan chan error) error { return settings.Params{ Settings: map[string]settings.RuntimeSetting{ - "log_level": &commonsettings.LogLevelRuntimeSetting{ConfigKey: configPrefix + "log_level"}, + "log_level": commonsettings.NewLogLevelRuntimeSetting(), "runtime_mutex_profile_fraction": &commonsettings.RuntimeMutexProfileFraction{ConfigPrefix: configPrefix}, "runtime_block_profile_rate": &commonsettings.RuntimeBlockProfileRate{ConfigPrefix: configPrefix}, "internal_profiling_goroutines": profilingGoRoutines, From 66de37b3dcbe0d39fe1c66b0ed014440f639c6ca Mon Sep 17 00:00:00 2001 From: Adam Karpowich Date: Mon, 5 Aug 2024 14:02:53 -0400 Subject: [PATCH 03/19] [NPM] skip eexist errors in bpf telemetry (#28170) --- pkg/network/ebpf/c/tracer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/network/ebpf/c/tracer.c b/pkg/network/ebpf/c/tracer.c index 72bf1750c5ae11..66482fb92a0922 100644 --- a/pkg/network/ebpf/c/tracer.c +++ b/pkg/network/ebpf/c/tracer.c @@ -234,8 +234,9 @@ int BPF_BYPASSABLE_KPROBE(kprobe__tcp_done, struct sock *sk) { // check if this connection was already flushed and ensure we don't flush again // upsert the timestamp to the map and delete if it already exists, flush connection otherwise + // skip EEXIST errors for telemetry since it is an expected error __u64 timestamp = bpf_ktime_get_ns(); - if (bpf_map_update_with_telemetry(conn_close_flushed, &t, ×tamp, BPF_NOEXIST) == 0) { + if (bpf_map_update_with_telemetry(conn_close_flushed, &t, ×tamp, BPF_NOEXIST, -EEXIST) == 0) { cleanup_conn(ctx, &t, sk); } else { bpf_map_delete_elem(&conn_close_flushed, &t); @@ -278,8 +279,9 @@ int BPF_BYPASSABLE_KPROBE(kprobe__tcp_close, struct sock *sk) { // check if this connection was already 
flushed and ensure we don't flush again // upsert the timestamp to the map and delete if it already exists, flush connection otherwise + // skip EEXIST errors for telemetry since it is an expected error __u64 timestamp = bpf_ktime_get_ns(); - if (!tcp_failed_connections_enabled() || (bpf_map_update_with_telemetry(conn_close_flushed, &t, ×tamp, BPF_NOEXIST) == 0)) { + if (!tcp_failed_connections_enabled() || (bpf_map_update_with_telemetry(conn_close_flushed, &t, ×tamp, BPF_NOEXIST, -EEXIST) == 0)) { cleanup_conn(ctx, &t, sk); } else { bpf_map_delete_elem(&conn_close_flushed, &t); From 7424b2490cb74f6fe2aa428a35d663455a04fb84 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 5 Aug 2024 12:29:58 -0700 Subject: [PATCH 04/19] add NPM connection batch extractor (#28174) --- .../tracer/connection/batch_extractor.go | 111 +++++++++++++++ .../tracer/connection/batch_extractor_test.go | 70 +++++++++ .../tracer/connection/perf_batching.go | 133 +++--------------- .../tracer/connection/perf_batching_test.go | 76 ++-------- pkg/network/tracer/connection/tracer.go | 7 +- 5 files changed, 218 insertions(+), 179 deletions(-) create mode 100644 pkg/network/tracer/connection/batch_extractor.go create mode 100644 pkg/network/tracer/connection/batch_extractor_test.go diff --git a/pkg/network/tracer/connection/batch_extractor.go b/pkg/network/tracer/connection/batch_extractor.go new file mode 100644 index 00000000000000..27a836ba79d7ed --- /dev/null +++ b/pkg/network/tracer/connection/batch_extractor.go @@ -0,0 +1,111 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +//go:build linux_bpf + +package connection + +import ( + "time" + + netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" +) + +const defaultExpiredStateInterval = 60 * time.Second + +type batchExtractor struct { + numCPUs int + // stateByCPU contains the state of each batch. + // The slice is indexed by the CPU core number. + stateByCPU []percpuState + expiredStateInterval time.Duration +} + +type percpuState struct { + // map of batch id -> offset of conns already processed by GetPendingConns + processed map[uint64]batchState +} + +type batchState struct { + offset uint16 + updated time.Time +} + +func newBatchExtractor(numCPUs int) *batchExtractor { + state := make([]percpuState, numCPUs) + for cpu := 0; cpu < numCPUs; cpu++ { + state[cpu] = percpuState{ + processed: make(map[uint64]batchState), + } + } + return &batchExtractor{ + numCPUs: numCPUs, + stateByCPU: state, + expiredStateInterval: defaultExpiredStateInterval, + } +} + +// NumCPUs returns the number of CPUs the batch extractor has been initialized for +func (e *batchExtractor) NumCPUs() int { + return e.numCPUs +} + +// NextConnection returns the next unprocessed connection from the batch. +// Returns nil if no more connections are left. 
+func (e *batchExtractor) NextConnection(b *netebpf.Batch) *netebpf.Conn { + cpu := int(b.Cpu) + if cpu >= e.numCPUs { + return nil + } + if b.Len == 0 { + return nil + } + + batchID := b.Id + cpuState := &e.stateByCPU[cpu] + offset := uint16(0) + if bState, ok := cpuState.processed[batchID]; ok { + offset = bState.offset + if offset >= netebpf.BatchSize { + delete(cpuState.processed, batchID) + return nil + } + if offset >= b.Len { + return nil + } + } + + defer func() { + cpuState.processed[batchID] = batchState{ + offset: offset + 1, + updated: time.Now(), + } + }() + + switch offset { + case 0: + return &b.C0 + case 1: + return &b.C1 + case 2: + return &b.C2 + case 3: + return &b.C3 + default: + panic("batch size is out of sync") + } +} + +// CleanupExpiredState removes entries from per-cpu state that haven't been updated in the last minute +func (e *batchExtractor) CleanupExpiredState(now time.Time) { + for cpu := 0; cpu < len(e.stateByCPU); cpu++ { + cpuState := &e.stateByCPU[cpu] + for id, s := range cpuState.processed { + if now.Sub(s.updated) >= e.expiredStateInterval { + delete(cpuState.processed, id) + } + } + } +} diff --git a/pkg/network/tracer/connection/batch_extractor_test.go b/pkg/network/tracer/connection/batch_extractor_test.go new file mode 100644 index 00000000000000..bc72af85bf5da3 --- /dev/null +++ b/pkg/network/tracer/connection/batch_extractor_test.go @@ -0,0 +1,70 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +//go:build linux_bpf + +package connection + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" +) + +const ( + numTestCPUs = 4 +) + +func TestBatchExtract(t *testing.T) { + t.Run("normal flush", func(t *testing.T) { + extractor := newBatchExtractor(numTestCPUs) + + batch := new(netebpf.Batch) + batch.Len = 4 + batch.Id = 0 + batch.Cpu = 0 + batch.C0.Tup.Pid = 1 + batch.C1.Tup.Pid = 2 + batch.C2.Tup.Pid = 3 + batch.C3.Tup.Pid = 4 + + var conns []*netebpf.Conn + for rc := extractor.NextConnection(batch); rc != nil; rc = extractor.NextConnection(batch) { + conns = append(conns, rc) + } + require.Len(t, conns, 4) + assert.Equal(t, uint32(1), conns[0].Tup.Pid) + assert.Equal(t, uint32(2), conns[1].Tup.Pid) + assert.Equal(t, uint32(3), conns[2].Tup.Pid) + assert.Equal(t, uint32(4), conns[3].Tup.Pid) + }) + + t.Run("partial flush", func(t *testing.T) { + extractor := newBatchExtractor(numTestCPUs) + // Simulate a partial flush + extractor.stateByCPU[0].processed = map[uint64]batchState{ + 0: {offset: 3}, + } + + batch := new(netebpf.Batch) + batch.Len = 4 + batch.Id = 0 + batch.Cpu = 0 + batch.C0.Tup.Pid = 1 + batch.C1.Tup.Pid = 2 + batch.C2.Tup.Pid = 3 + batch.C3.Tup.Pid = 4 + + var conns []*netebpf.Conn + for rc := extractor.NextConnection(batch); rc != nil; rc = extractor.NextConnection(batch) { + conns = append(conns, rc) + } + assert.Len(t, conns, 1) + assert.Equal(t, uint32(4), conns[0].Tup.Pid) + }) +} diff --git a/pkg/network/tracer/connection/perf_batching.go b/pkg/network/tracer/connection/perf_batching.go index 51b46bf7c7a3f8..03d38eaee3af6c 100644 --- a/pkg/network/tracer/connection/perf_batching.go +++ b/pkg/network/tracer/connection/perf_batching.go @@ -12,7 +12,6 @@ import ( "time" manager "github.com/DataDog/ebpf-manager" - cebpf 
"github.com/cilium/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/maps" "github.com/DataDog/datadog-agent/pkg/network" @@ -20,8 +19,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" ) -const defaultExpiredStateInterval = 60 * time.Second - // perfBatchManager is responsible for two things: // // * Keeping track of the state of each batch object we read off the perf ring; @@ -30,27 +27,19 @@ const defaultExpiredStateInterval = 60 * time.Second // The motivation is to impose an upper limit on how long a TCP close connection // event remains stored in the eBPF map before being processed by the NetworkAgent. type perfBatchManager struct { - // eBPF - batchMap *maps.GenericMap[uint32, netebpf.Batch] - - // stateByCPU contains the state of each batch. - // The slice is indexed by the CPU core number. - stateByCPU []percpuState - - expiredStateInterval time.Duration - - ch *cookieHasher + batchMap *maps.GenericMap[uint32, netebpf.Batch] + extractor *batchExtractor + ch *cookieHasher } // newPerfBatchManager returns a new `PerfBatchManager` and initializes the // eBPF map that holds the tcp_close batch objects. -func newPerfBatchManager(batchMap *maps.GenericMap[uint32, netebpf.Batch], numCPUs uint32) (*perfBatchManager, error) { +func newPerfBatchManager(batchMap *maps.GenericMap[uint32, netebpf.Batch], extractor *batchExtractor) (*perfBatchManager, error) { if batchMap == nil { return nil, fmt.Errorf("batchMap is nil") } - state := make([]percpuState, numCPUs) - for cpu := uint32(0); cpu < numCPUs; cpu++ { + for cpu := uint32(0); cpu < uint32(extractor.NumCPUs()); cpu++ { b := new(netebpf.Batch) // Ring buffer events don't have CPU information, so we associate each // batch entry with a CPU during startup. This information is used by @@ -59,35 +48,22 @@ func newPerfBatchManager(batchMap *maps.GenericMap[uint32, netebpf.Batch], numCP if err := batchMap.Put(&cpu, b); err != nil { return nil, fmt.Errorf("error initializing perf batch manager maps: %w", err) } - state[cpu] = percpuState{ - processed: make(map[uint64]batchState), - } } return &perfBatchManager{ - batchMap: batchMap, - stateByCPU: state, - expiredStateInterval: defaultExpiredStateInterval, - ch: newCookieHasher(), + batchMap: batchMap, + extractor: extractor, + ch: newCookieHasher(), }, nil } // ExtractBatchInto extracts from the given batch all connections that haven't been processed yet. func (p *perfBatchManager) ExtractBatchInto(buffer *network.ConnectionBuffer, b *netebpf.Batch) { - cpu := int(b.Cpu) - if cpu >= len(p.stateByCPU) { - return - } - - batchID := b.Id - cpuState := &p.stateByCPU[cpu] - start := uint16(0) - if bState, ok := cpuState.processed[batchID]; ok { - start = bState.offset + for rc := p.extractor.NextConnection(b); rc != nil; rc = p.extractor.NextConnection(b) { + conn := buffer.Next() + populateConnStats(conn, &rc.Tup, &rc.Conn_stats, p.ch) + updateTCPStats(conn, &rc.Tcp_stats, rc.Tcp_retransmits) } - - p.extractBatchInto(buffer, b, start, netebpf.BatchSize) - delete(cpuState.processed, batchID) } // GetPendingConns return all connections that are in batches that are not yet full. @@ -95,96 +71,27 @@ func (p *perfBatchManager) ExtractBatchInto(buffer *network.ConnectionBuffer, b // This prevents double-processing of connections between GetPendingConns and Extract. 
func (p *perfBatchManager) GetPendingConns(buffer *network.ConnectionBuffer) { b := new(netebpf.Batch) - for cpu := uint32(0); cpu < uint32(len(p.stateByCPU)); cpu++ { - cpuState := &p.stateByCPU[cpu] - + for cpu := uint32(0); cpu < uint32(p.extractor.NumCPUs()); cpu++ { err := p.batchMap.Lookup(&cpu, b) if err != nil { continue } - batchLen := b.Len - if batchLen == 0 { - continue - } - - // have we already processed these messages? - start := uint16(0) - batchID := b.Id - if bState, ok := cpuState.processed[batchID]; ok { - start = bState.offset - } - - p.extractBatchInto(buffer, b, start, batchLen) - // update timestamp regardless since this partial batch still exists - cpuState.processed[batchID] = batchState{offset: batchLen, updated: time.Now()} - } - - p.cleanupExpiredState(time.Now()) -} - -type percpuState struct { - // map of batch id -> offset of conns already processed by GetPendingConns - processed map[uint64]batchState -} - -type batchState struct { - offset uint16 - updated time.Time -} - -// ExtractBatchInto extract network.ConnectionStats objects from the given `batch` into the supplied `buffer`. -// The `start` (inclusive) and `end` (exclusive) arguments represent the offsets of the connections we're interested in. -func (p *perfBatchManager) extractBatchInto(buffer *network.ConnectionBuffer, b *netebpf.Batch, start, end uint16) { - if start >= end || end > netebpf.BatchSize { - return - } - - var ct netebpf.Conn - for i := start; i < end; i++ { - switch i { - case 0: - ct = b.C0 - case 1: - ct = b.C1 - case 2: - ct = b.C2 - case 3: - ct = b.C3 - default: - panic("batch size is out of sync") - } - - conn := buffer.Next() - populateConnStats(conn, &ct.Tup, &ct.Conn_stats, p.ch) - updateTCPStats(conn, &ct.Tcp_stats, ct.Tcp_retransmits) - } -} - -func (p *perfBatchManager) cleanupExpiredState(now time.Time) { - for cpu := 0; cpu < len(p.stateByCPU); cpu++ { - cpuState := &p.stateByCPU[cpu] - for id, s := range cpuState.processed { - if now.Sub(s.updated) >= p.expiredStateInterval { - delete(cpuState.processed, id) - } + for rc := p.extractor.NextConnection(b); rc != nil; rc = p.extractor.NextConnection(b) { + c := buffer.Next() + populateConnStats(c, &rc.Tup, &rc.Conn_stats, p.ch) + updateTCPStats(c, &rc.Tcp_stats, rc.Tcp_retransmits) } } + p.extractor.CleanupExpiredState(time.Now()) } -func newConnBatchManager(mgr *manager.Manager) (*perfBatchManager, error) { +func newConnBatchManager(mgr *manager.Manager, extractor *batchExtractor) (*perfBatchManager, error) { connCloseMap, err := maps.GetMap[uint32, netebpf.Batch](mgr, probes.ConnCloseBatchMap) if err != nil { return nil, fmt.Errorf("unable to get map %s: %s", probes.ConnCloseBatchMap, err) } - numCPUs, err := cebpf.PossibleCPU() - if err != nil { - return nil, fmt.Errorf("unable to get number of CPUs: %s", err) - } - if err != nil { - return nil, err - } - batchMgr, err := newPerfBatchManager(connCloseMap, uint32(numCPUs)) + batchMgr, err := newPerfBatchManager(connCloseMap, extractor) if err != nil { return nil, err } diff --git a/pkg/network/tracer/connection/perf_batching_test.go b/pkg/network/tracer/connection/perf_batching_test.go index 8e6cbd28ce9350..c7d22aaff83aa8 100644 --- a/pkg/network/tracer/connection/perf_batching_test.go +++ b/pkg/network/tracer/connection/perf_batching_test.go @@ -22,56 +22,9 @@ import ( ) const ( - numTestCPUs = 4 - pidMax uint32 = 1 << 22 // PID_MAX_LIMIT on 64 bit systems + pidMax uint32 = 1 << 22 // PID_MAX_LIMIT on 64bit systems ) -func TestPerfBatchManagerExtract(t *testing.T) { - 
t.Run("normal flush", func(t *testing.T) { - manager := newEmptyBatchManager() - - batch := new(netebpf.Batch) - batch.Id = 0 - batch.Cpu = 0 - batch.C0.Tup.Pid = 1 - batch.C1.Tup.Pid = 2 - batch.C2.Tup.Pid = 3 - batch.C3.Tup.Pid = 4 - - buffer := network.NewConnectionBuffer(256, 256) - manager.ExtractBatchInto(buffer, batch) - conns := buffer.Connections() - assert.Len(t, conns, 4) - assert.Equal(t, uint32(1), conns[0].Pid) - assert.Equal(t, uint32(2), conns[1].Pid) - assert.Equal(t, uint32(3), conns[2].Pid) - assert.Equal(t, uint32(4), conns[3].Pid) - }) - - t.Run("partial flush", func(t *testing.T) { - manager := newEmptyBatchManager() - - batch := new(netebpf.Batch) - batch.Id = 0 - batch.Cpu = 0 - batch.C0.Tup.Pid = 1 - batch.C1.Tup.Pid = 2 - batch.C2.Tup.Pid = 3 - batch.C3.Tup.Pid = 4 - - // Simulate a partial flush - manager.stateByCPU[0].processed = map[uint64]batchState{ - 0: {offset: 3}, - } - - buffer := network.NewConnectionBuffer(256, 256) - manager.ExtractBatchInto(buffer, batch) - conns := buffer.Connections() - assert.Len(t, conns, 1) - assert.Equal(t, uint32(4), conns[0].Pid) - }) -} - func TestGetPendingConns(t *testing.T) { manager := newTestBatchManager(t) @@ -128,7 +81,7 @@ func TestGetPendingConns(t *testing.T) { func TestPerfBatchStateCleanup(t *testing.T) { manager := newTestBatchManager(t) - manager.expiredStateInterval = 100 * time.Millisecond + manager.extractor.expiredStateInterval = 100 * time.Millisecond batch := new(netebpf.Batch) batch.Id = 0 @@ -142,25 +95,17 @@ func TestPerfBatchStateCleanup(t *testing.T) { buffer := network.NewConnectionBuffer(256, 256) manager.GetPendingConns(buffer) - _, ok := manager.stateByCPU[cpu].processed[batch.Id] + _, ok := manager.extractor.stateByCPU[cpu].processed[batch.Id] require.True(t, ok) - assert.Equal(t, uint16(2), manager.stateByCPU[cpu].processed[batch.Id].offset) + assert.Equal(t, uint16(2), manager.extractor.stateByCPU[cpu].processed[batch.Id].offset) - manager.cleanupExpiredState(time.Now().Add(manager.expiredStateInterval)) + manager.extractor.CleanupExpiredState(time.Now().Add(manager.extractor.expiredStateInterval)) manager.GetPendingConns(buffer) // state should not have been cleaned up, since no more connections have happened - _, ok = manager.stateByCPU[cpu].processed[batch.Id] + _, ok = manager.extractor.stateByCPU[cpu].processed[batch.Id] require.True(t, ok) - assert.Equal(t, uint16(2), manager.stateByCPU[cpu].processed[batch.Id].offset) -} - -func newEmptyBatchManager() *perfBatchManager { - p := perfBatchManager{stateByCPU: make([]percpuState, numTestCPUs)} - for cpu := 0; cpu < numTestCPUs; cpu++ { - p.stateByCPU[cpu] = percpuState{processed: make(map[uint64]batchState)} - } - return &p + assert.Equal(t, uint16(2), manager.extractor.stateByCPU[cpu].processed[batch.Id].offset) } func newTestBatchManager(t *testing.T) *perfBatchManager { @@ -169,14 +114,15 @@ func newTestBatchManager(t *testing.T) *perfBatchManager { Type: ebpf.Hash, KeySize: 4, ValueSize: netebpf.SizeofBatch, - MaxEntries: 1024, + MaxEntries: numTestCPUs, }) require.NoError(t, err) - t.Cleanup(func() { m.Close() }) + t.Cleanup(func() { _ = m.Close() }) gm, err := ebpfmaps.Map[uint32, netebpf.Batch](m) require.NoError(t, err) - mgr, err := newPerfBatchManager(gm, numTestCPUs) + extractor := newBatchExtractor(numTestCPUs) + mgr, err := newPerfBatchManager(gm, extractor) require.NoError(t, err) return mgr } diff --git a/pkg/network/tracer/connection/tracer.go b/pkg/network/tracer/connection/tracer.go index e3e3efd2c770e1..46623198a21782 
100644 --- a/pkg/network/tracer/connection/tracer.go +++ b/pkg/network/tracer/connection/tracer.go @@ -278,7 +278,12 @@ func NewTracer(config *config.Config, _ telemetryComponent.Component) (Tracer, e m.DumpHandler = dumpMapsHandler ddebpf.AddNameMappings(m, "npm_tracer") - batchMgr, err := newConnBatchManager(m) + numCPUs, err := ebpf.PossibleCPU() + if err != nil { + return nil, fmt.Errorf("could not determine number of CPUs: %w", err) + } + extractor := newBatchExtractor(numCPUs) + batchMgr, err := newConnBatchManager(m, extractor) if err != nil { return nil, fmt.Errorf("could not create connection batch manager: %w", err) } From 9c4bae680109488c88d27e86fb71d354990ceb79 Mon Sep 17 00:00:00 2001 From: Branden Clark Date: Mon, 5 Aug 2024 15:49:54 -0400 Subject: [PATCH 05/19] improve invalid gMSA error message (#28179) --- ...lid-gmsa-error-message-d6943b9dc18a0cd7.yaml | 5 +++++ .../CustomActions/Native/NativeMethods.cs | 4 +++- .../CustomActions/ProcessUserCustomActions.cs | 17 +++++++++++++++-- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/msi-invalid-gmsa-error-message-d6943b9dc18a0cd7.yaml diff --git a/releasenotes/notes/msi-invalid-gmsa-error-message-d6943b9dc18a0cd7.yaml b/releasenotes/notes/msi-invalid-gmsa-error-message-d6943b9dc18a0cd7.yaml new file mode 100644 index 00000000000000..8ce5684c555a7a --- /dev/null +++ b/releasenotes/notes/msi-invalid-gmsa-error-message-d6943b9dc18a0cd7.yaml @@ -0,0 +1,5 @@ +--- +enhancements: + - | + Windows Agent Installer gives a better error message when a gMSA + account is provided for ``ddagentuser`` that Windows does not recognize. diff --git a/tools/windows/DatadogAgentInstaller/CustomActions/Native/NativeMethods.cs b/tools/windows/DatadogAgentInstaller/CustomActions/Native/NativeMethods.cs index 4376dd5d52be64..821e059484e1df 100644 --- a/tools/windows/DatadogAgentInstaller/CustomActions/Native/NativeMethods.cs +++ b/tools/windows/DatadogAgentInstaller/CustomActions/Native/NativeMethods.cs @@ -1,3 +1,4 @@ +using Datadog.CustomActions.Interfaces; using System; using System.ComponentModel; using System.Diagnostics; @@ -6,7 +7,6 @@ using System.Security.AccessControl; using System.Security.Principal; using System.Text; -using Datadog.CustomActions.Interfaces; // ReSharper disable InconsistentNaming @@ -488,6 +488,8 @@ ref Int32 len public bool IsServiceAccount(SecurityIdentifier securityIdentifier) { + // NetIsServiceAccount returns true if NetQueryServiceAccount returns MsaInfoInstalled, + // this is the same behavior as the Test-ADServiceAccount cmdlet in PowerShell. NetIsServiceAccount(null, securityIdentifier.Translate(typeof(NTAccount)).Value, out var isServiceAccount); isServiceAccount |= securityIdentifier.IsWellKnown(WellKnownSidType.LocalSystemSid) || securityIdentifier.IsWellKnown(WellKnownSidType.LocalServiceSid) || diff --git a/tools/windows/DatadogAgentInstaller/CustomActions/ProcessUserCustomActions.cs b/tools/windows/DatadogAgentInstaller/CustomActions/ProcessUserCustomActions.cs index 090e8cc92b0c03..0bd39edc9a1028 100644 --- a/tools/windows/DatadogAgentInstaller/CustomActions/ProcessUserCustomActions.cs +++ b/tools/windows/DatadogAgentInstaller/CustomActions/ProcessUserCustomActions.cs @@ -269,7 +269,7 @@ private void TestAgentUserIsNotCurrentUser(SecurityIdentifier agentUser, bool is /// /// Throws an exception if the password is required but not provided. 
/// - private void TestIfPasswordIsRequiredAndProvidedForExistingAccount(string ddAgentUserPassword, bool isDomainController, + private void TestIfPasswordIsRequiredAndProvidedForExistingAccount(string ddAgentUserName, string ddAgentUserPassword, bool isDomainController, bool isServiceAccount, bool isDomainAccount, bool datadogAgentServiceExists) { var passwordProvided = !string.IsNullOrEmpty(ddAgentUserPassword); @@ -286,6 +286,19 @@ private void TestIfPasswordIsRequiredAndProvidedForExistingAccount(string ddAgen return; } + // If the account name looks like a gMSA account, but wasn't detected as one. + // Only look for $ at the end of the account name if it's a domain account, because + // normal account names can end with $. In the case of a domain account that ends + // in $ that is NOT intended to be a gMSA account, the user must provide a password. + if (isDomainController || isDomainAccount) + { + if (ddAgentUserName.EndsWith("$") && !isServiceAccount) + { + throw new InvalidAgentUserConfigurationException( + $"The provided account '{ddAgentUserName}' ends with '$' but is not recognized as a valid gMSA account. Please ensure the username is correct and this host is a member of PrincipalsAllowedToRetrieveManagedPassword. If the account is a normal account, please provide a password."); + } + } + if (isDomainController) { // We choose not to create/manage the account/password on domain controllers because @@ -423,7 +436,7 @@ public ActionResult ProcessDdAgentUserCredentials(bool calledFromUIControl = fal $"\"{domain}\\{userName}\" ({securityIdentifier.Value}, {nameUse}) is a {(isDomainAccount ? "domain" : "local")} {(isServiceAccount ? "service " : string.Empty)}account"); TestAgentUserIsNotCurrentUser(securityIdentifier, isServiceAccount); - TestIfPasswordIsRequiredAndProvidedForExistingAccount(ddAgentUserPassword, isDomainController, isServiceAccount, isDomainAccount, datadogAgentServiceExists); + TestIfPasswordIsRequiredAndProvidedForExistingAccount(userName, ddAgentUserPassword, isDomainController, isServiceAccount, isDomainAccount, datadogAgentServiceExists); } else { From d1c3e19b30ecae772098f615a5c610308d0e2584 Mon Sep 17 00:00:00 2001 From: Branden Clark Date: Mon, 5 Aug 2024 16:31:28 -0400 Subject: [PATCH 06/19] delete subdirs on uninstall instead of top-level dir (#28143) Co-authored-by: Ursula Chen <58821586+urseberry@users.noreply.github.com> --- .../new-e2e_testing/windows.yml | 4 + ...install-dir-deletion-a5540d2d3523a399.yaml | 6 ++ test/new-e2e/tests/windows/common/registry.go | 7 ++ .../windows/install-test/install_test.go | 101 ++++++++++++++++++ .../CustomActions/CleanUpFilesCustomAction.cs | 6 +- .../InstallStateCustomActions.cs | 72 ++++++++++++- .../Datadog Agent/AgentCustomActions.cs | 1 + .../WixSetup/Datadog Agent/AgentInstaller.cs | 21 +++- 8 files changed, 210 insertions(+), 8 deletions(-) create mode 100644 releasenotes/notes/msi-uninstall-scope-install-dir-deletion-a5540d2d3523a399.yaml diff --git a/.gitlab/kitchen_testing/new-e2e_testing/windows.yml b/.gitlab/kitchen_testing/new-e2e_testing/windows.yml index abf17715733995..2b0dcbc1b6a580 100644 --- a/.gitlab/kitchen_testing/new-e2e_testing/windows.yml +++ b/.gitlab/kitchen_testing/new-e2e_testing/windows.yml @@ -52,6 +52,8 @@ - E2E_MSI_TEST: TestSubServicesOpts/all-subservices - E2E_MSI_TEST: TestSubServicesOpts/no-subservices - E2E_MSI_TEST: TestInstallAltDir + - E2E_MSI_TEST: TestInstallExistingAltDir + - E2E_MSI_TEST: TestInstallAltDirAndCorruptForUninstall - E2E_MSI_TEST: TestInstallFail 
.new-e2e_windows_installer_v7_tests: @@ -73,6 +75,8 @@ - E2E_MSI_TEST: TestSubServicesOpts/all-subservices - E2E_MSI_TEST: TestSubServicesOpts/no-subservices - E2E_MSI_TEST: TestInstallAltDir + - E2E_MSI_TEST: TestInstallExistingAltDir + - E2E_MSI_TEST: TestInstallAltDirAndCorruptForUninstall - E2E_MSI_TEST: TestInstallFail # These tests are v7 only - E2E_MSI_TEST: TestNPMUpgradeToNPM diff --git a/releasenotes/notes/msi-uninstall-scope-install-dir-deletion-a5540d2d3523a399.yaml b/releasenotes/notes/msi-uninstall-scope-install-dir-deletion-a5540d2d3523a399.yaml new file mode 100644 index 00000000000000..d7d190480a8870 --- /dev/null +++ b/releasenotes/notes/msi-uninstall-scope-install-dir-deletion-a5540d2d3523a399.yaml @@ -0,0 +1,6 @@ +--- +enhancements: + - | + Uninstalling the Windows Agent MSI Installer removes specific + subdirectories of the install path to help prevent data loss when + ``PROJECTLOCATION`` is misconfigured to an existing directory. diff --git a/test/new-e2e/tests/windows/common/registry.go b/test/new-e2e/tests/windows/common/registry.go index e50fb68c6be0b8..19c44f530f02d7 100644 --- a/test/new-e2e/tests/windows/common/registry.go +++ b/test/new-e2e/tests/windows/common/registry.go @@ -31,3 +31,10 @@ func RegistryKeyExists(host *components.RemoteHost, path string) (bool, error) { } return strings.EqualFold(strings.TrimSpace(out), "True"), nil } + +// DeleteRegistryKey deletes a registry key on the remote host +func DeleteRegistryKey(host *components.RemoteHost, path string) error { + cmd := fmt.Sprintf("Remove-Item -Path '%s' -Recurse -Force", path) + _, err := host.Execute(cmd) + return err +} diff --git a/test/new-e2e/tests/windows/install-test/install_test.go b/test/new-e2e/tests/windows/install-test/install_test.go index 00ccbb3e662f05..f6d81cb21566d7 100644 --- a/test/new-e2e/tests/windows/install-test/install_test.go +++ b/test/new-e2e/tests/windows/install-test/install_test.go @@ -105,6 +105,70 @@ func (s *testInstallSuite) testCodeSignatures(t *Tester, remoteMSIPath string) { }) } +// TestInstallExistingAltDir installs the agent to an existing directory and +// checks that the files are not removed +func TestInstallExistingAltDir(t *testing.T) { + s := &testInstallExistingAltDirSuite{} + run(t, s) +} + +type testInstallExistingAltDirSuite struct { + baseAgentMSISuite +} + +func (s *testInstallExistingAltDirSuite) TestInstallExistingAltDir() { + vm := s.Env().RemoteHost + + installPath := `C:\altdir` + configRoot := `C:\altconfroot` + + // create the install dir and add some files to it + err := vm.MkdirAll(installPath) + s.Require().NoError(err) + fileData := map[string]string{ + "file1.txt": "file1 data", + "subdir/file2.txt": "file2 data", + } + for file, data := range fileData { + parent := filepath.Dir(file) + if parent != "" { + err := vm.MkdirAll(filepath.Join(installPath, filepath.Dir(file))) + s.Require().NoError(err) + } + _, err = vm.WriteFile(filepath.Join(installPath, file), []byte(data)) + s.Require().NoError(err) + } + + // install the agent + _ = s.installAgentPackage(vm, s.AgentPackage, + windowsAgent.WithProjectLocation(installPath), + windowsAgent.WithApplicationDataDirectory(configRoot), + ) + + // uninstall the agent + s.Require().True( + s.uninstallAgent(), + ) + + // ensure the install dir and files are still there + for file, data := range fileData { + contents, err := vm.ReadFile(filepath.Join(installPath, file)) + if s.Assert().NoError(err, "file %s should still exist", file) { + assert.Equal(s.T(), string(data), string(contents), "file 
%s should still have the same contents", file) + } + } + // ensure the agent dirs are gone + removedPaths := []string{ + filepath.Join(installPath, "bin"), + filepath.Join(installPath, "embedded2"), + filepath.Join(installPath, "embedded3"), + } + for _, path := range removedPaths { + _, err := vm.Lstat(path) + s.Require().Error(err, "path %s should be removed", path) + } +} + func TestInstallAltDir(t *testing.T) { s := &testInstallAltDirSuite{} run(t, s) @@ -140,6 +204,43 @@ func (s *testInstallAltDirSuite) TestInstallAltDir() { s.uninstallAgentAndRunUninstallTests(t) } +func TestInstallAltDirAndCorruptForUninstall(t *testing.T) { + s := &testInstallAltDirAndCorruptForUninstallSuite{} + run(t, s) +} + +type testInstallAltDirAndCorruptForUninstallSuite struct { + baseAgentMSISuite +} + +func (s *testInstallAltDirAndCorruptForUninstallSuite) TestInstallAltDirAndCorruptForUninstall() { + vm := s.Env().RemoteHost + + installPath := `C:\altdir` + configRoot := `C:\altconfroot` + + // install the agent + _ = s.installAgentPackage(vm, s.AgentPackage, + windowsAgent.WithProjectLocation(installPath), + windowsAgent.WithApplicationDataDirectory(configRoot), + ) + + // remove registry key that contains install info to ensure uninstall succeeds + // with a corrupted install + err := windowsCommon.DeleteRegistryKey(vm, windowsAgent.RegistryKeyPath) + s.Require().NoError(err) + + // uninstall the agent + s.Require().True( + s.uninstallAgent(), + ) + + _, err = vm.Lstat(installPath) + s.Require().Error(err, "agent install dir should be removed") + _, err = vm.Lstat(configRoot) + s.Require().NoError(err, "agent config root dir should still exist") +} + func TestRepair(t *testing.T) { s := &testRepairSuite{} run(t, s) diff --git a/tools/windows/DatadogAgentInstaller/CustomActions/CleanUpFilesCustomAction.cs b/tools/windows/DatadogAgentInstaller/CustomActions/CleanUpFilesCustomAction.cs index 26e6ea2c169956..706c04b8e5109a 100644 --- a/tools/windows/DatadogAgentInstaller/CustomActions/CleanUpFilesCustomAction.cs +++ b/tools/windows/DatadogAgentInstaller/CustomActions/CleanUpFilesCustomAction.cs @@ -1,8 +1,8 @@ using Datadog.CustomActions.Extensions; +using Datadog.CustomActions.Interfaces; using Microsoft.Deployment.WindowsInstaller; -using System.IO; using System; -using Datadog.CustomActions.Interfaces; +using System.IO; namespace Datadog.CustomActions { @@ -14,8 +14,10 @@ private static ActionResult CleanupFiles(ISession session) var applicationDataLocation = session.Property("APPLICATIONDATADIRECTORY"); var toDelete = new[] { + // may contain python files created outside of install Path.Combine(projectLocation, "embedded2"), Path.Combine(projectLocation, "embedded3"), + // installation specific files Path.Combine(applicationDataLocation, "install_info"), Path.Combine(applicationDataLocation, "auth_token") }; diff --git a/tools/windows/DatadogAgentInstaller/CustomActions/InstallStateCustomActions.cs b/tools/windows/DatadogAgentInstaller/CustomActions/InstallStateCustomActions.cs index a450416dcd1a03..5a44d730f814e0 100644 --- a/tools/windows/DatadogAgentInstaller/CustomActions/InstallStateCustomActions.cs +++ b/tools/windows/DatadogAgentInstaller/CustomActions/InstallStateCustomActions.cs @@ -1,10 +1,12 @@ -using System; -using System.Security.Principal; using Datadog.CustomActions.Extensions; using Datadog.CustomActions.Interfaces; using Datadog.CustomActions.Native; using Microsoft.Deployment.WindowsInstaller; using Microsoft.Win32; +using System; +using System.Collections.Generic; +using System.IO; 
+using System.Security.Principal; using ServiceController = Datadog.CustomActions.Native.ServiceController; namespace Datadog.CustomActions @@ -20,6 +22,26 @@ public class InstallStateCustomActions private readonly INativeMethods _nativeMethods; + public static Dictionary<string, string> PathsToRemoveOnUninstall() + { + var pathPropertyMap = new Dictionary<string, string>(); + var paths = new List<string> + { + "bin\\agent", + "embedded3", + // embedded2 only exists in Agent 6, so an error will be logged, but install will continue + "embedded2", + }; + for (var i = 0; i < paths.Count; i++) + { + // property names are a maximum of 72 characters (can't find a source for this, but can verify in Property table schema in Orca) + // WixRemoveFolderEx creates properties like PROJECTLOCATION_0 so mimic that here. + // include lowercase letters so the property isn't made public. + pathPropertyMap.Add($"dd_PROJECTLOCATION_{i}", paths[i]); + } + return pathPropertyMap; + } + public InstallStateCustomActions( ISession session, IRegistryServices registryServices, @@ -128,6 +150,7 @@ public ActionResult ReadInstallState() GetWindowsBuildVersion(); SetDDDriverRollback(); + SetRemoveFolderExProperties(); } catch (Exception e) { @@ -138,6 +161,51 @@ public ActionResult ReadInstallState() return ActionResult.Success; } + /// + /// Sets the properties used by the WiX util RemoveFolderEx to clean up non-tracked paths. + /// + /// https://wixtoolset.org/docs/v3/xsd/util/removefolderex/ + /// + /// + /// RemoveFolderEx only takes a property as input, not IDs or paths, meaning we can't + /// pass something like $PROJECTLOCATION\bin\agent in WiX. Instead, we have to set + /// the properties in a custom action. + /// + /// The RemoveFolderEx elements in WiX are configured to only run at uninstall time, + /// so the properties' values are only relevant then. However, the WixRemoveFolderEx + /// custom action will fail fast if any of the properties are empty. So we must always + /// provide a value to the properties to prevent an error in the log and to accommodate other + /// uses of RemoveFolderEx that run at other times (at time of writing there are none). + /// This error does not stop the installer, though; it will ignore it and continue. + /// + /// RemoveFolderEx handles rollback and will restore any file paths that it deletes. + /// + /// We specify specific subdirectories under PROJECTLOCATION instead of specifying PROJECTLOCATION + /// itself to reduce the impact when the Agent is erroneously installed to an existing directory. + /// + /// This action copies PROJECTLOCATION and thus changes to PROJECTLOCATION will not be reflected + /// in these properties. This should not be an issue since PROJECTLOCATION should not change during + /// uninstallation. + /// + private void SetRemoveFolderExProperties() + { + var installDir = _session["PROJECTLOCATION"]; + if (string.IsNullOrEmpty(installDir)) + { + if (!string.IsNullOrEmpty(_session["REMOVE"])) + { + _session.Log("PROJECTLOCATION is not set, cannot set RemoveFolderEx properties, some files may be left behind in the installation directory."); + } + return; + // We cannot throw an exception here because the installer will fail. This case can happen, for example, + // if the cleanup script deleted the registry keys before running the uninstaller. 
+ } + foreach (var entry in PathsToRemoveOnUninstall()) + { + _session[entry.Key] = Path.Combine(installDir, entry.Value); + } + } + /// /// WiX doesn't support getting the real build number on Windows 10+ so we must fetch it ourselves /// diff --git a/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentCustomActions.cs b/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentCustomActions.cs index 51d51cca6f48a8..75bf5873b35c2d 100644 --- a/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentCustomActions.cs +++ b/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentCustomActions.cs @@ -97,6 +97,7 @@ public AgentCustomActions() // any command line values. // Prefer using our CA over RegistrySearch. // It is executed on the Welcome screen of the installer. + // Must run before CostInitialize and WixRemoveFoldersEx since it creates properties used by util:RemoveFolderEx When.After, new Step(RunAsAdmin.Id), // Creates properties used by both install+uninstall diff --git a/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentInstaller.cs b/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentInstaller.cs index 71a8014178759b..60fa20abb48e49 100644 --- a/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentInstaller.cs +++ b/tools/windows/DatadogAgentInstaller/WixSetup/Datadog Agent/AgentInstaller.cs @@ -360,16 +360,29 @@ private Dir CreateProgramFilesFolder() new DirFiles($@"{InstallerSource}\LICENSE"), new DirFiles($@"{InstallerSource}\*.json"), new DirFiles($@"{InstallerSource}\*.txt"), - new CompressedDir(this, "embedded3", $@"{InstallerSource}\embedded3"), - // Recursively delete/backup all files/folders in PROJECTLOCATION, they will be restored - // on rollback. By default WindowsInstller only removes the files it tracks, and embedded3 isn't tracked - new RemoveFolderEx { On = InstallEvent.uninstall, Property = "PROJECTLOCATION" } + new CompressedDir(this, "embedded3", $@"{InstallerSource}\embedded3") ); if (_agentPython.IncludePython2) { datadogAgentFolder.AddFile(new CompressedDir(this, "embedded2", $@"{InstallerSource}\embedded2")); } + // Recursively delete/backup all files/folders in these paths, they will be restored + // on rollback. By default Windows Installer only removes the files it tracks, and these paths + // may contain untracked files. + // These properties are set in the ReadInstallState custom action. 
+ // https://wixtoolset.org/docs/v3/xsd/util/removefolderex/ + foreach (var property in InstallStateCustomActions.PathsToRemoveOnUninstall().Keys) + { + datadogAgentFolder.Add( + new RemoveFolderEx + { + On = InstallEvent.uninstall, + Property = property + } + ); + } + return new Dir(new Id("DatadogAppRoot"), "%ProgramFiles%\\Datadog", datadogAgentFolder); } From 7cac8550a612f31d31b3124b27ff79fa3075aa53 Mon Sep 17 00:00:00 2001 From: Daniel Tafoya <63120739+daniel-taf@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:11:19 -0400 Subject: [PATCH 07/19] [PROCS-3946][PROCS-4193][PROCS-4192] Add local process collector for language detection (#27559) --- .../internal/process/process_collector.go | 201 +++++++++++++ .../internal/process/process_collector_nop.go | 18 ++ .../process/process_collector_test.go | 278 ++++++++++++++++++ .../internal/process/process_data.go | 55 ++++ .../collectors/internal/process/stub.go | 7 + comp/core/workloadmeta/def/types.go | 4 + .../workloadmeta/collector/process.go | 48 +-- .../workloadmeta/collector/process_test.go | 72 ++--- .../metadata/workloadmeta/extractor.go | 21 +- .../metadata/workloadmeta/extractor_test.go | 24 +- pkg/process/metadata/workloadmeta/grpc.go | 8 +- pkg/process/util/containers/containers.go | 38 +++ .../util/containers/mocks/containers.go | 27 +- ...guage-detection-core-edd5b24aa71e99ba.yaml | 12 + .../language-detection/etc/core_config.yaml | 9 + .../etc/core_config_no_check.yaml | 9 + .../etc/process_config_no_check.yaml | 7 + .../language_detection_test.go | 38 ++- .../tests/language-detection/node_test.go | 2 +- .../tests/language-detection/python_test.go | 33 ++- 20 files changed, 794 insertions(+), 117 deletions(-) create mode 100644 comp/core/workloadmeta/collectors/internal/process/process_collector.go create mode 100644 comp/core/workloadmeta/collectors/internal/process/process_collector_nop.go create mode 100644 comp/core/workloadmeta/collectors/internal/process/process_collector_test.go create mode 100644 comp/core/workloadmeta/collectors/internal/process/process_data.go create mode 100644 comp/core/workloadmeta/collectors/internal/process/stub.go create mode 100644 releasenotes/notes/process-language-detection-core-edd5b24aa71e99ba.yaml create mode 100644 test/new-e2e/tests/language-detection/etc/core_config.yaml create mode 100644 test/new-e2e/tests/language-detection/etc/core_config_no_check.yaml create mode 100644 test/new-e2e/tests/language-detection/etc/process_config_no_check.yaml diff --git a/comp/core/workloadmeta/collectors/internal/process/process_collector.go b/comp/core/workloadmeta/collectors/internal/process/process_collector.go new file mode 100644 index 00000000000000..47d9c5ede25426 --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/process/process_collector.go @@ -0,0 +1,201 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux + +// Package process implements the local process collector for Workloadmeta. 
+package process + +import ( + "context" + "strconv" + "time" + + "github.com/benbjohnson/clock" + "go.uber.org/fx" + + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/config" + "github.com/DataDog/datadog-agent/pkg/errors" + processwlm "github.com/DataDog/datadog-agent/pkg/process/metadata/workloadmeta" + proccontainers "github.com/DataDog/datadog-agent/pkg/process/util/containers" + "github.com/DataDog/datadog-agent/pkg/status/health" + "github.com/DataDog/datadog-agent/pkg/util/flavor" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +const ( + collectorID = "local-process-collector" + componentName = "workloadmeta-process" + cacheValidityNoRT = 2 * time.Second +) + +type collector struct { + id string + store workloadmeta.Component + catalog workloadmeta.AgentType + + wlmExtractor *processwlm.WorkloadMetaExtractor + processDiffCh <-chan *processwlm.ProcessCacheDiff + + // only used when process checks are disabled + processData *Data + pidToCid map[int]string + collectionClock clock.Clock + containerProvider proccontainers.ContainerProvider +} + +// NewCollector returns a new local process collector provider and an error. +// Currently, this is only used on Linux when language detection and run in core agent are enabled. +func NewCollector() (workloadmeta.CollectorProvider, error) { + wlmExtractor := processwlm.GetSharedWorkloadMetaExtractor(config.SystemProbe) + processData := NewProcessData() + processData.Register(wlmExtractor) + + return workloadmeta.CollectorProvider{ + Collector: &collector{ + id: collectorID, + catalog: workloadmeta.NodeAgent, + wlmExtractor: wlmExtractor, + processDiffCh: wlmExtractor.ProcessCacheDiff(), + processData: processData, + pidToCid: make(map[int]string), + collectionClock: clock.New(), + }, + }, nil +} + +// GetFxOptions returns the FX framework options for the collector +func GetFxOptions() fx.Option { + return fx.Provide(NewCollector) +} + +func (c *collector) enabled() bool { + if flavor.GetFlavor() != flavor.DefaultAgent { + return false + } + + processChecksInCoreAgent := config.Datadog().GetBool("process_config.run_in_core_agent.enabled") + langDetectionEnabled := config.Datadog().GetBool("language_detection.enabled") + + return langDetectionEnabled && processChecksInCoreAgent +} + +func (c *collector) Start(ctx context.Context, store workloadmeta.Component) error { + if !c.enabled() { + return errors.NewDisabled(componentName, "language detection or core agent process collection is disabled") + } + + c.store = store + + // If process collection is disabled, the collector will gather the basic process and container data + // necessary for language detection. 
+ if !config.Datadog().GetBool("process_config.process_collection.enabled") { + collectionTicker := c.collectionClock.Ticker(10 * time.Second) + if c.containerProvider == nil { + c.containerProvider = proccontainers.GetSharedContainerProvider(store) + } + go c.collect(ctx, c.containerProvider, collectionTicker) + } + + go c.stream(ctx) + + return nil +} + +func (c *collector) collect(ctx context.Context, containerProvider proccontainers.ContainerProvider, collectionTicker *clock.Ticker) { + ctx, cancel := context.WithCancel(ctx) + defer collectionTicker.Stop() + defer cancel() + + for { + select { + case <-collectionTicker.C: + // This ensures all processes are mapped correctly to a container and not just the principal process + c.pidToCid = containerProvider.GetPidToCid(cacheValidityNoRT) + c.wlmExtractor.SetLastPidToCid(c.pidToCid) + err := c.processData.Fetch() + if err != nil { + log.Error("Error fetching process data:", err) + } + case <-ctx.Done(): + log.Infof("The %s collector has stopped", collectorID) + return + } + } +} + +func (c *collector) stream(ctx context.Context) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + health := health.RegisterLiveness(componentName) + for { + select { + case <-health.C: + + case diff := <-c.processDiffCh: + log.Debugf("Received process diff with %d creations and %d deletions", len(diff.Creation), len(diff.Deletion)) + events := transform(diff) + c.store.Notify(events) + + case <-ctx.Done(): + err := health.Deregister() + if err != nil { + log.Warnf("error de-registering health check: %s", err) + } + return + } + } +} + +func (c *collector) Pull(_ context.Context) error { + return nil +} + +func (c *collector) GetID() string { + return c.id +} + +func (c *collector) GetTargetCatalog() workloadmeta.AgentType { + return c.catalog +} + +// transform converts a ProcessCacheDiff into a list of CollectorEvents. +// The type of event is based on whether a process was created or deleted since the last diff. +func transform(diff *processwlm.ProcessCacheDiff) []workloadmeta.CollectorEvent { + events := make([]workloadmeta.CollectorEvent, 0, len(diff.Creation)+len(diff.Deletion)) + + for _, creation := range diff.Creation { + events = append(events, workloadmeta.CollectorEvent{ + Type: workloadmeta.EventTypeSet, + Entity: &workloadmeta.Process{ + EntityID: workloadmeta.EntityID{ + Kind: workloadmeta.KindProcess, + ID: strconv.Itoa(int(creation.Pid)), + }, + ContainerID: creation.ContainerId, + NsPid: creation.NsPid, + CreationTime: time.UnixMilli(creation.CreationTime), + Language: creation.Language, + }, + Source: workloadmeta.SourceLocalProcessCollector, + }) + } + + for _, deletion := range diff.Deletion { + events = append(events, workloadmeta.CollectorEvent{ + Type: workloadmeta.EventTypeUnset, + Entity: &workloadmeta.Process{ + EntityID: workloadmeta.EntityID{ + Kind: workloadmeta.KindProcess, + ID: strconv.Itoa(int(deletion.Pid)), + }, + }, + Source: workloadmeta.SourceLocalProcessCollector, + }) + } + + return events +} diff --git a/comp/core/workloadmeta/collectors/internal/process/process_collector_nop.go b/comp/core/workloadmeta/collectors/internal/process/process_collector_nop.go new file mode 100644 index 00000000000000..feb73553314ecc --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/process/process_collector_nop.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build !linux + +// Package process implements the local process collector for Workloadmeta. +package process + +import ( + "go.uber.org/fx" +) + +// GetFxOptions returns the FX framework options for the collector +func GetFxOptions() fx.Option { + return nil +} diff --git a/comp/core/workloadmeta/collectors/internal/process/process_collector_test.go b/comp/core/workloadmeta/collectors/internal/process/process_collector_test.go new file mode 100644 index 00000000000000..8ef9940a66811d --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/process/process_collector_test.go @@ -0,0 +1,278 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux && test + +// Package process implements the local process collector for +// Workloadmeta. +package process + +import ( + "context" + "testing" + "time" + + "github.com/benbjohnson/clock" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "go.uber.org/fx" + + "github.com/DataDog/datadog-agent/comp/core" + "github.com/DataDog/datadog-agent/comp/core/config" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + workloadmetafxmock "github.com/DataDog/datadog-agent/comp/core/workloadmeta/fx-mock" + workloadmetamock "github.com/DataDog/datadog-agent/comp/core/workloadmeta/mock" + "github.com/DataDog/datadog-agent/pkg/languagedetection/languagemodels" + processwlm "github.com/DataDog/datadog-agent/pkg/process/metadata/workloadmeta" + "github.com/DataDog/datadog-agent/pkg/process/procutil" + "github.com/DataDog/datadog-agent/pkg/process/procutil/mocks" + proccontainers "github.com/DataDog/datadog-agent/pkg/process/util/containers/mocks" + "github.com/DataDog/datadog-agent/pkg/util/flavor" + "github.com/DataDog/datadog-agent/pkg/util/fxutil" +) + +// NewProcessDataWithMockProbe returns a new ProcessData with a mock probe +func NewProcessDataWithMockProbe(t *testing.T) (*Data, *mocks.Probe) { + probe := mocks.NewProbe(t) + return &Data{ + probe: probe, + }, probe +} + +type collectorTest struct { + collector *collector + probe *mocks.Probe + mockClock *clock.Mock + mockStore workloadmetamock.Mock +} + +func setUpCollectorTest(t *testing.T, configOverrides map[string]interface{}) collectorTest { + mockStore := fxutil.Test[workloadmetamock.Mock](t, fx.Options( + core.MockBundle(), + fx.Replace(config.MockParams{Overrides: configOverrides}), + fx.Supply(workloadmeta.Params{ + AgentType: workloadmeta.NodeAgent, + }), + workloadmetafxmock.MockModule(), + )) + + wlmExtractor := processwlm.NewWorkloadMetaExtractor(mockStore.GetConfig()) + mockProcessData, probe := NewProcessDataWithMockProbe(t) + mockProcessData.Register(wlmExtractor) + mockClock := clock.NewMock() + processDiffCh := wlmExtractor.ProcessCacheDiff() + processCollector := &collector{ + id: collectorID, + store: mockStore, + catalog: workloadmeta.NodeAgent, + processDiffCh: processDiffCh, + processData: mockProcessData, + pidToCid: make(map[int]string), + wlmExtractor: wlmExtractor, + collectionClock: mockClock, + } + + return collectorTest{processCollector, probe, mockClock, mockStore} +} + +func TestProcessCollector(t 
*testing.T) { + originalFlavor := flavor.GetFlavor() + defer flavor.SetFlavor(originalFlavor) + flavor.SetFlavor(flavor.DefaultAgent) + + configOverrides := map[string]interface{}{ + "language_detection.enabled": true, + "process_config.process_collection.enabled": true, + "process_config.run_in_core_agent.enabled": true, + } + + c := setUpCollectorTest(t, configOverrides) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + processDiffCh := make(chan *processwlm.ProcessCacheDiff) + c.collector.processDiffCh = processDiffCh + + err := c.collector.Start(ctx, c.mockStore) + require.NoError(t, err) + + creationTime := time.Now().Unix() + processDiffCh <- &processwlm.ProcessCacheDiff{ + Creation: []*processwlm.ProcessEntity{ + { + Pid: 1, + ContainerId: "cid", + NsPid: 1, + CreationTime: creationTime, + Language: &languagemodels.Language{Name: languagemodels.Java}, + }, + }, + } + + expectedProc1 := &workloadmeta.Process{ + EntityID: workloadmeta.EntityID{ + ID: "1", + Kind: workloadmeta.KindProcess, + }, + NsPid: 1, + ContainerID: "cid", + CreationTime: time.UnixMilli(creationTime), + Language: &languagemodels.Language{Name: languagemodels.Java}, + } + + assert.EventuallyWithT(t, func(cT *assert.CollectT) { + proc, err := c.mockStore.GetProcess(1) + assert.NoError(cT, err) + assert.Equal(cT, expectedProc1, proc) + }, time.Second, time.Millisecond*100) + + processDiffCh <- &processwlm.ProcessCacheDiff{ + Creation: []*processwlm.ProcessEntity{ + { + Pid: 2, + ContainerId: "cid", + NsPid: 2, + CreationTime: creationTime, + Language: &languagemodels.Language{Name: languagemodels.Python}, + }, + }, + Deletion: []*processwlm.ProcessEntity{ + { + Pid: 1, + ContainerId: "cid", + NsPid: 1, + CreationTime: creationTime, + Language: &languagemodels.Language{Name: languagemodels.Java}, + }, + }, + } + + expectedProc2 := &workloadmeta.Process{ + EntityID: workloadmeta.EntityID{ + ID: "2", + Kind: workloadmeta.KindProcess, + }, + NsPid: 2, + ContainerID: "cid", + CreationTime: time.UnixMilli(creationTime), + Language: &languagemodels.Language{Name: languagemodels.Python}, + } + + assert.EventuallyWithT(t, func(cT *assert.CollectT) { + proc, err := c.mockStore.GetProcess(2) + assert.NoError(cT, err) + assert.Equal(cT, expectedProc2, proc) + + _, err = c.mockStore.GetProcess(1) + assert.Error(cT, err) + }, time.Second, time.Millisecond*100) +} + +func TestProcessCollectorStart(t *testing.T) { + tests := []struct { + name string + agentFlavor string + langDetectionEnabled bool + runInCoreAgent bool + expectedEnabled bool + }{ + { + name: "core agent + all configs enabled", + agentFlavor: flavor.DefaultAgent, + langDetectionEnabled: true, + runInCoreAgent: true, + expectedEnabled: true, + }, + { + name: "core agent + all configs disabled", + agentFlavor: flavor.DefaultAgent, + langDetectionEnabled: false, + runInCoreAgent: false, + expectedEnabled: false, + }, + { + name: "process agent + all configs enabled", + agentFlavor: flavor.ProcessAgent, + langDetectionEnabled: true, + runInCoreAgent: true, + expectedEnabled: false, + }, + { + name: "process agent + all configs disabled", + agentFlavor: flavor.ProcessAgent, + langDetectionEnabled: false, + runInCoreAgent: false, + expectedEnabled: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + originalFlavor := flavor.GetFlavor() + defer flavor.SetFlavor(originalFlavor) + flavor.SetFlavor(test.agentFlavor) + + configOverrides := map[string]interface{}{ + "language_detection.enabled": 
test.langDetectionEnabled, + "process_config.run_in_core_agent.enabled": test.runInCoreAgent, + } + + c := setUpCollectorTest(t, configOverrides) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + err := c.collector.Start(ctx, c.mockStore) + + enabled := err == nil + assert.Equal(t, test.expectedEnabled, enabled) + }) + } +} + +func TestProcessCollectorWithoutProcessCheck(t *testing.T) { + originalFlavor := flavor.GetFlavor() + defer flavor.SetFlavor(originalFlavor) + flavor.SetFlavor(flavor.DefaultAgent) + + configOverrides := map[string]interface{}{ + "language_detection.enabled": true, + "process_config.process_collection.enabled": false, + "process_config.run_in_core_agent.enabled": true, + } + + c := setUpCollectorTest(t, configOverrides) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + mockCtrl := gomock.NewController(t) + mockProvider := proccontainers.NewMockContainerProvider(mockCtrl) + c.collector.containerProvider = mockProvider + + err := c.collector.Start(ctx, c.mockStore) + require.NoError(t, err) + + c.probe.On("ProcessesByPID", mock.Anything, mock.Anything).Return(map[int32]*procutil.Process{ + 1: { + Pid: 1, + Cmdline: []string{"proc", "-h", "-v"}, + Stats: &procutil.Stats{CreateTime: 1}, + }, + }, nil).Times(1) + + // Testing container id enrichment + expectedCid := "container1" + mockProvider.EXPECT().GetPidToCid(2 * time.Second).Return(map[int]string{1: expectedCid}).MinTimes(1) + + c.mockClock.Add(10 * time.Second) + + assert.EventuallyWithT(t, func(cT *assert.CollectT) { + proc, err := c.mockStore.GetProcess(1) + assert.NoError(cT, err) + assert.NotNil(cT, proc) + assert.Equal(cT, expectedCid, proc.ContainerID) + }, 1*time.Second, time.Millisecond*100) +} diff --git a/comp/core/workloadmeta/collectors/internal/process/process_data.go b/comp/core/workloadmeta/collectors/internal/process/process_data.go new file mode 100644 index 00000000000000..cb240bd68d8954 --- /dev/null +++ b/comp/core/workloadmeta/collectors/internal/process/process_data.go @@ -0,0 +1,55 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux + +// Package process implements the local process collector for Workloadmeta. +package process + +import ( + "time" + + "github.com/DataDog/datadog-agent/pkg/process/metadata" + "github.com/DataDog/datadog-agent/pkg/process/procutil" +) + +// Data collects a basic state of process data such as cmdline args. +// This is currently used for metadata extraction from processes. This is a starting point for providing +// process data across all checks as part of the migration to components. 
+type Data struct {
+	probe      procutil.Probe
+	extractors []metadata.Extractor
+}
+
+// NewProcessData returns a new Data initialized with a process probe; it takes no configuration
+func NewProcessData() *Data {
+	return &Data{
+		probe: procutil.NewProcessProbe(),
+	}
+}
+
+// Fetch retrieves process data from the system and notifies registered extractors
+func (p *Data) Fetch() error {
+	procs, err := p.probe.ProcessesByPID(time.Now(), false)
+
+	if err != nil {
+		return err
+	}
+
+	notifyExtractors(procs, p.extractors)
+
+	return nil
+}
+
+// Register adds an Extractor which will be notified for metadata extraction
+func (p *Data) Register(e metadata.Extractor) {
+	p.extractors = append(p.extractors, e)
+}
+
+func notifyExtractors(procs map[int32]*procutil.Process, extractors []metadata.Extractor) {
+	for _, extractor := range extractors {
+		extractor.Extract(procs)
+	}
+}
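Data is a small fan-out layer: extractors register once, and every Fetch pushes a fresh snapshot of all processes to each of them. A minimal sketch of the intended wiring from inside the package (illustrative only; wlmExtractor stands in for any metadata.Extractor implementation and is not a name introduced by this patch):

    data := NewProcessData()
    data.Register(wlmExtractor) // wlmExtractor: assumed metadata.Extractor implementation
    if err := data.Fetch(); err != nil {
        // a failed fetch is simply retried on the collector's next tick
        log.Error("Error fetching process data:", err)
    }
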
diff --git a/comp/core/workloadmeta/collectors/internal/process/stub.go b/comp/core/workloadmeta/collectors/internal/process/stub.go
new file mode 100644
index 00000000000000..c2616a29bfd0f8
--- /dev/null
+++ b/comp/core/workloadmeta/collectors/internal/process/stub.go
@@ -0,0 +1,7 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package process implements the local process collector for Workloadmeta.
+package process
diff --git a/comp/core/workloadmeta/def/types.go b/comp/core/workloadmeta/def/types.go
index 874265c78c07d2..cad44b46ffb286 100644
--- a/comp/core/workloadmeta/def/types.go
+++ b/comp/core/workloadmeta/def/types.go
@@ -88,6 +88,10 @@ const (
 	// SourceHost represents entities detected by the host such as host tags.
 	SourceHost Source = "host"
+
+	// SourceLocalProcessCollector represents process entities detected
+	// by the LocalProcessCollector.
+	SourceLocalProcessCollector Source = "local_process_collector"
 )
 
 // ContainerRuntime is the container runtime used by a container.
diff --git a/pkg/process/metadata/workloadmeta/collector/process.go b/pkg/process/metadata/workloadmeta/collector/process.go
index 2d3839121f04c9..f961af0f7d2cc4 100644
--- a/pkg/process/metadata/workloadmeta/collector/process.go
+++ b/pkg/process/metadata/workloadmeta/collector/process.go
@@ -8,6 +8,7 @@ package collector
 import (
 	"context"
+	"time"
 
 	"github.com/benbjohnson/clock"
@@ -15,10 +16,14 @@ import (
 	"github.com/DataDog/datadog-agent/pkg/config"
 	"github.com/DataDog/datadog-agent/pkg/process/checks"
 	workloadmetaExtractor "github.com/DataDog/datadog-agent/pkg/process/metadata/workloadmeta"
+	proccontainers "github.com/DataDog/datadog-agent/pkg/process/util/containers"
 	"github.com/DataDog/datadog-agent/pkg/util/log"
 )
 
-const collectorId = "local-process"
+const (
+	collectorId       = "local-process"
+	cacheValidityNoRT = 2 * time.Second
+)
 
 // NewProcessCollector creates a new process collector.
 func NewProcessCollector(coreConfig, sysProbeConfig config.Reader) *Collector {
@@ -49,7 +54,8 @@ type Collector struct {
 	pidToCid map[int]string
 
-	collectionClock clock.Clock
+	collectionClock   clock.Clock
+	containerProvider proccontainers.ContainerProvider
 }
 
 // Start will start the collector
@@ -63,30 +69,27 @@ func (c *Collector) Start(ctx context.Context, store workloadmeta.Component) err
 		c.ddConfig.GetDuration("workloadmeta.local_process_collector.collection_interval"),
 	)
 
-	filter := workloadmeta.NewFilterBuilder().AddKind(workloadmeta.KindContainer).Build()
-	containerEvt := store.Subscribe(collectorId, workloadmeta.NormalPriority, filter)
+	if c.containerProvider == nil {
+		c.containerProvider = proccontainers.GetSharedContainerProvider(store)
+	}
 
-	go c.run(ctx, store, containerEvt, collectionTicker)
+	go c.run(ctx, c.containerProvider, collectionTicker)
 
 	return nil
 }
 
-func (c *Collector) run(ctx context.Context, store workloadmeta.Component, containerEvt chan workloadmeta.EventBundle, collectionTicker *clock.Ticker) {
+func (c *Collector) run(ctx context.Context, containerProvider proccontainers.ContainerProvider, collectionTicker *clock.Ticker) {
 	defer c.grpcServer.Stop()
-	defer store.Unsubscribe(containerEvt)
 	defer collectionTicker.Stop()
 
 	log.Info("Starting local process collection server")
 
 	for {
 		select {
-		case evt, ok := <-containerEvt:
-			if !ok {
-				log.Infof("The %s collector has stopped, workloadmeta channel is closed", collectorId)
-				return
-			}
-			c.handleContainerEvent(evt)
 		case <-collectionTicker.C:
+			// This ensures all processes are mapped correctly to a container and not just the principal process
+			c.pidToCid = containerProvider.GetPidToCid(cacheValidityNoRT)
+			c.wlmExtractor.SetLastPidToCid(c.pidToCid)
 			err := c.processData.Fetch()
 			if err != nil {
 				log.Error("Error fetching process data:", err)
@@ -98,25 +101,6 @@ func (c *Collector) run(ctx context.Context, store workloadmeta.Component, conta
 	}
 }
 
-func (c *Collector) handleContainerEvent(evt workloadmeta.EventBundle) {
-	defer evt.Acknowledge()
-
-	for _, evt := range evt.Events {
-		ent := evt.Entity.(*workloadmeta.Container)
-		switch evt.Type {
-		case workloadmeta.EventTypeSet:
-			// Should be safe, even on windows because PID 0 is the idle process and therefore must always belong to the host
-			if ent.PID != 0 {
-				c.pidToCid[ent.PID] = ent.ID
-			}
-		case workloadmeta.EventTypeUnset:
-			delete(c.pidToCid, ent.PID)
-		}
-	}
-
-	c.wlmExtractor.SetLastPidToCid(c.pidToCid)
-}
-
 // Enabled checks to see if we should enable the local process collector.
 // Since its job is to collect processes when the process check is disabled, we only enable it when `process_config.process_collection.enabled` == false
 // Additionally, if the remote process collector is not enabled in the core agent, there is no reason to collect processes. Therefore, we check `language_detection.enabled`.
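The Enabled predicate itself is outside this hunk; as an illustrative sketch only, the gating the comment describes amounts to the following (condensed, not the actual function body):

    // illustrative sketch, not the actual implementation
    func enabledSketch(cfg config.Reader) bool {
        // step aside when the process check is already collecting processes
        if cfg.GetBool("process_config.process_collection.enabled") {
            return false
        }
        // without language detection there is no consumer for the collected data
        return cfg.GetBool("language_detection.enabled")
    }
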
diff --git a/pkg/process/metadata/workloadmeta/collector/process_test.go b/pkg/process/metadata/workloadmeta/collector/process_test.go
index 85f2d068120cc2..5670fe0e229453 100644
--- a/pkg/process/metadata/workloadmeta/collector/process_test.go
+++ b/pkg/process/metadata/workloadmeta/collector/process_test.go
@@ -12,6 +12,7 @@ import (
 	"time"
 
 	"github.com/benbjohnson/clock"
+	"github.com/golang/mock/gomock"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/require"
@@ -29,7 +30,9 @@ import (
 	workloadmetaExtractor "github.com/DataDog/datadog-agent/pkg/process/metadata/workloadmeta"
 	"github.com/DataDog/datadog-agent/pkg/process/procutil"
 	"github.com/DataDog/datadog-agent/pkg/process/procutil/mocks"
+	proccontainersmock "github.com/DataDog/datadog-agent/pkg/process/util/containers/mocks"
 	pbgo "github.com/DataDog/datadog-agent/pkg/proto/pbgo/process"
+	"github.com/DataDog/datadog-agent/pkg/trace/testutil"
 	"github.com/DataDog/datadog-agent/pkg/util/flavor"
 	"github.com/DataDog/datadog-agent/pkg/util/fxutil"
@@ -38,11 +41,12 @@
 const testCid = "containersAreAwesome"
 
 type collectorTest struct {
-	probe     *mocks.Probe
-	clock     *clock.Mock
-	collector *Collector
-	store     workloadmetamock.Mock
-	stream    pbgo.ProcessEntityStream_StreamEntitiesClient
+	probe        *mocks.Probe
+	clock        *clock.Mock
+	collector    *Collector
+	store        workloadmetamock.Mock
+	stream       pbgo.ProcessEntityStream_StreamEntitiesClient
+	mockProvider *proccontainersmock.MockContainerProvider
 }
 
 func acquireStream(t *testing.T, port int) pbgo.ProcessEntityStream_StreamEntitiesClient {
@@ -99,24 +103,29 @@ func setUpCollectorTest(t *testing.T) *collectorTest {
 	mockClock := clock.NewMock()
 
+	mockCtrl := gomock.NewController(t)
+	mockProvider := proccontainersmock.NewMockContainerProvider(mockCtrl)
+
 	c := &Collector{
-		ddConfig:        store.GetConfig(),
-		processData:     mockProcessData,
-		wlmExtractor:    wlmExtractor,
-		grpcServer:      grpcServer,
-		pidToCid:        make(map[int]string),
-		collectionClock: mockClock,
+		ddConfig:          store.GetConfig(),
+		processData:       mockProcessData,
+		wlmExtractor:      wlmExtractor,
+		grpcServer:        grpcServer,
+		pidToCid:          make(map[int]string),
+		collectionClock:   mockClock,
+		containerProvider: mockProvider,
 	}
 
 	ctx, cancel := context.WithCancel(context.Background())
 	require.NoError(t, c.Start(ctx, store))
 	t.Cleanup(cancel)
 
 	return &collectorTest{
-		collector: c,
-		probe:     probe,
-		clock:     mockClock,
-		store:     store,
-		stream:    acquireStream(t, port),
+		collector:    c,
+		probe:        probe,
+		clock:        mockClock,
+		store:        store,
+		stream:       acquireStream(t, port),
+		mockProvider: mockProvider,
 	}
 }
 
@@ -130,15 +139,6 @@ func (c *collectorTest) setupProcs() {
 	}, nil).Maybe()
 }
 
-func (c *collectorTest) waitForContainerUpdate(t *testing.T, cont *workloadmeta.Container) {
-	t.Helper()
-
-	c.store.Set(cont)
-	require.EventuallyWithT(t, func(_ *assert.CollectT) {
-		assert.Contains(t, c.collector.pidToCid, cont.PID)
-	}, 15*time.Second, 1*time.Second)
-}
-
 // Tick sets up the collector to collect processes by advancing the clock
 func (c *collectorTest) tick() {
 	c.clock.Add(c.store.GetConfig().GetDuration("workloadmeta.local_process_collector.collection_interval"))
@@ -153,6 +153,9 @@ func TestProcessCollector(t *testing.T) {
 	require.NoError(t, err)
 	fmt.Printf("1: %v\n", resp.String())
 
+	// Testing container id enrichment
+	c.mockProvider.EXPECT().GetPidToCid(2 * time.Second).Return(map[int]string{1: testCid}).MinTimes(1)
+
 	c.tick()
 	resp, err = c.stream.Recv()
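	// Recv picks up the events produced by the tick above; with GetPidToCid
	// mocked, the set event should now carry the test container ID.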
assert.NoError(t, err) @@ -162,25 +165,6 @@ func TestProcessCollector(t *testing.T) { evt := resp.SetEvents[0] assert.EqualValues(t, 1, evt.Pid) assert.EqualValues(t, 1, evt.CreationTime) - - // Now test that this process updates with container id when the store is changed - c.waitForContainerUpdate(t, &workloadmeta.Container{ - EntityID: workloadmeta.EntityID{ - Kind: workloadmeta.KindContainer, - ID: testCid, - }, - PID: 1, - }) - - c.tick() - resp, err = c.stream.Recv() - assert.NoError(t, err) - fmt.Printf("3: %v\n", resp.String()) - - require.Len(t, resp.SetEvents, 1) - evt = resp.SetEvents[0] - assert.EqualValues(t, 1, evt.Pid) - assert.EqualValues(t, 1, evt.CreationTime) assert.Equal(t, testCid, evt.ContainerID) } diff --git a/pkg/process/metadata/workloadmeta/extractor.go b/pkg/process/metadata/workloadmeta/extractor.go index 1f3fb4287fad8c..429c092bd56424 100644 --- a/pkg/process/metadata/workloadmeta/extractor.go +++ b/pkg/process/metadata/workloadmeta/extractor.go @@ -22,6 +22,11 @@ import ( const subsystem = "WorkloadMetaExtractor" +var ( + initWorkloadMetaExtractor sync.Once + sharedWorkloadMetaExtractor *WorkloadMetaExtractor +) + // ProcessEntity represents a process exposed by the WorkloadMeta extractor type ProcessEntity struct { //nolint:revive // TODO(PROC) Fix revive linter @@ -53,8 +58,8 @@ type WorkloadMetaExtractor struct { // Extract call from the WorkloadMetaExtractor cache type ProcessCacheDiff struct { cacheVersion int32 - creation []*ProcessEntity - deletion []*ProcessEntity + Creation []*ProcessEntity + Deletion []*ProcessEntity } var ( @@ -66,6 +71,14 @@ var ( subsystem, "diffs_dropped", "The number of diffs dropped due to channel contention") ) +// GetSharedWorkloadMetaExtractor returns a shared WorkloadMetaExtractor +func GetSharedWorkloadMetaExtractor(sysprobeConfig config.Reader) *WorkloadMetaExtractor { + initWorkloadMetaExtractor.Do(func() { + sharedWorkloadMetaExtractor = NewWorkloadMetaExtractor(sysprobeConfig) + }) + return sharedWorkloadMetaExtractor +} + // NewWorkloadMetaExtractor constructs the WorkloadMetaExtractor. func NewWorkloadMetaExtractor(sysprobeConfig config.Reader) *WorkloadMetaExtractor { log.Info("Instantiating a new WorkloadMetaExtractor") @@ -158,8 +171,8 @@ func (w *WorkloadMetaExtractor) Extract(procs map[int32]*procutil.Process) { diff := &ProcessCacheDiff{ cacheVersion: w.cacheVersion, - creation: newEntities, - deletion: deadProcs, + Creation: newEntities, + Deletion: deadProcs, } // Do not block on write to prevent Extract caller from hanging e.g. 
process check diff --git a/pkg/process/metadata/workloadmeta/extractor_test.go b/pkg/process/metadata/workloadmeta/extractor_test.go index 17a85f78c1f48b..40be53e304af3a 100644 --- a/pkg/process/metadata/workloadmeta/extractor_test.go +++ b/pkg/process/metadata/workloadmeta/extractor_test.go @@ -107,8 +107,8 @@ func TestExtractor(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Python}, ContainerId: ctrId1, }, - }, diff.creation) - assert.ElementsMatch(t, []*ProcessEntity{}, diff.deletion) + }, diff.Creation) + assert.ElementsMatch(t, []*ProcessEntity{}, diff.Deletion) // Assert that if no process is created or terminated, the cache is not updated nor a diff generated extractor.Extract(map[int32]*procutil.Process{ @@ -156,7 +156,7 @@ func TestExtractor(t *testing.T) { diff = <-extractor.ProcessCacheDiff() assert.Equal(t, int32(2), diff.cacheVersion) - assert.ElementsMatch(t, []*ProcessEntity{}, diff.creation) + assert.ElementsMatch(t, []*ProcessEntity{}, diff.Creation) assert.ElementsMatch(t, []*ProcessEntity{ { Pid: Pid1, @@ -165,7 +165,7 @@ func TestExtractor(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Java}, ContainerId: ctrId1, }, - }, diff.deletion) + }, diff.Deletion) // Process creation generates a cache update and diff event extractor.Extract(map[int32]*procutil.Process{ @@ -202,8 +202,8 @@ func TestExtractor(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Unknown}, ContainerId: ctrId1, }, - }, diff.creation) - assert.ElementsMatch(t, []*ProcessEntity{}, diff.deletion) + }, diff.Creation) + assert.ElementsMatch(t, []*ProcessEntity{}, diff.Deletion) // Process creation and deletion generate a cache update and diff event extractor.Extract(map[int32]*procutil.Process{ @@ -240,7 +240,7 @@ func TestExtractor(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Python}, ContainerId: ctrId2, }, - }, diff.creation) + }, diff.Creation) assert.ElementsMatch(t, []*ProcessEntity{ { Pid: Pid2, @@ -249,7 +249,7 @@ func TestExtractor(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Python}, ContainerId: ctrId1, }, - }, diff.deletion) + }, diff.Deletion) } func BenchmarkHashProcess(b *testing.B) { @@ -286,7 +286,7 @@ func TestLateContainerId(t *testing.T) { }) assert.EqualValues(t, &ProcessCacheDiff{ cacheVersion: 1, - creation: []*ProcessEntity{ + Creation: []*ProcessEntity{ { Pid: proc1.Pid, ContainerId: "", @@ -295,7 +295,7 @@ func TestLateContainerId(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Java}, }, }, - deletion: []*ProcessEntity{}, + Deletion: []*ProcessEntity{}, }, <-extractor.ProcessCacheDiff()) var ( @@ -311,7 +311,7 @@ func TestLateContainerId(t *testing.T) { }) assert.EqualValues(t, &ProcessCacheDiff{ cacheVersion: 2, - creation: []*ProcessEntity{ + Creation: []*ProcessEntity{ { Pid: proc1.Pid, ContainerId: ctrId1, @@ -320,6 +320,6 @@ func TestLateContainerId(t *testing.T) { Language: &languagemodels.Language{Name: languagemodels.Java}, }, }, - deletion: []*ProcessEntity{}, + Deletion: []*ProcessEntity{}, }, <-extractor.ProcessCacheDiff()) } diff --git a/pkg/process/metadata/workloadmeta/grpc.go b/pkg/process/metadata/workloadmeta/grpc.go index cf779922173e5e..28592954005d66 100644 --- a/pkg/process/metadata/workloadmeta/grpc.go +++ b/pkg/process/metadata/workloadmeta/grpc.go @@ -71,13 +71,13 @@ func NewGRPCServer(config config.Reader, extractor *WorkloadMetaExtractor) *GRPC } func (l *GRPCServer) consumeProcessDiff(diff *ProcessCacheDiff) 
([]*pbgo.ProcessEventSet, []*pbgo.ProcessEventUnset) {
-	setEvents := make([]*pbgo.ProcessEventSet, len(diff.creation))
-	for i, proc := range diff.creation {
+	setEvents := make([]*pbgo.ProcessEventSet, len(diff.Creation))
+	for i, proc := range diff.Creation {
 		setEvents[i] = processEntityToEventSet(proc)
 	}
 
-	unsetEvents := make([]*pbgo.ProcessEventUnset, len(diff.deletion))
-	for i, proc := range diff.deletion {
+	unsetEvents := make([]*pbgo.ProcessEventUnset, len(diff.Deletion))
+	for i, proc := range diff.Deletion {
 		unsetEvents[i] = &pbgo.ProcessEventUnset{Pid: proc.Pid}
 	}
 
diff --git a/pkg/process/util/containers/containers.go b/pkg/process/util/containers/containers.go
index 955266d922e051..596d8fa9a1ab43 100644
--- a/pkg/process/util/containers/containers.go
+++ b/pkg/process/util/containers/containers.go
@@ -59,6 +59,7 @@ var (
 // ContainerProvider defines the interface for a container metrics provider
 type ContainerProvider interface {
 	GetContainers(cacheValidity time.Duration, previousContainers map[string]*ContainerRateMetrics) ([]*model.Container, map[string]*ContainerRateMetrics, map[int]string, error)
+	GetPidToCid(cacheValidity time.Duration) map[int]string
 }
 
 // GetSharedContainerProvider returns a shared ContainerProvider
@@ -188,6 +189,43 @@ func (p *containerProvider) GetContainers(cacheValidity time.Duration, previousC
 	return processContainers, rateStats, pidToCid, nil
 }
 
+// GetPidToCid returns a mapping from the PIDs of processes running in containers to their container IDs
+func (p *containerProvider) GetPidToCid(cacheValidity time.Duration) map[int]string {
+	containersMetadata := p.metadataStore.ListContainersWithFilter(workloadmeta.GetRunningContainers)
+	pidToCid := make(map[int]string)
+	for _, container := range containersMetadata {
+		var annotations map[string]string
+		if pod, err := p.metadataStore.GetKubernetesPodForContainer(container.ID); err == nil {
+			annotations = pod.Annotations
+		}
+
+		if p.filter != nil && p.filter.IsExcluded(annotations, container.Name, container.Image.Name, container.Labels[kubernetes.CriContainerNamespaceLabel]) {
+			continue
+		}
+
+		collector := p.metricsProvider.GetCollector(provider.NewRuntimeMetadata(
+			string(container.Runtime),
+			string(container.RuntimeFlavor),
+		))
+		if collector == nil {
+			log.Infof("No metrics collector available for runtime: %s, skipping container: %s", container.Runtime, container.ID)
+			continue
+		}
+
+		// Building PID to CID mapping for NPM and Language Detection
+		pids, err := collector.GetPIDs(container.Namespace, container.ID, cacheValidity)
+		if err == nil && pids != nil {
+			for _, pid := range pids {
+				pidToCid[pid] = container.ID
+			}
+		} else {
+			log.Debugf("PIDs for: %+v not available, err: %v", container, err)
+		}
+	}
+
+	return pidToCid
+}
+
 func computeContainerStats(container *workloadmeta.Container, inStats *metrics.ContainerStats, previousStats, outPreviousStats *ContainerRateMetrics, outStats *model.Container) {
 	if inStats == nil {
 		return
diff --git a/pkg/process/util/containers/mocks/containers.go b/pkg/process/util/containers/mocks/containers.go
index 7166ffcfb284b2..fb97f1cec66499 100644
--- a/pkg/process/util/containers/mocks/containers.go
+++ b/pkg/process/util/containers/mocks/containers.go
@@ -1,7 +1,8 @@
 // Code generated by MockGen. DO NOT EDIT.
-// Source: github.com/DataDog/datadog-agent/pkg/process/util/containers (interfaces: ContainerProvider)
+// Source: pkg/process/util/containers/containers.go
 
-package mocks
+// Package mock_containers is a generated GoMock package.
+package mock_containers import ( reflect "reflect" @@ -36,9 +37,9 @@ func (m *MockContainerProvider) EXPECT() *MockContainerProviderMockRecorder { } // GetContainers mocks base method. -func (m *MockContainerProvider) GetContainers(arg0 time.Duration, arg1 map[string]*containers.ContainerRateMetrics) ([]*process.Container, map[string]*containers.ContainerRateMetrics, map[int]string, error) { +func (m *MockContainerProvider) GetContainers(cacheValidity time.Duration, previousContainers map[string]*containers.ContainerRateMetrics) ([]*process.Container, map[string]*containers.ContainerRateMetrics, map[int]string, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetContainers", arg0, arg1) + ret := m.ctrl.Call(m, "GetContainers", cacheValidity, previousContainers) ret0, _ := ret[0].([]*process.Container) ret1, _ := ret[1].(map[string]*containers.ContainerRateMetrics) ret2, _ := ret[2].(map[int]string) @@ -47,7 +48,21 @@ func (m *MockContainerProvider) GetContainers(arg0 time.Duration, arg1 map[strin } // GetContainers indicates an expected call of GetContainers. -func (mr *MockContainerProviderMockRecorder) GetContainers(arg0, arg1 interface{}) *gomock.Call { +func (mr *MockContainerProviderMockRecorder) GetContainers(cacheValidity, previousContainers interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetContainers", reflect.TypeOf((*MockContainerProvider)(nil).GetContainers), arg0, arg1) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetContainers", reflect.TypeOf((*MockContainerProvider)(nil).GetContainers), cacheValidity, previousContainers) +} + +// GetPidToCid mocks base method. +func (m *MockContainerProvider) GetPidToCid(cacheValidity time.Duration) map[int]string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPidToCid", cacheValidity) + ret0, _ := ret[0].(map[int]string) + return ret0 +} + +// GetPidToCid indicates an expected call of GetPidToCid. +func (mr *MockContainerProviderMockRecorder) GetPidToCid(cacheValidity interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPidToCid", reflect.TypeOf((*MockContainerProvider)(nil).GetPidToCid), cacheValidity) } diff --git a/releasenotes/notes/process-language-detection-core-edd5b24aa71e99ba.yaml b/releasenotes/notes/process-language-detection-core-edd5b24aa71e99ba.yaml new file mode 100644 index 00000000000000..8b323098c4f8cd --- /dev/null +++ b/releasenotes/notes/process-language-detection-core-edd5b24aa71e99ba.yaml @@ -0,0 +1,12 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +enhancements: + - | + Language detection can run on the core Agent without needing a gRPC server. 
+ diff --git a/test/new-e2e/tests/language-detection/etc/core_config.yaml b/test/new-e2e/tests/language-detection/etc/core_config.yaml new file mode 100644 index 00000000000000..5275bcbb0be733 --- /dev/null +++ b/test/new-e2e/tests/language-detection/etc/core_config.yaml @@ -0,0 +1,9 @@ +process_config: + process_collection: + enabled: true + run_in_core_agent: + enabled: true + intervals: + process: 1 +language_detection: + enabled: true diff --git a/test/new-e2e/tests/language-detection/etc/core_config_no_check.yaml b/test/new-e2e/tests/language-detection/etc/core_config_no_check.yaml new file mode 100644 index 00000000000000..6cd034176e3846 --- /dev/null +++ b/test/new-e2e/tests/language-detection/etc/core_config_no_check.yaml @@ -0,0 +1,9 @@ +process_config: + process_collection: + enabled: false + run_in_core_agent: + enabled: true + intervals: + process: 1 +language_detection: + enabled: true diff --git a/test/new-e2e/tests/language-detection/etc/process_config_no_check.yaml b/test/new-e2e/tests/language-detection/etc/process_config_no_check.yaml new file mode 100644 index 00000000000000..46a648d122c89c --- /dev/null +++ b/test/new-e2e/tests/language-detection/etc/process_config_no_check.yaml @@ -0,0 +1,7 @@ +process_config: + process_collection: + enabled: false + intervals: + process: 1 +language_detection: + enabled: true diff --git a/test/new-e2e/tests/language-detection/language_detection_test.go b/test/new-e2e/tests/language-detection/language_detection_test.go index e07986607b533c..29ce07db52c7af 100644 --- a/test/new-e2e/tests/language-detection/language_detection_test.go +++ b/test/new-e2e/tests/language-detection/language_detection_test.go @@ -14,17 +14,26 @@ import ( "testing" "time" - "github.com/DataDog/test-infra-definitions/components/datadog/agentparams" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments" awshost "github.com/DataDog/datadog-agent/test/new-e2e/pkg/environments/aws/host" + "github.com/DataDog/test-infra-definitions/components/datadog/agentparams" ) //go:embed etc/process_config.yaml -var configStr string +var processConfigStr string + +//go:embed etc/core_config.yaml +var coreConfigStr string + +//go:embed etc/process_config_no_check.yaml +var processConfigNoCheckStr string + +//go:embed etc/core_config_no_check.yaml +var coreConfigNoCheckStr string type languageDetectionSuite struct { e2e.BaseSuite[environments.Host] @@ -32,7 +41,7 @@ type languageDetectionSuite struct { func TestLanguageDetectionSuite(t *testing.T) { agentParams := []func(*agentparams.Params) error{ - agentparams.WithAgentConfig(configStr), + agentparams.WithAgentConfig(processConfigStr), } options := []e2e.SuiteOption{ @@ -42,14 +51,20 @@ func TestLanguageDetectionSuite(t *testing.T) { e2e.Run(t, &languageDetectionSuite{}, options...) 
} -func (s *languageDetectionSuite) checkDetectedLanguage(command string, language string) { +func (s *languageDetectionSuite) SetupSuite() { + s.BaseSuite.SetupSuite() + + s.installPython() +} + +func (s *languageDetectionSuite) checkDetectedLanguage(command string, language string, source string) { var pid string require.Eventually(s.T(), func() bool { pid = s.getPidForCommand(command) return len(pid) > 0 }, - 10*time.Second, 10*time.Millisecond, + 60*time.Second, 100*time.Millisecond, fmt.Sprintf("pid not found for command %s", command), ) @@ -57,10 +72,10 @@ func (s *languageDetectionSuite) checkDetectedLanguage(command string, language var err error assert.Eventually(s.T(), func() bool { - actualLanguage, err = s.getLanguageForPid(pid) + actualLanguage, err = s.getLanguageForPid(pid, source) return err == nil && actualLanguage == language }, - 10*time.Second, 100*time.Millisecond, + 60*time.Second, 100*time.Millisecond, fmt.Sprintf("language match not found, pid = %s, expected = %s, actual = %s, err = %v", pid, language, actualLanguage, err), ) @@ -79,17 +94,20 @@ func (s *languageDetectionSuite) getPidForCommand(command string) string { return pids[0] } -func (s *languageDetectionSuite) getLanguageForPid(pid string) (string, error) { +func (s *languageDetectionSuite) getLanguageForPid(pid string, source string) (string, error) { wl := s.Env().RemoteHost.MustExecute("sudo /opt/datadog-agent/bin/agent/agent workload-list") if len(strings.TrimSpace(wl)) == 0 { return "", errors.New("agent workload-list was empty") } scanner := bufio.NewScanner(strings.NewReader(wl)) - pidLine := fmt.Sprintf("PID: %s", pid) + headerLine := fmt.Sprintf("=== Entity process sources(merged):[%s] id: %s ===", source, pid) + for scanner.Scan() { line := scanner.Text() - if line == pidLine { + if line == headerLine { + scanner.Scan() // entity line + scanner.Scan() // pid scanner.Scan() // nspid scanner.Scan() // container id scanner.Scan() // creation time diff --git a/test/new-e2e/tests/language-detection/node_test.go b/test/new-e2e/tests/language-detection/node_test.go index 7c8b82a8bf50ad..23b7b5dc1727d1 100644 --- a/test/new-e2e/tests/language-detection/node_test.go +++ b/test/new-e2e/tests/language-detection/node_test.go @@ -41,5 +41,5 @@ func (s *languageDetectionSuite) TestNodeDetection() { s.Env().RemoteHost.MustExecute(fmt.Sprintf(`echo "%s" > prog.js`, nodeProg)) s.Env().RemoteHost.MustExecute("nohup node prog.js >myscript.log 2>&1 prog.py") s.Env().RemoteHost.MustExecute("nohup python3 prog.py >myscript.log 2>&1 Date: Tue, 6 Aug 2024 11:02:35 +0200 Subject: [PATCH 08/19] Add `gocheckcompilerdirectives` linter and fix warnings (#28184) --- .golangci.yml | 25 ++++++++++--------- cmd/system-probe/modules/traceroute_linux.go | 2 -- .../modules/traceroute_windows.go | 2 -- .../npcollectorimpl/npcollector_test.go | 2 +- comp/snmptraps/packet/test_helpers.go | 2 +- pkg/networkpath/traceroute/tcp/utils_test.go | 2 +- 6 files changed, 16 insertions(+), 19 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index d23f863c2aa7b5..b74881792e806b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -63,18 +63,19 @@ issues: linters: disable-all: true enable: - - unconvert # Remove unnecessary type conversions - - unused # Checks Go code for unused constants, variables, functions and types - - ineffassign # Detects when assignments to existing variables are not used - - misspell # Finds commonly misspelled English words in comments - - gofmt # Gofmt checks whether code was gofmt-ed - - revive # Revive is a 
replacement for golint, a coding style checker - - errcheck # errcheck is a program for checking for unchecked errors in go programs. - - staticcheck # staticcheck is a go vet on steroids, applying a ton of static analysis checks - - govet # Vet examines Go source code and reports suspicious constructs, such as Printf calls whose arguments do not align with the format string - - depguard # Depguard is useful for preventing specific packages from being used - - bodyclose # checks whether HTTP response body is closed successfully - - gosimple # Linter for Go source code that specializes in simplifying code. + - unconvert # Remove unnecessary type conversions + - unused # Checks Go code for unused constants, variables, functions and types + - ineffassign # Detects when assignments to existing variables are not used + - misspell # Finds commonly misspelled English words in comments + - gofmt # Gofmt checks whether code was gofmt-ed + - revive # Revive is a replacement for golint, a coding style checker + - errcheck # errcheck is a program for checking for unchecked errors in go programs. + - staticcheck # staticcheck is a go vet on steroids, applying a ton of static analysis checks + - govet # Vet examines Go source code and reports suspicious constructs, such as Printf calls whose arguments do not align with the format string + - depguard # Depguard is useful for preventing specific packages from being used + - bodyclose # checks whether HTTP response body is closed successfully + - gosimple # Linter for Go source code that specializes in simplifying code. + - gocheckcompilerdirectives # Checks Go compiler directives syntax linters-settings: depguard: diff --git a/cmd/system-probe/modules/traceroute_linux.go b/cmd/system-probe/modules/traceroute_linux.go index f7bec225a6b9d0..57d3b3e9b85ef8 100644 --- a/cmd/system-probe/modules/traceroute_linux.go +++ b/cmd/system-probe/modules/traceroute_linux.go @@ -3,8 +3,6 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. -// go:build linux - package modules import ( diff --git a/cmd/system-probe/modules/traceroute_windows.go b/cmd/system-probe/modules/traceroute_windows.go index 32a93f37c473dc..fe3e449abcd93a 100644 --- a/cmd/system-probe/modules/traceroute_windows.go +++ b/cmd/system-probe/modules/traceroute_windows.go @@ -3,8 +3,6 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. -// go:build windows - package modules import ( diff --git a/comp/networkpath/npcollector/npcollectorimpl/npcollector_test.go b/comp/networkpath/npcollector/npcollectorimpl/npcollector_test.go index 2d4d6efebe48ee..60b6001544ad3f 100644 --- a/comp/networkpath/npcollector/npcollectorimpl/npcollector_test.go +++ b/comp/networkpath/npcollector/npcollectorimpl/npcollector_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2024-present Datadog, Inc. -// go:build test +//go:build test package npcollectorimpl diff --git a/comp/snmptraps/packet/test_helpers.go b/comp/snmptraps/packet/test_helpers.go index 0194680e99c378..22e856ee24f94c 100644 --- a/comp/snmptraps/packet/test_helpers.go +++ b/comp/snmptraps/packet/test_helpers.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2020-present Datadog, Inc. 
-// go:build !serverless && test +//go:build !serverless && test package packet diff --git a/pkg/networkpath/traceroute/tcp/utils_test.go b/pkg/networkpath/traceroute/tcp/utils_test.go index 15f50d0684c7e4..10c1f278b984a4 100644 --- a/pkg/networkpath/traceroute/tcp/utils_test.go +++ b/pkg/networkpath/traceroute/tcp/utils_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. -// go:build test +//go:build test package tcp From ea3a88c6bb778391f2ebd05115844181e88cdc80 Mon Sep 17 00:00:00 2001 From: Pierre Gimalac Date: Tue, 6 Aug 2024 11:02:40 +0200 Subject: [PATCH 09/19] Do not print execution time in headless mode in Go linter task (#28203) --- tasks/linter.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tasks/linter.py b/tasks/linter.py index 1fcd7c6b88cce1..920d173b964397 100644 --- a/tasks/linter.py +++ b/tasks/linter.py @@ -27,11 +27,7 @@ from tasks.libs.common.color import Color, color_message from tasks.libs.common.constants import DEFAULT_BRANCH, GITHUB_REPO_NAME from tasks.libs.common.git import get_staged_files -from tasks.libs.common.utils import ( - gitlab_section, - is_pr_context, - running_in_ci, -) +from tasks.libs.common.utils import gitlab_section, is_pr_context, running_in_ci from tasks.libs.types.copyright import CopyrightLinter, LintFailure from tasks.modules import GoModule from tasks.test_core import ModuleLintResult, process_input_args, process_module_results, test_core @@ -190,10 +186,11 @@ def go( include_sds=include_sds, ) - with gitlab_section('Linter execution time'): - print(color_message('Execution time summary:', 'bold')) - for e in execution_times: - print(f'- {e.name}: {e.duration:.1f}s') + if not headless_mode: + with gitlab_section('Linter execution time'): + print(color_message('Execution time summary:', 'bold')) + for e in execution_times: + print(f'- {e.name}: {e.duration:.1f}s') with gitlab_section('Linter failures'): success = process_module_results(flavor=flavor, module_results=lint_results) From 689033db3d83da273b123e8fa2ac225a363344d6 Mon Sep 17 00:00:00 2001 From: AliDatadog <125997632+AliDatadog@users.noreply.github.com> Date: Tue, 6 Aug 2024 11:02:46 +0200 Subject: [PATCH 10/19] Remove deprecated `CI_ENABLE_CONTAINER_IMAGE_BUILDS` (#28052) --- .gitlab/container_build/docker_linux.yml | 2 -- .gitlab/container_build/fakeintake.yml | 1 - 2 files changed, 3 deletions(-) diff --git a/.gitlab/container_build/docker_linux.yml b/.gitlab/container_build/docker_linux.yml index 80949ca6669767..4383141d7f3a0c 100644 --- a/.gitlab/container_build/docker_linux.yml +++ b/.gitlab/container_build/docker_linux.yml @@ -1,8 +1,6 @@ --- .docker_build_job_definition: stage: container_build - variables: - CI_ENABLE_CONTAINER_IMAGE_BUILDS: "true" script: - aws s3 sync --only-show-errors $S3_ARTIFACTS_URI $BUILD_CONTEXT - TAG_SUFFIX=${TAG_SUFFIX:-} diff --git a/.gitlab/container_build/fakeintake.yml b/.gitlab/container_build/fakeintake.yml index 2d88bc7776ca5d..b5a725f23dba93 100644 --- a/.gitlab/container_build/fakeintake.yml +++ b/.gitlab/container_build/fakeintake.yml @@ -9,7 +9,6 @@ docker_build_fakeintake: image: 486234852809.dkr.ecr.us-east-1.amazonaws.com/docker:20.10-py3 tags: ["arch:amd64"] variables: - CI_ENABLE_CONTAINER_IMAGE_BUILDS: "true" TARGET: 486234852809.dkr.ecr.us-east-1.amazonaws.com/ci/datadog-agent/fakeintake:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA} DOCKERFILE: test/fakeintake/Dockerfile PLATFORMS: 
linux/amd64,linux/arm64 From 0e9a424f5081fd99b91f9844bfc9569fa8dd5e3d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 09:32:54 +0000 Subject: [PATCH 11/19] Bump golang.org/x/sync from 0.7.0 to 0.8.0 (#28225) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- LICENSE-3rdparty.csv | 6 +++--- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 7796f6eb67d57c..2d6aeb22b04ee1 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -2646,9 +2646,9 @@ core,golang.org/x/oauth2/google/internal/stsexchange,BSD-3-Clause,Copyright (c) core,golang.org/x/oauth2/internal,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/oauth2/jws,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/oauth2/jwt,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved -core,golang.org/x/sync/errgroup,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved -core,golang.org/x/sync/semaphore,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved -core,golang.org/x/sync/singleflight,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved +core,golang.org/x/sync/errgroup,BSD-3-Clause,Copyright 2009 The Go Authors +core,golang.org/x/sync/semaphore,BSD-3-Clause,Copyright 2009 The Go Authors +core,golang.org/x/sync/singleflight,BSD-3-Clause,Copyright 2009 The Go Authors core,golang.org/x/sys/cpu,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/sys/execabs,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/sys/plan9,BSD-3-Clause,Copyright (c) 2009 The Go Authors. 
All rights reserved diff --git a/go.mod b/go.mod index 9e683f0274034a..1ba73e6b6f8c1e 100644 --- a/go.mod +++ b/go.mod @@ -300,7 +300,7 @@ require ( golang.org/x/arch v0.8.0 golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 golang.org/x/net v0.27.0 - golang.org/x/sync v0.7.0 + golang.org/x/sync v0.8.0 golang.org/x/sys v0.22.0 golang.org/x/text v0.16.0 golang.org/x/time v0.5.0 diff --git a/go.sum b/go.sum index ba9dbbd6fa29d9..c3d799e70c576d 100644 --- a/go.sum +++ b/go.sum @@ -3093,8 +3093,8 @@ golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= From 3a1b3a40351871c18163e510e9e53cec654807e8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 09:32:58 +0000 Subject: [PATCH 12/19] Bump golang.org/x/arch from 0.8.0 to 0.9.0 (#28222) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- LICENSE-3rdparty.csv | 4 ++-- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 2d6aeb22b04ee1..05b65a163e51db 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -2569,8 +2569,8 @@ core,go4.org/intern,BSD-3-Clause,"Copyright (c) 2020, Brad Fitzpatrick" core,go4.org/mem,Apache-2.0,Copyright 2020 The Go4 AUTHORS core,go4.org/netipx,BSD-3-Clause,Alex Willmer | Copyright (c) 2020 The Inet.af AUTHORS. All rights reserved | Matt Layher | Tailscale Inc. | Tobias Klauser core,go4.org/unsafe/assume-no-moving-gc,BSD-3-Clause,"Copyright (c) 2020, Brad Fitzpatrick" -core,golang.org/x/arch/arm64/arm64asm,BSD-3-Clause,Copyright (c) 2015 The Go Authors. All rights reserved -core,golang.org/x/arch/x86/x86asm,BSD-3-Clause,Copyright (c) 2015 The Go Authors. All rights reserved +core,golang.org/x/arch/arm64/arm64asm,BSD-3-Clause,Copyright 2015 The Go Authors +core,golang.org/x/arch/x86/x86asm,BSD-3-Clause,Copyright 2015 The Go Authors core,golang.org/x/crypto/argon2,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/crypto/bcrypt,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/crypto/blake2b,BSD-3-Clause,Copyright (c) 2009 The Go Authors. 
All rights reserved diff --git a/go.mod b/go.mod index 1ba73e6b6f8c1e..ba730f4570988b 100644 --- a/go.mod +++ b/go.mod @@ -297,7 +297,7 @@ require ( go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 go4.org/netipx v0.0.0-20220812043211-3cc044ffd68d - golang.org/x/arch v0.8.0 + golang.org/x/arch v0.9.0 golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 golang.org/x/net v0.27.0 golang.org/x/sync v0.8.0 diff --git a/go.sum b/go.sum index c3d799e70c576d..af1cbcfdb7184d 100644 --- a/go.sum +++ b/go.sum @@ -2843,8 +2843,8 @@ go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2/go.mod h1: go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4= go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E= golang.org/x/arch v0.0.0-20190927153633-4e8777c89be4/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4= -golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= -golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= +golang.org/x/arch v0.9.0 h1:ub9TgUInamJ8mrZIGlBG6/4TqWeMszd4N8lNorbrr6k= +golang.org/x/arch v0.9.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= From 87684191e73aaf5d0dcce3ceb48be3c717ba94c7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 09:40:38 +0000 Subject: [PATCH 13/19] Bump golang.org/x/mod from 0.19.0 to 0.20.0 (#28226) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index ba730f4570988b..ecd6de2e8323ba 100644 --- a/go.mod +++ b/go.mod @@ -565,7 +565,7 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 go.opentelemetry.io/proto/otlp v1.2.0 // indirect golang.org/x/crypto v0.25.0 // indirect - golang.org/x/mod v0.19.0 + golang.org/x/mod v0.20.0 golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/term v0.22.0 // indirect gonum.org/v1/gonum v0.15.0 // indirect diff --git a/go.sum b/go.sum index af1cbcfdb7184d..f5eaf08a021611 100644 --- a/go.sum +++ b/go.sum @@ -2944,8 +2944,8 @@ golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= -golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= From 0501086088f6615845ad1d052729cc6d27a23e56 Mon Sep 17 00:00:00 2001 From: Pierre Gimalac Date: Tue, 6 Aug 2024 12:05:30 +0200 Subject: [PATCH 14/19] Add a config to enable happy eyeball in http transport (#28200) --- pkg/config/setup/config.go | 1 + pkg/util/http/transport.go | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index 18854c93e9e894..c0c55f04eefaa6 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -967,6 +967,7 @@ func agent(config pkgconfigmodel.Setup) { config.BindEnvAndSetDefault("skip_ssl_validation", false) config.BindEnvAndSetDefault("sslkeylogfile", "") config.BindEnv("tls_handshake_timeout") + config.BindEnv("http_dial_fallback_delay") config.BindEnvAndSetDefault("hostname", "") config.BindEnvAndSetDefault("hostname_file", "") config.BindEnvAndSetDefault("tags", []string{}) diff --git a/pkg/util/http/transport.go b/pkg/util/http/transport.go index 62d1641e582415..94cfa4f4ce6b6d 100644 --- a/pkg/util/http/transport.go +++ b/pkg/util/http/transport.go @@ -97,14 +97,21 @@ func CreateHTTPTransport(cfg pkgconfigmodel.Reader) *http.Transport { tlsHandshakeTimeout = 10 * time.Second } + // Control whether to disable RFC 6555 Fast Fallback ("Happy Eyeballs") + // By default this is disabled (set to a negative value). + // It can be set to 0 to use the default value, or an explicit duration. + fallbackDelay := -1 * time.Nanosecond + if cfg.IsSet("http_dial_fallback_delay") { + fallbackDelay = cfg.GetDuration("http_dial_fallback_delay") + } + transport := &http.Transport{ TLSClientConfig: tlsConfig, DialContext: (&net.Dialer{ Timeout: 30 * time.Second, // Enables TCP keepalives to detect broken connections - KeepAlive: 30 * time.Second, - // Disable RFC 6555 Fast Fallback ("Happy Eyeballs") - FallbackDelay: -1 * time.Nanosecond, + KeepAlive: 30 * time.Second, + FallbackDelay: fallbackDelay, }).DialContext, MaxIdleConns: 100, MaxIdleConnsPerHost: 5, From 9182e4a15a3592965999d9149b96b9976d70d057 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:10:35 +0000 Subject: [PATCH 15/19] Bump golang.org/x/time from 0.5.0 to 0.6.0 (#28224) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- LICENSE-3rdparty.csv | 2 +- go.mod | 2 +- go.sum | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 05b65a163e51db..3dc634834779f6 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -2691,7 +2691,7 @@ core,golang.org/x/text/transform,BSD-3-Clause,Copyright (c) 2009 The Go Authors. core,golang.org/x/text/unicode/bidi,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/text/unicode/norm,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved core,golang.org/x/text/width,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved -core,golang.org/x/time/rate,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved +core,golang.org/x/time/rate,BSD-3-Clause,Copyright 2009 The Go Authors core,golang/go,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved. 
core,gomodules.xyz/jsonpatch/v2,Apache-2.0,Copyright (c) 2015 The Authors core,gonum.org/v1/gonum/blas,BSD-3-Clause,Alexander Egurnov | Andrei Blinnikov | Andrew Brampton | Bailey Lissington | Bill Gray | Bill Noon | Brendan Tracey | Brent Pedersen | Bulat Khasanov | Chad Kunde | Chan Kwan Yin | Chih-Wei Chang | Chong-Yeol Nah | Chris Tessum | Christophe Meessen | Christopher Waldon | Clayton Northey | Copyright ©2013 The Gonum Authors. All rights reserved | Dan Kortschak | Dan Lorenc | Daniel Fireman | Dario Heinisch | David Kleiven | David Samborski | Davor Kapsa | DeepMind Technologies | Delaney Gillilan | Dezmond Goff | Dong-hee Na | Dustin Spicuzza | Egon Elbre | Ekaterina Efimova | Eng Zer Jun | Ethan Burns | Ethan Reesor | Evert Lammerts | Evgeny Savinov | Fabian Wickborn | Facundo Gaich | Fazlul Shahriar | Francesc Campoy | Google Inc | Gustaf Johansson | Hossein Zolfi | Iakov Davydov | Igor Mikushkin | Iskander Sharipov | Jalem Raj Rohit | James Bell | James Bowman | James Holmes <32bitkid@gmail.com> | Janne Snabb | Jeremy Atkinson | Jes Cok | Jinesi Yelizati | Jon Richards | Jonas Kahler | Jonas Schulze | Jonathan Bluett-Duncan | Jonathan J Lawlor | Jonathan Reiter | Jonathan Schroeder | Joost van Amersfoort | Jordan Stoker | Joseph Watson | Josh Wilson | Julien Roland | Kai Trukenmüller | Kent English | Kevin C. Zimmerman | Kirill Motkov | Konstantin Shaposhnikov | Leonid Kneller | Lyron Winderbaum | Marco Leogrande | Mark Canning | Mark Skilbeck | Martin Diz | Matthew Connelly | Matthieu Di Mercurio | Max Halford | Maxim Sergeev | Microsoft Corporation | MinJae Kwon | Nathan Edwards | Nick Potts | Nils Wogatzky | Olivier Wulveryck | Or Rikon | Patricio Whittingslow | Patrick DeVivo | Pontus Melke | Renee French | Rishi Desai | Robin Eklind | Roger Welin | Roman Werpachowski | Rondall Jones | Sam Zaydel | Samuel Kelemen | Saran Ahluwalia | Scott Holden | Scott Kiesel | Sebastien Binet | Shawn Smith | Sintela Ltd | Spencer Lyon | Steve McCoy | Taesu Pyo | Takeshi Yoneda | Tamir Hyman | The University of Adelaide | The University of Minnesota | The University of Washington | Thomas Berg | Tobin Harding | Valentin Deleplace | Vincent Thiery | Vladimír Chalupecký | Will Tekulve | Yasuhiro Matsumoto | Yevgeniy Vahlis | Yucheng Zhu | Yunomi | Zoe Juozapaitis | antichris | source{d} diff --git a/go.mod b/go.mod index ecd6de2e8323ba..ef78c1b1f72e89 100644 --- a/go.mod +++ b/go.mod @@ -303,7 +303,7 @@ require ( golang.org/x/sync v0.8.0 golang.org/x/sys v0.22.0 golang.org/x/text v0.16.0 - golang.org/x/time v0.5.0 + golang.org/x/time v0.6.0 golang.org/x/tools v0.23.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 google.golang.org/genproto v0.0.0-20240617180043-68d350f18fd4 // indirect diff --git a/go.sum b/go.sum index f5eaf08a021611..73709c86c07812 100644 --- a/go.sum +++ b/go.sum @@ -3269,8 +3269,9 @@ golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11/go.mod h1:tRJNPiyCQ0inRvYxb golang.org/x/time v0.0.0-20220922220347-f3bd1da661af/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.1.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= +golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools 
v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

From a1d33e28b85162a526669d536c385b2d53d36272 Mon Sep 17 00:00:00 2001
From: Nicolas Schweitzer
Date: Tue, 6 Aug 2024 12:31:31 +0200
Subject: [PATCH 16/19] fix(build-rc): Use correct semantic version ordering
 (#28202)

---
 tasks/libs/releasing/version.py   | 17 ++++++++++++----
 tasks/unit_tests/version_tests.py | 34 ++++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/tasks/libs/releasing/version.py b/tasks/libs/releasing/version.py
index 5337ddb1fa5167..3ae5e8bf28bc62 100644
--- a/tasks/libs/releasing/version.py
+++ b/tasks/libs/releasing/version.py
@@ -421,10 +421,19 @@ def get_matching_pattern(ctx, major_version, release=False):
     """
     We need to use specific patterns (official release tags) for nightly builds as they are used to install agent versions.
     """
+    from functools import cmp_to_key
+
+    import semver
+
     pattern = rf"{major_version}\.*"
     if release or os.getenv("BUCKET_BRANCH") in ALLOWED_REPO_NIGHTLY_BRANCHES:
-        pattern = ctx.run(
-            rf"git tag --list --merged {get_current_branch(ctx)} | grep -E '^{major_version}\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$' | sort -rV | head -1",
-            hide=True,
-        ).stdout.strip()
+        tags = (
+            ctx.run(
+                rf"git tag --list --merged {get_current_branch(ctx)} | grep -E '^{major_version}\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$'",
+                hide=True,
+            )
+            .stdout.strip()
+            .split("\n")
+        )
+        pattern = max(tags, key=cmp_to_key(semver.compare))
     return pattern
diff --git a/tasks/unit_tests/version_tests.py b/tasks/unit_tests/version_tests.py
index c09bb5c5e29544..ac0072e2a9f626 100644
--- a/tasks/unit_tests/version_tests.py
+++ b/tasks/unit_tests/version_tests.py
@@ -5,7 +5,7 @@
 from invoke import MockContext, Result
 
-from tasks.libs.releasing.version import query_version
+from tasks.libs.releasing.version import get_matching_pattern, query_version
 from tasks.libs.types.version import Version
 
@@ -233,7 +233,7 @@ def test_on_nightly_bucket(self):
         c = MockContext(
             run={
                 "git rev-parse --abbrev-ref HEAD": Result("main"),
-                rf"git tag --list --merged main | grep -E '^{major_version}\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$' | sort -rV | head -1": Result(
+                rf"git tag --list --merged main | grep -E '^{major_version}\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$'": Result(
                     "7.55.0-devel"
                 ),
                 'git describe --tags --candidates=50 --match "7.55.0-devel" --abbrev=7': Result(
                     "7.55.0-devel-543-g315e3a2"
                 ),
@@ -252,7 +252,7 @@ def test_on_release(self):
         c = MockContext(
             run={
                 "git rev-parse --abbrev-ref HEAD": Result("7.55.x"),
-                rf"git tag --list --merged 7.55.x | grep -E '^{major_version}\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$' | sort -rV | head -1": Result(
+                rf"git tag --list --merged 7.55.x | grep -E '^{major_version}\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$'": Result(
                     "7.55.0-devel"
                 ),
                 'git describe --tags --candidates=50 --match "7.55.0-devel" --abbrev=7': Result(
                     "7.55.0-devel-543-g315e3a2"
                 ),
@@ -265,3 +265,31 @@
         self.assertEqual(p, "devel")
         self.assertEqual(c, 543)
         self.assertEqual(g, "315e3a2")
+
+
+class TestGetMatchingPattern(unittest.TestCase):
+    def test_on_patch_release(self):
+        c = MockContext(
+            run={
+                "git rev-parse --abbrev-ref HEAD": Result("7.55.x"),
+                r"git tag --list --merged 7.55.x | grep -E '^7\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$'": Result(
'7.15.0-devel\n7.15.0-rc.2\n7.15.0-rc.4\n7.15.0-rc.5\n7.15.0-rc.6\n7.15.0-rc.7\n7.15.0-rc.8\n7.15.0-rc.9\n7.16.0\n7.16.0-rc.1\n7.16.0-rc.2\n7.16.0-rc.3\n7.16.0-rc.4\n7.16.0-rc.5\n7.16.0-rc.6\n7.16.0-rc.7\n7.16.0-rc.8\n7.16.0-rc.9\n7.17.0-devel\n7.17.0-rc.1\n7.17.0-rc.2\n7.17.0-rc.3\n7.17.0-rc.4\n7.18.0-devel\n7.18.0-rc.1\n7.18.0-rc.2\n7.18.0-rc.3\n7.18.0-rc.4\n7.18.0-rc.5\n7.18.0-rc.6\n7.19.0-devel\n7.19.0-rc.1\n7.19.0-rc.2\n7.19.0-rc.3\n7.19.0-rc.4\n7.19.0-rc.5\n7.20.0-devel\n7.20.0-rc.1\n7.20.0-rc.2\n7.20.0-rc.3\n7.20.0-rc.4\n7.20.0-rc.5\n7.20.0-rc.6\n7.20.0-rc.7\n7.21.0-devel\n7.21.0-rc.1\n7.21.0-rc.2\n7.21.0-rc.3\n7.22.0-devel\n7.22.0-rc.1\n7.22.0-rc.2\n7.22.0-rc.3\n7.22.0-rc.4\n7.22.0-rc.5\n7.22.0-rc.6\n7.23.0-devel\n7.23.0-rc.1\n7.23.0-rc.2\n7.23.0-rc.3\n7.24.0-devel\n7.24.0-rc.1\n7.24.0-rc.2\n7.24.0-rc.3\n7.24.0-rc.4\n7.24.0-rc.5\n7.25.0-devel\n7.25.0-rc.1\n7.25.0-rc.2\n7.25.0-rc.3\n7.25.0-rc.4\n7.25.0-rc.5\n7.25.0-rc.6\n7.26.0-devel\n7.26.0-rc.1\n7.26.0-rc.2\n7.26.0-rc.3\n7.27.0-devel\n7.27.0-rc.1\n7.27.0-rc.2\n7.27.0-rc.3\n7.27.0-rc.4\n7.27.0-rc.5\n7.27.0-rc.6\n7.28.0-devel\n7.28.0-rc.1\n7.28.0-rc.2\n7.28.0-rc.3\n7.29.0-devel\n7.29.0-rc.1\n7.29.0-rc.2\n7.29.0-rc.3\n7.29.0-rc.4\n7.29.0-rc.5\n7.29.0-rc.6\n7.30.0-devel\n7.30.0-rc.1\n7.30.0-rc.2\n7.30.0-rc.3\n7.30.0-rc.4\n7.30.0-rc.5\n7.30.0-rc.6\n7.30.0-rc.7\n7.31.0-devel\n7.31.0-rc.1\n7.31.0-rc.2\n7.31.0-rc.3\n7.31.0-rc.4\n7.31.0-rc.5\n7.31.0-rc.6\n7.31.0-rc.7\n7.31.0-rc.8\n7.32.0-devel\n7.32.0-rc.1\n7.32.0-rc.2\n7.32.0-rc.3\n7.32.0-rc.4\n7.32.0-rc.5\n7.32.0-rc.6\n7.33.0-devel\n7.33.0-rc.1\n7.33.0-rc.2\n7.33.0-rc.3\n7.33.0-rc.4\n7.33.0-rc.4-dbm-beta-0.1\n7.34.0-devel\n7.34.0-rc.1\n7.34.0-rc.2\n7.34.0-rc.3\n7.34.0-rc.4\n7.35.0-devel\n7.35.0-rc.1\n7.35.0-rc.2\n7.35.0-rc.3\n7.35.0-rc.4\n7.36.0-devel\n7.36.0-rc.1\n7.36.0-rc.2\n7.36.0-rc.3\n7.36.0-rc.4\n7.37.0-devel\n7.37.0-rc.1\n7.37.0-rc.2\n7.37.0-rc.3\n7.38.0-devel\n7.38.0-rc.1\n7.38.0-rc.2\n7.38.0-rc.3\n7.39.0-devel\n7.39.0-rc.1\n7.39.0-rc.2\n7.39.0-rc.3\n7.40.0-devel\n7.40.0-rc.1\n7.40.0-rc.2\n7.41.0-devel\n7.41.0-rc.1\n7.41.0-rc.2\n7.41.0-rc.3\n7.42.0-devel\n7.42.0-rc.1\n7.42.0-rc.2\n7.42.0-rc.3\n7.43.0-devel\n7.43.0-rc.1\n7.43.0-rc.2\n7.43.0-rc.3\n7.44.0-devel\n7.44.0-rc.1\n7.44.0-rc.2\n7.44.0-rc.3\n7.44.0-rc.4\n7.45.0-devel\n7.45.0-rc.1\n7.45.0-rc.2\n7.45.0-rc.3\n7.46.0-devel\n7.46.0-rc.1\n7.46.0-rc.2\n7.47.0-devel\n7.47.0-rc.1\n7.47.0-rc.2\n7.47.0-rc.3\n7.48.0-devel\n7.48.0-rc.0\n7.48.0-rc.1\n7.48.0-rc.2\n7.49.0-devel\n7.49.0-rc.1\n7.49.0-rc.2\n7.50.0-devel\n7.50.0-rc.1\n7.50.0-rc.2\n7.50.0-rc.3\n7.50.0-rc.4\n7.51.0-devel\n7.51.0-rc.1\n7.51.0-rc.2\n7.52.0-devel\n7.52.0-rc.1\n7.52.0-rc.2\n7.52.0-rc.3\n7.53.0-devel\n7.53.0-rc.1\n7.53.0-rc.2\n7.54.0-devel\n7.54.0-rc.1\n7.54.0-rc.2\n7.55.0\n7.55.0-devel\n7.55.0-rc.1\n7.55.0-rc.10\n7.55.0-rc.11\n7.55.0-rc.2\n7.55.0-rc.3\n7.55.0-rc.4\n7.55.0-rc.5\n7.55.0-rc.6\n7.55.0-rc.7\n7.55.0-rc.8\n7.55.0-rc.9' + ), + } + ) + self.assertEqual(get_matching_pattern(c, major_version="7", release=True), "7.55.0") + + def test_on_release(self): + c = MockContext( + run={ + "git rev-parse --abbrev-ref HEAD": Result("7.55.x"), + r"git tag --list --merged 7.55.x | grep -E '^7\.[0-9]+\.[0-9]+(-rc.*|-devel.*)?$'": Result( + 
'7.15.0-devel\n7.15.0-rc.2\n7.15.0-rc.4\n7.15.0-rc.5\n7.15.0-rc.6\n7.15.0-rc.7\n7.15.0-rc.8\n7.15.0-rc.9\n7.16.0\n7.16.0-rc.1\n7.16.0-rc.2\n7.16.0-rc.3\n7.16.0-rc.4\n7.16.0-rc.5\n7.16.0-rc.6\n7.16.0-rc.7\n7.16.0-rc.8\n7.16.0-rc.9\n7.17.0-devel\n7.17.0-rc.1\n7.17.0-rc.2\n7.17.0-rc.3\n7.17.0-rc.4\n7.18.0-devel\n7.18.0-rc.1\n7.18.0-rc.2\n7.18.0-rc.3\n7.18.0-rc.4\n7.18.0-rc.5\n7.18.0-rc.6\n7.19.0-devel\n7.19.0-rc.1\n7.19.0-rc.2\n7.19.0-rc.3\n7.19.0-rc.4\n7.19.0-rc.5\n7.20.0-devel\n7.20.0-rc.1\n7.20.0-rc.2\n7.20.0-rc.3\n7.20.0-rc.4\n7.20.0-rc.5\n7.20.0-rc.6\n7.20.0-rc.7\n7.21.0-devel\n7.21.0-rc.1\n7.21.0-rc.2\n7.21.0-rc.3\n7.22.0-devel\n7.22.0-rc.1\n7.22.0-rc.2\n7.22.0-rc.3\n7.22.0-rc.4\n7.22.0-rc.5\n7.22.0-rc.6\n7.23.0-devel\n7.23.0-rc.1\n7.23.0-rc.2\n7.23.0-rc.3\n7.24.0-devel\n7.24.0-rc.1\n7.24.0-rc.2\n7.24.0-rc.3\n7.24.0-rc.4\n7.24.0-rc.5\n7.25.0-devel\n7.25.0-rc.1\n7.25.0-rc.2\n7.25.0-rc.3\n7.25.0-rc.4\n7.25.0-rc.5\n7.25.0-rc.6\n7.26.0-devel\n7.26.0-rc.1\n7.26.0-rc.2\n7.26.0-rc.3\n7.27.0-devel\n7.27.0-rc.1\n7.27.0-rc.2\n7.27.0-rc.3\n7.27.0-rc.4\n7.27.0-rc.5\n7.27.0-rc.6\n7.28.0-devel\n7.28.0-rc.1\n7.28.0-rc.2\n7.28.0-rc.3\n7.29.0-devel\n7.29.0-rc.1\n7.29.0-rc.2\n7.29.0-rc.3\n7.29.0-rc.4\n7.29.0-rc.5\n7.29.0-rc.6\n7.30.0-devel\n7.30.0-rc.1\n7.30.0-rc.2\n7.30.0-rc.3\n7.30.0-rc.4\n7.30.0-rc.5\n7.30.0-rc.6\n7.30.0-rc.7\n7.31.0-devel\n7.31.0-rc.1\n7.31.0-rc.2\n7.31.0-rc.3\n7.31.0-rc.4\n7.31.0-rc.5\n7.31.0-rc.6\n7.31.0-rc.7\n7.31.0-rc.8\n7.32.0-devel\n7.32.0-rc.1\n7.32.0-rc.2\n7.32.0-rc.3\n7.32.0-rc.4\n7.32.0-rc.5\n7.32.0-rc.6\n7.33.0-devel\n7.33.0-rc.1\n7.33.0-rc.2\n7.33.0-rc.3\n7.33.0-rc.4\n7.33.0-rc.4-dbm-beta-0.1\n7.34.0-devel\n7.34.0-rc.1\n7.34.0-rc.2\n7.34.0-rc.3\n7.34.0-rc.4\n7.35.0-devel\n7.35.0-rc.1\n7.35.0-rc.2\n7.35.0-rc.3\n7.35.0-rc.4\n7.36.0-devel\n7.36.0-rc.1\n7.36.0-rc.2\n7.36.0-rc.3\n7.36.0-rc.4\n7.37.0-devel\n7.37.0-rc.1\n7.37.0-rc.2\n7.37.0-rc.3\n7.38.0-devel\n7.38.0-rc.1\n7.38.0-rc.2\n7.38.0-rc.3\n7.39.0-devel\n7.39.0-rc.1\n7.39.0-rc.2\n7.39.0-rc.3\n7.40.0-devel\n7.40.0-rc.1\n7.40.0-rc.2\n7.41.0-devel\n7.41.0-rc.1\n7.41.0-rc.2\n7.41.0-rc.3\n7.42.0-devel\n7.42.0-rc.1\n7.42.0-rc.2\n7.42.0-rc.3\n7.43.0-devel\n7.43.0-rc.1\n7.43.0-rc.2\n7.43.0-rc.3\n7.44.0-devel\n7.44.0-rc.1\n7.44.0-rc.2\n7.44.0-rc.3\n7.44.0-rc.4\n7.45.0-devel\n7.45.0-rc.1\n7.45.0-rc.2\n7.45.0-rc.3\n7.46.0-devel\n7.46.0-rc.1\n7.46.0-rc.2\n7.47.0-devel\n7.47.0-rc.1\n7.47.0-rc.2\n7.47.0-rc.3\n7.48.0-devel\n7.48.0-rc.0\n7.48.0-rc.1\n7.48.0-rc.2\n7.49.0-devel\n7.49.0-rc.1\n7.49.0-rc.2\n7.50.0-devel\n7.50.0-rc.1\n7.50.0-rc.2\n7.50.0-rc.3\n7.50.0-rc.4\n7.51.0-devel\n7.51.0-rc.1\n7.51.0-rc.2\n7.52.0-devel\n7.52.0-rc.1\n7.52.0-rc.2\n7.52.0-rc.3\n7.53.0-devel\n7.53.0-rc.1\n7.53.0-rc.2\n7.54.0-devel\n7.54.0-rc.1\n7.54.0-rc.2\n7.55.0-devel\n7.55.0-rc.1\n7.55.0-rc.10\n7.55.0-rc.11\n7.55.0-rc.2\n7.55.0-rc.3\n7.55.0-rc.4\n7.55.0-rc.5\n7.55.0-rc.6\n7.55.0-rc.7\n7.55.0-rc.8\n7.55.0-rc.9' + ), + } + ) + self.assertEqual(get_matching_pattern(c, major_version="7", release=True), "7.55.0-rc.11") + + def test_on_branch(self): + c = MockContext() + self.assertEqual(get_matching_pattern(c, major_version="42", release=False), r"42\.*") From 2ef32dc523b235903acc529e6a95a0122ed88aed Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 6 Aug 2024 13:22:25 +0200 Subject: [PATCH 17/19] discovery: Add system-probe endpoint for service list (#28051) --- .../corechecks/servicediscovery/errors.go | 2 +- .../servicediscovery/events_test.go | 4 +- .../corechecks/servicediscovery/impl_linux.go | 4 +- .../servicediscovery/model/model.go | 18 + 
.../servicediscovery/module/impl_linux.go | 280 ++++++++++++- .../module/impl_linux_test.go | 390 ++++++++++++++++++ .../module/testdata/docker-compose.yml | 18 + .../servicediscovery/service_detector.go | 39 +- .../servicediscovery/service_detector_test.go | 6 +- .../servicediscovery/servicediscovery.go | 2 +- .../corechecks/servicediscovery/telemetry.go | 3 +- tasks/system_probe.py | 1 + 12 files changed, 741 insertions(+), 26 deletions(-) create mode 100644 pkg/collector/corechecks/servicediscovery/model/model.go create mode 100644 pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go create mode 100644 pkg/collector/corechecks/servicediscovery/module/testdata/docker-compose.yml diff --git a/pkg/collector/corechecks/servicediscovery/errors.go b/pkg/collector/corechecks/servicediscovery/errors.go index 2611d6fbbeecdc..d08a7e20a6a2d0 100644 --- a/pkg/collector/corechecks/servicediscovery/errors.go +++ b/pkg/collector/corechecks/servicediscovery/errors.go @@ -20,7 +20,7 @@ const ( type errWithCode struct { err error code errCode - svc *serviceMetadata + svc *ServiceMetadata } func (e errWithCode) Error() string { diff --git a/pkg/collector/corechecks/servicediscovery/events_test.go b/pkg/collector/corechecks/servicediscovery/events_test.go index 2060d5eff2d15d..07d0a419a5820d 100644 --- a/pkg/collector/corechecks/servicediscovery/events_test.go +++ b/pkg/collector/corechecks/servicediscovery/events_test.go @@ -64,7 +64,7 @@ func Test_telemetrySender(t *testing.T) { }, Ports: nil, }, - meta: serviceMetadata{ + meta: ServiceMetadata{ Name: "test-service", Language: "jvm", Type: "web_service", @@ -163,7 +163,7 @@ func Test_telemetrySender_name_provided(t *testing.T) { }, Ports: nil, }, - meta: serviceMetadata{ + meta: ServiceMetadata{ Name: "test-service", Language: "jvm", Type: "web_service", diff --git a/pkg/collector/corechecks/servicediscovery/impl_linux.go b/pkg/collector/corechecks/servicediscovery/impl_linux.go index c80882c508c907..ae0e29b6b216d3 100644 --- a/pkg/collector/corechecks/servicediscovery/impl_linux.go +++ b/pkg/collector/corechecks/servicediscovery/impl_linux.go @@ -40,7 +40,7 @@ type linuxImpl struct { time timer bootTime uint64 - serviceDetector *serviceDetector + serviceDetector *ServiceDetector ignoreCfg map[string]bool ignoreProcs map[int]bool @@ -69,7 +69,7 @@ func newLinuxImpl(ignoreCfg map[string]bool) (osImpl, error) { bootTime: stat.BootTime, portPoller: poller, time: realTime{}, - serviceDetector: newServiceDetector(), + serviceDetector: NewServiceDetector(), ignoreCfg: ignoreCfg, ignoreProcs: make(map[int]bool), aliveServices: make(map[int]*serviceInfo), diff --git a/pkg/collector/corechecks/servicediscovery/model/model.go b/pkg/collector/corechecks/servicediscovery/model/model.go new file mode 100644 index 00000000000000..f0a2ba1a3b18df --- /dev/null +++ b/pkg/collector/corechecks/servicediscovery/model/model.go @@ -0,0 +1,18 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +// Package model contains types for service discovery. +package model + +// Service represents a listening process. +type Service struct { + PID int `json:"pid"` + Name string `json:"name"` +} + +// ServicesResponse is the response for the system-probe /discovery/services endpoint. 
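+// An illustrative response body:
+// {"services":[{"pid":1234,"name":"foobar"}]}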
+type ServicesResponse struct {
+	Services []Service `json:"services"`
+}
diff --git a/pkg/collector/corechecks/servicediscovery/module/impl_linux.go b/pkg/collector/corechecks/servicediscovery/module/impl_linux.go
index e1c0d040c52e25..ccdac79c660a54 100644
--- a/pkg/collector/corechecks/servicediscovery/module/impl_linux.go
+++ b/pkg/collector/corechecks/servicediscovery/module/impl_linux.go
@@ -6,24 +6,53 @@
 package module
 
 import (
+	"fmt"
 	"net/http"
+	"strconv"
+	"strings"
+
+	"github.com/prometheus/procfs"
+	"github.com/shirou/gopsutil/v3/process"
+
+	"github.com/DataDog/datadog-agent/pkg/collector/corechecks/servicediscovery"
 
 	"github.com/DataDog/datadog-agent/cmd/system-probe/api/module"
 	sysconfigtypes "github.com/DataDog/datadog-agent/cmd/system-probe/config/types"
+	"github.com/DataDog/datadog-agent/cmd/system-probe/utils"
 	"github.com/DataDog/datadog-agent/comp/core/telemetry"
 	workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def"
+	"github.com/DataDog/datadog-agent/pkg/collector/corechecks/servicediscovery/model"
+	"github.com/DataDog/datadog-agent/pkg/util/kernel"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
 	"github.com/DataDog/datadog-agent/pkg/util/optional"
 )
 
+const (
+	pathServices = "/services"
+)
+
 // Ensure discovery implements the module.Module interface.
 var _ module.Module = &discovery{}
 
+// cacheData holds process data that should be cached between calls to the
+// endpoint.
+type cacheData struct {
+	serviceName string
+}
+
 // discovery is an implementation of the Module interface for the discovery module.
-type discovery struct{}
+type discovery struct {
+	// cache maps pids to data that should be cached between calls to the endpoint.
+	cache map[int32]cacheData
+
+	serviceDetector servicediscovery.ServiceDetector
+}
 
 // NewDiscoveryModule creates a new discovery system probe module.
 func NewDiscoveryModule(*sysconfigtypes.Config, optional.Option[workloadmeta.Component], telemetry.Component) (module.Module, error) {
-	return &discovery{}, nil
+	return &discovery{
+		cache:           make(map[int32]cacheData),
+		serviceDetector: *servicediscovery.NewServiceDetector(),
+	}, nil
 }
 
 // GetStats returns the stats of the discovery module.
@@ -34,15 +63,258 @@ func (s *discovery) GetStats() map[string]interface{} {
 // Register registers the discovery module with the provided HTTP mux.
 func (s *discovery) Register(httpMux *module.Router) error {
 	httpMux.HandleFunc("/status", s.handleStatusEndpoint)
+	httpMux.HandleFunc(pathServices, s.handleServices)
 	return nil
 }
 
 // Close cleans resources used by the discovery module.
-// Currently, a no-op.
-func (s *discovery) Close() {}
+func (s *discovery) Close() {
+	clear(s.cache)
+}
 
 // handleStatusEndpoint is the handler for the /status endpoint.
 // Reports the status of the discovery module.
 func (s *discovery) handleStatusEndpoint(w http.ResponseWriter, _ *http.Request) {
 	_, _ = w.Write([]byte("Discovery Module is running"))
 }
+
+// handleServices is the handler for the /services endpoint.
+// Returns the list of currently running services.
+func (s *discovery) handleServices(w http.ResponseWriter, _ *http.Request) {
+	services, err := s.getServices()
+	if err != nil {
+		_ = log.Errorf("failed to handle /discovery%s: %v", pathServices, err)
+		w.WriteHeader(http.StatusInternalServerError)
+		return
+	}
+
+	resp := &model.ServicesResponse{
+		Services: *services,
+	}
+	utils.WriteAsJSON(w, resp)
+}
+
+// getSockets gets a list of socket inode numbers opened by a process.
Based on
+// snapshotBoundSockets() in
+// pkg/security/security_profile/activity_tree/process_node_snapshot.go. The
+// socket inode information from /proc/../fd is needed to map the connection
+// from the net/tcp (and similar) files to actual ports.
+func getSockets(p *process.Process) ([]uint64, error) {
+	FDs, err := p.OpenFiles()
+	if err != nil {
+		return nil, err
+	}
+
+	// sockets have the following pattern "socket:[inode]"
+	var sockets []uint64
+	for _, fd := range FDs {
+		const prefix = "socket:["
+		if strings.HasPrefix(fd.Path, prefix) {
+			inodeStr := strings.TrimPrefix(fd.Path[:len(fd.Path)-1], prefix)
+			sock, err := strconv.ParseUint(inodeStr, 10, 64)
+			if err != nil {
+				continue
+			}
+			sockets = append(sockets, sock)
+		}
+	}
+
+	return sockets, nil
+}
+
+// namespaceInfo stores information related to each network namespace.
+type namespaceInfo struct {
+	// listeningSockets stores the socket inode numbers for sockets which are listening.
+	listeningSockets map[uint64]struct{}
+}
+
+// Lifted from pkg/network/proc_net.go
+const (
+	tcpListen uint64 = 10
+
+	// tcpClose is also used to indicate a UDP connection where the other end hasn't been established
+	tcpClose uint64 = 7
+	udpListen        = tcpClose
+)
+
+// addSockets adds only listening sockets to a map (set) to be used for later lookups.
+func addSockets[P procfs.NetTCP | procfs.NetUDP](sockMap map[uint64]struct{}, sockets P, state uint64) {
+	for _, sock := range sockets {
+		if sock.St != state {
+			continue
+		}
+		sockMap[sock.Inode] = struct{}{}
+	}
+}
+
+// getNsInfo gets the list of open ports with socket inodes for all supported
+// protocols for the provided namespace. Based on snapshotBoundSockets() in
+// pkg/security/security_profile/activity_tree/process_node_snapshot.go.
+func getNsInfo(pid int) (*namespaceInfo, error) {
+	path := kernel.HostProc(fmt.Sprintf("%d", pid))
+	proc, err := procfs.NewFS(path)
+	if err != nil {
+		log.Warnf("error while opening procfs (pid: %v): %s", pid, err)
+		return nil, err
+	}
+
+	TCP, err := proc.NetTCP()
+	if err != nil {
+		log.Debugf("couldn't snapshot TCP sockets: %v", err)
+	}
+	UDP, err := proc.NetUDP()
+	if err != nil {
+		log.Debugf("couldn't snapshot UDP sockets: %v", err)
+	}
+	TCP6, err := proc.NetTCP6()
+	if err != nil {
+		log.Debugf("couldn't snapshot TCP6 sockets: %v", err)
+	}
+	UDP6, err := proc.NetUDP6()
+	if err != nil {
+		log.Debugf("couldn't snapshot UDP6 sockets: %v", err)
+	}
+
+	listeningSockets := make(map[uint64]struct{})
+
+	addSockets(listeningSockets, TCP, tcpListen)
+	addSockets(listeningSockets, TCP6, tcpListen)
+	addSockets(listeningSockets, UDP, udpListen)
+	addSockets(listeningSockets, UDP6, udpListen)
+
+	return &namespaceInfo{
+		listeningSockets: listeningSockets,
+	}, nil
+}
+
+// parsingContext holds temporary context not preserved between invocations of
+// the endpoint.
+type parsingContext struct {
+	procRoot  string
+	netNsInfo map[uint32]*namespaceInfo
+}
+
+// getServiceName gets the service name for a process using the servicedetector
+// module.
+func (s *discovery) getServiceName(proc *process.Process) (string, error) {
+	cmdline, err := proc.CmdlineSlice()
+	if err != nil {
+		return "", err
+	}
+
+	env, err := proc.Environ()
+	if err != nil {
+		return "", err
+	}
+
+	return s.serviceDetector.GetServiceName(cmdline, env), nil
+}
+
+// getService gets information for a single service.
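+// It returns nil for processes that cannot be inspected, that have no
+// sockets, or whose sockets are not listening in their network namespace.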
+func (s *discovery) getService(context parsingContext, pid int32) *model.Service {
+	proc, err := process.NewProcess(pid)
+	if err != nil {
+		return nil
+	}
+
+	sockets, err := getSockets(proc)
+	if err != nil {
+		return nil
+	}
+	if len(sockets) == 0 {
+		return nil
+	}
+
+	ns, err := kernel.GetNetNsInoFromPid(context.procRoot, int(pid))
+	if err != nil {
+		return nil
+	}
+
+	// The socket and network address information is different for each
+	// network namespace. Since namespaces can be shared between multiple
+	// processes, we cache them to only parse them once per call to this
+	// function.
+	nsInfo, ok := context.netNsInfo[ns]
+	if !ok {
+		nsInfo, err = getNsInfo(int(pid))
+		if err != nil {
+			return nil
+		}
+
+		context.netNsInfo[ns] = nsInfo
+	}
+
+	haveListeningSocket := false
+	for _, socket := range sockets {
+		if _, ok := nsInfo.listeningSockets[socket]; ok {
+			haveListeningSocket = true
+			break
+		}
+	}
+
+	if !haveListeningSocket {
+		return nil
+	}
+
+	var serviceName string
+	if cached, ok := s.cache[pid]; ok {
+		serviceName = cached.serviceName
+	} else {
+		serviceName, err = s.getServiceName(proc)
+		if err != nil {
+			return nil
+		}
+
+		s.cache[pid] = cacheData{serviceName: serviceName}
+	}
+
+	return &model.Service{
+		PID:  int(pid),
+		Name: serviceName,
+	}
+}
+
+// cleanCache deletes dead PIDs from the cache. Note that this does not actually
+// shrink the map but should free memory for the service name strings referenced
+// from it.
+func (s *discovery) cleanCache(alivePids map[int32]struct{}) {
+	for pid := range s.cache {
+		if _, alive := alivePids[pid]; alive {
+			continue
+		}
+
+		delete(s.cache, pid)
+	}
+}
+
+// getServices returns the list of currently running services.
+func (s *discovery) getServices() (*[]model.Service, error) {
+	procRoot := kernel.ProcFSRoot()
+	pids, err := process.Pids()
+	if err != nil {
+		return nil, err
+	}
+
+	context := parsingContext{
+		procRoot:  procRoot,
+		netNsInfo: make(map[uint32]*namespaceInfo),
+	}
+
+	var services []model.Service
+	alivePids := make(map[int32]struct{}, len(pids))
+
+	for _, pid := range pids {
+		alivePids[pid] = struct{}{}
+
+		service := s.getService(context, pid)
+		if service == nil {
+			continue
+		}
+
+		services = append(services, *service)
+	}
+
+	s.cleanCache(alivePids)
+
+	return &services, nil
+}
diff --git a/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go b/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go
new file mode 100644
index 00000000000000..fc6e236195a2b2
--- /dev/null
+++ b/pkg/collector/corechecks/servicediscovery/module/impl_linux_test.go
@@ -0,0 +1,390 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2024-present Datadog, Inc.
+
+// This doesn't need BPF but it's built with this tag to only run with
+// system-probe tests.
+//go:build linux_bpf + +package module + +import ( + "context" + "encoding/json" + "fmt" + "net" + "net/http" + "os" + "os/exec" + "regexp" + "runtime" + "syscall" + "testing" + "time" + + "net/http/httptest" + + gorillamux "github.com/gorilla/mux" + "github.com/prometheus/procfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vishvananda/netns" + + "github.com/DataDog/datadog-agent/cmd/system-probe/api/module" + "github.com/DataDog/datadog-agent/cmd/system-probe/config" + "github.com/DataDog/datadog-agent/cmd/system-probe/config/types" + workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def" + "github.com/DataDog/datadog-agent/pkg/collector/corechecks/servicediscovery/model" + "github.com/DataDog/datadog-agent/pkg/network/protocols/http/testutil" + protocolUtils "github.com/DataDog/datadog-agent/pkg/network/protocols/testutil" + "github.com/DataDog/datadog-agent/pkg/util/optional" +) + +func setupDiscoveryModule(t *testing.T) string { + t.Helper() + + wmeta := optional.NewNoneOption[workloadmeta.Component]() + mux := gorillamux.NewRouter() + cfg := &types.Config{ + Enabled: true, + EnabledModules: map[types.ModuleName]struct{}{ + config.DiscoveryModule: {}, + }, + } + m := module.Factory{ + Name: config.DiscoveryModule, + ConfigNamespaces: []string{"discovery"}, + Fn: NewDiscoveryModule, + NeedsEBPF: func() bool { + return false + }, + } + err := module.Register(cfg, mux, []module.Factory{m}, wmeta, nil) + require.NoError(t, err) + + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + return srv.URL +} + +func getServices(t *testing.T, url string) []model.Service { + location := url + "/" + string(config.DiscoveryModule) + pathServices + req, err := http.NewRequest(http.MethodGet, location, nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + res := &model.ServicesResponse{} + err = json.NewDecoder(resp.Body).Decode(res) + require.NoError(t, err) + require.NotEmpty(t, res) + + return res.Services +} + +func getServicesMap(t *testing.T, url string) map[int]model.Service { + services := getServices(t, url) + servicesMap := make(map[int]model.Service) + for _, service := range services { + servicesMap[service.PID] = service + } + + return servicesMap +} + +func startTCPServer(t *testing.T, proto string) (*os.File, *net.TCPAddr) { + listener, err := net.Listen(proto, "") + require.NoError(t, err) + t.Cleanup(func() { _ = listener.Close() }) + tcpAddr := listener.Addr().(*net.TCPAddr) + + f, err := listener.(*net.TCPListener).File() + defer listener.Close() + require.NoError(t, err) + + return f, tcpAddr +} + +func startTCPClient(t *testing.T, proto string, server *net.TCPAddr) (*os.File, *net.TCPAddr) { + client, err := net.DialTCP(proto, nil, server) + require.NoError(t, err) + t.Cleanup(func() { _ = client.Close() }) + + f, err := client.File() + defer client.Close() + require.NoError(t, err) + + return f, client.LocalAddr().(*net.TCPAddr) +} + +func startUDPServer(t *testing.T, proto string) (*os.File, *net.UDPAddr) { + lnPacket, err := net.ListenPacket(proto, "") + require.NoError(t, err) + t.Cleanup(func() { _ = lnPacket.Close() }) + + f, err := lnPacket.(*net.UDPConn).File() + defer lnPacket.Close() + require.NoError(t, err) + + return f, lnPacket.LocalAddr().(*net.UDPAddr) +} + +func startUDPClient(t *testing.T, proto string, server *net.UDPAddr) (*os.File, *net.UDPAddr) { + udpClient, err := net.DialUDP(proto, nil, server) + 
require.NoError(t, err) + t.Cleanup(func() { _ = udpClient.Close() }) + + f, err := udpClient.File() + defer udpClient.Close() + require.NoError(t, err) + + return f, udpClient.LocalAddr().(*net.UDPAddr) +} + +func disableCloseOnExec(t *testing.T, f *os.File) { + _, _, syserr := syscall.Syscall(syscall.SYS_FCNTL, f.Fd(), syscall.F_SETFD, 0) + require.Equal(t, syscall.Errno(0x0), syserr) +} + +func startProcessWithFile(t *testing.T, f *os.File) *exec.Cmd { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(func() { cancel() }) + + // Disable close-on-exec so that the process gets it + t.Cleanup(func() { f.Close() }) + disableCloseOnExec(t, f) + + cmd := exec.CommandContext(ctx, "sleep", "1000") + err := cmd.Start() + require.NoError(t, err) + f.Close() + + return cmd +} + +// Check that we get (only) listening processes for all expected protocols. +func TestBasic(t *testing.T) { + url := setupDiscoveryModule(t) + + var expectedPIDs []int + var unexpectedPIDs []int + + var startTCP = func(proto string) { + f, server := startTCPServer(t, proto) + cmd := startProcessWithFile(t, f) + expectedPIDs = append(expectedPIDs, cmd.Process.Pid) + + f, _ = startTCPClient(t, proto, server) + cmd = startProcessWithFile(t, f) + unexpectedPIDs = append(unexpectedPIDs, cmd.Process.Pid) + } + + var startUDP = func(proto string) { + f, server := startUDPServer(t, proto) + cmd := startProcessWithFile(t, f) + expectedPIDs = append(expectedPIDs, cmd.Process.Pid) + + f, _ = startUDPClient(t, proto, server) + cmd = startProcessWithFile(t, f) + unexpectedPIDs = append(unexpectedPIDs, cmd.Process.Pid) + } + + startTCP("tcp4") + startTCP("tcp6") + startUDP("udp4") + startUDP("udp6") + + // Eventually to give the processes time to start + require.EventuallyWithT(t, func(collect *assert.CollectT) { + portMap := getServicesMap(t, url) + for _, pid := range expectedPIDs { + assert.Contains(collect, portMap, pid) + } + for _, pid := range unexpectedPIDs { + assert.NotContains(collect, portMap, pid) + } + }, 30*time.Second, 100*time.Millisecond) +} + +func TestServiceName(t *testing.T) { + url := setupDiscoveryModule(t) + + listener, err := net.Listen("tcp", "") + require.NoError(t, err) + f, err := listener.(*net.TCPListener).File() + listener.Close() + + // Disable close-on-exec so that the sleep gets it + require.NoError(t, err) + t.Cleanup(func() { f.Close() }) + disableCloseOnExec(t, f) + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(func() { cancel() }) + + cmd := exec.CommandContext(ctx, "sleep", "1000") + cmd.Dir = "/tmp/" + cmd.Env = append(cmd.Env, "DD_SERVICE=foobar") + err = cmd.Start() + require.NoError(t, err) + f.Close() + + pid := cmd.Process.Pid + // Eventually to give the processes time to start + require.EventuallyWithT(t, func(collect *assert.CollectT) { + portMap := getServicesMap(t, url) + assert.Contains(collect, portMap, pid) + assert.Equal(t, "foobar", portMap[pid].Name) + }, 30*time.Second, 100*time.Millisecond) +} + +// Check that we can get listening processes in other namespaces. 
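+// Each listener is created in a fresh network namespace (netns.New()) before
+// its file descriptor is handed to a sleep process.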
+func TestNamespaces(t *testing.T) {
+	url := setupDiscoveryModule(t)
+
+	// Needed when changing namespaces
+	runtime.LockOSThread()
+	t.Cleanup(func() { runtime.UnlockOSThread() })
+
+	origNs, err := netns.Get()
+	require.NoError(t, err)
+	t.Cleanup(func() { netns.Set(origNs) })
+
+	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Second)
+	t.Cleanup(func() { cancel() })
+
+	var pids []int
+	for i := 0; i < 3; i++ {
+		ns, err := netns.New()
+		require.NoError(t, err)
+		t.Cleanup(func() { ns.Close() })
+
+		listener, err := net.Listen("tcp", "")
+		require.NoError(t, err)
+		f, err := listener.(*net.TCPListener).File()
+		listener.Close()
+
+		// Disable close-on-exec so that the sleep gets it
+		require.NoError(t, err)
+		t.Cleanup(func() { f.Close() })
+		disableCloseOnExec(t, f)
+
+		cmd := exec.CommandContext(ctx, "sleep", "1000")
+		err = cmd.Start()
+		require.NoError(t, err)
+		f.Close()
+		ns.Close()
+
+		pids = append(pids, cmd.Process.Pid)
+	}
+
+	netns.Set(origNs)
+
+	// Eventually to give the processes time to start
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		portMap := getServicesMap(t, url)
+		for _, pid := range pids {
+			assert.Contains(collect, portMap, pid)
+		}
+	}, 30*time.Second, 100*time.Millisecond)
+}
+
+// Check that we are able to find services inside Docker containers.
+func TestDocker(t *testing.T) {
+	url := setupDiscoveryModule(t)
+
+	dir, _ := testutil.CurDir()
+	err := protocolUtils.RunDockerServer(t, "foo-server",
+		dir+"/testdata/docker-compose.yml", []string{},
+		regexp.MustCompile("Serving.*"),
+		protocolUtils.DefaultTimeout, 3)
+	require.NoError(t, err)
+
+	proc, err := procfs.NewDefaultFS()
+	require.NoError(t, err)
+	processes, err := proc.AllProcs()
+	require.NoError(t, err)
+	pid1111 := 0
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		for _, process := range processes {
+			comm, err := process.Comm()
+			if err != nil {
+				continue
+			}
+			if comm == "python-1111" {
+				pid1111 = process.PID
+				break
+			}
+		}
+		assert.NotZero(collect, pid1111)
+	}, time.Second*10, time.Millisecond*20)
+
+	portMap := getServicesMap(t, url)
+
+	require.Contains(t, portMap, pid1111)
+}
+
+// Check that the cache is cleaned when processes die.
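+// After the sleep processes exit, a getServices() call should evict their
+// PIDs from the cache, and Close() should empty it entirely.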
+func TestCache(t *testing.T) {
+	module, err := NewDiscoveryModule(nil, optional.NewNoneOption[workloadmeta.Component](), nil)
+	require.NoError(t, err)
+	discovery := module.(*discovery)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(func() { cancel() })
+
+	f, _ := startTCPServer(t, "tcp4")
+	defer f.Close()
+
+	disableCloseOnExec(t, f)
+
+	var serviceNames []string
+	var cmds []*exec.Cmd
+
+	for i := 0; i < 10; i++ {
+		cmd := exec.CommandContext(ctx, "sleep", "100")
+		name := fmt.Sprintf("foo%d", i)
+		env := fmt.Sprintf("DD_SERVICE=%s", name)
+		cmd.Env = append(cmd.Env, env)
+		err = cmd.Start()
+		require.NoError(t, err)
+
+		cmds = append(cmds, cmd)
+		serviceNames = append(serviceNames, name)
+	}
+	f.Close()
+
+	require.EventuallyWithT(t, func(collect *assert.CollectT) {
+		_, err = discovery.getServices()
+		require.NoError(t, err)
+
+		for _, cmd := range cmds {
+			pid := int32(cmd.Process.Pid)
+			assert.Contains(collect, discovery.cache, pid)
+		}
+	}, 10*time.Second, 100*time.Millisecond)
+
+	for i, cmd := range cmds {
+		pid := int32(cmd.Process.Pid)
+		require.Contains(t, discovery.cache[pid].serviceName, serviceNames[i])
+	}
+
+	cancel()
+	for _, cmd := range cmds {
+		cmd.Wait()
+	}
+
+	_, err = discovery.getServices()
+	require.NoError(t, err)
+
+	for _, cmd := range cmds {
+		pid := cmd.Process.Pid
+		require.NotContains(t, discovery.cache, int32(pid))
+	}
+
+	discovery.Close()
+	require.Empty(t, discovery.cache)
+}
diff --git a/pkg/collector/corechecks/servicediscovery/module/testdata/docker-compose.yml b/pkg/collector/corechecks/servicediscovery/module/testdata/docker-compose.yml
new file mode 100644
index 00000000000000..1ba97088df06a3
--- /dev/null
+++ b/pkg/collector/corechecks/servicediscovery/module/testdata/docker-compose.yml
@@ -0,0 +1,18 @@
+version: '3'
+name: port-test
+services:
+  one:
+    # Just some image that has python. mysql chosen since it is already used
+    # from another test.
+    image: mysql:8.0.32
+    # Symlink binary to allow easier identification of processes from test
+    command: >
+      sh -c "echo
+      && ln -s /usr/bin/python3 python-1111
+      && ./python-1111 -m http.server 1234"
+    environment:
+      # The "Serving HTTP..." print does not show up otherwise
+      - PYTHONUNBUFFERED=x
+    restart: always
+    ports:
+      - 1111:1234
diff --git a/pkg/collector/corechecks/servicediscovery/service_detector.go b/pkg/collector/corechecks/servicediscovery/service_detector.go
index 4b7849859d22cb..1040c5dda03638 100644
--- a/pkg/collector/corechecks/servicediscovery/service_detector.go
+++ b/pkg/collector/corechecks/servicediscovery/service_detector.go
@@ -18,20 +18,23 @@ import (
 	agentzap "github.com/DataDog/datadog-agent/pkg/util/log/zap"
 )
 
-type serviceDetector struct {
+// ServiceDetector defines the service detector to get metadata about services.
+type ServiceDetector struct {
 	logger     *zap.Logger
 	langFinder language.Finder
 }
 
-func newServiceDetector() *serviceDetector {
+// NewServiceDetector creates a new ServiceDetector object.
+func NewServiceDetector() *ServiceDetector {
 	logger := zap.New(agentzap.NewZapCore())
-	return &serviceDetector{
+	return &ServiceDetector{
 		logger:     logger,
 		langFinder: language.New(logger),
 	}
 }
 
-type serviceMetadata struct {
+// ServiceMetadata stores metadata about a service.
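+// It is exported because it is returned by the methods of the exported
+// ServiceDetector.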
+type ServiceMetadata struct { Name string Language string Type string @@ -50,7 +53,24 @@ func fixAdditionalNames(additionalNames []string) []string { return out } -func (sd *serviceDetector) Detect(p processInfo) serviceMetadata { +func makeFinalName(meta usm.ServiceMetadata) string { + name := meta.Name + if len(meta.AdditionalNames) > 0 { + name = name + "-" + strings.Join(fixAdditionalNames(meta.AdditionalNames), "-") + } + + return name +} + +// GetServiceName gets the service name based on the command line arguments and +// the list of environment variables. +func (sd *ServiceDetector) GetServiceName(cmdline []string, env []string) string { + meta, _ := usm.ExtractServiceMetadata(sd.logger, cmdline, env) + return makeFinalName(meta) +} + +// Detect gets metadata for a service. +func (sd *ServiceDetector) Detect(p processInfo) ServiceMetadata { meta, _ := usm.ExtractServiceMetadata(sd.logger, p.CmdLine, p.Env) lang, _ := sd.langFinder.Detect(p.CmdLine, p.Env) svcType := servicetype.Detect(meta.Name, p.Ports) @@ -58,13 +78,8 @@ func (sd *serviceDetector) Detect(p processInfo) serviceMetadata { sd.logger.Debug("name info", zap.String("name", meta.Name), zap.Strings("additional names", meta.AdditionalNames)) - name := meta.Name - if len(meta.AdditionalNames) > 0 { - name = name + "-" + strings.Join(fixAdditionalNames(meta.AdditionalNames), "-") - } - - return serviceMetadata{ - Name: name, + return ServiceMetadata{ + Name: makeFinalName(meta), Language: string(lang), Type: string(svcType), APMInstrumentation: string(apmInstr), diff --git a/pkg/collector/corechecks/servicediscovery/service_detector_test.go b/pkg/collector/corechecks/servicediscovery/service_detector_test.go index 5abd079e57d6e4..326955a3cf07d8 100644 --- a/pkg/collector/corechecks/servicediscovery/service_detector_test.go +++ b/pkg/collector/corechecks/servicediscovery/service_detector_test.go @@ -12,7 +12,7 @@ import ( ) func Test_serviceDetector(t *testing.T) { - sd := newServiceDetector() + sd := NewServiceDetector() // no need to test many cases here, just ensuring the process data is properly passed down is enough. 
pInfo := processInfo{ @@ -24,7 +24,7 @@ func Test_serviceDetector(t *testing.T) { Ports: []int{5432}, } - want := serviceMetadata{ + want := ServiceMetadata{ Name: "my-service", Language: "python", Type: "db", @@ -42,7 +42,7 @@ func Test_serviceDetector(t *testing.T) { Stat: procStat{}, Ports: nil, } - wantEmpty := serviceMetadata{ + wantEmpty := ServiceMetadata{ Name: "", Language: "UNKNOWN", Type: "web_service", diff --git a/pkg/collector/corechecks/servicediscovery/servicediscovery.go b/pkg/collector/corechecks/servicediscovery/servicediscovery.go index 47c68851bd7bc5..10ac0b638926fd 100644 --- a/pkg/collector/corechecks/servicediscovery/servicediscovery.go +++ b/pkg/collector/corechecks/servicediscovery/servicediscovery.go @@ -36,7 +36,7 @@ const ( type serviceInfo struct { process processInfo - meta serviceMetadata + meta ServiceMetadata LastHeartbeat time.Time } diff --git a/pkg/collector/corechecks/servicediscovery/telemetry.go b/pkg/collector/corechecks/servicediscovery/telemetry.go index 4f38a5e0fc4341..05ab9d78923ab1 100644 --- a/pkg/collector/corechecks/servicediscovery/telemetry.go +++ b/pkg/collector/corechecks/servicediscovery/telemetry.go @@ -7,6 +7,7 @@ package servicediscovery import ( "errors" + "github.com/DataDog/datadog-agent/pkg/telemetry" "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/prometheus/client_golang/prometheus" @@ -48,7 +49,7 @@ func telemetryFromError(err error) { var codeErr errWithCode if errors.As(err, &codeErr) { log.Debugf("sending telemetry for error: %v", err) - svc := serviceMetadata{} + svc := ServiceMetadata{} if codeErr.svc != nil { svc = *codeErr.svc } diff --git a/tasks/system_probe.py b/tasks/system_probe.py index d5c6e8d4edfd65..8d5a0cd028c05d 100644 --- a/tasks/system_probe.py +++ b/tasks/system_probe.py @@ -52,6 +52,7 @@ "./pkg/ebpf/...", "./pkg/network/...", "./pkg/collector/corechecks/ebpf/...", + "./pkg/collector/corechecks/servicediscovery/module/...", "./pkg/process/monitor/...", ] TEST_PACKAGES = " ".join(TEST_PACKAGES_LIST) From 7c7f1f5e2f6e7a479e46f4fb2d2cf4a953e225c2 Mon Sep 17 00:00:00 2001 From: Mackenzie <63265430+mackjmr@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:52:51 +0200 Subject: [PATCH 18/19] Update prometheus job name in converged agent (#28064) --- .../simple-dd/config-enhanced-result.yaml | 2 +- .../simple-dd/config-provided-result.yaml | 2 +- .../impl/testdata/simple-dd/config.yaml | 2 +- comp/otelcol/converter/README.md | 2 + comp/otelcol/converter/impl/converter_test.go | 5 +++ comp/otelcol/converter/impl/prometheus.go | 8 +++- .../processors/no-changes/config.yaml | 2 +- .../no-processor-partial/config-result.yaml | 2 +- .../no-processor-partial/config.yaml | 2 +- .../no-processors/config-result.yaml | 2 +- .../processors/no-processors/config.yaml | 2 +- .../other-processors/config-result.yaml | 2 +- .../processors/other-processors/config.yaml | 2 +- .../job-name-change/config-result.yaml | 40 +++++++++++++++++++ .../receivers/job-name-change/config.yaml | 40 +++++++++++++++++++ .../multi-dd-partial-prom/config-result.yaml | 4 +- .../multi-dd-partial-prom/config.yaml | 2 +- .../testdata/receivers/no-changes/config.yaml | 2 +- .../no-prom-multi-dd/config-result.yaml | 2 +- .../config-result.yaml | 2 +- .../no-prometheus-receiver/config-result.yaml | 2 +- .../no-receivers-defined/config-result.yaml | 2 +- 22 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 comp/otelcol/converter/impl/testdata/receivers/job-name-change/config-result.yaml create mode 100644 
comp/otelcol/converter/impl/testdata/receivers/job-name-change/config.yaml diff --git a/comp/otelcol/collector/impl/testdata/simple-dd/config-enhanced-result.yaml b/comp/otelcol/collector/impl/testdata/simple-dd/config-enhanced-result.yaml index cf84558ebcf93e..ec57f953248db4 100644 --- a/comp/otelcol/collector/impl/testdata/simple-dd/config-enhanced-result.yaml +++ b/comp/otelcol/collector/impl/testdata/simple-dd/config-enhanced-result.yaml @@ -163,7 +163,7 @@ receivers: enable_http2: true follow_redirects: true honor_timestamps: true - job_name: otel-collector + job_name: datadog-agent metrics_path: /metrics scheme: http scrape_interval: 5s diff --git a/comp/otelcol/collector/impl/testdata/simple-dd/config-provided-result.yaml b/comp/otelcol/collector/impl/testdata/simple-dd/config-provided-result.yaml index c75d7a99aee5e1..7e10c57812684e 100644 --- a/comp/otelcol/collector/impl/testdata/simple-dd/config-provided-result.yaml +++ b/comp/otelcol/collector/impl/testdata/simple-dd/config-provided-result.yaml @@ -114,7 +114,7 @@ receivers: enable_http2: true follow_redirects: true honor_timestamps: true - job_name: otel-collector + job_name: datadog-agent metrics_path: /metrics scheme: http scrape_interval: 5s diff --git a/comp/otelcol/collector/impl/testdata/simple-dd/config.yaml b/comp/otelcol/collector/impl/testdata/simple-dd/config.yaml index 58edbce6d738bf..79cdf3933013d3 100644 --- a/comp/otelcol/collector/impl/testdata/simple-dd/config.yaml +++ b/comp/otelcol/collector/impl/testdata/simple-dd/config.yaml @@ -3,7 +3,7 @@ receivers: prometheus: config: scrape_configs: - - job_name: 'otel-collector' + - job_name: 'datadog-agent' scrape_interval: 5s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/README.md b/comp/otelcol/converter/README.md index 5a9ae435f94752..8c33b64962ede0 100644 --- a/comp/otelcol/converter/README.md +++ b/comp/otelcol/converter/README.md @@ -22,6 +22,8 @@ The converter will check to see if a prometheus receiver is defined which points If it finds datadogexporters which are not defined in a pipeline with the prometheus receiver, it adds the prometheus config (name: `prometheus/dd-autoconfigured`), and then create it's own pipeline `metrics/dd-autoconfigured/
` which contains the prometheus receiver and the datadog exporter.
 
+For any prometheus receiver that collects the collector's own health metrics and sends them to Datadog, the converter updates the job name to `datadog-agent`. This ensures the health metrics are tagged with `service:datadog-agent` and can be distinguished from the health metrics of other collectors.
+
 ## Provided and enhanced config
 
 `GetProvidedConf` and `GetEnhancedConf` return the string representation of the user provided and autoconfigured conf respectively. Currently, these APIs have two limitations:
diff --git a/comp/otelcol/converter/impl/converter_test.go b/comp/otelcol/converter/impl/converter_test.go
index b1fff4119301d7..912186d7f76ee7 100644
--- a/comp/otelcol/converter/impl/converter_test.go
+++ b/comp/otelcol/converter/impl/converter_test.go
@@ -86,6 +86,11 @@ func TestConvert(t *testing.T) {
 			provided:       "processors/no-changes/config.yaml",
 			expectedResult: "processors/no-changes/config.yaml",
 		},
+		{
+			name:           "receivers/job-name-change",
+			provided:       "receivers/job-name-change/config.yaml",
+			expectedResult: "receivers/job-name-change/config-result.yaml",
+		},
 		{
 			name:           "receivers/no-changes",
 			provided:       "receivers/no-changes/config.yaml",
diff --git a/comp/otelcol/converter/impl/prometheus.go b/comp/otelcol/converter/impl/prometheus.go
index 6cdac2ce5234b5..a26b0d7e5342a4 100644
--- a/comp/otelcol/converter/impl/prometheus.go
+++ b/comp/otelcol/converter/impl/prometheus.go
@@ -6,7 +6,9 @@
 // Package converterimpl provides the implementation of the otel-agent converter.
 package converterimpl
 
-import "go.opentelemetry.io/collector/confmap"
+import (
+	"go.opentelemetry.io/collector/confmap"
+)
 
 var (
 	// prometheus
@@ -16,7 +18,7 @@ var (
 			"config": map[string]any{
 				"scrape_configs": []any{
 					map[string]any{
-						"job_name":        "otelcol",
+						"job_name":        "datadog-agent",
 						"scrape_interval": "10s",
 						"static_configs": []any{
 							map[string]any{
@@ -115,6 +117,7 @@ func addPrometheusReceiver(conf *confmap.Conf, comp component) {
 				}
 				if targetString == internalMetricsAddress {
 					if ddExporter := receiverInPipelineWithDatadogExporter(conf, receiver); ddExporter != "" {
+						scrapeConfigMap["job_name"] = "datadog-agent"
 						delete(datadogExportersMap, ddExporter)
 					}
 				}
@@ -124,6 +127,7 @@ func addPrometheusReceiver(conf *confmap.Conf, comp component) {
 			}
 		}
 	}
+
 	*conf = *confmap.NewFromStringMap(stringMapConf)
 	if len(datadogExportersMap) == 0 {
 		return
diff --git a/comp/otelcol/converter/impl/testdata/processors/no-changes/config.yaml b/comp/otelcol/converter/impl/testdata/processors/no-changes/config.yaml
index 6717d7caf9962e..5bbfe3beb7fca3 100644
--- a/comp/otelcol/converter/impl/testdata/processors/no-changes/config.yaml
+++ b/comp/otelcol/converter/impl/testdata/processors/no-changes/config.yaml
@@ -3,7 +3,7 @@ receivers:
   prometheus/user-defined:
     config:
       scrape_configs:
-        - job_name: 'otelcol'
+        - job_name: 'datadog-agent'
           scrape_interval: 10s
           static_configs:
             - targets: ['0.0.0.0:8888']
diff --git a/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config-result.yaml b/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config-result.yaml
index b8ed270d1ef71f..bb6a071a5057e9 100644
--- a/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config-result.yaml
+++ b/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config-result.yaml
@@ -3,7 +3,7 @@ receivers:
   prometheus/user-defined:
     config:
       scrape_configs:
-        - job_name: 'otelcol'
+        - job_name: 'datadog-agent'
           scrape_interval: 10s
           static_configs:
             - targets:
['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config.yaml b/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config.yaml index 074f1e4828db04..c101e0de68ab04 100644 --- a/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config.yaml +++ b/comp/otelcol/converter/impl/testdata/processors/no-processor-partial/config.yaml @@ -3,7 +3,7 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/processors/no-processors/config-result.yaml b/comp/otelcol/converter/impl/testdata/processors/no-processors/config-result.yaml index c70eb22b14decf..456b7672058429 100644 --- a/comp/otelcol/converter/impl/testdata/processors/no-processors/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/processors/no-processors/config-result.yaml @@ -3,7 +3,7 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/processors/no-processors/config.yaml b/comp/otelcol/converter/impl/testdata/processors/no-processors/config.yaml index 0631605f775dcf..bc52407015b1c2 100644 --- a/comp/otelcol/converter/impl/testdata/processors/no-processors/config.yaml +++ b/comp/otelcol/converter/impl/testdata/processors/no-processors/config.yaml @@ -3,7 +3,7 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/processors/other-processors/config-result.yaml b/comp/otelcol/converter/impl/testdata/processors/other-processors/config-result.yaml index 9f371dd5b95d4b..ccf2c06ed1bc70 100644 --- a/comp/otelcol/converter/impl/testdata/processors/other-processors/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/processors/other-processors/config-result.yaml @@ -3,7 +3,7 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/processors/other-processors/config.yaml b/comp/otelcol/converter/impl/testdata/processors/other-processors/config.yaml index a21afdf992e261..03e8e147b6aeff 100644 --- a/comp/otelcol/converter/impl/testdata/processors/other-processors/config.yaml +++ b/comp/otelcol/converter/impl/testdata/processors/other-processors/config.yaml @@ -3,7 +3,7 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/job-name-change/config-result.yaml b/comp/otelcol/converter/impl/testdata/receivers/job-name-change/config-result.yaml new file mode 100644 index 00000000000000..9b2beefe390201 --- /dev/null +++ b/comp/otelcol/converter/impl/testdata/receivers/job-name-change/config-result.yaml @@ -0,0 +1,40 @@ +receivers: + otlp: + prometheus/user-defined: + config: + scrape_configs: + - job_name: 'datadog-agent' + scrape_interval: 10s + static_configs: + - targets: ['0.0.0.0:8888'] + +exporters: + datadog: + api: + key: 12345 + +extensions: + pprof/user-defined: + 
health_check/user-defined: + zpages/user-defined: + endpoint: "localhost:55679" + datadog/user-defined: + +processors: + infraattributes/user-defined: + +service: + extensions: [pprof/user-defined, zpages/user-defined, health_check/user-defined, datadog/user-defined] + pipelines: + traces: + receivers: [nop] + processors: [infraattributes/user-defined] + exporters: [datadog] + metrics: + receivers: [nop, prometheus/user-defined] + processors: [infraattributes/user-defined] + exporters: [datadog] + logs: + receivers: [nop] + processors: [infraattributes/user-defined] + exporters: [datadog] diff --git a/comp/otelcol/converter/impl/testdata/receivers/job-name-change/config.yaml b/comp/otelcol/converter/impl/testdata/receivers/job-name-change/config.yaml new file mode 100644 index 00000000000000..28666713c70b71 --- /dev/null +++ b/comp/otelcol/converter/impl/testdata/receivers/job-name-change/config.yaml @@ -0,0 +1,40 @@ +receivers: + otlp: + prometheus/user-defined: + config: + scrape_configs: + - job_name: 'otelcol' + scrape_interval: 10s + static_configs: + - targets: ['0.0.0.0:8888'] + +exporters: + datadog: + api: + key: 12345 + +extensions: + pprof/user-defined: + health_check/user-defined: + zpages/user-defined: + endpoint: "localhost:55679" + datadog/user-defined: + +processors: + infraattributes/user-defined: + +service: + extensions: [pprof/user-defined, zpages/user-defined, health_check/user-defined, datadog/user-defined] + pipelines: + traces: + receivers: [nop] + processors: [infraattributes/user-defined] + exporters: [datadog] + metrics: + receivers: [nop, prometheus/user-defined] + processors: [infraattributes/user-defined] + exporters: [datadog] + logs: + receivers: [nop] + processors: [infraattributes/user-defined] + exporters: [datadog] diff --git a/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config-result.yaml b/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config-result.yaml index a2f0ead4868a24..71fe27339ea3a4 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config-result.yaml @@ -3,14 +3,14 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['localhost:1234'] prometheus/dd-autoconfigured: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['localhost:1234'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config.yaml b/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config.yaml index bcc00bd9728533..e7ad53fde07c74 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/multi-dd-partial-prom/config.yaml @@ -3,7 +3,7 @@ receivers: prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['localhost:1234'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/no-changes/config.yaml b/comp/otelcol/converter/impl/testdata/receivers/no-changes/config.yaml index 28666713c70b71..9b2beefe390201 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/no-changes/config.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/no-changes/config.yaml @@ -3,7 +3,7 @@ receivers: 
prometheus/user-defined: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/no-prom-multi-dd/config-result.yaml b/comp/otelcol/converter/impl/testdata/receivers/no-prom-multi-dd/config-result.yaml index 9878a50d8c0a34..d5f0dcf5b4906d 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/no-prom-multi-dd/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/no-prom-multi-dd/config-result.yaml @@ -3,7 +3,7 @@ receivers: prometheus/dd-autoconfigured: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/no-prom-not-default-addr/config-result.yaml b/comp/otelcol/converter/impl/testdata/receivers/no-prom-not-default-addr/config-result.yaml index 88011eae86b97b..b5391b16abc70e 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/no-prom-not-default-addr/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/no-prom-not-default-addr/config-result.yaml @@ -3,7 +3,7 @@ receivers: prometheus/dd-autoconfigured: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['localhost:1234'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/no-prometheus-receiver/config-result.yaml b/comp/otelcol/converter/impl/testdata/receivers/no-prometheus-receiver/config-result.yaml index 345f9b7fef564a..ff42c6b03ee362 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/no-prometheus-receiver/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/no-prometheus-receiver/config-result.yaml @@ -3,7 +3,7 @@ receivers: prometheus/dd-autoconfigured: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] diff --git a/comp/otelcol/converter/impl/testdata/receivers/no-receivers-defined/config-result.yaml b/comp/otelcol/converter/impl/testdata/receivers/no-receivers-defined/config-result.yaml index 3beaa148bb33e8..075a05ae3898c8 100644 --- a/comp/otelcol/converter/impl/testdata/receivers/no-receivers-defined/config-result.yaml +++ b/comp/otelcol/converter/impl/testdata/receivers/no-receivers-defined/config-result.yaml @@ -2,7 +2,7 @@ receivers: prometheus/dd-autoconfigured: config: scrape_configs: - - job_name: 'otelcol' + - job_name: 'datadog-agent' scrape_interval: 10s static_configs: - targets: ['0.0.0.0:8888'] From ff7f7c662063231a727f2d58a8119ffa2348eac5 Mon Sep 17 00:00:00 2001 From: Daniel Lavie Date: Tue, 6 Aug 2024 15:16:50 +0300 Subject: [PATCH 19/19] Deploy Image Goland Task (#28204) --- .run/Publish Docker.run.xml | 22 ++++++++++++++++++++++ .run/bash_runner.sh | 10 ++++++++++ .run/deploy_image.sh | 28 ++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) create mode 100644 .run/Publish Docker.run.xml create mode 100644 .run/deploy_image.sh diff --git a/.run/Publish Docker.run.xml b/.run/Publish Docker.run.xml new file mode 100644 index 00000000000000..b87dbf2902b043 --- /dev/null +++ b/.run/Publish Docker.run.xml @@ -0,0 +1,22 @@ + + + + \ No newline at end of file diff --git a/.run/bash_runner.sh b/.run/bash_runner.sh index 39de3383cb0666..975755050923a9 100755 --- a/.run/bash_runner.sh +++ b/.run/bash_runner.sh @@ -15,6 +15,11 @@ if [[ -z $SCRIPT_TO_RUN ]]; then exit fi +if 
[ "$INCLUDE_AWS_ECR_LOGIN_PASSWORD" = true ]; then + # Get AWS ECR login password + AWS_ECR_LOGIN_PASSWORD=$(aws-vault exec sso-sandbox-account-admin -- aws ecr get-login-password --region us-east-1) +fi + if [[ -z $REMOTE_MACHINE_IP ]]; then echo "REMOTE_MACHINE_IP environment variable was not set, assuming local configuration" source "${SCRIPT_TO_RUN}" @@ -50,6 +55,11 @@ else # Finally create the environment variable to inject list in the format that works with sh `ssh` command env_variables_to_inject=$(echo "$env_vars" | grep -v -w "${remote_env_array_as_grep_patterns[@]}" | tr '\n' ' ') + + if [ "$INCLUDE_AWS_ECR_LOGIN_PASSWORD" = true ]; then + env_variables_to_inject+="AWS_ECR_LOGIN_PASSWORD=${AWS_ECR_LOGIN_PASSWORD} " + fi + # shellcheck disable=SC2002 cat "${SCRIPT_TO_RUN}" | ssh -tt "vagrant@$REMOTE_MACHINE_IP" \ "export $env_variables_to_inject;cd ${DD_AGENT_ROOT_DIR};bash --login" diff --git a/.run/deploy_image.sh b/.run/deploy_image.sh new file mode 100644 index 00000000000000..eedeb5c5e7df07 --- /dev/null +++ b/.run/deploy_image.sh @@ -0,0 +1,28 @@ +#!/usr/bin/bash -ex + +# This script logs into the Docker registry, builds a Docker image, and pushes it to Amazon ECR. + +# Ensure AWS_ECR_LOGIN_PASSWORD is set +if [[ -z $AWS_ECR_LOGIN_PASSWORD ]]; then + echo "AWS_ECR_LOGIN_PASSWORD environment variable is not set" + exit 1 +fi + +# Ensure IMAGE_NAME and IMAGE_VERSION are set +if [[ -z $IMAGE_NAME || -z $IMAGE_VERSION ]]; then + echo "IMAGE_NAME and IMAGE_VERSION environment variables must be set" + exit 1 +fi + +# Docker registry and image details +AWS_ACCOUNT_ID="601427279990" +REGION="us-east-1" +REPOSITORY_NAME="usm-agent" + +# Login to Amazon ECR +echo $AWS_ECR_LOGIN_PASSWORD | docker login --username AWS --password-stdin "$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com" + +# Build and push the Docker image +inv -e process-agent.build-dev-image \ + --image "$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$REPOSITORY_NAME/$IMAGE_NAME:$IMAGE_VERSION" \ + --base-image datadog/agent-dev:master-py3 --push