From 0f4104ed32a8fd32f3a3264be5a6d945fc4cb96f Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Thu, 18 Apr 2024 15:15:46 -0700 Subject: [PATCH 01/23] in buffer batching with perf buffers for NPM --- .github/CODEOWNERS | 1 + pkg/config/setup/system_probe.go | 2 + pkg/ebpf/manager.go | 12 +- pkg/ebpf/perf/event.go | 213 ++++++++++++++++ pkg/network/config/config.go | 8 + pkg/network/ebpf/c/tracer.c | 4 +- pkg/network/ebpf/c/tracer/events.h | 70 +++--- pkg/network/ebpf/c/tracer/maps.h | 2 +- pkg/network/event_common_linux.go | 105 +++++++- pkg/network/event_common_notlinux.go | 2 +- pkg/network/state_linux_test.go | 2 +- pkg/network/tracer/connection/ebpf_tracer.go | 231 +++++++++--------- .../tracer/connection/fentry/manager.go | 5 +- .../tracer/connection/fentry/probes.go | 15 +- .../tracer/connection/fentry/tracer.go | 101 ++++---- .../tracer/connection/kprobe/config.go | 65 ++++- .../tracer/connection/kprobe/manager.go | 68 +++--- .../tracer/connection/kprobe/tracer.go | 81 ++---- .../tracer/connection/kprobe/tracer_test.go | 28 +-- .../tracer/connection/perf_batching.go | 48 ++-- .../tracer/connection/perf_batching_test.go | 41 +++- .../tracer/connection/tcp_close_consumer.go | 121 +++------ .../connection/tcp_close_consumer_test.go | 7 +- .../tracer/connection/util/conn_tracer.go | 70 +----- pkg/util/encoding/binary.go | 43 ++++ pkg/util/slices/map.go | 16 ++ pkg/util/sync/pool.go | 16 ++ 27 files changed, 853 insertions(+), 524 deletions(-) create mode 100644 pkg/ebpf/perf/event.go create mode 100644 pkg/util/encoding/binary.go create mode 100644 pkg/util/slices/map.go diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8c82bee16308ec..a1ca0d547e9f9f 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -458,6 +458,7 @@ /pkg/util/gpu/ @DataDog/container-platform /pkg/util/kernel/ @DataDog/ebpf-platform /pkg/util/safeelf/ @DataDog/ebpf-platform +/pkg/util/slices/ @DataDog/ebpf-platform /pkg/util/ktime @DataDog/agent-security /pkg/util/kubernetes/ 
@DataDog/container-integrations @DataDog/container-platform @DataDog/container-app /pkg/util/podman/ @DataDog/container-integrations diff --git a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index bdc97ee3d902ac..2551b80e090deb 100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -197,6 +197,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnv(join(netNS, "max_failed_connections_buffered")) cfg.BindEnvAndSetDefault(join(spNS, "closed_connection_flush_threshold"), 0) cfg.BindEnvAndSetDefault(join(spNS, "closed_channel_size"), 500) + cfg.BindEnvAndSetDefault(join(netNS, "closed_buffer_wakeup_count"), 5) cfg.BindEnvAndSetDefault(join(spNS, "max_connection_state_buffered"), 75000) cfg.BindEnvAndSetDefault(join(spNS, "disable_dns_inspection"), false, "DD_DISABLE_DNS_INSPECTION") @@ -212,6 +213,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnvAndSetDefault(join(spNS, "enable_conntrack_all_namespaces"), true, "DD_SYSTEM_PROBE_ENABLE_CONNTRACK_ALL_NAMESPACES") cfg.BindEnvAndSetDefault(join(netNS, "enable_protocol_classification"), true, "DD_ENABLE_PROTOCOL_CLASSIFICATION") cfg.BindEnvAndSetDefault(join(netNS, "enable_ringbuffers"), true, "DD_SYSTEM_PROBE_NETWORK_ENABLE_RINGBUFFERS") + cfg.BindEnvAndSetDefault(join(netNS, "enable_kernel_batching"), false, "DD_SYSTEM_PROBE_NETWORK_ENABLE_KERNEL_BATCHING") cfg.BindEnvAndSetDefault(join(netNS, "enable_tcp_failed_connections"), true, "DD_SYSTEM_PROBE_NETWORK_ENABLE_FAILED_CONNS") cfg.BindEnvAndSetDefault(join(netNS, "ignore_conntrack_init_failure"), false, "DD_SYSTEM_PROBE_NETWORK_IGNORE_CONNTRACK_INIT_FAILURE") cfg.BindEnvAndSetDefault(join(netNS, "conntrack_init_timeout"), 10*time.Second) diff --git a/pkg/ebpf/manager.go b/pkg/ebpf/manager.go index 06e790609a9ecf..2351e4bbee6092 100644 --- a/pkg/ebpf/manager.go +++ b/pkg/ebpf/manager.go @@ -74,11 +74,13 @@ type Modifier interface { // InitWithOptions is a wrapper around 
ebpf-manager.Manager.InitWithOptions func (m *Manager) InitWithOptions(bytecode io.ReaderAt, opts *manager.Options) error { - // we must load the ELF file before initialization, - // to build the collection specs, because some modifiers - // inspect these to make changes to the eBPF resources. - if err := m.LoadELF(bytecode); err != nil { - return fmt.Errorf("failed to load elf from reader: %w", err) + if bytecode != nil { + // we must load the ELF file before initialization, + // to build the collection specs, because some modifiers + // inspect these to make changes to the eBPF resources. + if err := m.LoadELF(bytecode); err != nil { + return fmt.Errorf("failed to load elf from reader: %w", err) + } } for _, mod := range m.EnabledModifiers { diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go new file mode 100644 index 00000000000000..d338c08d9b6941 --- /dev/null +++ b/pkg/ebpf/perf/event.go @@ -0,0 +1,213 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +//go:build linux_bpf + +// Package perf implements types related to eBPF and the perf subsystem, like perf buffers and ring buffers. 
+package perf + +import ( + "errors" + "fmt" + "slices" + + manager "github.com/DataDog/ebpf-manager" + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/features" + "github.com/cilium/ebpf/perf" + "github.com/cilium/ebpf/ringbuf" + + ebpfTelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" + ddsync "github.com/DataDog/datadog-agent/pkg/util/sync" +) + +var perfPool = ddsync.NewDefaultTypedPool[perf.Record]() +var ringbufPool = ddsync.NewDefaultTypedPool[ringbuf.Record]() + +// Flushable is an interface for objects that support flushing +type Flushable interface { + Flush() +} + +// EventHandler abstracts consuming data from a perf buffer or ring buffer (depending on availability and options). +// It handles upgrading maps from a perf buffer to a ring buffer if desired, and unmarshalling into the desired data type. +type EventHandler struct { + f Flushable + opts EventHandlerOptions +} + +// EventHandlerOptions are the options controlling the EventHandler. +// MapName and Handler are required options. +type EventHandlerOptions struct { + MapName string + Handler func([]byte) + + TelemetryEnabled bool + UseRingBuffer bool + UpgradePerfBuffer bool + + PerfOptions PerfBufferOptions + RingBufOptions RingBufferOptions +} + +// PerfBufferOptions are options specifically for perf buffers +// +//nolint:revive +type PerfBufferOptions struct { + BufferSize int + + // Watermark - The reader will start processing samples once their sizes in the perf ring buffer + // exceed this value. Must be smaller than PerfRingBufferSize. Defaults to the manager value if not set. + Watermark int + + // The number of events required in any per CPU buffer before + // Read will process data. This is mutually exclusive with Watermark. + // The default is zero, which means Watermark will take precedence.
+ WakeupEvents int +} + +// RingBufferOptions are options specifically for ring buffers +type RingBufferOptions struct { + BufferSize int +} + +// NewEventHandler creates an event handler with the provided options +func NewEventHandler(opts EventHandlerOptions) (*EventHandler, error) { + if opts.MapName == "" { + return nil, errors.New("invalid options: MapName is required") + } + if opts.Handler == nil { + return nil, errors.New("invalid options: Handler is required") + } + e := &EventHandler{ + opts: opts, + } + return e, nil +} + +// Init must be called after ebpf-manager.Manager.LoadELF but before ebpf-manager.Manager.Init/InitWithOptions() +func (e *EventHandler) Init(mgr *manager.Manager, mgrOpts *manager.Options) error { + ms, _, _ := mgr.GetMapSpec(e.opts.MapName) + if ms == nil { + return fmt.Errorf("unable to find map spec %q", e.opts.MapName) + } + + if e.opts.UseRingBuffer && features.HaveMapType(ebpf.RingBuf) == nil { + if e.opts.UpgradePerfBuffer { + if ms.Type != ebpf.PerfEventArray { + return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.opts.MapName, ms.Type.String()) + } + UpgradePerfBuffer(mgr, mgrOpts, e.opts.MapName) + } else if ms.Type != ebpf.RingBuf { + return fmt.Errorf("map %q is not a ring buffer, got %q instead", e.opts.MapName, ms.Type.String()) + } + + if ms.MaxEntries != uint32(e.opts.RingBufOptions.BufferSize) { + ResizeRingBuffer(mgrOpts, e.opts.MapName, e.opts.RingBufOptions.BufferSize) + } + e.initRingBuffer(mgr) + return nil + } + + if ms.Type != ebpf.PerfEventArray { + return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.opts.MapName, ms.Type.String()) + } + e.initPerfBuffer(mgr) + return nil +} + +// MapType returns the ebpf.MapType of the underlying events map +// This is only valid after calling Init. 
+func (e *EventHandler) MapType() ebpf.MapType { + switch e.f.(type) { + case *manager.PerfMap: + return ebpf.PerfEventArray + case *manager.RingBuffer: + return ebpf.RingBuf + default: + return ebpf.UnspecifiedMap + } +} + +// Flush flushes the pending data from the underlying perfbuf/ringbuf +func (e *EventHandler) Flush() { + e.f.Flush() +} + +// ResizeRingBuffer resizes the ring buffer by creating/updating a map spec editor +func ResizeRingBuffer(mgrOpts *manager.Options, mapName string, bufferSize int) { + if mgrOpts.MapSpecEditors == nil { + mgrOpts.MapSpecEditors = make(map[string]manager.MapSpecEditor) + } + specEditor := mgrOpts.MapSpecEditors[mapName] + specEditor.MaxEntries = uint32(bufferSize) + specEditor.EditorFlag |= manager.EditMaxEntries + mgrOpts.MapSpecEditors[mapName] = specEditor +} + +func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { + mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { + return perfMap.Name == e.opts.MapName + }) + pm := &manager.PerfMap{ + Map: manager.Map{Name: e.opts.MapName}, + PerfMapOptions: manager.PerfMapOptions{ + PerfRingBufferSize: e.opts.PerfOptions.BufferSize, + Watermark: e.opts.PerfOptions.Watermark, + WakeupEvents: e.opts.PerfOptions.WakeupEvents, + RecordHandler: e.perfRecordHandler, + LostHandler: nil, // TODO do we need support for Lost? 
+ RecordGetter: perfPool.Get, + TelemetryEnabled: e.opts.TelemetryEnabled, + }, + } + mgr.PerfMaps = append(mgr.PerfMaps, pm) + ebpfTelemetry.ReportPerfMapTelemetry(pm) + e.f = pm +} + +func (e *EventHandler) perfRecordHandler(record *perf.Record, _ *manager.PerfMap, _ *manager.Manager) { + defer perfPool.Put(record) + e.opts.Handler(record.RawSample) +} + +func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { + mgr.RingBuffers = slices.DeleteFunc(mgr.RingBuffers, func(ringBuf *manager.RingBuffer) bool { + return ringBuf.Name == e.opts.MapName + }) + rb := &manager.RingBuffer{ + Map: manager.Map{Name: e.opts.MapName}, + RingBufferOptions: manager.RingBufferOptions{ + RecordHandler: e.ringRecordHandler, + RecordGetter: ringbufPool.Get, + TelemetryEnabled: e.opts.TelemetryEnabled, + }, + } + mgr.RingBuffers = append(mgr.RingBuffers, rb) + ebpfTelemetry.ReportRingBufferTelemetry(rb) + e.f = rb +} + +func (e *EventHandler) ringRecordHandler(record *ringbuf.Record, _ *manager.RingBuffer, _ *manager.Manager) { + defer ringbufPool.Put(record) + e.opts.Handler(record.RawSample) +} + +// UpgradePerfBuffer upgrades a perf buffer to a ring buffer by creating a map spec editor +func UpgradePerfBuffer(mgr *manager.Manager, mgrOpts *manager.Options, mapName string) { + if mgrOpts.MapSpecEditors == nil { + mgrOpts.MapSpecEditors = make(map[string]manager.MapSpecEditor) + } + specEditor := mgrOpts.MapSpecEditors[mapName] + specEditor.Type = ebpf.RingBuf + specEditor.KeySize = 0 + specEditor.ValueSize = 0 + specEditor.EditorFlag |= manager.EditType | manager.EditKeyValue + mgrOpts.MapSpecEditors[mapName] = specEditor + + mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { + return perfMap.Name == mapName + }) +} diff --git a/pkg/network/config/config.go b/pkg/network/config/config.go index 47f8c85cda215b..4f340ac1c2f8d5 100644 --- a/pkg/network/config/config.go +++ b/pkg/network/config/config.go @@ -215,6 +215,9 @@ type Config struct { // 
ClosedChannelSize specifies the size for closed channel for the tracer ClosedChannelSize int + // ClosedBufferWakeupCount specifies the number of events that will buffer in a perf buffer before userspace is woken up. + ClosedBufferWakeupCount int + // ExcludedSourceConnections is a map of source connections to blacklist ExcludedSourceConnections map[string][]string @@ -285,6 +288,9 @@ type Config struct { // EnableUSMEventStream enables USM to use the event stream instead // of netlink for receiving process events. EnableUSMEventStream bool + + // KernelBatchingEnabled enables the use of custom batching for eBPF perf events with perf buffers + KernelBatchingEnabled bool } // New creates a config for the network tracer @@ -317,6 +323,7 @@ func New() *Config { MaxFailedConnectionsBuffered: uint32(cfg.GetInt64(sysconfig.FullKeyPath(netNS, "max_failed_connections_buffered"))), ClosedConnectionFlushThreshold: cfg.GetInt(sysconfig.FullKeyPath(spNS, "closed_connection_flush_threshold")), ClosedChannelSize: cfg.GetInt(sysconfig.FullKeyPath(spNS, "closed_channel_size")), + ClosedBufferWakeupCount: cfg.GetInt(sysconfig.FullKeyPath(netNS, "closed_buffer_wakeup_count")), MaxConnectionsStateBuffered: cfg.GetInt(sysconfig.FullKeyPath(spNS, "max_connection_state_buffered")), ClientStateExpiry: 2 * time.Minute, @@ -331,6 +338,7 @@ func New() *Config { ProtocolClassificationEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_protocol_classification")), NPMRingbuffersEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_ringbuffers")), + KernelBatchingEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_kernel_batching")), EnableHTTPMonitoring: cfg.GetBool(sysconfig.FullKeyPath(smNS, "enable_http_monitoring")), EnableHTTP2Monitoring: cfg.GetBool(sysconfig.FullKeyPath(smNS, "enable_http2_monitoring")), diff --git a/pkg/network/ebpf/c/tracer.c b/pkg/network/ebpf/c/tracer.c index 8de25a2a37e6b3..a5d84f0021cc68 100644 --- a/pkg/network/ebpf/c/tracer.c +++ 
b/pkg/network/ebpf/c/tracer.c @@ -303,7 +303,9 @@ int BPF_BYPASSABLE_KRETPROBE(kretprobe__tcp_close_clean_protocols) { bpf_map_delete_elem(&tcp_close_args, &pid_tgid); } - bpf_tail_call_compat(ctx, &tcp_close_progs, 0); + if (is_batching_enabled()) { + bpf_tail_call_compat(ctx, &tcp_close_progs, 0); + } return 0; } diff --git a/pkg/network/ebpf/c/tracer/events.h b/pkg/network/ebpf/c/tracer/events.h index 1de94d755b1f3c..84a120ea6e1ff4 100644 --- a/pkg/network/ebpf/c/tracer/events.h +++ b/pkg/network/ebpf/c/tracer/events.h @@ -44,6 +44,12 @@ __maybe_unused static __always_inline void submit_closed_conn_event(void *ctx, i } } +static __always_inline bool is_batching_enabled() { + __u64 batching_enabled = 0; + LOAD_CONSTANT("batching_enabled", batching_enabled); + return batching_enabled != 0; +} + static __always_inline int cleanup_conn(void *ctx, conn_tuple_t *tup, struct sock *sk) { u32 cpu = bpf_get_smp_processor_id(); // Will hold the full connection data to send through the perf or ring buffer @@ -94,32 +100,34 @@ static __always_inline int cleanup_conn(void *ctx, conn_tuple_t *tup, struct soc // if we added another field conn.conn_stats.duration = bpf_ktime_get_ns() - conn.conn_stats.duration; - // Batch TCP closed connections before generating a perf event - batch_t *batch_ptr = bpf_map_lookup_elem(&conn_close_batch, &cpu); - if (batch_ptr == NULL) { - return -1; - } + if (is_batching_enabled()) { + // Batch TCP closed connections before generating a perf event + batch_t *batch_ptr = bpf_map_lookup_elem(&conn_close_batch, &cpu); + if (batch_ptr == NULL) { + return -1; + } - // TODO: Can we turn this into a macro based on TCP_CLOSED_BATCH_SIZE? 
- switch (batch_ptr->len) { - case 0: - batch_ptr->c0 = conn; - batch_ptr->len++; - return 0; - case 1: - batch_ptr->c1 = conn; - batch_ptr->len++; - return 0; - case 2: - batch_ptr->c2 = conn; - batch_ptr->len++; - return 0; - case 3: - batch_ptr->c3 = conn; - batch_ptr->len++; - // In this case the batch is ready to be flushed, which we defer to kretprobe/tcp_close - // in order to cope with the eBPF stack limitation of 512 bytes. - return 0; + // TODO: Can we turn this into a macro based on TCP_CLOSED_BATCH_SIZE? + switch (batch_ptr->len) { + case 0: + batch_ptr->c0 = conn; + batch_ptr->len++; + return 0; + case 1: + batch_ptr->c1 = conn; + batch_ptr->len++; + return 0; + case 2: + batch_ptr->c2 = conn; + batch_ptr->len++; + return 0; + case 3: + batch_ptr->c3 = conn; + batch_ptr->len++; + // In this case the batch is ready to be flushed, which we defer to kretprobe/tcp_close + // in order to cope with the eBPF stack limitation of 512 bytes. + return 0; + } } // If we hit this section it means we had one or more interleaved tcp_close calls. @@ -127,11 +135,13 @@ static __always_inline int cleanup_conn(void *ctx, conn_tuple_t *tup, struct soc // frequent of a case to cause performance issues and avoid cases where // we drop whole connections, which impacts things USM connection matching. 
submit_closed_conn_event(ctx, cpu, &conn, sizeof(conn_t)); - if (is_tcp) { - increment_telemetry_count(unbatched_tcp_close); - } - if (is_udp) { - increment_telemetry_count(unbatched_udp_close); + if (is_batching_enabled()) { + if (is_tcp) { + increment_telemetry_count(unbatched_tcp_close); + } + if (is_udp) { + increment_telemetry_count(unbatched_udp_close); + } } return 0; } diff --git a/pkg/network/ebpf/c/tracer/maps.h b/pkg/network/ebpf/c/tracer/maps.h index e6123782f8ea56..c97b643d561bb6 100644 --- a/pkg/network/ebpf/c/tracer/maps.h +++ b/pkg/network/ebpf/c/tracer/maps.h @@ -36,7 +36,7 @@ BPF_PERF_EVENT_ARRAY_MAP(conn_close_event, __u32) * or BPF_MAP_TYPE_PERCPU_ARRAY, but they are not available in * some of the Kernels we support (4.4 ~ 4.6) */ -BPF_HASH_MAP(conn_close_batch, __u32, batch_t, 1024) +BPF_HASH_MAP(conn_close_batch, __u32, batch_t, 1) /* * Map to hold struct sock parameter for tcp_sendmsg calls diff --git a/pkg/network/event_common_linux.go b/pkg/network/event_common_linux.go index 99058b017903a6..e70118200b1b03 100644 --- a/pkg/network/event_common_linux.go +++ b/pkg/network/event_common_linux.go @@ -3,10 +3,20 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. -//go:build linux +//go:build linux_bpf package network +import ( + "fmt" + "math" + "time" + "unsafe" + + netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" + "github.com/DataDog/datadog-agent/pkg/network/protocols" +) + // Sub returns s-other. 
// // This implementation is different from the implementation on @@ -50,3 +60,96 @@ func (s StatCounters) Sub(other StatCounters) (sc StatCounters, underflow bool) return sc, false } + +// UnmarshalBinary converts a raw byte slice to a ConnectionStats object +func (c *ConnectionStats) UnmarshalBinary(data []byte) error { + if len(data) < netebpf.SizeofConn { + return fmt.Errorf("'Conn' binary data too small, received %d but expected %d bytes", len(data), netebpf.SizeofConn) + } + + ct := (*netebpf.Conn)(unsafe.Pointer(&data[0])) + c.FromConn(ct) + return nil +} + +// FromConn populates relevant fields on ConnectionStats from the connection data +func (c *ConnectionStats) FromConn(ct *netebpf.Conn) { + c.FromTupleAndStats(&ct.Tup, &ct.Conn_stats) + c.FromTCPStats(&ct.Tcp_stats) +} + +// FromTupleAndStats populates relevant fields on ConnectionStats from the arguments +func (c *ConnectionStats) FromTupleAndStats(t *netebpf.ConnTuple, s *netebpf.ConnStats) { + *c = ConnectionStats{ConnectionTuple: ConnectionTuple{ + Pid: t.Pid, + NetNS: t.Netns, + Source: t.SourceAddress(), + Dest: t.DestAddress(), + SPort: t.Sport, + DPort: t.Dport, + }, + Monotonic: StatCounters{ + SentBytes: s.Sent_bytes, + RecvBytes: s.Recv_bytes, + SentPackets: uint64(s.Sent_packets), + RecvPackets: uint64(s.Recv_packets), + }, + LastUpdateEpoch: s.Timestamp, + IsAssured: s.IsAssured(), + Cookie: StatCookie(s.Cookie), + } + + if s.Duration <= uint64(math.MaxInt64) { + c.Duration = time.Duration(s.Duration) * time.Nanosecond + } + + c.ProtocolStack = protocols.Stack{ + API: protocols.API(s.Protocol_stack.Api), + Application: protocols.Application(s.Protocol_stack.Application), + Encryption: protocols.Encryption(s.Protocol_stack.Encryption), + } + + if t.Type() == netebpf.TCP { + c.Type = TCP + } else { + c.Type = UDP + } + + switch t.Family() { + case netebpf.IPv4: + c.Family = AFINET + case netebpf.IPv6: + c.Family = AFINET6 + } + + c.SPortIsEphemeral = IsPortInEphemeralRange(c.Family, c.Type, 
t.Sport) + + switch s.ConnectionDirection() { + case netebpf.Incoming: + c.Direction = INCOMING + case netebpf.Outgoing: + c.Direction = OUTGOING + default: + c.Direction = OUTGOING + } +} + +// FromTCPStats populates relevant fields on ConnectionStats from the arguments +func (c *ConnectionStats) FromTCPStats(tcpStats *netebpf.TCPStats) { + if c.Type != TCP { + return + } + + if tcpStats != nil { + c.Monotonic.Retransmits = tcpStats.Retransmits + c.Monotonic.TCPEstablished = tcpStats.State_transitions >> netebpf.Established & 1 + c.Monotonic.TCPClosed = tcpStats.State_transitions >> netebpf.Close & 1 + c.RTT = tcpStats.Rtt + c.RTTVar = tcpStats.Rtt_var + if tcpStats.Failure_reason > 0 { + c.TCPFailures = map[uint16]uint32{ + tcpStats.Failure_reason: 1, + } + } + } +} diff --git a/pkg/network/event_common_notlinux.go b/pkg/network/event_common_notlinux.go index 938acfb127f7a3..b366328c545d4c 100644 --- a/pkg/network/event_common_notlinux.go +++ b/pkg/network/event_common_notlinux.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. -//go:build !linux +//go:build !linux_bpf package network diff --git a/pkg/network/state_linux_test.go b/pkg/network/state_linux_test.go index 2a1f3a9b4df801..a7ec0737efd14d 100644 --- a/pkg/network/state_linux_test.go +++ b/pkg/network/state_linux_test.go @@ -3,7 +3,7 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. 
-//go:build linux +//go:build linux_bpf package network diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index d181471578d216..f30e6b87bdb9e6 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -25,23 +25,23 @@ import ( telemetryComponent "github.com/DataDog/datadog-agent/comp/core/telemetry" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/maps" + "github.com/DataDog/datadog-agent/pkg/ebpf/perf" ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" "github.com/DataDog/datadog-agent/pkg/network" "github.com/DataDog/datadog-agent/pkg/network/config" netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" - "github.com/DataDog/datadog-agent/pkg/network/protocols" "github.com/DataDog/datadog-agent/pkg/network/tracer/connection/fentry" "github.com/DataDog/datadog-agent/pkg/network/tracer/connection/kprobe" "github.com/DataDog/datadog-agent/pkg/network/tracer/connection/util" "github.com/DataDog/datadog-agent/pkg/telemetry" + "github.com/DataDog/datadog-agent/pkg/util/encoding" "github.com/DataDog/datadog-agent/pkg/util/log" + ddsync "github.com/DataDog/datadog-agent/pkg/util/sync" ) const ( - defaultClosedChannelSize = 500 - defaultFailedChannelSize = 500 - connTracerModuleName = "network_tracer__ebpf" + connTracerModuleName = "network_tracer__ebpf" ) var tcpOngoingConnectMapTTL = 30 * time.Minute.Nanoseconds() @@ -158,8 +158,6 @@ type ebpfTracer struct { ebpfTracerType TracerType - exitTelemetry chan struct{} - ch *cookieHasher } @@ -204,24 +202,36 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace manager.ConstantEditor{Name: "ephemeral_range_begin", Value: uint64(begin)}, manager.ConstantEditor{Name: "ephemeral_range_end", Value: uint64(end)}) - closedChannelSize := defaultClosedChannelSize - if 
config.ClosedChannelSize > 0 { - closedChannelSize = config.ClosedChannelSize + connPool := ddsync.NewDefaultTypedPool[network.ConnectionStats]() + var extractor *batchExtractor + + util.AddBoolConst(&mgrOptions, "batching_enabled", config.KernelBatchingEnabled) + if config.KernelBatchingEnabled { + numCPUs, err := ebpf.PossibleCPU() + if err != nil { + return nil, fmt.Errorf("could not determine number of CPUs: %w", err) + } + extractor = newBatchExtractor(numCPUs) + mgrOptions.MapSpecEditors[probes.ConnCloseBatchMap] = manager.MapSpecEditor{ + MaxEntries: uint32(numCPUs), + EditorFlag: manager.EditMaxEntries, + } } - var connCloseEventHandler ddebpf.EventHandler - var failedConnsHandler ddebpf.EventHandler - if config.RingBufferSupportedNPM() { - connCloseEventHandler = ddebpf.NewRingBufferHandler(closedChannelSize) - failedConnsHandler = ddebpf.NewRingBufferHandler(defaultFailedChannelSize) - } else { - connCloseEventHandler = ddebpf.NewPerfHandler(closedChannelSize) - failedConnsHandler = ddebpf.NewPerfHandler(defaultFailedChannelSize) + + tr := &ebpfTracer{ + removeTuple: &netebpf.ConnTuple{}, + ch: newCookieHasher(), + } + + connCloseEventHandler, err := initClosedConnEventHandler(config, tr.closedPerfCallback, connPool, extractor) + if err != nil { + return nil, err } var m *manager.Manager - var tracerType TracerType = TracerTypeFentry //nolint:revive // TODO + var tracerType = TracerTypeFentry var closeTracerFn func() - m, closeTracerFn, err := fentry.LoadTracer(config, mgrOptions, connCloseEventHandler) + m, closeTracerFn, err = fentry.LoadTracer(config, mgrOptions, connCloseEventHandler) if err != nil && !errors.Is(err, fentry.ErrorNotSupported) { // failed to load fentry tracer return nil, err @@ -231,7 +241,7 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace // load the kprobe tracer log.Info("loading kprobe-based tracer") var kprobeTracerType kprobe.TracerType - m, closeTracerFn, kprobeTracerType, err = 
kprobe.LoadTracer(config, mgrOptions, connCloseEventHandler, failedConnsHandler) + m, closeTracerFn, kprobeTracerType, err = kprobe.LoadTracer(config, mgrOptions, connCloseEventHandler) if err != nil { return nil, err } @@ -240,17 +250,14 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace m.DumpHandler = dumpMapsHandler ddebpf.AddNameMappings(m, "npm_tracer") - numCPUs, err := ebpf.PossibleCPU() - if err != nil { - return nil, fmt.Errorf("could not determine number of CPUs: %w", err) - } - extractor := newBatchExtractor(numCPUs) - batchMgr, err := newConnBatchManager(m, extractor) - if err != nil { - return nil, fmt.Errorf("could not create connection batch manager: %w", err) + var flusher perf.Flushable = connCloseEventHandler + if config.KernelBatchingEnabled { + flusher, err = newConnBatchManager(m, extractor, connPool, tr.closedPerfCallback) + if err != nil { + return nil, err + } } - - closeConsumer := newTCPCloseConsumer(connCloseEventHandler, batchMgr) + tr.closeConsumer = newTCPCloseConsumer(flusher, connPool) // Failed connections are not supported on prebuilt if tracerType == TracerTypeKProbePrebuilt { @@ -260,16 +267,10 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace config.TCPFailedConnectionsEnabled = false } - tr := &ebpfTracer{ - m: m, - config: config, - closeConsumer: closeConsumer, - removeTuple: &netebpf.ConnTuple{}, - closeTracer: closeTracerFn, - ebpfTracerType: tracerType, - exitTelemetry: make(chan struct{}), - ch: newCookieHasher(), - } + tr.m = m + tr.config = config + tr.closeTracer = closeTracerFn + tr.ebpfTracerType = tracerType tr.setupMapCleaner(m) @@ -293,6 +294,64 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace return tr, nil } +func initClosedConnEventHandler(config *config.Config, closedCallback func(*network.ConnectionStats), pool ddsync.Pool[network.ConnectionStats], extractor *batchExtractor) (*perf.EventHandler, error) { + 
connHasher := newCookieHasher() + singleConnHandler := encoding.BinaryUnmarshalCallback(pool.Get, func(b *network.ConnectionStats, err error) { + if err != nil { + if b != nil { + pool.Put(b) + } + log.Debug(err.Error()) + return + } + if b != nil { + connHasher.Hash(b) + } + closedCallback(b) + }) + + eopts := perf.EventHandlerOptions{ + MapName: probes.ConnCloseEventMap, + TelemetryEnabled: config.InternalTelemetryEnabled, + UseRingBuffer: config.RingBufferSupportedNPM(), + UpgradePerfBuffer: true, + PerfOptions: perf.PerfBufferOptions{ + BufferSize: util.ComputeDefaultClosedConnPerfBufferSize(), + }, + RingBufOptions: perf.RingBufferOptions{ + BufferSize: util.ComputeDefaultClosedConnRingBufferSize(), + }, + } + if config.KernelBatchingEnabled { + eopts.PerfOptions.Watermark = 1 + eopts.Handler = func(buf []byte) { + l := len(buf) + switch { + case l >= netebpf.SizeofBatch: + b := netebpf.ToBatch(buf) + for rc := extractor.NextConnection(b); rc != nil; rc = extractor.NextConnection(b) { + c := pool.Get() + c.FromConn(rc) + connHasher.Hash(c) + + closedCallback(c) + } + case l >= netebpf.SizeofConn: + singleConnHandler(buf) + case l == 0: + singleConnHandler(nil) + default: + log.Debugf("unexpected %q binary data of size %d bytes", probes.ConnCloseEventMap, l) + } + } + } else { + eopts.PerfOptions.WakeupEvents = config.ClosedBufferWakeupCount + eopts.Handler = singleConnHandler + } + + return perf.NewEventHandler(eopts) +} + func boolConst(name string, value bool) manager.ConstantEditor { c := manager.ConstantEditor{ Name: name, @@ -305,6 +364,10 @@ func boolConst(name string, value bool) manager.ConstantEditor { return c } +func (t *ebpfTracer) closedPerfCallback(c *network.ConnectionStats) { + t.closeConsumer.Callback(c) +} + func (t *ebpfTracer) Start(callback func(*network.ConnectionStats)) (err error) { defer func() { if err != nil { @@ -317,11 +380,13 @@ func (t *ebpfTracer) Start(callback func(*network.ConnectionStats)) (err error) return 
fmt.Errorf("error initializing port binding maps: %s", err) } + t.closeConsumer.Start(callback) + if err := t.m.Start(); err != nil { + t.closeConsumer.Stop() return fmt.Errorf("could not start ebpf manager: %s", err) } - t.closeConsumer.Start(callback) return nil } @@ -344,7 +409,6 @@ func (t *ebpfTracer) FlushPending() { func (t *ebpfTracer) Stop() { t.stopOnce.Do(func() { - close(t.exitTelemetry) ddebpf.RemoveNameMappings(t.m) ebpftelemetry.UnregisterTelemetry(t.m) _ = t.m.Stop(manager.CleanAll) @@ -389,7 +453,8 @@ func (t *ebpfTracer) GetConnections(buffer *network.ConnectionBuffer, filter fun continue } - populateConnStats(conn, key, stats, t.ch) + conn.FromTupleAndStats(key, stats) + t.ch.Hash(conn) connsByTuple[*key] = stats.Cookie isTCP := conn.Type == network.TCP @@ -413,7 +478,7 @@ func (t *ebpfTracer) GetConnections(buffer *network.ConnectionBuffer, filter fun } if t.getTCPStats(tcp, key) { - updateTCPStats(conn, tcp) + conn.FromTCPStats(tcp) } if retrans, ok := t.getTCPRetransmits(key, seen); ok && conn.Type == network.TCP { conn.Monotonic.Retransmits = retrans @@ -717,81 +782,3 @@ func (t *ebpfTracer) setupMapCleaner(m *manager.Manager) { t.ongoingConnectCleaner = tcpOngoingConnectPidCleaner } - -func populateConnStats(stats *network.ConnectionStats, t *netebpf.ConnTuple, s *netebpf.ConnStats, ch *cookieHasher) { - *stats = network.ConnectionStats{ConnectionTuple: network.ConnectionTuple{ - Pid: t.Pid, - NetNS: t.Netns, - Source: t.SourceAddress(), - Dest: t.DestAddress(), - SPort: t.Sport, - DPort: t.Dport, - }, - Monotonic: network.StatCounters{ - SentBytes: s.Sent_bytes, - RecvBytes: s.Recv_bytes, - SentPackets: uint64(s.Sent_packets), - RecvPackets: uint64(s.Recv_packets), - }, - LastUpdateEpoch: s.Timestamp, - IsAssured: s.IsAssured(), - Cookie: network.StatCookie(s.Cookie), - } - - if s.Duration <= uint64(math.MaxInt64) { - stats.Duration = time.Duration(s.Duration) * time.Nanosecond - } - - stats.ProtocolStack = protocols.Stack{ - API: 
protocols.API(s.Protocol_stack.Api), - Application: protocols.Application(s.Protocol_stack.Application), - Encryption: protocols.Encryption(s.Protocol_stack.Encryption), - } - - if t.Type() == netebpf.TCP { - stats.Type = network.TCP - } else { - stats.Type = network.UDP - } - - switch t.Family() { - case netebpf.IPv4: - stats.Family = network.AFINET - case netebpf.IPv6: - stats.Family = network.AFINET6 - } - - stats.SPortIsEphemeral = network.IsPortInEphemeralRange(stats.Family, stats.Type, t.Sport) - - switch s.ConnectionDirection() { - case netebpf.Incoming: - stats.Direction = network.INCOMING - case netebpf.Outgoing: - stats.Direction = network.OUTGOING - default: - stats.Direction = network.OUTGOING - } - - if ch != nil { - ch.Hash(stats) - } -} - -func updateTCPStats(conn *network.ConnectionStats, tcpStats *netebpf.TCPStats) { - if conn.Type != network.TCP { - return - } - - if tcpStats != nil { - conn.Monotonic.Retransmits = tcpStats.Retransmits - conn.Monotonic.TCPEstablished = tcpStats.State_transitions >> netebpf.Established & 1 - conn.Monotonic.TCPClosed = tcpStats.State_transitions >> netebpf.Close & 1 - conn.RTT = tcpStats.Rtt - conn.RTTVar = tcpStats.Rtt_var - if tcpStats.Failure_reason > 0 { - conn.TCPFailures = map[uint16]uint32{ - tcpStats.Failure_reason: 1, - } - } - } -} diff --git a/pkg/network/tracer/connection/fentry/manager.go b/pkg/network/tracer/connection/fentry/manager.go index b41820b7ffedf3..deddd6708f5fd7 100644 --- a/pkg/network/tracer/connection/fentry/manager.go +++ b/pkg/network/tracer/connection/fentry/manager.go @@ -11,12 +11,10 @@ import ( manager "github.com/DataDog/ebpf-manager" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" - "github.com/DataDog/datadog-agent/pkg/network/config" "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" - "github.com/DataDog/datadog-agent/pkg/network/tracer/connection/util" ) -func initManager(mgr *ddebpf.Manager, connCloseEventHandler ddebpf.EventHandler, cfg *config.Config) { +func 
initManager(mgr *ddebpf.Manager) { mgr.Maps = []*manager.Map{ {Name: probes.ConnMap}, {Name: probes.TCPStatsMap}, @@ -29,7 +27,6 @@ func initManager(mgr *ddebpf.Manager, connCloseEventHandler ddebpf.EventHandler, {Name: "pending_bind"}, {Name: probes.TelemetryMap}, } - util.SetupClosedConnHandler(connCloseEventHandler, mgr, cfg) for funcName := range programs { p := &manager.Probe{ ProbeIdentificationPair: manager.ProbeIdentificationPair{ diff --git a/pkg/network/tracer/connection/fentry/probes.go b/pkg/network/tracer/connection/fentry/probes.go index 8d8ce7ae73df3d..cc407dbef8b6a3 100644 --- a/pkg/network/tracer/connection/fentry/probes.go +++ b/pkg/network/tracer/connection/fentry/probes.go @@ -138,7 +138,6 @@ func enabledPrograms(c *config.Config) (map[string]struct{}, error) { enableProgram(enabled, tcpSendPageReturn) enableProgram(enabled, selectVersionBasedProbe(kv, tcpRecvMsgReturn, tcpRecvMsgPre5190Return, kv5190)) enableProgram(enabled, tcpClose) - enableProgram(enabled, tcpCloseReturn) enableProgram(enabled, tcpConnect) enableProgram(enabled, tcpFinishConnect) enableProgram(enabled, inetCskAcceptReturn) @@ -153,30 +152,40 @@ func enabledPrograms(c *config.Config) (map[string]struct{}, error) { // if err == nil && len(missing) == 0 { // enableProgram(enabled, sockFDLookupRet) // } + + if c.KernelBatchingEnabled { + enableProgram(enabled, tcpCloseReturn) + } } if c.CollectUDPv4Conns { enableProgram(enabled, udpSendPageReturn) enableProgram(enabled, udpDestroySock) - enableProgram(enabled, udpDestroySockReturn) enableProgram(enabled, inetBind) enableProgram(enabled, inetBindRet) enableProgram(enabled, udpRecvMsg) enableProgram(enabled, selectVersionBasedProbe(kv, udpRecvMsgReturn, udpRecvMsgPre5190Return, kv5190)) enableProgram(enabled, udpSendMsgReturn) enableProgram(enabled, udpSendSkb) + + if c.KernelBatchingEnabled { + enableProgram(enabled, udpDestroySockReturn) + } } if c.CollectUDPv6Conns { enableProgram(enabled, udpSendPageReturn) 
enableProgram(enabled, udpv6DestroySock) - enableProgram(enabled, udpv6DestroySockReturn) enableProgram(enabled, inet6Bind) enableProgram(enabled, inet6BindRet) enableProgram(enabled, udpv6RecvMsg) enableProgram(enabled, selectVersionBasedProbe(kv, udpv6RecvMsgReturn, udpv6RecvMsgPre5190Return, kv5190)) enableProgram(enabled, udpv6SendMsgReturn) enableProgram(enabled, udpv6SendSkb) + + if c.KernelBatchingEnabled { + enableProgram(enabled, udpv6DestroySockReturn) + } } if c.CollectUDPv4Conns || c.CollectUDPv6Conns { diff --git a/pkg/network/tracer/connection/fentry/tracer.go b/pkg/network/tracer/connection/fentry/tracer.go index 0124ca3436d6c7..cc4315e5ce7662 100644 --- a/pkg/network/tracer/connection/fentry/tracer.go +++ b/pkg/network/tracer/connection/fentry/tracer.go @@ -14,9 +14,11 @@ import ( "syscall" manager "github.com/DataDog/ebpf-manager" + "github.com/cilium/ebpf" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" + "github.com/DataDog/datadog-agent/pkg/ebpf/perf" ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" "github.com/DataDog/datadog-agent/pkg/network/config" netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" @@ -29,7 +31,7 @@ const probeUID = "net" var ErrorNotSupported = errors.New("fentry tracer is only supported on Fargate") //nolint:revive // TODO // LoadTracer loads a new tracer -func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler ddebpf.EventHandler) (*manager.Manager, func(), error) { +func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { if !fargate.IsFargateInstance() { return nil, nil, ErrorNotSupported } @@ -39,61 +41,62 @@ func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHa o.RLimit = mgrOpts.RLimit o.MapSpecEditors = mgrOpts.MapSpecEditors o.ConstantEditors = mgrOpts.ConstantEditors + return 
initFentryTracer(ar, o, config, m, connCloseEventHandler) + }) - // Use the config to determine what kernel probes should be enabled - enabledProbes, err := enabledPrograms(config) - if err != nil { - return fmt.Errorf("invalid probe configuration: %v", err) - } - - initManager(m, connCloseEventHandler, config) - - file, err := os.Stat("/proc/self/ns/pid") + if err != nil { + return nil, nil, err + } - if err != nil { - return fmt.Errorf("could not load sysprobe pid: %w", err) - } + return m.Manager, nil, nil +} - device := file.Sys().(*syscall.Stat_t).Dev - inode := file.Sys().(*syscall.Stat_t).Ino - ringbufferEnabled := config.RingBufferSupportedNPM() - - o.ConstantEditors = append(o.ConstantEditors, manager.ConstantEditor{ - Name: "systemprobe_device", - Value: device, - }) - o.ConstantEditors = append(o.ConstantEditors, manager.ConstantEditor{ - Name: "systemprobe_ino", - Value: inode, - }) - util.AddBoolConst(&o, "ringbuffers_enabled", ringbufferEnabled) - if ringbufferEnabled { - util.EnableRingbuffersViaMapEditor(&mgrOpts) - } +// Use a function so someone doesn't accidentally use mgrOpts from the outer scope in LoadTracer +func initFentryTracer(ar bytecode.AssetReader, o manager.Options, config *config.Config, m *ddebpf.Manager, connCloseEventHandler *perf.EventHandler) error { + // Use the config to determine what kernel probes should be enabled + enabledProbes, err := enabledPrograms(config) + if err != nil { + return fmt.Errorf("invalid probe configuration: %v", err) + } - // exclude all non-enabled probes to ensure we don't run into problems with unsupported probe types - for _, p := range m.Probes { - if _, enabled := enabledProbes[p.EBPFFuncName]; !enabled { - o.ExcludedFunctions = append(o.ExcludedFunctions, p.EBPFFuncName) - } - } - for funcName := range enabledProbes { - o.ActivatedProbes = append( - o.ActivatedProbes, - &manager.ProbeSelector{ - ProbeIdentificationPair: manager.ProbeIdentificationPair{ - EBPFFuncName: funcName, - UID: probeUID, - 
}, - }) - } + initManager(m) - return m.InitWithOptions(ar, &o) + file, err := os.Stat("/proc/self/ns/pid") + if err != nil { + return fmt.Errorf("could not load sysprobe pid: %w", err) + } + pidStat := file.Sys().(*syscall.Stat_t) + o.ConstantEditors = append(o.ConstantEditors, manager.ConstantEditor{ + Name: "systemprobe_device", + Value: pidStat.Dev, + }, manager.ConstantEditor{ + Name: "systemprobe_ino", + Value: pidStat.Ino, }) - if err != nil { - return nil, nil, err + // exclude all non-enabled probes to ensure we don't run into problems with unsupported probe types + for _, p := range m.Probes { + if _, enabled := enabledProbes[p.EBPFFuncName]; !enabled { + o.ExcludedFunctions = append(o.ExcludedFunctions, p.EBPFFuncName) + } + } + for funcName := range enabledProbes { + o.ActivatedProbes = append( + o.ActivatedProbes, + &manager.ProbeSelector{ + ProbeIdentificationPair: manager.ProbeIdentificationPair{ + EBPFFuncName: funcName, + UID: probeUID, + }, + }) } - return m.Manager, nil, nil + if err := m.LoadELF(ar); err != nil { + return fmt.Errorf("failed to load ELF with ebpf manager: %w", err) + } + if err := connCloseEventHandler.Init(m.Manager, &o); err != nil { + return fmt.Errorf("error initializing closed connections event handler: %w", err) + } + util.AddBoolConst(&o, "ringbuffers_enabled", connCloseEventHandler.MapType() == ebpf.RingBuf) + return m.InitWithOptions(nil, &o) } diff --git a/pkg/network/tracer/connection/kprobe/config.go b/pkg/network/tracer/connection/kprobe/config.go index 880a2f0a5e8388..d927cb2b9b7aa8 100644 --- a/pkg/network/tracer/connection/kprobe/config.go +++ b/pkg/network/tracer/connection/kprobe/config.go @@ -8,10 +8,14 @@ package kprobe import ( + "errors" "fmt" + manager "github.com/DataDog/ebpf-manager" + "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/network/config" + netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" 
"github.com/DataDog/datadog-agent/pkg/util/kernel" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -76,10 +80,15 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes enableProbe(enabled, probes.TCPReadSock) enableProbe(enabled, probes.TCPReadSockReturn) enableProbe(enabled, probes.TCPClose) - enableProbe(enabled, probes.TCPCloseFlushReturn) + if c.KernelBatchingEnabled { + enableProbe(enabled, probes.TCPCloseFlushReturn) + } + enableProbe(enabled, probes.TCPConnect) enableProbe(enabled, probes.TCPDone) - enableProbe(enabled, probes.TCPDoneFlushReturn) + if c.KernelBatchingEnabled { + enableProbe(enabled, probes.TCPDoneFlushReturn) + } enableProbe(enabled, probes.TCPFinishConnect) enableProbe(enabled, probes.InetCskAcceptReturn) enableProbe(enabled, probes.InetCskListenStop) @@ -93,7 +102,9 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes if c.CollectUDPv4Conns { enableProbe(enabled, probes.UDPDestroySock) - enableProbe(enabled, probes.UDPDestroySockReturn) + if c.KernelBatchingEnabled { + enableProbe(enabled, probes.UDPDestroySockReturn) + } enableProbe(enabled, selectVersionBasedProbe(runtimeTracer, kv, probes.IPMakeSkb, probes.IPMakeSkbPre4180, kv4180)) enableProbe(enabled, probes.IPMakeSkbReturn) enableProbe(enabled, probes.InetBind) @@ -116,11 +127,13 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes if c.CollectUDPv6Conns { enableProbe(enabled, probes.UDPv6DestroySock) - enableProbe(enabled, probes.UDPv6DestroySockReturn) + if c.KernelBatchingEnabled { + enableProbe(enabled, probes.UDPv6DestroySockReturn) + } if kv >= kv5180 || runtimeTracer { // prebuilt shouldn't arrive here with 5.18+ and UDPv6 enabled if !coreTracer && !runtimeTracer { - return nil, fmt.Errorf("UDPv6 does not function on prebuilt tracer with kernel versions 5.18+") + return nil, errors.New("UDPv6 does not function on prebuilt tracer with kernel versions 5.18+") } enableProbe(enabled, 
probes.IP6MakeSkb) } else if kv >= kv470 { @@ -156,6 +169,46 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes return enabled, nil } +func protocolClassificationTailCalls(cfg *config.Config) []manager.TailCallRoute { + tcs := []manager.TailCallRoute{ + { + ProgArrayName: probes.ClassificationProgsMap, + Key: netebpf.ClassificationQueues, + ProbeIdentificationPair: manager.ProbeIdentificationPair{ + EBPFFuncName: probes.ProtocolClassifierQueuesSocketFilter, + UID: probeUID, + }, + }, + { + ProgArrayName: probes.ClassificationProgsMap, + Key: netebpf.ClassificationDBs, + ProbeIdentificationPair: manager.ProbeIdentificationPair{ + EBPFFuncName: probes.ProtocolClassifierDBsSocketFilter, + UID: probeUID, + }, + }, + { + ProgArrayName: probes.ClassificationProgsMap, + Key: netebpf.ClassificationGRPC, + ProbeIdentificationPair: manager.ProbeIdentificationPair{ + EBPFFuncName: probes.ProtocolClassifierGRPCSocketFilter, + UID: probeUID, + }, + }, + } + if cfg.KernelBatchingEnabled { + tcs = append(tcs, manager.TailCallRoute{ + ProgArrayName: probes.TCPCloseProgsMap, + Key: 0, + ProbeIdentificationPair: manager.ProbeIdentificationPair{ + EBPFFuncName: probes.TCPCloseFlushReturn, + UID: probeUID, + }, + }) + } + return tcs +} + func enableAdvancedUDP(enabled map[probes.ProbeFuncName]struct{}) error { missing, err := ebpf.VerifyKernelFuncs("skb_consume_udp", "__skb_free_datagram_locked", "skb_free_datagram_locked") if err != nil { @@ -169,7 +222,7 @@ func enableAdvancedUDP(enabled map[probes.ProbeFuncName]struct{}) error { } else if _, miss := missing["skb_free_datagram_locked"]; !miss { enableProbe(enabled, probes.SKBFreeDatagramLocked) } else { - return fmt.Errorf("missing desired UDP receive kernel functions") + return errors.New("missing desired UDP receive kernel functions") } return nil } diff --git a/pkg/network/tracer/connection/kprobe/manager.go b/pkg/network/tracer/connection/kprobe/manager.go index fb2ee4b7bd0656..63dde86a0e1073 
100644 --- a/pkg/network/tracer/connection/kprobe/manager.go +++ b/pkg/network/tracer/connection/kprobe/manager.go @@ -11,9 +11,8 @@ import ( manager "github.com/DataDog/ebpf-manager" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" - "github.com/DataDog/datadog-agent/pkg/network/config" "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" - "github.com/DataDog/datadog-agent/pkg/network/tracer/connection/util" + "github.com/DataDog/datadog-agent/pkg/util/slices" ) var mainProbes = []probes.ProbeFuncName{ @@ -32,9 +31,7 @@ var mainProbes = []probes.ProbeFuncName{ probes.TCPReadSockReturn, probes.TCPClose, probes.TCPDone, - probes.TCPDoneFlushReturn, probes.TCPCloseCleanProtocolsReturn, - probes.TCPCloseFlushReturn, probes.TCPConnect, probes.TCPFinishConnect, probes.IPMakeSkb, @@ -50,9 +47,7 @@ var mainProbes = []probes.ProbeFuncName{ probes.InetCskAcceptReturn, probes.InetCskListenStop, probes.UDPDestroySock, - probes.UDPDestroySockReturn, probes.UDPv6DestroySock, - probes.UDPv6DestroySockReturn, probes.InetBind, probes.Inet6Bind, probes.InetBindRet, @@ -61,7 +56,14 @@ var mainProbes = []probes.ProbeFuncName{ probes.UDPSendPageReturn, } -func initManager(mgr *ddebpf.Manager, connCloseEventHandler ddebpf.EventHandler, runtimeTracer bool, cfg *config.Config) error { +var batchProbes = []probes.ProbeFuncName{ + probes.TCPDoneFlushReturn, + probes.TCPCloseFlushReturn, + probes.UDPDestroySockReturn, + probes.UDPv6DestroySockReturn, +} + +func initManager(mgr *ddebpf.Manager, runtimeTracer bool) error { mgr.Maps = []*manager.Map{ {Name: probes.ConnMap}, {Name: probes.TCPStatsMap}, @@ -82,45 +84,45 @@ func initManager(mgr *ddebpf.Manager, connCloseEventHandler ddebpf.EventHandler, {Name: probes.ClassificationProgsMap}, {Name: probes.TCPCloseProgsMap}, } - util.SetupClosedConnHandler(connCloseEventHandler, mgr, cfg) - for _, funcName := range mainProbes { - p := &manager.Probe{ + var funcNameToProbe = func(funcName probes.ProbeFuncName) *manager.Probe { + return 
&manager.Probe{ ProbeIdentificationPair: manager.ProbeIdentificationPair{ EBPFFuncName: funcName, UID: probeUID, }, } - mgr.Probes = append(mgr.Probes, p) } - mgr.Probes = append(mgr.Probes, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.SKBFreeDatagramLocked, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UnderscoredSKBFreeDatagramLocked, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.SKBConsumeUDP, UID: probeUID}}, - ) + mgr.Probes = append(mgr.Probes, slices.Map(mainProbes, funcNameToProbe)...) + mgr.Probes = append(mgr.Probes, slices.Map(batchProbes, funcNameToProbe)...) + mgr.Probes = append(mgr.Probes, slices.Map([]probes.ProbeFuncName{ + probes.SKBFreeDatagramLocked, + probes.UnderscoredSKBFreeDatagramLocked, + probes.SKBConsumeUDP, + }, funcNameToProbe)...) if !runtimeTracer { // the runtime compiled tracer has no need for separate probes targeting specific kernel versions, since it can // do that with #ifdefs inline. Thus, the following probes should only be declared as existing in the prebuilt // tracer. 
- mgr.Probes = append(mgr.Probes, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.TCPRetransmitPre470, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.IPMakeSkbPre4180, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.IP6MakeSkbPre470, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.IP6MakeSkbPre5180, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPRecvMsgPre5190, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPv6RecvMsgPre5190, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPRecvMsgPre470, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPv6RecvMsgPre470, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPRecvMsgPre410, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPv6RecvMsgPre410, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPRecvMsgReturnPre470, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.UDPv6RecvMsgReturnPre470, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.TCPSendMsgPre410, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.TCPRecvMsgPre410, UID: probeUID}}, - &manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: probes.TCPRecvMsgPre5190, UID: 
probeUID}}, - ) + mgr.Probes = append(mgr.Probes, slices.Map([]probes.ProbeFuncName{ + probes.TCPRetransmitPre470, + probes.IPMakeSkbPre4180, + probes.IP6MakeSkbPre470, + probes.IP6MakeSkbPre5180, + probes.UDPRecvMsgPre5190, + probes.UDPv6RecvMsgPre5190, + probes.UDPRecvMsgPre470, + probes.UDPv6RecvMsgPre470, + probes.UDPRecvMsgPre410, + probes.UDPv6RecvMsgPre410, + probes.UDPRecvMsgReturnPre470, + probes.UDPv6RecvMsgReturnPre470, + probes.TCPSendMsgPre410, + probes.TCPRecvMsgPre410, + probes.TCPRecvMsgPre5190, + }, funcNameToProbe)...) } return nil diff --git a/pkg/network/tracer/connection/kprobe/tracer.go b/pkg/network/tracer/connection/kprobe/tracer.go index 634642154e8818..cfbc2be96baf2e 100644 --- a/pkg/network/tracer/connection/kprobe/tracer.go +++ b/pkg/network/tracer/connection/kprobe/tracer.go @@ -13,9 +13,12 @@ import ( manager "github.com/DataDog/ebpf-manager" "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/cilium/ebpf/features" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" + "github.com/DataDog/datadog-agent/pkg/ebpf/perf" "github.com/DataDog/datadog-agent/pkg/ebpf/prebuilt" ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" "github.com/DataDog/datadog-agent/pkg/network/config" @@ -44,41 +47,6 @@ var ( // - 2492d3b867043f6880708d095a7a5d65debcfc32 classificationMinimumKernel = kernel.VersionCode(4, 11, 0) - protocolClassificationTailCalls = []manager.TailCallRoute{ - { - ProgArrayName: probes.ClassificationProgsMap, - Key: netebpf.ClassificationQueues, - ProbeIdentificationPair: manager.ProbeIdentificationPair{ - EBPFFuncName: probes.ProtocolClassifierQueuesSocketFilter, - UID: probeUID, - }, - }, - { - ProgArrayName: probes.ClassificationProgsMap, - Key: netebpf.ClassificationDBs, - ProbeIdentificationPair: manager.ProbeIdentificationPair{ - EBPFFuncName: probes.ProtocolClassifierDBsSocketFilter, - UID: probeUID, - }, - }, - { - ProgArrayName: 
probes.ClassificationProgsMap, - Key: netebpf.ClassificationGRPC, - ProbeIdentificationPair: manager.ProbeIdentificationPair{ - EBPFFuncName: probes.ProtocolClassifierGRPCSocketFilter, - UID: probeUID, - }, - }, - { - ProgArrayName: probes.TCPCloseProgsMap, - Key: 0, - ProbeIdentificationPair: manager.ProbeIdentificationPair{ - EBPFFuncName: probes.TCPCloseFlushReturn, - UID: probeUID, - }, - }, - } - // these primarily exist for mocking out in tests coreTracerLoader = loadCORETracer rcTracerLoader = loadRuntimeCompiledTracer @@ -109,7 +77,7 @@ func ClassificationSupported(config *config.Config) bool { } // LoadTracer loads the co-re/prebuilt/runtime compiled network tracer, depending on config -func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandler ddebpf.EventHandler, failedConnsHandler ddebpf.EventHandler) (*manager.Manager, func(), TracerType, error) { //nolint:revive // TODO +func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), TracerType, error) { kprobeAttachMethod := manager.AttachKprobeWithPerfEventOpen if cfg.AttachKprobesWithKprobeEventsABI { kprobeAttachMethod = manager.AttachKprobeWithKprobeEvents @@ -174,18 +142,11 @@ func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandl return m, closeFn, TracerTypePrebuilt, err } -func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer bool, config *config.Config, mgrOpts manager.Options, connCloseEventHandler ddebpf.EventHandler) (*manager.Manager, func(), error) { +func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer bool, config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { m := ddebpf.NewManagerWithDefault(&manager.Manager{}, "network", &ebpftelemetry.ErrorsTelemetryModifier{}) - if err := initManager(m, connCloseEventHandler, runtimeTracer, config); err != nil { + 
if err := initManager(m, runtimeTracer); err != nil { return nil, nil, fmt.Errorf("could not initialize manager: %w", err) } - switch connCloseEventHandler.(type) { - case *ddebpf.RingBufferHandler: - util.EnableRingbuffersViaMapEditor(&mgrOpts) - util.AddBoolConst(&mgrOpts, "ringbuffers_enabled", true) - } - - var undefinedProbes []manager.ProbeIdentificationPair var closeProtocolClassifierSocketFilterFn func() classificationSupported := ClassificationSupported(config) @@ -193,8 +154,9 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo var tailCallsIdentifiersSet map[manager.ProbeIdentificationPair]struct{} if classificationSupported { - tailCallsIdentifiersSet = make(map[manager.ProbeIdentificationPair]struct{}, len(protocolClassificationTailCalls)) - for _, tailCall := range protocolClassificationTailCalls { + pcTailCalls := protocolClassificationTailCalls(config) + tailCallsIdentifiersSet = make(map[manager.ProbeIdentificationPair]struct{}, len(pcTailCalls)) + for _, tailCall := range pcTailCalls { tailCallsIdentifiersSet[tailCall.ProbeIdentificationPair] = struct{}{} } socketFilterProbe, _ := m.GetProbe(manager.ProbeIdentificationPair{ @@ -202,7 +164,7 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo UID: probeUID, }) if socketFilterProbe == nil { - return nil, nil, fmt.Errorf("error retrieving protocol classifier socket filter") + return nil, nil, errors.New("error retrieving protocol classifier socket filter") } var err error @@ -211,9 +173,7 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo return nil, nil, fmt.Errorf("error enabling protocol classifier: %w", err) } - //nolint:ineffassign,staticcheck // TODO(NET) Fix ineffassign linter // TODO(NET) Fix staticcheck linter - undefinedProbes = append(undefinedProbes, protocolClassificationTailCalls[0].ProbeIdentificationPair) - mgrOpts.TailCallRouter = append(mgrOpts.TailCallRouter, 
protocolClassificationTailCalls...) + mgrOpts.TailCallRouter = append(mgrOpts.TailCallRouter, pcTailCalls...) } else { // Kernels < 4.7.0 do not know about the per-cpu array map used // in classification, preventing the program to load even though @@ -263,14 +223,25 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo }) } - if err := m.InitWithOptions(buf, &mgrOpts); err != nil { + if err := m.LoadELF(buf); err != nil { + return nil, nil, fmt.Errorf("failed to load ELF with ebpf manager: %w", err) + } + if err := connCloseEventHandler.Init(m.Manager, &mgrOpts); err != nil { + return nil, nil, fmt.Errorf("error initializing closed connections event handler: %w", err) + } + usingRingBuffers := connCloseEventHandler.MapType() == ebpf.RingBuf + util.AddBoolConst(&mgrOpts, "ringbuffers_enabled", usingRingBuffers) + if features.HaveMapType(ebpf.RingBuf) != nil { + m.EnabledModifiers = append(m.EnabledModifiers, ddebpf.NewHelperCallRemover(asm.FnRingbufOutput)) + } + if err := m.InitWithOptions(nil, &mgrOpts); err != nil { return nil, nil, fmt.Errorf("failed to init ebpf manager: %w", err) } return m.Manager, closeProtocolClassifierSocketFilterFn, nil } -func loadCORETracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler ddebpf.EventHandler) (*manager.Manager, func(), error) { +func loadCORETracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { var m *manager.Manager var closeFn func() var err error @@ -288,7 +259,7 @@ func loadCORETracer(config *config.Config, mgrOpts manager.Options, connCloseEve return m, closeFn, err } -func loadRuntimeCompiledTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler ddebpf.EventHandler) (*manager.Manager, func(), error) { +func loadRuntimeCompiledTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { buf, err 
:= getRuntimeCompiledTracer(config) if err != nil { return nil, nil, err @@ -298,7 +269,7 @@ func loadRuntimeCompiledTracer(config *config.Config, mgrOpts manager.Options, c return tracerLoaderFromAsset(buf, true, false, config, mgrOpts, connCloseEventHandler) } -func loadPrebuiltTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler ddebpf.EventHandler) (*manager.Manager, func(), error) { +func loadPrebuiltTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { buf, err := netebpf.ReadBPFModule(config.BPFDir, config.BPFDebug) if err != nil { return nil, nil, fmt.Errorf("could not read bpf module: %w", err) diff --git a/pkg/network/tracer/connection/kprobe/tracer_test.go b/pkg/network/tracer/connection/kprobe/tracer_test.go index 4c4a155ce12b95..5727c50bd51693 100644 --- a/pkg/network/tracer/connection/kprobe/tracer_test.go +++ b/pkg/network/tracer/connection/kprobe/tracer_test.go @@ -15,8 +15,8 @@ import ( manager "github.com/DataDog/ebpf-manager" - ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" + "github.com/DataDog/datadog-agent/pkg/ebpf/perf" "github.com/DataDog/datadog-agent/pkg/network/config" "github.com/DataDog/datadog-agent/pkg/util/kernel" ) @@ -169,14 +169,14 @@ func testTracerFallbackCOREAndRCErr(t *testing.T) { runFallbackTests(t, "CORE and RC error", true, true, tests) } -func loaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ ddebpf.EventHandler) (*manager.Manager, func(), error) { - return func(_ *config.Config, _ manager.Options, _ ddebpf.EventHandler) (*manager.Manager, func(), error) { +func loaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { + return func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { return nil, closeFn, err } } 
-func prebuiltLoaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ ddebpf.EventHandler) (*manager.Manager, func(), error) { - return func(_ *config.Config, _ manager.Options, _ ddebpf.EventHandler) (*manager.Manager, func(), error) { +func prebuiltLoaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { + return func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { return nil, closeFn, err } } @@ -216,7 +216,7 @@ func runFallbackTests(t *testing.T, desc string, coreErr, rcErr bool, tests []st cfg.AllowPrebuiltFallback = te.allowPrebuiltFallback prevOffsetGuessingRun := offsetGuessingRun - _, closeFn, tracerType, err := LoadTracer(cfg, manager.Options{}, nil, nil) + _, closeFn, tracerType, err := LoadTracer(cfg, manager.Options{}, nil) if te.err == nil { assert.NoError(t, err, "%+v", te) } else { @@ -251,12 +251,12 @@ func TestCORETracerSupported(t *testing.T) { }) coreCalled := false - coreTracerLoader = func(*config.Config, manager.Options, ddebpf.EventHandler) (*manager.Manager, func(), error) { + coreTracerLoader = func(*config.Config, manager.Options, *perf.EventHandler) (*manager.Manager, func(), error) { coreCalled = true return nil, nil, nil } prebuiltCalled := false - prebuiltTracerLoader = func(*config.Config, manager.Options, ddebpf.EventHandler) (*manager.Manager, func(), error) { + prebuiltTracerLoader = func(*config.Config, manager.Options, *perf.EventHandler) (*manager.Manager, func(), error) { prebuiltCalled = true return nil, nil, nil } @@ -270,7 +270,7 @@ func TestCORETracerSupported(t *testing.T) { cfg := config.New() cfg.EnableCORE = true cfg.AllowRuntimeCompiledFallback = false - _, _, _, err = LoadTracer(cfg, manager.Options{}, nil, nil) + _, _, _, err = LoadTracer(cfg, manager.Options{}, nil) assert.False(t, prebuiltCalled) if kv < kernel.VersionCode(4, 4, 128) && platform != "centos" && 
platform != "redhat" { assert.False(t, coreCalled) @@ -283,7 +283,7 @@ func TestCORETracerSupported(t *testing.T) { coreCalled = false prebuiltCalled = false cfg.AllowRuntimeCompiledFallback = true - _, _, _, err = LoadTracer(cfg, manager.Options{}, nil, nil) + _, _, _, err = LoadTracer(cfg, manager.Options{}, nil) assert.NoError(t, err) if kv < kernel.VersionCode(4, 4, 128) && platform != "centos" && platform != "redhat" { assert.False(t, coreCalled) @@ -296,7 +296,7 @@ func TestCORETracerSupported(t *testing.T) { func TestDefaultKprobeMaxActiveSet(t *testing.T) { prevLoader := tracerLoaderFromAsset - tracerLoaderFromAsset = func(_ bytecode.AssetReader, _, _ bool, _ *config.Config, mgrOpts manager.Options, _ ddebpf.EventHandler) (*manager.Manager, func(), error) { + tracerLoaderFromAsset = func(_ bytecode.AssetReader, _, _ bool, _ *config.Config, mgrOpts manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { assert.Equal(t, mgrOpts.DefaultKProbeMaxActive, 128) return nil, nil, nil } @@ -306,7 +306,7 @@ func TestDefaultKprobeMaxActiveSet(t *testing.T) { cfg := config.New() cfg.EnableCORE = true cfg.AllowRuntimeCompiledFallback = false - _, _, _, err := LoadTracer(cfg, manager.Options{DefaultKProbeMaxActive: 128}, nil, nil) + _, _, _, err := LoadTracer(cfg, manager.Options{DefaultKProbeMaxActive: 128}, nil) require.NoError(t, err) }) @@ -314,7 +314,7 @@ func TestDefaultKprobeMaxActiveSet(t *testing.T) { cfg := config.New() cfg.EnableCORE = false cfg.AllowRuntimeCompiledFallback = false - _, _, _, err := LoadTracer(cfg, manager.Options{DefaultKProbeMaxActive: 128}, nil, nil) + _, _, _, err := LoadTracer(cfg, manager.Options{DefaultKProbeMaxActive: 128}, nil) require.NoError(t, err) }) @@ -322,7 +322,7 @@ func TestDefaultKprobeMaxActiveSet(t *testing.T) { cfg := config.New() cfg.EnableCORE = false cfg.AllowRuntimeCompiledFallback = true - _, _, _, err := LoadTracer(cfg, manager.Options{DefaultKProbeMaxActive: 128}, nil, nil) + _, _, _, err := 
LoadTracer(cfg, manager.Options{DefaultKProbeMaxActive: 128}, nil) require.NoError(t, err) }) } diff --git a/pkg/network/tracer/connection/perf_batching.go b/pkg/network/tracer/connection/perf_batching.go index 9c20044cde3daf..93754574a10f2c 100644 --- a/pkg/network/tracer/connection/perf_batching.go +++ b/pkg/network/tracer/connection/perf_batching.go @@ -8,6 +8,7 @@ package connection import ( + "errors" "fmt" "time" @@ -17,6 +18,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/network" netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" + ddsync "github.com/DataDog/datadog-agent/pkg/util/sync" ) // perfBatchManager is responsible for two things: @@ -27,16 +29,18 @@ import ( // The motivation is to impose an upper limit on how long a TCP close connection // event remains stored in the eBPF map before being processed by the NetworkAgent. type perfBatchManager struct { - batchMap *maps.GenericMap[uint32, netebpf.Batch] - extractor *batchExtractor - ch *cookieHasher + batchMap *maps.GenericMap[uint32, netebpf.Batch] + extractor *batchExtractor + ch *cookieHasher + connGetter ddsync.PoolGetter[network.ConnectionStats] + callback func(stats *network.ConnectionStats) } // newPerfBatchManager returns a new `PerfBatchManager` and initializes the // eBPF map that holds the tcp_close batch objects. 
-func newPerfBatchManager(batchMap *maps.GenericMap[uint32, netebpf.Batch], extractor *batchExtractor) (*perfBatchManager, error) { +func newPerfBatchManager(batchMap *maps.GenericMap[uint32, netebpf.Batch], extractor *batchExtractor, getter ddsync.PoolGetter[network.ConnectionStats], callback func(stats *network.ConnectionStats)) (*perfBatchManager, error) { if batchMap == nil { - return nil, fmt.Errorf("batchMap is nil") + return nil, errors.New("batchMap is nil") } for cpu := uint32(0); cpu < uint32(extractor.NumCPUs()); cpu++ { @@ -51,25 +55,18 @@ func newPerfBatchManager(batchMap *maps.GenericMap[uint32, netebpf.Batch], extra } return &perfBatchManager{ - batchMap: batchMap, - extractor: extractor, - ch: newCookieHasher(), + batchMap: batchMap, + extractor: extractor, + ch: newCookieHasher(), + connGetter: getter, + callback: callback, }, nil } -// ExtractBatchInto extracts from the given batch all connections that haven't been processed yet. -func (p *perfBatchManager) ExtractBatchInto(buffer *network.ConnectionBuffer, b *netebpf.Batch) { - for rc := p.extractor.NextConnection(b); rc != nil; rc = p.extractor.NextConnection(b) { - conn := buffer.Next() - populateConnStats(conn, &rc.Tup, &rc.Conn_stats, p.ch) - updateTCPStats(conn, &rc.Tcp_stats) - } -} - -// GetPendingConns return all connections that are in batches that are not yet full. +// Flush return all connections that are in batches that are not yet full. // It tracks which connections have been processed by this call, by batch id. // This prevents double-processing of connections between GetPendingConns and Extract. 
-func (p *perfBatchManager) GetPendingConns(buffer *network.ConnectionBuffer) { +func (p *perfBatchManager) Flush() { b := new(netebpf.Batch) for cpu := uint32(0); cpu < uint32(p.extractor.NumCPUs()); cpu++ { err := p.batchMap.Lookup(&cpu, b) @@ -78,20 +75,23 @@ func (p *perfBatchManager) GetPendingConns(buffer *network.ConnectionBuffer) { } for rc := p.extractor.NextConnection(b); rc != nil; rc = p.extractor.NextConnection(b) { - c := buffer.Next() - populateConnStats(c, &rc.Tup, &rc.Conn_stats, p.ch) - updateTCPStats(c, &rc.Tcp_stats) + c := p.connGetter.Get() + c.FromConn(rc) + p.ch.Hash(c) + p.callback(c) } } + // indicate we are done with all pending connection + p.callback(nil) p.extractor.CleanupExpiredState(time.Now()) } -func newConnBatchManager(mgr *manager.Manager, extractor *batchExtractor) (*perfBatchManager, error) { +func newConnBatchManager(mgr *manager.Manager, extractor *batchExtractor, connGetter ddsync.PoolGetter[network.ConnectionStats], closedCallback func(stats *network.ConnectionStats)) (*perfBatchManager, error) { connCloseMap, err := maps.GetMap[uint32, netebpf.Batch](mgr, probes.ConnCloseBatchMap) if err != nil { return nil, fmt.Errorf("unable to get map %s: %s", probes.ConnCloseBatchMap, err) } - batchMgr, err := newPerfBatchManager(connCloseMap, extractor) + batchMgr, err := newPerfBatchManager(connCloseMap, extractor, connGetter, closedCallback) if err != nil { return nil, err } diff --git a/pkg/network/tracer/connection/perf_batching_test.go b/pkg/network/tracer/connection/perf_batching_test.go index c7d22aaff83aa8..82095728412c4f 100644 --- a/pkg/network/tracer/connection/perf_batching_test.go +++ b/pkg/network/tracer/connection/perf_batching_test.go @@ -19,6 +19,7 @@ import ( ebpfmaps "github.com/DataDog/datadog-agent/pkg/ebpf/maps" "github.com/DataDog/datadog-agent/pkg/network" netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" + ddsync "github.com/DataDog/datadog-agent/pkg/util/sync" ) const ( @@ -26,7 +27,15 @@ const ( ) 
func TestGetPendingConns(t *testing.T) { - manager := newTestBatchManager(t) + var pendingConns []*network.ConnectionStats + flushDone := make(chan struct{}) + manager := newTestBatchManager(t, func(conn *network.ConnectionStats) { + if conn == nil { + flushDone <- struct{}{} + return + } + pendingConns = append(pendingConns, conn) + }) batch := new(netebpf.Batch) batch.Id = 0 @@ -41,9 +50,8 @@ func TestGetPendingConns(t *testing.T) { } updateBatch() - buffer := network.NewConnectionBuffer(256, 256) - manager.GetPendingConns(buffer) - pendingConns := buffer.Connections() + go manager.Flush() + <-flushDone assert.GreaterOrEqual(t, len(pendingConns), 2) for _, pid := range []uint32{pidMax + 1, pidMax + 2} { found := false @@ -64,9 +72,9 @@ func TestGetPendingConns(t *testing.T) { updateBatch() // We should now get only the connection that hasn't been processed before - buffer.Reset() - manager.GetPendingConns(buffer) - pendingConns = buffer.Connections() + go manager.Flush() + pendingConns = pendingConns[:0] + <-flushDone assert.GreaterOrEqual(t, len(pendingConns), 1) var found bool for _, p := range pendingConns { @@ -80,7 +88,12 @@ func TestGetPendingConns(t *testing.T) { } func TestPerfBatchStateCleanup(t *testing.T) { - manager := newTestBatchManager(t) + flushDone := make(chan struct{}) + manager := newTestBatchManager(t, func(stats *network.ConnectionStats) { + if stats == nil { + flushDone <- struct{}{} + } + }) manager.extractor.expiredStateInterval = 100 * time.Millisecond batch := new(netebpf.Batch) @@ -93,14 +106,15 @@ func TestPerfBatchStateCleanup(t *testing.T) { err := manager.batchMap.Put(&cpu, batch) require.NoError(t, err) - buffer := network.NewConnectionBuffer(256, 256) - manager.GetPendingConns(buffer) + go manager.Flush() + <-flushDone _, ok := manager.extractor.stateByCPU[cpu].processed[batch.Id] require.True(t, ok) assert.Equal(t, uint16(2), manager.extractor.stateByCPU[cpu].processed[batch.Id].offset) 
manager.extractor.CleanupExpiredState(time.Now().Add(manager.extractor.expiredStateInterval)) - manager.GetPendingConns(buffer) + go manager.Flush() + <-flushDone // state should not have been cleaned up, since no more connections have happened _, ok = manager.extractor.stateByCPU[cpu].processed[batch.Id] @@ -108,7 +122,7 @@ func TestPerfBatchStateCleanup(t *testing.T) { assert.Equal(t, uint16(2), manager.extractor.stateByCPU[cpu].processed[batch.Id].offset) } -func newTestBatchManager(t *testing.T) *perfBatchManager { +func newTestBatchManager(t *testing.T, callback func(*network.ConnectionStats)) *perfBatchManager { require.NoError(t, rlimit.RemoveMemlock()) m, err := ebpf.NewMap(&ebpf.MapSpec{ Type: ebpf.Hash, @@ -122,7 +136,8 @@ func newTestBatchManager(t *testing.T) *perfBatchManager { gm, err := ebpfmaps.Map[uint32, netebpf.Batch](m) require.NoError(t, err) extractor := newBatchExtractor(numTestCPUs) - mgr, err := newPerfBatchManager(gm, extractor) + connPool := ddsync.NewDefaultTypedPool[network.ConnectionStats]() + mgr, err := newPerfBatchManager(gm, extractor, connPool, callback) require.NoError(t, err) return mgr } diff --git a/pkg/network/tracer/connection/tcp_close_consumer.go b/pkg/network/tracer/connection/tcp_close_consumer.go index 3c9ff45dba9f0e..e35117316d4565 100644 --- a/pkg/network/tracer/connection/tcp_close_consumer.go +++ b/pkg/network/tracer/connection/tcp_close_consumer.go @@ -9,15 +9,13 @@ package connection import ( "sync" - "time" - "unsafe" - ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" + "github.com/DataDog/datadog-agent/pkg/ebpf/perf" "github.com/DataDog/datadog-agent/pkg/network" - netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" "github.com/DataDog/datadog-agent/pkg/status/health" "github.com/DataDog/datadog-agent/pkg/telemetry" "github.com/DataDog/datadog-agent/pkg/util/log" + ddsync "github.com/DataDog/datadog-agent/pkg/util/sync" ) const closeConsumerModuleName = "network_tracer__ebpf" @@ -25,30 +23,29 @@ const 
closeConsumerModuleName = "network_tracer__ebpf" // Telemetry var closeConsumerTelemetry = struct { perfReceived telemetry.Counter - perfLost telemetry.Counter }{ telemetry.NewCounter(closeConsumerModuleName, "closed_conn_polling_received", []string{}, "Counter measuring the number of closed connections received"), - telemetry.NewCounter(closeConsumerModuleName, "closed_conn_polling_lost", []string{}, "Counter measuring the number of closed connection batches lost (were transmitted from ebpf but never received)"), } type tcpCloseConsumer struct { - eventHandler ddebpf.EventHandler - batchManager *perfBatchManager - requests chan chan struct{} - buffer *network.ConnectionBuffer - once sync.Once - closed chan struct{} - ch *cookieHasher + requests chan chan struct{} + once sync.Once + closed chan struct{} + + flusher perf.Flushable + callback func(*network.ConnectionStats) + releaser ddsync.PoolReleaser[network.ConnectionStats] + flushChannel chan chan struct{} } -func newTCPCloseConsumer(eventHandler ddebpf.EventHandler, batchManager *perfBatchManager) *tcpCloseConsumer { +func newTCPCloseConsumer(flusher perf.Flushable, releaser ddsync.PoolReleaser[network.ConnectionStats]) *tcpCloseConsumer { return &tcpCloseConsumer{ - eventHandler: eventHandler, - batchManager: batchManager, requests: make(chan chan struct{}), - buffer: network.NewConnectionBuffer(netebpf.BatchSize, netebpf.BatchSize), closed: make(chan struct{}), - ch: newCookieHasher(), + flusher: flusher, + releaser: releaser, + callback: func(*network.ConnectionStats) {}, + flushChannel: make(chan chan struct{}, 1), } } @@ -75,101 +72,47 @@ func (c *tcpCloseConsumer) Stop() { if c == nil { return } - c.eventHandler.Stop() c.once.Do(func() { close(c.closed) }) } -func (c *tcpCloseConsumer) extractConn(data []byte) { - ct := (*netebpf.Conn)(unsafe.Pointer(&data[0])) - conn := c.buffer.Next() - populateConnStats(conn, &ct.Tup, &ct.Conn_stats, c.ch) - updateTCPStats(conn, &ct.Tcp_stats) +func (c 
*tcpCloseConsumer) Callback(conn *network.ConnectionStats) { + // sentinel record post-flush + if conn == nil { + request := <-c.flushChannel + close(request) + return + } + + closeConsumerTelemetry.perfReceived.Inc() + c.callback(conn) + c.releaser.Put(conn) } func (c *tcpCloseConsumer) Start(callback func(*network.ConnectionStats)) { if c == nil { return } - health := health.RegisterLiveness("network-tracer") - - var ( - then = time.Now() - closedCount uint64 - lostSamplesCount uint64 - ) + c.callback = callback + liveHealth := health.RegisterLiveness("network-tracer") go func() { defer func() { - err := health.Deregister() + err := liveHealth.Deregister() if err != nil { log.Warnf("error de-registering health check: %s", err) } }() - dataChannel := c.eventHandler.DataChannel() - lostChannel := c.eventHandler.LostChannel() for { select { - case <-c.closed: return - case <-health.C: - case batchData, ok := <-dataChannel: - if !ok { - return - } - - l := len(batchData.Data) - switch { - case l >= netebpf.SizeofBatch: - batch := netebpf.ToBatch(batchData.Data) - c.batchManager.ExtractBatchInto(c.buffer, batch) - case l >= netebpf.SizeofConn: - c.extractConn(batchData.Data) - default: - log.Errorf("unknown type received from perf buffer, skipping. 
data size=%d, expecting %d or %d", len(batchData.Data), netebpf.SizeofConn, netebpf.SizeofBatch) - continue - } - - closeConsumerTelemetry.perfReceived.Add(float64(c.buffer.Len())) - closedCount += uint64(c.buffer.Len()) - conns := c.buffer.Connections() - for i := range conns { - callback(&conns[i]) - } - c.buffer.Reset() - batchData.Done() - // lost events only occur when using perf buffers - case lc, ok := <-lostChannel: - if !ok { - return - } - closeConsumerTelemetry.perfLost.Add(float64(lc)) - lostSamplesCount += lc + case <-liveHealth.C: case request := <-c.requests: - oneTimeBuffer := network.NewConnectionBuffer(32, 32) - c.batchManager.GetPendingConns(oneTimeBuffer) - conns := oneTimeBuffer.Connections() - for i := range conns { - callback(&conns[i]) - } - close(request) - - closedCount += uint64(oneTimeBuffer.Len()) - now := time.Now() - elapsed := now.Sub(then) - then = now - log.Debugf( - "tcp close summary: closed_count=%d elapsed=%s closed_rate=%.2f/s lost_samples_count=%d", - closedCount, - elapsed, - float64(closedCount)/elapsed.Seconds(), - lostSamplesCount, - ) - closedCount = 0 - lostSamplesCount = 0 + c.flushChannel <- request + c.flusher.Flush() } } }() diff --git a/pkg/network/tracer/connection/tcp_close_consumer_test.go b/pkg/network/tracer/connection/tcp_close_consumer_test.go index 96db4d9bfc04cc..6bdbf4024962eb 100644 --- a/pkg/network/tracer/connection/tcp_close_consumer_test.go +++ b/pkg/network/tracer/connection/tcp_close_consumer_test.go @@ -11,15 +11,10 @@ import ( "testing" "github.com/stretchr/testify/require" - - "github.com/DataDog/datadog-agent/pkg/ebpf" ) func TestTcpCloseConsumerStopRace(t *testing.T) { - pf := ebpf.NewPerfHandler(10) - require.NotNil(t, pf) - - c := newTCPCloseConsumer(pf, nil) + c := newTCPCloseConsumer(nil, nil) require.NotNil(t, c) c.Stop() diff --git a/pkg/network/tracer/connection/util/conn_tracer.go b/pkg/network/tracer/connection/util/conn_tracer.go index 1fc7e129340582..811c656e7a43ad 100644 --- 
a/pkg/network/tracer/connection/util/conn_tracer.go +++ b/pkg/network/tracer/connection/util/conn_tracer.go @@ -14,16 +14,10 @@ import ( manager "github.com/DataDog/ebpf-manager" cebpf "github.com/cilium/ebpf" - "github.com/cilium/ebpf/asm" - "github.com/DataDog/datadog-agent/pkg/ebpf" - ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" "github.com/DataDog/datadog-agent/pkg/network" - "github.com/DataDog/datadog-agent/pkg/network/config" netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" - "github.com/DataDog/datadog-agent/pkg/network/ebpf/probes" "github.com/DataDog/datadog-agent/pkg/process/util" - "github.com/DataDog/datadog-agent/pkg/util/log" ) // toPowerOf2 converts a number to its nearest power of 2 @@ -32,9 +26,9 @@ func toPowerOf2(x int) int { return int(math.Pow(2, math.Round(log2))) } -// computeDefaultClosedConnRingBufferSize is the default buffer size of the ring buffer for closed connection events. +// ComputeDefaultClosedConnRingBufferSize is the default buffer size of the ring buffer for closed connection events. // Must be a power of 2 and a multiple of the page size -func computeDefaultClosedConnRingBufferSize() int { +func ComputeDefaultClosedConnRingBufferSize() int { numCPUs, err := cebpf.PossibleCPU() if err != nil { numCPUs = 1 @@ -42,68 +36,12 @@ func computeDefaultClosedConnRingBufferSize() int { return 8 * toPowerOf2(numCPUs) * os.Getpagesize() } -// computeDefaultClosedConnPerfBufferSize is the default buffer size of the perf buffer for closed connection events. +// ComputeDefaultClosedConnPerfBufferSize is the default buffer size of the perf buffer for closed connection events. 
// Must be a multiple of the page size -func computeDefaultClosedConnPerfBufferSize() int { +func ComputeDefaultClosedConnPerfBufferSize() int { return 8 * os.Getpagesize() } -// EnableRingbuffersViaMapEditor sets up the ring buffer for closed connection events via a map editor -func EnableRingbuffersViaMapEditor(mgrOpts *manager.Options) { - mgrOpts.MapSpecEditors[probes.ConnCloseEventMap] = manager.MapSpecEditor{ - Type: cebpf.RingBuf, - MaxEntries: uint32(computeDefaultClosedConnRingBufferSize()), - KeySize: 0, - ValueSize: 0, - EditorFlag: manager.EditType | manager.EditMaxEntries | manager.EditKeyValue, - } -} - -// SetupHandler sets up the closed connection event handler -func SetupHandler(eventHandler ebpf.EventHandler, mgr *ebpf.Manager, cfg *config.Config, perfSize int, mapName probes.BPFMapName) { - switch handler := eventHandler.(type) { - case *ebpf.RingBufferHandler: - log.Infof("Setting up connection handler for map %v with ring buffer", mapName) - rb := &manager.RingBuffer{ - Map: manager.Map{Name: mapName}, - RingBufferOptions: manager.RingBufferOptions{ - RecordGetter: handler.RecordGetter, - RecordHandler: handler.RecordHandler, - TelemetryEnabled: cfg.InternalTelemetryEnabled, - }, - } - mgr.RingBuffers = append(mgr.RingBuffers, rb) - ebpftelemetry.ReportRingBufferTelemetry(rb) - case *ebpf.PerfHandler: - log.Infof("Setting up connection handler for map %v with perf buffer", mapName) - pm := &manager.PerfMap{ - Map: manager.Map{Name: mapName}, - PerfMapOptions: manager.PerfMapOptions{ - PerfRingBufferSize: perfSize, - Watermark: 1, - RecordHandler: handler.RecordHandler, - LostHandler: handler.LostHandler, - RecordGetter: handler.RecordGetter, - TelemetryEnabled: cfg.InternalTelemetryEnabled, - }, - } - mgr.PerfMaps = append(mgr.PerfMaps, pm) - ebpftelemetry.ReportPerfMapTelemetry(pm) - helperCallRemover := ebpf.NewHelperCallRemover(asm.FnRingbufOutput) - err := helperCallRemover.BeforeInit(mgr.Manager, mgr.Name, nil) - if err != nil { - 
log.Error("Failed to remove helper calls from eBPF programs: ", err) - } - default: - log.Errorf("Failed to set up connection handler for map %v: unknown event handler type", mapName) - } -} - -// SetupClosedConnHandler sets up the closed connection event handler -func SetupClosedConnHandler(connCloseEventHandler ebpf.EventHandler, mgr *ebpf.Manager, cfg *config.Config) { - SetupHandler(connCloseEventHandler, mgr, cfg, computeDefaultClosedConnPerfBufferSize(), probes.ConnCloseEventMap) -} - // AddBoolConst modifies the options to include a constant editor for a boolean value func AddBoolConst(options *manager.Options, name string, flag bool) { val := uint64(1) diff --git a/pkg/util/encoding/binary.go b/pkg/util/encoding/binary.go new file mode 100644 index 00000000000000..88dc5b4b864b67 --- /dev/null +++ b/pkg/util/encoding/binary.go @@ -0,0 +1,43 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +// Package encoding is for utilities relating to the encoding package from the stdlib +package encoding + +import ( + "encoding" +) + +// BinaryUnmarshalCallback returns a function that will decode the argument byte slice into *T +// using `newFn` to create an instance of *T and the encoding.BinaryUnmarshaler interface to do the actual conversion. +// `callback` will be called with the resulting *T. +// If the argument byte slice is empty, callback will be called with `nil`. +// Unmarshalling errors will be provided to the callback as the second argument. The data argument to the callback +// may still be non-nil even if there was an error. This allows the callback to handle the allocated object, even +// in the face of errors. +// This function panics if `*T` does not implement encoding.BinaryUnmarshaler. 
+func BinaryUnmarshalCallback[T any](newFn func() *T, callback func(*T, error)) func(buf []byte) { + // we use `any` as the type constraint rather than encoding.BinaryUnmarshaler because we are not allowed to + // callback with `nil` in the latter case. There is a workaround, but it requires specifying two type constraints. + // For sake of cleanliness, we resort to a runtime check here. + if _, ok := any(new(T)).(encoding.BinaryUnmarshaler); !ok { + panic("pointer type *T must implement encoding.BinaryUnmarshaler") + } + + return func(buf []byte) { + if len(buf) == 0 { + callback(nil, nil) + return + } + + d := newFn() + if err := any(d).(encoding.BinaryUnmarshaler).UnmarshalBinary(buf); err != nil { + // pass d here so callback can choose how to deal with the data + callback(d, err) + return + } + callback(d, nil) + } +} diff --git a/pkg/util/slices/map.go b/pkg/util/slices/map.go new file mode 100644 index 00000000000000..049d4f4a25e50f --- /dev/null +++ b/pkg/util/slices/map.go @@ -0,0 +1,16 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +// Package slices are utilities to deal with slices +package slices + +// Map returns a new slice with the result of applying fn to each element. 
+func Map[S ~[]E, E any, RE any](s S, fn func(E) RE) []RE { + x := make([]RE, 0, len(s)) + for _, v := range s { + x = append(x, fn(v)) + } + return x +} diff --git a/pkg/util/sync/pool.go b/pkg/util/sync/pool.go index fdf1872c666ad5..231a6a53b9efce 100644 --- a/pkg/util/sync/pool.go +++ b/pkg/util/sync/pool.go @@ -8,6 +8,22 @@ package sync import "sync" +// PoolReleaser is interface that wraps a sync.Pool Put function +type PoolReleaser[K any] interface { + Put(*K) +} + +// PoolGetter is interface that wraps a sync.Pool Get function +type PoolGetter[K any] interface { + Get() *K +} + +// Pool is a combination interface of PoolGetter and PoolReleaser +type Pool[K any] interface { + PoolGetter[K] + PoolReleaser[K] +} + // TypedPool is a type-safe version of sync.Pool type TypedPool[K any] struct { p sync.Pool From 79a6bcc956e58ec7f2a53c6909b8ec325de87bd1 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 25 Nov 2024 11:39:28 -0800 Subject: [PATCH 02/23] add tests --- .github/CODEOWNERS | 1 + pkg/util/encoding/binary_test.go | 69 ++++++++++++++++++++++++++++++++ pkg/util/slices/map_test.go | 19 +++++++++ 3 files changed, 89 insertions(+) create mode 100644 pkg/util/encoding/binary_test.go create mode 100644 pkg/util/slices/map_test.go diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a1ca0d547e9f9f..ad03ff756ddfca 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -454,6 +454,7 @@ /pkg/util/crio/ @DataDog/container-integrations /pkg/util/docker/ @DataDog/container-integrations /pkg/util/ecs/ @DataDog/container-integrations +/pkg/util/encoding/ @DataDog/ebpf-platform /pkg/util/funcs/ @DataDog/ebpf-platform /pkg/util/gpu/ @DataDog/container-platform /pkg/util/kernel/ @DataDog/ebpf-platform diff --git a/pkg/util/encoding/binary_test.go b/pkg/util/encoding/binary_test.go new file mode 100644 index 00000000000000..58a4b345968376 --- /dev/null +++ b/pkg/util/encoding/binary_test.go @@ -0,0 +1,69 @@ +// Unless explicitly stated otherwise all files in 
this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +package encoding + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +type emptyTestType struct { +} + +func (tt *emptyTestType) UnmarshalBinary(_ []byte) error { + return nil +} + +type errorTestType struct{} + +func (tt *errorTestType) UnmarshalBinary(_ []byte) error { + return errors.New("error") +} + +type dataTestType struct { + buf []byte +} + +func (tt *dataTestType) UnmarshalBinary(data []byte) error { + tt.buf = data + return nil +} + +func TestBinaryUnmarshalCallback(t *testing.T) { + assert.Panics(t, func() { + type x struct{} + BinaryUnmarshalCallback(func() *x { + return new(x) + }, func(x *x, err error) {}) + }) + + cb := BinaryUnmarshalCallback(func() *emptyTestType { + return new(emptyTestType) + }, func(x *emptyTestType, err error) { + assert.Nil(t, x) + assert.NoError(t, err) + }) + cb(nil) + cb([]byte{}) + + cb = BinaryUnmarshalCallback(func() *errorTestType { + return new(errorTestType) + }, func(x *errorTestType, err error) { + assert.NotNil(t, x) + assert.Error(t, err) + }) + cb([]byte{1, 2}) + + cb = BinaryUnmarshalCallback(func() *dataTestType { + return new(dataTestType) + }, func(x *dataTestType, err error) { + assert.Equal(t, []byte{1, 2}, x.buf) + assert.NoError(t, err) + }) + cb([]byte{1, 2}) +} diff --git a/pkg/util/slices/map_test.go b/pkg/util/slices/map_test.go new file mode 100644 index 00000000000000..0bf48b54a5a21b --- /dev/null +++ b/pkg/util/slices/map_test.go @@ -0,0 +1,19 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. 
+ +package slices + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestMap(t *testing.T) { + x := Map([]int{1, 2, 4, 8}, func(v int) int { + return v * v + }) + assert.Equal(t, []int{1, 4, 16, 64}, x) +} From 0b2e89bfbc68e78b17efb0aa85ba44dd491074e5 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 25 Nov 2024 12:08:11 -0800 Subject: [PATCH 03/23] fix lint --- pkg/util/encoding/binary_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/util/encoding/binary_test.go b/pkg/util/encoding/binary_test.go index 58a4b345968376..05450f10fe4069 100644 --- a/pkg/util/encoding/binary_test.go +++ b/pkg/util/encoding/binary_test.go @@ -39,7 +39,7 @@ func TestBinaryUnmarshalCallback(t *testing.T) { type x struct{} BinaryUnmarshalCallback(func() *x { return new(x) - }, func(x *x, err error) {}) + }, func(_ *x, _ error) {}) }) cb := BinaryUnmarshalCallback(func() *emptyTestType { From f1cac6a15c342eb883fd15a76036392493d25b63 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Tue, 26 Nov 2024 10:53:49 -0800 Subject: [PATCH 04/23] address review comments --- pkg/ebpf/perf/event.go | 24 ++++++++++++++++++------ pkg/network/event_common_linux.go | 20 +++++++++----------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index d338c08d9b6941..0b1cd1c159a9f7 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -41,11 +41,17 @@ type EventHandler struct { // EventHandlerOptions are the options controlling the EventHandler. // MapName and Handler are required options. type EventHandlerOptions struct { + // MapName specifies the name of the map. This field is required. MapName string + // Handler is the callback for data received from the perf/ring buffer. This field is required. Handler func([]byte) - TelemetryEnabled bool - UseRingBuffer bool + // TelemetryEnabled specifies whether to collect usage telemetry from the perf/ring buffer. 
+ TelemetryEnabled bool + // UseRingBuffer specifies whether to use a ring buffer + UseRingBuffer bool + // UpgradePerfBuffer specifies if you wish to upgrade a perf buffer to a ring buffer. + // This only takes effect if UseRingBuffer is true. UpgradePerfBuffer bool PerfOptions PerfBufferOptions @@ -58,7 +64,7 @@ type EventHandlerOptions struct { type PerfBufferOptions struct { BufferSize int - // Watermark - The reader will start processing samples once their sizes in the perf ring buffer + // Watermark - The reader will start processing samples once their sizes in the perf buffer // exceed this value. Must be smaller than PerfRingBufferSize. Defaults to the manager value if not set. Watermark int @@ -94,14 +100,17 @@ func (e *EventHandler) Init(mgr *manager.Manager, mgrOpts *manager.Options) erro return fmt.Errorf("unable to find map spec %q", e.opts.MapName) } - if e.opts.UseRingBuffer && features.HaveMapType(ebpf.RingBuf) == nil { + ringBuffersAvailable := features.HaveMapType(ebpf.RingBuf) == nil + if e.opts.UseRingBuffer && ringBuffersAvailable { if e.opts.UpgradePerfBuffer { if ms.Type != ebpf.PerfEventArray { return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.opts.MapName, ms.Type.String()) } UpgradePerfBuffer(mgr, mgrOpts, e.opts.MapName) - } else if ms.Type != ebpf.RingBuf { - return fmt.Errorf("map %q is not a ring buffer, got %q instead", e.opts.MapName, ms.Type.String()) + } else { + if ms.Type != ebpf.RingBuf { + return fmt.Errorf("map %q is not a ring buffer, got %q instead", e.opts.MapName, ms.Type.String()) + } } if ms.MaxEntries != uint32(e.opts.RingBufOptions.BufferSize) { @@ -148,6 +157,7 @@ func ResizeRingBuffer(mgrOpts *manager.Options, mapName string, bufferSize int) } func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { + // remove any existing perf buffers from manager mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { return perfMap.Name == e.opts.MapName }) @@ -174,6 +184,7 @@ 
func (e *EventHandler) perfRecordHandler(record *perf.Record, _ *manager.PerfMap } func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { + // remove any existing matching ring buffers from manager mgr.RingBuffers = slices.DeleteFunc(mgr.RingBuffers, func(ringBuf *manager.RingBuffer) bool { return ringBuf.Name == e.opts.MapName }) @@ -207,6 +218,7 @@ func UpgradePerfBuffer(mgr *manager.Manager, mgrOpts *manager.Options, mapName s specEditor.EditorFlag |= manager.EditType | manager.EditKeyValue mgrOpts.MapSpecEditors[mapName] = specEditor + // remove map from perf maps because it has been upgraded mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { return perfMap.Name == mapName }) diff --git a/pkg/network/event_common_linux.go b/pkg/network/event_common_linux.go index e70118200b1b03..b0644f44b16111 100644 --- a/pkg/network/event_common_linux.go +++ b/pkg/network/event_common_linux.go @@ -136,20 +136,18 @@ func (c *ConnectionStats) FromTupleAndStats(t *netebpf.ConnTuple, s *netebpf.Con // FromTCPStats populates relevant fields on ConnectionStats from the arguments func (c *ConnectionStats) FromTCPStats(tcpStats *netebpf.TCPStats) { - if c.Type != TCP { + if c.Type != TCP || tcpStats == nil { return } - if tcpStats != nil { - c.Monotonic.Retransmits = tcpStats.Retransmits - c.Monotonic.TCPEstablished = tcpStats.State_transitions >> netebpf.Established & 1 - c.Monotonic.TCPClosed = tcpStats.State_transitions >> netebpf.Close & 1 - c.RTT = tcpStats.Rtt - c.RTTVar = tcpStats.Rtt_var - if tcpStats.Failure_reason > 0 { - c.TCPFailures = map[uint16]uint32{ - tcpStats.Failure_reason: 1, - } + c.Monotonic.Retransmits = tcpStats.Retransmits + c.Monotonic.TCPEstablished = tcpStats.State_transitions >> netebpf.Established & 1 + c.Monotonic.TCPClosed = tcpStats.State_transitions >> netebpf.Close & 1 + c.RTT = tcpStats.Rtt + c.RTTVar = tcpStats.Rtt_var + if tcpStats.Failure_reason > 0 { + c.TCPFailures = map[uint16]uint32{ + 
tcpStats.Failure_reason: 1, } } } From 5bfb4d2ff2767b6e5bbb263d2678220b209ceb01 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Tue, 26 Nov 2024 10:55:46 -0800 Subject: [PATCH 05/23] add minor comments --- pkg/ebpf/perf/event.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 0b1cd1c159a9f7..a7022c707c7f83 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -103,16 +103,19 @@ func (e *EventHandler) Init(mgr *manager.Manager, mgrOpts *manager.Options) erro ringBuffersAvailable := features.HaveMapType(ebpf.RingBuf) == nil if e.opts.UseRingBuffer && ringBuffersAvailable { if e.opts.UpgradePerfBuffer { + // using ring buffers and upgrading from perf buffer if ms.Type != ebpf.PerfEventArray { return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.opts.MapName, ms.Type.String()) } UpgradePerfBuffer(mgr, mgrOpts, e.opts.MapName) } else { + // using ring buffers, but not upgrading from a perf buffer if ms.Type != ebpf.RingBuf { return fmt.Errorf("map %q is not a ring buffer, got %q instead", e.opts.MapName, ms.Type.String()) } } + // resize if necessary if ms.MaxEntries != uint32(e.opts.RingBufOptions.BufferSize) { ResizeRingBuffer(mgrOpts, e.opts.MapName, e.opts.RingBufOptions.BufferSize) } From 5bc0068e2d5336f1edc0f45842565918fc85f1d8 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 08:31:12 -0800 Subject: [PATCH 06/23] fix typo --- pkg/ebpf/perf/event.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index a7022c707c7f83..0fe26e444eefdd 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -32,7 +32,7 @@ type Flushable interface { } // EventHandler abstracts consuming data from a perf buffer or ring buffer (depending on availability and options). -// It handles upgrading maps from a ring buffer is desired, and unmarshalling into the desired data type. 
+// It handles upgrading maps from a ring buffer if desired, and unmarshalling into the desired data type. type EventHandler struct { f Flushable opts EventHandlerOptions From 4e882c51feec0554dfb900a9a23e30924681cf15 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 08:39:11 -0800 Subject: [PATCH 07/23] rename to Flusher --- pkg/ebpf/perf/event.go | 6 +++--- pkg/network/tracer/connection/ebpf_tracer.go | 2 +- pkg/network/tracer/connection/tcp_close_consumer.go | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 0fe26e444eefdd..f30a7903ecff25 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -26,15 +26,15 @@ import ( var perfPool = ddsync.NewDefaultTypedPool[perf.Record]() var ringbufPool = ddsync.NewDefaultTypedPool[ringbuf.Record]() -// Flushable is an interface for objects that support flushing -type Flushable interface { +// Flusher is an interface for objects that support flushing +type Flusher interface { Flush() } // EventHandler abstracts consuming data from a perf buffer or ring buffer (depending on availability and options). // It handles upgrading maps from a ring buffer if desired, and unmarshalling into the desired data type. 
type EventHandler struct { - f Flushable + f Flusher opts EventHandlerOptions } diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index f30e6b87bdb9e6..0ff5629d1f52a8 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -250,7 +250,7 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace m.DumpHandler = dumpMapsHandler ddebpf.AddNameMappings(m, "npm_tracer") - var flusher perf.Flushable = connCloseEventHandler + var flusher perf.Flusher = connCloseEventHandler if config.KernelBatchingEnabled { flusher, err = newConnBatchManager(m, extractor, connPool, tr.closedPerfCallback) if err != nil { diff --git a/pkg/network/tracer/connection/tcp_close_consumer.go b/pkg/network/tracer/connection/tcp_close_consumer.go index e35117316d4565..3dbd33dd55ede1 100644 --- a/pkg/network/tracer/connection/tcp_close_consumer.go +++ b/pkg/network/tracer/connection/tcp_close_consumer.go @@ -32,13 +32,13 @@ type tcpCloseConsumer struct { once sync.Once closed chan struct{} - flusher perf.Flushable + flusher perf.Flusher callback func(*network.ConnectionStats) releaser ddsync.PoolReleaser[network.ConnectionStats] flushChannel chan chan struct{} } -func newTCPCloseConsumer(flusher perf.Flushable, releaser ddsync.PoolReleaser[network.ConnectionStats]) *tcpCloseConsumer { +func newTCPCloseConsumer(flusher perf.Flusher, releaser ddsync.PoolReleaser[network.ConnectionStats]) *tcpCloseConsumer { return &tcpCloseConsumer{ requests: make(chan chan struct{}), closed: make(chan struct{}), From d01218c6650640ef31a02043e2801aa99d9e55b9 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 08:41:12 -0800 Subject: [PATCH 08/23] rename to custom batching --- pkg/config/setup/system_probe.go | 2 +- pkg/network/config/config.go | 6 +++--- pkg/network/tracer/connection/ebpf_tracer.go | 8 ++++---- pkg/network/tracer/connection/fentry/probes.go | 6 +++--- 
pkg/network/tracer/connection/kprobe/config.go | 10 +++++----- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index 2551b80e090deb..5dce44fb3ef64b 100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -213,7 +213,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnvAndSetDefault(join(spNS, "enable_conntrack_all_namespaces"), true, "DD_SYSTEM_PROBE_ENABLE_CONNTRACK_ALL_NAMESPACES") cfg.BindEnvAndSetDefault(join(netNS, "enable_protocol_classification"), true, "DD_ENABLE_PROTOCOL_CLASSIFICATION") cfg.BindEnvAndSetDefault(join(netNS, "enable_ringbuffers"), true, "DD_SYSTEM_PROBE_NETWORK_ENABLE_RINGBUFFERS") - cfg.BindEnvAndSetDefault(join(netNS, "enable_kernel_batching"), false, "DD_SYSTEM_PROBE_NETWORK_ENABLE_KERNEL_BATCHING") + cfg.BindEnvAndSetDefault(join(netNS, "enable_custom_batching"), false, "DD_SYSTEM_PROBE_NETWORK_ENABLE_CUSTOM_BATCHING") cfg.BindEnvAndSetDefault(join(netNS, "enable_tcp_failed_connections"), true, "DD_SYSTEM_PROBE_NETWORK_ENABLE_FAILED_CONNS") cfg.BindEnvAndSetDefault(join(netNS, "ignore_conntrack_init_failure"), false, "DD_SYSTEM_PROBE_NETWORK_IGNORE_CONNTRACK_INIT_FAILURE") cfg.BindEnvAndSetDefault(join(netNS, "conntrack_init_timeout"), 10*time.Second) diff --git a/pkg/network/config/config.go b/pkg/network/config/config.go index 4f340ac1c2f8d5..c7fa6f4b3f95a6 100644 --- a/pkg/network/config/config.go +++ b/pkg/network/config/config.go @@ -289,8 +289,8 @@ type Config struct { // of netlink for receiving process events. 
EnableUSMEventStream bool - // KernelBatchingEnabled enables the use of custom batching for eBPF perf events with perf buffers - KernelBatchingEnabled bool + // CustomBatchingEnabled enables the use of custom batching for eBPF perf events with perf buffers + CustomBatchingEnabled bool } // New creates a config for the network tracer @@ -338,7 +338,7 @@ func New() *Config { ProtocolClassificationEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_protocol_classification")), NPMRingbuffersEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_ringbuffers")), - KernelBatchingEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_kernel_batching")), + CustomBatchingEnabled: cfg.GetBool(sysconfig.FullKeyPath(netNS, "enable_custom_batching")), EnableHTTPMonitoring: cfg.GetBool(sysconfig.FullKeyPath(smNS, "enable_http_monitoring")), EnableHTTP2Monitoring: cfg.GetBool(sysconfig.FullKeyPath(smNS, "enable_http2_monitoring")), diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index 0ff5629d1f52a8..48046da4fc17f7 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -205,8 +205,8 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace connPool := ddsync.NewDefaultTypedPool[network.ConnectionStats]() var extractor *batchExtractor - util.AddBoolConst(&mgrOptions, "batching_enabled", config.KernelBatchingEnabled) - if config.KernelBatchingEnabled { + util.AddBoolConst(&mgrOptions, "batching_enabled", config.CustomBatchingEnabled) + if config.CustomBatchingEnabled { numCPUs, err := ebpf.PossibleCPU() if err != nil { return nil, fmt.Errorf("could not determine number of CPUs: %w", err) @@ -251,7 +251,7 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace ddebpf.AddNameMappings(m, "npm_tracer") var flusher perf.Flusher = connCloseEventHandler - if config.KernelBatchingEnabled { + if 
config.CustomBatchingEnabled { flusher, err = newConnBatchManager(m, extractor, connPool, tr.closedPerfCallback) if err != nil { return nil, err @@ -322,7 +322,7 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw BufferSize: util.ComputeDefaultClosedConnRingBufferSize(), }, } - if config.KernelBatchingEnabled { + if config.CustomBatchingEnabled { eopts.PerfOptions.Watermark = 1 eopts.Handler = func(buf []byte) { l := len(buf) diff --git a/pkg/network/tracer/connection/fentry/probes.go b/pkg/network/tracer/connection/fentry/probes.go index cc407dbef8b6a3..02ad4d6937c7cf 100644 --- a/pkg/network/tracer/connection/fentry/probes.go +++ b/pkg/network/tracer/connection/fentry/probes.go @@ -153,7 +153,7 @@ func enabledPrograms(c *config.Config) (map[string]struct{}, error) { // enableProgram(enabled, sockFDLookupRet) // } - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProgram(enabled, tcpCloseReturn) } } @@ -168,7 +168,7 @@ func enabledPrograms(c *config.Config) (map[string]struct{}, error) { enableProgram(enabled, udpSendMsgReturn) enableProgram(enabled, udpSendSkb) - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProgram(enabled, udpDestroySockReturn) } } @@ -183,7 +183,7 @@ func enabledPrograms(c *config.Config) (map[string]struct{}, error) { enableProgram(enabled, udpv6SendMsgReturn) enableProgram(enabled, udpv6SendSkb) - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProgram(enabled, udpv6DestroySockReturn) } } diff --git a/pkg/network/tracer/connection/kprobe/config.go b/pkg/network/tracer/connection/kprobe/config.go index d927cb2b9b7aa8..7567b28e5c4d44 100644 --- a/pkg/network/tracer/connection/kprobe/config.go +++ b/pkg/network/tracer/connection/kprobe/config.go @@ -80,13 +80,13 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes enableProbe(enabled, probes.TCPReadSock) enableProbe(enabled, probes.TCPReadSockReturn) enableProbe(enabled, 
probes.TCPClose) - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProbe(enabled, probes.TCPCloseFlushReturn) } enableProbe(enabled, probes.TCPConnect) enableProbe(enabled, probes.TCPDone) - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProbe(enabled, probes.TCPDoneFlushReturn) } enableProbe(enabled, probes.TCPFinishConnect) @@ -102,7 +102,7 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes if c.CollectUDPv4Conns { enableProbe(enabled, probes.UDPDestroySock) - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProbe(enabled, probes.UDPDestroySockReturn) } enableProbe(enabled, selectVersionBasedProbe(runtimeTracer, kv, probes.IPMakeSkb, probes.IPMakeSkbPre4180, kv4180)) @@ -127,7 +127,7 @@ func enabledProbes(c *config.Config, runtimeTracer, coreTracer bool) (map[probes if c.CollectUDPv6Conns { enableProbe(enabled, probes.UDPv6DestroySock) - if c.KernelBatchingEnabled { + if c.CustomBatchingEnabled { enableProbe(enabled, probes.UDPv6DestroySockReturn) } if kv >= kv5180 || runtimeTracer { @@ -196,7 +196,7 @@ func protocolClassificationTailCalls(cfg *config.Config) []manager.TailCallRoute }, }, } - if cfg.KernelBatchingEnabled { + if cfg.CustomBatchingEnabled { tcs = append(tcs, manager.TailCallRoute{ ProgArrayName: probes.TCPCloseProgsMap, Key: 0, From 644d3fc82ae08caa8f34f81b153c4d5039077573 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 08:49:15 -0800 Subject: [PATCH 09/23] deprecate configs in sp namespace --- cmd/system-probe/config/adjust_npm.go | 2 ++ pkg/config/setup/system_probe.go | 3 +-- pkg/network/config/config.go | 6 +----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/cmd/system-probe/config/adjust_npm.go b/cmd/system-probe/config/adjust_npm.go index e1be10ae08d795..add4668cf738dd 100644 --- a/cmd/system-probe/config/adjust_npm.go +++ b/cmd/system-probe/config/adjust_npm.go @@ -26,6 +26,8 @@ const ( func adjustNetwork(cfg model.Config) 
{ ebpflessEnabled := cfg.GetBool(netNS("enable_ebpfless")) + deprecateInt(cfg, spNS("closed_connection_flush_threshold"), netNS("closed_connection_flush_threshold")) + limitMaxInt(cfg, spNS("max_conns_per_message"), maxConnsMessageBatchSize) if cfg.GetBool(spNS("disable_tcp")) { diff --git a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index 5dce44fb3ef64b..c4f5af5e63e48b 100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -195,8 +195,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnvAndSetDefault(join(spNS, "max_tracked_connections"), 65536) cfg.BindEnv(join(spNS, "max_closed_connections_buffered")) cfg.BindEnv(join(netNS, "max_failed_connections_buffered")) - cfg.BindEnvAndSetDefault(join(spNS, "closed_connection_flush_threshold"), 0) - cfg.BindEnvAndSetDefault(join(spNS, "closed_channel_size"), 500) + cfg.BindEnv(join(netNS, "closed_connection_flush_threshold")) cfg.BindEnvAndSetDefault(join(netNS, "closed_buffer_wakeup_count"), 5) cfg.BindEnvAndSetDefault(join(spNS, "max_connection_state_buffered"), 75000) diff --git a/pkg/network/config/config.go b/pkg/network/config/config.go index c7fa6f4b3f95a6..e14e4a139ad311 100644 --- a/pkg/network/config/config.go +++ b/pkg/network/config/config.go @@ -212,9 +212,6 @@ type Config struct { // EnableEbpfConntracker enables the ebpf based network conntracker. Used only for testing at the moment EnableEbpfConntracker bool - // ClosedChannelSize specifies the size for closed channel for the tracer - ClosedChannelSize int - // ClosedBufferWakeupCount specifies the number of events that will buffer in a perf buffer before userspace is woken up. 
ClosedBufferWakeupCount int @@ -321,8 +318,7 @@ func New() *Config { MaxTrackedConnections: uint32(cfg.GetInt64(sysconfig.FullKeyPath(spNS, "max_tracked_connections"))), MaxClosedConnectionsBuffered: uint32(cfg.GetInt64(sysconfig.FullKeyPath(spNS, "max_closed_connections_buffered"))), MaxFailedConnectionsBuffered: uint32(cfg.GetInt64(sysconfig.FullKeyPath(netNS, "max_failed_connections_buffered"))), - ClosedConnectionFlushThreshold: cfg.GetInt(sysconfig.FullKeyPath(spNS, "closed_connection_flush_threshold")), - ClosedChannelSize: cfg.GetInt(sysconfig.FullKeyPath(spNS, "closed_channel_size")), + ClosedConnectionFlushThreshold: cfg.GetInt(sysconfig.FullKeyPath(netNS, "closed_connection_flush_threshold")), ClosedBufferWakeupCount: cfg.GetInt(sysconfig.FullKeyPath(netNS, "closed_buffer_wakeup_count")), MaxConnectionsStateBuffered: cfg.GetInt(sysconfig.FullKeyPath(spNS, "max_connection_state_buffered")), ClientStateExpiry: 2 * time.Minute, From 3d14fdcdfe705534fab0aa14539031bf4543d919 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 10:49:02 -0800 Subject: [PATCH 10/23] use Modifier interface for event handler --- pkg/ebpf/manager.go | 12 +++++------- pkg/ebpf/perf/event.go | 18 ++++++++++++++++-- pkg/network/tracer/connection/fentry/tracer.go | 10 ++-------- pkg/network/tracer/connection/kprobe/tracer.go | 10 ++-------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pkg/ebpf/manager.go b/pkg/ebpf/manager.go index 2351e4bbee6092..06e790609a9ecf 100644 --- a/pkg/ebpf/manager.go +++ b/pkg/ebpf/manager.go @@ -74,13 +74,11 @@ type Modifier interface { // InitWithOptions is a wrapper around ebpf-manager.Manager.InitWithOptions func (m *Manager) InitWithOptions(bytecode io.ReaderAt, opts *manager.Options) error { - if bytecode != nil { - // we must load the ELF file before initialization, - // to build the collection specs, because some modifiers - // inspect these to make changes to the eBPF resources. 
- if err := m.LoadELF(bytecode); err != nil { - return fmt.Errorf("failed to load elf from reader: %w", err) - } + // we must load the ELF file before initialization, + // to build the collection specs, because some modifiers + // inspect these to make changes to the eBPF resources. + if err := m.LoadELF(bytecode); err != nil { + return fmt.Errorf("failed to load elf from reader: %w", err) } for _, mod := range m.EnabledModifiers { diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index f30a7903ecff25..021fed798369bd 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -19,6 +19,8 @@ import ( "github.com/cilium/ebpf/perf" "github.com/cilium/ebpf/ringbuf" + ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" + "github.com/DataDog/datadog-agent/pkg/ebpf/names" ebpfTelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" ddsync "github.com/DataDog/datadog-agent/pkg/util/sync" ) @@ -31,6 +33,9 @@ type Flusher interface { Flush() } +// compile time check to ensure this satisfies the Modifier interface +var _ ddebpf.Modifier = (*EventHandler)(nil) + // EventHandler abstracts consuming data from a perf buffer or ring buffer (depending on availability and options). // It handles upgrading maps from a ring buffer if desired, and unmarshalling into the desired data type. 
type EventHandler struct { @@ -93,8 +98,8 @@ func NewEventHandler(opts EventHandlerOptions) (*EventHandler, error) { return e, nil } -// Init must be called after ebpf-manager.Manager.LoadELF but before ebpf-manager.Manager.Init/InitWithOptions() -func (e *EventHandler) Init(mgr *manager.Manager, mgrOpts *manager.Options) error { +// BeforeInit implements the Modifier interface +func (e *EventHandler) BeforeInit(mgr *manager.Manager, _ names.ModuleName, mgrOpts *manager.Options) error { ms, _, _ := mgr.GetMapSpec(e.opts.MapName) if ms == nil { return fmt.Errorf("unable to find map spec %q", e.opts.MapName) @@ -130,6 +135,15 @@ func (e *EventHandler) Init(mgr *manager.Manager, mgrOpts *manager.Options) erro return nil } +// AfterInit implements the Modifier interface +func (e *EventHandler) AfterInit(_ *manager.Manager, _ names.ModuleName, _ *manager.Options) error { + return nil +} + +func (e *EventHandler) String() string { + return "EventHandler" +} + // MapType returns the ebpf.MapType of the underlying events map // This is only valid after calling Init. 
func (e *EventHandler) MapType() ebpf.MapType { diff --git a/pkg/network/tracer/connection/fentry/tracer.go b/pkg/network/tracer/connection/fentry/tracer.go index cc4315e5ce7662..e029ed215f2a13 100644 --- a/pkg/network/tracer/connection/fentry/tracer.go +++ b/pkg/network/tracer/connection/fentry/tracer.go @@ -36,7 +36,7 @@ func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHa return nil, nil, ErrorNotSupported } - m := ddebpf.NewManagerWithDefault(&manager.Manager{}, "network", &ebpftelemetry.ErrorsTelemetryModifier{}) + m := ddebpf.NewManagerWithDefault(&manager.Manager{}, "network", &ebpftelemetry.ErrorsTelemetryModifier{}, connCloseEventHandler) err := ddebpf.LoadCOREAsset(netebpf.ModuleFileName("tracer-fentry", config.BPFDebug), func(ar bytecode.AssetReader, o manager.Options) error { o.RLimit = mgrOpts.RLimit o.MapSpecEditors = mgrOpts.MapSpecEditors @@ -91,12 +91,6 @@ func initFentryTracer(ar bytecode.AssetReader, o manager.Options, config *config }) } - if err := m.LoadELF(ar); err != nil { - return fmt.Errorf("failed to load ELF with ebpf manager: %w", err) - } - if err := connCloseEventHandler.Init(m.Manager, &o); err != nil { - return fmt.Errorf("error initializing closed connections event handler: %w", err) - } util.AddBoolConst(&o, "ringbuffers_enabled", connCloseEventHandler.MapType() == ebpf.RingBuf) - return m.InitWithOptions(nil, &o) + return m.InitWithOptions(ar, &o) } diff --git a/pkg/network/tracer/connection/kprobe/tracer.go b/pkg/network/tracer/connection/kprobe/tracer.go index cfbc2be96baf2e..299c816688479b 100644 --- a/pkg/network/tracer/connection/kprobe/tracer.go +++ b/pkg/network/tracer/connection/kprobe/tracer.go @@ -143,7 +143,7 @@ func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandl } func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer bool, config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), 
error) { - m := ddebpf.NewManagerWithDefault(&manager.Manager{}, "network", &ebpftelemetry.ErrorsTelemetryModifier{}) + m := ddebpf.NewManagerWithDefault(&manager.Manager{}, "network", &ebpftelemetry.ErrorsTelemetryModifier{}, connCloseEventHandler) if err := initManager(m, runtimeTracer); err != nil { return nil, nil, fmt.Errorf("could not initialize manager: %w", err) } @@ -223,18 +223,12 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo }) } - if err := m.LoadELF(buf); err != nil { - return nil, nil, fmt.Errorf("failed to load ELF with ebpf manager: %w", err) - } - if err := connCloseEventHandler.Init(m.Manager, &mgrOpts); err != nil { - return nil, nil, fmt.Errorf("error initializing closed connections event handler: %w", err) - } usingRingBuffers := connCloseEventHandler.MapType() == ebpf.RingBuf util.AddBoolConst(&mgrOpts, "ringbuffers_enabled", usingRingBuffers) if features.HaveMapType(ebpf.RingBuf) != nil { m.EnabledModifiers = append(m.EnabledModifiers, ddebpf.NewHelperCallRemover(asm.FnRingbufOutput)) } - if err := m.InitWithOptions(nil, &mgrOpts); err != nil { + if err := m.InitWithOptions(buf, &mgrOpts); err != nil { return nil, nil, fmt.Errorf("failed to init ebpf manager: %w", err) } From 8693f02f9c5c7ae477250a06c04d3b53419d2697 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 10:51:52 -0800 Subject: [PATCH 11/23] add comments about handlers --- pkg/ebpf/perf/event.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 021fed798369bd..4fb9cb3ea8acb5 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -196,6 +196,7 @@ func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { } func (e *EventHandler) perfRecordHandler(record *perf.Record, _ *manager.PerfMap, _ *manager.Manager) { + // record is only allowed to live for the duration of the callback. Put it back into the sync.Pool once done. 
defer perfPool.Put(record) e.opts.Handler(record.RawSample) } @@ -219,6 +220,7 @@ func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { } func (e *EventHandler) ringRecordHandler(record *ringbuf.Record, _ *manager.RingBuffer, _ *manager.Manager) { + // record is only allowed to live for the duration of the callback. Put it back into the sync.Pool once done. defer ringbufPool.Put(record) e.opts.Handler(record.RawSample) } From 41908f9eb8fc5438bc4816c39ee1a48cdff219ef Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 11:52:45 -0800 Subject: [PATCH 12/23] refactor API to be more understandable --- pkg/ebpf/perf/event.go | 209 +++++++++++++------ pkg/network/tracer/connection/ebpf_tracer.go | 29 +-- 2 files changed, 152 insertions(+), 86 deletions(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 4fb9cb3ea8acb5..5552302f54af2d 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -40,99 +40,172 @@ var _ ddebpf.Modifier = (*EventHandler)(nil) // It handles upgrading maps from a ring buffer if desired, and unmarshalling into the desired data type. type EventHandler struct { f Flusher - opts EventHandlerOptions + opts eventHandlerOptions + // mapName specifies the name of the map + mapName string + // handler is the callback for data received from the perf/ring buffer + handler func([]byte) } -// EventHandlerOptions are the options controlling the EventHandler. -// MapName and Handler are required options. -type EventHandlerOptions struct { - // MapName specifies the name of the map. This field is required. - MapName string - // Handler is the callback for data received from the perf/ring buffer. This field is required. - Handler func([]byte) +type mapMode uint8 - // TelemetryEnabled specifies whether to collect usage telemetry from the perf/ring buffer. 
- TelemetryEnabled bool - // UseRingBuffer specifies whether to use a ring buffer - UseRingBuffer bool - // UpgradePerfBuffer specifies if you wish to upgrade a perf buffer to a ring buffer. - // This only takes effect if UseRingBuffer is true. - UpgradePerfBuffer bool +const ( + perfBufferOnly mapMode = iota + upgradePerfBuffer + ringBufferOnly +) + +// EventHandlerMode controls the mode in which the event handler operates +type EventHandlerMode func(*EventHandler) + +// UsePerfBuffers will only use perf buffers and will not attempt any upgrades to ring buffers. +func UsePerfBuffers(bufferSize int, perfMode PerfBufferMode) EventHandlerMode { + return func(e *EventHandler) { + e.opts.mode = perfBufferOnly + e.opts.perfBufferSize = bufferSize + perfMode(&e.opts.perfOptions) + } +} + +// UpgradePerfBuffers will upgrade to ring buffers if available, but will fall back to perf buffers if not. +func UpgradePerfBuffers(perfBufferSize int, perfMode PerfBufferMode, ringBufferSize int) EventHandlerMode { + return func(e *EventHandler) { + e.opts.mode = upgradePerfBuffer + e.opts.perfBufferSize = perfBufferSize + e.opts.ringBufferSize = ringBufferSize + perfMode(&e.opts.perfOptions) + } +} + +// UseRingBuffers will only use ring buffers. +func UseRingBuffers(bufferSize int) EventHandlerMode { + return func(e *EventHandler) { + e.opts.mode = ringBufferOnly + e.opts.ringBufferSize = bufferSize + } +} + +// EventHandlerOption is an option that applies to the event handler +type EventHandlerOption func(*EventHandler) + +// SendTelemetry specifies whether to collect usage telemetry from the perf/ring buffer +func SendTelemetry(enabled bool) EventHandlerOption { + return func(e *EventHandler) { + e.opts.telemetryEnabled = enabled + } +} - PerfOptions PerfBufferOptions - RingBufOptions RingBufferOptions +// eventHandlerOptions are the options controlling the EventHandler. 
+type eventHandlerOptions struct { + // telemetryEnabled specifies whether to collect usage telemetry from the perf/ring buffer. + telemetryEnabled bool + + mode mapMode + + perfBufferSize int + ringBufferSize int + + perfOptions perfBufferOptions } -// PerfBufferOptions are options specifically for perf buffers +// PerfBufferMode is a mode for the perf buffer // //nolint:revive -type PerfBufferOptions struct { - BufferSize int - - // Watermark - The reader will start processing samples once their sizes in the perf buffer - // exceed this value. Must be smaller than PerfRingBufferSize. Defaults to the manager value if not set. - Watermark int +type PerfBufferMode func(*perfBufferOptions) + +// Watermark - The reader will start processing samples once their sizes in the perf buffer +// exceed this value. Must be smaller than the perf buffer size. +func Watermark(byteCount int) PerfBufferMode { + return func(opts *perfBufferOptions) { + opts.watermark = byteCount + opts.wakeupEvents = 0 + } +} - // The number of events required in any per CPU buffer before - // Read will process data. This is mutually exclusive with Watermark. - // The default is zero, which means Watermark will take precedence. - WakeupEvents int +// WakeupEvents - The number of events required in any per CPU buffer before Read will process data. 
+func WakeupEvents(count int) PerfBufferMode { + return func(opts *perfBufferOptions) { + opts.wakeupEvents = count + opts.watermark = 0 + } } -// RingBufferOptions are options specifically for ring buffers -type RingBufferOptions struct { - BufferSize int +// perfBufferOptions are options specifically for perf buffers +// +//nolint:revive +type perfBufferOptions struct { + watermark int + wakeupEvents int } // NewEventHandler creates an event handler with the provided options -func NewEventHandler(opts EventHandlerOptions) (*EventHandler, error) { - if opts.MapName == "" { +func NewEventHandler(mapName string, handler func([]byte), mode EventHandlerMode, opts ...EventHandlerOption) (*EventHandler, error) { + if mapName == "" { return nil, errors.New("invalid options: MapName is required") } - if opts.Handler == nil { + if handler == nil { return nil, errors.New("invalid options: Handler is required") } e := &EventHandler{ - opts: opts, + mapName: mapName, + handler: handler, + } + mode(e) + for _, opt := range opts { + opt(e) } return e, nil } // BeforeInit implements the Modifier interface func (e *EventHandler) BeforeInit(mgr *manager.Manager, _ names.ModuleName, mgrOpts *manager.Options) error { - ms, _, _ := mgr.GetMapSpec(e.opts.MapName) + ms, _, _ := mgr.GetMapSpec(e.mapName) if ms == nil { - return fmt.Errorf("unable to find map spec %q", e.opts.MapName) + return fmt.Errorf("unable to find map spec %q", e.mapName) } - ringBuffersAvailable := features.HaveMapType(ebpf.RingBuf) == nil - if e.opts.UseRingBuffer && ringBuffersAvailable { - if e.opts.UpgradePerfBuffer { - // using ring buffers and upgrading from perf buffer - if ms.Type != ebpf.PerfEventArray { - return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.opts.MapName, ms.Type.String()) - } - UpgradePerfBuffer(mgr, mgrOpts, e.opts.MapName) - } else { - // using ring buffers, but not upgrading from a perf buffer - if ms.Type != ebpf.RingBuf { - return fmt.Errorf("map %q is not a ring 
buffer, got %q instead", e.opts.MapName, ms.Type.String()) - } + ringBufErr := features.HaveMapType(ebpf.RingBuf) + if e.opts.mode == ringBufferOnly { + if ringBufErr != nil { + return ringBufErr + } + if ms.Type != ebpf.RingBuf { + return fmt.Errorf("map %q is not a ring buffer, got %q instead", e.mapName, ms.Type.String()) } - // resize if necessary - if ms.MaxEntries != uint32(e.opts.RingBufOptions.BufferSize) { - ResizeRingBuffer(mgrOpts, e.opts.MapName, e.opts.RingBufOptions.BufferSize) + if ms.MaxEntries != uint32(e.opts.ringBufferSize) { + ResizeRingBuffer(mgrOpts, e.mapName, e.opts.ringBufferSize) } e.initRingBuffer(mgr) return nil } - if ms.Type != ebpf.PerfEventArray { - return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.opts.MapName, ms.Type.String()) + if e.opts.mode == perfBufferOnly { + if ms.Type != ebpf.PerfEventArray { + return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.mapName, ms.Type.String()) + } + e.initPerfBuffer(mgr) + return nil } - e.initPerfBuffer(mgr) - return nil + + if e.opts.mode == upgradePerfBuffer { + if ms.Type != ebpf.PerfEventArray { + return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.mapName, ms.Type.String()) + } + if ringBufErr == nil { + UpgradePerfBuffer(mgr, mgrOpts, e.mapName) + if ms.MaxEntries != uint32(e.opts.ringBufferSize) { + ResizeRingBuffer(mgrOpts, e.mapName, e.opts.ringBufferSize) + } + e.initRingBuffer(mgr) + return nil + } + + e.initPerfBuffer(mgr) + return nil + } + + return fmt.Errorf("unsupported EventHandlerMode %d", e.opts.mode) } // AfterInit implements the Modifier interface @@ -176,18 +249,18 @@ func ResizeRingBuffer(mgrOpts *manager.Options, mapName string, bufferSize int) func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { // remove any existing perf buffers from manager mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { - return perfMap.Name == e.opts.MapName + return perfMap.Name == e.mapName }) pm := 
&manager.PerfMap{ - Map: manager.Map{Name: e.opts.MapName}, + Map: manager.Map{Name: e.mapName}, PerfMapOptions: manager.PerfMapOptions{ - PerfRingBufferSize: e.opts.PerfOptions.BufferSize, - Watermark: e.opts.PerfOptions.Watermark, - WakeupEvents: e.opts.PerfOptions.WakeupEvents, + PerfRingBufferSize: e.opts.perfBufferSize, + Watermark: e.opts.perfOptions.watermark, + WakeupEvents: e.opts.perfOptions.wakeupEvents, RecordHandler: e.perfRecordHandler, LostHandler: nil, // TODO do we need support for Lost? RecordGetter: perfPool.Get, - TelemetryEnabled: e.opts.TelemetryEnabled, + TelemetryEnabled: e.opts.telemetryEnabled, }, } mgr.PerfMaps = append(mgr.PerfMaps, pm) @@ -198,20 +271,20 @@ func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { func (e *EventHandler) perfRecordHandler(record *perf.Record, _ *manager.PerfMap, _ *manager.Manager) { // record is only allowed to live for the duration of the callback. Put it back into the sync.Pool once done. defer perfPool.Put(record) - e.opts.Handler(record.RawSample) + e.handler(record.RawSample) } func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { // remove any existing matching ring buffers from manager mgr.RingBuffers = slices.DeleteFunc(mgr.RingBuffers, func(ringBuf *manager.RingBuffer) bool { - return ringBuf.Name == e.opts.MapName + return ringBuf.Name == e.mapName }) rb := &manager.RingBuffer{ - Map: manager.Map{Name: e.opts.MapName}, + Map: manager.Map{Name: e.mapName}, RingBufferOptions: manager.RingBufferOptions{ RecordHandler: e.ringRecordHandler, RecordGetter: ringbufPool.Get, - TelemetryEnabled: e.opts.TelemetryEnabled, + TelemetryEnabled: e.opts.telemetryEnabled, }, } mgr.RingBuffers = append(mgr.RingBuffers, rb) @@ -222,7 +295,7 @@ func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { func (e *EventHandler) ringRecordHandler(record *ringbuf.Record, _ *manager.RingBuffer, _ *manager.Manager) { // record is only allowed to live for the duration of the callback. 
Put it back into the sync.Pool once done. defer ringbufPool.Put(record) - e.opts.Handler(record.RawSample) + e.handler(record.RawSample) } // UpgradePerfBuffer upgrades a perf buffer to a ring buffer by creating a map spec editor diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index 48046da4fc17f7..d6ea3bdbf6aee1 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -310,21 +310,11 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw closedCallback(b) }) - eopts := perf.EventHandlerOptions{ - MapName: probes.ConnCloseEventMap, - TelemetryEnabled: config.InternalTelemetryEnabled, - UseRingBuffer: config.RingBufferSupportedNPM(), - UpgradePerfBuffer: true, - PerfOptions: perf.PerfBufferOptions{ - BufferSize: util.ComputeDefaultClosedConnPerfBufferSize(), - }, - RingBufOptions: perf.RingBufferOptions{ - BufferSize: util.ComputeDefaultClosedConnRingBufferSize(), - }, - } + handler := singleConnHandler + perfMode := perf.WakeupEvents(config.ClosedBufferWakeupCount) if config.CustomBatchingEnabled { - eopts.PerfOptions.Watermark = 1 - eopts.Handler = func(buf []byte) { + perfMode = perf.Watermark(1) + handler = func(buf []byte) { l := len(buf) switch { case l >= netebpf.SizeofBatch: @@ -344,12 +334,15 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw log.Debugf("unexpected %q binary data of size %d bytes", probes.ConnCloseEventMap, l) } } - } else { - eopts.PerfOptions.WakeupEvents = config.ClosedBufferWakeupCount - eopts.Handler = singleConnHandler } - return perf.NewEventHandler(eopts) + perfBufferSize := util.ComputeDefaultClosedConnPerfBufferSize() + mode := perf.UsePerfBuffers(perfBufferSize, perfMode) + if config.RingBufferSupportedNPM() { + mode = perf.UpgradePerfBuffers(perfBufferSize, perfMode, util.ComputeDefaultClosedConnRingBufferSize()) + } + + return 
perf.NewEventHandler(probes.ConnCloseEventMap, handler, mode, perf.SendTelemetry(config.InternalTelemetryEnabled)) } func boolConst(name string, value bool) manager.ConstantEditor { From 278c30ce92efc4a4370f5f9700cc95dda0c13411 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 13:16:41 -0800 Subject: [PATCH 13/23] fix constant value --- pkg/ebpf/perf/event.go | 50 ++++++++++++------- pkg/network/tracer/connection/ebpf_tracer.go | 4 +- .../tracer/connection/fentry/tracer.go | 7 +-- .../tracer/connection/kprobe/tracer.go | 7 --- 4 files changed, 38 insertions(+), 30 deletions(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 5552302f54af2d..043bb3af8c252c 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -15,6 +15,7 @@ import ( manager "github.com/DataDog/ebpf-manager" "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" "github.com/cilium/ebpf/features" "github.com/cilium/ebpf/perf" "github.com/cilium/ebpf/ringbuf" @@ -95,6 +96,13 @@ func SendTelemetry(enabled bool) EventHandlerOption { } } +// RingBufferConstantName provides a constant name that will be set whether ring buffers are in use +func RingBufferConstantName(name string) EventHandlerOption { + return func(e *EventHandler) { + e.opts.ringBufferConstantName = name + } +} + // eventHandlerOptions are the options controlling the EventHandler. type eventHandlerOptions struct { // telemetryEnabled specifies whether to collect usage telemetry from the perf/ring buffer. 
@@ -103,9 +111,10 @@ type eventHandlerOptions struct { mode mapMode perfBufferSize int - ringBufferSize int + perfOptions perfBufferOptions - perfOptions perfBufferOptions + ringBufferSize int + ringBufferConstantName string } // PerfBufferMode is a mode for the perf buffer @@ -158,11 +167,12 @@ func NewEventHandler(mapName string, handler func([]byte), mode EventHandlerMode } // BeforeInit implements the Modifier interface -func (e *EventHandler) BeforeInit(mgr *manager.Manager, _ names.ModuleName, mgrOpts *manager.Options) error { +func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleName, mgrOpts *manager.Options) (err error) { ms, _, _ := mgr.GetMapSpec(e.mapName) if ms == nil { return fmt.Errorf("unable to find map spec %q", e.mapName) } + defer e.setupConstant(mgrOpts) ringBufErr := features.HaveMapType(ebpf.RingBuf) if e.opts.mode == ringBufferOnly { @@ -202,12 +212,31 @@ func (e *EventHandler) BeforeInit(mgr *manager.Manager, _ names.ModuleName, mgrO } e.initPerfBuffer(mgr) - return nil + // add helper call remover because ring buffers are not available + return ddebpf.NewHelperCallRemover(asm.FnRingbufOutput).BeforeInit(mgr, moduleName, mgrOpts) } return fmt.Errorf("unsupported EventHandlerMode %d", e.opts.mode) } +func (e *EventHandler) setupConstant(mgrOpts *manager.Options) { + if e.opts.ringBufferConstantName == "" || e.f == nil { + return + } + + var val uint64 + switch e.f.(type) { + case *manager.RingBuffer: + val = uint64(1) + default: + val = uint64(0) + } + mgrOpts.ConstantEditors = append(mgrOpts.ConstantEditors, manager.ConstantEditor{ + Name: e.opts.ringBufferConstantName, + Value: val, + }) +} + // AfterInit implements the Modifier interface func (e *EventHandler) AfterInit(_ *manager.Manager, _ names.ModuleName, _ *manager.Options) error { return nil @@ -217,19 +246,6 @@ func (e *EventHandler) String() string { return "EventHandler" } -// MapType returns the ebpf.MapType of the underlying events map -// This is only 
valid after calling Init. -func (e *EventHandler) MapType() ebpf.MapType { - switch e.f.(type) { - case *manager.PerfMap: - return ebpf.PerfEventArray - case *manager.RingBuffer: - return ebpf.RingBuf - default: - return ebpf.UnspecifiedMap - } -} - // Flush flushes the pending data from the underlying perfbuf/ringbuf func (e *EventHandler) Flush() { e.f.Flush() diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index d6ea3bdbf6aee1..45af2b1483cb84 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -342,7 +342,9 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw mode = perf.UpgradePerfBuffers(perfBufferSize, perfMode, util.ComputeDefaultClosedConnRingBufferSize()) } - return perf.NewEventHandler(probes.ConnCloseEventMap, handler, mode, perf.SendTelemetry(config.InternalTelemetryEnabled)) + return perf.NewEventHandler(probes.ConnCloseEventMap, handler, mode, + perf.SendTelemetry(config.InternalTelemetryEnabled), + perf.RingBufferConstantName("ringbuffers_enabled")) } func boolConst(name string, value bool) manager.ConstantEditor { diff --git a/pkg/network/tracer/connection/fentry/tracer.go b/pkg/network/tracer/connection/fentry/tracer.go index e029ed215f2a13..4a7d6121b5ade6 100644 --- a/pkg/network/tracer/connection/fentry/tracer.go +++ b/pkg/network/tracer/connection/fentry/tracer.go @@ -14,7 +14,6 @@ import ( "syscall" manager "github.com/DataDog/ebpf-manager" - "github.com/cilium/ebpf" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" @@ -22,7 +21,6 @@ import ( ebpftelemetry "github.com/DataDog/datadog-agent/pkg/ebpf/telemetry" "github.com/DataDog/datadog-agent/pkg/network/config" netebpf "github.com/DataDog/datadog-agent/pkg/network/ebpf" - "github.com/DataDog/datadog-agent/pkg/network/tracer/connection/util" "github.com/DataDog/datadog-agent/pkg/util/fargate" ) 
@@ -41,7 +39,7 @@ func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHa o.RLimit = mgrOpts.RLimit o.MapSpecEditors = mgrOpts.MapSpecEditors o.ConstantEditors = mgrOpts.ConstantEditors - return initFentryTracer(ar, o, config, m, connCloseEventHandler) + return initFentryTracer(ar, o, config, m) }) if err != nil { @@ -52,7 +50,7 @@ func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHa } // Use a function so someone doesn't accidentally use mgrOpts from the outer scope in LoadTracer -func initFentryTracer(ar bytecode.AssetReader, o manager.Options, config *config.Config, m *ddebpf.Manager, connCloseEventHandler *perf.EventHandler) error { +func initFentryTracer(ar bytecode.AssetReader, o manager.Options, config *config.Config, m *ddebpf.Manager) error { // Use the config to determine what kernel probes should be enabled enabledProbes, err := enabledPrograms(config) if err != nil { @@ -91,6 +89,5 @@ func initFentryTracer(ar bytecode.AssetReader, o manager.Options, config *config }) } - util.AddBoolConst(&o, "ringbuffers_enabled", connCloseEventHandler.MapType() == ebpf.RingBuf) return m.InitWithOptions(ar, &o) } diff --git a/pkg/network/tracer/connection/kprobe/tracer.go b/pkg/network/tracer/connection/kprobe/tracer.go index 299c816688479b..40a0a8f49eae01 100644 --- a/pkg/network/tracer/connection/kprobe/tracer.go +++ b/pkg/network/tracer/connection/kprobe/tracer.go @@ -13,8 +13,6 @@ import ( manager "github.com/DataDog/ebpf-manager" "github.com/cilium/ebpf" - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/features" ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" @@ -223,11 +221,6 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo }) } - usingRingBuffers := connCloseEventHandler.MapType() == ebpf.RingBuf - util.AddBoolConst(&mgrOpts, "ringbuffers_enabled", usingRingBuffers) - if features.HaveMapType(ebpf.RingBuf) != nil 
{ - m.EnabledModifiers = append(m.EnabledModifiers, ddebpf.NewHelperCallRemover(asm.FnRingbufOutput)) - } if err := m.InitWithOptions(buf, &mgrOpts); err != nil { return nil, nil, fmt.Errorf("failed to init ebpf manager: %w", err) } From 757fb638d5ff498de2da6cf6720a170fd4e4aeb9 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 2 Dec 2024 14:16:01 -0800 Subject: [PATCH 14/23] fix helper call removal --- pkg/ebpf/perf/event.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 043bb3af8c252c..c0832a5735bfcb 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -189,6 +189,7 @@ func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleN e.initRingBuffer(mgr) return nil } + defer e.removeRingBufferHelperCalls(mgr, moduleName, mgrOpts) if e.opts.mode == perfBufferOnly { if ms.Type != ebpf.PerfEventArray { @@ -212,13 +213,20 @@ func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleN } e.initPerfBuffer(mgr) - // add helper call remover because ring buffers are not available - return ddebpf.NewHelperCallRemover(asm.FnRingbufOutput).BeforeInit(mgr, moduleName, mgrOpts) + return nil } return fmt.Errorf("unsupported EventHandlerMode %d", e.opts.mode) } +func (e *EventHandler) removeRingBufferHelperCalls(mgr *manager.Manager, moduleName names.ModuleName, mgrOpts *manager.Options) { + if features.HaveMapType(ebpf.RingBuf) == nil { + return + } + // add helper call remover because ring buffers are not available + _ = ddebpf.NewHelperCallRemover(asm.FnRingbufOutput).BeforeInit(mgr, moduleName, mgrOpts) +} + func (e *EventHandler) setupConstant(mgrOpts *manager.Options) { if e.opts.ringBufferConstantName == "" || e.f == nil { return From 965035ecc17f4931dff612b6804d4fcfba70d565 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Wed, 4 Dec 2024 14:36:05 -0800 Subject: [PATCH 15/23] add metric for conns received during flush --- 
pkg/network/tracer/connection/tcp_close_consumer.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pkg/network/tracer/connection/tcp_close_consumer.go b/pkg/network/tracer/connection/tcp_close_consumer.go index 3dbd33dd55ede1..5e1d45fae5afa6 100644 --- a/pkg/network/tracer/connection/tcp_close_consumer.go +++ b/pkg/network/tracer/connection/tcp_close_consumer.go @@ -9,6 +9,7 @@ package connection import ( "sync" + "sync/atomic" "github.com/DataDog/datadog-agent/pkg/ebpf/perf" "github.com/DataDog/datadog-agent/pkg/network" @@ -22,9 +23,11 @@ const closeConsumerModuleName = "network_tracer__ebpf" // Telemetry var closeConsumerTelemetry = struct { - perfReceived telemetry.Counter + perfReceived telemetry.Counter + flushReceived telemetry.Counter }{ telemetry.NewCounter(closeConsumerModuleName, "closed_conn_polling_received", []string{}, "Counter measuring the number of closed connections received"), + telemetry.NewCounter(closeConsumerModuleName, "closed_conn_flush_received", []string{}, "Counter measuring the number of closed connections received during flush"), } type tcpCloseConsumer struct { @@ -36,6 +39,7 @@ type tcpCloseConsumer struct { callback func(*network.ConnectionStats) releaser ddsync.PoolReleaser[network.ConnectionStats] flushChannel chan chan struct{} + flushing *atomic.Bool } func newTCPCloseConsumer(flusher perf.Flusher, releaser ddsync.PoolReleaser[network.ConnectionStats]) *tcpCloseConsumer { @@ -46,6 +50,7 @@ func newTCPCloseConsumer(flusher perf.Flusher, releaser ddsync.PoolReleaser[netw releaser: releaser, callback: func(*network.ConnectionStats) {}, flushChannel: make(chan chan struct{}, 1), + flushing: &atomic.Bool{}, } } @@ -82,10 +87,14 @@ func (c *tcpCloseConsumer) Callback(conn *network.ConnectionStats) { if conn == nil { request := <-c.flushChannel close(request) + c.flushing.Store(false) return } closeConsumerTelemetry.perfReceived.Inc() + if c.flushing.Load() { + closeConsumerTelemetry.flushReceived.Inc() 
+ } c.callback(conn) c.releaser.Put(conn) } @@ -111,6 +120,7 @@ func (c *tcpCloseConsumer) Start(callback func(*network.ConnectionStats)) { return case <-liveHealth.C: case request := <-c.requests: + c.flushing.Store(true) c.flushChannel <- request c.flusher.Flush() } From 2bebabf3f54d9b0abb560738355886cc11aa5504 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Fri, 6 Dec 2024 12:23:28 -0800 Subject: [PATCH 16/23] remove panic from BinaryUnmarshalCallback --- pkg/util/encoding/binary.go | 21 +++++++-------------- pkg/util/encoding/binary_test.go | 7 ------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/pkg/util/encoding/binary.go b/pkg/util/encoding/binary.go index 88dc5b4b864b67..1ad9d2a220788c 100644 --- a/pkg/util/encoding/binary.go +++ b/pkg/util/encoding/binary.go @@ -10,30 +10,23 @@ import ( "encoding" ) -// BinaryUnmarshalCallback returns a function that will decode the argument byte slice into *T -// using `newFn` to create an instance of *T and the encoding.BinaryUnmarshaler interface to do the actual conversion. -// `callback` will be called with the resulting *T. +// BinaryUnmarshalCallback returns a function that will decode the argument byte slice into T +// using `newFn` to create an instance of T and the encoding.BinaryUnmarshaler interface to do the actual conversion. +// `callback` will be called with the resulting T. // If the argument byte slice is empty, callback will be called with `nil`. // Unmarshalling errors will be provided to the callback as the second argument. The data argument to the callback // may still be non-nil even if there was an error. This allows the callback to handle the allocated object, even // in the face of errors. -// This function panics if `*T` does not implement encoding.BinaryUnmarshaler. 
-func BinaryUnmarshalCallback[T any](newFn func() *T, callback func(*T, error)) func(buf []byte) { - // we use `any` as the type constraint rather than encoding.BinaryUnmarshaler because we are not allowed to - // callback with `nil` in the latter case. There is a workaround, but it requires specifying two type constraints. - // For sake of cleanliness, we resort to a runtime check here. - if _, ok := any(new(T)).(encoding.BinaryUnmarshaler); !ok { - panic("pointer type *T must implement encoding.BinaryUnmarshaler") - } - +func BinaryUnmarshalCallback[T encoding.BinaryUnmarshaler](newFn func() T, callback func(T, error)) func(buf []byte) { return func(buf []byte) { if len(buf) == 0 { - callback(nil, nil) + var nilvalue T + callback(nilvalue, nil) return } d := newFn() - if err := any(d).(encoding.BinaryUnmarshaler).UnmarshalBinary(buf); err != nil { + if err := d.UnmarshalBinary(buf); err != nil { // pass d here so callback can choose how to deal with the data callback(d, err) return diff --git a/pkg/util/encoding/binary_test.go b/pkg/util/encoding/binary_test.go index 05450f10fe4069..4412d5c3176510 100644 --- a/pkg/util/encoding/binary_test.go +++ b/pkg/util/encoding/binary_test.go @@ -35,13 +35,6 @@ func (tt *dataTestType) UnmarshalBinary(data []byte) error { } func TestBinaryUnmarshalCallback(t *testing.T) { - assert.Panics(t, func() { - type x struct{} - BinaryUnmarshalCallback(func() *x { - return new(x) - }, func(_ *x, _ error) {}) - }) - cb := BinaryUnmarshalCallback(func() *emptyTestType { return new(emptyTestType) }, func(x *emptyTestType, err error) { From da8ebd60fb511d0a2af48225a3e944c30481ffa1 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 9 Dec 2024 12:35:50 -0800 Subject: [PATCH 17/23] change default wakeup_events to match batch size --- pkg/config/setup/system_probe.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index c4f5af5e63e48b..400d0fe4219552 
100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -196,7 +196,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnv(join(spNS, "max_closed_connections_buffered")) cfg.BindEnv(join(netNS, "max_failed_connections_buffered")) cfg.BindEnv(join(netNS, "closed_connection_flush_threshold")) - cfg.BindEnvAndSetDefault(join(netNS, "closed_buffer_wakeup_count"), 5) + cfg.BindEnvAndSetDefault(join(netNS, "closed_buffer_wakeup_count"), 4) cfg.BindEnvAndSetDefault(join(spNS, "max_connection_state_buffered"), 75000) cfg.BindEnvAndSetDefault(join(spNS, "disable_dns_inspection"), false, "DD_DISABLE_DNS_INSPECTION") From dbb127286d441bd2df0a3b8d346d48734ce15c78 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 9 Dec 2024 16:52:42 -0800 Subject: [PATCH 18/23] extend in-buffer batching to ringbufs --- pkg/ebpf/c/bpf_helpers_custom.h | 13 +++++ pkg/ebpf/perf/event.go | 48 +++++++++++++++---- pkg/network/ebpf/c/tracer/events.h | 29 ++++++++--- pkg/network/protocols/events/configuration.go | 2 +- pkg/network/tracer/connection/ebpf_tracer.go | 3 +- 5 files changed, 76 insertions(+), 19 deletions(-) diff --git a/pkg/ebpf/c/bpf_helpers_custom.h b/pkg/ebpf/c/bpf_helpers_custom.h index 42c83c272ed111..d2b997032293cd 100644 --- a/pkg/ebpf/c/bpf_helpers_custom.h +++ b/pkg/ebpf/c/bpf_helpers_custom.h @@ -39,4 +39,17 @@ unsigned long long load_half(void *skb, unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word"); +// declare our own versions of these enums, because they don't exist on <5.8 +enum { + DD_BPF_RB_NO_WAKEUP = 1, + DD_BPF_RB_FORCE_WAKEUP = 2, +}; + +enum { + DD_BPF_RB_AVAIL_DATA = 0, + DD_BPF_RB_RING_SIZE = 1, + DD_BPF_RB_CONS_POS = 2, + DD_BPF_RB_PROD_POS = 3, +}; + #endif diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index c0832a5735bfcb..7e500c4f1efa96 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -96,10 +96,18 @@ func SendTelemetry(enabled bool) 
EventHandlerOption { } } -// RingBufferConstantName provides a constant name that will be set whether ring buffers are in use -func RingBufferConstantName(name string) EventHandlerOption { +// RingBufferEnabledConstantName provides a constant name that will be set whether ring buffers are in use +func RingBufferEnabledConstantName(name string) EventHandlerOption { return func(e *EventHandler) { - e.opts.ringBufferConstantName = name + e.opts.ringBufferEnabledConstantName = name + } +} + +// RingBufferWakeupSize sets a constant for eBPF to use, that determines when to wakeup userspace +func RingBufferWakeupSize(name string, size uint64) EventHandlerOption { + return func(e *EventHandler) { + e.opts.ringBufferWakeupConstantName = name + e.opts.ringBufferWakeupSize = size } } @@ -113,8 +121,11 @@ type eventHandlerOptions struct { perfBufferSize int perfOptions perfBufferOptions - ringBufferSize int - ringBufferConstantName string + ringBufferSize int + ringBufferEnabledConstantName string + + ringBufferWakeupConstantName string + ringBufferWakeupSize uint64 } // PerfBufferMode is a mode for the perf buffer @@ -172,7 +183,8 @@ func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleN if ms == nil { return fmt.Errorf("unable to find map spec %q", e.mapName) } - defer e.setupConstant(mgrOpts) + defer e.setupEnabledConstant(mgrOpts) + defer e.setupRingbufferWakeupConstant(mgrOpts) ringBufErr := features.HaveMapType(ebpf.RingBuf) if e.opts.mode == ringBufferOnly { @@ -224,11 +236,11 @@ func (e *EventHandler) removeRingBufferHelperCalls(mgr *manager.Manager, moduleN return } // add helper call remover because ring buffers are not available - _ = ddebpf.NewHelperCallRemover(asm.FnRingbufOutput).BeforeInit(mgr, moduleName, mgrOpts) + _ = ddebpf.NewHelperCallRemover(asm.FnRingbufOutput, asm.FnRingbufQuery, asm.FnRingbufReserve, asm.FnRingbufSubmit, asm.FnRingbufDiscard).BeforeInit(mgr, moduleName, mgrOpts) } -func (e *EventHandler) setupConstant(mgrOpts 
*manager.Options) { - if e.opts.ringBufferConstantName == "" || e.f == nil { +func (e *EventHandler) setupEnabledConstant(mgrOpts *manager.Options) { + if e.opts.ringBufferEnabledConstantName == "" || e.f == nil { return } @@ -240,11 +252,27 @@ func (e *EventHandler) setupConstant(mgrOpts *manager.Options) { val = uint64(0) } mgrOpts.ConstantEditors = append(mgrOpts.ConstantEditors, manager.ConstantEditor{ - Name: e.opts.ringBufferConstantName, + Name: e.opts.ringBufferEnabledConstantName, Value: val, }) } +func (e *EventHandler) setupRingbufferWakeupConstant(mgrOpts *manager.Options) { + if e.opts.ringBufferWakeupSize == 0 || e.opts.ringBufferWakeupConstantName == "" || e.f == nil { + return + } + + switch e.f.(type) { + case *manager.RingBuffer: + mgrOpts.ConstantEditors = append(mgrOpts.ConstantEditors, manager.ConstantEditor{ + Name: e.opts.ringBufferWakeupConstantName, + Value: e.opts.ringBufferWakeupSize, + }) + default: + // do nothing + } +} + // AfterInit implements the Modifier interface func (e *EventHandler) AfterInit(_ *manager.Manager, _ names.ModuleName, _ *manager.Options) error { return nil diff --git a/pkg/network/ebpf/c/tracer/events.h b/pkg/network/ebpf/c/tracer/events.h index 84a120ea6e1ff4..4fa6b66936fda5 100644 --- a/pkg/network/ebpf/c/tracer/events.h +++ b/pkg/network/ebpf/c/tracer/events.h @@ -34,22 +34,37 @@ static __always_inline void clean_protocol_classification(conn_tuple_t *tup) { bpf_map_delete_elem(&conn_tuple_to_socket_skb_conn_tuple, &conn_tuple); } +static __always_inline bool is_batching_enabled() { + __u64 batching_enabled = 0; + LOAD_CONSTANT("batching_enabled", batching_enabled); + return batching_enabled != 0; +} + +__maybe_unused static __always_inline __u64 get_ringbuf_flags(size_t data_size) { + if (is_batching_enabled()) { + return 0; + } + + __u64 ringbuffer_wakeup_size = 0; + LOAD_CONSTANT("ringbuffer_wakeup_size", ringbuffer_wakeup_size); + if (ringbuffer_wakeup_size == 0) { + return 0; + } + + __u64 sz = 
bpf_ringbuf_query(&conn_close_event, DD_BPF_RB_AVAIL_DATA); + return (sz + data_size) >= ringbuffer_wakeup_size ? DD_BPF_RB_FORCE_WAKEUP : DD_BPF_RB_NO_WAKEUP; +} + __maybe_unused static __always_inline void submit_closed_conn_event(void *ctx, int cpu, void *event_data, size_t data_size) { __u64 ringbuffers_enabled = 0; LOAD_CONSTANT("ringbuffers_enabled", ringbuffers_enabled); if (ringbuffers_enabled > 0) { - bpf_ringbuf_output(&conn_close_event, event_data, data_size, 0); + bpf_ringbuf_output(&conn_close_event, event_data, data_size, get_ringbuf_flags(data_size)); } else { bpf_perf_event_output(ctx, &conn_close_event, cpu, event_data, data_size); } } -static __always_inline bool is_batching_enabled() { - __u64 batching_enabled = 0; - LOAD_CONSTANT("batching_enabled", batching_enabled); - return batching_enabled != 0; -} - static __always_inline int cleanup_conn(void *ctx, conn_tuple_t *tup, struct sock *sk) { u32 cpu = bpf_get_smp_processor_id(); // Will hold the full connection data to send through the perf or ring buffer diff --git a/pkg/network/protocols/events/configuration.go b/pkg/network/protocols/events/configuration.go index 0888c8d2fde7d6..2d85791bf820b0 100644 --- a/pkg/network/protocols/events/configuration.go +++ b/pkg/network/protocols/events/configuration.go @@ -159,7 +159,7 @@ func removeRingBufferHelperCalls(m *manager.Manager) { // TODO: this is not the intended API usage of a `ebpf.Modifier`. 
// Once we have access to the `ddebpf.Manager`, add this modifier to its list of // `EnabledModifiers` and let it control the execution of the callbacks - patcher := ddebpf.NewHelperCallRemover(asm.FnRingbufOutput) + patcher := ddebpf.NewHelperCallRemover(asm.FnRingbufOutput, asm.FnRingbufQuery, asm.FnRingbufReserve, asm.FnRingbufSubmit, asm.FnRingbufDiscard) err := patcher.BeforeInit(m, names.NewModuleName("usm"), nil) if err != nil { diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index 45af2b1483cb84..eb7fc83e7a67c6 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -344,7 +344,8 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw return perf.NewEventHandler(probes.ConnCloseEventMap, handler, mode, perf.SendTelemetry(config.InternalTelemetryEnabled), - perf.RingBufferConstantName("ringbuffers_enabled")) + perf.RingBufferEnabledConstantName("ringbuffers_enabled"), + perf.RingBufferWakeupSize("ringbuffer_wakeup_size", uint64(config.ClosedBufferWakeupCount*(netebpf.SizeofConn+unix.BPF_RINGBUF_HDR_SZ)))) } func boolConst(name string, value bool) manager.ConstantEditor { From ad60da3c5823d7030af51c562917d35b6f286ad0 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Tue, 10 Dec 2024 14:19:52 -0800 Subject: [PATCH 19/23] add channel to read from perf/ringbuf faster --- cmd/system-probe/config/adjust_npm.go | 2 + pkg/config/setup/system_probe.go | 3 + pkg/ebpf/manager.go | 37 +++++++++++++ pkg/ebpf/perf/event.go | 55 +++++++++++++++++-- pkg/network/config/config.go | 4 ++ pkg/network/tracer/connection/ebpf_tracer.go | 30 +++++----- .../tracer/connection/fentry/tracer.go | 4 +- .../tracer/connection/kprobe/tracer.go | 16 +++--- .../tracer/connection/kprobe/tracer_test.go | 15 ++--- 9 files changed, 130 insertions(+), 36 deletions(-) diff --git a/cmd/system-probe/config/adjust_npm.go b/cmd/system-probe/config/adjust_npm.go 
index add4668cf738dd..98a57348273573 100644 --- a/cmd/system-probe/config/adjust_npm.go +++ b/cmd/system-probe/config/adjust_npm.go @@ -27,6 +27,8 @@ func adjustNetwork(cfg model.Config) { ebpflessEnabled := cfg.GetBool(netNS("enable_ebpfless")) deprecateInt(cfg, spNS("closed_connection_flush_threshold"), netNS("closed_connection_flush_threshold")) + deprecateInt(cfg, spNS("closed_channel_size"), netNS("closed_channel_size")) + applyDefault(cfg, netNS("closed_channel_size"), 500) limitMaxInt(cfg, spNS("max_conns_per_message"), maxConnsMessageBatchSize) diff --git a/pkg/config/setup/system_probe.go b/pkg/config/setup/system_probe.go index 400d0fe4219552..610e07187e2595 100644 --- a/pkg/config/setup/system_probe.go +++ b/pkg/config/setup/system_probe.go @@ -195,7 +195,10 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Config) { cfg.BindEnvAndSetDefault(join(spNS, "max_tracked_connections"), 65536) cfg.BindEnv(join(spNS, "max_closed_connections_buffered")) cfg.BindEnv(join(netNS, "max_failed_connections_buffered")) + cfg.BindEnv(join(spNS, "closed_connection_flush_threshold")) cfg.BindEnv(join(netNS, "closed_connection_flush_threshold")) + cfg.BindEnv(join(spNS, "closed_channel_size")) + cfg.BindEnv(join(netNS, "closed_channel_size")) cfg.BindEnvAndSetDefault(join(netNS, "closed_buffer_wakeup_count"), 4) cfg.BindEnvAndSetDefault(join(spNS, "max_connection_state_buffered"), 75000) diff --git a/pkg/ebpf/manager.go b/pkg/ebpf/manager.go index 06e790609a9ecf..3c09088fa959db 100644 --- a/pkg/ebpf/manager.go +++ b/pkg/ebpf/manager.go @@ -100,3 +100,40 @@ func (m *Manager) InitWithOptions(bytecode io.ReaderAt, opts *manager.Options) e } return nil } + +type modifierPreStart interface { + PreStart() error +} + +// Start is a wrapper around ebpf-manager.Manager.Start +func (m *Manager) Start() error { + for _, mod := range m.EnabledModifiers { + if ps, ok := mod.(modifierPreStart); ok { + if err := ps.PreStart(); err != nil { + return fmt.Errorf("prestart %s manager modifier: 
%w", mod, err) + } + } + } + return m.Manager.Start() +} + +type modifierAfterStop interface { + AfterStop(manager.MapCleanupType) error +} + +// Stop is a wrapper around ebpf-manager.Manager.Stop +func (m *Manager) Stop(ct manager.MapCleanupType) error { + if err := m.Manager.Stop(ct); err != nil { + return err + } + + for _, mod := range m.EnabledModifiers { + if as, ok := mod.(modifierAfterStop); ok { + if err := as.AfterStop(ct); err != nil { + return fmt.Errorf("afterstop %s manager modifier: %w", mod, err) + } + } + } + + return nil +} diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 7e500c4f1efa96..387abb73f57410 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -46,6 +46,10 @@ type EventHandler struct { mapName string // handler is the callback for data received from the perf/ring buffer handler func([]byte) + + readLoop func() + perfChan chan *perf.Record + ringChan chan *ringbuf.Record } type mapMode uint8 @@ -60,18 +64,20 @@ const ( type EventHandlerMode func(*EventHandler) // UsePerfBuffers will only use perf buffers and will not attempt any upgrades to ring buffers. -func UsePerfBuffers(bufferSize int, perfMode PerfBufferMode) EventHandlerMode { +func UsePerfBuffers(bufferSize int, channelSize int, perfMode PerfBufferMode) EventHandlerMode { return func(e *EventHandler) { e.opts.mode = perfBufferOnly + e.opts.channelSize = channelSize e.opts.perfBufferSize = bufferSize perfMode(&e.opts.perfOptions) } } // UpgradePerfBuffers will upgrade to ring buffers if available, but will fall back to perf buffers if not. 
-func UpgradePerfBuffers(perfBufferSize int, perfMode PerfBufferMode, ringBufferSize int) EventHandlerMode { +func UpgradePerfBuffers(perfBufferSize int, channelSize int, perfMode PerfBufferMode, ringBufferSize int) EventHandlerMode { return func(e *EventHandler) { e.opts.mode = upgradePerfBuffer + e.opts.channelSize = channelSize e.opts.perfBufferSize = perfBufferSize e.opts.ringBufferSize = ringBufferSize perfMode(&e.opts.perfOptions) @@ -79,9 +85,10 @@ func UpgradePerfBuffers(perfBufferSize int, perfMode PerfBufferMode, ringBufferS } // UseRingBuffers will only use ring buffers. -func UseRingBuffers(bufferSize int) EventHandlerMode { +func UseRingBuffers(bufferSize int, channelSize int) EventHandlerMode { return func(e *EventHandler) { e.opts.mode = ringBufferOnly + e.opts.channelSize = channelSize e.opts.ringBufferSize = bufferSize } } @@ -116,7 +123,8 @@ type eventHandlerOptions struct { // telemetryEnabled specifies whether to collect usage telemetry from the perf/ring buffer. telemetryEnabled bool - mode mapMode + mode mapMode + channelSize int perfBufferSize int perfOptions perfBufferOptions @@ -278,6 +286,23 @@ func (e *EventHandler) AfterInit(_ *manager.Manager, _ names.ModuleName, _ *mana return nil } +// PreStart implements the Modifier interface +func (e *EventHandler) PreStart() error { + go e.readLoop() + return nil +} + +// AfterStop implements the Modifier interface +func (e *EventHandler) AfterStop(_ manager.MapCleanupType) error { + if e.perfChan != nil { + close(e.perfChan) + } + if e.ringChan != nil { + close(e.ringChan) + } + return nil +} + func (e *EventHandler) String() string { return "EventHandler" } @@ -299,6 +324,13 @@ func ResizeRingBuffer(mgrOpts *manager.Options, mapName string, bufferSize int) } func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { + e.perfChan = make(chan *perf.Record, e.opts.channelSize) + e.readLoop = func() { + for record := range e.perfChan { + e.perfLoopHandler(record) + } + } + // remove any existing 
perf buffers from manager mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { return perfMap.Name == e.mapName @@ -321,12 +353,23 @@ func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { } func (e *EventHandler) perfRecordHandler(record *perf.Record, _ *manager.PerfMap, _ *manager.Manager) { + e.perfChan <- record +} + +func (e *EventHandler) perfLoopHandler(record *perf.Record) { // record is only allowed to live for the duration of the callback. Put it back into the sync.Pool once done. defer perfPool.Put(record) e.handler(record.RawSample) } func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { + e.ringChan = make(chan *ringbuf.Record, e.opts.channelSize) + e.readLoop = func() { + for record := range e.ringChan { + e.ringLoopHandler(record) + } + } + // remove any existing matching ring buffers from manager mgr.RingBuffers = slices.DeleteFunc(mgr.RingBuffers, func(ringBuf *manager.RingBuffer) bool { return ringBuf.Name == e.mapName @@ -345,6 +388,10 @@ func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { } func (e *EventHandler) ringRecordHandler(record *ringbuf.Record, _ *manager.RingBuffer, _ *manager.Manager) { + e.ringChan <- record +} + +func (e *EventHandler) ringLoopHandler(record *ringbuf.Record) { // record is only allowed to live for the duration of the callback. Put it back into the sync.Pool once done. defer ringbufPool.Put(record) e.handler(record.RawSample) diff --git a/pkg/network/config/config.go b/pkg/network/config/config.go index e14e4a139ad311..ac12375f8f351a 100644 --- a/pkg/network/config/config.go +++ b/pkg/network/config/config.go @@ -212,6 +212,9 @@ type Config struct { // EnableEbpfConntracker enables the ebpf based network conntracker. 
Used only for testing at the moment EnableEbpfConntracker bool + // ClosedChannelSize specifies the size for closed channel for the tracer + ClosedChannelSize int + // ClosedBufferWakeupCount specifies the number of events that will buffer in a perf buffer before userspace is woken up. ClosedBufferWakeupCount int @@ -319,6 +322,7 @@ func New() *Config { MaxClosedConnectionsBuffered: uint32(cfg.GetInt64(sysconfig.FullKeyPath(spNS, "max_closed_connections_buffered"))), MaxFailedConnectionsBuffered: uint32(cfg.GetInt64(sysconfig.FullKeyPath(netNS, "max_failed_connections_buffered"))), ClosedConnectionFlushThreshold: cfg.GetInt(sysconfig.FullKeyPath(netNS, "closed_connection_flush_threshold")), + ClosedChannelSize: cfg.GetInt(sysconfig.FullKeyPath(netNS, "closed_channel_size")), ClosedBufferWakeupCount: cfg.GetInt(sysconfig.FullKeyPath(netNS, "closed_buffer_wakeup_count")), MaxConnectionsStateBuffered: cfg.GetInt(sysconfig.FullKeyPath(spNS, "max_connection_state_buffered")), ClientStateExpiry: 2 * time.Minute, diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index eb7fc83e7a67c6..de8d7e7fb8a596 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -138,7 +138,7 @@ var EbpfTracerTelemetry = struct { //nolint:revive // TODO } type ebpfTracer struct { - m *manager.Manager + m *ddebpf.Manager conns *maps.GenericMap[netebpf.ConnTuple, netebpf.ConnStats] tcpStats *maps.GenericMap[netebpf.ConnTuple, netebpf.TCPStats] @@ -228,7 +228,7 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace return nil, err } - var m *manager.Manager + var m *ddebpf.Manager var tracerType = TracerTypeFentry var closeTracerFn func() m, closeTracerFn, err = fentry.LoadTracer(config, mgrOptions, connCloseEventHandler) @@ -248,11 +248,11 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace tracerType = 
TracerType(kprobeTracerType) } m.DumpHandler = dumpMapsHandler - ddebpf.AddNameMappings(m, "npm_tracer") + ddebpf.AddNameMappings(m.Manager, "npm_tracer") var flusher perf.Flusher = connCloseEventHandler if config.CustomBatchingEnabled { - flusher, err = newConnBatchManager(m, extractor, connPool, tr.closedPerfCallback) + flusher, err = newConnBatchManager(m.Manager, extractor, connPool, tr.closedPerfCallback) if err != nil { return nil, err } @@ -272,21 +272,21 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace tr.closeTracer = closeTracerFn tr.ebpfTracerType = tracerType - tr.setupMapCleaner(m) + tr.setupMapCleaner(m.Manager) - tr.conns, err = maps.GetMap[netebpf.ConnTuple, netebpf.ConnStats](m, probes.ConnMap) + tr.conns, err = maps.GetMap[netebpf.ConnTuple, netebpf.ConnStats](m.Manager, probes.ConnMap) if err != nil { tr.Stop() return nil, fmt.Errorf("error retrieving the bpf %s map: %s", probes.ConnMap, err) } - tr.tcpStats, err = maps.GetMap[netebpf.ConnTuple, netebpf.TCPStats](m, probes.TCPStatsMap) + tr.tcpStats, err = maps.GetMap[netebpf.ConnTuple, netebpf.TCPStats](m.Manager, probes.TCPStatsMap) if err != nil { tr.Stop() return nil, fmt.Errorf("error retrieving the bpf %s map: %s", probes.TCPStatsMap, err) } - if tr.tcpRetransmits, err = maps.GetMap[netebpf.ConnTuple, uint32](m, probes.TCPRetransmitsMap); err != nil { + if tr.tcpRetransmits, err = maps.GetMap[netebpf.ConnTuple, uint32](m.Manager, probes.TCPRetransmitsMap); err != nil { tr.Stop() return nil, fmt.Errorf("error retrieving the bpf %s map: %s", probes.TCPRetransmitsMap, err) } @@ -337,9 +337,9 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw } perfBufferSize := util.ComputeDefaultClosedConnPerfBufferSize() - mode := perf.UsePerfBuffers(perfBufferSize, perfMode) + mode := perf.UsePerfBuffers(perfBufferSize, config.ClosedChannelSize, perfMode) if config.RingBufferSupportedNPM() { - mode = perf.UpgradePerfBuffers(perfBufferSize, 
perfMode, util.ComputeDefaultClosedConnRingBufferSize()) + mode = perf.UpgradePerfBuffers(perfBufferSize, config.ClosedChannelSize, perfMode, util.ComputeDefaultClosedConnRingBufferSize()) } return perf.NewEventHandler(probes.ConnCloseEventMap, handler, mode, @@ -405,8 +405,8 @@ func (t *ebpfTracer) FlushPending() { func (t *ebpfTracer) Stop() { t.stopOnce.Do(func() { - ddebpf.RemoveNameMappings(t.m) - ebpftelemetry.UnregisterTelemetry(t.m) + ddebpf.RemoveNameMappings(t.m.Manager) + ebpftelemetry.UnregisterTelemetry(t.m.Manager) _ = t.m.Stop(manager.CleanAll) t.closeConsumer.Stop() t.ongoingConnectCleaner.Stop() @@ -551,7 +551,7 @@ func (t *ebpfTracer) Remove(conn *network.ConnectionStats) error { func (t *ebpfTracer) getEBPFTelemetry() *netebpf.Telemetry { var zero uint32 - mp, err := maps.GetMap[uint32, netebpf.Telemetry](t.m, probes.TelemetryMap) + mp, err := maps.GetMap[uint32, netebpf.Telemetry](t.m.Manager, probes.TelemetryMap) if err != nil { log.Warnf("error retrieving telemetry map: %s", err) return nil @@ -681,7 +681,7 @@ func (t *ebpfTracer) initializePortBindingMaps() error { return fmt.Errorf("failed to read initial TCP pid->port mapping: %s", err) } - tcpPortMap, err := maps.GetMap[netebpf.PortBinding, uint32](t.m, probes.PortBindingsMap) + tcpPortMap, err := maps.GetMap[netebpf.PortBinding, uint32](t.m.Manager, probes.PortBindingsMap) if err != nil { return fmt.Errorf("failed to get TCP port binding map: %w", err) } @@ -699,7 +699,7 @@ func (t *ebpfTracer) initializePortBindingMaps() error { return fmt.Errorf("failed to read initial UDP pid->port mapping: %s", err) } - udpPortMap, err := maps.GetMap[netebpf.PortBinding, uint32](t.m, probes.UDPPortBindingsMap) + udpPortMap, err := maps.GetMap[netebpf.PortBinding, uint32](t.m.Manager, probes.UDPPortBindingsMap) if err != nil { return fmt.Errorf("failed to get UDP port binding map: %w", err) } diff --git a/pkg/network/tracer/connection/fentry/tracer.go b/pkg/network/tracer/connection/fentry/tracer.go 
index 4a7d6121b5ade6..071d9432cf7b4d 100644 --- a/pkg/network/tracer/connection/fentry/tracer.go +++ b/pkg/network/tracer/connection/fentry/tracer.go @@ -29,7 +29,7 @@ const probeUID = "net" var ErrorNotSupported = errors.New("fentry tracer is only supported on Fargate") //nolint:revive // TODO // LoadTracer loads a new tracer -func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { +func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*ddebpf.Manager, func(), error) { if !fargate.IsFargateInstance() { return nil, nil, ErrorNotSupported } @@ -46,7 +46,7 @@ func LoadTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHa return nil, nil, err } - return m.Manager, nil, nil + return m, nil, nil } // Use a function so someone doesn't accidentally use mgrOpts from the outer scope in LoadTracer diff --git a/pkg/network/tracer/connection/kprobe/tracer.go b/pkg/network/tracer/connection/kprobe/tracer.go index 40a0a8f49eae01..954d2159a3de51 100644 --- a/pkg/network/tracer/connection/kprobe/tracer.go +++ b/pkg/network/tracer/connection/kprobe/tracer.go @@ -75,7 +75,7 @@ func ClassificationSupported(config *config.Config) bool { } // LoadTracer loads the co-re/prebuilt/runtime compiled network tracer, depending on config -func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), TracerType, error) { +func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*ddebpf.Manager, func(), TracerType, error) { kprobeAttachMethod := manager.AttachKprobeWithPerfEventOpen if cfg.AttachKprobesWithKprobeEventsABI { kprobeAttachMethod = manager.AttachKprobeWithKprobeEvents @@ -89,7 +89,7 @@ func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandl return nil, nil, TracerTypeCORE, fmt.Errorf("error 
determining if CO-RE tracer is supported: %w", err) } - var m *manager.Manager + var m *ddebpf.Manager var closeFn func() if err == nil { m, closeFn, err = coreTracerLoader(cfg, mgrOpts, connCloseEventHandler) @@ -140,7 +140,7 @@ func LoadTracer(cfg *config.Config, mgrOpts manager.Options, connCloseEventHandl return m, closeFn, TracerTypePrebuilt, err } -func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer bool, config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { +func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer bool, config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*ddebpf.Manager, func(), error) { m := ddebpf.NewManagerWithDefault(&manager.Manager{}, "network", &ebpftelemetry.ErrorsTelemetryModifier{}, connCloseEventHandler) if err := initManager(m, runtimeTracer); err != nil { return nil, nil, fmt.Errorf("could not initialize manager: %w", err) @@ -225,11 +225,11 @@ func loadTracerFromAsset(buf bytecode.AssetReader, runtimeTracer, coreTracer boo return nil, nil, fmt.Errorf("failed to init ebpf manager: %w", err) } - return m.Manager, closeProtocolClassifierSocketFilterFn, nil + return m, closeProtocolClassifierSocketFilterFn, nil } -func loadCORETracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { - var m *manager.Manager +func loadCORETracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*ddebpf.Manager, func(), error) { + var m *ddebpf.Manager var closeFn func() var err error err = ddebpf.LoadCOREAsset(netebpf.ModuleFileName("tracer", config.BPFDebug), func(ar bytecode.AssetReader, o manager.Options) error { @@ -246,7 +246,7 @@ func loadCORETracer(config *config.Config, mgrOpts manager.Options, connCloseEve return m, closeFn, err } -func loadRuntimeCompiledTracer(config 
*config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { +func loadRuntimeCompiledTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*ddebpf.Manager, func(), error) { buf, err := getRuntimeCompiledTracer(config) if err != nil { return nil, nil, err @@ -256,7 +256,7 @@ func loadRuntimeCompiledTracer(config *config.Config, mgrOpts manager.Options, c return tracerLoaderFromAsset(buf, true, false, config, mgrOpts, connCloseEventHandler) } -func loadPrebuiltTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*manager.Manager, func(), error) { +func loadPrebuiltTracer(config *config.Config, mgrOpts manager.Options, connCloseEventHandler *perf.EventHandler) (*ddebpf.Manager, func(), error) { buf, err := netebpf.ReadBPFModule(config.BPFDir, config.BPFDebug) if err != nil { return nil, nil, fmt.Errorf("could not read bpf module: %w", err) diff --git a/pkg/network/tracer/connection/kprobe/tracer_test.go b/pkg/network/tracer/connection/kprobe/tracer_test.go index 5727c50bd51693..6b731dc95ccc88 100644 --- a/pkg/network/tracer/connection/kprobe/tracer_test.go +++ b/pkg/network/tracer/connection/kprobe/tracer_test.go @@ -15,6 +15,7 @@ import ( manager "github.com/DataDog/ebpf-manager" + ddebpf "github.com/DataDog/datadog-agent/pkg/ebpf" "github.com/DataDog/datadog-agent/pkg/ebpf/bytecode" "github.com/DataDog/datadog-agent/pkg/ebpf/perf" "github.com/DataDog/datadog-agent/pkg/network/config" @@ -169,14 +170,14 @@ func testTracerFallbackCOREAndRCErr(t *testing.T) { runFallbackTests(t, "CORE and RC error", true, true, tests) } -func loaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { - return func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { +func loaderFunc(closeFn func(), err error) func(_ 
*config.Config, _ manager.Options, _ *perf.EventHandler) (*ddebpf.Manager, func(), error) { + return func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*ddebpf.Manager, func(), error) { return nil, closeFn, err } } -func prebuiltLoaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { - return func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { +func prebuiltLoaderFunc(closeFn func(), err error) func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*ddebpf.Manager, func(), error) { + return func(_ *config.Config, _ manager.Options, _ *perf.EventHandler) (*ddebpf.Manager, func(), error) { return nil, closeFn, err } } @@ -251,12 +252,12 @@ func TestCORETracerSupported(t *testing.T) { }) coreCalled := false - coreTracerLoader = func(*config.Config, manager.Options, *perf.EventHandler) (*manager.Manager, func(), error) { + coreTracerLoader = func(*config.Config, manager.Options, *perf.EventHandler) (*ddebpf.Manager, func(), error) { coreCalled = true return nil, nil, nil } prebuiltCalled := false - prebuiltTracerLoader = func(*config.Config, manager.Options, *perf.EventHandler) (*manager.Manager, func(), error) { + prebuiltTracerLoader = func(*config.Config, manager.Options, *perf.EventHandler) (*ddebpf.Manager, func(), error) { prebuiltCalled = true return nil, nil, nil } @@ -296,7 +297,7 @@ func TestCORETracerSupported(t *testing.T) { func TestDefaultKprobeMaxActiveSet(t *testing.T) { prevLoader := tracerLoaderFromAsset - tracerLoaderFromAsset = func(_ bytecode.AssetReader, _, _ bool, _ *config.Config, mgrOpts manager.Options, _ *perf.EventHandler) (*manager.Manager, func(), error) { + tracerLoaderFromAsset = func(_ bytecode.AssetReader, _, _ bool, _ *config.Config, mgrOpts manager.Options, _ *perf.EventHandler) (*ddebpf.Manager, func(), error) { assert.Equal(t, mgrOpts.DefaultKProbeMaxActive, 128) return nil, nil, 
nil } From e6eb153c749085c3f47e3b1c59474427384749b7 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Fri, 13 Dec 2024 12:58:19 -0800 Subject: [PATCH 20/23] add telemetry for perf channel len --- pkg/ebpf/perf/event.go | 54 ++++++++++++++++---- pkg/ebpf/telemetry/perf_metrics.go | 81 +++++++++++++++++++++++++++--- 2 files changed, 117 insertions(+), 18 deletions(-) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 387abb73f57410..ad0a0318e2f25e 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -12,6 +12,7 @@ import ( "errors" "fmt" "slices" + "sync/atomic" manager "github.com/DataDog/ebpf-manager" "github.com/cilium/ebpf" @@ -50,6 +51,8 @@ type EventHandler struct { readLoop func() perfChan chan *perf.Record ringChan chan *ringbuf.Record + + chLenTelemetry *atomic.Uint64 } type mapMode uint8 @@ -182,6 +185,9 @@ func NewEventHandler(mapName string, handler func([]byte), mode EventHandlerMode for _, opt := range opts { opt(e) } + if e.opts.telemetryEnabled { + e.chLenTelemetry = &atomic.Uint64{} + } return e, nil } @@ -323,13 +329,15 @@ func ResizeRingBuffer(mgrOpts *manager.Options, mapName string, bufferSize int) mgrOpts.MapSpecEditors[mapName] = specEditor } +func (e *EventHandler) perfLoop() { + for record := range e.perfChan { + e.perfLoopHandler(record) + } +} + func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { e.perfChan = make(chan *perf.Record, e.opts.channelSize) - e.readLoop = func() { - for record := range e.perfChan { - e.perfLoopHandler(record) - } - } + e.readLoop = e.perfLoop // remove any existing perf buffers from manager mgr.PerfMaps = slices.DeleteFunc(mgr.PerfMaps, func(perfMap *manager.PerfMap) bool { @@ -349,11 +357,17 @@ func (e *EventHandler) initPerfBuffer(mgr *manager.Manager) { } mgr.PerfMaps = append(mgr.PerfMaps, pm) ebpfTelemetry.ReportPerfMapTelemetry(pm) + ebpfTelemetry.ReportPerfMapChannelLenTelemetry(pm, func() int { + return int(e.chLenTelemetry.Swap(0)) + }) e.f = pm } func (e 
*EventHandler) perfRecordHandler(record *perf.Record, _ *manager.PerfMap, _ *manager.Manager) { e.perfChan <- record + if e.opts.telemetryEnabled { + updateMaxTelemetry(e.chLenTelemetry, uint64(len(e.perfChan))) + } } func (e *EventHandler) perfLoopHandler(record *perf.Record) { @@ -364,11 +378,7 @@ func (e *EventHandler) perfLoopHandler(record *perf.Record) { func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { e.ringChan = make(chan *ringbuf.Record, e.opts.channelSize) - e.readLoop = func() { - for record := range e.ringChan { - e.ringLoopHandler(record) - } - } + e.readLoop = e.ringLoop // remove any existing matching ring buffers from manager mgr.RingBuffers = slices.DeleteFunc(mgr.RingBuffers, func(ringBuf *manager.RingBuffer) bool { @@ -384,11 +394,23 @@ func (e *EventHandler) initRingBuffer(mgr *manager.Manager) { } mgr.RingBuffers = append(mgr.RingBuffers, rb) ebpfTelemetry.ReportRingBufferTelemetry(rb) + ebpfTelemetry.ReportRingBufferChannelLenTelemetry(rb, func() int { + return int(e.chLenTelemetry.Swap(0)) + }) e.f = rb } +func (e *EventHandler) ringLoop() { + for record := range e.ringChan { + e.ringLoopHandler(record) + } +} + func (e *EventHandler) ringRecordHandler(record *ringbuf.Record, _ *manager.RingBuffer, _ *manager.Manager) { e.ringChan <- record + if e.opts.telemetryEnabled { + updateMaxTelemetry(e.chLenTelemetry, uint64(len(e.ringChan))) + } } func (e *EventHandler) ringLoopHandler(record *ringbuf.Record) { @@ -414,3 +436,15 @@ func UpgradePerfBuffer(mgr *manager.Manager, mgrOpts *manager.Options, mapName s return perfMap.Name == mapName }) } + +func updateMaxTelemetry(a *atomic.Uint64, val uint64) { + for { + oldVal := a.Load() + if val <= oldVal { + return + } + if a.CompareAndSwap(oldVal, val) { + return + } + } +} diff --git a/pkg/ebpf/telemetry/perf_metrics.go b/pkg/ebpf/telemetry/perf_metrics.go index 84324790e1d28f..a8f6ed2e6602ee 100644 --- a/pkg/ebpf/telemetry/perf_metrics.go +++ b/pkg/ebpf/telemetry/perf_metrics.go @@ 
-22,19 +22,25 @@ var ( ) type perfUsageCollector struct { - mtx sync.Mutex - usage *prometheus.GaugeVec - usagePct *prometheus.GaugeVec - size *prometheus.GaugeVec - lost *prometheus.CounterVec - - perfMaps []*manager.PerfMap - ringBuffers []*manager.RingBuffer + mtx sync.Mutex + usage *prometheus.GaugeVec + usagePct *prometheus.GaugeVec + size *prometheus.GaugeVec + lost *prometheus.CounterVec + channelLen *prometheus.GaugeVec + + perfMaps []*manager.PerfMap + perfChannelLenFuncs map[*manager.PerfMap]func() int + + ringBuffers []*manager.RingBuffer + ringChannelLenFuncs map[*manager.RingBuffer]func() int } // NewPerfUsageCollector creates a prometheus.Collector for perf buffer and ring buffer metrics func NewPerfUsageCollector() prometheus.Collector { perfCollector = &perfUsageCollector{ + perfChannelLenFuncs: make(map[*manager.PerfMap]func() int), + ringChannelLenFuncs: make(map[*manager.RingBuffer]func() int), usage: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Subsystem: "ebpf__perf", @@ -67,6 +73,14 @@ func NewPerfUsageCollector() prometheus.Collector { }, []string{"map_name", "map_type", "cpu_num"}, ), + channelLen: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: "ebpf__perf", + Name: "_channel_len", + Help: "gauge tracking number of elements in buffer channel", + }, + []string{"map_name", "map_type"}, + ), } return perfCollector } @@ -103,6 +117,11 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { } } + for pm, chFunc := range p.perfChannelLenFuncs { + mapName, mapType := pm.Name, ebpf.PerfEventArray.String() + p.channelLen.WithLabelValues(mapName, mapType).Set(float64(chFunc())) + } + for _, rb := range p.ringBuffers { mapName, mapType := rb.Name, ebpf.RingBuf.String() size := float64(rb.BufferSize()) @@ -118,10 +137,16 @@ func (p *perfUsageCollector) Collect(metrics chan<- prometheus.Metric) { p.size.WithLabelValues(mapName, mapType, cpuString).Set(size) } + for rb, chFunc := range p.ringChannelLenFuncs { + mapName, 
mapType := rb.Name, ebpf.RingBuf.String() + p.channelLen.WithLabelValues(mapName, mapType).Set(float64(chFunc())) + } + p.usage.Collect(metrics) p.usagePct.Collect(metrics) p.size.Collect(metrics) p.lost.Collect(metrics) + p.channelLen.Collect(metrics) } // ReportPerfMapTelemetry starts reporting the telemetry for the provided PerfMap @@ -132,6 +157,14 @@ func ReportPerfMapTelemetry(pm *manager.PerfMap) { perfCollector.registerPerfMap(pm) } +// ReportPerfMapChannelLenTelemetry starts reporting the telemetry for the provided PerfMap's buffer channel +func ReportPerfMapChannelLenTelemetry(pm *manager.PerfMap, channelLenFunc func() int) { + if perfCollector == nil { + return + } + perfCollector.registerPerfMapChannel(pm, channelLenFunc) +} + // ReportRingBufferTelemetry starts reporting the telemetry for the provided RingBuffer func ReportRingBufferTelemetry(rb *manager.RingBuffer) { if perfCollector == nil { @@ -140,6 +173,14 @@ func ReportRingBufferTelemetry(rb *manager.RingBuffer) { perfCollector.registerRingBuffer(rb) } +// ReportRingBufferChannelLenTelemetry starts reporting the telemetry for the provided RingBuffer's buffer channel +func ReportRingBufferChannelLenTelemetry(rb *manager.RingBuffer, channelLenFunc func() int) { + if perfCollector == nil { + return + } + perfCollector.registerRingBufferChannel(rb, channelLenFunc) +} + func (p *perfUsageCollector) registerPerfMap(pm *manager.PerfMap) { if !pm.TelemetryEnabled { return @@ -149,6 +190,15 @@ func (p *perfUsageCollector) registerPerfMap(pm *manager.PerfMap) { p.perfMaps = append(p.perfMaps, pm) } +func (p *perfUsageCollector) registerPerfMapChannel(pm *manager.PerfMap, channelLenFunc func() int) { + if !pm.TelemetryEnabled { + return + } + p.mtx.Lock() + defer p.mtx.Unlock() + p.perfChannelLenFuncs[pm] = channelLenFunc +} + func (p *perfUsageCollector) registerRingBuffer(rb *manager.RingBuffer) { if !rb.TelemetryEnabled { return @@ -158,6 +208,15 @@ func (p *perfUsageCollector) registerRingBuffer(rb 
*manager.RingBuffer) { p.ringBuffers = append(p.ringBuffers, rb) } +func (p *perfUsageCollector) registerRingBufferChannel(rb *manager.RingBuffer, channelLenFunc func() int) { + if !rb.TelemetryEnabled { + return + } + p.mtx.Lock() + defer p.mtx.Unlock() + p.ringChannelLenFuncs[rb] = channelLenFunc +} + // UnregisterTelemetry unregisters the PerfMap and RingBuffers from telemetry func UnregisterTelemetry(m *manager.Manager) { if perfCollector == nil { @@ -172,7 +231,13 @@ func (p *perfUsageCollector) unregisterTelemetry(m *manager.Manager) { p.perfMaps = slices.DeleteFunc(p.perfMaps, func(perfMap *manager.PerfMap) bool { return slices.Contains(m.PerfMaps, perfMap) }) + for _, pm := range m.PerfMaps { + delete(p.perfChannelLenFuncs, pm) + } p.ringBuffers = slices.DeleteFunc(p.ringBuffers, func(ringBuf *manager.RingBuffer) bool { return slices.Contains(m.RingBuffers, ringBuf) }) + for _, rb := range m.RingBuffers { + delete(p.ringChannelLenFuncs, rb) + } } From 0f1ed30f67112d546129f6794f8de7a1a3c89017 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 16 Dec 2024 13:54:41 -0800 Subject: [PATCH 21/23] calculate channel size based on batching used --- pkg/network/tracer/connection/ebpf_tracer.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index de8d7e7fb8a596..be2524d88e7b4f 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -312,8 +312,11 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw handler := singleConnHandler perfMode := perf.WakeupEvents(config.ClosedBufferWakeupCount) + // multiply by number of connections with in-buffer batching to have same effective size as with custom batching + chanSize := config.ClosedChannelSize * config.ClosedBufferWakeupCount if config.CustomBatchingEnabled { perfMode = perf.Watermark(1) + chanSize = 
config.ClosedChannelSize handler = func(buf []byte) { l := len(buf) switch { @@ -337,9 +340,9 @@ func initClosedConnEventHandler(config *config.Config, closedCallback func(*netw } perfBufferSize := util.ComputeDefaultClosedConnPerfBufferSize() - mode := perf.UsePerfBuffers(perfBufferSize, config.ClosedChannelSize, perfMode) + mode := perf.UsePerfBuffers(perfBufferSize, chanSize, perfMode) if config.RingBufferSupportedNPM() { - mode = perf.UpgradePerfBuffers(perfBufferSize, config.ClosedChannelSize, perfMode, util.ComputeDefaultClosedConnRingBufferSize()) + mode = perf.UpgradePerfBuffers(perfBufferSize, chanSize, perfMode, util.ComputeDefaultClosedConnRingBufferSize()) } return perf.NewEventHandler(probes.ConnCloseEventMap, handler, mode, From fed0a5700b9dc270ee173ea09b217217c9bbdade Mon Sep 17 00:00:00 2001 From: usamasaqib Date: Mon, 23 Dec 2024 12:36:59 +0100 Subject: [PATCH 22/23] add comments explaining the `updateMaxTelemetry` function --- pkg/ebpf/perf/event.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index ad0a0318e2f25e..6ca3591c9dc1c3 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -437,12 +437,18 @@ func UpgradePerfBuffer(mgr *manager.Manager, mgrOpts *manager.Options, mapName s }) } +// implement the CAS algorithm to atomically update a max value func updateMaxTelemetry(a *atomic.Uint64, val uint64) { for { oldVal := a.Load() if val <= oldVal { return } + // if the value at a is not `oldVal`, then `CompareAndSwap` returns + // false indicating that the value of the atomic has changed between + // the above check and this invocation. + // In this case we retry the above test, to see if the value still needs + // to be updated. 
if a.CompareAndSwap(oldVal, val) { return } From 496324c05d0dddb8776755a7998b7cfe41a440c3 Mon Sep 17 00:00:00 2001 From: usamasaqib Date: Mon, 23 Dec 2024 12:59:54 +0100 Subject: [PATCH 23/23] add explanatory comments about the BeforeInit implementation of EventHandler --- pkg/ebpf/perf/event.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/ebpf/perf/event.go b/pkg/ebpf/perf/event.go index 6ca3591c9dc1c3..ee1c287be698e3 100644 --- a/pkg/ebpf/perf/event.go +++ b/pkg/ebpf/perf/event.go @@ -192,6 +192,7 @@ func NewEventHandler(mapName string, handler func([]byte), mode EventHandlerMode } // BeforeInit implements the Modifier interface +// This function will modify the shared buffers according to the user provided mode func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleName, mgrOpts *manager.Options) (err error) { ms, _, _ := mgr.GetMapSpec(e.mapName) if ms == nil { @@ -209,6 +210,8 @@ func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleN return fmt.Errorf("map %q is not a ring buffer, got %q instead", e.mapName, ms.Type.String()) } + // the size of the ring buffer is communicated to the kernel via the max entries field + // of the bpf map if ms.MaxEntries != uint32(e.opts.ringBufferSize) { ResizeRingBuffer(mgrOpts, e.mapName, e.opts.ringBufferSize) } @@ -229,6 +232,11 @@ func (e *EventHandler) BeforeInit(mgr *manager.Manager, moduleName names.ModuleN if ms.Type != ebpf.PerfEventArray { return fmt.Errorf("map %q is not a perf buffer, got %q instead", e.mapName, ms.Type.String()) } + + // the layout of the bpf map for perf buffers does not match that of ring buffers. + // When upgrading perf buffers to ring buffers, we must account for these differences. + // - Ring buffers do not use key/value sizes + // - Ring buffers specify their size via max entries if ringBufErr == nil { UpgradePerfBuffer(mgr, mgrOpts, e.mapName) if ms.MaxEntries != uint32(e.opts.ringBufferSize) {