diff --git a/pkg/network/ebpf/c/tracer.c b/pkg/network/ebpf/c/tracer.c index 7555758f40524..909243dba174e 100644 --- a/pkg/network/ebpf/c/tracer.c +++ b/pkg/network/ebpf/c/tracer.c @@ -238,6 +238,7 @@ int BPF_BYPASSABLE_KPROBE(kprobe__tcp_done, struct sock *sk) { __u64 timestamp = bpf_ktime_get_ns(); if (bpf_map_update_with_telemetry(conn_close_flushed, &t, &timestamp, BPF_NOEXIST, -EEXIST) == 0) { cleanup_conn(ctx, &t, sk); + increment_telemetry_count(tcp_done_connection_flush); flush_tcp_failure(ctx, &t, err); } else { bpf_map_delete_elem(&conn_close_flushed, &t); @@ -287,6 +288,7 @@ int BPF_BYPASSABLE_KPROBE(kprobe__tcp_close, struct sock *sk) { __u64 timestamp = bpf_ktime_get_ns(); if (bpf_map_update_with_telemetry(conn_close_flushed, &t, &timestamp, BPF_NOEXIST, -EEXIST) == 0) { cleanup_conn(ctx, &t, sk); + increment_telemetry_count(tcp_close_connection_flush); int err = 0; bpf_probe_read_kernel_with_telemetry(&err, sizeof(err), (&sk->sk_err)); if (err == TCP_CONN_FAILED_RESET || err == TCP_CONN_FAILED_TIMEOUT || err == TCP_CONN_FAILED_REFUSED) { diff --git a/pkg/network/ebpf/c/tracer/maps.h b/pkg/network/ebpf/c/tracer/maps.h index f02066fe4b4dc..6ac62657e90f9 100644 --- a/pkg/network/ebpf/c/tracer/maps.h +++ b/pkg/network/ebpf/c/tracer/maps.h @@ -27,7 +27,7 @@ BPF_HASH_MAP(tcp_retransmits, conn_tuple_t, __u32, 0) BPF_HASH_MAP(tcp_ongoing_connect_pid, skp_conn_tuple_t, pid_ts_t, 0) /* Will hold a flag to indicate that closed connections have already been flushed */ -BPF_HASH_MAP(conn_close_flushed, conn_tuple_t, __u64, 8192) +BPF_HASH_MAP(conn_close_flushed, conn_tuple_t, __u64, 16384) /* Will hold the tcp/udp close events * The keys are the cpu number and the values a perf file descriptor for a perf event diff --git a/pkg/network/ebpf/c/tracer/telemetry.h b/pkg/network/ebpf/c/tracer/telemetry.h index a6067f1fae501..b723f1c4b8b71 100644 --- a/pkg/network/ebpf/c/tracer/telemetry.h +++ b/pkg/network/ebpf/c/tracer/telemetry.h @@ -29,6 +29,8 @@ enum telemetry_counter { 
tcp_done_failed_tuple, tcp_finish_connect_failed_tuple, tcp_close_target_failures, + tcp_done_connection_flush, + tcp_close_connection_flush }; static __always_inline void increment_telemetry_count(enum telemetry_counter counter_name) { @@ -82,6 +84,12 @@ static __always_inline void increment_telemetry_count(enum telemetry_counter cou case tcp_close_target_failures: __sync_fetch_and_add(&val->tcp_close_target_failures, 1); break; + case tcp_done_connection_flush: + __sync_fetch_and_add(&val->tcp_done_connection_flush, 1); + break; + case tcp_close_connection_flush: + __sync_fetch_and_add(&val->tcp_close_connection_flush, 1); + break; } } diff --git a/pkg/network/ebpf/c/tracer/tracer.h b/pkg/network/ebpf/c/tracer/tracer.h index cb2a96ff7a66c..f99301993b539 100644 --- a/pkg/network/ebpf/c/tracer/tracer.h +++ b/pkg/network/ebpf/c/tracer/tracer.h @@ -119,6 +119,8 @@ typedef struct { __u64 tcp_done_failed_tuple; __u64 tcp_finish_connect_failed_tuple; __u64 tcp_close_target_failures; + __u64 tcp_done_connection_flush; + __u64 tcp_close_connection_flush; } telemetry_t; typedef struct { diff --git a/pkg/network/ebpf/kprobe_types_linux.go b/pkg/network/ebpf/kprobe_types_linux.go index 58cee5d1115aa..13d63751f2adc 100644 --- a/pkg/network/ebpf/kprobe_types_linux.go +++ b/pkg/network/ebpf/kprobe_types_linux.go @@ -78,6 +78,8 @@ type Telemetry struct { Tcp_done_failed_tuple uint64 Tcp_finish_connect_failed_tuple uint64 Tcp_close_target_failures uint64 + Tcp_done_connection_flush uint64 + Tcp_close_connection_flush uint64 } type PortBinding struct { Netns uint32 diff --git a/pkg/network/tracer/connection/ebpf_tracer.go b/pkg/network/tracer/connection/ebpf_tracer.go index 4792e6f2aca5a..7fc8559b477ad 100644 --- a/pkg/network/tracer/connection/ebpf_tracer.go +++ b/pkg/network/tracer/connection/ebpf_tracer.go @@ -69,6 +69,8 @@ var EbpfTracerTelemetry = struct { tcpDoneFailedTuple *prometheus.Desc tcpFinishConnectFailedTuple *prometheus.Desc tcpCloseTargetFailures *prometheus.Desc 
+ tcpDoneConnectionFlush *prometheus.Desc + tcpCloseConnectionFlush *prometheus.Desc ongoingConnectPidCleaned telemetry.Counter PidCollisions *telemetry.StatCounterWrapper iterationDups telemetry.Counter @@ -93,6 +95,8 @@ var EbpfTracerTelemetry = struct { lastTcpDoneFailedTuple *atomic.Int64 lastTcpFinishConnectFailedTuple *atomic.Int64 lastTcpCloseTargetFailures *atomic.Int64 + lastTcpDoneConnectionFlush *atomic.Int64 + lastTcpCloseConnectionFlush *atomic.Int64 }{ telemetry.NewGauge(connTracerModuleName, "connections", []string{"ip_proto", "family"}, "Gauge measuring the number of active connections in the EBPF map"), prometheus.NewDesc(connTracerModuleName+"__tcp_failed_connects", "Counter measuring the number of failed TCP connections in the EBPF map", nil, nil), @@ -110,6 +114,8 @@ var EbpfTracerTelemetry = struct { prometheus.NewDesc(connTracerModuleName+"__tcp_done_failed_tuple", "Counter measuring the number of failed TCP connections due to tuple collisions", nil, nil), prometheus.NewDesc(connTracerModuleName+"__tcp_finish_connect_failed_tuple", "Counter measuring the number of failed TCP connections due to tuple collisions", nil, nil), prometheus.NewDesc(connTracerModuleName+"__tcp_close_target_failures", "Counter measuring the number of failed TCP connections in tcp_close", nil, nil), + prometheus.NewDesc(connTracerModuleName+"__tcp_done_connection_flush", "Counter measuring the number of connection flushes performed in tcp_done", nil, nil), + prometheus.NewDesc(connTracerModuleName+"__tcp_close_connection_flush", "Counter measuring the number of connection flushes performed in tcp_close", nil, nil), telemetry.NewCounter(connTracerModuleName, "ongoing_connect_pid_cleaned", []string{}, "Counter measuring the number of tcp_ongoing_connect_pid entries cleaned in userspace"), telemetry.NewStatCounterWrapper(connTracerModuleName, "pid_collisions", []string{}, "Counter measuring number of process collisions"), telemetry.NewCounter(connTracerModuleName, 
"iteration_dups", []string{}, "Counter measuring the number of connections iterated more than once"), @@ -129,6 +135,8 @@ var EbpfTracerTelemetry = struct { atomic.NewInt64(0), atomic.NewInt64(0), atomic.NewInt64(0), + atomic.NewInt64(0), + atomic.NewInt64(0), } type ebpfTracer struct { @@ -533,6 +541,8 @@ func (t *ebpfTracer) Describe(ch chan<- *prometheus.Desc) { ch <- EbpfTracerTelemetry.tcpDoneFailedTuple ch <- EbpfTracerTelemetry.tcpFinishConnectFailedTuple ch <- EbpfTracerTelemetry.tcpCloseTargetFailures + ch <- EbpfTracerTelemetry.tcpDoneConnectionFlush + ch <- EbpfTracerTelemetry.tcpCloseConnectionFlush } // Collect returns the current state of all metrics of the collector @@ -600,6 +610,14 @@ func (t *ebpfTracer) Collect(ch chan<- prometheus.Metric) { delta = int64(ebpfTelemetry.Tcp_close_target_failures) - EbpfTracerTelemetry.lastTcpCloseTargetFailures.Load() EbpfTracerTelemetry.lastTcpCloseTargetFailures.Store(int64(ebpfTelemetry.Tcp_close_target_failures)) ch <- prometheus.MustNewConstMetric(EbpfTracerTelemetry.tcpCloseTargetFailures, prometheus.CounterValue, float64(delta)) + + delta = int64(ebpfTelemetry.Tcp_done_connection_flush) - EbpfTracerTelemetry.lastTcpDoneConnectionFlush.Load() + EbpfTracerTelemetry.lastTcpDoneConnectionFlush.Store(int64(ebpfTelemetry.Tcp_done_connection_flush)) + ch <- prometheus.MustNewConstMetric(EbpfTracerTelemetry.tcpDoneConnectionFlush, prometheus.CounterValue, float64(delta)) + + delta = int64(ebpfTelemetry.Tcp_close_connection_flush) - EbpfTracerTelemetry.lastTcpCloseConnectionFlush.Load() + EbpfTracerTelemetry.lastTcpCloseConnectionFlush.Store(int64(ebpfTelemetry.Tcp_close_connection_flush)) + ch <- prometheus.MustNewConstMetric(EbpfTracerTelemetry.tcpCloseConnectionFlush, prometheus.CounterValue, float64(delta)) } // DumpMaps (for debugging purpose) returns all maps content by default or selected maps from maps parameter.