diff --git a/bpf/.clang-format b/bpf/.clang-format
index 63e1c63a1..a7342cf28 100644
--- a/bpf/.clang-format
+++ b/bpf/.clang-format
@@ -1,5 +1,6 @@
 {
     BasedOnStyle: LLVM,
+    BreakStringLiterals: true,
     AllowShortFunctionsOnASingleLine: InlineOnly,
     ColumnLimit: 100,
     IndentWidth: 4,
diff --git a/bpf/bpf_dbg.h b/bpf/bpf_dbg.h
index af57a1a94..99dd60604 100644
--- a/bpf/bpf_dbg.h
+++ b/bpf/bpf_dbg.h
@@ -23,33 +23,34 @@ typedef struct log_info {
 struct {
     __uint(type, BPF_MAP_TYPE_RINGBUF);
     __uint(max_entries, 1 << 12);
-    __uint(pinning, LIBBPF_PIN_BY_NAME); 
+    __uint(pinning, LIBBPF_PIN_BY_NAME);
 } debug_events SEC(".maps");
 
 enum bpf_func_id___x { BPF_FUNC_snprintf___x = 42 /* avoid zero */ };
 
-#define bpf_dbg_helper(fmt, args...) { \
-    log_info_t *__trace__ = bpf_ringbuf_reserve(&debug_events, sizeof(log_info_t), 0); \
-    if (__trace__) { \
-        if(bpf_core_enum_value_exists(enum bpf_func_id___x, BPF_FUNC_snprintf___x)) { \
-            BPF_SNPRINTF(__trace__->log, sizeof(__trace__->log), fmt, ##args); \
-        } else { \
-            __builtin_memcpy(__trace__->log, fmt, sizeof(__trace__->log)); \
-        } \
-        u64 id = bpf_get_current_pid_tgid(); \
-        bpf_get_current_comm(&__trace__->comm, sizeof(__trace__->comm)); \
-        __trace__->pid = id >> 32; \
-        bpf_ringbuf_submit(__trace__, 0); \
-    } \
-}
-
-#define bpf_dbg_printk(fmt, args...) { \
-    bpf_printk(fmt, ##args); \
-    bpf_dbg_helper(fmt, ##args); \
-}
+#define bpf_dbg_helper(fmt, args...) \
+    { \
+        log_info_t *__trace__ = bpf_ringbuf_reserve(&debug_events, sizeof(log_info_t), 0); \
+        if (__trace__) { \
+            if (bpf_core_enum_value_exists(enum bpf_func_id___x, BPF_FUNC_snprintf___x)) { \
+                BPF_SNPRINTF(__trace__->log, sizeof(__trace__->log), fmt, ##args); \
+            } else { \
+                __builtin_memcpy(__trace__->log, fmt, sizeof(__trace__->log)); \
+            } \
+            u64 id = bpf_get_current_pid_tgid(); \
+            bpf_get_current_comm(&__trace__->comm, sizeof(__trace__->comm)); \
+            __trace__->pid = id >> 32; \
+            bpf_ringbuf_submit(__trace__, 0); \
+        } \
+    }
+
+#define bpf_dbg_printk(fmt, args...) \
+    { \
+        bpf_printk(fmt, ##args); \
+        bpf_dbg_helper(fmt, ##args); \
+    }
 #else
 #define bpf_dbg_printk(fmt, args...)
 #endif
 
 #endif
-
diff --git a/bpf/flow.h b/bpf/flow.h
index 29954cae2..63dcaa1a0 100644
--- a/bpf/flow.h
+++ b/bpf/flow.h
@@ -23,12 +23,12 @@
 #define TC_ACT_SHOT 2
 
 #define IP_MAX_LEN 16
 
-#define ETH_ALEN 6     /* Octets in one ethernet addr */
+#define ETH_ALEN 6 /* Octets in one ethernet addr */
 
-#define s6_addr in6_u.u6_addr8 
-#define ETH_P_IP 0x0800   /* Internet Protocol packet */
+#define s6_addr in6_u.u6_addr8
+#define ETH_P_IP 0x0800 /* Internet Protocol packet */
 // ETH_P_IPV6 value as defined in IEEE 802: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml
-#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
+#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */
 
 typedef __u8 u8;
 typedef __u16 u16;
 typedef __u32 u32;
diff --git a/bpf/flows.c b/bpf/flows.c
index 8f5a71271..273e1d3f0 100644
--- a/bpf/flows.c
+++ b/bpf/flows.c
@@ -26,14 +26,14 @@
 
 // sets the TCP header flags for connection information
 static inline void set_flags(struct tcphdr *th, u16 *flags) {
-    //If both ACK and SYN are set, then it is server -> client communication during 3-way handshake. 
+    //If both ACK and SYN are set, then it is server -> client communication during 3-way handshake.
     if (th->ack && th->syn) {
         *flags |= SYN_ACK_FLAG;
-    } else if (th->ack && th->fin ) {
+    } else if (th->ack && th->fin) {
         // If both ACK and FIN are set, then it is graceful termination from server.
         *flags |= FIN_ACK_FLAG;
-    } else if (th->ack && th->rst ) {
-    // If both ACK and RST are set, then it is abrupt connection termination. 
+    } else if (th->ack && th->rst) {
+        // If both ACK and RST are set, then it is abrupt connection termination.
         *flags |= RST_ACK_FLAG;
     } else if (th->fin) {
         *flags |= FIN_FLAG;
@@ -199,18 +199,18 @@ static inline int flow_monitor(struct __sk_buff *skb) {
     };
 
     u8 *direction = (u8 *)bpf_map_lookup_elem(&flow_directions, &id);
-    if(direction == NULL) {
+    if (direction == NULL) {
         // Calculate direction based on first flag received
         // SYN and ACK mean someone else initiated the connection and this is the INGRESS direction
-        if((flags & SYN_ACK_FLAG) == SYN_ACK_FLAG) {
+        if ((flags & SYN_ACK_FLAG) == SYN_ACK_FLAG) {
             new_flow.iface_direction = INGRESS;
         }
         // SYN only means we initiated the connection and this is the EGRESS direction
-        else if((flags & SYN_FLAG) == SYN_FLAG) {
+        else if ((flags & SYN_FLAG) == SYN_FLAG) {
             new_flow.iface_direction = EGRESS;
         }
         // save, when direction was calculated based on TCP flag
-        if(new_flow.iface_direction != UNKNOWN) {
+        if (new_flow.iface_direction != UNKNOWN) {
             // errors are intentionally omitted
             bpf_map_update_elem(&flow_directions, &id, &new_flow.iface_direction, BPF_NOEXIST);
         }
@@ -242,7 +242,8 @@ static inline int flow_monitor(struct __sk_buff *skb) {
         }
         new_flow.errno = -ret;
-        flow_record *record = (flow_record *)bpf_ringbuf_reserve(&direct_flows, sizeof(flow_record), 0);
+        flow_record *record =
+            (flow_record *)bpf_ringbuf_reserve(&direct_flows, sizeof(flow_record), 0);
         if (!record) {
             if (trace_messages) {
                 bpf_dbg_printk("couldn't reserve space in the ringbuf. Dropping flow");
@@ -257,7 +258,7 @@ static inline int flow_monitor(struct __sk_buff *skb) {
 
 cleanup:
     // finally, when flow receives FIN or RST, clean flow_directions
-    if(flags & FIN_FLAG || flags & RST_FLAG || flags & FIN_ACK_FLAG || flags & RST_ACK_FLAG) {
+    if (flags & FIN_FLAG || flags & RST_FLAG || flags & FIN_ACK_FLAG || flags & RST_ACK_FLAG) {
         bpf_map_delete_elem(&flow_directions, &id);
     }
     return TC_ACT_OK;
diff --git a/bpf/flows_sock.c b/bpf/flows_sock.c
index 79fbb3182..d7b6bd242 100644
--- a/bpf/flows_sock.c
+++ b/bpf/flows_sock.c
@@ -30,17 +30,18 @@ struct __tcphdr {
     __be16 dest;
     __be32 seq;
     __be32 ack_seq;
-    __u16 res1 : 4, doff : 4, fin : 1, syn : 1, rst : 1, psh : 1, ack : 1, urg : 1, ece : 1, cwr : 1;
+    __u16 res1 : 4, doff : 4, fin : 1, syn : 1, rst : 1, psh : 1, ack : 1, urg : 1, ece : 1,
+        cwr : 1;
     __be16 window;
     __sum16 check;
     __be16 urg_ptr;
 };
 
 struct __udphdr {
-    __be16  source;
-    __be16  dest;
-    __be16  len;
-    __sum16 check;
+    __be16 source;
+    __be16 dest;
+    __be16 len;
+    __sum16 check;
 };
 
 static __always_inline bool read_sk_buff(struct __sk_buff *skb, flow_id *id, u16 *custom_flags) {
@@ -85,10 +86,17 @@ static __always_inline bool read_sk_buff(struct __sk_buff *skb, flow_id *id, u16
         break;
     }
     case ETH_P_IPV6:
-        bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, nexthdr), &proto, sizeof(proto));
-
-        bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, saddr), &id->src_ip.s6_addr, sizeof(id->src_ip.s6_addr));
-        bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr), &id->dst_ip.s6_addr, sizeof(id->dst_ip.s6_addr));
+        bpf_skb_load_bytes(
+            skb, ETH_HLEN + offsetof(struct ipv6hdr, nexthdr), &proto, sizeof(proto));
+
+        bpf_skb_load_bytes(skb,
+                           ETH_HLEN + offsetof(struct ipv6hdr, saddr),
+                           &id->src_ip.s6_addr,
+                           sizeof(id->src_ip.s6_addr));
+        bpf_skb_load_bytes(skb,
+                           ETH_HLEN + offsetof(struct ipv6hdr, daddr),
+                           &id->dst_ip.s6_addr,
+                           sizeof(id->dst_ip.s6_addr));
 
         hdr_len = ETH_HLEN + sizeof(struct ipv6hdr);
         break;
@@ -100,40 +108,49 @@ static __always_inline bool read_sk_buff(struct __sk_buff *skb, flow_id *id, u16
     id->dst_port = 0;
     id->transport_protocol = proto;
 
-    switch(proto) {
-    case IPPROTO_TCP: {
-        u16 port;
-        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __tcphdr, source), &port, sizeof(port));
-        id->src_port = __bpf_htons(port);
-
-        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __tcphdr, dest), &port, sizeof(port));
-        id->dst_port = __bpf_htons(port);
-
-        u8 doff;
-        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __tcphdr, ack_seq) + 4, &doff, sizeof(doff)); // read the first byte past __tcphdr->ack_seq, we can't do offsetof bit fields
-        doff &= 0xf0; // clean-up res1
-        doff >>= 4; // move the upper 4 bits to low
-        doff *= 4; // convert to bytes length
-
-        u8 flags;
-        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __tcphdr, ack_seq) + 4 + 1, &flags, sizeof(flags)); // read the second byte past __tcphdr->doff, again bit fields offsets
-        *custom_flags = ((u16)flags & 0x00ff);
-
-        hdr_len += doff;
-
-        if ((skb->len - hdr_len) < 0) { // less than 0 is a packet we can't parse
-            return false;
-        }
-
-        break;
-    }
-    case IPPROTO_UDP: {
-        u16 port;
-        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __udphdr, source), &port, sizeof(port));
-        id->src_port = __bpf_htons(port);
-        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __udphdr, dest), &port, sizeof(port));
-        id->dst_port = __bpf_htons(port);
+    switch (proto) {
+    case IPPROTO_TCP: {
+        u16 port;
+        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __tcphdr, source), &port, sizeof(port));
+        id->src_port = __bpf_htons(port);
+
+        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __tcphdr, dest), &port, sizeof(port));
+        id->dst_port = __bpf_htons(port);
+
+        u8 doff;
+        bpf_skb_load_bytes(
+            skb,
+            hdr_len + offsetof(struct __tcphdr, ack_seq) + 4,
+            &doff,
+            sizeof(
+                doff)); // read the first byte past __tcphdr->ack_seq, we can't do offsetof bit fields
+        doff &= 0xf0; // clean-up res1
+        doff >>= 4;   // move the upper 4 bits to low
+        doff *= 4;    // convert to bytes length
+
+        u8 flags;
+        bpf_skb_load_bytes(
+            skb,
+            hdr_len + offsetof(struct __tcphdr, ack_seq) + 4 + 1,
+            &flags,
+            sizeof(flags)); // read the second byte past __tcphdr->doff, again bit fields offsets
+        *custom_flags = ((u16)flags & 0x00ff);
+
+        hdr_len += doff;
+
+        if ((skb->len - hdr_len) < 0) { // less than 0 is a packet we can't parse
+            return false;
         }
+
+        break;
+    }
+    case IPPROTO_UDP: {
+        u16 port;
+        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __udphdr, source), &port, sizeof(port));
+        id->src_port = __bpf_htons(port);
+        bpf_skb_load_bytes(skb, hdr_len + offsetof(struct __udphdr, dest), &port, sizeof(port));
+        id->dst_port = __bpf_htons(port);
+    }
     }
 
     // custom flags
@@ -149,8 +166,8 @@ static __always_inline bool read_sk_buff(struct __sk_buff *skb, flow_id *id, u16
 }
 
 static __always_inline bool same_ip(u8 *ip1, u8 *ip2) {
-    for (int i=0; i<16; i+=4) {
-        if (*((u32 *)(ip1+i)) != *((u32 *)(ip2+i))) {
+    for (int i = 0; i < 16; i += 4) {
+        if (*((u32 *)(ip1 + i)) != *((u32 *)(ip2 + i))) {
             return false;
         }
     }
@@ -214,21 +231,21 @@ int socket__http_filter(struct __sk_buff *skb) {
     };
 
     u8 *direction = (u8 *)bpf_map_lookup_elem(&flow_directions, &id);
-    if(direction == NULL) {
+    if (direction == NULL) {
         // Calculate direction based on first flag received
         // SYN and ACK mean someone else initiated the connection and this is the INGRESS direction
-        if((flags & (SYN_FLAG | ACK_FLAG)) == (SYN_FLAG | ACK_FLAG)) {
+        if ((flags & (SYN_FLAG | ACK_FLAG)) == (SYN_FLAG | ACK_FLAG)) {
             new_flow.iface_direction = INGRESS;
         }
         // SYN only means we initiated the connection and this is the EGRESS direction
-        else if((flags & SYN_FLAG) == SYN_FLAG) {
+        else if ((flags & SYN_FLAG) == SYN_FLAG) {
            new_flow.iface_direction = EGRESS;
         }
         // save, when direction was calculated based on TCP flag
-        if(new_flow.iface_direction != UNKNOWN) {
+        if (new_flow.iface_direction != UNKNOWN) {
             // errors are intentionally omitted
             bpf_map_update_elem(&flow_directions, &id, &new_flow.iface_direction, BPF_NOEXIST);
-        } 
+        }
         // fallback for lost or already started connections and UDP
         else {
             new_flow.iface_direction = INGRESS;
@@ -257,7 +274,8 @@ int socket__http_filter(struct __sk_buff *skb) {
     }
     new_flow.errno = -ret;
-    flow_record *record = (flow_record *)bpf_ringbuf_reserve(&direct_flows, sizeof(flow_record), 0);
+    flow_record *record =
+        (flow_record *)bpf_ringbuf_reserve(&direct_flows, sizeof(flow_record), 0);
     if (!record) {
         if (trace_messages) {
             bpf_dbg_printk("couldn't reserve space in the ringbuf. Dropping flow");
@@ -272,7 +290,7 @@ int socket__http_filter(struct __sk_buff *skb) {
 
 cleanup:
     // finally, when flow receives FIN or RST, clean flow_directions
-    if(flags & FIN_FLAG || flags & RST_FLAG) {
+    if (flags & FIN_FLAG || flags & RST_FLAG) {
         bpf_map_delete_elem(&flow_directions, &id);
     }
     return TC_ACT_OK;
diff --git a/bpf/go_byte_arr.h b/bpf/go_byte_arr.h
index 94fb06303..28e38a198 100644
--- a/bpf/go_byte_arr.h
+++ b/bpf/go_byte_arr.h
@@ -16,7 +16,8 @@
 #include "utils.h"
 #include "bpf_dbg.h"
 
-static __inline int read_go_byte_arr(char *name, void *base_ptr, u8 offset, void *field, u64 *size_ptr, u64 max_size) {
+static __inline int
+read_go_byte_arr(char *name, void *base_ptr, u8 offset, void *field, u64 *size_ptr, u64 max_size) {
     void *ptr = 0;
     if (bpf_probe_read(&ptr, sizeof(ptr), (void *)(base_ptr + offset)) != 0) {
         bpf_dbg_printk("can't read ptr for %s", name);
diff --git a/bpf/go_common.h b/bpf/go_common.h
index 9cacdca53..53f0fd146 100644
--- a/bpf/go_common.h
+++ b/bpf/go_common.h
@@ -44,8 +44,8 @@ typedef struct goroutine_metadata_t {
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // key: pointer to the goroutine
-    __type(value, goroutine_metadata); // value: timestamp of the goroutine creation
+    __type(key, void *);               // key: pointer to the goroutine
+    __type(value, goroutine_metadata); // value: timestamp of the goroutine creation
     __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS);
     __uint(pinning, LIBBPF_PIN_BY_NAME);
 } ongoing_goroutines SEC(".maps");
@@ -67,8 +67,8 @@ struct {
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // key: pointer to the goroutine
-    __type(value, tp_info_t); // value: traceparent info
+    __type(key, void *);      // key: pointer to the goroutine
+    __type(value, tp_info_t); // value: traceparent info
     __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS);
     __uint(pinning, LIBBPF_PIN_BY_NAME);
 } go_trace_map SEC(".maps");
@@ -80,7 +80,8 @@ static __always_inline u64 find_parent_goroutine(void *goroutine_addr) {
         void *p_inv = bpf_map_lookup_elem(&go_trace_map, &r_addr);
         if (!p_inv) { // not this goroutine running the server request processing
             // Let's find the parent scope
-            goroutine_metadata *g_metadata = (goroutine_metadata *)bpf_map_lookup_elem(&ongoing_goroutines, &r_addr);
+            goroutine_metadata *g_metadata =
+                (goroutine_metadata *)bpf_map_lookup_elem(&ongoing_goroutines, &r_addr);
             if (g_metadata) {
                 // Lookup now to see if the parent was a request
                 r_addr = (void *)g_metadata->parent;
@@ -98,15 +99,21 @@ static __always_inline u64 find_parent_goroutine(void *goroutine_addr) {
     return 0;
 }
 
-static __always_inline void decode_go_traceparent(unsigned char *buf, unsigned char *trace_id, unsigned char *span_id, unsigned char *flags) {
+static __always_inline void decode_go_traceparent(unsigned char *buf,
+                                                  unsigned char *trace_id,
+                                                  unsigned char *span_id,
+                                                  unsigned char *flags) {
     unsigned char *t_id = buf + 2 + 1; // strlen(ver) + strlen("-")
-    unsigned char *s_id = buf + 2 + 1 + 32 + 1; // strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-")
-    unsigned char *f_id = buf + 2 + 1 + 32 + 1 + 16 + 1; // strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-") + strlen(span_id) + strlen("-")
+    unsigned char *s_id =
+        buf + 2 + 1 + 32 + 1; // strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-")
+    unsigned char *f_id =
+        buf + 2 + 1 + 32 + 1 + 16 +
+        1; // strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-") + strlen(span_id) + strlen("-")
 
     decode_hex(trace_id, t_id, TRACE_ID_CHAR_LEN);
     decode_hex(span_id, s_id, SPAN_ID_CHAR_LEN);
     decode_hex(flags, f_id, FLAGS_CHAR_LEN);
-} 
+}
 
 static __always_inline void tp_from_parent(tp_info_t *tp, tp_info_t *parent) {
     *((u64 *)tp->trace_id) = *((u64 *)parent->trace_id);
@@ -123,7 +130,8 @@ static __always_inline void tp_clone(tp_info_t *dest, tp_info_t *src) {
     dest->flags = src->flags;
 }
 
-static __always_inline void server_trace_parent(void *goroutine_addr, tp_info_t *tp, void *req_header) {
+static __always_inline void
+server_trace_parent(void *goroutine_addr, tp_info_t *tp, void *req_header) {
     // May get overriden when decoding existing traceparent, but otherwise we set sample ON
     tp->flags = 1;
     // Get traceparent from the Request.Header
@@ -149,7 +157,7 @@ static __always_inline void server_trace_parent(void *goroutine_addr, tp_info_t
             sort_connection_info(&conn);
             bpf_dbg_printk("Looking up traceparent for connection info");
             tp_info_pid_t *tp_p = trace_info_for_connection(&conn);
-            if (tp_p) {        
+            if (tp_p) {
                 if (correlated_request_with_current(tp_p)) {
                     bpf_dbg_printk("Found traceparent from trace map, another process.");
                     found_info = 1;
@@ -169,10 +177,12 @@ static __always_inline void server_trace_parent(void *goroutine_addr, tp_info_t
     bpf_map_update_elem(&go_trace_map, &goroutine_addr, tp, BPF_ANY);
 }
 
-static __always_inline u8 client_trace_parent(void *goroutine_addr, tp_info_t *tp_i, void *req_header) {
+static __always_inline u8 client_trace_parent(void *goroutine_addr,
+                                              tp_info_t *tp_i,
+                                              void *req_header) {
     // Get traceparent from the Request.Header
     u8 found_trace_id = 0;
-    
+
     // May get overriden when decoding existing traceparent or finding a server span, but otherwise we set sample ON
     tp_i->flags = 1;
@@ -195,7 +205,7 @@ static __always_inline u8 client_trace_parent(void *goroutine_addr, tp_info_t *t
 
         u64 parent_id = find_parent_goroutine(goroutine_addr);
 
-        if (parent_id) {// we found a parent request
+        if (parent_id) { // we found a parent request
             tp = (tp_info_t *)bpf_map_lookup_elem(&go_trace_map, &parent_id);
         }
@@ -203,9 +213,9 @@ static __always_inline u8 client_trace_parent(void *goroutine_addr, tp_info_t *t
             bpf_dbg_printk("Found parent request trace_parent %llx", tp);
             tp_from_parent(tp_i, tp);
         } else {
-            urand_bytes(tp_i->trace_id, TRACE_ID_SIZE_BYTES);    
+            urand_bytes(tp_i->trace_id, TRACE_ID_SIZE_BYTES);
         }
-        
+
         urand_bytes(tp_i->span_id, SPAN_ID_SIZE_BYTES);
     }
@@ -234,10 +244,15 @@ static __always_inline u8 get_conn_info_from_fd(void *fd_ptr, connection_info_t
     void *laddr_ptr = 0;
     void *raddr_ptr = 0;
 
-    bpf_probe_read(&laddr_ptr, sizeof(laddr_ptr), (void *)(fd_ptr + fd_laddr_pos + 8)); // find laddr
-    bpf_probe_read(&raddr_ptr, sizeof(raddr_ptr), (void *)(fd_ptr + fd_raddr_pos + 8)); // find raddr
-
-    bpf_dbg_printk("laddr_ptr %llx, laddr %llx, raddr %llx", fd_ptr + fd_laddr_pos + 8, laddr_ptr, raddr_ptr);
+    bpf_probe_read(
+        &laddr_ptr, sizeof(laddr_ptr), (void *)(fd_ptr + fd_laddr_pos + 8)); // find laddr
+    bpf_probe_read(
+        &raddr_ptr, sizeof(raddr_ptr), (void *)(fd_ptr + fd_raddr_pos + 8)); // find raddr
+
+    bpf_dbg_printk("laddr_ptr %llx, laddr %llx, raddr %llx",
+                   fd_ptr + fd_laddr_pos + 8,
+                   laddr_ptr,
+                   raddr_ptr);
 
     if (laddr_ptr && raddr_ptr) {
         // read local
@@ -274,7 +289,7 @@ static __always_inline u8 get_conn_info(void *conn_ptr, connection_info_t *info)
     return 0;
 }
 
-static __always_inline void* unwrap_tls_conn_info(void *conn_ptr, void *tls_state) {
+static __always_inline void *unwrap_tls_conn_info(void *conn_ptr, void *tls_state) {
     if (conn_ptr && tls_state) {
         void *c_ptr = 0;
         bpf_probe_read(&c_ptr, sizeof(c_ptr), (void *)(conn_ptr)); // unwrap conn
diff --git a/bpf/go_grpc.h b/bpf/go_grpc.h
index 9c85a3943..0fc1eb664 100644
--- a/bpf/go_grpc.h
+++ b/bpf/go_grpc.h
@@ -56,7 +56,7 @@ struct {
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // key: goroutine
+    __type(key, void *);   // key: goroutine
     __type(value, void *); // the transport *
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_grpc_operate_headers SEC(".maps");
@@ -78,7 +78,7 @@ struct {
 // Context propagation
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, u32); // key: stream id
+    __type(key, u32);                             // key: stream id
     __type(value, grpc_client_func_invocation_t); // stored info for the client request
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_streams SEC(".maps");
@@ -90,8 +90,7 @@ struct {
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_grpc_header_writes SEC(".maps");
 
-
-#define TRANSPORT_HTTP2   1
+#define TRANSPORT_HTTP2 1
 #define TRANSPORT_HANDLER 2
 
 // To be Injected from the user space during the eBPF program load & initialization
@@ -114,7 +113,8 @@ volatile const u64 framer_w_pos;
 volatile const u64 grpc_transport_buf_writer_buf_pos;
 volatile const u64 grpc_transport_buf_writer_offset_pos;
 
-#define OPTIMISTIC_GRPC_ENCODED_HEADER_LEN 49 // 1 + 1 + 8 + 1 +~ 38 = type byte + hpack_len_as_byte("traceparent") + strlen(hpack("traceparent")) + len_as_byte(38) + hpack(generated tracepanent id)
+#define OPTIMISTIC_GRPC_ENCODED_HEADER_LEN \
+    49 // 1 + 1 + 8 + 1 +~ 38 = type byte + hpack_len_as_byte("traceparent") + strlen(hpack("traceparent")) + len_as_byte(38) + hpack(generated tracepanent id)
 
 SEC("uprobe/server_handleStream")
 int uprobe_server_handleStream(struct pt_regs *ctx) {
@@ -133,10 +133,14 @@ int uprobe_server_handleStream(struct pt_regs *ctx) {
     if (stream_ptr) {
         void *ctx_ptr = 0;
         // Read the embedded context object ptr
-        bpf_probe_read(&ctx_ptr, sizeof(ctx_ptr), (void *)(stream_ptr + grpc_stream_ctx_ptr_pos + sizeof(void *)));
+        bpf_probe_read(&ctx_ptr,
+                       sizeof(ctx_ptr),
+                       (void *)(stream_ptr + grpc_stream_ctx_ptr_pos + sizeof(void *)));
 
         if (ctx_ptr) {
-            server_trace_parent(goroutine_addr, &invocation.tp, (void *)(ctx_ptr + value_context_val_ptr_pos + sizeof(void *)));
+            server_trace_parent(goroutine_addr,
+                                &invocation.tp,
+                                (void *)(ctx_ptr + value_context_val_ptr_pos + sizeof(void *)));
         }
     }
 
@@ -172,7 +176,8 @@ SEC("uprobe/http2Server_operateHeaders")
 int uprobe_http2Server_operateHeaders(struct pt_regs *ctx) {
     void *goroutine_addr = GOROUTINE_PTR(ctx);
     void *tr = GO_PARAM1(ctx);
-    bpf_dbg_printk("=== uprobe/http2Server_operateHeaders tr %llx goroutine %lx === ", tr, goroutine_addr);
+    bpf_dbg_printk(
+        "=== uprobe/http2Server_operateHeaders tr %llx goroutine %lx === ", tr, goroutine_addr);
 
     grpc_transports_t t = {
         .type = TRANSPORT_HTTP2,
@@ -190,7 +195,9 @@ SEC("uprobe/serverHandlerTransport_HandleStreams")
 int uprobe_server_handler_transport_handle_streams(struct pt_regs *ctx) {
     void *tr = GO_PARAM1(ctx);
     void *goroutine_addr = GOROUTINE_PTR(ctx);
-    bpf_printk("=== uprobe/serverHandlerTransport_HandleStreams tr %llx goroutine %lx === ", tr, goroutine_addr);
+    bpf_printk("=== uprobe/serverHandlerTransport_HandleStreams tr %llx goroutine %lx === ",
+               tr,
+               goroutine_addr);
 
     void *parent_go = (void *)find_parent_goroutine(goroutine_addr);
     if (parent_go) {
@@ -202,7 +209,7 @@ int uprobe_server_handler_transport_handle_streams(struct pt_regs *ctx) {
                 .type = TRANSPORT_HANDLER,
             };
             __builtin_memcpy(&t.conn, conn, sizeof(connection_info_t));
-            
+
             bpf_map_update_elem(&ongoing_grpc_transports, &tr, &t, BPF_ANY);
         }
     }
@@ -228,7 +235,7 @@ int uprobe_server_handleStream_return(struct pt_regs *ctx) {
     u16 status = 0;
     if (status_ptr != NULL) {
         bpf_dbg_printk("can't read grpc invocation status");
-        status = *status_ptr;        
+        status = *status_ptr;
     }
 
     void *stream_ptr = (void *)invocation->stream;
@@ -255,7 +262,11 @@ int uprobe_server_handleStream_return(struct pt_regs *ctx) {
     }
 
     // Get method from transport.Stream.Method
-    if (!read_go_str("grpc method", stream_ptr, grpc_stream_method_ptr_pos, &trace->path, sizeof(trace->path))) {
+    if (!read_go_str("grpc method",
+                     stream_ptr,
+                     grpc_stream_method_ptr_pos,
+                     &trace->path,
+                     sizeof(trace->path))) {
         bpf_dbg_printk("can't read grpc transport.Stream.Method");
         bpf_ringbuf_discard(trace, 0);
         goto done;
@@ -264,7 +275,8 @@ int uprobe_server_handleStream_return(struct pt_regs *ctx) {
     void *st_ptr = 0;
     u8 found_conn = 0;
     // Read the embedded object ptr
-    bpf_probe_read(&st_ptr, sizeof(st_ptr), (void *)(stream_ptr + grpc_stream_st_ptr_pos + sizeof(void *)));
+    bpf_probe_read(
+        &st_ptr, sizeof(st_ptr), (void *)(stream_ptr + grpc_stream_st_ptr_pos + sizeof(void *)));
 
     bpf_dbg_printk("st_ptr %llx", st_ptr);
     if (st_ptr) {
@@ -326,7 +338,8 @@ int uprobe_transport_writeStatus(struct pt_regs *ctx) {
 }
 
 /* GRPC client */
-static __always_inline void clientConnStart(void *goroutine_addr, void *cc_ptr, void *ctx_ptr, void *method_ptr, void *method_len) {
+static __always_inline void clientConnStart(
+    void *goroutine_addr, void *cc_ptr, void *ctx_ptr, void *method_ptr, void *method_len) {
     grpc_client_func_invocation_t invocation = {
         .start_monotime_ns = bpf_ktime_get_ns(),
         .cc = (u64)cc_ptr,
@@ -339,7 +352,9 @@ static __always_inline void clientConnStart(void *goroutine_addr, void *cc_ptr,
     if (ctx_ptr) {
         void *val_ptr = 0;
         // Read the embedded val object ptr from ctx if there's one
-        bpf_probe_read(&val_ptr, sizeof(val_ptr), (void *)(ctx_ptr + value_context_val_ptr_pos + sizeof(void *)));
+        bpf_probe_read(&val_ptr,
+                       sizeof(val_ptr),
+                       (void *)(ctx_ptr + value_context_val_ptr_pos + sizeof(void *)));
 
         invocation.flags = client_trace_parent(goroutine_addr, &invocation.tp, (void *)(val_ptr));
     } else {
@@ -439,7 +454,10 @@ static __always_inline int grpc_connect_done(struct pt_regs *ctx, void *err) {
 
         trace->tp = invocation->tp;
 
-        trace->status = (err) ? 2 : 0; // Getting the gRPC client status is complex, if there's an error we set Code.Unknown = 2
+        trace->status =
+            (err)
+                ? 2
+                : 0; // Getting the gRPC client status is complex, if there's an error we set Code.Unknown = 2
 
         // submit the completed trace via ringbuffer
         bpf_ringbuf_submit(trace, get_flags());
@@ -453,7 +471,7 @@ static __always_inline int grpc_connect_done(struct pt_regs *ctx, void *err) {
 SEC("uprobe/ClientConn_NewStream")
 int uprobe_ClientConn_NewStream_return(struct pt_regs *ctx) {
     bpf_dbg_printk("=== uprobe/proc grpc ClientConn.NewStream return === ");
-    
+
     void *stream = GO_PARAM1(ctx);
 
     if (!stream) {
@@ -478,7 +496,7 @@ int uprobe_ClientConn_Close(struct pt_regs *ctx) {
 SEC("uprobe/ClientConn_Invoke")
 int uprobe_ClientConn_Invoke_return(struct pt_regs *ctx) {
     bpf_dbg_printk("=== uprobe/proc grpc ClientConn.Invoke return === ");
-    
+
     void *err = GO_PARAM1(ctx);
 
     if (err) {
@@ -505,7 +523,8 @@ int uprobe_transport_http2Client_NewStream(struct pt_regs *ctx) {
     void *goroutine_addr = GOROUTINE_PTR(ctx);
     void *t_ptr = GO_PARAM1(ctx);
 
-    bpf_dbg_printk("goroutine_addr %lx, t_ptr %llx, t.conn_pos %x", goroutine_addr, t_ptr, grpc_t_conn_pos);
+    bpf_dbg_printk(
+        "goroutine_addr %lx, t_ptr %llx, t.conn_pos %x", goroutine_addr, t_ptr, grpc_t_conn_pos);
 
     if (t_ptr) {
         void *conn_ptr = t_ptr + grpc_t_conn_pos + 8;
@@ -516,7 +535,7 @@ int uprobe_transport_http2Client_NewStream(struct pt_regs *ctx) {
         buf[0] = 0;
         bpf_probe_read(&s_ptr, sizeof(s_ptr), (void *)(t_ptr + grpc_t_scheme_pos));
         bpf_probe_read(buf, sizeof(buf), s_ptr);
-        
+
         //bpf_dbg_printk("scheme %s", buf);
 
         if (buf[0] == 'h' && buf[1] == 't' && buf[2] == 't' && buf[3] == 'p' && buf[4] == 's') {
@@ -533,14 +552,15 @@ int uprobe_transport_http2Client_NewStream(struct pt_regs *ctx) {
             void *conn_conn_ptr = 0;
             bpf_probe_read(&conn_conn_ptr, sizeof(conn_conn_ptr), conn_ptr);
             bpf_dbg_printk("conn_conn_ptr %llx", conn_conn_ptr);
-            if (conn_conn_ptr) {            
+            if (conn_conn_ptr) {
                 connection_info_t conn = {0};
                 u8 ok = get_conn_info(conn_conn_ptr, &conn);
                 if (ok) {
-                    bpf_map_update_elem(&ongoing_client_connections, &goroutine_addr, &conn, BPF_ANY);
+                    bpf_map_update_elem(
+                        &ongoing_client_connections, &goroutine_addr, &conn, BPF_ANY);
                 }
             }
-        }    
+        }
 
 #ifndef NO_HEADER_PROPAGATION
         u32 next_id = 0;
@@ -549,7 +569,8 @@ int uprobe_transport_http2Client_NewStream(struct pt_regs *ctx) {
 
         bpf_dbg_printk("next_id %d", next_id);
 
-        grpc_client_func_invocation_t *invocation = bpf_map_lookup_elem(&ongoing_grpc_client_requests, &goroutine_addr);
+        grpc_client_func_invocation_t *invocation =
+            bpf_map_lookup_elem(&ongoing_grpc_client_requests, &goroutine_addr);
 
         if (invocation) {
             grpc_client_func_invocation_t inv_save = *invocation;
@@ -559,9 +580,9 @@ int uprobe_transport_http2Client_NewStream(struct pt_regs *ctx) {
         } else {
             bpf_dbg_printk("Couldn't find invocation metadata for goroutine %lx", goroutine_addr);
         }
-#endif    
+#endif
     }
-    
+
     return 0;
 }
@@ -576,8 +597,10 @@ typedef struct grpc_framer_func_invocation {
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void*); // key: go routine doing framer write headers
-    __type(value, grpc_framer_func_invocation_t); // the goroutine of the round trip request, which is the key for our traceparent info
+    __type(key, void *); // key: go routine doing framer write headers
+    __type(
+        value,
+        grpc_framer_func_invocation_t); // the goroutine of the round trip request, which is the key for our traceparent info
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } grpc_framer_invocation_map SEC(".maps");
@@ -593,11 +616,13 @@ int uprobe_grpcFramerWriteHeaders(struct pt_regs *ctx) {
     void *framer = GO_PARAM1(ctx);
     u64 stream_id = (u64)GO_PARAM2(ctx);
 
-    bpf_dbg_printk("framer=%llx, stream_id=%lld, framer_w_pos %llx", framer, ((u64)stream_id), framer_w_pos);
+    bpf_dbg_printk(
+        "framer=%llx, stream_id=%lld, framer_w_pos %llx", framer, ((u64)stream_id), framer_w_pos);
 
     u32 stream_lookup = (u32)stream_id;
 
-    grpc_client_func_invocation_t *invocation = bpf_map_lookup_elem(&ongoing_streams, &stream_lookup);
+    grpc_client_func_invocation_t *invocation =
+        bpf_map_lookup_elem(&ongoing_streams, &stream_lookup);
 
     if (invocation) {
         bpf_dbg_printk("Found invocation info %llx", invocation);
@@ -608,7 +633,8 @@ int uprobe_grpcFramerWriteHeaders(struct pt_regs *ctx) {
 
         if (w_ptr) {
             s64 offset;
-            bpf_probe_read(&offset, sizeof(offset), (void *)(w_ptr + grpc_transport_buf_writer_offset_pos));
+            bpf_probe_read(
+                &offset, sizeof(offset), (void *)(w_ptr + grpc_transport_buf_writer_offset_pos));
 
             bpf_dbg_printk("Found initial data offset %d", offset);
@@ -640,7 +666,8 @@ int uprobe_grpcFramerWriteHeaders(struct pt_regs *ctx) {
 #endif
 
 #ifndef NO_HEADER_PROPAGATION
-#define HTTP2_ENCODED_HEADER_LEN 66 // 1 + 1 + 8 + 1 + 55 = type byte + hpack_len_as_byte("traceparent") + strlen(hpack("traceparent")) + len_as_byte(55) + generated traceparent id
+#define HTTP2_ENCODED_HEADER_LEN \
+    66 // 1 + 1 + 8 + 1 + 55 = type byte + hpack_len_as_byte("traceparent") + strlen(hpack("traceparent")) + len_as_byte(55) + generated traceparent id
 
 SEC("uprobe/grpcFramerWriteHeaders_returns")
 int uprobe_grpcFramerWriteHeaders_returns(struct pt_regs *ctx) {
@@ -648,7 +675,8 @@ int uprobe_grpcFramerWriteHeaders_returns(struct pt_regs *ctx) {
 
     void *goroutine_addr = GOROUTINE_PTR(ctx);
 
-    grpc_framer_func_invocation_t *f_info = bpf_map_lookup_elem(&grpc_framer_invocation_map, &goroutine_addr);
+    grpc_framer_func_invocation_t *f_info =
+        bpf_map_lookup_elem(&grpc_framer_invocation_map, &goroutine_addr);
 
     if (f_info) {
         void *w_ptr = (void *)(f_info->framer_ptr + framer_w_pos + 16);
@@ -660,9 +688,16 @@ int uprobe_grpcFramerWriteHeaders_returns(struct pt_regs *ctx) {
             s64 cap = 0;
             u64 off = f_info->offset;
 
-            bpf_probe_read(&buf_arr, sizeof(buf_arr), (void *)(w_ptr + grpc_transport_buf_writer_buf_pos)); // the buffer is the first field
+            bpf_probe_read(
+                &buf_arr,
+                sizeof(buf_arr),
+                (void *)(w_ptr +
+                         grpc_transport_buf_writer_buf_pos)); // the buffer is the first field
             bpf_probe_read(&n, sizeof(n), (void *)(w_ptr + grpc_transport_buf_writer_offset_pos));
-            bpf_probe_read(&cap, sizeof(cap), (void *)(w_ptr + grpc_transport_buf_writer_offset_pos + 16)); // the offset of the capacity is 2 * 8 bytes from the buf
+            bpf_probe_read(&cap,
+                           sizeof(cap),
+                           (void *)(w_ptr + grpc_transport_buf_writer_offset_pos +
+                                    16)); // the offset of the capacity is 2 * 8 bytes from the buf
 
             bpf_clamp_umax(off, MAX_W_PTR_OFFSET);
@@ -674,26 +709,28 @@ int uprobe_grpcFramerWriteHeaders_returns(struct pt_regs *ctx) {
                 u8 key_len = TP_ENCODED_LEN | 0x80; // high tagged to signify hpack encoded value
                 u8 val_len = TP_MAX_VAL_LENGTH;
 
-                // We don't hpack encode the value of the traceparent field, because that will require that 
+                // We don't hpack encode the value of the traceparent field, because that will require that
                 // we use bpf_loop, which in turn increases the kernel requirement to 5.17+.
                 make_tp_string(tp_str, &f_info->tp);
                 //bpf_dbg_printk("Will write %s, type = %d, key_len = %d, val_len = %d", tp_str, type_byte, key_len, val_len);
 
-                bpf_probe_write_user(buf_arr + (n & 0x0ffff), &type_byte, sizeof(type_byte));                
+                bpf_probe_write_user(buf_arr + (n & 0x0ffff), &type_byte, sizeof(type_byte));
                 n++;
                 // Write the length of the key = 8
                 bpf_probe_write_user(buf_arr + (n & 0x0ffff), &key_len, sizeof(key_len));
                 n++;
                 // Write 'traceparent' encoded as hpack
-                bpf_probe_write_user(buf_arr + (n & 0x0ffff), tp_encoded, sizeof(tp_encoded));;
+                bpf_probe_write_user(buf_arr + (n & 0x0ffff), tp_encoded, sizeof(tp_encoded));
+                ;
                 n += TP_ENCODED_LEN;
-                // Write the length of the hpack encoded traceparent field 
+                // Write the length of the hpack encoded traceparent field
                 bpf_probe_write_user(buf_arr + (n & 0x0ffff), &val_len, sizeof(val_len));
                 n++;
                 bpf_probe_write_user(buf_arr + (n & 0x0ffff), tp_str, sizeof(tp_str));
                 n += TP_MAX_VAL_LENGTH;
                 // Update the value of n in w to reflect the new size
-                bpf_probe_write_user((void *)(w_ptr + grpc_transport_buf_writer_offset_pos), &n, sizeof(n));
+                bpf_probe_write_user(
+                    (void *)(w_ptr + grpc_transport_buf_writer_offset_pos), &n, sizeof(n));
 
                 // http2 encodes the length of the headers in the first 3 bytes of buf, we need to update those
                 u8 size_1 = 0;
@@ -729,4 +766,4 @@ SEC("uprobe/grpcFramerWriteHeaders_returns")
 int uprobe_grpcFramerWriteHeaders_returns(struct pt_regs *ctx) {
     return 0;
 }
-#endif
+#endif
diff --git a/bpf/go_kafka_def.h b/bpf/go_kafka_def.h
index 4497bcdca..16ff40a63 100644
--- a/bpf/go_kafka_def.h
+++ b/bpf/go_kafka_def.h
@@ -11,7 +11,7 @@
 #ifndef GO_KAFKA_DEFS_H
 #define GO_KAFKA_DEFS_H
 
-#define KAFKA_API_FETCH   0
+#define KAFKA_API_FETCH 0
 #define KAFKA_API_PRODUCE 1
 
 #define KAFKA_API_KEY_POS 5
diff --git a/bpf/go_kafka_go.h b/bpf/go_kafka_go.h
index b29c1ddc8..b53c9a69a 100644
--- a/bpf/go_kafka_go.h
+++ b/bpf/go_kafka_go.h
@@ -33,35 +33,35 @@ typedef struct topic {
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // w_ptr
+    __type(key, void *);      // w_ptr
     __type(value, tp_info_t); // traceparent
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } produce_traceparents SEC(".maps");
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // goroutine
+    __type(key, void *);    // goroutine
     __type(value, topic_t); // topic info
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_produce_topics SEC(".maps");
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // msg ptr
+    __type(key, void *);    // msg ptr
     __type(value, topic_t); // topic info
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_produce_messages SEC(".maps");
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // goroutine
+    __type(key, void *);          // goroutine
     __type(value, produce_req_t); // rw ptr + start time
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } produce_requests SEC(".maps");
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // goroutine
+    __type(key, void *);           // goroutine
     __type(value, kafka_go_req_t); // rw ptr + start time
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } fetch_requests SEC(".maps");
@@ -71,7 +71,8 @@ SEC("uprobe/writer_write_messages")
 int uprobe_writer_write_messages(struct pt_regs *ctx) {
     void *goroutine_addr = (void *)GOROUTINE_PTR(ctx);
     void *w_ptr = (void *)GO_PARAM1(ctx);
-    bpf_dbg_printk("=== uprobe/kafka-go writer_write_messages %llx w_ptr %llx === ", goroutine_addr, w_ptr);
+    bpf_dbg_printk(
+        "=== uprobe/kafka-go writer_write_messages %llx w_ptr %llx === ", goroutine_addr, w_ptr);
 
     tp_info_t tp = {};
 
@@ -102,7 +103,7 @@ int uprobe_writer_produce(struct pt_regs *ctx) {
         bpf_dbg_printk("found existing traceparent %llx", tp);
         __builtin_memcpy(&topic.tp, tp, sizeof(tp_info_t));
     } else {
-        urand_bytes(topic.tp.trace_id, TRACE_ID_SIZE_BYTES);        
+        urand_bytes(topic.tp.trace_id, TRACE_ID_SIZE_BYTES);
         urand_bytes(topic.tp.span_id, SPAN_ID_SIZE_BYTES);
     }
 
@@ -142,8 +143,8 @@ int uprobe_protocol_roundtrip(struct pt_regs *ctx) {
     void *goroutine_addr = (void *)GOROUTINE_PTR(ctx);
     void *rw_ptr = (void *)GO_PARAM2(ctx);
     void *msg_ptr = (void *)GO_PARAM8(ctx);
-    bpf_dbg_printk("goroutine_addr %lx, rw ptr %llx, msg_ptr %llx", goroutine_addr, rw_ptr, msg_ptr);
-    
+    bpf_dbg_printk(
+        "goroutine_addr %lx, rw ptr %llx, msg_ptr %llx", goroutine_addr, rw_ptr, msg_ptr);
 
     if (rw_ptr) {
         topic_t *topic_ptr = bpf_map_lookup_elem(&ongoing_produce_messages, &msg_ptr);
@@ -166,7 +167,7 @@ SEC("uprobe/protocol_RoundTrip_ret")
 int uprobe_protocol_roundtrip_ret(struct pt_regs *ctx) {
     void *goroutine_addr = (void *)GOROUTINE_PTR(ctx);
     bpf_dbg_printk("=== uprobe/protocol_RoundTrip ret %llx === ", goroutine_addr);
-    
+
     produce_req_t *p_ptr = bpf_map_lookup_elem(&produce_requests, &goroutine_addr);
 
     bpf_dbg_printk("p_ptr %llx", p_ptr);
@@ -187,7 +188,8 @@ int uprobe_protocol_roundtrip_ret(struct pt_regs *ctx) {
             trace->end_monotime_ns = bpf_ktime_get_ns();
 
             void *conn_ptr = 0;
-            bpf_probe_read(&conn_ptr, sizeof(conn_ptr), (void *)(p_ptr->conn_ptr + 8)); // find conn
+            bpf_probe_read(
+                &conn_ptr, sizeof(conn_ptr), (void *)(p_ptr->conn_ptr + 8)); // find conn
             bpf_dbg_printk("conn ptr %llx", conn_ptr);
             if (conn_ptr) {
                 u8 ok = get_conn_info(conn_ptr, &trace->conn);
@@ -210,7 +212,6 @@ int uprobe_protocol_roundtrip_ret(struct pt_regs *ctx) {
     return 0;
 }
 
-
 // Code for the fetch messages path
 SEC("uprobe/reader_read")
 int uprobe_reader_read(struct pt_regs *ctx) {
diff --git a/bpf/go_nethttp.h b/bpf/go_nethttp.h
index 00a9a329d..cd2647d0d 100644
--- a/bpf/go_nethttp.h
+++ b/bpf/go_nethttp.h
@@ -31,8 +31,8 @@ typedef struct http_func_invocation {
 } http_func_invocation_t;
 
 typedef struct http_client_data {
-    u8 method[METHOD_MAX_LEN];
-    u8 path[PATH_MAX_LEN];
+    u8 method[METHOD_MAX_LEN];
+    u8 path[PATH_MAX_LEN];
     s64 content_length;
 
     pid_info pid;
@@ -55,8 +55,8 @@ struct {
 typedef struct server_http_func_invocation {
     u64 start_monotime_ns;
     tp_info_t tp;
-    u8 method[METHOD_MAX_LEN];
-    u8 path[PATH_MAX_LEN];
+    u8 method[METHOD_MAX_LEN];
+    u8 path[PATH_MAX_LEN];
     u64 content_length;
     u64 status;
 
@@ -114,12 +114,13 @@ int uprobe_ServeHTTP(struct pt_regs *ctx) {
     invocation.path[0] = 0;
 
     if (req) {
-        server_trace_parent(goroutine_addr, &invocation.tp, (void*)(req + req_header_ptr_pos));
-        // TODO: if context propagation is supported, overwrite the header value in the map with the 
+        server_trace_parent(goroutine_addr, &invocation.tp, (void *)(req + req_header_ptr_pos));
+        // TODO: if context propagation is supported, overwrite the header value in the map with the
         // new span context and the same thread id.
 
         // Get method from Request.Method
-        if (!read_go_str("method", req, method_ptr_pos, &invocation.method, sizeof(invocation.method))) {
+        if (!read_go_str(
+                "method", req, method_ptr_pos, &invocation.method, sizeof(invocation.method))) {
            bpf_dbg_printk("can't read http Request.Method");
            goto done;
         }
@@ -128,12 +129,16 @@ int uprobe_ServeHTTP(struct pt_regs *ctx) {
        // Get path from Request.URL
        void *url_ptr = 0;
        int res = bpf_probe_read(&url_ptr, sizeof(url_ptr), (void *)(req + url_ptr_pos));
 
-        if (res || !url_ptr || !read_go_str("path", url_ptr, path_ptr_pos, &invocation.path, sizeof(invocation.path))) {
+        if (res || !url_ptr ||
+            !read_go_str(
+                "path", url_ptr, path_ptr_pos, &invocation.path, sizeof(invocation.path))) {
            bpf_dbg_printk("can't read http Request.URL.Path");
            goto done;
        }
 
-        res = bpf_probe_read(&invocation.content_length, sizeof(invocation.content_length), (void *)(req + content_length_ptr_pos));
+        res = bpf_probe_read(&invocation.content_length,
+                             sizeof(invocation.content_length),
+                             (void *)(req + content_length_ptr_pos));
        if (res) {
            bpf_dbg_printk("can't read http Request.ContentLength");
            goto done;
@@ -141,7 +146,7 @@ int uprobe_ServeHTTP(struct pt_regs *ctx) {
     } else {
        goto done;
     }
-    
+
     // Write event
     if (bpf_map_update_elem(&ongoing_http_server_requests, &goroutine_addr, &invocation, BPF_ANY)) {
        bpf_dbg_printk("can't update map element");
@@ -170,17 +175,22 @@ int uprobe_readRequestStart(struct pt_regs *ctx) {
        //bpf_dbg_printk("conn_conn_ptr %llx, tls_state %llx, c_tls_pos = %d, c_tls_ptr = %llx", conn_conn_ptr, tls_state, c_tls_pos, c_ptr + c_tls_pos);
        if (conn_conn_ptr) {
            void *conn_ptr = 0;
-            bpf_probe_read(&conn_ptr, sizeof(conn_ptr), (void *)(conn_conn_ptr + net_conn_pos)); // find conn
+            bpf_probe_read(&conn_ptr,
+                           sizeof(conn_ptr),
+                           (void *)(conn_conn_ptr + net_conn_pos)); // find conn
            bpf_dbg_printk("conn_ptr %llx", conn_ptr);
            if (conn_ptr) {
                connection_info_t conn = {0};
-                get_conn_info(conn_ptr, &conn); // initialized to 0, no need to check the result if we succeeded
-                bpf_map_update_elem(&ongoing_server_connections, &goroutine_addr, &conn, BPF_ANY);
+                get_conn_info(
+                    conn_ptr,
+                    &conn); // initialized to 0, no need to check the result if we succeeded
+                bpf_map_update_elem(
+                    &ongoing_server_connections, &goroutine_addr, &conn, BPF_ANY);
            }
        }
     }
-    
+
     return 0;
 }
@@ -213,7 +223,7 @@ int uprobe_ServeHTTPReturns(struct pt_regs *ctx) {
     bpf_dbg_printk("=== uprobe/ServeHTTP returns === ");
 
     void *goroutine_addr = GOROUTINE_PTR(ctx);
-    bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);    
+    bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);
 
     server_http_func_invocation_t *invocation =
         bpf_map_lookup_elem(&ongoing_http_server_requests, &goroutine_addr);
@@ -229,14 +239,14 @@ int uprobe_ServeHTTPReturns(struct pt_regs *ctx) {
            bpf_dbg_printk("can't read http invocation metadata");
            return 0;
        }
-    }    
+    }
 
     http_request_trace *trace = bpf_ringbuf_reserve(&events, sizeof(http_request_trace), 0);
     if (!trace) {
        bpf_dbg_printk("can't reserve space in the ringbuffer");
        goto done;
     }
-    
+
     task_pid(&trace->pid);
     trace->type = EVENT_HTTP_REQUEST;
     trace->start_monotime_ns = invocation->start_monotime_ns;
@@ -283,7 +293,7 @@ int uprobe_ServeHTTPReturns(struct pt_regs *ctx) {
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
     __type(key, void *); // key: pointer to the request header map
-    __type(value, u64); // the goroutine of the transport request
+    __type(value, u64);  // the goroutine of the transport request
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } header_req_map SEC(".maps");
 
@@ -298,12 +308,10 @@ static __always_inline void roundTripStartHelper(struct pt_regs *ctx) {
     void *req = GO_PARAM2(ctx);
 
-    http_func_invocation_t invocation = {
-        .start_monotime_ns = bpf_ktime_get_ns(),
-        .tp = {0}
-    };
+    http_func_invocation_t invocation = {.start_monotime_ns = bpf_ktime_get_ns(), .tp = {0}};
 
-    __attribute__((__unused__)) u8 existing_tp = client_trace_parent(goroutine_addr, &invocation.tp, (void*)(req + req_header_ptr_pos));
+    __attribute__((__unused__)) u8 existing_tp =
+        client_trace_parent(goroutine_addr, &invocation.tp, (void *)(req + req_header_ptr_pos));
 
     http_client_data_t trace = {0};
 
@@ -313,7 +321,9 @@ static __always_inline void roundTripStartHelper(struct pt_regs *ctx) {
        return;
     }
 
-    bpf_probe_read(&trace.content_length, sizeof(trace.content_length), (void *)(req + content_length_ptr_pos));
+    bpf_probe_read(&trace.content_length,
+                   sizeof(trace.content_length),
+                   (void *)(req + content_length_ptr_pos));
 
     // Get path from Request.URL
     void *url_ptr = 0;
@@ -333,13 +343,14 @@ static __always_inline void roundTripStartHelper(struct pt_regs *ctx) {
 
 #ifndef NO_HEADER_PROPAGATION
     //if (!existing_tp) {
-        void *headers_ptr = 0;
-        bpf_probe_read(&headers_ptr, sizeof(headers_ptr), (void*)(req + req_header_ptr_pos));
-        bpf_dbg_printk("goroutine_addr %lx, req ptr %llx, headers_ptr %llx", goroutine_addr, req, headers_ptr);
-
-        if (headers_ptr) {
-            bpf_map_update_elem(&header_req_map, &headers_ptr, &goroutine_addr, BPF_ANY);
-        }
+    void *headers_ptr = 0;
+    bpf_probe_read(&headers_ptr, sizeof(headers_ptr), (void *)(req + req_header_ptr_pos));
+    bpf_dbg_printk(
+        "goroutine_addr %lx, req ptr %llx, headers_ptr %llx", goroutine_addr, req, headers_ptr);
+
+    if (headers_ptr) {
+        bpf_map_update_elem(&header_req_map, &headers_ptr, &goroutine_addr, BPF_ANY);
+    }
     //}
 #endif
 }
@@ -364,7 +375,8 @@ int uprobe_roundTripReturn(struct pt_regs *ctx) {
        goto done;
     }
 
-    http_client_data_t *data = bpf_map_lookup_elem(&ongoing_http_client_requests_data, &goroutine_addr);
+    http_client_data_t *data =
+        bpf_map_lookup_elem(&ongoing_http_client_requests_data, &goroutine_addr);
     if (data == NULL) {
        bpf_dbg_printk("can't read http client invocation data");
        goto done;
@@ -402,7 +414,8 @@ int uprobe_roundTripReturn(struct pt_regs *ctx) {
 
     bpf_probe_read(&trace->status, sizeof(trace->status), (void *)(resp_ptr + status_code_ptr_pos));
 
-    bpf_dbg_printk("status %d, offset %d, resp_ptr %lx", trace->status, status_code_ptr_pos, (u64)resp_ptr);
+    bpf_dbg_printk(
+        "status %d, offset %d, resp_ptr %lx", trace->status, status_code_ptr_pos, (u64)resp_ptr);
 
     // submit the completed trace via ringbuffer
     bpf_ringbuf_submit(trace, get_flags());
@@ -434,7 +447,8 @@ int uprobe_writeSubset(struct pt_regs *ctx) {
 
     u64 parent_goaddr = *request_goaddr;
 
-    http_func_invocation_t *func_inv = bpf_map_lookup_elem(&ongoing_http_client_requests, &parent_goaddr);
+    http_func_invocation_t *func_inv =
+        bpf_map_lookup_elem(&ongoing_http_client_requests, &parent_goaddr);
     if (!func_inv) {
        bpf_dbg_printk("Can't find client request for goroutine %llx", parent_goaddr);
        goto done;
@@ -449,16 +463,18 @@ int uprobe_writeSubset(struct pt_regs *ctx) {
     if (!buf_ptr) {
        goto done;
     }
-    
+
     s64 size = 0;
-    bpf_probe_read(&size, sizeof(s64), (void *)(io_writer_addr + io_writer_buf_ptr_pos + 8)); // grab size
+    bpf_probe_read(
+        &size, sizeof(s64), (void *)(io_writer_addr + io_writer_buf_ptr_pos + 8)); // grab size
 
     s64 len = 0;
     bpf_probe_read(&len, sizeof(s64), (void *)(io_writer_addr + io_writer_n_pos)); // grab len
 
-    bpf_dbg_printk("buf_ptr %llx, len=%d, size=%d", (void*)buf_ptr, len, size);
+    bpf_dbg_printk("buf_ptr %llx, len=%d, size=%d", (void *)buf_ptr, len, size);
 
-    if (len < (size - TP_MAX_VAL_LENGTH - TP_MAX_KEY_LENGTH - 4)) { // 4 = strlen(":_") + strlen("\r\n")
+    if (len <
+        (size - TP_MAX_VAL_LENGTH - TP_MAX_KEY_LENGTH - 4)) { // 4 = strlen(":_") + strlen("\r\n")
        char key[TP_MAX_KEY_LENGTH + 2] = "Traceparent: ";
        char end[2] = "\r\n";
        bpf_probe_write_user(buf_ptr + (len & 0x0ffff), key, sizeof(key));
@@ -488,7 +504,7 @@ int uprobe_http2ResponseWriterStateWriteHeader(struct pt_regs *ctx) {
     void *goroutine_addr = GOROUTINE_PTR(ctx);
     u64 status = (u64)GO_PARAM2(ctx);
 
-    bpf_dbg_printk("goroutine_addr %lx, status %d", goroutine_addr, status);    
+    bpf_dbg_printk("goroutine_addr %lx, status %d", goroutine_addr, status);
 
     server_http_func_invocation_t *invocation =
         bpf_map_lookup_elem(&ongoing_http_server_requests, &goroutine_addr);
@@ -504,7 +520,7 @@ int uprobe_http2ResponseWriterStateWriteHeader(struct pt_regs *ctx) {
            bpf_dbg_printk("can't read http invocation metadata");
            return 0;
        }
-    }    
+    }
 
     invocation->status = status;
 
@@ -517,8 +533,7 @@ int uprobe_http2serverConn_runHandler(struct pt_regs *ctx) {
     bpf_dbg_printk("=== uprobe/proc http2serverConn_runHandler === ");
 
     void *goroutine_addr = GOROUTINE_PTR(ctx);
-    bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);    
-
+    bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);
 
     void *sc = GO_PARAM1(ctx);
 
@@ -546,7 +561,9 @@ int uprobe_http2serverConn_runHandler(struct pt_regs *ctx) {
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
     __type(key, u32); // key: stream id
-    __type(value, u64); // the goroutine of the round trip request, which is the key for our traceparent info
+    __type(
+        value,
+        u64); // the goroutine of the round trip request, which is the key for our traceparent info
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } http2_req_map SEC(".maps");
 #endif
@@ -575,7 +592,7 @@ int uprobe_http2RoundTrip(struct pt_regs *ctx) {
 
     if (ok) {
        void *goroutine_addr = GOROUTINE_PTR(ctx);
-        bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);    
+        bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);
 
        bpf_map_update_elem(&ongoing_client_connections, &goroutine_addr, &conn, BPF_ANY);
     }
@@ -584,14 +601,14 @@ int uprobe_http2RoundTrip(struct pt_regs *ctx) {
 #ifndef NO_HEADER_PROPAGATION
     u32 stream_id = 0;
     bpf_probe_read(&stream_id, sizeof(stream_id), (void *)(cc_ptr + cc_next_stream_id_pos));
-    
+
     bpf_dbg_printk("cc_ptr = %llx, nextStreamID=%d", cc_ptr, stream_id);
 
     if (stream_id) {
        void *goroutine_addr = GOROUTINE_PTR(ctx);
 
        bpf_map_update_elem(&http2_req_map, &stream_id, &goroutine_addr, BPF_ANY);
     }
-#endif    
+#endif
     }
 
     return 0;
@@ -608,8 +625,10 @@ typedef struct framer_func_invocation {
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void*); // key: go routine doing framer write headers
-    __type(value, framer_func_invocation_t); // the goroutine of the round trip request, which is the key for our traceparent info
+    __type(key, void *); // key: go routine doing framer write headers
+    __type(
+        value,
+        framer_func_invocation_t); // the goroutine of the round trip request, which is the key for our traceparent info
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } framer_invocation_map SEC(".maps");
 
@@ -661,7 +680,7 @@ int uprobe_http2FramerWriteHeaders(struct pt_regs *ctx) {
 
                bpf_map_update_elem(&framer_invocation_map, &goroutine_addr, &f_info, BPF_ANY);
            } else {
-                bpf_dbg_printk("N too large, ignoring...");    
+                bpf_dbg_printk("N too large, ignoring...");
            }
        }
     }
@@ -678,7 +697,8 @@ int uprobe_http2FramerWriteHeaders(struct pt_regs *ctx) {
 #endif
 
 #ifndef NO_HEADER_PROPAGATION
-#define HTTP2_ENCODED_HEADER_LEN 66 // 1 + 1 + 8 + 1 + 55 = type byte + hpack_len_as_byte("traceparent") + strlen(hpack("traceparent")) + len_as_byte(55) + generated traceparent id
+#define HTTP2_ENCODED_HEADER_LEN \
+    66 // 1 + 1 + 8 + 1 + 55 = type byte + hpack_len_as_byte("traceparent") + strlen(hpack("traceparent")) + len_as_byte(55) + generated traceparent id
 
 SEC("uprobe/http2FramerWriteHeaders_returns")
 int uprobe_http2FramerWriteHeaders_returns(struct pt_regs *ctx) {
@@ -712,20 +732,21 @@ int uprobe_http2FramerWriteHeaders_returns(struct pt_regs *ctx) {
            u8 key_len = TP_ENCODED_LEN | 0x80; // high tagged to signify hpack encoded value
            u8 val_len = TP_MAX_VAL_LENGTH;
 
-            // We don't hpack encode the value of the traceparent field, because that will require that 
+            // We don't hpack encode the value of the traceparent field, because that will require that
            // we use bpf_loop, which in turn increases the kernel requirement to 5.17+.
            make_tp_string(tp_str, &f_info->tp);
            //bpf_dbg_printk("Will write %s, type = %d, key_len = %d, val_len = %d", tp_str, type_byte, key_len, val_len);
 
-            bpf_probe_write_user(buf_arr + (n & 0x0ffff), &type_byte, sizeof(type_byte));                
+            bpf_probe_write_user(buf_arr + (n & 0x0ffff), &type_byte, sizeof(type_byte));
            n++;
            // Write the length of the key = 8
            bpf_probe_write_user(buf_arr + (n & 0x0ffff), &key_len, sizeof(key_len));
            n++;
            // Write 'traceparent' encoded as hpack
-            bpf_probe_write_user(buf_arr + (n & 0x0ffff), tp_encoded, sizeof(tp_encoded));;
+            bpf_probe_write_user(buf_arr + (n & 0x0ffff), tp_encoded, sizeof(tp_encoded));
+            ;
            n += TP_ENCODED_LEN;
-            // Write the length of the hpack encoded traceparent field 
+            // Write the length of the hpack encoded traceparent field
            bpf_probe_write_user(buf_arr + (n & 0x0ffff), &val_len, sizeof(val_len));
            n++;
            bpf_probe_write_user(buf_arr + (n & 0x0ffff), tp_str, sizeof(tp_str));
@@ -753,7 +774,7 @@ int uprobe_http2FramerWriteHeaders_returns(struct pt_regs *ctx) {
                size_3 = (u8)(new_size);
 
                bpf_probe_write_user((void *)(buf_arr + initial_n), &size_1, sizeof(size_1));
-                bpf_probe_write_user((void *)(buf_arr + initial_n +1), &size_2, sizeof(size_2));
+                bpf_probe_write_user((void *)(buf_arr + initial_n + 1), &size_2, sizeof(size_2));
                bpf_probe_write_user((void *)(buf_arr + initial_n + 2), &size_3, sizeof(size_3));
            }
        }
@@ -767,7 +788,7 @@ SEC("uprobe/http2FramerWriteHeaders_returns")
 int uprobe_http2FramerWriteHeaders_returns(struct pt_regs *ctx) {
     return 0;
 }
-#endif
+#endif
 
 SEC("uprobe/connServe")
 int uprobe_connServe(struct pt_regs *ctx) {
@@ -788,7 +809,9 @@ int uprobe_netFdRead(struct pt_regs *ctx) {
 
     connection_info_t *conn = bpf_map_lookup_elem(&ongoing_server_connections, &goroutine_addr);
     if (conn) {
-        bpf_dbg_printk("Found existing server connection, parsing FD information for socket tuples, %llx", goroutine_addr);
+        bpf_dbg_printk(
+            "Found existing server connection, parsing FD information for socket tuples, %llx",
+            goroutine_addr);
 
        void *fd_ptr = GO_PARAM1(ctx);
        get_conn_info_from_fd(fd_ptr, conn); // ok to not check the result, we leave it as 0
@@ -815,7 +838,8 @@ int uprobe_persistConnRoundTrip(struct pt_regs *ctx) {
     void *goroutine_addr = GOROUTINE_PTR(ctx);
     bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);
 
-    http_func_invocation_t *invocation = bpf_map_lookup_elem(&ongoing_http_client_requests, &goroutine_addr);
+    http_func_invocation_t *invocation =
+        bpf_map_lookup_elem(&ongoing_http_client_requests, &goroutine_addr);
 
     if (!invocation) {
        bpf_dbg_printk("can't find invocation info for client call, this might be a bug");
       return 0;
@@ -825,18 +849,22 @@ int uprobe_persistConnRoundTrip(struct pt_regs *ctx) {
     if (pc_ptr) {
        void *conn_conn_ptr = pc_ptr + 8 + pc_conn_pos; // embedded struct
        void *tls_state = 0;
-        bpf_probe_read(&tls_state, sizeof(tls_state), (void *)(pc_ptr + pc_tls_pos)); // find tlsState
+        bpf_probe_read(
+            &tls_state, sizeof(tls_state), (void *)(pc_ptr + pc_tls_pos)); // find tlsState
        bpf_dbg_printk("conn_conn_ptr %llx, tls_state %llx", conn_conn_ptr, tls_state);
 
        conn_conn_ptr = unwrap_tls_conn_info(conn_conn_ptr, tls_state);
 
        if (conn_conn_ptr) {
            void *conn_ptr = 0;
-            bpf_probe_read(&conn_ptr, sizeof(conn_ptr), (void *)(conn_conn_ptr + net_conn_pos)); // find conn
-            bpf_dbg_printk("conn_ptr %llx", conn_ptr);    
+            bpf_probe_read(
+                &conn_ptr, sizeof(conn_ptr), (void *)(conn_conn_ptr + net_conn_pos)); // find conn
+            bpf_dbg_printk("conn_ptr %llx", conn_ptr);
            if (conn_ptr) {
                connection_info_t conn = {0};
-                get_conn_info(conn_ptr, &conn); // initialized to 0, no need to check the result if we succeeded
+                get_conn_info(
+                    conn_ptr,
+                    &conn); // initialized to 0, no need to check the result if we succeeded
                u64 pid_tid = bpf_get_current_pid_tgid();
                u32 pid = pid_from_pid_tgid(pid_tid);
                tp_info_pid_t tp_p = {
diff --git a/bpf/go_redis.h b/bpf/go_redis.h
index a9b140935..1fd7b591e 100644
--- a/bpf/go_redis.h
+++ b/bpf/go_redis.h
@@ -20,20 +20,20 @@ volatile const u64 io_writer_buf_ptr_pos;
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // key: goroutine id
+    __type(key, void *);               // key: goroutine id
     __type(value, redis_client_req_t); // the request
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_redis_requests SEC(".maps");
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __type(key, void *); // key: goroutine id
+    __type(key, void *);   // key: goroutine id
     __type(value, void *); // the *Conn
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } redis_writes SEC(".maps");
 
 static __always_inline void setup_request(void *goroutine_addr) {
-    redis_client_req_t req = {    
+    redis_client_req_t req = {
         .type = EVENT_GO_REDIS,
         .start_monotime_ns = bpf_ktime_get_ns(),
     };
@@ -65,7 +65,7 @@ int uprobe_redis_process_ret(struct pt_regs *ctx) {
     redis_client_req_t *req = bpf_map_lookup_elem(&ongoing_redis_requests, &goroutine_addr);
 
     if (req) {
-        redis_client_req_t *trace = bpf_ringbuf_reserve(&events, sizeof(redis_client_req_t), 0);    
+        redis_client_req_t *trace = bpf_ringbuf_reserve(&events, sizeof(redis_client_req_t), 0);
         if (trace) {
             bpf_dbg_printk("Sending redis client go trace");
             __builtin_memcpy(trace, req, sizeof(redis_client_req_t));
@@ -112,7 +112,8 @@ int uprobe_redis_with_writer(struct pt_regs *ctx) {
         bpf_dbg_printk("tcp conn ptr %llx", tcp_conn_ptr);
         if (tcp_conn_ptr) {
             void *conn_ptr = 0;
-            bpf_probe_read(&conn_ptr, sizeof(conn_ptr), (void *)(tcp_conn_ptr + 8)); // find conn
+            bpf_probe_read(
+                &conn_ptr, sizeof(conn_ptr), (void *)(tcp_conn_ptr + 8)); // find conn
             bpf_dbg_printk("conn ptr %llx", conn_ptr);
             if (conn_ptr) {
                 u8 ok = get_conn_info(conn_ptr, &req->conn);
diff --git a/bpf/go_runtime.h b/bpf/go_runtime.h
index 3c3d67329..92b92852e 100644
--- a/bpf/go_runtime.h
+++ b/bpf/go_runtime.h
@@ -31,9 +31,7 @@ int uprobe_proc_newproc1(struct pt_regs *ctx) {
     void *creator_goroutine = GOROUTINE_PTR(ctx);
     bpf_dbg_printk("creator_goroutine_addr %lx", creator_goroutine);
 
-    new_func_invocation_t invocation = {
-        .parent = (u64)GO_PARAM2(ctx)
-    };
+    new_func_invocation_t invocation = {.parent = (u64)GO_PARAM2(ctx)};
 
     // Save the registers on invocation to be able to fetch the arguments at return of newproc1
     if (bpf_map_update_elem(&newproc1, &creator_goroutine, &invocation, BPF_ANY)) {
@@ -50,8 +48,7 @@ int uprobe_proc_newproc1_ret(struct pt_regs *ctx) {
     bpf_dbg_printk("creator_goroutine_addr %lx", creator_goroutine);
 
     // Lookup the newproc1 invocation metadata
-    new_func_invocation_t *invocation =
-        bpf_map_lookup_elem(&newproc1, &creator_goroutine);
+    new_func_invocation_t *invocation = bpf_map_lookup_elem(&newproc1, &creator_goroutine);
     if (invocation == NULL) {
         bpf_dbg_printk("can't read newproc1 invocation metadata");
         goto done;
diff --git a/bpf/go_sarama.h b/bpf/go_sarama.h
index dd0e32e4b..d6ee1274e 100644
--- a/bpf/go_sarama.h
+++ b/bpf/go_sarama.h
@@ -31,7 +31,7 @@ struct {
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
     __type(key, void *); // key: goroutine id
-    __type(value, u32); // correlation id
+    __type(value, u32);  // correlation id
     __uint(max_entries, MAX_CONCURRENT_REQUESTS);
 } ongoing_kafka_requests SEC(".maps");
 
@@ -42,7 +42,7 @@ int uprobe_sarama_sendInternal(struct pt_regs *ctx) {
     bpf_dbg_printk("goroutine_addr %lx", goroutine_addr);
 
     u32 correlation_id = 0;
-    
+
     void *b_ptr = GO_PARAM1(ctx);
     if (b_ptr) {
         bpf_probe_read(&correlation_id, sizeof(u32), b_ptr + sarama_broker_corr_id_pos);
@@ -51,7 +51,8 @@ int uprobe_sarama_sendInternal(struct pt_regs *ctx) {
     if (correlation_id) {
         bpf_dbg_printk("correlation_id = %d", correlation_id);
 
-        if (bpf_map_update_elem(&ongoing_kafka_requests, &goroutine_addr, &correlation_id, BPF_ANY)) {
+        if (bpf_map_update_elem(
+                &ongoing_kafka_requests, &goroutine_addr, &correlation_id, BPF_ANY)) {
             bpf_dbg_printk("can't update kafka requests element");
         }
     }
@@ -91,11 +92,14 @@ int uprobe_sarama_broker_write(struct pt_regs *ctx) {
         bpf_dbg_printk("conn conn ptr %llx", conn_conn_ptr);
         if (conn_conn_ptr) {
             void *tcp_conn_ptr = 0;
-            bpf_probe_read(&tcp_conn_ptr, sizeof(tcp_conn_ptr), (void *)(conn_conn_ptr + sarama_bufconn_conn_pos + 8)); // find conn
+            bpf_probe_read(&tcp_conn_ptr,
+                           sizeof(tcp_conn_ptr),
+                           (void *)(conn_conn_ptr + sarama_bufconn_conn_pos + 8)); // find conn
             bpf_dbg_printk("tcp conn ptr %llx", tcp_conn_ptr);
             if (tcp_conn_ptr) {
                 void *conn_ptr = 0;
-                bpf_probe_read(&conn_ptr, sizeof(conn_ptr), (void *)(tcp_conn_ptr + 8)); // find conn
+                bpf_probe_read(
+                    &conn_ptr, sizeof(conn_ptr), (void *)(tcp_conn_ptr + 8)); // find conn
                 bpf_dbg_printk("conn ptr %llx", conn_ptr);
                 if (conn_ptr) {
                     u8 ok = get_conn_info(conn_ptr, &req.conn);
@@ -111,7 +115,6 @@ int uprobe_sarama_broker_write(struct pt_regs *ctx) {
             bpf_probe_read(req.buf, KAFKA_MAX_LEN, buf_ptr);
             bpf_map_update_elem(&kafka_requests, &correlation_id, &req, BPF_ANY);
         }
-
     }
 
     bpf_map_delete_elem(&ongoing_kafka_requests, &goroutine_addr);
@@ -138,7 +141,8 @@ int uprobe_sarama_response_promise_handle(struct pt_regs *ctx) {
     if (req) {
         req->end_monotime_ns = bpf_ktime_get_ns();
 
-        kafka_client_req_t *trace = bpf_ringbuf_reserve(&events, sizeof(kafka_client_req_t), 0);
+        kafka_client_req_t *trace =
+            bpf_ringbuf_reserve(&events, sizeof(kafka_client_req_t), 0);
 
         if (trace) {
             bpf_dbg_printk("Sending kafka client go trace");
diff --git a/bpf/go_sql.h b/bpf/go_sql.h
index 297ead0dc..bc7fb97dc 100644
--- a/bpf/go_sql.h
+++ b/bpf/go_sql.h
@@ -31,12 +31,10 @@ struct {
 } ongoing_sql_queries SEC(".maps");
 
 static __always_inline void set_sql_info(void *goroutine_addr, void *sql_param, void *query_len) {
-    sql_func_invocation_t invocation = {
-        .start_monotime_ns = bpf_ktime_get_ns(),
-        .sql_param = (u64)sql_param,
-        .query_len = (u64)query_len,
-        .tp = {0}
-    };
+    sql_func_invocation_t invocation = {.start_monotime_ns = bpf_ktime_get_ns(),
+                                        .sql_param = (u64)sql_param,
+                                        .query_len = (u64)query_len,
+                                        .tp = {0}};
 
     // We don't look up in the headers, no http/grpc request, therefore 0 as last argument
     client_trace_parent(goroutine_addr, &invocation.tp, 0);
@@ -100,7 +98,7 @@ int uprobe_queryReturn(struct pt_regs *ctx) {
         if (query_len > sizeof(trace->sql)) {
             query_len = sizeof(trace->sql);
         }
-        bpf_probe_read(trace->sql, query_len, (void*)invocation->sql_param);
+        bpf_probe_read(trace->sql, query_len, (void *)invocation->sql_param);
         bpf_dbg_printk("Found sql statement %s", trace->sql);
         if (query_len < sizeof(trace->sql)) {
             trace->sql[query_len] = 0;
diff --git a/bpf/go_str.h b/bpf/go_str.h
index 60b89d304..e818afaf0 100644
--- a/bpf/go_str.h
+++ b/bpf/go_str.h
@@ -16,7 +16,8 @@
 #include "utils.h"
 #include "bpf_dbg.h"
 
-static __always_inline int read_go_str_n(char *name, void *base_ptr, u64 len, void *field, u64 max_size) {
+static __always_inline int
+read_go_str_n(char *name, void *base_ptr, u64 len, void *field, u64 max_size) {
     u64 size = max_size < len ? max_size : len;
     if (bpf_probe_read(field, size, base_ptr)) {
         bpf_dbg_printk("can't read string for %s", name);
@@ -31,7 +32,8 @@ static __always_inline int read_go_str_n(char *name, void *base_ptr, u64 len, vo
     return 1;
 }
 
-static __always_inline int read_go_str(char *name, void *base_ptr, u8 offset, void *field, u64 max_size) {
+static __always_inline int
+read_go_str(char *name, void *base_ptr, u8 offset, void *field, u64 max_size) {
     void *ptr = 0;
     if (bpf_probe_read(&ptr, sizeof(ptr), (void *)(base_ptr + offset)) != 0) {
         bpf_dbg_printk("can't read ptr for %s", name);
diff --git a/bpf/go_traceparent.h b/bpf/go_traceparent.h
index f96a753a6..5ddbb0128 100644
--- a/bpf/go_traceparent.h
+++ b/bpf/go_traceparent.h
@@ -29,28 +29,24 @@
 #define OFFSET_OF_GO_RUNTIME_HMAP_FIELD_B 9
 #define OFFSET_OF_GO_RUNTIME_HMAP_FIELD_BUCKETS 16
 
-struct go_string
-{
+struct go_string {
     char *str;
     s64 len;
 };
 
-struct go_slice
-{
+struct go_slice {
     void *array;
     s64 len;
     s64 cap;
 };
 
-struct go_slice_user_ptr
-{
+struct go_slice_user_ptr {
     void *array;
     void *len;
     void *cap;
 };
 
-struct go_iface
-{
+struct go_iface {
     void *tab;
     void *data;
 };
@@ -62,8 +58,7 @@ struct map_bucket {
     void *overflow;
 };
 
-struct
-{
+struct {
     __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
     __uint(key_size, sizeof(u32));
     __uint(value_size, sizeof(struct map_bucket));
@@ -71,103 +66,95 @@ struct
 } golang_mapbucket_storage_map SEC(".maps");
 
 // assumes s2 is all lowercase
-static __always_inline int bpf_memicmp(char *s1, char *s2, s32 size)
-{
-    for (int i = 0; i < size; i++)
-    {
+static __always_inline int bpf_memicmp(char *s1, char *s2, s32 size) {
+    for (int i = 0; i < size; i++) {
         if (s1[i] != s2[i] && s1[i] != (s2[i] - 32)) // compare with each uppercase character
         {
-            return i+1;
+            return i + 1;
         }
     }
 
     return 0;
 }
 
-static __always_inline void *extract_traceparent_from_req_headers(void *headers_ptr_ptr)
-{
+static __always_inline void *extract_traceparent_from_req_headers(void *headers_ptr_ptr) {
     void *headers_ptr;
     long res;
     res = bpf_probe_read(&headers_ptr, sizeof(headers_ptr), headers_ptr_ptr);
-    if (res < 0)
-    {
+    if (res < 0) {
         return NULL;
     }
     u64 headers_count = 0;
     res = bpf_probe_read(&headers_count, sizeof(headers_count), headers_ptr);
-    if (res < 0)
-    {
+    if (res < 0) {
         return NULL;
     }
-    if (headers_count == 0)
-    {
+    if (headers_count == 0) {
         return NULL;
     }
     unsigned char log_2_bucket_count;
-    res = bpf_probe_read(&log_2_bucket_count, sizeof(log_2_bucket_count), headers_ptr + OFFSET_OF_GO_RUNTIME_HMAP_FIELD_B);
-    if (res < 0)
-    {
+    res = bpf_probe_read(&log_2_bucket_count,
+                         sizeof(log_2_bucket_count),
+                         headers_ptr + OFFSET_OF_GO_RUNTIME_HMAP_FIELD_B);
+    if (res < 0) {
         return NULL;
     }
     u64 bucket_count = 1 << log_2_bucket_count;
     void *header_buckets;
-    res = bpf_probe_read(&header_buckets, sizeof(header_buckets), headers_ptr + OFFSET_OF_GO_RUNTIME_HMAP_FIELD_BUCKETS);
-    if (res < 0)
-    {
+    res = bpf_probe_read(&header_buckets,
+                         sizeof(header_buckets),
+                         headers_ptr + OFFSET_OF_GO_RUNTIME_HMAP_FIELD_BUCKETS);
+    if (res < 0) {
         return NULL;
     }
     u32 map_id = 0;
-    struct map_bucket *map_value = (struct map_bucket *)bpf_map_lookup_elem(&golang_mapbucket_storage_map, &map_id);
-    if (!map_value)
-    {
+    struct map_bucket *map_value =
+        (struct map_bucket *)bpf_map_lookup_elem(&golang_mapbucket_storage_map, &map_id);
+    if (!map_value) {
         return NULL;
     }
 
-    for (u64 j = 0; j < MAX_BUCKETS; j++)
-    {
-        if (j >= bucket_count)
-        {
+    for (u64 j = 0; j < MAX_BUCKETS; j++) {
+        if (j >= bucket_count) {
             break;
         }
-        res = bpf_probe_read(map_value, sizeof(struct map_bucket), header_buckets + (j * sizeof(struct map_bucket)));
-        if (res < 0)
-        {
+        res = bpf_probe_read(
+            map_value, sizeof(struct map_bucket), header_buckets + (j * sizeof(struct map_bucket)));
+        if (res < 0) {
            continue;
        }
-        for (u64 i = 0; i < 8; i++)
-        {
+        for (u64 i = 0; i < 8; i++) {
            // break the bucket iteration when tophash is zero
            // since "there are no more non-empty cells at higher indexes or overflows"
            // ref: https://github.com/golang/go/blob/9050ce9b334419066c364e747499a2faf4425dad/src/runtime/map.go#L86
-            if (map_value->tophash[i] == 0)
-            {
+            if (map_value->tophash[i] == 0) {
                break;
            }
            // skip the cell if tophash is empty
            // ref: https://github.com/golang/go/blob/9050ce9b334419066c364e747499a2faf4425dad/src/runtime/map.go#L87
-            if (map_value->tophash[i] == 1)
-            {
+            if (map_value->tophash[i] == 1) {
                continue;
            }
-            if (map_value->keys[i].len != W3C_KEY_LENGTH)
-            {
+            if (map_value->keys[i].len != W3C_KEY_LENGTH) {
                continue;
            }
 
            char current_header_key[W3C_KEY_LENGTH];
            bpf_probe_read(current_header_key, sizeof(current_header_key), map_value->keys[i].str);
-            if (bpf_memicmp(current_header_key, "traceparent", W3C_KEY_LENGTH)) // grpc headers don't get normalized
+            if (bpf_memicmp(current_header_key,
+                            "traceparent",
+                            W3C_KEY_LENGTH)) // grpc headers don't get normalized
            {
                continue;
            }
            void *traceparent_header_value_ptr = map_value->values[i].array;
            struct go_string traceparent_header_value_go_str;
-            res = bpf_probe_read(&traceparent_header_value_go_str, sizeof(traceparent_header_value_go_str), traceparent_header_value_ptr);
-            if (res < 0)
-            {
+            res = bpf_probe_read(&traceparent_header_value_go_str,
+                                 sizeof(traceparent_header_value_go_str),
+                                 traceparent_header_value_ptr);
+            if (res < 0) {
                return NULL;
            }
-            if (traceparent_header_value_go_str.len != W3C_VAL_LENGTH)
-            {
+            if (traceparent_header_value_go_str.len != W3C_VAL_LENGTH) {
                continue;
            }
            return traceparent_header_value_go_str.str;
diff --git a/bpf/hpack.h b/bpf/hpack.h
index 3faebce43..123982046 100644
--- a/bpf/hpack.h
+++ b/bpf/hpack.h
@@ -6,536 +6,68 @@
 #include "tracing.h"
 
 uint32_t huffman_codes[256] = {
-    0x1ff8,
-    0x7fffd8,
-    0xfffffe2,
-    0xfffffe3,
-    0xfffffe4,
-    0xfffffe5,
-    0xfffffe6,
-    0xfffffe7,
-    0xfffffe8,
-    0xffffea,
-    0x3ffffffc,
-    0xfffffe9,
-    0xfffffea,
-    0x3ffffffd,
-    0xfffffeb,
-
0xfffffec, - 0xfffffed, - 0xfffffee, - 0xfffffef, - 0xffffff0, - 0xffffff1, - 0xffffff2, - 0x3ffffffe, - 0xffffff3, - 0xffffff4, - 0xffffff5, - 0xffffff6, - 0xffffff7, - 0xffffff8, - 0xffffff9, - 0xffffffa, - 0xffffffb, - 0x14, - 0x3f8, - 0x3f9, - 0xffa, - 0x1ff9, - 0x15, - 0xf8, - 0x7fa, - 0x3fa, - 0x3fb, - 0xf9, - 0x7fb, - 0xfa, - 0x16, - 0x17, - 0x18, - 0x0, - 0x1, - 0x2, - 0x19, - 0x1a, - 0x1b, - 0x1c, - 0x1d, - 0x1e, - 0x1f, - 0x5c, - 0xfb, - 0x7ffc, - 0x20, - 0xffb, - 0x3fc, - 0x1ffa, - 0x21, - 0x5d, - 0x5e, - 0x5f, - 0x60, - 0x61, - 0x62, - 0x63, - 0x64, - 0x65, - 0x66, - 0x67, - 0x68, - 0x69, - 0x6a, - 0x6b, - 0x6c, - 0x6d, - 0x6e, - 0x6f, - 0x70, - 0x71, - 0x72, - 0xfc, - 0x73, - 0xfd, - 0x1ffb, - 0x7fff0, - 0x1ffc, - 0x3ffc, - 0x22, - 0x7ffd, - 0x3, - 0x23, - 0x4, - 0x24, - 0x5, - 0x25, - 0x26, - 0x27, - 0x6, - 0x74, - 0x75, - 0x28, - 0x29, - 0x2a, - 0x7, - 0x2b, - 0x76, - 0x2c, - 0x8, - 0x9, - 0x2d, - 0x77, - 0x78, - 0x79, - 0x7a, - 0x7b, - 0x7ffe, - 0x7fc, - 0x3ffd, - 0x1ffd, - 0xffffffc, - 0xfffe6, - 0x3fffd2, - 0xfffe7, - 0xfffe8, - 0x3fffd3, - 0x3fffd4, - 0x3fffd5, - 0x7fffd9, - 0x3fffd6, - 0x7fffda, - 0x7fffdb, - 0x7fffdc, - 0x7fffdd, - 0x7fffde, - 0xffffeb, - 0x7fffdf, - 0xffffec, - 0xffffed, - 0x3fffd7, - 0x7fffe0, - 0xffffee, - 0x7fffe1, - 0x7fffe2, - 0x7fffe3, - 0x7fffe4, - 0x1fffdc, - 0x3fffd8, - 0x7fffe5, - 0x3fffd9, - 0x7fffe6, - 0x7fffe7, - 0xffffef, - 0x3fffda, - 0x1fffdd, - 0xfffe9, - 0x3fffdb, - 0x3fffdc, - 0x7fffe8, - 0x7fffe9, - 0x1fffde, - 0x7fffea, - 0x3fffdd, - 0x3fffde, - 0xfffff0, - 0x1fffdf, - 0x3fffdf, - 0x7fffeb, - 0x7fffec, - 0x1fffe0, - 0x1fffe1, - 0x3fffe0, - 0x1fffe2, - 0x7fffed, - 0x3fffe1, - 0x7fffee, - 0x7fffef, - 0xfffea, - 0x3fffe2, - 0x3fffe3, - 0x3fffe4, - 0x7ffff0, - 0x3fffe5, - 0x3fffe6, - 0x7ffff1, - 0x3ffffe0, - 0x3ffffe1, - 0xfffeb, - 0x7fff1, - 0x3fffe7, - 0x7ffff2, - 0x3fffe8, - 0x1ffffec, - 0x3ffffe2, - 0x3ffffe3, - 0x3ffffe4, - 0x7ffffde, - 0x7ffffdf, - 0x3ffffe5, - 0xfffff1, - 0x1ffffed, - 0x7fff2, - 0x1fffe3, - 0x3ffffe6, - 0x7ffffe0, - 0x7ffffe1, - 0x3ffffe7, - 0x7ffffe2, - 0xfffff2, - 0x1fffe4, - 0x1fffe5, - 0x3ffffe8, - 0x3ffffe9, - 0xffffffd, - 0x7ffffe3, - 0x7ffffe4, - 0x7ffffe5, - 0xfffec, - 0xfffff3, - 0xfffed, - 0x1fffe6, - 0x3fffe9, - 0x1fffe7, - 0x1fffe8, - 0x7ffff3, - 0x3fffea, - 0x3fffeb, - 0x1ffffee, - 0x1ffffef, - 0xfffff4, - 0xfffff5, - 0x3ffffea, - 0x7ffff4, - 0x3ffffeb, - 0x7ffffe6, - 0x3ffffec, - 0x3ffffed, - 0x7ffffe7, - 0x7ffffe8, - 0x7ffffe9, - 0x7ffffea, - 0x7ffffeb, - 0xffffffe, - 0x7ffffec, - 0x7ffffed, - 0x7ffffee, - 0x7ffffef, - 0x7fffff0, - 0x3ffffee, + 0x1ff8, 0x7fffd8, 0xfffffe2, 0xfffffe3, 0xfffffe4, 0xfffffe5, 0xfffffe6, 0xfffffe7, + 0xfffffe8, 0xffffea, 0x3ffffffc, 0xfffffe9, 0xfffffea, 0x3ffffffd, 0xfffffeb, 0xfffffec, + 0xfffffed, 0xfffffee, 0xfffffef, 0xffffff0, 0xffffff1, 0xffffff2, 0x3ffffffe, 0xffffff3, + 0xffffff4, 0xffffff5, 0xffffff6, 0xffffff7, 0xffffff8, 0xffffff9, 0xffffffa, 0xffffffb, + 0x14, 0x3f8, 0x3f9, 0xffa, 0x1ff9, 0x15, 0xf8, 0x7fa, + 0x3fa, 0x3fb, 0xf9, 0x7fb, 0xfa, 0x16, 0x17, 0x18, + 0x0, 0x1, 0x2, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, + 0x1e, 0x1f, 0x5c, 0xfb, 0x7ffc, 0x20, 0xffb, 0x3fc, + 0x1ffa, 0x21, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, + 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, + 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, + 0xfc, 0x73, 0xfd, 0x1ffb, 0x7fff0, 0x1ffc, 0x3ffc, 0x22, + 0x7ffd, 0x3, 0x23, 0x4, 0x24, 0x5, 0x25, 0x26, + 0x27, 0x6, 0x74, 0x75, 0x28, 0x29, 0x2a, 0x7, + 0x2b, 0x76, 0x2c, 0x8, 0x9, 0x2d, 0x77, 0x78, + 0x79, 0x7a, 0x7b, 0x7ffe, 0x7fc, 0x3ffd, 
0x1ffd, 0xffffffc, + 0xfffe6, 0x3fffd2, 0xfffe7, 0xfffe8, 0x3fffd3, 0x3fffd4, 0x3fffd5, 0x7fffd9, + 0x3fffd6, 0x7fffda, 0x7fffdb, 0x7fffdc, 0x7fffdd, 0x7fffde, 0xffffeb, 0x7fffdf, + 0xffffec, 0xffffed, 0x3fffd7, 0x7fffe0, 0xffffee, 0x7fffe1, 0x7fffe2, 0x7fffe3, + 0x7fffe4, 0x1fffdc, 0x3fffd8, 0x7fffe5, 0x3fffd9, 0x7fffe6, 0x7fffe7, 0xffffef, + 0x3fffda, 0x1fffdd, 0xfffe9, 0x3fffdb, 0x3fffdc, 0x7fffe8, 0x7fffe9, 0x1fffde, + 0x7fffea, 0x3fffdd, 0x3fffde, 0xfffff0, 0x1fffdf, 0x3fffdf, 0x7fffeb, 0x7fffec, + 0x1fffe0, 0x1fffe1, 0x3fffe0, 0x1fffe2, 0x7fffed, 0x3fffe1, 0x7fffee, 0x7fffef, + 0xfffea, 0x3fffe2, 0x3fffe3, 0x3fffe4, 0x7ffff0, 0x3fffe5, 0x3fffe6, 0x7ffff1, + 0x3ffffe0, 0x3ffffe1, 0xfffeb, 0x7fff1, 0x3fffe7, 0x7ffff2, 0x3fffe8, 0x1ffffec, + 0x3ffffe2, 0x3ffffe3, 0x3ffffe4, 0x7ffffde, 0x7ffffdf, 0x3ffffe5, 0xfffff1, 0x1ffffed, + 0x7fff2, 0x1fffe3, 0x3ffffe6, 0x7ffffe0, 0x7ffffe1, 0x3ffffe7, 0x7ffffe2, 0xfffff2, + 0x1fffe4, 0x1fffe5, 0x3ffffe8, 0x3ffffe9, 0xffffffd, 0x7ffffe3, 0x7ffffe4, 0x7ffffe5, + 0xfffec, 0xfffff3, 0xfffed, 0x1fffe6, 0x3fffe9, 0x1fffe7, 0x1fffe8, 0x7ffff3, + 0x3fffea, 0x3fffeb, 0x1ffffee, 0x1ffffef, 0xfffff4, 0xfffff5, 0x3ffffea, 0x7ffff4, + 0x3ffffeb, 0x7ffffe6, 0x3ffffec, 0x3ffffed, 0x7ffffe7, 0x7ffffe8, 0x7ffffe9, 0x7ffffea, + 0x7ffffeb, 0xffffffe, 0x7ffffec, 0x7ffffed, 0x7ffffee, 0x7ffffef, 0x7fffff0, 0x3ffffee, }; uint8_t huffman_code_len[256] = { - 13, - 23, - 28, - 28, - 28, - 28, - 28, - 28, - 28, - 24, - 30, - 28, - 28, - 30, - 28, - 28, - 28, - 28, - 28, - 28, - 28, - 28, - 30, - 28, - 28, - 28, - 28, - 28, - 28, - 28, - 28, - 28, - 6, - 10, - 10, - 12, - 13, - 6, - 8, - 11, - 10, - 10, - 8, - 11, - 8, - 6, - 6, - 6, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 8, - 15, - 6, - 12, - 10, - 13, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 7, - 8, - 13, - 19, - 13, - 14, - 6, - 15, - 5, - 6, - 5, - 6, - 5, - 6, - 6, - 6, - 5, - 7, - 7, - 6, - 6, - 6, - 5, - 6, - 7, - 6, - 5, - 5, - 6, - 7, - 7, - 7, - 7, - 7, - 15, - 11, - 14, - 13, - 28, - 20, - 22, - 20, - 20, - 22, - 22, - 22, - 23, - 22, - 23, - 23, - 23, - 23, - 23, - 24, - 23, - 24, - 24, - 22, - 23, - 24, - 23, - 23, - 23, - 23, - 21, - 22, - 23, - 22, - 23, - 23, - 24, - 22, - 21, - 20, - 22, - 22, - 23, - 23, - 21, - 23, - 22, - 22, - 24, - 21, - 22, - 23, - 23, - 21, - 21, - 22, - 21, - 23, - 22, - 23, - 23, - 20, - 22, - 22, - 22, - 23, - 22, - 22, - 23, - 26, - 26, - 20, - 19, - 22, - 23, - 22, - 25, - 26, - 26, - 26, - 27, - 27, - 26, - 24, - 25, - 19, - 21, - 26, - 27, - 27, - 26, - 27, - 24, - 21, - 21, - 26, - 26, - 28, - 27, - 27, - 27, - 20, - 24, - 20, - 21, - 22, - 21, - 21, - 23, - 22, - 22, - 25, - 25, - 24, - 24, - 26, - 23, - 26, - 27, - 26, - 26, - 27, - 27, - 27, - 27, - 27, - 28, - 27, - 27, - 27, - 27, - 27, - 26, + 13, 23, 28, 28, 28, 28, 28, 28, 28, 24, 30, 28, 28, 30, 28, 28, 28, 28, 28, 28, 28, 28, 30, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 6, 10, 10, 12, 13, 6, 8, 11, 10, 10, 8, 11, 8, 6, 6, 6, + 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 8, 15, 6, 12, 10, 13, 6, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 8, 13, 19, 13, 14, 6, + 15, 5, 6, 5, 6, 5, 6, 6, 6, 5, 7, 7, 6, 6, 6, 5, 6, 7, 6, 5, 5, 6, 7, 7, + 7, 7, 7, 15, 11, 14, 13, 28, 20, 22, 20, 20, 22, 22, 22, 23, 22, 23, 23, 23, 23, 23, 24, 23, + 24, 24, 22, 23, 24, 23, 23, 23, 23, 21, 22, 23, 22, 23, 23, 24, 22, 21, 20, 22, 22, 23, 23, 21, + 23, 22, 22, 24, 21, 22, 23, 23, 21, 21, 22, 21, 23, 22, 23, 23, 20, 22, 22, 22, 
23, 22, 22, 23, + 26, 26, 20, 19, 22, 23, 22, 25, 26, 26, 26, 27, 27, 26, 24, 25, 19, 21, 26, 27, 27, 26, 27, 24, + 21, 21, 26, 26, 28, 27, 27, 27, 20, 24, 20, 21, 22, 21, 21, 23, 22, 22, 25, 25, 24, 24, 26, 23, + 26, 27, 26, 26, 27, 27, 27, 27, 27, 28, 27, 27, 27, 27, 27, 26, }; -#define EOS_CODE (0x3fffffff) -#define EOS_N_BITS (30) +#define EOS_CODE (0x3fffffff) +#define EOS_N_BITS (30) #define EOS_PAD_BYTE (EOS_CODE >> (EOS_N_BITS - 8)) #define TP_ENCODED_LEN 8 -static unsigned char tp_encoded[TP_ENCODED_LEN] = { 0x4d, 0x83, 0x21, 0x6b, 0x1d, 0x85, 0xa9, 0x3f }; // hpack encoded "traceparent" +static unsigned char tp_encoded[TP_ENCODED_LEN] = { + 0x4d, 0x83, 0x21, 0x6b, 0x1d, 0x85, 0xa9, 0x3f}; // hpack encoded "traceparent" struct hpack_ctx { uint8_t dst[TP_MAX_VAL_LENGTH]; int32 dst_len; uint8_t src[TP_MAX_VAL_LENGTH]; - uint64_t m_bytes; + uint64_t m_bytes; uint32_t m_count; int32 len; }; @@ -543,7 +75,7 @@ struct hpack_ctx { static int encode_iter(u32 index, struct hpack_ctx *d) { int len = d->len; - if (len >= (TP_MAX_VAL_LENGTH-4)) { + if (len >= (TP_MAX_VAL_LENGTH - 4)) { d->len = -1; return 1; } @@ -561,9 +93,9 @@ static int encode_iter(u32 index, struct hpack_ctx *d) { d->m_count %= 32; uint32_t y = (uint32_t)(d->m_bytes >> d->m_count); d->dst[len] = (uint8_t)(y >> 24); - d->dst[len+1] = (uint8_t)(y >> 16); - d->dst[len+2] = (uint8_t)(y >> 8); - d->dst[len+3] = (uint8_t)(y); + d->dst[len + 1] = (uint8_t)(y >> 16); + d->dst[len + 2] = (uint8_t)(y >> 8); + d->dst[len + 3] = (uint8_t)(y); d->len += 4; } @@ -581,7 +113,7 @@ static __always_inline int32_t hpack_encode_tp(struct hpack_ctx *d) { return -1; } - if (len > (TP_MAX_VAL_LENGTH-4)) { + if (len > (TP_MAX_VAL_LENGTH - 4)) { return -1; } @@ -597,38 +129,36 @@ static __always_inline int32_t hpack_encode_tp(struct hpack_ctx *d) { // n in (0, 8, 16, 24, 32) switch (rem) { - case 0: - return d->len; - case 1: - d->dst[len] = (uint8_t)(d->m_bytes); - d->len += 1; - return d->len; - case 2: - { - uint16_t y = (uint16_t)(d->m_bytes); - d->dst[len] = (uint8_t)(y >> 8); - d->dst[len+1] = (uint8_t)(y); - - d->len += 2; - - return d->len; - } - case 3: - { - uint16_t y = (uint16_t)(d->m_bytes >> 8); - d->dst[len] = (uint8_t)(y >> 8); - d->dst[len+1] = (uint8_t)(y); - d->dst[len+2] = (uint8_t)(d->m_bytes); - d->len += 3; - return d->len; - } + case 0: + return d->len; + case 1: + d->dst[len] = (uint8_t)(d->m_bytes); + d->len += 1; + return d->len; + case 2: { + uint16_t y = (uint16_t)(d->m_bytes); + d->dst[len] = (uint8_t)(y >> 8); + d->dst[len + 1] = (uint8_t)(y); + + d->len += 2; + + return d->len; + } + case 3: { + uint16_t y = (uint16_t)(d->m_bytes >> 8); + d->dst[len] = (uint8_t)(y >> 8); + d->dst[len + 1] = (uint8_t)(y); + d->dst[len + 2] = (uint8_t)(d->m_bytes); + d->len += 3; + return d->len; + } } // case 4: uint32_t y = (uint32_t)(d->m_bytes); d->dst[len] = (uint8_t)(y >> 24); - d->dst[len+1] = (uint8_t)(y >> 16); - d->dst[len+2] = (uint8_t)(y >> 8); - d->dst[len+3] = (uint8_t)(y); + d->dst[len + 1] = (uint8_t)(y >> 16); + d->dst[len + 2] = (uint8_t)(y >> 8); + d->dst[len + 3] = (uint8_t)(y); d->len += 4; diff --git a/bpf/http2_grpc.h b/bpf/http2_grpc.h index 0a3a2975f..5769f2477 100644 --- a/bpf/http2_grpc.h +++ b/bpf/http2_grpc.h @@ -13,27 +13,29 @@ #define FLAG_DATA_END_STREAM 0x1 typedef enum { - FrameData = 0x0, - FrameHeaders = 0x1, - FramePriority = 0x2, - FrameRSTStream = 0x3, - FrameSettings = 0x4, - FramePushPromise = 0x5, - FramePing = 0x6, - FrameGoAway = 0x7, - FrameWindowUpdate = 0x8, - FrameContinuation = 0x9, + 
FrameData = 0x0, + FrameHeaders = 0x1, + FramePriority = 0x2, + FrameRSTStream = 0x3, + FrameSettings = 0x4, + FramePushPromise = 0x5, + FramePing = 0x6, + FrameGoAway = 0x7, + FrameWindowUpdate = 0x8, + FrameContinuation = 0x9, } __attribute__((packed)) http2_frame_type_t; typedef struct frame_header { - u32 length:24; + u32 length : 24; http2_frame_type_t type; u8 flags; - u8 __ignore:1; - u32 stream_id:31; + u8 __ignore : 1; + u32 stream_id : 31; } __attribute__((packed)) frame_header_t; -static __always_inline u8 read_http2_grpc_frame_header(frame_header_t *frame, unsigned char *p, u32 len) { +static __always_inline u8 read_http2_grpc_frame_header(frame_header_t *frame, + unsigned char *p, + u32 len) { if (len < FRAME_HEADER_LEN) { return 0; } @@ -66,7 +68,7 @@ static __always_inline u8 is_headers_frame(frame_header_t *frame) { static __always_inline int bpf_memcmp(char *s1, char *s2, s32 size) { for (int i = 0; i < size; i++) { if (s1[i] != s2[i]) { - return i+1; + return i + 1; } } @@ -86,7 +88,8 @@ static __always_inline u8 is_http2_or_grpc(unsigned char *p, u32 len) { } static __always_inline u8 http_grpc_stream_ended(frame_header_t *frame) { - return is_headers_frame(frame) && ((frame->flags & FLAG_DATA_END_STREAM) == FLAG_DATA_END_STREAM); + return is_headers_frame(frame) && + ((frame->flags & FLAG_DATA_END_STREAM) == FLAG_DATA_END_STREAM); } static __always_inline u8 is_invalid_frame(frame_header_t *frame) { diff --git a/bpf/http_ssl.c b/bpf/http_ssl.c index 042f39487..b97c4abc1 100644 --- a/bpf/http_ssl.c +++ b/bpf/http_ssl.c @@ -36,7 +36,7 @@ int BPF_UPROBE(uprobe_ssl_do_handshake, void *s) { SEC("uretprobe/libssl.so:SSL_do_handshake") int BPF_URETPROBE(uretprobe_ssl_do_handshake, int ret) { u64 id = bpf_get_current_pid_tgid(); - + if (!valid_pid(id)) { return 0; } @@ -72,7 +72,10 @@ int BPF_UPROBE(uprobe_ssl_read, void *ssl, const void *buf, int num) { args.len_ptr = 0; bpf_map_update_elem(&active_ssl_read_args, &id, &args, BPF_ANY); - bpf_map_update_elem(&ssl_to_pid_tid, &args.ssl, &id, BPF_NOEXIST); // we must not overwrite here, remember the original thread + bpf_map_update_elem(&ssl_to_pid_tid, + &args.ssl, + &id, + BPF_NOEXIST); // we must not overwrite here, remember the original thread return 0; } @@ -117,7 +120,10 @@ int BPF_UPROBE(uprobe_ssl_read_ex, void *ssl, const void *buf, int num, size_t * args.len_ptr = (u64)readbytes; bpf_map_update_elem(&active_ssl_read_args, &id, &args, BPF_ANY); - bpf_map_update_elem(&ssl_to_pid_tid, &args.ssl, &id, BPF_NOEXIST); // we must not overwrite here, remember the original thread + bpf_map_update_elem(&ssl_to_pid_tid, + &args.ssl, + &id, + BPF_NOEXIST); // we must not overwrite here, remember the original thread return 0; } diff --git a/bpf/http_ssl.h b/bpf/http_ssl.h index f746b7c2e..8d73a4c81 100644 --- a/bpf/http_ssl.h +++ b/bpf/http_ssl.h @@ -10,7 +10,7 @@ // temporary struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, u64); // the pid_tid + __type(key, u64); // the pid_tid __type(value, u64); // the SSL struct pointer __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); @@ -21,7 +21,7 @@ struct { // when it's sandwitched between ssl_handshake entry/exit. 
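// Once a probe manages to pair an SSL* with a concrete pid/connection --
// normally the SSL_do_handshake return probe, otherwise the fallbacks in
// handle_ssl_buf below -- the pairing is recorded here, and the rest of the
// SSL probes treat this map as the source of truth for which connection a
// given SSL* belongs to.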
struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, u64); // the SSL struct pointer + __type(key, u64); // the SSL struct pointer __type(value, ssl_pid_connection_info_t); // the pointer to the file descriptor matching ssl __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); @@ -32,14 +32,14 @@ struct { // in case we miss SSL_do_handshake struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, u64); // the pid-tid pair + __type(key, u64); // the pid-tid pair __type(value, ssl_pid_connection_info_t); // the pointer to the file descriptor matching ssl __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } pid_tid_to_conn SEC(".maps"); // LRU map which holds onto the mapping of an ssl pointer to pid-tid, -// we clean-it up when we lookup by ssl. It's setup by SSL_read for cases where frameworks +// we clean-it up when we lookup by ssl. It's setup by SSL_read for cases where frameworks // process SSL requests on separate thread pools, e.g. Ruby on Rails struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); @@ -50,14 +50,14 @@ struct { // Temporary tracking of ssl_read/ssl_read_ex and ssl_write/ssl_write_ex arguments typedef struct ssl_args { - u64 ssl; // SSL struct pointer - u64 buf; // pointer to the buffer we read into + u64 ssl; // SSL struct pointer + u64 buf; // pointer to the buffer we read into u64 len_ptr; // size_t pointer of the read/written bytes, used only by SSL_read_ex and SSL_write_ex } ssl_args_t; // TODO: we should be able to make this into a single map. It's not a big deal because they are really only // tracking the parameters of SSL_read and SSL_write, so their memory consumption is minimal. If we can be -// 100% certain that SSL_read will never do an SSL_write, then these can be a single map. +// 100% certain that SSL_read will never do an SSL_write, then these can be a single map. 
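// If that consolidation ever happens, one possible shape for the merged map
// -- a sketch only, resting on the "SSL_read never triggers an SSL_write"
// assumption above; ssl_rw_args_t and active_ssl_args are hypothetical
// names, not part of this change:
//
//     typedef struct ssl_rw_args {
//         ssl_args_t args;
//         u8 direction; // TCP_SEND for the write probes, TCP_RECV for reads
//     } ssl_rw_args_t;
//
//     struct {
//         __uint(type, BPF_MAP_TYPE_LRU_HASH);
//         __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS);
//         __type(key, u64); // pid_tid, same key as the two maps it replaces
//         __type(value, ssl_rw_args_t);
//     } active_ssl_args SEC(".maps");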
struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); @@ -79,7 +79,8 @@ static __always_inline void cleanup_ssl_trace_info(http_info_t *info, void *ssl) ssl_pid_connection_info_t *ssl_info = bpf_map_lookup_elem(&ssl_to_conn, &ssl); if (ssl_info) { - bpf_dbg_printk("Looking to delete server trace for ssl = %llx, info->type = %d", ssl, info->type); + bpf_dbg_printk( + "Looking to delete server trace for ssl = %llx, info->type = %d", ssl, info->type); //dbg_print_http_connection_info(&ssl_info->conn.conn); // commented out since GitHub CI doesn't like this call trace_key_t t_key = {0}; t_key.extra_id = info->extra_id; @@ -93,7 +94,8 @@ static __always_inline void cleanup_ssl_trace_info(http_info_t *info, void *ssl) bpf_map_delete_elem(&ssl_to_conn, &ssl); } -static __always_inline void cleanup_ssl_server_trace(http_info_t *info, void *ssl, void *buf, u32 len) { +static __always_inline void +cleanup_ssl_server_trace(http_info_t *info, void *ssl, void *buf, u32 len) { if (info && http_will_complete(info, buf, len)) { cleanup_ssl_trace_info(info, ssl); } @@ -105,20 +107,25 @@ static __always_inline void cleanup_complete_ssl_server_trace(http_info_t *info, } } -static __always_inline void finish_possible_delayed_tls_http_request(pid_connection_info_t *pid_conn, void *ssl) { +static __always_inline void +finish_possible_delayed_tls_http_request(pid_connection_info_t *pid_conn, void *ssl) { http_info_t *info = bpf_map_lookup_elem(&ongoing_http, pid_conn); - if (info) { + if (info) { cleanup_complete_ssl_server_trace(info, ssl); - finish_http(info, pid_conn); + finish_http(info, pid_conn); } } -static __always_inline void cleanup_trace_info_for_delayed_trace(pid_connection_info_t *pid_conn, void *ssl, void *buf, u32 len) { +static __always_inline void cleanup_trace_info_for_delayed_trace(pid_connection_info_t *pid_conn, + void *ssl, + void *buf, + u32 len) { http_info_t *info = bpf_map_lookup_elem(&ongoing_http, pid_conn); cleanup_ssl_server_trace(info, ssl, buf, len); } -static __always_inline void handle_ssl_buf(void *ctx, u64 id, ssl_args_t *args, int bytes_len, u8 direction) { +static __always_inline void +handle_ssl_buf(void *ctx, u64 id, ssl_args_t *args, int bytes_len, u8 direction) { if (args && bytes_len > 0) { void *ssl = ((void *)args->ssl); u64 ssl_ptr = (u64)ssl; @@ -140,7 +147,8 @@ static __always_inline void handle_ssl_buf(void *ctx, u64 id, ssl_args_t *args, u64 pid_tid = *pid_tid_ptr; conn = bpf_map_lookup_elem(&pid_tid_to_conn, &pid_tid); - bpf_dbg_printk("Separate pool lookup ssl=%llx, pid=%d, conn=%llx", ssl_ptr, pid_tid, conn); + bpf_dbg_printk( + "Separate pool lookup ssl=%llx, pid=%d, conn=%llx", ssl_ptr, pid_tid, conn); } else { bpf_dbg_printk("Other thread lookup failed for ssl=%llx", ssl_ptr); } @@ -149,7 +157,7 @@ static __always_inline void handle_ssl_buf(void *ctx, u64 id, ssl_args_t *args, // If we found a connection setup by tcp_rcv_established, which means // we missed a SSL_do_handshake, update our ssl to connection map to be // used by the rest of the SSL lifecycle. We shouldn't rely on the SSL_write - // being on the same thread as the SSL_read. + // being on the same thread as the SSL_read. 
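        // To recap the fallback chain here, roughly: ssl_to_conn is tried
        // first (the normal, handshake-populated path); failing that,
        // pid_tid_to_conn is consulted -- directly by the current pid_tid,
        // or through the ssl_to_pid_tid mapping when a framework services
        // SSL reads on a separate thread pool.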
if (conn) { bpf_map_delete_elem(&pid_tid_to_conn, &id); ssl_pid_connection_info_t c; @@ -185,13 +193,18 @@ static __always_inline void handle_ssl_buf(void *ctx, u64 id, ssl_args_t *args, // } bpf_map_update_elem(&active_ssl_connections, &conn->p_conn, &ssl_ptr, BPF_ANY); - // We should attempt to clean up the server trace immediately. The cleanup information // is keyed of the *ssl, so when it's delayed we might have different *ssl on the same // connection. cleanup_trace_info_for_delayed_trace(&conn->p_conn, ssl, (void *)args->buf, bytes_len); // must be last, doesn't return - handle_buf_with_connection(ctx, &conn->p_conn, (void *)args->buf, bytes_len, WITH_SSL, direction, conn->orig_dport); + handle_buf_with_connection(ctx, + &conn->p_conn, + (void *)args->buf, + bytes_len, + WITH_SSL, + direction, + conn->orig_dport); } else { bpf_dbg_printk("No connection info! This is a bug."); } diff --git a/bpf/http_types.h b/bpf/http_types.h index f9f86bda4..6f7e903a8 100644 --- a/bpf/http_types.h +++ b/bpf/http_types.h @@ -8,12 +8,14 @@ #include "pid_types.h" #include "bpf_dbg.h" -#define FULL_BUF_SIZE 192 // should be enough for most URLs, we may need to extend it if not. Must be multiple of 16 for the copy to work. +#define FULL_BUF_SIZE \ + 192 // should be enough for most URLs, we may need to extend it if not. Must be multiple of 16 for the copy to work. #define TRACE_BUF_SIZE 1024 // must be power of 2, we do an & to limit the buffer size #define KPROBES_HTTP2_BUF_SIZE 256 #define KPROBES_HTTP2_RET_BUF_SIZE 64 -#define KPROBES_LARGE_RESPONSE_LEN 100000 // 100K and above we try to track the response actual time with kretprobes +#define KPROBES_LARGE_RESPONSE_LEN \ + 100000 // 100K and above we try to track the response actual time with kretprobes #define K_TCP_MAX_LEN 256 #define K_TCP_RES_LEN 128 @@ -21,13 +23,13 @@ #define CONN_INFO_FLAG_TRACE 0x1 #define TRACE_ID_SIZE_BYTES 16 -#define SPAN_ID_SIZE_BYTES 8 -#define FLAGS_SIZE_BYTES 1 -#define TRACE_ID_CHAR_LEN 32 -#define SPAN_ID_CHAR_LEN 16 -#define FLAGS_CHAR_LEN 2 -#define TP_MAX_VAL_LENGTH 55 -#define TP_MAX_KEY_LENGTH 11 +#define SPAN_ID_SIZE_BYTES 8 +#define FLAGS_SIZE_BYTES 1 +#define TRACE_ID_CHAR_LEN 32 +#define SPAN_ID_CHAR_LEN 16 +#define FLAGS_CHAR_LEN 2 +#define TP_MAX_VAL_LENGTH 55 +#define TP_MAX_KEY_LENGTH 11 #define TCP_SEND 1 #define TCP_RECV 0 @@ -35,21 +37,22 @@ #define NO_SSL 0 #define WITH_SSL 1 -#define MIN_HTTP2_SIZE 24 // Preface PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n https://datatracker.ietf.org/doc/html/rfc7540#section-3.5 +#define MIN_HTTP2_SIZE \ + 24 // Preface PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n https://datatracker.ietf.org/doc/html/rfc7540#section-3.5 -// Struct to keep information on the connections in flight +// Struct to keep information on the connections in flight // s = source, d = destination // h = high word, l = low word // used as hashmap key, must be 4 byte aligned? 
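// For IPv4 traffic, the 16-byte address fields below carry an IPv4-mapped
// IPv6 address (::ffff:a.b.c.d): bytes 0..9 are zero, bytes 10..11 are 0xff,
// and bytes 12..15 hold the IPv4 address itself -- the ip4ip6_prefix constant
// further down is exactly that 12-byte prefix. For example, 10.0.0.1 is laid
// out as:
//
//     00 00 00 00 00 00 00 00 00 00 ff ff 0a 00 00 01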
typedef struct http_connection_info { - u8 s_addr[IP_V6_ADDR_LEN]; - u8 d_addr[IP_V6_ADDR_LEN]; + u8 s_addr[IP_V6_ADDR_LEN]; + u8 d_addr[IP_V6_ADDR_LEN]; u16 s_port; u16 d_port; } connection_info_t; typedef struct http_partial_connection_info { - u8 s_addr[IP_V6_ADDR_LEN]; + u8 s_addr[IP_V6_ADDR_LEN]; u16 s_port; u16 d_port; u32 tcp_seq; @@ -70,13 +73,13 @@ typedef struct tp_info { unsigned char span_id[SPAN_ID_SIZE_BYTES]; unsigned char parent_id[SPAN_ID_SIZE_BYTES]; u64 ts; - u8 flags; + u8 flags; } tp_info_t; typedef struct tp_info_pid { tp_info_t tp; u32 pid; - u8 valid; + u8 valid; } tp_info_pid_t; // Here we keep the information that is sent on the ring buffer @@ -85,12 +88,13 @@ typedef struct http_info { connection_info_t conn_info; u64 start_monotime_ns; u64 end_monotime_ns; - unsigned char buf[FULL_BUF_SIZE] __attribute__ ((aligned (8))); // ringbuffer memcpy complains unless this is 8 byte aligned + unsigned char buf[FULL_BUF_SIZE] + __attribute__((aligned(8))); // ringbuffer memcpy complains unless this is 8 byte aligned u32 len; u32 resp_len; - u16 status; - u8 type; - u8 ssl; + u16 status; + u8 type; + u8 ssl; // we need this for system wide tracking so we can find the service name // also to filter traces from unsolicited processes that share the executable // with other instrumented processes @@ -106,12 +110,14 @@ typedef struct tcp_req { connection_info_t conn_info; u64 start_monotime_ns; u64 end_monotime_ns; - unsigned char buf[K_TCP_MAX_LEN] __attribute__ ((aligned (8))); // ringbuffer memcpy complains unless this is 8 byte aligned - unsigned char rbuf[K_TCP_RES_LEN] __attribute__ ((aligned (8))); // ringbuffer memcpy complains unless this is 8 byte aligned + unsigned char buf[K_TCP_MAX_LEN] + __attribute__((aligned(8))); // ringbuffer memcpy complains unless this is 8 byte aligned + unsigned char rbuf[K_TCP_RES_LEN] + __attribute__((aligned(8))); // ringbuffer memcpy complains unless this is 8 byte aligned u32 len; u32 resp_len; - u8 ssl; - u8 direction; + u8 ssl; + u8 direction; // we need this for system wide tracking so we can find the service name // also to filter traces from unsolicited processes that share the executable // with other instrumented processes @@ -123,9 +129,9 @@ typedef struct call_protocol_args { pid_connection_info_t pid_conn; unsigned char small_buf[MIN_HTTP2_SIZE]; u64 u_buf; - int bytes_len; + int bytes_len; u8 ssl; - u8 direction; + u8 direction; u16 orig_dport; u8 packet_type; } call_protocol_args_t; @@ -136,13 +142,13 @@ typedef struct protocol_info { u32 seq; u16 h_proto; u16 tot_len; - u8 flags; + u8 flags; } protocol_info_t; // Here we keep information on the ongoing filtered connections, PID/TID and connection type typedef struct http_connection_metadata { pid_info pid; - u8 type; + u8 type; } http_connection_metadata_t; typedef struct http2_conn_stream { @@ -151,11 +157,11 @@ typedef struct http2_conn_stream { } http2_conn_stream_t; typedef struct http2_grpc_request { - u8 flags; // Must be first + u8 flags; // Must be first connection_info_t conn_info; - u8 data[KPROBES_HTTP2_BUF_SIZE]; - u8 ret_data[KPROBES_HTTP2_RET_BUF_SIZE]; - u8 type; + u8 data[KPROBES_HTTP2_BUF_SIZE]; + u8 ret_data[KPROBES_HTTP2_RET_BUF_SIZE]; + u8 type; int len; u64 start_monotime_ns; u64 end_monotime_ns; @@ -176,13 +182,13 @@ const u8 ip4ip6_prefix[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff}; #ifdef BPF_DEBUG static __always_inline void dbg_print_http_connection_info(connection_info_t *info) { bpf_dbg_printk("[conn] s_h = %llx, s_l = %llx, s_port=%d", - 
*(u64 *)(&info->s_addr), - *(u64 *)(&info->s_addr[8]), - info->s_port); + *(u64 *)(&info->s_addr), + *(u64 *)(&info->s_addr[8]), + info->s_port); bpf_dbg_printk("[conn] d_h = %llx, d_l = %llx, d_port=%d", - *(u64 *)(&info->d_addr), - *(u64 *)(&info->d_addr[8]), - info->d_port); + *(u64 *)(&info->d_addr), + *(u64 *)(&info->d_addr[8]), + info->d_port); } #else static __always_inline void dbg_print_http_connection_info(connection_info_t *info) { diff --git a/bpf/k_tracer.c b/bpf/k_tracer.c index 2053ddf1a..dcbc4b613 100644 --- a/bpf/k_tracer.c +++ b/bpf/k_tracer.c @@ -49,21 +49,23 @@ typedef struct send_args { struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, MAX_CONCURRENT_REQUESTS); - __type(key, u64); // pid_tid + __type(key, u64); // pid_tid __type(value, send_args_t); // size to be sent } active_send_args SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __uint(max_entries, MAX_CONCURRENT_REQUESTS); - __type(key, u64); // *sock + __type(key, u64); // *sock __type(value, send_args_t); // size to be sent } active_send_sock_args SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, partial_connection_info_t); // key: the connection info without the destination address, but with the tcp sequence - __type(value, connection_info_t); // value: traceparent info + __type( + key, + partial_connection_info_t); // key: the connection info without the destination address, but with the tcp sequence + __type(value, connection_info_t); // value: traceparent info __uint(max_entries, 1024); __uint(pinning, LIBBPF_PIN_BY_NAME); } tcp_connection_map SEC(".maps"); @@ -98,7 +100,7 @@ int BPF_KRETPROBE(kretprobe_sock_alloc, struct socket *sock) { // outbound. However, in some cases servers can optimise the accept path if // the same request is sent over and over. For that reason, in case we miss the // initial accept, we establish an active filtered connection here. By default -// sets the type to be server HTTP, in client mode we'll overwrite the +// sets the type to be server HTTP, in client mode we'll overwrite the // data in the map, since those cannot be optimised. SEC("kprobe/tcp_rcv_established") int BPF_KPROBE(kprobe_tcp_rcv_established, struct sock *sk, struct sk_buff *skb) { @@ -116,14 +118,18 @@ int BPF_KPROBE(kprobe_tcp_rcv_established, struct sock *sk, struct sk_buff *skb) //u16 orig_dport = info.conn.d_port; //dbg_print_http_connection_info(&info.conn); sort_connection_info(&pid_info.p_conn.conn); - pid_info.p_conn.pid = pid_from_pid_tgid(id); + pid_info.p_conn.pid = pid_from_pid_tgid(id); // This is a current limitation for port ordering detection for SSL. // tcp_rcv_established flip flops the ports and we can't tell if it's client or server call. // If the source port for a client call is lower, we'll get this wrong. - // TODO: Need to fix this. + // TODO: Need to fix this. pid_info.orig_dport = pid_info.p_conn.conn.s_port, - bpf_map_update_elem(&pid_tid_to_conn, &id, &pid_info, BPF_ANY); // to support SSL on missing handshake, respect the original info if there + bpf_map_update_elem( + &pid_tid_to_conn, + &id, + &pid_info, + BPF_ANY); // to support SSL on missing handshake, respect the original info if there } return 0; @@ -132,12 +138,11 @@ int BPF_KPROBE(kprobe_tcp_rcv_established, struct sock *sk, struct sk_buff *skb) // We tap into both sys_accept and sys_accept4. // We don't care about the accept entry arguments, since we get only peer information // we don't have the full picture for the socket. 
-// +// // Note: A current limitation is that likely we won't capture the first accept request. The // process may have already reached accept, before the instrumenter has launched. SEC("kretprobe/sys_accept4") -int BPF_KRETPROBE(kretprobe_sys_accept4, uint fd) -{ +int BPF_KRETPROBE(kretprobe_sys_accept4, uint fd) { u64 id = bpf_get_current_pid_tgid(); if (!valid_pid(id)) { @@ -168,8 +173,9 @@ int BPF_KRETPROBE(kretprobe_sys_accept4, uint fd) sort_connection_info(&info.p_conn.conn); info.p_conn.pid = pid_from_pid_tgid(id); info.orig_dport = orig_dport; - - bpf_map_update_elem(&pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL on missing handshake + + bpf_map_update_elem( + &pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL on missing handshake } cleanup: @@ -190,24 +196,25 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { tp_info_pid_t *tp_p = tp_buf(); - // Connect runs before the SYN packet is sent. - // We use this opportunity to setup a trace context information for the connection. + // Connect runs before the SYN packet is sent. + // We use this opportunity to setup a trace context information for the connection. // We'll later query the trace information in tc_egress, and serialize it on the TCP packet. // Why would we do this here instead of on the tc_egress itself? We could move this on the tc_egress, // but we would be modifying all packets, not just for processes which are instrumented, - // since we can't reliably tell the process PID in TC or socket filters. + // since we can't reliably tell the process PID in TC or socket filters. if (tp_p) { tp_p->tp.ts = bpf_ktime_get_ns(); tp_p->tp.flags = 1; tp_p->valid = 1; - tp_p->pid = TC_SYN_PACKET_ID; // set an ID up here in case someone else is doing what we are doing + tp_p->pid = + TC_SYN_PACKET_ID; // set an ID up here in case someone else is doing what we are doing urand_bytes(tp_p->tp.span_id, SPAN_ID_SIZE_BYTES); tp_info_pid_t *server_tp = find_parent_trace(); if (server_tp && valid_trace(server_tp->tp.trace_id)) { __builtin_memcpy(tp_p->tp.trace_id, server_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); __builtin_memcpy(tp_p->tp.parent_id, server_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); } else { - urand_bytes(tp_p->tp.trace_id, TRACE_ID_SIZE_BYTES); + urand_bytes(tp_p->tp.trace_id, TRACE_ID_SIZE_BYTES); __builtin_memset(tp_p->tp.parent_id, 0, sizeof(tp_p->tp.span_id)); } @@ -236,8 +243,7 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { // We tap into sys_connect so we can track properly the processes doing // HTTP client calls SEC("kretprobe/sys_connect") -int BPF_KRETPROBE(kretprobe_sys_connect, int fd) -{ +int BPF_KRETPROBE(kretprobe_sys_connect, int fd) { u64 id = bpf_get_current_pid_tgid(); if (!valid_pid(id)) { @@ -267,8 +273,8 @@ int BPF_KRETPROBE(kretprobe_sys_connect, int fd) sort_connection_info(&info.p_conn.conn); info.p_conn.pid = pid_from_pid_tgid(id); info.orig_dport = orig_dport; - - bpf_map_update_elem(&pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL + + bpf_map_update_elem(&pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL } cleanup: @@ -276,7 +282,7 @@ int BPF_KRETPROBE(kretprobe_sys_connect, int fd) return 0; } -// Main HTTP read and write operations are handled with tcp_sendmsg and tcp_recvmsg +// Main HTTP read and write operations are handled with tcp_sendmsg and tcp_recvmsg static __always_inline void *is_ssl_connection(u64 id) { void *ssl = 0; @@ -292,7 +298,7 @@ static __always_inline void *is_ssl_connection(u64 id) { if (ssl_args) { ssl = (void 
*)ssl_args->ssl; } - } + } return ssl; } @@ -318,9 +324,7 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s bpf_dbg_printk("=== kprobe tcp_sendmsg=%d sock=%llx size %d===", id, sk, size); - send_args_t s_args = { - .size = size - }; + send_args_t s_args = {.size = size}; if (parse_sock_info(sk, &s_args.p_conn.conn)) { u16 orig_dport = s_args.p_conn.conn.d_port; @@ -333,7 +337,7 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s if (!ssl) { void *active_ssl = is_active_ssl(&s_args.p_conn); if (!active_ssl) { - u8* buf = iovec_memory(); + u8 *buf = iovec_memory(); if (buf) { size = read_msghdr_buf(msg, buf, size); if (size) { @@ -342,7 +346,8 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s bpf_map_update_elem(&active_send_sock_args, &sock_p, &s_args, BPF_ANY); // Logically last for !ssl. - handle_buf_with_connection(ctx, &s_args.p_conn, buf, size, NO_SSL, TCP_SEND, orig_dport); + handle_buf_with_connection( + ctx, &s_args.p_conn, buf, size, NO_SSL, TCP_SEND, orig_dport); } else { bpf_dbg_printk("can't find iovec ptr in msghdr, not tracking sendmsg"); } @@ -391,8 +396,9 @@ int BPF_KRETPROBE(kretprobe_tcp_sendmsg, int sent_len) { if (s_args) { if (sent_len > 0) { update_http_sent_len(&s_args->p_conn, sent_len); - } - if (sent_len < MIN_HTTP_SIZE) { // Sometimes app servers don't send close, but small responses back + } + if (sent_len < + MIN_HTTP_SIZE) { // Sometimes app servers don't send close, but small responses back finish_possible_delayed_http_request(&s_args->p_conn); } } @@ -405,7 +411,7 @@ static __always_inline void ensure_sent_event(u64 id, u64 *sock_p) { if (s_args) { bpf_dbg_printk("Checking if we need to finish the request per thread id"); finish_possible_delayed_http_request(&s_args->p_conn); - } // see if we match on another thread, but same sock * + } // see if we match on another thread, but same sock * s_args = bpf_map_lookup_elem(&active_send_sock_args, sock_p); if (s_args) { bpf_dbg_printk("Checking if we need to finish the request per socket"); @@ -444,7 +450,8 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk, long timeout) { //int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) SEC("kprobe/tcp_recvmsg") -int BPF_KPROBE(kprobe_tcp_recvmsg, struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { +int BPF_KPROBE( + kprobe_tcp_recvmsg, struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { u64 id = bpf_get_current_pid_tgid(); if (!valid_pid(id)) { @@ -505,12 +512,13 @@ static __always_inline int return_recvmsg(void *ctx, u64 id, int copied_len) { if (!ssl) { void *active_ssl = is_active_ssl(&info); if (!active_ssl) { - u8* buf = iovec_memory(); + u8 *buf = iovec_memory(); if (buf) { copied_len = read_iovec_ctx(iov_ctx, buf, copied_len); if (copied_len) { // doesn't return must be logically last statement - handle_buf_with_connection(ctx, &info, buf, copied_len, NO_SSL, TCP_RECV, orig_dport); + handle_buf_with_connection( + ctx, &info, buf, copied_len, NO_SSL, TCP_RECV, orig_dport); } else { bpf_dbg_printk("Not copied anything"); } @@ -572,7 +580,7 @@ int socket__http_filter(struct __sk_buff *skb) { if (!tcp_close(&tcp) && tcp_empty(&tcp, skb)) { return 0; } - + // we don't want to read the whole buffer for every packed that passes our checks, we read only a bit and check if it's truly HTTP request/response. 
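    // MIN_HTTP_SIZE is 12 bytes: per its definition in protocol_common.h,
    // the shortest prefix that still identifies an HTTP/1.x message
    // ("HTTP/1.1 " plus a three-digit status code, or a method at the start
    // of a request line), so this peek stays cheap for every packet the
    // socket filter sees.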
unsigned char buf[MIN_HTTP_SIZE] = {0}; bpf_skb_load_bytes(skb, tcp.hdr_len, (void *)buf, sizeof(buf)); @@ -583,7 +591,10 @@ int socket__http_filter(struct __sk_buff *skb) { } u8 packet_type = 0; - if (is_http(buf, len, &packet_type)) { // we must check tcp_close second, a packet can be a close and a response + if (is_http( + buf, + len, + &packet_type)) { // we must check tcp_close second, a packet can be a close and a response //dbg_print_http_connection_info(&conn); // commented out since GitHub CI doesn't like this call sort_connection_info(&conn); @@ -591,7 +602,7 @@ int socket__http_filter(struct __sk_buff *skb) { if (!info) { return 0; } - + __builtin_memcpy(&info->conn_info, &conn, sizeof(conn)); if (packet_type == PACKET_TYPE_REQUEST) { @@ -605,8 +616,8 @@ int socket__http_filter(struct __sk_buff *skb) { //dbg_print_http_connection_info(&conn); set_fallback_http_info(info, &conn, skb->len - tcp.hdr_len); - // The code below is looking to see if we have recorded black-box trace info on - // another interface. We do this for client calls, where essentially the original + // The code below is looking to see if we have recorded black-box trace info on + // another interface. We do this for client calls, where essentially the original // request may go out on one interface, but then get re-routed to another, which is // common with some k8s environments. // @@ -630,7 +641,8 @@ int socket__http_filter(struct __sk_buff *skb) { if (prev_conn) { tp_info_pid_t *trace_info = trace_info_for_connection(prev_conn); if (trace_info) { - if (current_immediate_epoch(trace_info->tp.ts) == current_immediate_epoch(bpf_ktime_get_ns())) { + if (current_immediate_epoch(trace_info->tp.ts) == + current_immediate_epoch(bpf_ktime_get_ns())) { //bpf_dbg_printk("Found trace info on another interface, setting it up for this connection"); tp_info_pid_t other_info = {0}; __builtin_memcpy(&other_info, trace_info, sizeof(tp_info_pid_t)); @@ -669,7 +681,7 @@ int BPF_KRETPROBE(kretprobe_sys_clone, int tid) { bpf_dbg_printk("sys_clone_ret %d -> %d", id, tid); bpf_map_update_elem(&clone_map, &child, &parent, BPF_ANY); - + return 0; } @@ -684,8 +696,9 @@ int BPF_KPROBE(kprobe_sys_exit, int status) { trace_key_t task = {0}; task_tid(&task.p_key); - bpf_dbg_printk("sys_exit %d, pid=%d, valid_pid(id)=%d", id, pid_from_pid_tgid(id), valid_pid(id)); - + bpf_dbg_printk( + "sys_exit %d, pid=%d, valid_pid(id)=%d", id, pid_from_pid_tgid(id), valid_pid(id)); + // handle the case when the thread terminates without closing a socket send_args_t *s_args = bpf_map_lookup_elem(&active_send_args, &id); if (s_args) { @@ -695,10 +708,10 @@ int BPF_KPROBE(kprobe_sys_exit, int status) { } bpf_map_delete_elem(&clone_map, &task.p_key); - // This won't delete trace ids for traces with extra_id, like NodeJS. But, - // we expect that it doesn't matter, since NodeJS main thread won't exit. + // This won't delete trace ids for traces with extra_id, like NodeJS. But, + // we expect that it doesn't matter, since NodeJS main thread won't exit. 
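    // (server_traces is keyed by trace_key_t, i.e. the task key plus
    // extra_id; the task variable was zero-initialized above, so only the
    // plain per-task entry -- extra_id == 0 -- is removed here.)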
bpf_map_delete_elem(&server_traces, &task); - + return 0; } @@ -732,7 +745,7 @@ int app_ingress(struct __sk_buff *skb) { return 0; } - s32 len = skb->len-sizeof(u32); + s32 len = skb->len - sizeof(u32); bpf_printk("Received SYN packed len = %d, offset = %d, hdr_len %d", skb->len, len, tcp.hdr_len); unsigned char tp_buf[TP_MAX_VAL_LENGTH]; @@ -784,11 +797,12 @@ int app_egress(struct __sk_buff *skb) { offset_ip_checksum = ETH_HLEN + offsetof(struct iphdr, check); } else { offset_ip_tot_len = ETH_HLEN + offsetof(struct ipv6hdr, payload_len); - } + } u16 new_tot_len = bpf_htons(bpf_ntohs(tcp.tot_len) + sizeof(tp_info_pid_t)); - bpf_printk("tot_len = %u, new_tot_len = %u", bpf_ntohs(tcp.tot_len), bpf_ntohs(new_tot_len)); + bpf_printk( + "tot_len = %u, new_tot_len = %u", bpf_ntohs(tcp.tot_len), bpf_ntohs(new_tot_len)); bpf_printk("h_proto = %u, skb->len = %u", tcp.h_proto, skb->len); if (offset_ip_checksum) { diff --git a/bpf/k_tracer.h b/bpf/k_tracer.h index 10292212a..1ba76da30 100644 --- a/bpf/k_tracer.h +++ b/bpf/k_tracer.h @@ -13,27 +13,28 @@ #include "protocol_tcp.h" struct bpf_map_def SEC("maps") jump_table = { - .type = BPF_MAP_TYPE_PROG_ARRAY, - .key_size = sizeof(__u32), - .value_size = sizeof(__u32), - .max_entries = 8, + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8, }; -#define TAIL_PROTOCOL_HTTP 0 +#define TAIL_PROTOCOL_HTTP 0 #define TAIL_PROTOCOL_HTTP2 1 -#define TAIL_PROTOCOL_TCP 2 +#define TAIL_PROTOCOL_TCP 2 static __always_inline void handle_buf_with_args(void *ctx, call_protocol_args_t *args) { bpf_probe_read(args->small_buf, MIN_HTTP2_SIZE, (void *)args->u_buf); - bpf_dbg_printk("buf=[%s], pid=%d, len=%d", args->small_buf, args->pid_conn.pid, args->bytes_len); + bpf_dbg_printk( + "buf=[%s], pid=%d, len=%d", args->small_buf, args->pid_conn.pid, args->bytes_len); if (is_http(args->small_buf, MIN_HTTP_SIZE, &args->packet_type)) { bpf_tail_call(ctx, &jump_table, TAIL_PROTOCOL_HTTP); } else if (is_http2_or_grpc(args->small_buf, MIN_HTTP2_SIZE)) { bpf_dbg_printk("Found HTTP2 or gRPC connection"); u8 is_ssl = args->ssl; - bpf_map_update_elem(&ongoing_http2_connections, &args->pid_conn, &is_ssl, BPF_ANY); + bpf_map_update_elem(&ongoing_http2_connections, &args->pid_conn, &is_ssl, BPF_ANY); } else { u8 *h2g = bpf_map_lookup_elem(&ongoing_http2_connections, &args->pid_conn); if (h2g && *h2g == args->ssl) { @@ -55,7 +56,8 @@ static __always_inline void handle_buf_with_args(void *ctx, call_protocol_args_t } } -static __always_inline call_protocol_args_t* make_protocol_args(void *u_buf, int bytes_len, u8 ssl, u8 direction, u16 orig_dport) { +static __always_inline call_protocol_args_t * +make_protocol_args(void *u_buf, int bytes_len, u8 ssl, u8 direction, u16 orig_dport) { call_protocol_args_t *args = protocol_args(); if (!args) { @@ -71,13 +73,19 @@ static __always_inline call_protocol_args_t* make_protocol_args(void *u_buf, int return args; } -static __always_inline void handle_buf_with_connection(void *ctx, pid_connection_info_t *pid_conn, void *u_buf, int bytes_len, u8 ssl, u8 direction, u16 orig_dport) { +static __always_inline void handle_buf_with_connection(void *ctx, + pid_connection_info_t *pid_conn, + void *u_buf, + int bytes_len, + u8 ssl, + u8 direction, + u16 orig_dport) { call_protocol_args_t *args = make_protocol_args(u_buf, bytes_len, ssl, direction, orig_dport); if (!args) { return; } - + __builtin_memcpy(&args->pid_conn, pid_conn, sizeof(pid_connection_info_t)); handle_buf_with_args(ctx, args); @@ -85,14 
+93,16 @@ static __always_inline void handle_buf_with_connection(void *ctx, pid_connection #define BUF_COPY_BLOCK_SIZE 16 -static __always_inline void read_skb_bytes(const void *skb, u32 offset, unsigned char *buf, const u32 len) { +static __always_inline void +read_skb_bytes(const void *skb, u32 offset, unsigned char *buf, const u32 len) { u32 max = offset + len; int b = 0; - for (; b < (FULL_BUF_SIZE/BUF_COPY_BLOCK_SIZE); b++) { + for (; b < (FULL_BUF_SIZE / BUF_COPY_BLOCK_SIZE); b++) { if ((offset + (BUF_COPY_BLOCK_SIZE - 1)) >= max) { break; } - bpf_skb_load_bytes(skb, offset, (void *)(&buf[b * BUF_COPY_BLOCK_SIZE]), BUF_COPY_BLOCK_SIZE); + bpf_skb_load_bytes( + skb, offset, (void *)(&buf[b * BUF_COPY_BLOCK_SIZE]), BUF_COPY_BLOCK_SIZE); offset += BUF_COPY_BLOCK_SIZE; } @@ -107,7 +117,8 @@ static __always_inline void read_skb_bytes(const void *skb, u32 offset, unsigned return; } - int remaining_to_copy = (remainder < (BUF_COPY_BLOCK_SIZE - 1)) ? remainder : (BUF_COPY_BLOCK_SIZE - 1); + int remaining_to_copy = + (remainder < (BUF_COPY_BLOCK_SIZE - 1)) ? remainder : (BUF_COPY_BLOCK_SIZE - 1); int space_in_buffer = (len < (b * BUF_COPY_BLOCK_SIZE)) ? 0 : len - (b * BUF_COPY_BLOCK_SIZE); if (remaining_to_copy <= space_in_buffer) { diff --git a/bpf/map_sizing.h b/bpf/map_sizing.h index ce232b254..1f8f615fb 100644 --- a/bpf/map_sizing.h +++ b/bpf/map_sizing.h @@ -4,6 +4,7 @@ // TODO: make this user-configurable and modify the value from the userspace when // loading the maps with the Cilium library #define MAX_CONCURRENT_REQUESTS 10000 // 10000 requests per second max for a single traced process -#define MAX_CONCURRENT_SHARED_REQUESTS 30000 // 10 * MAX_CONCURRENT_REQUESTS total ongoing requests, for maps shared among multiple tracers, e.g. pinned maps +#define MAX_CONCURRENT_SHARED_REQUESTS \ + 30000 // 10 * MAX_CONCURRENT_REQUESTS total ongoing requests, for maps shared among multiple tracers, e.g. 
pinned maps #endif \ No newline at end of file diff --git a/bpf/nodejs.c b/bpf/nodejs.c index 1f85e2f30..72cfaa4fc 100644 --- a/bpf/nodejs.c +++ b/bpf/nodejs.c @@ -12,8 +12,8 @@ volatile const s32 async_wrap_trigger_async_id_off = 0; struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, u64); // the pid_tid - __type(value, u64); // the last AsyncWrap * + __type(key, u64); // the pid_tid + __type(value, u64); // the last AsyncWrap * __uint(max_entries, 1000); // 1000 nodejs services, small number, nodejs is single threaded __uint(pinning, LIBBPF_PIN_BY_NAME); } async_reset_args SEC(".maps"); @@ -41,7 +41,7 @@ int async_reset_ret(struct pt_regs *ctx) { if (!valid_pid(id)) { return 0; } - + bpf_dbg_printk("=== uprobe AsyncReset returns id=%d ===", id); bpf_map_delete_elem(&async_reset_args, &id); @@ -69,13 +69,15 @@ int emit_async_init(struct pt_regs *ctx) { u64 trigger_async_id = 0; bpf_probe_read_user(&async_id, sizeof(u64), ((void *)wrap) + async_wrap_async_id_off); - bpf_probe_read_user(&trigger_async_id, sizeof(u64), ((void *)wrap) + async_wrap_trigger_async_id_off); + bpf_probe_read_user( + &trigger_async_id, sizeof(u64), ((void *)wrap) + async_wrap_trigger_async_id_off); if (async_id) { bpf_map_update_elem(&active_nodejs_ids, &id, &async_id, BPF_ANY); if (trigger_async_id) { bpf_map_update_elem(&nodejs_parent_map, &async_id, &trigger_async_id, BPF_ANY); - bpf_dbg_printk("async_id = %llx, trigger_async_id = %llx", async_id, trigger_async_id); + bpf_dbg_printk( + "async_id = %llx, trigger_async_id = %llx", async_id, trigger_async_id); } else { bpf_dbg_printk("No trigger async id"); } diff --git a/bpf/nodejs.h b/bpf/nodejs.h index 7516fa977..da71d0d95 100644 --- a/bpf/nodejs.h +++ b/bpf/nodejs.h @@ -7,8 +7,8 @@ struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, u64); // the pid_tid - __type(value, u64); // the last active async_id + __type(key, u64); // the pid_tid + __type(value, u64); // the last active async_id __uint(max_entries, 1000); // 1000 nodejs services, small number, nodejs is single threaded __uint(pinning, LIBBPF_PIN_BY_NAME); } active_nodejs_ids SEC(".maps"); @@ -17,7 +17,7 @@ struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, u64); // child async_id __type(value, u64); // parent async_id - __uint(max_entries, MAX_CONCURRENT_REQUESTS); + __uint(max_entries, MAX_CONCURRENT_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } nodejs_parent_map SEC(".maps"); diff --git a/bpf/pid.h b/bpf/pid.h index 74f61b0b7..b743741ed 100644 --- a/bpf/pid.h +++ b/bpf/pid.h @@ -7,7 +7,8 @@ #include "pid_types.h" #include "bpf_dbg.h" -#define MAX_CONCURRENT_PIDS 3001 // estimate: 1000 concurrent processes (including children) * 3 namespaces per pid +#define MAX_CONCURRENT_PIDS \ + 3001 // estimate: 1000 concurrent processes (including children) * 3 namespaces per pid #define PRIME_HASH 192053 // closest prime to 3001 * 64 volatile const s32 filter_pids = 0; @@ -29,11 +30,14 @@ struct { } pid_cache SEC(".maps"); static __always_inline u8 pid_matches(pid_key_t *p) { - u64 k = (((u64)p->ns) << 32) | p->pid; // combine the namespace id and the pid into one single u64 + u64 k = + (((u64)p->ns) << 32) | p->pid; // combine the namespace id and the pid into one single u64 - u32 h = (u32)(k % PRIME_HASH); // divide with prime number lower than max pids * 64, modulo with primes gives good hash functions - u32 segment = h / 64; // divide by the segment size (8 bytes) to find the segment - u32 bit = h & 63; // lowest 64 bits gives us the placement inside the segment + u32 h = + (u32)(k % 
+ PRIME_HASH); // divide with prime number lower than max pids * 64, modulo with primes gives good hash functions + u32 segment = h / 64; // divide by the segment size (8 bytes) to find the segment + u32 bit = h & 63; // lowest 64 bits gives us the placement inside the segment u64 *v = bpf_map_lookup_elem(&valid_pids, &segment); if (!v) { @@ -66,10 +70,7 @@ static __always_inline u32 valid_pid(u64 id) { ns_pid_ppid(task, &ns_pid, &ns_ppid, &pid_ns_id); if (ns_pid != 0) { - pid_key_t p_key = { - .pid = ns_pid, - .ns = pid_ns_id - }; + pid_key_t p_key = {.pid = ns_pid, .ns = pid_ns_id}; u8 found_ns_pid = pid_matches(&p_key); @@ -77,13 +78,10 @@ static __always_inline u32 valid_pid(u64 id) { bpf_map_update_elem(&pid_cache, &host_pid, &ns_pid, BPF_ANY); return ns_pid; } else if (ns_ppid != 0) { - pid_key_t pp_key = { - .pid = ns_ppid, - .ns = pid_ns_id - }; + pid_key_t pp_key = {.pid = ns_ppid, .ns = pid_ns_id}; u8 found_ns_ppid = pid_matches(&pp_key); - + if (found_ns_ppid) { bpf_map_update_elem(&pid_cache, &host_pid, &ns_pid, BPF_ANY); diff --git a/bpf/pid_types.h b/bpf/pid_types.h index 54aa28891..e05e1bd7b 100644 --- a/bpf/pid_types.h +++ b/bpf/pid_types.h @@ -6,19 +6,20 @@ #include "bpf_core_read.h" typedef struct pid_key { - u32 pid; // pid as seen by the userspace (for example, inside its container) + u32 pid; // pid as seen by the userspace (for example, inside its container) u32 ns; // pids namespace for the process } __attribute__((packed)) pid_key_t; typedef struct pid_info_t { - u32 host_pid; // pid as seen by the root cgroup (and by BPF) - u32 user_pid; // pid as seen by the userspace (for example, inside its container) - u32 ns; // pids namespace for the process + u32 host_pid; // pid as seen by the root cgroup (and by BPF) + u32 user_pid; // pid as seen by the userspace (for example, inside its container) + u32 ns; // pids namespace for the process } __attribute__((packed)) pid_info; // Good resource on this: https://mozillazg.com/2022/05/ebpf-libbpfgo-get-process-info-en.html // Using bpf_get_ns_current_pid_tgid is too restrictive for us -static __always_inline void ns_pid_ppid(struct task_struct *task, int *pid, int *ppid, u32 *pid_ns_id) { +static __always_inline void +ns_pid_ppid(struct task_struct *task, int *pid, int *ppid, u32 *pid_ns_id) { struct upid upid; unsigned int level = BPF_CORE_READ(task, nsproxy, pid_ns_for_children, level); diff --git a/bpf/protocol_common.h b/bpf/protocol_common.h index 9240ea2a2..90ea5d847 100644 --- a/bpf/protocol_common.h +++ b/bpf/protocol_common.h @@ -8,7 +8,7 @@ #include "pid.h" #include "bpf_dbg.h" -#define MIN_HTTP_SIZE 12 // HTTP/1.1 CCC is the smallest valid request we can have +#define MIN_HTTP_SIZE 12 // HTTP/1.1 CCC is the smallest valid request we can have #define RESPONSE_STATUS_POS 9 // HTTP/1.1 <-- #define MAX_HTTP_STATUS 599 @@ -42,23 +42,23 @@ struct { struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, pid_connection_info_t); // connection that's SSL - __type(value, u64); // ssl + __type(key, pid_connection_info_t); // connection that's SSL + __type(value, u64); // ssl __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } active_ssl_connections SEC(".maps"); -static __always_inline http_connection_metadata_t* empty_connection_meta() { +static __always_inline http_connection_metadata_t *empty_connection_meta() { int zero = 0; return bpf_map_lookup_elem(&connection_meta_mem, &zero); } -static __always_inline u8* iovec_memory() { +static __always_inline u8 *iovec_memory() { int 
zero = 0; return bpf_map_lookup_elem(&iovec_mem, &zero); } -static __always_inline call_protocol_args_t* protocol_args() { +static __always_inline call_protocol_args_t *protocol_args() { int zero = 0; return bpf_map_lookup_elem(&protocol_args_mem, &zero); } @@ -81,7 +81,8 @@ static __always_inline u8 request_type_by_direction(u8 direction, u8 packet_type return 0; } -static __always_inline http_connection_metadata_t *connection_meta_by_direction(pid_connection_info_t *pid_conn, u8 direction, u8 packet_type) { +static __always_inline http_connection_metadata_t * +connection_meta_by_direction(pid_connection_info_t *pid_conn, u8 direction, u8 packet_type) { http_connection_metadata_t *meta = empty_connection_meta(); if (!meta) { return 0; @@ -104,37 +105,35 @@ struct iov_iter___dummy { typedef struct iov_iter___dummy iovec_iter_ctx; -enum iter_type___dummy { - ITER_UBUF -}; +enum iter_type___dummy { ITER_UBUF }; // extracts kernel specific iov_iter information into a iovec_iter_ctx instance -static __always_inline void get_iovec_ctx(iovec_iter_ctx* ctx, struct msghdr *msg) { +static __always_inline void get_iovec_ctx(iovec_iter_ctx *ctx, struct msghdr *msg) { ctx->ubuf = NULL; ctx->iov = NULL; - if (bpf_core_field_exists(((struct iov_iter___dummy*)&msg->msg_iter)->type)) { + if (bpf_core_field_exists(((struct iov_iter___dummy *)&msg->msg_iter)->type)) { // clear the direction bit when reading iovec_iter::type to end up // with the original enumerator value (the direction bit is the LSB // and is either 0 (READ) or 1 (WRITE)). - ctx->iter_type = BPF_CORE_READ((struct iov_iter___dummy*)&msg->msg_iter, type) & 0xfe; + ctx->iter_type = BPF_CORE_READ((struct iov_iter___dummy *)&msg->msg_iter, type) & 0xfe; } else { - ctx->iter_type = BPF_CORE_READ((struct iov_iter___dummy*)&msg->msg_iter, iter_type); + ctx->iter_type = BPF_CORE_READ((struct iov_iter___dummy *)&msg->msg_iter, iter_type); } - if (bpf_core_field_exists(((struct iov_iter___dummy*)&msg->msg_iter)->ubuf)) { - ctx->ubuf = BPF_CORE_READ((struct iov_iter___dummy*)&msg->msg_iter, ubuf); + if (bpf_core_field_exists(((struct iov_iter___dummy *)&msg->msg_iter)->ubuf)) { + ctx->ubuf = BPF_CORE_READ((struct iov_iter___dummy *)&msg->msg_iter, ubuf); } - if (bpf_core_field_exists(((struct iov_iter___dummy*)&msg->msg_iter)->iov)) { - ctx->iov = BPF_CORE_READ((struct iov_iter___dummy*)&msg->msg_iter, iov); - } else if (bpf_core_field_exists(((struct iov_iter___dummy*)&msg->msg_iter)->__iov)) { - ctx->iov = BPF_CORE_READ((struct iov_iter___dummy*)&msg->msg_iter, __iov); + if (bpf_core_field_exists(((struct iov_iter___dummy *)&msg->msg_iter)->iov)) { + ctx->iov = BPF_CORE_READ((struct iov_iter___dummy *)&msg->msg_iter, iov); + } else if (bpf_core_field_exists(((struct iov_iter___dummy *)&msg->msg_iter)->__iov)) { + ctx->iov = BPF_CORE_READ((struct iov_iter___dummy *)&msg->msg_iter, __iov); } - ctx->nr_segs = BPF_CORE_READ((struct iov_iter___dummy*)&msg->msg_iter, nr_segs); + ctx->nr_segs = BPF_CORE_READ((struct iov_iter___dummy *)&msg->msg_iter, nr_segs); } -static __always_inline int read_iovec_ctx(iovec_iter_ctx *ctx, u8* buf, size_t max_len) { +static __always_inline int read_iovec_ctx(iovec_iter_ctx *ctx, u8 *buf, size_t max_len) { if (max_len == 0) { return 0; } @@ -205,7 +204,7 @@ static __always_inline int read_iovec_ctx(iovec_iter_ctx *ctx, u8* buf, size_t m return tot_len; } -static __always_inline int read_msghdr_buf(struct msghdr *msg, u8* buf, size_t max_len) { +static __always_inline int read_msghdr_buf(struct msghdr *msg, u8 *buf, size_t 
max_len) { if (max_len == 0) { return 0; } @@ -221,11 +220,14 @@ static __always_inline int read_msghdr_buf(struct msghdr *msg, u8* buf, size_t m // is somehow in the ephemeral port range, it can be higher than the source port and we'd use the sorted connection // info in user space, effectively reversing the flow of the operation. We keep track of the original destination port // and we undo the swap in the data collections we send to user space. -static __always_inline void fixup_connection_info(connection_info_t *conn_info, u8 client, u16 orig_dport) { +static __always_inline void +fixup_connection_info(connection_info_t *conn_info, u8 client, u16 orig_dport) { // The destination port is the server port in userspace if ((client && conn_info->d_port != orig_dport) || (!client && conn_info->d_port == orig_dport)) { - bpf_dbg_printk("Swapped connection info for userspace, client = %d, orig_dport = %d", client, orig_dport); + bpf_dbg_printk("Swapped connection info for userspace, client = %d, orig_dport = %d", + client, + orig_dport); swap_connection_info_order(conn_info); //dbg_print_http_connection_info(conn_info); // commented out since GitHub CI doesn't like this call } diff --git a/bpf/protocol_defs.h b/bpf/protocol_defs.h index b10d2f0ab..e0292ef72 100644 --- a/bpf/protocol_defs.h +++ b/bpf/protocol_defs.h @@ -2,8 +2,8 @@ #define PROTOCOL_DEFS_H // Taken from linux/socket.h -#define AF_INET 2 /* Internet IP Protocol */ -#define AF_INET6 10 /* IP version 6 */ +#define AF_INET 2 /* Internet IP Protocol */ +#define AF_INET6 10 /* IP version 6 */ #define IP_V6_ADDR_LEN 16 @@ -12,16 +12,15 @@ #define EPHEMERAL_PORT_MIN 32768 // Taken from errno.h -#define EINPROGRESS 115 /* Operation now in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ // Taken from uapi/linux/if_ether.h -#define ETH_HLEN 14 /* Total octets in header. */ -#define ETH_P_IP 0x0800 /* Internet Protocol packet */ -#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ - +#define ETH_HLEN 14 /* Total octets in header. 
*/ +#define ETH_P_IP 0x0800 /* Internet Protocol packet */ +#define ETH_P_IPV6 0x86DD /* IPv6 over bluebook */ // Taken from uapi/linux/in.h -#define IPPROTO_TCP 6 /* Transmission Control Protocol */ +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ // Taken from linux/include/net/tcp.h #define TCPHDR_FIN 0x01 diff --git a/bpf/protocol_http.h b/bpf/protocol_http.h index 9e0edc644..6f5aa00ee 100644 --- a/bpf/protocol_http.h +++ b/bpf/protocol_http.h @@ -38,7 +38,7 @@ struct { // empty_http_info zeroes and return the unique percpu copy in the map // this function assumes that a given thread is not trying to use many // instances at the same time -static __always_inline http_info_t* empty_http_info() { +static __always_inline http_info_t *empty_http_info() { int zero = 0; http_info_t *value = bpf_map_lookup_elem(&http_info_mem, &zero); if (value) { @@ -52,17 +52,24 @@ static __always_inline u8 is_http(unsigned char *p, u32 len, u8 *packet_type) { return 0; } //HTTP/1.x - if ((p[0] == 'H') && (p[1] == 'T') && (p[2] == 'T') && (p[3] == 'P') && (p[4] == '/') && (p[5] == '1') && (p[6] == '.')) { - *packet_type = PACKET_TYPE_RESPONSE; - return 1; - } else if ( - ((p[0] == 'G') && (p[1] == 'E') && (p[2] == 'T') && (p[3] == ' ') && (p[4] == '/')) || // GET - ((p[0] == 'P') && (p[1] == 'O') && (p[2] == 'S') && (p[3] == 'T') && (p[4] == ' ') && (p[5] == '/')) || // POST - ((p[0] == 'P') && (p[1] == 'U') && (p[2] == 'T') && (p[3] == ' ') && (p[4] == '/')) || // PUT - ((p[0] == 'P') && (p[1] == 'A') && (p[2] == 'T') && (p[3] == 'C') && (p[4] == 'H') && (p[5] == ' ') && (p[6] == '/')) || // PATCH - ((p[0] == 'D') && (p[1] == 'E') && (p[2] == 'L') && (p[3] == 'E') && (p[4] == 'T') && (p[5] == 'E') && (p[6] == ' ') && (p[7] == '/')) || // DELETE - ((p[0] == 'H') && (p[1] == 'E') && (p[2] == 'A') && (p[3] == 'D') && (p[4] == ' ') && (p[5] == '/')) || // HEAD - ((p[0] == 'O') && (p[1] == 'P') && (p[2] == 'T') && (p[3] == 'I') && (p[4] == 'O') && (p[5] == 'N') && (p[6] == 'S') && (p[7] == ' ') && (p[8] == '/')) // OPTIONS + if ((p[0] == 'H') && (p[1] == 'T') && (p[2] == 'T') && (p[3] == 'P') && (p[4] == '/') && + (p[5] == '1') && (p[6] == '.')) { + *packet_type = PACKET_TYPE_RESPONSE; + return 1; + } else if (((p[0] == 'G') && (p[1] == 'E') && (p[2] == 'T') && (p[3] == ' ') && + (p[4] == '/')) || // GET + ((p[0] == 'P') && (p[1] == 'O') && (p[2] == 'S') && (p[3] == 'T') && (p[4] == ' ') && + (p[5] == '/')) || // POST + ((p[0] == 'P') && (p[1] == 'U') && (p[2] == 'T') && (p[3] == ' ') && + (p[4] == '/')) || // PUT + ((p[0] == 'P') && (p[1] == 'A') && (p[2] == 'T') && (p[3] == 'C') && (p[4] == 'H') && + (p[5] == ' ') && (p[6] == '/')) || // PATCH + ((p[0] == 'D') && (p[1] == 'E') && (p[2] == 'L') && (p[3] == 'E') && (p[4] == 'T') && + (p[5] == 'E') && (p[6] == ' ') && (p[7] == '/')) || // DELETE + ((p[0] == 'H') && (p[1] == 'E') && (p[2] == 'A') && (p[3] == 'D') && (p[4] == ' ') && + (p[5] == '/')) || // HEAD + ((p[0] == 'O') && (p[1] == 'P') && (p[2] == 'T') && (p[3] == 'I') && (p[4] == 'O') && + (p[5] == 'N') && (p[6] == 'S') && (p[7] == ' ') && (p[8] == '/')) // OPTIONS ) { *packet_type = PACKET_TYPE_REQUEST; return 1; @@ -98,7 +105,7 @@ static __always_inline u8 http_will_complete(http_info_t *info, unsigned char *b static __always_inline void finish_http(http_info_t *info, pid_connection_info_t *pid_conn) { if (http_info_complete(info)) { - http_info_t *trace = bpf_ringbuf_reserve(&events, sizeof(http_info_t), 0); + http_info_t *trace = bpf_ringbuf_reserve(&events, sizeof(http_info_t), 0); if 
(trace) { bpf_dbg_printk("Sending trace %lx, response length %d", info, info->resp_len); @@ -110,21 +117,24 @@ static __always_inline void finish_http(http_info_t *info, pid_connection_info_t // bpf_dbg_printk("Terminating trace for pid=%d", pid_from_pid_tgid(pid_tid)); // dbg_print_http_connection_info(&info->conn_info); // commented out since GitHub CI doesn't like this call bpf_map_delete_elem(&ongoing_http, pid_conn); - } + } } static __always_inline void update_http_sent_len(pid_connection_info_t *pid_conn, int sent_len) { http_info_t *info = bpf_map_lookup_elem(&ongoing_http, pid_conn); - if (info) { + if (info) { info->resp_len += sent_len; } } -static __always_inline http_info_t *get_or_set_http_info(http_info_t *info, pid_connection_info_t *pid_conn, u8 packet_type) { +static __always_inline http_info_t * +get_or_set_http_info(http_info_t *info, pid_connection_info_t *pid_conn, u8 packet_type) { if (packet_type == PACKET_TYPE_REQUEST) { http_info_t *old_info = bpf_map_lookup_elem(&ongoing_http, pid_conn); if (old_info) { - finish_http(old_info, pid_conn); // this will delete ongoing_http for this connection info if there's full stale request + finish_http( + old_info, + pid_conn); // this will delete ongoing_http for this connection info if there's a full stale request } bpf_map_update_elem(&ongoing_http, pid_conn, info, BPF_ANY); @@ -135,19 +145,21 @@ static __always_inline http_info_t *get_or_set_http_info(http_info_t *info, pid_ static __always_inline void finish_possible_delayed_http_request(pid_connection_info_t *pid_conn) { http_info_t *info = bpf_map_lookup_elem(&ongoing_http, pid_conn); - if (info) { + if (info) { finish_http(info, pid_conn); } } -static __always_inline void set_fallback_http_info(http_info_t *info, connection_info_t *conn, int len) { +static __always_inline void +set_fallback_http_info(http_info_t *info, connection_info_t *conn, int len) { info->start_monotime_ns = bpf_ktime_get_ns(); info->status = 0; info->len = len; bpf_map_update_elem(&ongoing_http_fallback, conn, info, BPF_ANY); } -static __always_inline void process_http_request(http_info_t *info, int len, http_connection_metadata_t *meta, int direction, u16 orig_dport) { +static __always_inline void process_http_request( + http_info_t *info, int len, http_connection_metadata_t *meta, int direction, u16 orig_dport) { // Set pid and type early as best effort in case the request times out or dies. 
if (meta) { info->pid = meta->pid; @@ -174,7 +186,7 @@ static __always_inline void process_http_response(http_info_t *info, unsigned ch info->resp_len = 0; info->end_monotime_ns = bpf_ktime_get_ns(); info->status = 0; - info->status += (buf[RESPONSE_STATUS_POS] - '0') * 100; + info->status += (buf[RESPONSE_STATUS_POS] - '0') * 100; info->status += (buf[RESPONSE_STATUS_POS + 1] - '0') * 10; info->status += (buf[RESPONSE_STATUS_POS + 2] - '0'); if (info->status > MAX_HTTP_STATUS) { // we read something invalid @@ -182,7 +194,12 @@ static __always_inline void process_http_response(http_info_t *info, unsigned ch } } -static __always_inline void handle_http_response(unsigned char *small_buf, pid_connection_info_t *pid_conn, http_info_t *info, int orig_len, u8 direction, u8 ssl) { +static __always_inline void handle_http_response(unsigned char *small_buf, + pid_connection_info_t *pid_conn, + http_info_t *info, + int orig_len, + u8 direction, + u8 ssl) { process_http_response(info, small_buf, orig_len); if ((direction != TCP_SEND) /*|| (ssl != NO_SSL) || (orig_len < KPROBES_LARGE_RESPONSE_LEN)*/) { @@ -195,7 +212,7 @@ static __always_inline void handle_http_response(unsigned char *small_buf, pid_c bpf_dbg_printk("Delaying finish http for large request, orig_len %d", orig_len); } } - + if (info->type == EVENT_HTTP_REQUEST) { trace_key_t t_key = {0}; t_key.extra_id = info->extra_id; @@ -246,29 +263,43 @@ int protocol_http(void *ctx) { } else { info->type = EVENT_HTTP_REQUEST; } - } + } - bpf_dbg_printk("=== http_buffer_event len=%d pid=%d still_reading=%d ===", args->bytes_len, pid_from_pid_tgid(bpf_get_current_pid_tgid()), still_reading(info)); + bpf_dbg_printk("=== http_buffer_event len=%d pid=%d still_reading=%d ===", + args->bytes_len, + pid_from_pid_tgid(bpf_get_current_pid_tgid()), + still_reading(info)); - if (args->packet_type == PACKET_TYPE_REQUEST && (info->status == 0) && (info->start_monotime_ns == 0)) { - http_connection_metadata_t *meta = connection_meta_by_direction(&args->pid_conn, args->direction, PACKET_TYPE_REQUEST); + if (args->packet_type == PACKET_TYPE_REQUEST && (info->status == 0) && + (info->start_monotime_ns == 0)) { + http_connection_metadata_t *meta = + connection_meta_by_direction(&args->pid_conn, args->direction, PACKET_TYPE_REQUEST); - get_or_create_trace_info(meta, args->pid_conn.pid, &args->pid_conn.conn, (void *)args->u_buf, args->bytes_len, capture_header_buffer); + get_or_create_trace_info(meta, + args->pid_conn.pid, + &args->pid_conn.conn, + (void *)args->u_buf, + args->bytes_len, + capture_header_buffer); - if (meta) { + if (meta) { tp_info_pid_t *tp_p = trace_info_for_connection(&args->pid_conn.conn); if (tp_p) { info->tp = tp_p->tp; if (meta->type == EVENT_HTTP_CLIENT && !valid_span(tp_p->tp.parent_id)) { - bpf_dbg_printk("Looking for trace id of a client span"); + bpf_dbg_printk("Looking for trace id of a client span"); tp_info_pid_t *server_tp = find_parent_trace(); if (server_tp && server_tp->valid && valid_trace(server_tp->tp.trace_id)) { - bpf_dbg_printk("Found existing server span for id=%llx", bpf_get_current_pid_tgid()); - __builtin_memcpy(info->tp.trace_id, server_tp->tp.trace_id, sizeof(info->tp.trace_id)); - __builtin_memcpy(info->tp.parent_id, server_tp->tp.span_id, sizeof(info->tp.parent_id)); + bpf_dbg_printk("Found existing server span for id=%llx", + bpf_get_current_pid_tgid()); + __builtin_memcpy( + info->tp.trace_id, server_tp->tp.trace_id, sizeof(info->tp.trace_id)); + __builtin_memcpy( + info->tp.parent_id, server_tp->tp.span_id, 
sizeof(info->tp.parent_id)); } else { - bpf_dbg_printk("Cannot find server span for id=%llx", bpf_get_current_pid_tgid()); + bpf_dbg_printk("Cannot find server span for id=%llx", + bpf_get_current_pid_tgid()); } } } else { @@ -283,18 +314,18 @@ int protocol_http(void *ctx) { bpf_probe_read(info->buf, FULL_BUF_SIZE, (void *)args->u_buf); process_http_request(info, args->bytes_len, meta, args->direction, args->orig_dport); } else if ((args->packet_type == PACKET_TYPE_RESPONSE) && (info->status == 0)) { - handle_http_response(args->small_buf, &args->pid_conn, info, args->bytes_len, args->direction, args->ssl); + handle_http_response( + args->small_buf, &args->pid_conn, info, args->bytes_len, args->direction, args->ssl); if (fallback) { finish_http(info, &args->pid_conn); } } else if (still_reading(info)) { info->len += args->bytes_len; - } + } bpf_map_delete_elem(&ongoing_http_fallback, &args->pid_conn.conn); return 0; } - #endif \ No newline at end of file diff --git a/bpf/protocol_http2.h b/bpf/protocol_http2.h index 8c86867c9..3aa07b676 100644 --- a/bpf/protocol_http2.h +++ b/bpf/protocol_http2.h @@ -32,7 +32,7 @@ struct { __uint(max_entries, 1); } http2_info_mem SEC(".maps"); -static __always_inline http2_grpc_request_t* empty_http2_info() { +static __always_inline http2_grpc_request_t *empty_http2_info() { int zero = 0; http2_grpc_request_t *value = bpf_map_lookup_elem(&http2_info_mem, &zero); if (value) { @@ -41,7 +41,8 @@ static __always_inline http2_grpc_request_t* empty_http2_info() { return value; } -static __always_inline void http2_grpc_start(http2_conn_stream_t *s_key, void *u_buf, int len, u8 direction, u8 ssl, u16 orig_dport) { +static __always_inline void http2_grpc_start( + http2_conn_stream_t *s_key, void *u_buf, int len, u8 direction, u8 ssl, u16 orig_dport) { http2_grpc_request_t *existing = bpf_map_lookup_elem(&ongoing_http2_grpc, s_key); if (existing) { bpf_dbg_printk("already found existing grpcstart, ignoring this exchange"); @@ -51,7 +52,8 @@ static __always_inline void http2_grpc_start(http2_conn_stream_t *s_key, void *u bpf_dbg_printk("http2/grpc start direction=%d stream=%d", direction, s_key->stream_id); //dbg_print_http_connection_info(&s_key->pid_conn.conn); // commented out since GitHub CI doesn't like this call if (h2g_info) { - http_connection_metadata_t *meta = connection_meta_by_direction(&s_key->pid_conn, direction, PACKET_TYPE_REQUEST); + http_connection_metadata_t *meta = + connection_meta_by_direction(&s_key->pid_conn, direction, PACKET_TYPE_REQUEST); if (!meta) { bpf_dbg_printk("Can't get meta memory or connection not found"); return; @@ -66,21 +68,23 @@ static __always_inline void http2_grpc_start(http2_conn_stream_t *s_key, void *u h2g_info->pid = meta->pid; h2g_info->type = meta->type; } - fixup_connection_info(&h2g_info->conn_info, h2g_info->type == EVENT_HTTP_CLIENT, orig_dport); + fixup_connection_info( + &h2g_info->conn_info, h2g_info->type == EVENT_HTTP_CLIENT, orig_dport); bpf_probe_read(h2g_info->data, KPROBES_HTTP2_BUF_SIZE, u_buf); bpf_map_update_elem(&ongoing_http2_grpc, s_key, h2g_info, BPF_ANY); } } -static __always_inline void http2_grpc_end(http2_conn_stream_t *stream, http2_grpc_request_t *prev_info, void *u_buf) { +static __always_inline void +http2_grpc_end(http2_conn_stream_t *stream, http2_grpc_request_t *prev_info, void *u_buf) { bpf_dbg_printk("http2/grpc end prev_info=%llx", prev_info); if (prev_info) { prev_info->end_monotime_ns = bpf_ktime_get_ns(); bpf_dbg_printk("stream_id = %d", stream->stream_id); 
//dbg_print_http_connection_info(&stream->pid_conn.conn); // commented out since GitHub CI doesn't like this call - http2_grpc_request_t *trace = bpf_ringbuf_reserve(&events, sizeof(http2_grpc_request_t), 0); + http2_grpc_request_t *trace = bpf_ringbuf_reserve(&events, sizeof(http2_grpc_request_t), 0); if (trace) { bpf_probe_read(prev_info->ret_data, KPROBES_HTTP2_RET_BUF_SIZE, u_buf); __builtin_memcpy(trace, prev_info, sizeof(http2_grpc_request_t)); @@ -91,7 +95,12 @@ static __always_inline void http2_grpc_end(http2_conn_stream_t *stream, http2_gr bpf_map_delete_elem(&ongoing_http2_grpc, stream); } -static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid_conn, void *u_buf, int bytes_len, u8 direction, u8 ssl, u16 orig_dport) { +static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid_conn, + void *u_buf, + int bytes_len, + u8 direction, + u8 ssl, + u16 orig_dport) { int pos = 0; u8 found_start_frame = 0; u8 found_end_frame = 0; @@ -105,15 +114,15 @@ static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid unsigned char frame_buf[FRAME_HEADER_LEN]; frame_header_t frame = {0}; - for (int i = 0; i < 8; i++) { + for (int i = 0; i < 8; i++) { if (pos >= bytes_len) { break; } bpf_probe_read(&frame_buf, FRAME_HEADER_LEN, (void *)((u8 *)u_buf + pos)); - read_http2_grpc_frame_header(&frame, frame_buf, FRAME_HEADER_LEN); + read_http2_grpc_frame_header(&frame, frame_buf, FRAME_HEADER_LEN); //bpf_dbg_printk("http2 frame type = %d, len = %d, stream_id = %d, flags = %d", frame.type, frame.length, frame.stream_id, frame.flags); - + if (is_headers_frame(&frame)) { stream.pid_conn = *pid_conn; stream.stream_id = frame.stream_id; @@ -127,7 +136,7 @@ static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid if (http_grpc_stream_ended(&frame)) { found_end_frame = 1; break; - } + } } else { // Not starting new grpc request, found end frame in a start, likely just terminating prev connection if (!(is_flags_only_frame(&frame) && http_grpc_stream_ended(&frame))) { @@ -158,7 +167,8 @@ static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid } if (found_start_frame) { - http2_grpc_start(&stream, (void *)((u8 *)u_buf + pos), bytes_len, direction, ssl, orig_dport); + http2_grpc_start( + &stream, (void *)((u8 *)u_buf + pos), bytes_len, direction, ssl, orig_dport); } else { // We only loop 8 times looking for the stream termination. If the data packet is large we'll miss the // frame saying the stream closed. In that case we try this backup path. 
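Note on the frame parsing in the hunk above: read_http2_grpc_frame_header consumes the fixed 9-byte HTTP/2 frame header from RFC 7540 §4.1. A minimal sketch of that decode follows; frame_header_sketch_t and decode_frame_header are illustrative names, not the repo's actual frame_header_t API (u8/u32 typedefs as in flow.h):

// Sketch only: decodes the fixed 9-byte HTTP/2 frame header (RFC 7540, section 4.1).
typedef struct frame_header_sketch {
    u32 length;    // 24-bit payload length (does not count the 9 header bytes)
    u8 type;       // 0x0 = DATA, 0x1 = HEADERS, 0x3 = RST_STREAM, ...
    u8 flags;      // e.g. END_STREAM (0x1) on DATA/HEADERS frames
    u32 stream_id; // 31 bits; the most significant bit is reserved
} frame_header_sketch_t;

static __always_inline void decode_frame_header(frame_header_sketch_t *f,
                                                const unsigned char *buf) {
    f->length = ((u32)buf[0] << 16) | ((u32)buf[1] << 8) | (u32)buf[2];
    f->type = buf[3];
    f->flags = buf[4];
    f->stream_id =
        (((u32)buf[5] << 24) | ((u32)buf[6] << 16) | ((u32)buf[7] << 8) | (u32)buf[8]) &
        0x7fffffffu;
}

A HEADERS frame (type 0x1) carrying END_STREAM (flag bit 0x1) is presumably what the is_headers_frame/http_grpc_stream_ended checks in the loop above key off to delimit one request/response exchange.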
@@ -180,7 +190,10 @@ static __always_inline void process_http2_grpc_frames(pid_connection_info_t *pid http2_grpc_end(&stream, prev_info, (void *)((u8 *)u_buf + buf_pos)); bpf_map_delete_elem(&active_ssl_connections, pid_conn); } else { - bpf_dbg_printk("grpc request/response mismatch, req_type %d, prev_info->type %d", req_type, prev_info->type); + bpf_dbg_printk( + "grpc request/response mismatch, req_type %d, prev_info->type %d", + req_type, + prev_info->type); bpf_map_delete_elem(&ongoing_http2_grpc, &stream); } } @@ -197,14 +210,12 @@ int protocol_http2(void *ctx) { return 0; } - process_http2_grpc_frames( - &args->pid_conn, - (void *)args->u_buf, - args->bytes_len, - args->direction, - args->ssl, - args->orig_dport - ); + process_http2_grpc_frames(&args->pid_conn, + (void *)args->u_buf, + args->bytes_len, + args->direction, + args->ssl, + args->orig_dport); return 0; } diff --git a/bpf/protocol_tcp.h b/bpf/protocol_tcp.h index 242772b46..3b58943ae 100644 --- a/bpf/protocol_tcp.h +++ b/bpf/protocol_tcp.h @@ -24,7 +24,7 @@ struct { __uint(max_entries, 1); } tcp_req_mem SEC(".maps"); -static __always_inline tcp_req_t* empty_tcp_req() { +static __always_inline tcp_req_t *empty_tcp_req() { int zero = 0; tcp_req_t *value = bpf_map_lookup_elem(&tcp_req_mem, &zero); if (value) { @@ -33,7 +33,12 @@ static __always_inline tcp_req_t* empty_tcp_req() { return value; } -static __always_inline void handle_unknown_tcp_connection(pid_connection_info_t *pid_conn, void *u_buf, int bytes_len, u8 direction, u8 ssl, u16 orig_dport) { +static __always_inline void handle_unknown_tcp_connection(pid_connection_info_t *pid_conn, + void *u_buf, + int bytes_len, + u8 direction, + u8 ssl, + u16 orig_dport) { tcp_req_t *existing = bpf_map_lookup_elem(&ongoing_tcp_req, pid_conn); if (!existing) { tcp_req_t *req = empty_tcp_req(); @@ -52,8 +57,10 @@ static __always_inline void handle_unknown_tcp_connection(pid_connection_info_t if (server_tp && server_tp->valid && valid_trace(server_tp->tp.trace_id)) { bpf_dbg_printk("Found existing server tp for client call"); - __builtin_memcpy(req->tp.trace_id, server_tp->tp.trace_id, sizeof(req->tp.trace_id)); - __builtin_memcpy(req->tp.parent_id, server_tp->tp.span_id, sizeof(req->tp.parent_id)); + __builtin_memcpy( + req->tp.trace_id, server_tp->tp.trace_id, sizeof(req->tp.trace_id)); + __builtin_memcpy( + req->tp.parent_id, server_tp->tp.span_id, sizeof(req->tp.parent_id)); urand_bytes(req->tp.span_id, SPAN_ID_SIZE_BYTES); } @@ -64,22 +71,23 @@ static __always_inline void handle_unknown_tcp_connection(pid_connection_info_t existing->resp_len = bytes_len; tcp_req_t *trace = bpf_ringbuf_reserve(&events, sizeof(tcp_req_t), 0); if (trace) { - bpf_dbg_printk("Sending TCP trace %lx, response length %d", existing, existing->resp_len); + bpf_dbg_printk( + "Sending TCP trace %lx, response length %d", existing, existing->resp_len); __builtin_memcpy(trace, existing, sizeof(tcp_req_t)); bpf_probe_read(trace->rbuf, K_TCP_RES_LEN, u_buf); bpf_ringbuf_submit(trace, get_flags()); } bpf_map_delete_elem(&ongoing_tcp_req, pid_conn); - } else if (existing->len > 0 && existing->len < (K_TCP_MAX_LEN/2)) { + } else if (existing->len > 0 && existing->len < (K_TCP_MAX_LEN / 2)) { // Attempt to append one more packet. I couldn't convince the verifier // to use a variable (K_TCP_MAX_LEN-existing->len). If needed we may need // to try harder. 
Mainly needed for userspace detection of missed gRPC, where // the protocol may send an RST frame after we're done creating the event, so // the next event has an RST frame prepended. u32 off = existing->len; - bpf_clamp_umax(off, (K_TCP_MAX_LEN/2)); - bpf_probe_read(existing->buf + off, (K_TCP_MAX_LEN/2), u_buf); + bpf_clamp_umax(off, (K_TCP_MAX_LEN / 2)); + bpf_probe_read(existing->buf + off, (K_TCP_MAX_LEN / 2), u_buf); existing->len += bytes_len; } } @@ -93,14 +101,12 @@ int protocol_tcp(void *ctx) { return 0; } - handle_unknown_tcp_connection( - &args->pid_conn, - (void *)args->u_buf, - args->bytes_len, - args->direction, - args->ssl, - args->orig_dport - ); + handle_unknown_tcp_connection(&args->pid_conn, + (void *)args->u_buf, + args->bytes_len, + args->direction, + args->ssl, + args->orig_dport); return 0; } diff --git a/bpf/ringbuf.h b/bpf/ringbuf.h index 3ce26fa43..c4333a979 100644 --- a/bpf/ringbuf.h +++ b/bpf/ringbuf.h @@ -5,20 +5,20 @@ // These need to line up with some Go identifiers: // EventTypeHTTP, EventTypeGRPC, EventTypeHTTPClient, EventTypeGRPCClient, EventTypeSQLClient, EventTypeKHTTPRequest -#define EVENT_HTTP_REQUEST 1 -#define EVENT_GRPC_REQUEST 2 -#define EVENT_HTTP_CLIENT 3 -#define EVENT_GRPC_CLIENT 4 -#define EVENT_SQL_CLIENT 5 -#define EVENT_K_HTTP_REQUEST 6 -#define EVENT_K_HTTP2_REQUEST 7 -#define EVENT_TCP_REQUEST 8 -#define EVENT_GO_KAFKA 9 -#define EVENT_GO_REDIS 10 -#define EVENT_GO_KAFKA_SEG 11 // the segment-io version (kafka-go) has different format +#define EVENT_HTTP_REQUEST 1 +#define EVENT_GRPC_REQUEST 2 +#define EVENT_HTTP_CLIENT 3 +#define EVENT_GRPC_CLIENT 4 +#define EVENT_SQL_CLIENT 5 +#define EVENT_K_HTTP_REQUEST 6 +#define EVENT_K_HTTP2_REQUEST 7 +#define EVENT_TCP_REQUEST 8 +#define EVENT_GO_KAFKA 9 +#define EVENT_GO_REDIS 10 +#define EVENT_GO_KAFKA_SEG 11 // the segment-io version (kafka-go) has a different format // setting here the following map definitions without pinning them to a global namespace -// would lead that services running both HTTP and GRPC server would duplicate +// would lead to services running both HTTP and GRPC servers duplicating // the events ringbuffer and goroutines map. // This is an edge inefficiency that allows us to avoid the gotchas of // pinning maps to the global namespace (e.g. like not cleaning them up when @@ -37,15 +37,14 @@ volatile const u32 wakeup_data_bytes; // get_flags prevents waking the userspace process up on each ringbuf message. // If wakeup_data_bytes > 0, it will wait until wakeup_data_bytes are accumulated // into the buffer before waking the userspace. -static __always_inline long get_flags() -{ - long sz; +static __always_inline long get_flags() { + long sz; - if (!wakeup_data_bytes) - return 0; + if (!wakeup_data_bytes) + return 0; - sz = bpf_ringbuf_query(&events, BPF_RB_AVAIL_DATA); - return sz >= wakeup_data_bytes ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP; + sz = bpf_ringbuf_query(&events, BPF_RB_AVAIL_DATA); + return sz >= wakeup_data_bytes ? 
BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP; } #endif \ No newline at end of file diff --git a/bpf/sockaddr.h b/bpf/sockaddr.h index 4b632e3f1..f4a476f09 100644 --- a/bpf/sockaddr.h +++ b/bpf/sockaddr.h @@ -16,14 +16,15 @@ typedef struct accept_args { static __always_inline bool parse_sock_info(struct sock *s, connection_info_t *info) { short unsigned int skc_family; BPF_CORE_READ_INTO(&skc_family, s, __sk_common.skc_family); - + // We always store the IP addresses in IPV6 format, simplifies the code and // it matches natively what our Golang userspace processing will require. if (skc_family == AF_INET) { u32 ip4_s_l; u32 ip4_d_l; - BPF_CORE_READ_INTO(&info->s_port, s, __sk_common.skc_num); // weirdly not in network byte order - BPF_CORE_READ_INTO(&ip4_s_l, s, __sk_common.skc_rcv_saddr); + BPF_CORE_READ_INTO( + &info->s_port, s, __sk_common.skc_num); // weirdly not in network byte order + BPF_CORE_READ_INTO(&ip4_s_l, s, __sk_common.skc_rcv_saddr); BPF_CORE_READ_INTO(&info->d_port, s, __sk_common.skc_dport); info->d_port = bpf_ntohs(info->d_port); BPF_CORE_READ_INTO(&ip4_d_l, s, __sk_common.skc_daddr); @@ -35,7 +36,8 @@ static __always_inline bool parse_sock_info(struct sock *s, connection_info_t *i return true; } else if (skc_family == AF_INET6) { - BPF_CORE_READ_INTO(&info->s_port, s, __sk_common.skc_num); // weirdly not in network byte order + BPF_CORE_READ_INTO( + &info->s_port, s, __sk_common.skc_num); // weirdly not in network byte order BPF_CORE_READ_INTO(&info->s_addr, s, __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr8); BPF_CORE_READ_INTO(&info->d_port, s, __sk_common.skc_dport); info->d_port = bpf_ntohs(info->d_port); @@ -52,14 +54,14 @@ static __always_inline bool parse_sock_info(struct sock *s, connection_info_t *i static __always_inline bool parse_accept_socket_info(sock_args_t *args, connection_info_t *info) { struct sock *s; - struct socket *sock = (struct socket*)(args->addr); + struct socket *sock = (struct socket *)(args->addr); BPF_CORE_READ_INTO(&s, sock, sk); return parse_sock_info(s, info); } static __always_inline bool parse_connect_sock_info(sock_args_t *args, connection_info_t *info) { - return parse_sock_info((struct sock*)(args->addr), info); + return parse_sock_info((struct sock *)(args->addr), info); } static __always_inline u16 get_sockaddr_port(struct sockaddr *addr) { @@ -92,11 +94,11 @@ static __always_inline u16 get_sockaddr_port_user(struct sockaddr *addr) { //bpf_dbg_printk("addr = %llx, sa_family %d", addr, sa_family); if (sa_family == AF_INET) { - bpf_probe_read(&bport, sizeof(u16), &(((struct sockaddr_in*)addr)->sin_port)); + bpf_probe_read(&bport, sizeof(u16), &(((struct sockaddr_in *)addr)->sin_port)); } else if (sa_family == AF_INET6) { - bpf_probe_read(&bport, sizeof(u16), &(((struct sockaddr_in6*)addr)->sin6_port)); + bpf_probe_read(&bport, sizeof(u16), &(((struct sockaddr_in6 *)addr)->sin6_port)); } - + bport = bpf_ntohs(bport); return bport; diff --git a/bpf/tcp_info.h b/bpf/tcp_info.h index dc38b3c41..8c55ddb9b 100644 --- a/bpf/tcp_info.h +++ b/bpf/tcp_info.h @@ -13,13 +13,15 @@ struct __tcphdr { __be16 dest; __be32 seq; __be32 ack_seq; - __u16 res1 : 4, doff : 4, fin : 1, syn : 1, rst : 1, psh : 1, ack : 1, urg : 1, ece : 1, cwr : 1; + __u16 res1 : 4, doff : 4, fin : 1, syn : 1, rst : 1, psh : 1, ack : 1, urg : 1, ece : 1, + cwr : 1; __be16 window; __sum16 check; __be16 urg_ptr; }; -static __always_inline bool read_sk_buff(struct __sk_buff *skb, protocol_info_t *tcp, connection_info_t *conn) { +static __always_inline bool +read_sk_buff(struct __sk_buff 
*skb, protocol_info_t *tcp, connection_info_t *conn) { // we read the protocol just like here linux/samples/bpf/parse_ldabs.c u16 h_proto; bpf_skb_load_bytes(skb, offsetof(struct ethhdr, h_proto), &h_proto, sizeof(h_proto)); @@ -41,7 +43,8 @@ static __always_inline bool read_sk_buff(struct __sk_buff *skb, protocol_info_t return false; } - bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct iphdr, tot_len), &tcp->tot_len, sizeof(u16)); + bpf_skb_load_bytes( + skb, ETH_HLEN + offsetof(struct iphdr, tot_len), &tcp->tot_len, sizeof(u16)); // we read the ip header linux/samples/bpf/parse_ldabs.c and linux/samples/bpf/tcbpf1_kern.c // the level 4 protocol lets us only filter TCP packets, the ip protocol gets us the source @@ -62,11 +65,15 @@ static __always_inline bool read_sk_buff(struct __sk_buff *skb, protocol_info_t break; } case ETH_P_IPV6: - bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, payload_len), &tcp->tot_len, sizeof(u16)); - bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, nexthdr), &proto, sizeof(proto)); + bpf_skb_load_bytes( + skb, ETH_HLEN + offsetof(struct ipv6hdr, payload_len), &tcp->tot_len, sizeof(u16)); + bpf_skb_load_bytes( + skb, ETH_HLEN + offsetof(struct ipv6hdr, nexthdr), &proto, sizeof(proto)); - bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, saddr), &conn->s_addr, sizeof(conn->s_addr)); - bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr), &conn->d_addr, sizeof(conn->d_addr)); + bpf_skb_load_bytes( + skb, ETH_HLEN + offsetof(struct ipv6hdr, saddr), &conn->s_addr, sizeof(conn->s_addr)); + bpf_skb_load_bytes( + skb, ETH_HLEN + offsetof(struct ipv6hdr, daddr), &conn->d_addr, sizeof(conn->d_addr)); tcp->hdr_len = ETH_HLEN + sizeof(struct ipv6hdr); break; @@ -90,18 +97,28 @@ static __always_inline bool read_sk_buff(struct __sk_buff *skb, protocol_info_t tcp->seq = __bpf_htonl(seq); u8 doff; - bpf_skb_load_bytes(skb, tcp->hdr_len + offsetof(struct __tcphdr, ack_seq) + 4, &doff, sizeof(doff)); // read the first byte past __tcphdr->ack_seq, we can't do offsetof bit fields - doff &= 0xf0; // clean-up res1 - doff >>= 4; // move the upper 4 bits to low - doff *= 4; // convert to bytes length + bpf_skb_load_bytes( + skb, + tcp->hdr_len + offsetof(struct __tcphdr, ack_seq) + 4, + &doff, + sizeof( + doff)); // read the first byte past __tcphdr->ack_seq, we can't do offsetof bit fields + doff &= 0xf0; // clean-up res1 + doff >>= 4; // move the upper 4 bits to low + doff *= 4; // convert to bytes length u8 flags; - bpf_skb_load_bytes(skb, tcp->hdr_len + offsetof(struct __tcphdr, ack_seq) + 4 + 1, &flags, sizeof(flags)); // read the second byte past __tcphdr->doff, again bit fields offsets + bpf_skb_load_bytes( + skb, + tcp->hdr_len + offsetof(struct __tcphdr, ack_seq) + 4 + 1, + &flags, + sizeof(flags)); // read the second byte past __tcphdr->doff, again bit fields offsets tcp->flags = flags; tcp->h_proto = h_proto; tcp->hdr_len += doff; - if (tcp->hdr_len > skb->len) { // bad packet, hdr_len is greater than the skb len, we can't parse this. + if (tcp->hdr_len > + skb->len) { // bad packet, hdr_len is greater than the skb len, we can't parse this. 
return false; } @@ -121,7 +138,7 @@ static __always_inline bool tcp_syn(protocol_info_t *tcp) { } static __always_inline bool tcp_empty(protocol_info_t *tcp, struct __sk_buff *skb) { - return tcp->hdr_len == skb->len; + return tcp->hdr_len == skb->len; } #endif \ No newline at end of file diff --git a/bpf/trace_common.h b/bpf/trace_common.h index a15afc50c..080dc66d1 100644 --- a/bpf/trace_common.h +++ b/bpf/trace_common.h @@ -9,14 +9,14 @@ #include "runtime.h" typedef struct trace_key { - pid_key_t p_key; // pid key as seen by the userspace (for example, inside its container) - u64 extra_id; // pids namespace for the process + pid_key_t p_key; // pid key as seen by the userspace (for example, inside its container) + u64 extra_id; // pids namespace for the process } __attribute__((packed)) trace_key_t; struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, trace_key_t); // key: pid_tid - __type(value, tp_info_pid_t); // value: traceparent info + __type(key, trace_key_t); // key: pid_tid + __type(value, tp_info_pid_t); // value: traceparent info __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } server_traces SEC(".maps"); @@ -37,13 +37,12 @@ struct { struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, pid_key_t); // key: the child pid - __type(value, pid_key_t); // value: the parent pid + __type(key, pid_key_t); // key: the child pid + __type(value, pid_key_t); // value: the parent pid __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } clone_map SEC(".maps"); - static __always_inline unsigned char *tp_char_buf() { int zero = 0; return bpf_map_lookup_elem(&tp_char_buf_mem, &zero); @@ -56,17 +55,16 @@ static __always_inline tp_info_pid_t *tp_buf() { struct callback_ctx { unsigned char *buf; - u32 pos; + u32 pos; }; #ifdef BPF_TRACEPARENT -static int tp_match(u32 index, void *data) -{ - if (index >= (TRACE_BUF_SIZE-TRACE_PARENT_HEADER_LEN)) { +static int tp_match(u32 index, void *data) { + if (index >= (TRACE_BUF_SIZE - TRACE_PARENT_HEADER_LEN)) { return 1; } - struct callback_ctx *ctx = data; + struct callback_ctx *ctx = data; unsigned char *s = &(ctx->buf[index]); if (is_traceparent(s)) { @@ -78,17 +76,14 @@ static int tp_match(u32 index, void *data) } static __always_inline unsigned char *bpf_strstr_tp_loop(unsigned char *buf, int buf_len) { - struct callback_ctx data = { - .buf = buf, - .pos = 0 - }; + struct callback_ctx data = {.buf = buf, .pos = 0}; u32 nr_loops = (u32)buf_len; bpf_loop(nr_loops, tp_match, &data, 0); if (data.pos) { - u32 pos = (data.pos > (TRACE_BUF_SIZE-TRACE_PARENT_HEADER_LEN)) ? 0 : data.pos; + u32 pos = (data.pos > (TRACE_BUF_SIZE - TRACE_PARENT_HEADER_LEN)) ? 
0 : data.pos; return &(buf[pos]); } @@ -105,7 +100,7 @@ static __always_inline tp_info_pid_t *find_parent_trace() { int attempts = 0; - do { + do { tp_info_pid_t *server_tp = bpf_map_lookup_elem(&server_traces, &t_key); if (!server_tp) { // not this goroutine running the server request processing @@ -143,11 +138,13 @@ static __always_inline unsigned char *extract_trace_id(unsigned char *tp_start) } static __always_inline unsigned char *extract_span_id(unsigned char *tp_start) { - return tp_start + 13 + 2 + 1 + 32 + 1; // strlen("Traceparent: ") + strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-") + return tp_start + 13 + 2 + 1 + 32 + + 1; // strlen("Traceparent: ") + strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-") } static __always_inline unsigned char *extract_flags(unsigned char *tp_start) { - return tp_start + 13 + 2 + 1 + 32 + 1 + 16 + 1; // strlen("Traceparent: ") + strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-") + strlen(span_id) + strlen("-") + return tp_start + 13 + 2 + 1 + 32 + 1 + 16 + + 1; // strlen("Traceparent: ") + strlen(ver) + strlen("-") + strlen(trace_id) + strlen("-") + strlen(span_id) + strlen("-") } static __always_inline void delete_server_trace(trace_key_t *t_key) { @@ -164,7 +161,9 @@ static __always_inline u8 valid_trace(unsigned char *trace_id) { return *((u64 *)trace_id) != 0 && *((u64 *)(trace_id + 8)) != 0; } -static __always_inline void server_or_client_trace(http_connection_metadata_t *meta, connection_info_t *conn, tp_info_pid_t *tp_p) { +static __always_inline void server_or_client_trace(http_connection_metadata_t *meta, + connection_info_t *conn, + tp_info_pid_t *tp_p) { if (!meta) { return; } @@ -176,7 +175,8 @@ static __always_inline void server_or_client_trace(http_connection_metadata_t *m tp_info_pid_t *existing = bpf_map_lookup_elem(&server_traces, &t_key); // we have a conflict, mark this invalid and do nothing if (existing) { - bpf_dbg_printk("Found conflicting server span, marking as invalid, id=%llx", bpf_get_current_pid_tgid()); + bpf_dbg_printk("Found conflicting server span, marking as invalid, id=%llx", + bpf_get_current_pid_tgid()); existing->valid = 0; return; } @@ -186,7 +186,12 @@ static __always_inline void server_or_client_trace(http_connection_metadata_t *m } } -static __always_inline void get_or_create_trace_info(http_connection_metadata_t *meta, u32 pid, connection_info_t *conn, void *u_buf, int bytes_len, s32 capture_header_buffer) { +static __always_inline void get_or_create_trace_info(http_connection_metadata_t *meta, + u32 pid, + connection_info_t *conn, + void *u_buf, + int bytes_len, + s32 capture_header_buffer) { tp_info_pid_t *tp_p = tp_buf(); if (!tp_p) { @@ -204,8 +209,8 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t if (meta) { if (meta->type == EVENT_HTTP_CLIENT) { - // Before this change the client code only looked for a server wrapped trace and - // if it didn't find it would generate the trace information later. Now we look if + // Before this change the client code only looked for a server wrapped trace and + // if it didn't find it would generate the trace information later. Now we look if // the TC egress has setup TCP trace info for us. If we find this info we set the bool as having trace info, // i.e. we must not regenerate it later. The kprobe on 'tcp_connect' does the lookup of the server trace // for us, so the server context should already be setup. 
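The offset constants in extract_trace_id/extract_span_id/extract_flags above fall out of the fixed layout of a W3C traceparent header. A worked sketch of the arithmetic (the macro names are hypothetical and the header value is the W3C spec's example, not repo data):

// "Traceparent: 00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"
//  |<--- 13 -->|vv-|<------- 32 hex trace id ------->|-|<-16 hex span->|-ff
#define TP_PREFIX_LEN 13                          // strlen("Traceparent: ")
#define TP_TRACE_ID_OFF (TP_PREFIX_LEN + 2 + 1)   // 16: skip the 2-char version and '-'
#define TP_SPAN_ID_OFF (TP_TRACE_ID_OFF + 32 + 1) // 49: skip the 32-char trace id and '-'
#define TP_FLAGS_OFF (TP_SPAN_ID_OFF + 16 + 1)    // 66: skip the 16-char span id and '-'
// 66 + 2 flag chars == 68, which is TRACE_PARENT_HEADER_LEN in trace_util.h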
@@ -220,8 +225,10 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t if (server_tp && server_tp->valid && valid_trace(server_tp->tp.trace_id)) { found_tp = 1; bpf_dbg_printk("Found existing server tp for client call"); - __builtin_memcpy(tp_p->tp.trace_id, server_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); - __builtin_memcpy(tp_p->tp.parent_id, server_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); + __builtin_memcpy( + tp_p->tp.trace_id, server_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); + __builtin_memcpy( + tp_p->tp.parent_id, server_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); } } } else { @@ -233,24 +240,28 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t if (existing_tp) { found_tp = 1; bpf_dbg_printk("Found incoming (TCP) tp for server request"); - __builtin_memcpy(tp_p->tp.trace_id, existing_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); - __builtin_memcpy(tp_p->tp.parent_id, existing_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); + __builtin_memcpy( + tp_p->tp.trace_id, existing_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); + __builtin_memcpy( + tp_p->tp.parent_id, existing_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); } else { existing_tp = trace_info_for_connection(conn); if (correlated_requests(tp_p, existing_tp)) { found_tp = 1; bpf_dbg_printk("Found existing correlated tp for server request"); - __builtin_memcpy(tp_p->tp.trace_id, existing_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); - __builtin_memcpy(tp_p->tp.parent_id, existing_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); - } + __builtin_memcpy( + tp_p->tp.trace_id, existing_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); + __builtin_memcpy( + tp_p->tp.parent_id, existing_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); + } } } } if (!found_tp) { bpf_dbg_printk("Generating new traceparent id"); - urand_bytes(tp_p->tp.trace_id, TRACE_ID_SIZE_BYTES); + urand_bytes(tp_p->tp.trace_id, TRACE_ID_SIZE_BYTES); __builtin_memset(tp_p->tp.parent_id, 0, sizeof(tp_p->tp.span_id)); } else { bpf_dbg_printk("Using old traceparent id"); @@ -270,9 +281,9 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t } unsigned char *buf = tp_char_buf(); - if (buf) { + if (buf) { int buf_len = (int)bytes_len; - bpf_clamp_umax(buf_len, TRACE_BUF_SIZE-1); + bpf_clamp_umax(buf_len, TRACE_BUF_SIZE - 1); bpf_probe_read(buf, buf_len, u_buf); unsigned char *res = bpf_strstr_tp_loop(buf, buf_len); diff --git a/bpf/trace_util.h b/bpf/trace_util.h index a6091bf6b..4eb5e4e6b 100644 --- a/bpf/trace_util.h +++ b/bpf/trace_util.h @@ -5,23 +5,23 @@ #define TRACE_PARENT_HEADER_LEN 68 static unsigned char *hex = (unsigned char *)"0123456789abcdef"; -static unsigned char *reverse_hex = (unsigned char *) - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - 
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; +static unsigned char *reverse_hex = + (unsigned char *)"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; static __always_inline void urand_bytes(unsigned char *buf, u32 size) { for (int i = 0; i < size; i += sizeof(u32)) { @@ -30,8 +30,8 @@ static __always_inline void urand_bytes(unsigned char *buf, u32 size) { } static __always_inline void decode_hex(unsigned char *dst, unsigned char *src, int src_len) { - for (int i = 1, j = 0; i < src_len; i +=2) { - unsigned char p = src[i-1]; + for (int i = 1, j = 0; i < src_len; i += 2) { + unsigned char p = src[i - 1]; unsigned char q = src[i]; unsigned char a = reverse_hex[p & 0xff]; @@ -52,12 +52,10 @@ static __always_inline void encode_hex(unsigned char *dst, unsigned char *src, i } } - static __always_inline bool is_traceparent(unsigned char *p) { - if (((p[0] == 'T') || (p[0] == 't')) && (p[1] == 'r') && (p[2] == 'a') && (p[3] == 'c') && + if (((p[0] == 'T') || (p[0] == 't')) && (p[1] == 'r') && (p[2] == 'a') && (p[3] == 'c') && (p[4] == 'e') && ((p[5] == 'p') || (p[5] == 'P')) && (p[6] == 'a') && (p[7] == 'r') && - (p[8] == 'e') && (p[9] == 'n') && (p[10] == 't') && (p[11] == ':') && (p[12] == ' ') - ) { + (p[8] == 'e') && (p[9] == 'n') && (p[10] == 't') && (p[11] == ':') && (p[12] == ' ')) { return true; } diff --git a/bpf/tracer_common.h b/bpf/tracer_common.h index 44a630aec..95d7a54a7 100644 --- a/bpf/tracer_common.h +++ b/bpf/tracer_common.h @@ -19,7 +19,8 @@ #define PATH_MAX_LEN 100 #define METHOD_MAX_LEN 7 // Longest method: OPTIONS -#define REMOTE_ADDR_MAX_LEN 50 // We need 48: 39(ip v6 max) + 1(: separator) + 7(port length max value 65535) + 1(null terminator) +#define REMOTE_ADDR_MAX_LEN \ + 50 // We need 48: 39(ip v6 max) + 1(: separator) + 7(port length max value 65535) + 1(null terminator) #define HOST_LEN 64 // can be a fully qualified DNS name #define TRACEPARENT_LEN 55 #define SQL_MAX_LEN 500 @@ -30,14 +31,14 @@ // Trace of an HTTP call invocation. It is instantiated by the return uprobe and forwarded to the // user space through the events ringbuffer. 
typedef struct http_request_trace_t { - u8 type; // Must be first + u8 type; // Must be first u64 go_start_monotime_ns; u64 start_monotime_ns; u64 end_monotime_ns; - u8 method[METHOD_MAX_LEN]; - u8 path[PATH_MAX_LEN]; + u8 method[METHOD_MAX_LEN]; + u8 path[PATH_MAX_LEN]; u16 status; - connection_info_t conn __attribute__ ((aligned (8))); + connection_info_t conn __attribute__((aligned(8))); s64 content_length; tp_info_t tp; @@ -45,43 +46,43 @@ typedef struct http_request_trace_t { } __attribute__((packed)) http_request_trace; typedef struct sql_request_trace_t { - u8 type; // Must be first + u8 type; // Must be first u64 start_monotime_ns; u64 end_monotime_ns; - u8 sql[SQL_MAX_LEN]; + u8 sql[SQL_MAX_LEN]; u16 status; tp_info_t tp; pid_info pid; } __attribute__((packed)) sql_request_trace; typedef struct kafka_client_req { - u8 type; // Must be first + u8 type; // Must be first u64 start_monotime_ns; u64 end_monotime_ns; - u8 buf[KAFKA_MAX_LEN]; - connection_info_t conn __attribute__ ((aligned (8))); + u8 buf[KAFKA_MAX_LEN]; + connection_info_t conn __attribute__((aligned(8))); tp_info_t tp; pid_info pid; } __attribute__((packed)) kafka_client_req_t; typedef struct kafka_go_req { - u8 type; // Must be first + u8 type; // Must be first u64 start_monotime_ns; u64 end_monotime_ns; - u8 topic[MAX_TOPIC_NAME_LEN]; - connection_info_t conn __attribute__ ((aligned (8))); + u8 topic[MAX_TOPIC_NAME_LEN]; + connection_info_t conn __attribute__((aligned(8))); tp_info_t tp; pid_info pid; u8 op; } __attribute__((packed)) kafka_go_req_t; typedef struct redis_client_req { - u8 type; // Must be first + u8 type; // Must be first u64 start_monotime_ns; u64 end_monotime_ns; - u8 buf[REDIS_MAX_LEN]; - connection_info_t conn __attribute__ ((aligned (8))); - tp_info_t tp __attribute__ ((aligned (8))); + u8 buf[REDIS_MAX_LEN]; + connection_info_t conn __attribute__((aligned(8))); + tp_info_t tp __attribute__((aligned(8))); pid_info pid; u8 err; } __attribute__((packed)) redis_client_req_t; diff --git a/bpf/tracing.h b/bpf/tracing.h index b9e90f7a6..6a0e70e4b 100644 --- a/bpf/tracing.h +++ b/bpf/tracing.h @@ -5,12 +5,12 @@ #include "http_types.h" #define NANOSECONDS_PER_EPOCH (15LL * 1000000000LL) // 15 seconds -#define NANOSECONDS_PER_IMM_EPOCH (100000000LL) // 100 ms +#define NANOSECONDS_PER_IMM_EPOCH (100000000LL) // 100 ms struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, connection_info_t); // key: the connection info - __type(value, tp_info_pid_t); // value: traceparent info + __type(value, tp_info_pid_t); // value: traceparent info __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } trace_map SEC(".maps"); @@ -18,20 +18,22 @@ struct { struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, connection_info_t); // key: the connection info - __type(value, tp_info_pid_t); // value: traceparent info + __type(value, tp_info_pid_t); // value: traceparent info __uint(max_entries, MAX_CONCURRENT_REQUESTS); } incoming_trace_map SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, connection_info_t); // key: the connection info - __type(value, tp_info_pid_t); // value: traceparent info + __type(value, tp_info_pid_t); // value: traceparent info __uint(max_entries, MAX_CONCURRENT_REQUESTS); } outgoing_trace_map SEC(".maps"); static __always_inline void make_tp_string(unsigned char *buf, tp_info_t *tp) { // Version - *buf++ = '0'; *buf++ = '0'; *buf++ = '-'; + *buf++ = '0'; + *buf++ = '0'; + *buf++ = '-'; // TraceID encode_hex(buf, tp->trace_id, 
TRACE_ID_SIZE_BYTES); @@ -44,11 +46,12 @@ static __always_inline void make_tp_string(unsigned char *buf, tp_info_t *tp) { *buf++ = '-'; // Flags - *buf++ = '0'; *buf = (tp->flags == 0) ? '0' : '1'; + *buf++ = '0'; + *buf = (tp->flags == 0) ? '0' : '1'; } static __always_inline tp_info_pid_t *trace_info_for_connection(connection_info_t *conn) { - return (tp_info_pid_t *)bpf_map_lookup_elem(&trace_map, conn); + return (tp_info_pid_t *)bpf_map_lookup_elem(&trace_map, conn); } static __always_inline u64 current_epoch(u64 ts) { @@ -84,7 +87,7 @@ static __always_inline u8 correlated_request_with_current(tp_info_pid_t *existin u64 pid_tid = bpf_get_current_pid_tgid(); u64 ts = bpf_ktime_get_ns(); - u32 pid= pid_from_pid_tgid(pid_tid); + u32 pid = pid_from_pid_tgid(pid_tid); // We check for correlated requests which are in order, but from different PIDs // Same PID means that we had client port reuse, which might falsely match prior diff --git a/bpf/watch_helper.c b/bpf/watch_helper.c index 90db9203a..15114a4c7 100644 --- a/bpf/watch_helper.c +++ b/bpf/watch_helper.c @@ -22,9 +22,9 @@ struct { } watch_events SEC(".maps"); SEC("kprobe/sys_bind") -int kprobe_sys_bind(struct pt_regs *ctx) { +int kprobe_sys_bind(struct pt_regs *ctx) { // unwrap the args because it's a sys call - struct pt_regs * __ctx = (struct pt_regs *)PT_REGS_PARM1(ctx); + struct pt_regs *__ctx = (struct pt_regs *)PT_REGS_PARM1(ctx); void *addr; bpf_probe_read(&addr, sizeof(void *), (void *)&PT_REGS_PARM2(__ctx)); @@ -41,7 +41,7 @@ int kprobe_sys_bind(struct pt_regs *ctx) { watch_info_t *trace = bpf_ringbuf_reserve(&watch_events, sizeof(watch_info_t), 0); if (trace) { trace->flags = WATCH_BIND; - trace->payload = port; + trace->payload = port; bpf_dbg_printk("New port bound %d", trace->payload); bpf_ringbuf_submit(trace, 0);
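For context on how these bpf_ringbuf_submit calls surface to userspace, here is a minimal libbpf consumer sketch for the watch_events map. The watch_info_t mirror and the map-fd plumbing are assumptions for illustration; the project's real consumer is its Go userspace, matching the Go identifiers mentioned in ringbuf.h:

// Userspace sketch (libbpf): drain watch_events and print each record.
#include <bpf/libbpf.h>
#include <stdio.h>

typedef struct watch_info {
    unsigned long long flags;   // e.g. WATCH_BIND; assumed mirror of the kernel-side struct
    unsigned long long payload; // e.g. the newly bound port
} watch_info_t;

static int handle_watch_event(void *ctx, void *data, size_t len) {
    const watch_info_t *wi = data;
    printf("watch event: flags=%llu payload=%llu\n", wi->flags, wi->payload);
    return 0; // returning non-zero stops ring_buffer__poll early
}

static int drain_watch_events(int map_fd) {
    struct ring_buffer *rb = ring_buffer__new(map_fd, handle_watch_event, NULL, NULL);
    if (!rb)
        return -1;
    while (ring_buffer__poll(rb, 100 /* timeout, ms */) >= 0) {
        // handle_watch_event runs once per submitted record
    }
    ring_buffer__free(rb);
    return 0;
}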