-
Notifications
You must be signed in to change notification settings - Fork 108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tracking the NodeJS event loop #998
Changes from all commits
57f544b
a529d41
3db3d66
98ea021
1c624eb
c166a1c
4686a89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -121,7 +121,6 @@ int BPF_KPROBE(kprobe_tcp_rcv_established, struct sock *sk, struct sk_buff *skb) | |
// If the source port for a client call is lower, we'll get this wrong. | ||
// TODO: Need to fix this. | ||
pid_info.orig_dport = pid_info.p_conn.conn.s_port, | ||
task_tid(&pid_info.c_tid); | ||
bpf_map_update_elem(&pid_tid_to_conn, &id, &pid_info, BPF_ANY); // to support SSL on missing handshake, respect the original info if there | ||
} | ||
|
||
|
@@ -167,7 +166,6 @@ int BPF_KRETPROBE(kretprobe_sys_accept4, uint fd) | |
sort_connection_info(&info.p_conn.conn); | ||
info.p_conn.pid = pid_from_pid_tgid(id); | ||
info.orig_dport = orig_dport; | ||
task_tid(&info.c_tid); | ||
|
||
bpf_map_update_elem(&pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL on missing handshake | ||
} | ||
|
@@ -234,7 +232,6 @@ int BPF_KRETPROBE(kretprobe_sys_connect, int fd) | |
sort_connection_info(&info.p_conn.conn); | ||
info.p_conn.pid = pid_from_pid_tgid(id); | ||
info.orig_dport = orig_dport; | ||
task_tid(&info.c_tid); | ||
|
||
bpf_map_update_elem(&pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL | ||
} | ||
|
@@ -336,7 +333,6 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s | |
.orig_dport = orig_dport, | ||
}; | ||
bpf_memcpy(&ssl_conn.p_conn, &s_args.p_conn, sizeof(pid_connection_info_t)); | ||
task_tid(&ssl_conn.c_tid); | ||
bpf_map_update_elem(&ssl_to_conn, &ssl, &ssl_conn, BPF_ANY); | ||
} | ||
|
||
|
@@ -628,8 +624,8 @@ int BPF_KPROBE(kprobe_sys_exit, int status) { | |
return 0; | ||
} | ||
|
||
pid_key_t task = {0}; | ||
task_tid(&task); | ||
trace_key_t task = {0}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Extended data structure. It now has an extra ID, which is the current runtime ID. For now it is 0 for all languages other than NodeJS, and it is the current async_id for NodeJS. |
||
task_tid(&task.p_key); | ||
|
||
bpf_dbg_printk("sys_exit %d, pid=%d, valid_pid(id)=%d", id, pid_from_pid_tgid(id), valid_pid(id)); | ||
|
||
|
@@ -641,7 +637,9 @@ int BPF_KPROBE(kprobe_sys_exit, int status) { | |
bpf_map_delete_elem(&active_ssl_connections, &s_args->p_conn); | ||
} | ||
|
||
bpf_map_delete_elem(&clone_map, &task); | ||
bpf_map_delete_elem(&clone_map, &task.p_key); | ||
// This won't delete trace ids for traces with extra_id, like NodeJS. But, | ||
// we expect that it doesn't matter, since NodeJS main thread won't exit. | ||
bpf_map_delete_elem(&server_traces, &task); | ||
|
||
return 0; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,7 +62,6 @@ typedef struct http_pid_connection_info { | |
typedef struct ssl_pid_connection_info { | ||
pid_connection_info_t p_conn; | ||
u16 orig_dport; | ||
pid_key_t c_tid; | ||
} ssl_pid_connection_info_t; | ||
|
||
typedef struct tp_info { | ||
|
@@ -96,6 +95,8 @@ typedef struct http_info { | |
// with other instrumented processes | ||
pid_info pid; | ||
tp_info_t tp; | ||
u64 extra_id; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are now stored here so we can correctly clean up the server trace information when the HTTP request is finished. |
||
u32 task_tid; | ||
} http_info_t; | ||
|
||
// Here we track unknown TCP requests that are not HTTP, HTTP2 or gRPC | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#include "vmlinux.h" | ||
#include "bpf_helpers.h" | ||
#include "bpf_dbg.h" | ||
#include "pid.h" | ||
#include "ringbuf.h" | ||
#include "nodejs.h" | ||
|
||
char __license[] SEC("license") = "Dual MIT/GPL"; | ||
|
||
volatile const s32 async_wrap_async_id_off = 0; | ||
volatile const s32 async_wrap_trigger_async_id_off = 0; | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_LRU_HASH); | ||
__type(key, u64); // the pid_tid | ||
__type(value, u64); // the last AsyncWrap * | ||
__uint(max_entries, 1000); // 1000 nodejs services, small number, nodejs is single threaded | ||
__uint(pinning, LIBBPF_PIN_BY_NAME); | ||
} async_reset_args SEC(".maps"); | ||
|
||
// Entry probe for the NodeJS AsyncReset function. It remembers, per calling
// thread, the AsyncWrap pointer passed as the first argument. emit_async_init
// later uses that pointer to read the async id and the trigger async id.
// Always returns 0; threads rejected by valid_pid() are ignored.
SEC("uprobe/node:AsyncReset")
int async_reset(struct pt_regs *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

    if (valid_pid(pid_tgid)) {
        // First argument of the probed call: the AsyncWrap pointer.
        u64 async_wrap = (u64)PT_REGS_PARM1(ctx);

        bpf_dbg_printk("=== uprobe AsyncReset id=%d wrap=%llx ===", pid_tgid, async_wrap);
        bpf_map_update_elem(&async_reset_args, &pid_tgid, &async_wrap, BPF_ANY);
    }

    return 0;
}
|
||
// Return probe for the NodeJS AsyncReset function. It removes the AsyncWrap
// pointer stored for the calling thread by the matching entry probe.
// Always returns 0; threads rejected by valid_pid() are ignored.
SEC("uretprobe/node:AsyncReset")
int async_reset_ret(struct pt_regs *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

    if (valid_pid(pid_tgid)) {
        bpf_dbg_printk("=== uprobe AsyncReset returns id=%d ===", pid_tgid);
        bpf_map_delete_elem(&async_reset_args, &pid_tgid);
    }

    return 0;
}
|
||
// Entry probe for the NodeJS EmitAsyncInit function.
//
// Looks up the AsyncWrap pointer that async_reset stored for this thread, reads
// the async id and the trigger async id from it at the configured offsets, and
// then records:
//   - active_nodejs_ids: thread -> async id currently in play
//   - nodejs_parent_map: async id (child) -> trigger async id (parent)
// Always returns 0. Each early exit below mirrors one "nothing to record" case.
SEC("uprobe/node:EmitAsyncInit")
int emit_async_init(struct pt_regs *ctx) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

    if (!valid_pid(pid_tgid)) {
        return 0;
    }

    bpf_dbg_printk("=== uprobe EmitAsyncInit id=%d ===", pid_tgid);

    u64 *wrap_slot = bpf_map_lookup_elem(&async_reset_args, &pid_tgid);
    bpf_dbg_printk("wrap_val = %llx", wrap_slot);
    if (!wrap_slot) {
        bpf_dbg_printk("No wrap value found");
        return 0;
    }

    u64 wrap_addr = *wrap_slot;
    bpf_dbg_printk("wrap = %llx", wrap_addr);
    if (!wrap_addr) {
        return 0;
    }

    // Both ids start at 0, which the checks below treat as "no id".
    u64 async_id = 0;
    u64 trigger_async_id = 0;

    bpf_probe_read_user(&async_id, sizeof(u64), ((void *)wrap_addr) + async_wrap_async_id_off);
    bpf_probe_read_user(&trigger_async_id, sizeof(u64), ((void *)wrap_addr) + async_wrap_trigger_async_id_off);

    if (!async_id) {
        bpf_dbg_printk("No async id");
        return 0;
    }

    // Save the async id currently in play for this thread...
    bpf_map_update_elem(&active_nodejs_ids, &pid_tgid, &async_id, BPF_ANY);

    if (!trigger_async_id) {
        bpf_dbg_printk("No trigger async id");
        return 0;
    }

    // ...and the child -> parent relationship given by the trigger async id.
    bpf_map_update_elem(&nodejs_parent_map, &async_id, &trigger_async_id, BPF_ANY);
    bpf_dbg_printk("async_id = %llx, trigger_async_id = %llx", async_id, trigger_async_id);

    return 0;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#ifndef NODE_JS_H | ||
#define NODE_JS_H | ||
|
||
#include "vmlinux.h" | ||
#include "bpf_helpers.h" | ||
#include "bpf_builtins.h" | ||
#include "map_sizing.h" | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_LRU_HASH); | ||
__type(key, u64); // the pid_tid | ||
__type(value, u64); // the last active async_id | ||
__uint(max_entries, 1000); // 1000 nodejs services, small number, nodejs is single threaded | ||
__uint(pinning, LIBBPF_PIN_BY_NAME); | ||
} active_nodejs_ids SEC(".maps"); | ||
|
||
struct { | ||
__uint(type, BPF_MAP_TYPE_LRU_HASH); | ||
__type(key, u64); // child async_id | ||
__type(value, u64); // parent async_id | ||
__uint(max_entries, MAX_CONCURRENT_REQUESTS); | ||
__uint(pinning, LIBBPF_PIN_BY_NAME); | ||
} nodejs_parent_map SEC(".maps"); | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#ifndef RUNTIME_SUPPORT_H | ||
#define RUNTIME_SUPPORT_H | ||
|
||
#include "vmlinux.h" | ||
#include "bpf_helpers.h" | ||
#include "bpf_builtins.h" | ||
#include "pid_types.h" | ||
#include "nodejs.h" | ||
|
||
// Returns the extra, runtime-specific id for the current thread, or 0 when
// there is none.
//
// Meant to support runtimes which have internal threading models; for now it
// only checks NodeJS, by looking up the current pid_tgid in active_nodejs_ids.
static __always_inline u64 extra_runtime_id() {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u64 *node_id = (u64 *)bpf_map_lookup_elem(&active_nodejs_ids, &pid_tgid);

    return node_id ? *node_id : 0;
}
|
||
// Returns the parent id recorded for runtime_id in nodejs_parent_map, or 0 when
// no parent is recorded.
//
// p_key is accepted but not used by this body; only runtime_id drives the
// lookup.
static __always_inline u64 parent_runtime_id(pid_key_t *p_key, u64 runtime_id) {
    u64 *parent = (u64 *)bpf_map_lookup_elem(&nodejs_parent_map, &runtime_id);

    return parent ? *parent : 0;
}
|
||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't required anymore. I figured out a better way to track what I need for cleaning up the parent trace information. Essentially, now I store the namespaced threadID and the extra runtime ID on the HTTP request. It's the most accurate information to use on cleanup, since we use those to create the parent request trace.