Skip to content

Commit

Permalink
perf: use lazy program data initialization
Browse files Browse the repository at this point in the history
We currently initialize all task context related fields at the beginning
of our BPF programs. Reading all the relevant fields takes some CPU
cycles although these fields are not always used. In many cases, for
example when filters are applied, the event is not sent to userspace and
is just dropped without any side effects.

Reading only the essential fields and leaving all other fields
initialization to the submit stage will avoid wasting these cycles, and
will also allow us to perform some of the event enrichment in userspace
instead (not part of this PR).

To make this change, we have to remove the matched_policies which were
cached in task_info since we can't check if "context changed" anymore.
Considering that the average user will only use one or two scope filters
(e.g. container id or binary name), computing the scope on every run is
not a big overhead - running BPF statistics with and without the
change verified this is indeed the case, and with the change the
performance is even slightly better.

Although this change does not introduce a visible performance gain for
most of the events, it clearly does for some specific events. For
example, the hidden_inode event, which attaches a program to the
filldir64 function, gains about 50% in performance.
  • Loading branch information
yanivagman committed Mar 24, 2024
1 parent 8030c56 commit f806cb4
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 164 deletions.
5 changes: 5 additions & 0 deletions pkg/ebpf/c/common/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <vmlinux.h>

#include <common/context.h>
#include <common/hash.h>
#include <common/network.h>

Expand Down Expand Up @@ -448,6 +449,10 @@ statfunc int events_perf_submit(program_data_t *p, u32 id, long ret)
p->event->context.eventid = id;
p->event->context.retval = ret;

init_task_context(&p->event->context.task, p->event->task, p->config->options);
// keep task_info updated
bpf_probe_read_kernel(&p->task_info->context, sizeof(task_context_t), &p->event->context.task);

// Get Stack trace
if (p->config->options & OPT_CAPTURE_STACK_TRACES) {
int stack_id = bpf_get_stackid(p->ctx, &stack_addresses, BPF_F_USER_STACK);
Expand Down
160 changes: 47 additions & 113 deletions pkg/ebpf/c/common/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

// PROTOTYPES

statfunc int init_context(void *, event_context_t *, struct task_struct *, u32);
statfunc int init_task_context(task_context_t *, struct task_struct *, u32);
statfunc void init_proc_info_scratch(u32, scratch_t *);
statfunc proc_info_t *init_proc_info(u32, u32);
statfunc void init_task_info_scratch(u32, scratch_t *);
Expand All @@ -21,74 +21,43 @@ statfunc void reset_event_args(program_data_t *);

// FUNCTIONS

statfunc int
init_context(void *ctx, event_context_t *context, struct task_struct *task, u32 options)
statfunc int init_task_context(task_context_t *tsk_ctx, struct task_struct *task, u32 options)
{
long ret = 0;
u64 id = bpf_get_current_pid_tgid();

// NOTE: parent is always a real process, not a potential thread group leader
struct task_struct *leader = get_leader_task(task);
struct task_struct *up_parent = get_leader_task(get_parent_task(leader));

// Task Info on Host
context->task.host_tid = id;
context->task.host_pid = id >> 32;
context->task.host_ppid = get_task_pid(up_parent); // always a real process (not a lwp)
tsk_ctx->host_ppid = get_task_pid(up_parent); // always a real process (not a lwp)
// Namespaces Info
context->task.tid = get_task_ns_pid(task);
context->task.pid = get_task_ns_tgid(task);
tsk_ctx->tid = get_task_ns_pid(task);
tsk_ctx->pid = get_task_ns_tgid(task);

u32 task_pidns_id = get_task_pid_ns_id(task);
u32 up_parent_pidns_id = get_task_pid_ns_id(up_parent);

if (task_pidns_id == up_parent_pidns_id)
context->task.ppid = get_task_ns_pid(up_parent); // e.g: pid 1 will have nsppid 0
tsk_ctx->ppid = get_task_ns_pid(up_parent); // e.g: pid 1 will have nsppid 0

context->task.pid_id = task_pidns_id;
context->task.mnt_id = get_task_mnt_ns_id(task);
tsk_ctx->pid_id = task_pidns_id;
tsk_ctx->mnt_id = get_task_mnt_ns_id(task);
// User Info
context->task.uid = bpf_get_current_uid_gid();
tsk_ctx->uid = bpf_get_current_uid_gid();
// Times
context->task.start_time = get_task_start_time(task);
context->task.leader_start_time = get_task_start_time(leader);
context->task.parent_start_time = get_task_start_time(up_parent);

context->task.flags = 0;
tsk_ctx->start_time = get_task_start_time(task);
tsk_ctx->leader_start_time = get_task_start_time(leader);
tsk_ctx->parent_start_time = get_task_start_time(up_parent);

if (is_compat(task))
context->task.flags |= IS_COMPAT_FLAG;
tsk_ctx->flags |= IS_COMPAT_FLAG;

// Program name
__builtin_memset(context->task.comm, 0, sizeof(context->task.comm));
ret = bpf_get_current_comm(&context->task.comm, sizeof(context->task.comm));
if (unlikely(ret < 0)) {
tracee_log(ctx, BPF_LOG_LVL_ERROR, BPF_LOG_ID_GET_CURRENT_COMM, ret);
return -1;
}
bpf_get_current_comm(&tsk_ctx->comm, sizeof(tsk_ctx->comm));

// UTS Name
char *uts_name = get_task_uts_name(task);
if (uts_name) {
__builtin_memset(context->task.uts_name, 0, sizeof(context->task.uts_name));
bpf_probe_read_str(&context->task.uts_name, TASK_COMM_LEN, uts_name);
}

// Cgroup ID
if (options & OPT_CGROUP_V1) {
context->task.cgroup_id = get_cgroup_v1_subsys0_id(task);
} else {
context->task.cgroup_id = bpf_get_current_cgroup_id();
}

// Context timestamp
context->ts = bpf_ktime_get_ns();
// Clean Stack Trace ID
context->stack_id = 0;
// Processor ID
context->processor_id = (u16) bpf_get_smp_processor_id();
// Syscall ID
context->syscall = get_task_syscall_id(task);
if (uts_name)
bpf_probe_read_kernel_str(&tsk_ctx->uts_name, TASK_COMM_LEN, uts_name);

return 0;
}
Expand All @@ -112,10 +81,7 @@ statfunc proc_info_t *init_proc_info(u32 pid, u32 scratch_idx)

statfunc void init_task_info_scratch(u32 tid, scratch_t *scratch)
{
scratch->task_info.syscall_traced = false;
scratch->task_info.policies_version = 0;
scratch->task_info.recompute_scope = true;
scratch->task_info.container_state = CONTAINER_UNKNOWN;
__builtin_memset(&scratch->task_info, 0, sizeof(task_info_t));
bpf_map_update_elem(&task_info_map, &tid, &scratch->task_info, BPF_NOEXIST);
}

Expand All @@ -130,22 +96,13 @@ statfunc task_info_t *init_task_info(u32 tid, u32 scratch_idx)
return bpf_map_lookup_elem(&task_info_map, &tid);
}

// Returns true if any scope-relevant field of the task context differs between
// the cached copy ('old') and the freshly read one ('new'). Used to decide
// whether the matched-policies cache must be recomputed.
// NOTE(review): comm and uts_name are compared as two raw u64 loads each,
// which assumes both arrays are exactly 16 bytes and 8-byte aligned within
// task_context_t — TODO confirm against the struct definition.
statfunc bool context_changed(task_context_t *old, task_context_t *new)
{
return (old->cgroup_id != new->cgroup_id) || old->uid != new->uid ||
old->mnt_id != new->mnt_id || old->pid_id != new->pid_id ||
*(u64 *) old->comm != *(u64 *) new->comm ||
*(u64 *) &old->comm[8] != *(u64 *) &new->comm[8] ||
*(u64 *) old->uts_name != *(u64 *) new->uts_name ||
*(u64 *) &old->uts_name[8] != *(u64 *) &new->uts_name[8];
}

// clang-format off
statfunc int init_program_data(program_data_t *p, void *ctx)
{
long ret = 0;
int zero = 0;

p->ctx = ctx;

// allow caller to specify a stack/map based event_data_t pointer
if (p->event == NULL) {
p->event = bpf_map_lookup_elem(&event_data_map, &zero);
Expand All @@ -157,19 +114,20 @@ statfunc int init_program_data(program_data_t *p, void *ctx)
if (unlikely(p->config == NULL))
return 0;

p->event->task = (struct task_struct *) bpf_get_current_task();
ret = init_context(ctx, &p->event->context, p->event->task, p->config->options);
if (unlikely(ret < 0)) {
// disable logging as a workaround for instruction limit verifier error on kernel 4.19
// tracee_log(ctx, BPF_LOG_LVL_ERROR, BPF_LOG_ID_INIT_CONTEXT, ret);
return 0;
}

p->ctx = ctx;
p->event->args_buf.offset = 0;
p->event->args_buf.argnum = 0;
p->event->task = (struct task_struct *) bpf_get_current_task();

__builtin_memset(&p->event->context.task, 0, sizeof(p->event->context.task));

bool container_lookup_required = true;
// get the minimal context required at this stage
// any other context will be initialized only if event is submitted
u64 id = bpf_get_current_pid_tgid();
p->event->context.task.host_tid = id;
p->event->context.task.host_pid = id >> 32;
p->event->context.ts = bpf_ktime_get_ns();
p->event->context.processor_id = (u16) bpf_get_smp_processor_id();
p->event->context.syscall = get_task_syscall_id(p->event->task);

u32 host_pid = p->event->context.task.host_pid;
p->proc_info = bpf_map_lookup_elem(&proc_info_map, &host_pid);
Expand All @@ -186,29 +144,9 @@ statfunc int init_program_data(program_data_t *p, void *ctx)
if (unlikely(p->task_info == NULL))
return 0;

// just initialized task info: recompute_scope is already set to true
goto out;
}

// in some places we don't call should_trace() (e.g. sys_exit) which also initializes
// matched_policies. Use previously found scopes then to initialize it.
p->event->context.matched_policies = p->task_info->matched_scopes;

// check if we need to recompute scope due to context change
if (context_changed(&p->task_info->context, &p->event->context.task))
p->task_info->recompute_scope = true;

u8 container_state = p->task_info->container_state;

// if task is already part of a container: no need to check if state changed
switch (container_state) {
case CONTAINER_STARTED:
case CONTAINER_EXISTED:
p->event->context.task.flags |= CONTAINER_STARTED_FLAG;
container_lookup_required = false;
init_task_context(&p->task_info->context, p->event->task, p->config->options);
}

out:
if (unlikely(p->event->context.policies_version != p->config->policies_version)) {
// copy policies_config to event data
long ret = bpf_probe_read_kernel(
Expand All @@ -218,27 +156,26 @@ statfunc int init_program_data(program_data_t *p, void *ctx)

p->event->context.policies_version = p->config->policies_version;
}
if (p->task_info->policies_version != p->event->context.policies_version) {
p->task_info->policies_version = p->event->context.policies_version;
p->task_info->recompute_scope = true;
}

if (container_lookup_required) {
u32 cgroup_id_lsb = p->event->context.task.cgroup_id;
u8 *state = bpf_map_lookup_elem(&containers_map, &cgroup_id_lsb);

if (state != NULL) {
p->task_info->container_state = *state;
switch (*state) {
case CONTAINER_STARTED:
case CONTAINER_EXISTED:
p->event->context.task.flags |= CONTAINER_STARTED_FLAG;
}
if (p->config->options & OPT_CGROUP_V1) {
p->event->context.task.cgroup_id = get_cgroup_v1_subsys0_id(p->event->task);
} else {
p->event->context.task.cgroup_id = bpf_get_current_cgroup_id();
}
p->task_info->context.cgroup_id = p->event->context.task.cgroup_id;
u32 cgroup_id_lsb = p->event->context.task.cgroup_id;
u8 *state = bpf_map_lookup_elem(&containers_map, &cgroup_id_lsb);
if (state != NULL) {
p->task_info->container_state = *state;
switch (*state) {
case CONTAINER_STARTED:
case CONTAINER_EXISTED:
p->event->context.task.flags |= CONTAINER_STARTED_FLAG;
}
}

// update task_info with the new context
bpf_probe_read(&p->task_info->context, sizeof(task_context_t), &p->event->context.task);
// initialize matched_policies to all policies match
p->event->context.matched_policies = ~0ULL;

return 1;
}
Expand Down Expand Up @@ -268,9 +205,6 @@ statfunc int init_tailcall_program_data(program_data_t *p, void *ctx)
return 0;
}

p->event->args_buf.offset = 0;
p->event->args_buf.argnum = 0;

return 1;
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/ebpf/c/common/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ statfunc void *get_path_str(struct path *path)
return NULL;

size_t buf_off = get_path_str_buf(path, string_p);
return &string_p->buf[buf_off];
return &string_p->buf[buf_off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)];
}

statfunc file_id_t get_file_id(struct file *file)
Expand All @@ -279,7 +279,7 @@ statfunc void *get_path_str_cached(struct file *file)

size_t buf_off = get_path_str_buf(__builtin_preserve_access_index(&file->f_path), string_p);
if (likely(sizeof(string_p->buf) > buf_off + sizeof(path_buf_t))) {
path = (path_buf_t *) (&string_p->buf[0] + buf_off);
path = (path_buf_t *) (&string_p->buf[buf_off & ((MAX_PERCPU_BUFSIZE >> 1) - 1)]);
bpf_map_update_elem(&io_file_path_cache_map, &file_id, path, BPF_ANY);
} else {
return NULL;
Expand Down
32 changes: 15 additions & 17 deletions pkg/ebpf/c/common/filtering.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include <maps.h>
#include <common/logging.h>
#include <common/task.h>
#include <common/common.h>

// PROTOTYPES
Expand Down Expand Up @@ -189,7 +190,7 @@ statfunc u64 bool_filter_matches(u64 filter_out_scopes, bool val)

statfunc u64 compute_scopes(program_data_t *p)
{
task_context_t *context = &p->task_info->context;
task_context_t *context = &p->event->context.task;

// Don't monitor self
if (p->config->tracee_pid == context->host_pid)
Expand Down Expand Up @@ -254,6 +255,7 @@ statfunc u64 compute_scopes(program_data_t *p)
}

if (policies_cfg->uid_filter_enabled_scopes) {
context->uid = bpf_get_current_uid_gid();
u64 filter_out_scopes = policies_cfg->uid_filter_out_scopes;
u64 mask = ~policies_cfg->uid_filter_enabled_scopes;
u64 max = policies_cfg->uid_max;
Expand All @@ -265,24 +267,27 @@ statfunc u64 compute_scopes(program_data_t *p)
}

if (policies_cfg->mnt_ns_filter_enabled_scopes) {
context->mnt_id = get_task_mnt_ns_id(p->event->task);
u64 filter_out_scopes = policies_cfg->mnt_ns_filter_out_scopes;
u64 mask = ~policies_cfg->mnt_ns_filter_enabled_scopes;
u64 mnt_id = context->mnt_id;

filter_map = get_filter_map(&mnt_ns_filter_version, version);
res &= equality_filter_matches(filter_out_scopes, filter_map, &mnt_id) | mask;
res &= equality_filter_matches(filter_out_scopes, filter_map, &context->mnt_id) | mask;
}

if (policies_cfg->pid_ns_filter_enabled_scopes) {
context->pid_id = get_task_pid_ns_id(p->event->task);
u64 filter_out_scopes = policies_cfg->pid_ns_filter_out_scopes;
u64 mask = ~policies_cfg->pid_ns_filter_enabled_scopes;
u64 pid_id = context->pid_id;

filter_map = get_filter_map(&pid_ns_filter_version, version);
res &= equality_filter_matches(filter_out_scopes, filter_map, &pid_id) | mask;
res &= equality_filter_matches(filter_out_scopes, filter_map, &context->pid_id) | mask;
}

if (policies_cfg->uts_ns_filter_enabled_scopes) {
char *uts_name = get_task_uts_name(p->event->task);
if (uts_name)
bpf_probe_read_kernel_str(&context->uts_name, TASK_COMM_LEN, uts_name);
u64 filter_out_scopes = policies_cfg->uts_ns_filter_out_scopes;
u64 mask = ~policies_cfg->uts_ns_filter_enabled_scopes;

Expand All @@ -291,6 +296,7 @@ statfunc u64 compute_scopes(program_data_t *p)
}

if (policies_cfg->comm_filter_enabled_scopes) {
bpf_get_current_comm(&context->comm, sizeof(context->comm));
u64 filter_out_scopes = policies_cfg->comm_filter_out_scopes;
u64 mask = ~policies_cfg->comm_filter_enabled_scopes;

Expand All @@ -299,9 +305,9 @@ statfunc u64 compute_scopes(program_data_t *p)
}

if (policies_cfg->cgroup_id_filter_enabled_scopes) {
u32 cgroup_id_lsb = context->cgroup_id;
u64 filter_out_scopes = policies_cfg->cgroup_id_filter_out_scopes;
u64 mask = ~policies_cfg->cgroup_id_filter_enabled_scopes;
u32 cgroup_id_lsb = context->cgroup_id;

filter_map = get_filter_map(&cgroup_id_filter_version, version);
res &= equality_filter_matches(filter_out_scopes, filter_map, &cgroup_id_lsb) | mask;
Expand All @@ -310,10 +316,9 @@ statfunc u64 compute_scopes(program_data_t *p)
if (policies_cfg->proc_tree_filter_enabled_scopes) {
u64 filter_out_scopes = policies_cfg->proc_tree_filter_out_scopes;
u64 mask = ~policies_cfg->proc_tree_filter_enabled_scopes;
u32 host_pid = context->host_pid;

filter_map = get_filter_map(&process_tree_map_version, version);
res &= equality_filter_matches(filter_out_scopes, filter_map, &host_pid) | mask;
res &= equality_filter_matches(filter_out_scopes, filter_map, &context->host_pid) | mask;
}

if (policies_cfg->bin_path_filter_enabled_scopes) {
Expand All @@ -339,15 +344,8 @@ statfunc u64 compute_scopes(program_data_t *p)

statfunc u64 should_trace(program_data_t *p)
{
// use cache whenever possible
if (p->task_info->recompute_scope) {
p->task_info->matched_scopes = compute_scopes(p);
p->task_info->recompute_scope = false;
}

p->event->context.matched_policies = p->task_info->matched_scopes;

return p->task_info->matched_scopes;
p->event->context.matched_policies = compute_scopes(p);
return p->event->context.matched_policies;
}

statfunc u64 should_submit(u32 event_id, event_data_t *event)
Expand Down
Loading

0 comments on commit f806cb4

Please sign in to comment.