Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

achieve the page fault #15

Open
wants to merge 4 commits into
base: kindling-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 112 additions & 4 deletions driver/bpf/fillers.h
Original file line number Diff line number Diff line change
Expand Up @@ -4089,12 +4089,76 @@ FILLER(sched_switch_e, false)
return res;
}

/*
 * Serialize the page-fault snapshot stored for `tid` in pagefault_map into
 * the event currently being built in `data`.
 *
 * Parameters are written in this order: pgft_maj and pgft_min as PT_UINT64,
 * then vm_size, vm_rss and vm_swap as PT_UINT32.
 *
 * Returns PPM_SUCCESS once all five parameters have been written,
 * PPM_SKIP_EVENT when pagefault_map holds no entry for `tid`, or the first
 * non-success code returned by bpf_val_to_ring_type().
 */
static __always_inline int __bpf_pagefault_analysis(struct filler_data *data, u32 tid)
{
	struct pagefault_data *pgftp;
	int res;

	pgftp = bpf_map_lookup_elem(&pagefault_map, &tid);
	if (!pgftp) {
		/*
		 * Nothing was recorded for this thread. Do not report success
		 * here: the caller would then push an event frame that has a
		 * header but none of its parameters.
		 */
		return PPM_SKIP_EVENT;
	}

	/* {"pgft_maj", PT_UINT64, PF_DEC} */
	res = bpf_val_to_ring_type(data, pgftp->maj_flt, PT_UINT64);
	if (res != PPM_SUCCESS)
		return res;

	/* {"pgft_min", PT_UINT64, PF_DEC} */
	res = bpf_val_to_ring_type(data, pgftp->min_flt, PT_UINT64);
	if (res != PPM_SUCCESS)
		return res;

	/* {"vm_size", PT_UINT32, PF_DEC} */
	res = bpf_val_to_ring_type(data, pgftp->vm_size, PT_UINT32);
	if (res != PPM_SUCCESS)
		return res;

	/* {"vm_rss", PT_UINT32, PF_DEC} */
	res = bpf_val_to_ring_type(data, pgftp->vm_rss, PT_UINT32);
	if (res != PPM_SUCCESS)
		return res;

	/* {"vm_swap", PT_UINT32, PF_DEC}; last parameter, so its status is the result */
	return bpf_val_to_ring_type(data, pgftp->vm_swap, PT_UINT32);
}

/*
 * Build and emit the page-fault event for thread `tid`.
 *
 * Initializes the filler state for `ctx`, writes the event header if the
 * tail context is still empty, serializes the parameters through
 * __bpf_pagefault_analysis() and, when every step succeeded, pushes the
 * frame. The final status is stored in tail_ctx.prev_res before
 * bpf_kp_terminate_filler() is invoked.
 *
 * Always returns 0; the outcome is reported through tail_ctx.prev_res only.
 *
 * NOTE(review): `data` is not zero-initialized, so the `data.state` check
 * below relies on init_filler_data() having assigned it on every path.
 * Confirm against init_filler_data().
 */
static __always_inline int bpf_pagefault_analysis(void *ctx, u32 tid)
{
	struct filler_data data;
	int status = init_filler_data(ctx, &data, false);

	if (status == PPM_SUCCESS) {
		/* A zero length means no header has been written yet. */
		if (data.state->tail_ctx.len == 0)
			write_evt_hdr(&data);

		status = __bpf_pagefault_analysis(&data, tid);
		if (status == PPM_SUCCESS)
			status = push_evt_frame(ctx, &data);
	}

	if (data.state)
		data.state->tail_ctx.prev_res = status;

	bpf_kp_terminate_filler(&data);
	return 0;
}

FILLER(sys_pagefault_e, false)
{
struct page_fault_args *ctx;
unsigned long error_code;
unsigned long address;
unsigned long ip;
struct task_struct *task;
unsigned long total_vm;
unsigned long maj_flt;
unsigned long min_flt;
struct mm_struct *mm;
long total_rss;
long swap;
u32 flags;
int res;

Expand All @@ -4111,17 +4175,61 @@ FILLER(sys_pagefault_e, false)
error_code = ctx->error_code;
#endif

res = bpf_val_to_ring(data, address);


task = (struct task_struct *)bpf_get_current_task();


/*
* pgft_maj
*/
maj_flt = _READ(task->maj_flt);
res = bpf_val_to_ring_type(data, maj_flt, PT_UINT64);
if (res != PPM_SUCCESS)
return res;

res = bpf_val_to_ring(data, ip);
/*
* pgft_min
*/
min_flt = _READ(task->min_flt);
res = bpf_val_to_ring_type(data, min_flt, PT_UINT64);
if (res != PPM_SUCCESS)
return res;

flags = pf_flags_to_scap(error_code);
res = bpf_val_to_ring(data, flags);
total_vm = 0;
total_rss = 0;
swap = 0;

mm = _READ(task->mm);
if (mm) {
total_vm = _READ(mm->total_vm);
total_vm <<= (PAGE_SHIFT - 10);
total_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10);
swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10);
}

/*
* vm_size
*/
res = bpf_val_to_ring_type(data, total_vm, PT_UINT32);
if (res != PPM_SUCCESS)
return res;

/*
* vm_rss
*/
res = bpf_val_to_ring_type(data, total_rss, PT_UINT32);
if (res != PPM_SUCCESS)
return res;

/*
* vm_swap
*/
res = bpf_val_to_ring_type(data, swap, PT_UINT32);

// pid_t tid = _READ(task->pid);
// int map_res = bpf_map_update_elem(&pgft_major_map, &tid, &maj_flt, BPF_ANY);
// if(map_res != 0) return PPM_MAP_FAILURE;
return res;
}

Expand Down
24 changes: 16 additions & 8 deletions driver/bpf/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,6 @@ struct bpf_map_def __bpf_section("maps") local_state_map = {
.max_entries = 0,
};

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
/*
 * Hash map with a 64-bit key and one struct sys_stash_args per entry, capped
 * at 65535 entries. Only compiled when BPF_SUPPORTS_RAW_TRACEPOINTS is not
 * defined.
 * NOTE(review): presumably stashes syscall arguments between enter and exit
 * on kernels without raw tracepoints - confirm against the users of this map.
 */
struct bpf_map_def __bpf_section("maps") stash_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u64),
.value_size = sizeof(struct sys_stash_args),
.max_entries = 65535,
};
#endif

struct bpf_map_def __bpf_section("maps") rtt_static_map = {
.type = BPF_MAP_TYPE_HASH,
Expand All @@ -113,6 +105,22 @@ struct bpf_map_def __bpf_section("maps") stash_tuple_map = {
.max_entries = 65535,
};

/*
 * Per-thread page-fault snapshots: a hash map keyed by thread id (a
 * pid_t-sized key) whose value is a struct pagefault_data.
 * Entries are created and refreshed by the page-fault probe and removed by
 * the sched_process_exit_multiple probe when the thread exits.
 * NOTE(review): max_entries is 1048576 - confirm the memory footprint of a
 * hash map this large is acceptable on the target hosts.
 */
struct bpf_map_def __bpf_section("maps") pagefault_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(pid_t),
.value_size = sizeof(struct pagefault_data),
.max_entries = 1048576,
};

#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
/*
 * Hash map with a 64-bit key and one struct sys_stash_args per entry, capped
 * at 65535 entries. Only compiled when BPF_SUPPORTS_RAW_TRACEPOINTS is not
 * defined.
 * NOTE(review): presumably stashes syscall arguments between enter and exit
 * on kernels without raw tracepoints - confirm against the users of this map.
 */
struct bpf_map_def __bpf_section("maps") stash_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(u64),
.value_size = sizeof(struct sys_stash_args),
.max_entries = 65535,
};
#endif

enum offcpu_type {
ON, // 0
DISK, // 1
Expand Down
2 changes: 1 addition & 1 deletion driver/bpf/plumbing_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ static __always_inline bool prepare_filler(void *ctx,
goto cleanup;
return true;

cleanup:
cleanup:
release_local_state(state);
return false;
}
Expand Down
69 changes: 64 additions & 5 deletions driver/bpf/probe.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,39 @@ BPF_PROBE("raw_syscalls/", sys_exit, sys_exit_args)
return 0;
}

/*
 * Additional eBPF program attached to the sched_process_exit hook (several
 * programs share this hook point).
 *
 * Warning: this program must be placed in front of the other program on the
 * same hook, and the tail call must come last.
 *
 * When a thread that is not a kernel thread exits while capture and
 * page-fault collection are both enabled, the data stored for it in
 * pagefault_map is emitted as a PPME_PAGE_FAULT_E event and its map entry is
 * deleted. The entry is deleted even if prepare_filler() declines the event.
 */
BPF_PROBE("sched/", sched_process_exit_multiple, sched_process_exit_args)
{
	struct task_struct *cur = (struct task_struct *)bpf_get_current_task();
	unsigned int task_flags = _READ(cur->flags);
	struct sysdig_bpf_settings *cfg;
	u32 tid;

	/* Kernel threads are ignored. */
	if (task_flags & PF_KTHREAD)
		return 0;

	cfg = get_bpf_settings();
	if (!cfg || !cfg->capture_enabled)
		return 0;

	tid = _READ(cur->pid);
	if (!cfg->page_faults)
		return 0;

	/* Perf out the page-fault data as an event now that the thread has exited. */
	if (prepare_filler(ctx, ctx, PPME_PAGE_FAULT_E, cfg, 0))
		bpf_pagefault_analysis(ctx, tid);

	bpf_map_delete_elem(&pagefault_map, &tid);
	return 0;
}

BPF_PROBE("sched/", sched_process_exit, sched_process_exit_args)
{
struct sysdig_bpf_settings *settings;
Expand All @@ -174,8 +207,8 @@ BPF_PROBE("sched/", sched_process_exit, sched_process_exit_args)

evt_type = PPME_PROCEXIT_1_E;
#ifdef CPU_ANALYSIS
// perf out
u32 tid = _READ(task->pid);
// perf out
if (prepare_filler(ctx, ctx, PPME_CPU_ANALYSIS_E, settings, 0)) {
bpf_cpu_analysis(ctx, tid);
}
Expand Down Expand Up @@ -324,7 +357,6 @@ BPF_PROBE("sched/", sched_wakeup, sched_process_exit_args)
static __always_inline int bpf_page_fault(struct page_fault_args *ctx)
{
struct sysdig_bpf_settings *settings;
enum ppm_event_type evt_type;

settings = get_bpf_settings();
if (!settings)
Expand All @@ -335,10 +367,37 @@ static __always_inline int bpf_page_fault(struct page_fault_args *ctx)

if (!settings->capture_enabled)
return 0;
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
struct mm_struct *mm = _READ(task->mm);
pid_t tid = _READ(task->pid);
pid_t pid = _READ(task->tgid);
u64 cur_maj = _READ(task->maj_flt);
struct pagefault_data *last_pgft = bpf_map_lookup_elem(&pagefault_map, &tid);
if(!last_pgft && cur_maj != 0)
{
struct pagefault_data pgft_data = {};
bpf_map_update_elem(&pagefault_map, &tid, &pgft_data, BPF_ANY);
last_pgft = bpf_map_lookup_elem(&pagefault_map, &tid);
}

if(last_pgft && cur_maj != last_pgft->maj_flt)
{
last_pgft->pid = pid;
last_pgft->tid = tid;
last_pgft->maj_flt = _READ(task->maj_flt);
last_pgft->min_flt = _READ(task->min_flt);

if (mm)
{
last_pgft->vm_size = _READ(mm->total_vm);
last_pgft->vm_size <<= (PAGE_SHIFT - 10);
last_pgft->vm_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10);
last_pgft->vm_swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10);
}

evt_type = PPME_PAGE_FAULT_E;

call_filler(ctx, ctx, evt_type, settings, UF_ALWAYS_DROP);
last_pgft->timestamp = bpf_ktime_get_ns() + settings->boot_time;
}

return 0;
}

Expand Down
19 changes: 17 additions & 2 deletions driver/bpf/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,17 @@ struct tcp_reset_args {
};
#endif

/*
 * Page-fault snapshot kept per thread in pagefault_map and also consumed by
 * userspace through scap_get_page_faults_from_map().
 *
 * NOTE(review): five __u32 members precede the __u64 members, so ABIs that
 * align __u64 to 8 bytes insert 4 bytes of implicit padding after vm_swap.
 * Confirm both sides of the interface agree on this layout.
 */
struct pagefault_data {
/* Thread-group id (task->tgid) of the faulting task. */
__u32 pid;
/* Thread id (task->pid) of the faulting task; also the map key. */
__u32 tid;
/* mm->total_vm shifted left by (PAGE_SHIFT - 10); presumably KiB. */
__u32 vm_size;
/* bpf_get_mm_rss(mm) shifted left by (PAGE_SHIFT - 10); presumably KiB. */
__u32 vm_rss;
/* bpf_get_mm_swap(mm) shifted left by (PAGE_SHIFT - 10); presumably KiB. */
__u32 vm_swap;
/* task->maj_flt at the time of the last update. */
__u64 maj_flt;
/* task->min_flt at the time of the last update. */
__u64 min_flt;
/* bpf_ktime_get_ns() + settings->boot_time at the time of the last update. */
__u64 timestamp;
};

enum sysdig_map_types {
SYSDIG_PERF_MAP = 0,
SYSDIG_TAIL_MAP = 1,
Expand All @@ -261,10 +272,13 @@ enum sysdig_map_types {
SYSDIG_TMP_SCRATCH_MAP = 7,
SYSDIG_SETTINGS_MAP = 8,
SYSDIG_LOCAL_STATE_MAP = 9,
SYSDIG_RTT_STATISTICS = 10,
SYSDIG_STASH_TUPLE_MAP = 11,
SYSDIG_PAGEFAULT_MAP = 12,
#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS
SYSDIG_STASH_MAP = 10,
SYSDIG_RTT_STATISTICS = 11,
SYSDIG_STASH_MAP = 13,
#endif

};

struct sysdig_bpf_settings {
Expand All @@ -275,6 +289,7 @@ struct sysdig_bpf_settings {
bool capture_enabled;
bool do_dynamic_snaplen;
bool page_faults;
bool pgft_map_clear;
bool dropping_mode;
bool is_dropping;
bool tracers_enabled;
Expand Down
2 changes: 1 addition & 1 deletion driver/event_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ const struct ppm_event_info g_event_info[PPM_EVENT_MAX] = {
/* PPME_INFRASTRUCTURE_EVENT_X */{"NA4", EC_SYSTEM, EF_UNUSED, 0},
/* PPME_SYSCALL_EXECVE_18_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 1, {{"filename", PT_FSPATH, PF_NA} } },
/* PPME_SYSCALL_EXECVE_18_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 17, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC} } },
/* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 3, {{"addr", PT_UINT64, PF_HEX}, {"ip", PT_UINT64, PF_HEX}, {"error", PT_FLAGS32, PF_HEX, pf_flags} } },
/* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 5, {{"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC} } },
/* PPME_PAGE_FAULT_X */ {"NA5", EC_OTHER, EF_UNUSED, 0},
/* PPME_SYSCALL_EXECVE_19_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 1, {{"filename", PT_FSPATH, PF_NA} } },
/* PPME_SYSCALL_EXECVE_19_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 19, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC}, {"pgid", PT_PID, PF_DEC}, {"loginuid", PT_INT32, PF_DEC} } },
Expand Down
1 change: 1 addition & 0 deletions driver/ppm_events_public.h
Original file line number Diff line number Diff line change
Expand Up @@ -1663,6 +1663,7 @@ struct ppm_event_entry {
#define PPM_FAILURE_INVALID_USER_MEMORY -2
#define PPM_FAILURE_BUG -3
#define PPM_SKIP_EVENT -4
#define PPM_MAP_FAILURE -5

#define RW_SNAPLEN 80
#define RW_MAX_SNAPLEN PPM_MAX_ARG_SIZE
Expand Down
19 changes: 19 additions & 0 deletions userspace/libscap/scap.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ limitations under the License.
#endif // _WIN32

#include "scap.h"
#include "../../driver/bpf/types.h"
#ifdef HAS_CAPTURE
#if !defined(_WIN32) && !defined(CYGWING_AGENT)
#include "driver_config.h"
Expand Down Expand Up @@ -1918,7 +1919,25 @@ int32_t scap_enable_tracers_capture(scap_t* handle)
}
#endif


#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32)
/*
 * Read page-fault records from the eBPF page-fault map.
 *
 * This is a thin dispatcher: it validates the capture mode and forwards the
 * request to scap_bpf_get_page_faults_from_map() when the handle has devices
 * and is backed by the eBPF probe.
 *
 * handle     capture handle; must be in SCAP_MODE_LIVE.
 * last_time, cur_time, results, counts, maxlen
 *            passed through unchanged to scap_bpf_get_page_faults_from_map().
 *
 * Returns whatever the eBPF backend returns, or SCAP_FAILURE with
 * handle->m_lasterr set when the mode is not live or no eBPF-backed device
 * is available.
 */
int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen)
{
	if(handle->m_mode != SCAP_MODE_LIVE)
	{
		snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_get_page_faults_from_map not supported on this scap mode");
		ASSERT(false);
		return SCAP_FAILURE;
	}

	if(handle->m_ndevs && handle->m_bpf)
	{
		return scap_bpf_get_page_faults_from_map(handle, last_time, cur_time, results, counts, maxlen);
	}

	//
	// No eBPF-backed device, so there is no map to read. Return an explicit
	// failure: falling off the end of a non-void function leaves the caller
	// with an undefined return value.
	//
	snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_get_page_faults_from_map is only supported with the eBPF probe");
	return SCAP_FAILURE;
}

int32_t scap_enable_page_faults(scap_t *handle)
{
if(handle->m_mode != SCAP_MODE_LIVE)
Expand Down
2 changes: 2 additions & 0 deletions userspace/libscap/scap.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct iovec;
#include "uthash.h"
#include "../common/types.h"
#include "../../driver/ppm_events_public.h"
#include "../../driver/bpf/types.h"
#ifdef _WIN32
#include <time.h>
#define MAP_FAILED (void*)-1
Expand Down Expand Up @@ -1067,6 +1068,7 @@ void scap_set_refresh_proc_table_when_saving(scap_t* handle, bool refresh);
uint64_t scap_ftell(scap_t *handle);
void scap_fseek(scap_t *handle, uint64_t off);
int32_t scap_enable_tracers_capture(scap_t* handle);
int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen);
int32_t scap_enable_page_faults(scap_t *handle);
int32_t scap_enable_skb_capture(scap_t *handle);
int32_t scap_disable_skb_capture(scap_t *handle);
Expand Down
Loading