diff --git a/driver/bpf/fillers.h b/driver/bpf/fillers.h index 8c81a3025c..9f8dd09177 100644 --- a/driver/bpf/fillers.h +++ b/driver/bpf/fillers.h @@ -4089,12 +4089,77 @@ FILLER(sched_switch_e, false) return res; } +static __always_inline int __bpf_pagefault_analysis(struct filler_data *data, u32 tid) +{ + int res; + struct pagefault_data *pgftp = bpf_map_lookup_elem(&pagefault_map, &tid); + /* No page-fault record for this thread: skip the event instead of reporting success with none of the five parameters written. NOTE(review): confirm bpf_kp_terminate_filler treats PPM_SKIP_EVENT as a silent skip. */ + if (!pgftp) + return PPM_SKIP_EVENT; + + // {"pgft_maj", PT_UINT64, PF_DEC}, + res = bpf_val_to_ring_type(data, pgftp->maj_flt, PT_UINT64); + if (res != PPM_SUCCESS) + return res; + + // {"pgft_min", PT_UINT64, PF_DEC}, + res = bpf_val_to_ring_type(data, pgftp->min_flt, PT_UINT64); + if (res != PPM_SUCCESS) + return res; + + // {"vm_size", PT_UINT32, PF_DEC}, + res = bpf_val_to_ring_type(data, pgftp->vm_size, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + // {"vm_rss", PT_UINT32, PF_DEC} + res = bpf_val_to_ring_type(data, pgftp->vm_rss, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + // {"vm_swap", PT_UINT32, PF_DEC} + res = bpf_val_to_ring_type(data, pgftp->vm_swap, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + return res; +} + +static __always_inline int bpf_pagefault_analysis(void *ctx, u32 tid) +{ + struct filler_data data; + int res; + + res = init_filler_data(ctx, &data, false); + if (res == PPM_SUCCESS) { + if (!data.state->tail_ctx.len) + write_evt_hdr(&data); + res = __bpf_pagefault_analysis(&data, tid); + } + + if (res == PPM_SUCCESS) + res = push_evt_frame(ctx, &data); + + if (data.state) + data.state->tail_ctx.prev_res = res; + + bpf_kp_terminate_filler(&data); + return 0; +} + FILLER(sys_pagefault_e, false) { struct page_fault_args *ctx; unsigned long error_code; unsigned long address; unsigned long ip; + struct task_struct *task; + unsigned long total_vm; + unsigned long maj_flt; + unsigned long min_flt; + struct mm_struct *mm; + long total_rss; + long swap; u32 flags; int res; @@ -4111,17 +4176,61 @@ FILLER(sys_pagefault_e, false) error_code = 
ctx->error_code; #endif - res = bpf_val_to_ring(data, address); + + + task = (struct task_struct *)bpf_get_current_task(); + + + /* + * pgft_maj + */ + maj_flt = _READ(task->maj_flt); + res = bpf_val_to_ring_type(data, maj_flt, PT_UINT64); if (res != PPM_SUCCESS) return res; - res = bpf_val_to_ring(data, ip); + /* + * pgft_min + */ + min_flt = _READ(task->min_flt); + res = bpf_val_to_ring_type(data, min_flt, PT_UINT64); if (res != PPM_SUCCESS) return res; - flags = pf_flags_to_scap(error_code); - res = bpf_val_to_ring(data, flags); + total_vm = 0; + total_rss = 0; + swap = 0; + + mm = _READ(task->mm); + if (mm) { + total_vm = _READ(mm->total_vm); + total_vm <<= (PAGE_SHIFT - 10); + total_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10); + swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10); + } + + /* + * vm_size + */ + res = bpf_val_to_ring_type(data, total_vm, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + /* + * vm_rss + */ + res = bpf_val_to_ring_type(data, total_rss, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + /* + * vm_swap + */ + res = bpf_val_to_ring_type(data, swap, PT_UINT32); + // pid_t tid = _READ(task->pid); + // int map_res = bpf_map_update_elem(&pgft_major_map, &tid, &maj_flt, BPF_ANY); + // if(map_res != 0) return PPM_MAP_FAILURE; return res; } diff --git a/driver/bpf/maps.h b/driver/bpf/maps.h index c6402a802d..04f424f255 100644 --- a/driver/bpf/maps.h +++ b/driver/bpf/maps.h @@ -90,14 +90,6 @@ struct bpf_map_def __bpf_section("maps") local_state_map = { .max_entries = 0, }; -#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS -struct bpf_map_def __bpf_section("maps") stash_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u64), - .value_size = sizeof(struct sys_stash_args), - .max_entries = 65535, -}; -#endif struct bpf_map_def __bpf_section("maps") rtt_static_map = { .type = BPF_MAP_TYPE_HASH, @@ -113,6 +105,22 @@ struct bpf_map_def __bpf_section("maps") stash_tuple_map = { .max_entries = 65535, }; +struct bpf_map_def __bpf_section("maps") 
pagefault_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(pid_t), + .value_size = sizeof(struct pagefault_data), + .max_entries = 1048576, +}; + +#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS +struct bpf_map_def __bpf_section("maps") stash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u64), + .value_size = sizeof(struct sys_stash_args), + .max_entries = 65535, +}; +#endif + enum offcpu_type { ON, // 0 DISK, // 1 diff --git a/driver/bpf/plumbing_helpers.h b/driver/bpf/plumbing_helpers.h index 99c86df855..0e79dadd67 100644 --- a/driver/bpf/plumbing_helpers.h +++ b/driver/bpf/plumbing_helpers.h @@ -712,7 +712,7 @@ static __always_inline bool prepare_filler(void *ctx, goto cleanup; return true; - cleanup: +cleanup: release_local_state(state); return false; } diff --git a/driver/bpf/probe.c b/driver/bpf/probe.c index 5b9175d291..e9e25ad967 100644 --- a/driver/bpf/probe.c +++ b/driver/bpf/probe.c @@ -152,6 +152,39 @@ BPF_PROBE("raw_syscalls/", sys_exit, sys_exit_args) return 0; } +// Multiple eBPF programs on the same hook point +// Warning: this prog must be in the front of another. Make sure the tail_call is at the end. +BPF_PROBE("sched/", sched_process_exit_multiple, sched_process_exit_args) +{ + struct sysdig_bpf_settings *settings; + struct task_struct *task; + unsigned int flags; + + task = (struct task_struct *)bpf_get_current_task(); + + flags = _READ(task->flags); + if (flags & PF_KTHREAD) + return 0; + + settings = get_bpf_settings(); + if (!settings) + return 0; + + if (!settings->capture_enabled) + return 0; + + u32 tid = _READ(task->pid); + //perf out pagefault data as an event when the thread exited. 
+ if (settings->page_faults) { + if (prepare_filler(ctx, ctx, PPME_PAGE_FAULT_E, settings, 0)) { + bpf_pagefault_analysis(ctx, tid); + } + bpf_map_delete_elem(&pagefault_map, &tid); + } + + return 0; +} + BPF_PROBE("sched/", sched_process_exit, sched_process_exit_args) { struct sysdig_bpf_settings *settings; @@ -174,8 +207,8 @@ BPF_PROBE("sched/", sched_process_exit, sched_process_exit_args) evt_type = PPME_PROCEXIT_1_E; #ifdef CPU_ANALYSIS - // perf out u32 tid = _READ(task->pid); + // perf out if (prepare_filler(ctx, ctx, PPME_CPU_ANALYSIS_E, settings, 0)) { bpf_cpu_analysis(ctx, tid); } @@ -324,7 +357,6 @@ BPF_PROBE("sched/", sched_wakeup, sched_process_exit_args) static __always_inline int bpf_page_fault(struct page_fault_args *ctx) { struct sysdig_bpf_settings *settings; - enum ppm_event_type evt_type; settings = get_bpf_settings(); if (!settings) @@ -335,10 +367,37 @@ static __always_inline int bpf_page_fault(struct page_fault_args *ctx) if (!settings->capture_enabled) return 0; + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + struct mm_struct *mm = _READ(task->mm); + pid_t tid = _READ(task->pid); + pid_t pid = _READ(task->tgid); + u64 cur_maj = _READ(task->maj_flt); + struct pagefault_data *last_pgft = bpf_map_lookup_elem(&pagefault_map, &tid); + if(!last_pgft && cur_maj != 0) + { + struct pagefault_data pgft_data = {}; + bpf_map_update_elem(&pagefault_map, &tid, &pgft_data, BPF_ANY); + last_pgft = bpf_map_lookup_elem(&pagefault_map, &tid); + } + + if(last_pgft && cur_maj != last_pgft->maj_flt) + { + last_pgft->pid = pid; + last_pgft->tid = tid; + last_pgft->maj_flt = _READ(task->maj_flt); + last_pgft->min_flt = _READ(task->min_flt); + + if (mm) + { + last_pgft->vm_size = _READ(mm->total_vm); + last_pgft->vm_size <<= (PAGE_SHIFT - 10); + last_pgft->vm_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10); + last_pgft->vm_swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10); + } - evt_type = PPME_PAGE_FAULT_E; - - call_filler(ctx, ctx, 
evt_type, settings, UF_ALWAYS_DROP); + last_pgft->timestamp = bpf_ktime_get_ns() + settings->boot_time; + } + return 0; } diff --git a/driver/bpf/types.h b/driver/bpf/types.h index 613c243a87..6c5933076a 100644 --- a/driver/bpf/types.h +++ b/driver/bpf/types.h @@ -250,6 +250,17 @@ struct tcp_reset_args { }; #endif +struct pagefault_data { + __u32 pid; + __u32 tid; + __u32 vm_size; + __u32 vm_rss; + __u32 vm_swap; + __u64 maj_flt; + __u64 min_flt; + __u64 timestamp; +}; + enum sysdig_map_types { SYSDIG_PERF_MAP = 0, SYSDIG_TAIL_MAP = 1, @@ -261,10 +272,13 @@ enum sysdig_map_types { SYSDIG_TMP_SCRATCH_MAP = 7, SYSDIG_SETTINGS_MAP = 8, SYSDIG_LOCAL_STATE_MAP = 9, + SYSDIG_RTT_STATISTICS = 10, + SYSDIG_STASH_TUPLE_MAP = 11, + SYSDIG_PAGEFAULT_MAP = 12, #ifndef BPF_SUPPORTS_RAW_TRACEPOINTS - SYSDIG_STASH_MAP = 10, - SYSDIG_RTT_STATISTICS = 11, + SYSDIG_STASH_MAP = 13, #endif + }; struct sysdig_bpf_settings { @@ -275,6 +289,7 @@ struct sysdig_bpf_settings { bool capture_enabled; bool do_dynamic_snaplen; bool page_faults; + bool pgft_map_clear; bool dropping_mode; bool is_dropping; bool tracers_enabled; diff --git a/driver/event_table.c b/driver/event_table.c index 5961ec3735..382ce530a0 100644 --- a/driver/event_table.c +++ b/driver/event_table.c @@ -302,7 +302,7 @@ const struct ppm_event_info g_event_info[PPM_EVENT_MAX] = { /* PPME_INFRASTRUCTURE_EVENT_X */{"NA4", EC_SYSTEM, EF_UNUSED, 0}, /* PPME_SYSCALL_EXECVE_18_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 1, {{"filename", PT_FSPATH, PF_NA} } }, /* PPME_SYSCALL_EXECVE_18_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 17, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", 
PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC} } }, - /* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 3, {{"addr", PT_UINT64, PF_HEX}, {"ip", PT_UINT64, PF_HEX}, {"error", PT_FLAGS32, PF_HEX, pf_flags} } }, + /* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 5, {{"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC} } }, /* PPME_PAGE_FAULT_X */ {"NA5", EC_OTHER, EF_UNUSED, 0}, /* PPME_SYSCALL_EXECVE_19_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 1, {{"filename", PT_FSPATH, PF_NA} } }, /* PPME_SYSCALL_EXECVE_19_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 19, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC}, {"pgid", PT_PID, PF_DEC}, {"loginuid", PT_INT32, PF_DEC} } }, diff --git a/driver/ppm_events_public.h b/driver/ppm_events_public.h index 3f86018c85..aba5d69809 100644 --- a/driver/ppm_events_public.h +++ b/driver/ppm_events_public.h @@ -1663,6 +1663,7 @@ struct ppm_event_entry { #define PPM_FAILURE_INVALID_USER_MEMORY -2 #define PPM_FAILURE_BUG -3 #define PPM_SKIP_EVENT -4 +#define PPM_MAP_FAILURE -5 #define RW_SNAPLEN 80 #define RW_MAX_SNAPLEN PPM_MAX_ARG_SIZE diff --git a/userspace/libscap/scap.c b/userspace/libscap/scap.c index 4279e92fa7..034d12ca67 100644 --- a/userspace/libscap/scap.c +++ b/userspace/libscap/scap.c @@ -35,6 +35,7 @@ limitations 
under the License. #endif // _WIN32 #include "scap.h" +#include "../../driver/bpf/types.h" #ifdef HAS_CAPTURE #if !defined(_WIN32) && !defined(CYGWING_AGENT) #include "driver_config.h" @@ -1918,7 +1919,28 @@ int32_t scap_enable_tracers_capture(scap_t* handle) } #endif + #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) +int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen) +{ + if(handle->m_mode != SCAP_MODE_LIVE) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_get_page_faults_from_map not supported on this scap mode"); + ASSERT(false); + return SCAP_FAILURE; + } + if(handle->m_ndevs) + { + if(handle->m_bpf) + { + return scap_bpf_get_page_faults_from_map(handle, last_time, cur_time, results, counts, maxlen); + } + } + /* Reached when no device is open or the driver is not eBPF: report an explicit error instead of falling off the end of a non-void function. */ + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_get_page_faults_from_map is only supported with the eBPF driver"); + return SCAP_FAILURE; +} + int32_t scap_enable_page_faults(scap_t *handle) { if(handle->m_mode != SCAP_MODE_LIVE) diff --git a/userspace/libscap/scap.h b/userspace/libscap/scap.h index d9f63703b6..ecc460f56c 100644 --- a/userspace/libscap/scap.h +++ b/userspace/libscap/scap.h @@ -60,6 +60,7 @@ struct iovec; #include "uthash.h" #include "../common/types.h" #include "../../driver/ppm_events_public.h" +#include "../../driver/bpf/types.h" #ifdef _WIN32 #include #define MAP_FAILED (void*)-1 @@ -1067,6 +1068,7 @@ void scap_set_refresh_proc_table_when_saving(scap_t* handle, bool refresh); uint64_t scap_ftell(scap_t *handle); void scap_fseek(scap_t *handle, uint64_t off); int32_t scap_enable_tracers_capture(scap_t* handle); +int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen); int32_t scap_enable_page_faults(scap_t *handle); int32_t scap_enable_skb_capture(scap_t *handle); int32_t scap_disable_skb_capture(scap_t *handle); diff --git a/userspace/libscap/scap_bpf.c b/userspace/libscap/scap_bpf.c index 89d825eb39..58722f5d4d 100644 --- 
a/userspace/libscap/scap_bpf.c +++ b/userspace/libscap/scap_bpf.c @@ -116,6 +116,30 @@ static int bpf_map_update_elem(int fd, const void *key, const void *value, uint6 return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } +static int bpf_map_get_next_key(int fd, void *key, void *next_key){ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + + attr.map_fd = fd; + attr.key = (unsigned long) key; + attr.next_key = (unsigned long) next_key; + + return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); +} + +static int bpf_map_delete_elem(int fd, const void *key) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + + attr.map_fd = fd; + attr.key = (unsigned long) key; + + return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); +} + static int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; @@ -838,8 +862,17 @@ static int32_t load_bpf_file(scap_t *handle, const char *path) memcmp(shname, "raw_tracepoint/", sizeof("raw_tracepoint/") - 1) == 0 || memcmp(shname, "kprobe/", sizeof("kprobe/") - 1) == 0 || memcmp(shname, "kretprobe/", sizeof("kretprobe/") - 1) == 0) - { - int load_result = load_tracepoint(handle, shname, data->d_buf, data->d_size); + { + /* Handling multiple programs on the same hook point: strip a trailing "_multiple" from the section name. The copy is bounded so an over-long section name is truncated instead of overflowing event[]. */ + char event[100]; + snprintf(event, sizeof(event), "%s", shname); + size_t len = strlen(event); + if(len >= 9 && strncmp(&event[len - 9], "_multiple", 9) == 0) + { + event[len - 9] = '\0'; + } + + int load_result = load_tracepoint(handle, event, data->d_buf, data->d_size); if((memcmp(shname, "kprobe/", sizeof("kprobe/") - 1) == 0 || memcmp(shname, "kretprobe/", sizeof("kretprobe/") - 1) == 0) && load_result == SCAP_UNKNOWN_KPROBE) @@ -1221,6 +1254,45 @@ int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle) return SCAP_SUCCESS; } +/* + |last_time|--------scan_interval--------|cur_time| + A(ignore): |pagefault.timestamp| + B(catch): |pagefault.timestamp| +*/ +int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t 
cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen) +{ + if(results == NULL || counts == NULL) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_bpf_get_page_faults_from_map: results and counts must not be NULL"); + return SCAP_FAILURE; + } + /* Publish an empty result up front so that no return path leaves *counts unset. */ + *counts = 0; + if(maxlen <= 0) + { + /* No room in results[]: do not touch the buffer at all. */ + return SCAP_SUCCESS; + } + int next_key, lookup_key; + lookup_key = -1; + int32_t cnt = 0; + while(bpf_map_get_next_key(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAP], &lookup_key, &next_key) == 0){ + if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAP], &next_key, &results[cnt]) != 0){ + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAP bpf_map_lookup_elem < 0"); + return SCAP_FAILURE; + } + if(results[cnt].timestamp > last_time && results[cnt].timestamp <= cur_time) { + cnt++; + } + lookup_key = next_key; + if(cnt >= maxlen){ + break; + } + } + *counts = cnt; + return SCAP_SUCCESS; +} + int32_t scap_bpf_enable_page_faults(scap_t* handle) { struct sysdig_bpf_settings settings; @@ -1443,6 +1515,7 @@ static int32_t set_default_settings(scap_t *handle) settings.is_dropping = false; settings.tracers_enabled = false; settings.skb_capture = false; + settings.pgft_map_clear = false; settings.fullcapture_port_range_start = 0; settings.fullcapture_port_range_end = 0; settings.statsd_port = 8125; diff --git a/userspace/libscap/scap_bpf.h b/userspace/libscap/scap_bpf.h index af2e248bf4..36c27baf67 100644 --- a/userspace/libscap/scap_bpf.h +++ b/userspace/libscap/scap_bpf.h @@ -18,6 +18,7 @@ limitations under the License. 
#define _SCAP_BPF_H #include "compat/perf_event.h" +#include "../../driver/bpf/types.h" struct perf_event_sample { struct perf_event_header header; @@ -41,6 +42,7 @@ int32_t scap_bpf_set_statsd_port(scap_t* handle, uint16_t port); int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle); int32_t scap_bpf_disable_dynamic_snaplen(scap_t* handle); int32_t scap_bpf_enable_page_faults(scap_t* handle); +int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen); int32_t scap_bpf_start_dropping_mode(scap_t* handle, uint32_t sampling_ratio); int32_t scap_bpf_stop_dropping_mode(scap_t* handle); int32_t scap_bpf_enable_tracers_capture(scap_t* handle); diff --git a/userspace/libsinsp/examples/test.cpp b/userspace/libsinsp/examples/test.cpp index 6f380400a9..24fd01ef1f 100644 --- a/userspace/libsinsp/examples/test.cpp +++ b/userspace/libsinsp/examples/test.cpp @@ -20,6 +20,7 @@ limitations under the License. #include #include #include +#include #include "util.h" using namespace std; @@ -48,6 +49,59 @@ static void usage() // "evt.category=process or evt.category=net" // "evt.dir=< and (evt.category=net or (evt.type=execveat or evt.type=execve or evt.type=clone or evt.type=fork or evt.type=vfork))" // +void testPagefault(sinsp *inspector){ + //uint32_t threadcount = inspector->m_thread_manager->get_thread_count(); + threadinfo_map_t *threadmap = inspector->m_thread_manager->get_threads(); + unordered_map threadstable = threadmap->getThreadsTable(); + unordered_map maj_mp, min_mp; //from pid to maj or min value + + //1. test initialization of threads table. 
+ cout << "total number of threads initialized is " << threadstable.size() << "...\n"; + for(auto e: threadstable){ + sinsp_threadinfo* tmp = e.second.get(); + if(tmp->m_pid == tmp->m_tid) continue; + maj_mp[tmp->m_pid] += tmp->m_pfmajor; + min_mp[tmp->m_pid] += tmp->m_pfminor; + //cout << "pid is " << tmp->m_pid << " & tid is " << tmp->m_tid << " maj_flt: " << tmp->m_pfmajor << "\tmin_flt: " << tmp->m_pfminor << '\n'; + } + for(auto e: min_mp){ + auto tmp = threadstable.find(e.first); + if(tmp == threadstable.end()) continue; /* main thread missing from the table: nothing to subtract from, and end() must not be dereferenced */ + sinsp_threadinfo* temp = inspector->build_threadinfo(); + temp->m_pid = temp->m_tid = e.first; + temp->m_pfminor = tmp->second->m_pfminor - e.second; + temp->m_pfmajor = tmp->second->m_pfmajor - maj_mp[e.first]; + threadstable[temp->m_tid] = threadinfo_map_t::ptr_t(temp); + } + for(auto e: threadstable){ + sinsp_threadinfo* tmp = e.second.get(); + cout << "pid is " << tmp->m_pid << " & tid is " << tmp->m_tid << ": maj_flt: " << tmp->m_pfmajor << "\tmin_flt: " << tmp->m_pfminor << '\n'; + + } + + //2. test eBPF pagefault_map + uint64_t last_time = 0; + pagefault_data *results = new pagefault_data[101000]; + int32_t maxlen = 65535; + int counts = 0; + for(int i = 0;i < 5;i++){ + chrono::nanoseconds ns = std::chrono::duration_cast< std::chrono::nanoseconds>( + std::chrono::system_clock::now().time_since_epoch() + ); + uint64_t cur = ns.count(); + inspector->get_page_faults_from_map(last_time, cur, results, &counts, maxlen); + last_time = cur; + cout << "curtime: " << cur << "---catch " << counts << " pagefaults from 2 seconds before." 
<< endl; + for(int j = 0; j < counts;j++){ + cout << "pid: " << results[j].pid << " tid: " << results[j].tid << " major: " << results[j].maj_flt << " minor: " << results[j].min_flt + << " vmsize: " << results[j].vm_size << " vmrss: " << results[j].vm_rss << " vmswap: " << results[j].vm_swap << " timestamp: " << results[j].timestamp << endl << endl; + } + sleep(2); + } + + delete []results; +} + int main(int argc, char **argv) { sinsp inspector; @@ -82,7 +136,9 @@ int main(int argc, char **argv) signal(SIGPIPE, sigint_handler); inspector.open(); + inspector.enable_page_faults(); + if(!filter_string.empty()) { try @@ -94,6 +150,8 @@ int main(int argc, char **argv) } } + testPagefault(&inspector); + while(!g_interrupted) { sinsp_evt* ev = NULL; @@ -116,8 +174,23 @@ int main(int argc, char **argv) string cmdline; sinsp_threadinfo::populate_cmdline(cmdline, thread); - if(thread->is_main_thread()) + if(ev->get_type() == PPME_PAGE_FAULT_E) { + cout << "[PAGEFAULTS]:[PID=" << thread->m_pid << "]:" + << "[TID=" << thread->m_tid << "]:" + << "[TYPE=" << get_event_type(ev->get_type()) << "]:" + << "[EXE=" << thread->get_exepath() << "]:" + << "[CMD=" << cmdline << "]" + << "[PAGE_FAULT_MAJOR=" << *((uint64_t *) (ev->get_param(0)->m_val)) << "]:" + << "[PAGE_FAULT_MINOR=" << *((uint64_t *) (ev->get_param(1)->m_val)) << "]" + << "[VMSIZE=" << *((uint32_t *) (ev->get_param(2)->m_val)) << "]" + << "[VMRSS=" << *((uint32_t *) (ev->get_param(3)->m_val)) << "]" + << "[VMSWAP=" << *((uint32_t *) (ev->get_param(4)->m_val)) << "]" + << endl << endl; + } + + if(thread->is_main_thread()) + { string date_time; sinsp_utils::ts_to_iso_8601(ev->get_ts(), &date_time); @@ -163,13 +236,13 @@ int main(int argc, char **argv) { parent_pid = p_thr->m_pid; } - cout << "[PPID=" << parent_pid << "]:" << "[PID=" << thread->m_pid << "]:" + << "[TID=" << thread->m_tid << "]:" << "[TYPE=" << get_event_type(ev->get_type()) << "]:" << "[EXE=" << thread->get_exepath() << "]:" << "[CMD=" << cmdline << "]" - 
<< endl; + << endl << endl; } } else diff --git a/userspace/libsinsp/examples/util.cpp b/userspace/libsinsp/examples/util.cpp index 3ebe86f71b..17518f7584 100644 --- a/userspace/libsinsp/examples/util.cpp +++ b/userspace/libsinsp/examples/util.cpp @@ -325,6 +325,9 @@ std::string get_event_type(uint16_t type) case PPME_SOCKET_RECVMSG_X: return "recvmsg"; case PPME_SOCKET_RECVMMSG_E: case PPME_SOCKET_RECVMMSG_X: return "recvmmsg"; + //page fault + case PPME_PAGE_FAULT_E: + case PPME_PAGE_FAULT_X: return "pagefault"; default: return "UNKNOWN " + to_string(type); }; } diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index f6d131bacd..78efa4291f 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -248,6 +248,27 @@ void sinsp::enable_tracers_capture() #endif } +int32_t sinsp::get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen) +{ +#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) + if(is_live() && m_h != NULL) + { + int32_t ret = scap_get_page_faults_from_map(m_h, last_time, cur_time, results, counts, maxlen); + if(ret == SCAP_FAILURE) + { + throw sinsp_exception("error getting page_faults from map"); + } + return ret; + } +#endif + /* Not a live capture, or capture support is compiled out: report an empty result instead of falling off the end of a non-void function. */ + if(counts != NULL) + { + *counts = 0; + } + return SCAP_FAILURE; +} + void sinsp::enable_page_faults() { #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! 
defined(_WIN32) @@ -261,6 +282,7 @@ void sinsp::enable_page_faults() #endif } + void sinsp::init() { // diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index 1206e9a0ca..a575d0c7e8 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -893,6 +893,7 @@ class SINSP_PUBLIC sinsp : public capture_stats_source, public wmi_handle_source void remove_meta_event_callback(); void filter_proc_table_when_saving(bool filter); void enable_tracers_capture(); + int32_t get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen); void enable_page_faults(); uint64_t get_bytes_read() { diff --git a/userspace/libsinsp/threadinfo.cpp b/userspace/libsinsp/threadinfo.cpp index 6c2edddfa8..30c24f569b 100644 --- a/userspace/libsinsp/threadinfo.cpp +++ b/userspace/libsinsp/threadinfo.cpp @@ -1782,7 +1782,8 @@ threadinfo_map_t::ptr_t sinsp_thread_manager::find_thread(int64_t tid, bool look if(tid == m_last_tid) { thr = m_last_tinfo.lock(); - if (thr) { + if(thr) + { #ifdef GATHER_INTERNAL_STATS m_cached_lookups->increment(); #endif @@ -1790,7 +1791,8 @@ threadinfo_map_t::ptr_t sinsp_thread_manager::find_thread(int64_t tid, bool look // for something that may not need to be precise thr->m_lastaccess_ts = m_inspector->get_lastevent_ts(); return thr; - } } + } + } // // Caching failed, do a real lookup diff --git a/userspace/libsinsp/threadinfo.h b/userspace/libsinsp/threadinfo.h index db28f5c443..6959fdc9e7 100644 --- a/userspace/libsinsp/threadinfo.h +++ b/userspace/libsinsp/threadinfo.h @@ -33,6 +33,7 @@ struct iovec { #include #include #include +#include #include "fdinfo.h" #include "internal_metrics.h" @@ -526,6 +527,10 @@ class threadinfo_map_t return m_threads.size(); } + inline std::unordered_map getThreadsTable(){ + return m_threads; + } + protected: std::unordered_map m_threads; };