From 7b34ded432bf6ee0a0de9527bbbf2a55f2779eca Mon Sep 17 00:00:00 2001 From: yaofighting Date: Tue, 20 Dec 2022 12:14:03 +0800 Subject: [PATCH 1/3] support page fault Signed-off-by: yaofighting --- driver/bpf/fillers.h | 59 ++++++++++++++++-- driver/bpf/maps.h | 24 +++++--- driver/bpf/probe.c | 23 +++++++ driver/bpf/types.h | 8 ++- driver/event_table.c | 2 +- driver/ppm_events_public.h | 1 + userspace/libscap/scap.c | 27 +++++++++ userspace/libscap/scap.h | 3 + userspace/libscap/scap_bpf.c | 89 ++++++++++++++++++++++++++++ userspace/libscap/scap_bpf.h | 3 + userspace/libsinsp/examples/test.cpp | 44 +++++++++++++- userspace/libsinsp/examples/util.cpp | 3 + userspace/libsinsp/sinsp.cpp | 41 +++++++++++++ userspace/libsinsp/sinsp.h | 3 + userspace/libsinsp/threadinfo.h | 5 ++ 15 files changed, 317 insertions(+), 18 deletions(-) diff --git a/driver/bpf/fillers.h b/driver/bpf/fillers.h index 917701515d..c7b1e5f71a 100644 --- a/driver/bpf/fillers.h +++ b/driver/bpf/fillers.h @@ -3898,6 +3898,13 @@ FILLER(sys_pagefault_e, false) unsigned long error_code; unsigned long address; unsigned long ip; + struct task_struct *task; + unsigned long total_vm; + unsigned long maj_flt; + unsigned long min_flt; + struct mm_struct *mm; + long total_rss; + long swap; u32 flags; int res; @@ -3914,17 +3921,61 @@ FILLER(sys_pagefault_e, false) error_code = ctx->error_code; #endif - res = bpf_val_to_ring(data, address); + + + task = (struct task_struct *)bpf_get_current_task(); + + + /* + * pgft_maj + */ + maj_flt = _READ(task->maj_flt); + res = bpf_val_to_ring_type(data, maj_flt, PT_UINT64); if (res != PPM_SUCCESS) return res; - res = bpf_val_to_ring(data, ip); + /* + * pgft_min + */ + min_flt = _READ(task->min_flt); + res = bpf_val_to_ring_type(data, min_flt, PT_UINT64); if (res != PPM_SUCCESS) return res; - flags = pf_flags_to_scap(error_code); - res = bpf_val_to_ring(data, flags); + total_vm = 0; + total_rss = 0; + swap = 0; + + mm = _READ(task->mm); + if (mm) { + total_vm = 
_READ(mm->total_vm); + total_vm <<= (PAGE_SHIFT - 10); + total_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10); + swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10); + } + + /* + * vm_size + */ + res = bpf_val_to_ring_type(data, total_vm, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + /* + * vm_rss + */ + res = bpf_val_to_ring_type(data, total_rss, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + /* + * vm_swap + */ + res = bpf_val_to_ring_type(data, swap, PT_UINT32); + pid_t tid = _READ(task->pid); + int map_res = bpf_map_update_elem(&pgft_major_map, &tid, &maj_flt, BPF_ANY); + if(map_res != 0) return PPM_MAP_FAILURE; return res; } diff --git a/driver/bpf/maps.h b/driver/bpf/maps.h index ba34a30a0f..a2232f5e6c 100644 --- a/driver/bpf/maps.h +++ b/driver/bpf/maps.h @@ -90,14 +90,6 @@ struct bpf_map_def __bpf_section("maps") local_state_map = { .max_entries = 0, }; -#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS -struct bpf_map_def __bpf_section("maps") stash_map = { - .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(u64), - .value_size = sizeof(struct sys_stash_args), - .max_entries = 65535, -}; -#endif struct bpf_map_def __bpf_section("maps") rtt_static_map = { .type = BPF_MAP_TYPE_HASH, @@ -113,6 +105,22 @@ struct bpf_map_def __bpf_section("maps") stash_tuple_map = { .max_entries = 65535, }; +struct bpf_map_def __bpf_section("maps") pgft_major_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(pid_t), + .value_size = sizeof(u64), + .max_entries = 1048576, +}; + +#ifndef BPF_SUPPORTS_RAW_TRACEPOINTS +struct bpf_map_def __bpf_section("maps") stash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u64), + .value_size = sizeof(struct sys_stash_args), + .max_entries = 65535, +}; +#endif + enum offcpu_type { ON, // 0 DISK, // 1 diff --git a/driver/bpf/probe.c b/driver/bpf/probe.c index b8a80666ca..2d51df6284 100644 --- a/driver/bpf/probe.c +++ b/driver/bpf/probe.c @@ -409,6 +409,29 @@ static __always_inline int bpf_page_fault(struct page_fault_args *ctx) 
if (!settings->capture_enabled) return 0; + + if(settings->pgft_map_clear) + return 0; + + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + unsigned long maj_flt = _READ(task->maj_flt); + if(maj_flt == 0){ + return 0; + } + pid_t tid = _READ(task->pid); + unsigned long *last_maj = bpf_map_lookup_elem(&pgft_major_map, &tid); + if(last_maj && *last_maj == maj_flt){ + return 0; + } + + if(!last_maj){ + int key = -1; + unsigned long *page_faults_threads_number = bpf_map_lookup_elem(&pgft_major_map, &key); + if(page_faults_threads_number){ + (*page_faults_threads_number)++; + bpf_map_update_elem(&pgft_major_map, &key, page_faults_threads_number, BPF_ANY); + } + } evt_type = PPME_PAGE_FAULT_E; diff --git a/driver/bpf/types.h b/driver/bpf/types.h index 613c243a87..d2d61342de 100644 --- a/driver/bpf/types.h +++ b/driver/bpf/types.h @@ -261,10 +261,13 @@ enum sysdig_map_types { SYSDIG_TMP_SCRATCH_MAP = 7, SYSDIG_SETTINGS_MAP = 8, SYSDIG_LOCAL_STATE_MAP = 9, + SYSDIG_RTT_STATISTICS = 10, + SYSDIG_STASH_TUPLE_MAP = 11, + SYSDIG_PAGEFAULT_MAJOR_MAP = 12, #ifndef BPF_SUPPORTS_RAW_TRACEPOINTS - SYSDIG_STASH_MAP = 10, - SYSDIG_RTT_STATISTICS = 11, + SYSDIG_STASH_MAP = 13, #endif + }; struct sysdig_bpf_settings { @@ -275,6 +278,7 @@ struct sysdig_bpf_settings { bool capture_enabled; bool do_dynamic_snaplen; bool page_faults; + bool pgft_map_clear; bool dropping_mode; bool is_dropping; bool tracers_enabled; diff --git a/driver/event_table.c b/driver/event_table.c index 689d54cd21..2a0e83085d 100644 --- a/driver/event_table.c +++ b/driver/event_table.c @@ -302,7 +302,7 @@ const struct ppm_event_info g_event_info[PPM_EVENT_MAX] = { /* PPME_INFRASTRUCTURE_EVENT_X */{"NA4", EC_SYSTEM, EF_UNUSED, 0}, /* PPME_SYSCALL_EXECVE_18_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 1, {{"filename", PT_FSPATH, PF_NA} } }, /* PPME_SYSCALL_EXECVE_18_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE | EF_OLD_VERSION, 17, {{"res", PT_ERRNO, PF_DEC}, {"exe", 
PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC} } }, - /* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 3, {{"addr", PT_UINT64, PF_HEX}, {"ip", PT_UINT64, PF_HEX}, {"error", PT_FLAGS32, PF_HEX, pf_flags} } }, + /* PPME_PAGE_FAULT_E */ {"page_fault", EC_OTHER, EF_SKIPPARSERESET | EF_DROP_SIMPLE_CONS, 5, {{"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC} } }, /* PPME_PAGE_FAULT_X */ {"NA5", EC_OTHER, EF_UNUSED, 0}, /* PPME_SYSCALL_EXECVE_19_E */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 1, {{"filename", PT_FSPATH, PF_NA} } }, /* PPME_SYSCALL_EXECVE_19_X */{"execve", EC_PROCESS, EF_MODIFIES_STATE, 19, {{"res", PT_ERRNO, PF_DEC}, {"exe", PT_CHARBUF, PF_NA}, {"args", PT_BYTEBUF, PF_NA}, {"tid", PT_PID, PF_DEC}, {"pid", PT_PID, PF_DEC}, {"ptid", PT_PID, PF_DEC}, {"cwd", PT_CHARBUF, PF_NA}, {"fdlimit", PT_UINT64, PF_DEC}, {"pgft_maj", PT_UINT64, PF_DEC}, {"pgft_min", PT_UINT64, PF_DEC}, {"vm_size", PT_UINT32, PF_DEC}, {"vm_rss", PT_UINT32, PF_DEC}, {"vm_swap", PT_UINT32, PF_DEC}, {"comm", PT_CHARBUF, PF_NA}, {"cgroups", PT_BYTEBUF, PF_NA}, {"env", PT_BYTEBUF, PF_NA}, {"tty", PT_INT32, PF_DEC}, {"pgid", PT_PID, PF_DEC}, {"loginuid", PT_INT32, PF_DEC} } }, diff --git a/driver/ppm_events_public.h b/driver/ppm_events_public.h index 3f86018c85..aba5d69809 100644 --- a/driver/ppm_events_public.h +++ b/driver/ppm_events_public.h @@ -1663,6 +1663,7 @@ struct ppm_event_entry { #define PPM_FAILURE_INVALID_USER_MEMORY -2 #define 
PPM_FAILURE_BUG -3 #define PPM_SKIP_EVENT -4 +#define PPM_MAP_FAILURE -5 #define RW_SNAPLEN 80 #define RW_MAX_SNAPLEN PPM_MAX_ARG_SIZE diff --git a/userspace/libscap/scap.c b/userspace/libscap/scap.c index 4279e92fa7..f23e2a2f9b 100644 --- a/userspace/libscap/scap.c +++ b/userspace/libscap/scap.c @@ -35,6 +35,7 @@ limitations under the License. #endif // _WIN32 #include "scap.h" +#include "../../driver/bpf/types.h" #ifdef HAS_CAPTURE #if !defined(_WIN32) && !defined(CYGWING_AGENT) #include "driver_config.h" @@ -1918,6 +1919,32 @@ int32_t scap_enable_tracers_capture(scap_t* handle) } #endif +int scap_get_pagefaults_threads_number(scap_t *handle){ + return scap_bpf_get_pagefault_threads_number(handle); +} + +int32_t scap_update_pagefaults_thread_number(scap_t *handle, int tid, unsigned long val){ + return scap_bpf_update_pagefaults_threads_number(handle, tid, val); +} + +#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) +int32_t scap_pagefaults_map_clear(scap_t *handle){ + if(handle->m_mode != SCAP_MODE_LIVE) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_pagefaults_map_clear not supported on this scap mode"); + ASSERT(false); + return SCAP_FAILURE; + } + if(handle->m_ndevs) + { + if(handle->m_bpf) + { + return scap_bpf_clear_pagefault_map(handle); + } + } +} +#endif + #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! 
defined(_WIN32) int32_t scap_enable_page_faults(scap_t *handle) { diff --git a/userspace/libscap/scap.h b/userspace/libscap/scap.h index d9f63703b6..995312b7d7 100644 --- a/userspace/libscap/scap.h +++ b/userspace/libscap/scap.h @@ -1068,6 +1068,9 @@ uint64_t scap_ftell(scap_t *handle); void scap_fseek(scap_t *handle, uint64_t off); int32_t scap_enable_tracers_capture(scap_t* handle); int32_t scap_enable_page_faults(scap_t *handle); +int scap_get_pagefaults_threads_number(scap_t *handle); +int32_t scap_update_pagefaults_thread_number(scap_t *handle, int tid, unsigned long val); +int32_t scap_pagefaults_map_clear(scap_t *handle); int32_t scap_enable_skb_capture(scap_t *handle); int32_t scap_disable_skb_capture(scap_t *handle); uint64_t scap_get_unexpected_block_readsize(scap_t* handle); diff --git a/userspace/libscap/scap_bpf.c b/userspace/libscap/scap_bpf.c index 89d825eb39..7421641f6b 100644 --- a/userspace/libscap/scap_bpf.c +++ b/userspace/libscap/scap_bpf.c @@ -116,6 +116,30 @@ static int bpf_map_update_elem(int fd, const void *key, const void *value, uint6 return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); } +static int bpf_map_get_next_key(int fd, void *key, void *next_key){ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + + attr.map_fd = fd; + attr.key = (unsigned long) key; + attr.next_key = (unsigned long) next_key; + + return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); +} + +static int bpf_map_delete_elem(int fd, const void *key) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + + attr.map_fd = fd; + attr.key = (unsigned long) key; + + return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); +} + static int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; @@ -1220,7 +1244,71 @@ int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle) return SCAP_SUCCESS; } +int32_t scap_bpf_clear_pagefault_map(scap_t* handle){ + struct sysdig_bpf_settings settings; + int k = 0; + 
if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP], &k, &settings) != 0) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_lookup_elem < 0"); + return SCAP_FAILURE; + } + + // start to clear map & set the mutex + settings.pgft_map_clear = true; + if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP], &k, &settings, BPF_ANY) != 0) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_update_elem < 0"); + return SCAP_FAILURE; + } + + + int next_key, lookup_key; + lookup_key = -1; + while(bpf_map_get_next_key(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &lookup_key, &next_key) == 0){ + bpf_map_delete_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &next_key); + lookup_key = next_key; + } + + // end up to clear map & clear the mutex + settings.pgft_map_clear = false; + if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP], &k, &settings, BPF_ANY) != 0) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_update_elem < 0"); + return SCAP_FAILURE; + } + + + k = -1; + unsigned long val = 0; + + if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &k, &val, BPF_ANY) != 0) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_update_elem < 0"); + return SCAP_FAILURE; + } + + return SCAP_SUCCESS; +} +int scap_bpf_get_pagefault_threads_number(scap_t* handle){ + int k = -1; + unsigned long val = 0; + + if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &k, &val) != 0) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_lookup_elem < 0"); + return -1; + } + return val; +} +int32_t scap_bpf_update_pagefaults_threads_number(scap_t* handle, int tid, unsigned long val){ + if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &tid, &val, BPF_ANY) != 0) + { + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, 
"SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_update_elem < 0"); + return SCAP_FAILURE; + } + return SCAP_SUCCESS; +} int32_t scap_bpf_enable_page_faults(scap_t* handle) { struct sysdig_bpf_settings settings; @@ -1443,6 +1531,7 @@ static int32_t set_default_settings(scap_t *handle) settings.is_dropping = false; settings.tracers_enabled = false; settings.skb_capture = false; + settings.pgft_map_clear = false; settings.fullcapture_port_range_start = 0; settings.fullcapture_port_range_end = 0; settings.statsd_port = 8125; diff --git a/userspace/libscap/scap_bpf.h b/userspace/libscap/scap_bpf.h index af2e248bf4..b41abe705e 100644 --- a/userspace/libscap/scap_bpf.h +++ b/userspace/libscap/scap_bpf.h @@ -40,7 +40,10 @@ int32_t scap_bpf_set_fullcapture_port_range(scap_t* handle, uint16_t range_start int32_t scap_bpf_set_statsd_port(scap_t* handle, uint16_t port); int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle); int32_t scap_bpf_disable_dynamic_snaplen(scap_t* handle); +int32_t scap_bpf_clear_pagefault_map(scap_t* handle); int32_t scap_bpf_enable_page_faults(scap_t* handle); +int scap_bpf_get_pagefault_threads_number(scap_t* handle); +int32_t scap_bpf_update_pagefaults_threads_number(scap_t* handle, int tid, unsigned long val); int32_t scap_bpf_start_dropping_mode(scap_t* handle, uint32_t sampling_ratio); int32_t scap_bpf_stop_dropping_mode(scap_t* handle); int32_t scap_bpf_enable_tracers_capture(scap_t* handle); diff --git a/userspace/libsinsp/examples/test.cpp b/userspace/libsinsp/examples/test.cpp index 6f380400a9..d1580166f1 100644 --- a/userspace/libsinsp/examples/test.cpp +++ b/userspace/libsinsp/examples/test.cpp @@ -48,6 +48,39 @@ static void usage() // "evt.category=process or evt.category=net" // "evt.dir=< and (evt.category=net or (evt.type=execveat or evt.type=execve or evt.type=clone or evt.type=fork or evt.type=vfork))" // +void printThreadTable(sinsp *inspector, int flag){ + //uint32_t threadcount = inspector->m_thread_manager->get_thread_count(); + 
threadinfo_map_t *threadmap = inspector->m_thread_manager->get_threads(); + unordered_map threadstable = threadmap->getThreadsTable(); + unordered_map maj_mp, min_mp; //from pid to maj or min value + + cout << "total number of threads initialized is " << threadstable.size() << "...\n"; + for(auto e: threadstable){ + sinsp_threadinfo* tmp = e.second.get(); + if(tmp->m_pid == tmp->m_tid) continue; + maj_mp[tmp->m_pid] += tmp->m_pfmajor; + min_mp[tmp->m_pid] += tmp->m_pfminor; + //cout << "pid is " << tmp->m_pid << " & tid is " << tmp->m_tid << " maj_flt: " << tmp->m_pfmajor << "\tmin_flt: " << tmp->m_pfminor << '\n'; + } + for(auto e: min_mp){ + auto tmp = threadstable.find(e.first); + sinsp_threadinfo* temp = inspector->build_threadinfo(); + temp->m_pid = temp->m_tid = e.first; + temp->m_pfminor = tmp->second->m_pfminor - e.second; + temp->m_pfmajor = tmp->second->m_pfmajor - maj_mp[e.first]; + threadstable[temp->m_tid] = threadinfo_map_t::ptr_t(temp); + } + for(auto e: threadstable){ + sinsp_threadinfo* tmp = e.second.get(); + if(flag) + inspector->update_pagefaults_threads_number(tmp->m_tid, tmp->m_pfmajor); + cout << "pid is " << tmp->m_pid << " & tid is " << tmp->m_tid << " maj_flt: " << tmp->m_pfmajor << "\tmin_flt: " << tmp->m_pfminor << '\n'; + + } + if(flag) + inspector->update_pagefaults_threads_number(-1, threadstable.size()); +} + int main(int argc, char **argv) { sinsp inspector; @@ -82,7 +115,9 @@ int main(int argc, char **argv) signal(SIGPIPE, sigint_handler); inspector.open(); + inspector.enable_page_faults(); + if(!filter_string.empty()) { try @@ -94,6 +129,7 @@ int main(int argc, char **argv) } } + int cnt = 0; while(!g_interrupted) { sinsp_evt* ev = NULL; @@ -117,7 +153,7 @@ int main(int argc, char **argv) sinsp_threadinfo::populate_cmdline(cmdline, thread); if(thread->is_main_thread()) - { + { string date_time; sinsp_utils::ts_to_iso_8601(ev->get_ts(), &date_time); @@ -163,13 +199,15 @@ int main(int argc, char **argv) { parent_pid = p_thr->m_pid; 
} - cout << "[PPID=" << parent_pid << "]:" << "[PID=" << thread->m_pid << "]:" + << "[TID=" << thread->m_tid << "]:" << "[TYPE=" << get_event_type(ev->get_type()) << "]:" << "[EXE=" << thread->get_exepath() << "]:" << "[CMD=" << cmdline << "]" - << endl; + << "[PAGE_FAULT_MAJOR=" << *((uint64_t *) (ev->get_param(0)->m_val)) << "]:" + << "[PAGE_FAULT_MINOR=" << *((uint64_t *) (ev->get_param(1)->m_val)) << "]" + << endl << endl; } } else diff --git a/userspace/libsinsp/examples/util.cpp b/userspace/libsinsp/examples/util.cpp index 3ebe86f71b..17518f7584 100644 --- a/userspace/libsinsp/examples/util.cpp +++ b/userspace/libsinsp/examples/util.cpp @@ -325,6 +325,9 @@ std::string get_event_type(uint16_t type) case PPME_SOCKET_RECVMSG_X: return "recvmsg"; case PPME_SOCKET_RECVMMSG_E: case PPME_SOCKET_RECVMMSG_X: return "recvmmsg"; + //page fault + case PPME_PAGE_FAULT_E: + case PPME_PAGE_FAULT_X: return "pagefault"; default: return "UNKNOWN " + to_string(type); }; } diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index f6d131bacd..38276b3388 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -248,6 +248,19 @@ void sinsp::enable_tracers_capture() #endif } +void sinsp::clear_page_faults_map() +{ +#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) + if(is_live() && m_h != NULL) + { + if(scap_pagefaults_map_clear(m_h) != SCAP_SUCCESS) + { + throw sinsp_exception("error clearing page_faults map"); + } + } +#endif +} + void sinsp::enable_page_faults() { #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) @@ -261,6 +274,34 @@ void sinsp::enable_page_faults() #endif } +int sinsp::get_pagefault_threads_number(){ + #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! 
defined(_WIN32) + if(is_live() && m_h != NULL) + { + int ret = scap_get_pagefaults_threads_number(m_h); + if(ret == -1) + { + throw sinsp_exception("error getting page_faults threads number"); + } + return ret; + } +#endif +} + +void sinsp::update_pagefaults_threads_number(int tid, unsigned long val) +{ +#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) + if(is_live() && m_h != NULL) + { + if(scap_update_pagefaults_thread_number(m_h, tid, val) != SCAP_SUCCESS) + { + throw sinsp_exception("error updating page_faults threads number"); + } + } +#endif +} + + void sinsp::init() { // diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index 1206e9a0ca..5cb36a94a2 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -893,7 +893,10 @@ class SINSP_PUBLIC sinsp : public capture_stats_source, public wmi_handle_source void remove_meta_event_callback(); void filter_proc_table_when_saving(bool filter); void enable_tracers_capture(); + void clear_page_faults_map(); void enable_page_faults(); + int get_pagefault_threads_number(); + void update_pagefaults_threads_number(int tid, unsigned long val); uint64_t get_bytes_read() { return scap_ftell(m_h); diff --git a/userspace/libsinsp/threadinfo.h b/userspace/libsinsp/threadinfo.h index db28f5c443..6959fdc9e7 100644 --- a/userspace/libsinsp/threadinfo.h +++ b/userspace/libsinsp/threadinfo.h @@ -33,6 +33,7 @@ struct iovec { #include #include #include +#include #include "fdinfo.h" #include "internal_metrics.h" @@ -526,6 +527,10 @@ class threadinfo_map_t return m_threads.size(); } + inline std::unordered_map getThreadsTable(){ + return m_threads; + } + protected: std::unordered_map m_threads; }; From 552ede18eb923ec40bc1a7ca0aa20d6647c789bc Mon Sep 17 00:00:00 2001 From: yaofighting Date: Mon, 20 Mar 2023 15:49:01 +0800 Subject: [PATCH 2/3] Optimization: Improve the performance of pagefault. 
Signed-off-by: yaofighting --- driver/bpf/fillers.h | 63 +++++++++++++++++++- driver/bpf/maps.h | 4 +- driver/bpf/plumbing_helpers.h | 2 +- driver/bpf/probe.c | 84 +++++++++++++++++++-------- driver/bpf/types.h | 13 ++++- userspace/libscap/scap.c | 16 ++--- userspace/libscap/scap.h | 5 +- userspace/libscap/scap_bpf.c | 87 +++++++++------------------- userspace/libscap/scap_bpf.h | 5 +- userspace/libsinsp/examples/test.cpp | 51 +++++++++++++--- userspace/libsinsp/sinsp.cpp | 35 ++--------- userspace/libsinsp/sinsp.h | 4 +- userspace/libsinsp/threadinfo.cpp | 6 +- 13 files changed, 223 insertions(+), 152 deletions(-) diff --git a/driver/bpf/fillers.h b/driver/bpf/fillers.h index 4aaa1a0964..9f8dd09177 100644 --- a/driver/bpf/fillers.h +++ b/driver/bpf/fillers.h @@ -4089,6 +4089,63 @@ FILLER(sched_switch_e, false) return res; } +static __always_inline int __bpf_pagefault_analysis(struct filler_data *data, u32 tid) +{ + int res; + struct pagefault_data *pgftp = bpf_map_lookup_elem(&pagefault_map, &tid); + if (!pgftp) + return 0; + + // {"pgft_maj", PT_UINT64, PF_DEC}, + res = bpf_val_to_ring_type(data, pgftp->maj_flt, PT_UINT64); + if (res != PPM_SUCCESS) + return res; + + // {"pgft_min", PT_UINT64, PF_DEC}, + res = bpf_val_to_ring_type(data, pgftp->min_flt, PT_UINT64); + if (res != PPM_SUCCESS) + return res; + + // {"vm_size", PT_UINT32, PF_DEC}, + res = bpf_val_to_ring_type(data, pgftp->vm_size, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + // {"vm_rss", PT_UINT32, PF_DEC} + res = bpf_val_to_ring_type(data, pgftp->vm_rss, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + // {"vm_swap", PT_UINT32, PF_DEC} + res = bpf_val_to_ring_type(data, pgftp->vm_swap, PT_UINT32); + if (res != PPM_SUCCESS) + return res; + + return res; +} + +static __always_inline int bpf_pagefault_analysis(void *ctx, u32 tid) +{ + struct filler_data data; + int res; + + res = init_filler_data(ctx, &data, false); + if (res == PPM_SUCCESS) { + if (!data.state->tail_ctx.len) + 
write_evt_hdr(&data); + res = __bpf_pagefault_analysis(&data, tid); + } + + if (res == PPM_SUCCESS) + res = push_evt_frame(ctx, &data); + + if (data.state) + data.state->tail_ctx.prev_res = res; + + bpf_kp_terminate_filler(&data); + return 0; +} + FILLER(sys_pagefault_e, false) { struct page_fault_args *ctx; @@ -4170,9 +4227,9 @@ FILLER(sys_pagefault_e, false) */ res = bpf_val_to_ring_type(data, swap, PT_UINT32); - pid_t tid = _READ(task->pid); - int map_res = bpf_map_update_elem(&pgft_major_map, &tid, &maj_flt, BPF_ANY); - if(map_res != 0) return PPM_MAP_FAILURE; + // pid_t tid = _READ(task->pid); + // int map_res = bpf_map_update_elem(&pgft_major_map, &tid, &maj_flt, BPF_ANY); + // if(map_res != 0) return PPM_MAP_FAILURE; return res; } diff --git a/driver/bpf/maps.h b/driver/bpf/maps.h index 160a821eb3..04f424f255 100644 --- a/driver/bpf/maps.h +++ b/driver/bpf/maps.h @@ -105,10 +105,10 @@ struct bpf_map_def __bpf_section("maps") stash_tuple_map = { .max_entries = 65535, }; -struct bpf_map_def __bpf_section("maps") pgft_major_map = { +struct bpf_map_def __bpf_section("maps") pagefault_map = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(pid_t), - .value_size = sizeof(u64), + .value_size = sizeof(struct pagefault_data), .max_entries = 1048576, }; diff --git a/driver/bpf/plumbing_helpers.h b/driver/bpf/plumbing_helpers.h index 99c86df855..0e79dadd67 100644 --- a/driver/bpf/plumbing_helpers.h +++ b/driver/bpf/plumbing_helpers.h @@ -712,7 +712,7 @@ static __always_inline bool prepare_filler(void *ctx, goto cleanup; return true; - cleanup: +cleanup: release_local_state(state); return false; } diff --git a/driver/bpf/probe.c b/driver/bpf/probe.c index da4ce455ab..e9e25ad967 100644 --- a/driver/bpf/probe.c +++ b/driver/bpf/probe.c @@ -152,6 +152,39 @@ BPF_PROBE("raw_syscalls/", sys_exit, sys_exit_args) return 0; } +// Multiple eBPF programs on the same hook point +// Warning: this prog must be in the front of another. Make sure the tail_call is at the end. 
+BPF_PROBE("sched/", sched_process_exit_multiple, sched_process_exit_args) +{ + struct sysdig_bpf_settings *settings; + struct task_struct *task; + unsigned int flags; + + task = (struct task_struct *)bpf_get_current_task(); + + flags = _READ(task->flags); + if (flags & PF_KTHREAD) + return 0; + + settings = get_bpf_settings(); + if (!settings) + return 0; + + if (!settings->capture_enabled) + return 0; + + u32 tid = _READ(task->pid); + //perf out pagefault data as an event when the thread exited. + if (settings->page_faults) { + if (prepare_filler(ctx, ctx, PPME_PAGE_FAULT_E, settings, 0)) { + bpf_pagefault_analysis(ctx, tid); + } + bpf_map_delete_elem(&pagefault_map, &tid); + } + + return 0; +} + BPF_PROBE("sched/", sched_process_exit, sched_process_exit_args) { struct sysdig_bpf_settings *settings; @@ -174,8 +207,8 @@ BPF_PROBE("sched/", sched_process_exit, sched_process_exit_args) evt_type = PPME_PROCEXIT_1_E; #ifdef CPU_ANALYSIS - // perf out u32 tid = _READ(task->pid); + // perf out if (prepare_filler(ctx, ctx, PPME_CPU_ANALYSIS_E, settings, 0)) { bpf_cpu_analysis(ctx, tid); } @@ -324,7 +357,6 @@ BPF_PROBE("sched/", sched_wakeup, sched_process_exit_args) static __always_inline int bpf_page_fault(struct page_fault_args *ctx) { struct sysdig_bpf_settings *settings; - enum ppm_event_type evt_type; settings = get_bpf_settings(); if (!settings) @@ -335,33 +367,37 @@ static __always_inline int bpf_page_fault(struct page_fault_args *ctx) if (!settings->capture_enabled) return 0; - - if(settings->pgft_map_clear) - return 0; - struct task_struct *task = (struct task_struct *)bpf_get_current_task(); - unsigned long maj_flt = _READ(task->maj_flt); - if(maj_flt == 0){ - return 0; - } + struct mm_struct *mm = _READ(task->mm); pid_t tid = _READ(task->pid); - unsigned long *last_maj = bpf_map_lookup_elem(&pgft_major_map, &tid); - if(last_maj && *last_maj == maj_flt){ - return 0; + pid_t pid = _READ(task->tgid); + u64 cur_maj = _READ(task->maj_flt); + struct pagefault_data 
*last_pgft = bpf_map_lookup_elem(&pagefault_map, &tid); + if(!last_pgft && cur_maj != 0) + { + struct pagefault_data pgft_data = {}; + bpf_map_update_elem(&pagefault_map, &tid, &pgft_data, BPF_ANY); + last_pgft = bpf_map_lookup_elem(&pagefault_map, &tid); } - - if(!last_maj){ - int key = -1; - unsigned long *page_faults_threads_number = bpf_map_lookup_elem(&pgft_major_map, &key); - if(page_faults_threads_number){ - (*page_faults_threads_number)++; - bpf_map_update_elem(&pgft_major_map, &key, page_faults_threads_number, BPF_ANY); + + if(last_pgft && cur_maj != last_pgft->maj_flt) + { + last_pgft->pid = pid; + last_pgft->tid = tid; + last_pgft->maj_flt = _READ(task->maj_flt); + last_pgft->min_flt = _READ(task->min_flt); + + if (mm) + { + last_pgft->vm_size = _READ(mm->total_vm); + last_pgft->vm_size <<= (PAGE_SHIFT - 10); + last_pgft->vm_rss = bpf_get_mm_rss(mm) << (PAGE_SHIFT - 10); + last_pgft->vm_swap = bpf_get_mm_swap(mm) << (PAGE_SHIFT - 10); } - } - - evt_type = PPME_PAGE_FAULT_E; - call_filler(ctx, ctx, evt_type, settings, UF_ALWAYS_DROP); + last_pgft->timestamp = bpf_ktime_get_ns() + settings->boot_time; + } + return 0; } diff --git a/driver/bpf/types.h b/driver/bpf/types.h index d2d61342de..6c5933076a 100644 --- a/driver/bpf/types.h +++ b/driver/bpf/types.h @@ -250,6 +250,17 @@ struct tcp_reset_args { }; #endif +struct pagefault_data { + __u32 pid; + __u32 tid; + __u32 vm_size; + __u32 vm_rss; + __u32 vm_swap; + __u64 maj_flt; + __u64 min_flt; + __u64 timestamp; +}; + enum sysdig_map_types { SYSDIG_PERF_MAP = 0, SYSDIG_TAIL_MAP = 1, @@ -263,7 +274,7 @@ enum sysdig_map_types { SYSDIG_LOCAL_STATE_MAP = 9, SYSDIG_RTT_STATISTICS = 10, SYSDIG_STASH_TUPLE_MAP = 11, - SYSDIG_PAGEFAULT_MAJOR_MAP = 12, + SYSDIG_PAGEFAULT_MAP = 12, #ifndef BPF_SUPPORTS_RAW_TRACEPOINTS SYSDIG_STASH_MAP = 13, #endif diff --git a/userspace/libscap/scap.c b/userspace/libscap/scap.c index f23e2a2f9b..55e9325c85 100644 --- a/userspace/libscap/scap.c +++ b/userspace/libscap/scap.c @@ 
-1919,19 +1919,13 @@ int32_t scap_enable_tracers_capture(scap_t* handle) } #endif -int scap_get_pagefaults_threads_number(scap_t *handle){ - return scap_bpf_get_pagefault_threads_number(handle); -} - -int32_t scap_update_pagefaults_thread_number(scap_t *handle, int tid, unsigned long val){ - return scap_bpf_update_pagefaults_threads_number(handle, tid, val); -} #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) -int32_t scap_pagefaults_map_clear(scap_t *handle){ +int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts) +{ if(handle->m_mode != SCAP_MODE_LIVE) { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_pagefaults_map_clear not supported on this scap mode"); + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "scap_get_page_faults_from_map not supported on this scap mode"); ASSERT(false); return SCAP_FAILURE; } @@ -1939,13 +1933,11 @@ int32_t scap_pagefaults_map_clear(scap_t *handle){ { if(handle->m_bpf) { - return scap_bpf_clear_pagefault_map(handle); + return scap_bpf_get_page_faults_from_map(handle, last_time, cur_time, results, counts); } } } -#endif -#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! 
defined(_WIN32) int32_t scap_enable_page_faults(scap_t *handle) { if(handle->m_mode != SCAP_MODE_LIVE) diff --git a/userspace/libscap/scap.h b/userspace/libscap/scap.h index 995312b7d7..a9affb4cdd 100644 --- a/userspace/libscap/scap.h +++ b/userspace/libscap/scap.h @@ -60,6 +60,7 @@ struct iovec; #include "uthash.h" #include "../common/types.h" #include "../../driver/ppm_events_public.h" +#include "../../driver/bpf/types.h" #ifdef _WIN32 #include #define MAP_FAILED (void*)-1 @@ -1067,10 +1068,8 @@ void scap_set_refresh_proc_table_when_saving(scap_t* handle, bool refresh); uint64_t scap_ftell(scap_t *handle); void scap_fseek(scap_t *handle, uint64_t off); int32_t scap_enable_tracers_capture(scap_t* handle); +int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts); int32_t scap_enable_page_faults(scap_t *handle); -int scap_get_pagefaults_threads_number(scap_t *handle); -int32_t scap_update_pagefaults_thread_number(scap_t *handle, int tid, unsigned long val); -int32_t scap_pagefaults_map_clear(scap_t *handle); int32_t scap_enable_skb_capture(scap_t *handle); int32_t scap_disable_skb_capture(scap_t *handle); uint64_t scap_get_unexpected_block_readsize(scap_t* handle); diff --git a/userspace/libscap/scap_bpf.c b/userspace/libscap/scap_bpf.c index 7421641f6b..c0080e3745 100644 --- a/userspace/libscap/scap_bpf.c +++ b/userspace/libscap/scap_bpf.c @@ -862,8 +862,17 @@ static int32_t load_bpf_file(scap_t *handle, const char *path) memcmp(shname, "raw_tracepoint/", sizeof("raw_tracepoint/") - 1) == 0 || memcmp(shname, "kprobe/", sizeof("kprobe/") - 1) == 0 || memcmp(shname, "kretprobe/", sizeof("kretprobe/") - 1) == 0) - { - int load_result = load_tracepoint(handle, shname, data->d_buf, data->d_size); + { + //Handling multiple programs on the same hook point + char event[100]; + strcpy(event, shname); + int len = strlen(event); + if(len >= 9 && strncmp(&event[len - 9], "_multiple", 9) == 
0) + { + event[len - 9] = '\0'; + } + + int load_result = load_tracepoint(handle, event, data->d_buf, data->d_size); if((memcmp(shname, "kprobe/", sizeof("kprobe/") - 1) == 0 || memcmp(shname, "kretprobe/", sizeof("kretprobe/") - 1) == 0) && load_result == SCAP_UNKNOWN_KPROBE) @@ -1244,71 +1253,31 @@ int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle) return SCAP_SUCCESS; } -int32_t scap_bpf_clear_pagefault_map(scap_t* handle){ - struct sysdig_bpf_settings settings; - int k = 0; - - if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP], &k, &settings) != 0) - { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_lookup_elem < 0"); - return SCAP_FAILURE; - } - - // start to clear map & set the mutex - settings.pgft_map_clear = true; - if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP], &k, &settings, BPF_ANY) != 0) - { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_update_elem < 0"); - return SCAP_FAILURE; - } - +/* + |last_time|--------scan_interval--------|cur_time| + A(ignore): |pagefault.timestamp| + B(catch): |pagefault.timestamp| +*/ +int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts) +{ int next_key, lookup_key; lookup_key = -1; - while(bpf_map_get_next_key(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &lookup_key, &next_key) == 0){ - bpf_map_delete_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &next_key); + int32_t cnt = 0; + while(bpf_map_get_next_key(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAP], &lookup_key, &next_key) == 0){ + if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAP], &next_key, &results[cnt]) != 0){ + snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAP bpf_map_lookup_elem < 0"); + return SCAP_FAILURE; + } + if(results[cnt].timestamp > last_time && results[cnt].timestamp <= cur_time) { + cnt++; + } lookup_key = 
next_key; } - - // end up to clear map & clear the mutex - settings.pgft_map_clear = false; - if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_SETTINGS_MAP], &k, &settings, BPF_ANY) != 0) - { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_SETTINGS_MAP bpf_map_update_elem < 0"); - return SCAP_FAILURE; - } - - - k = -1; - unsigned long val = 0; - - if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &k, &val, BPF_ANY) != 0) - { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_update_elem < 0"); - return SCAP_FAILURE; - } - + *counts = cnt; return SCAP_SUCCESS; } -int scap_bpf_get_pagefault_threads_number(scap_t* handle){ - int k = -1; - unsigned long val = 0; - if(bpf_map_lookup_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &k, &val) != 0) - { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_lookup_elem < 0"); - return -1; - } - return val; -} -int32_t scap_bpf_update_pagefaults_threads_number(scap_t* handle, int tid, unsigned long val){ - if(bpf_map_update_elem(handle->m_bpf_map_fds[SYSDIG_PAGEFAULT_MAJOR_MAP], &tid, &val, BPF_ANY) != 0) - { - snprintf(handle->m_lasterr, SCAP_LASTERR_SIZE, "SYSDIG_PAGEFAULT_MAJOR_MAP bpf_map_update_elem < 0"); - return SCAP_FAILURE; - } - return SCAP_SUCCESS; -} int32_t scap_bpf_enable_page_faults(scap_t* handle) { struct sysdig_bpf_settings settings; diff --git a/userspace/libscap/scap_bpf.h b/userspace/libscap/scap_bpf.h index b41abe705e..7b85033800 100644 --- a/userspace/libscap/scap_bpf.h +++ b/userspace/libscap/scap_bpf.h @@ -18,6 +18,7 @@ limitations under the License. 
#define _SCAP_BPF_H #include "compat/perf_event.h" +#include "../../driver/bpf/types.h" struct perf_event_sample { struct perf_event_header header; @@ -40,10 +41,8 @@ int32_t scap_bpf_set_fullcapture_port_range(scap_t* handle, uint16_t range_start int32_t scap_bpf_set_statsd_port(scap_t* handle, uint16_t port); int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle); int32_t scap_bpf_disable_dynamic_snaplen(scap_t* handle); -int32_t scap_bpf_clear_pagefault_map(scap_t* handle); int32_t scap_bpf_enable_page_faults(scap_t* handle); -int scap_bpf_get_pagefault_threads_number(scap_t* handle); -int32_t scap_bpf_update_pagefaults_threads_number(scap_t* handle, int tid, unsigned long val); +int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts); int32_t scap_bpf_start_dropping_mode(scap_t* handle, uint32_t sampling_ratio); int32_t scap_bpf_stop_dropping_mode(scap_t* handle); int32_t scap_bpf_enable_tracers_capture(scap_t* handle); diff --git a/userspace/libsinsp/examples/test.cpp b/userspace/libsinsp/examples/test.cpp index d1580166f1..16757aa1a0 100644 --- a/userspace/libsinsp/examples/test.cpp +++ b/userspace/libsinsp/examples/test.cpp @@ -20,6 +20,7 @@ limitations under the License. #include #include #include +#include #include "util.h" using namespace std; @@ -48,12 +49,13 @@ static void usage() // "evt.category=process or evt.category=net" // "evt.dir=< and (evt.category=net or (evt.type=execveat or evt.type=execve or evt.type=clone or evt.type=fork or evt.type=vfork))" // -void printThreadTable(sinsp *inspector, int flag){ +void testPagefault(sinsp *inspector){ //uint32_t threadcount = inspector->m_thread_manager->get_thread_count(); threadinfo_map_t *threadmap = inspector->m_thread_manager->get_threads(); unordered_map threadstable = threadmap->getThreadsTable(); unordered_map maj_mp, min_mp; //from pid to maj or min value + //1. test initialization of threads table. 
cout << "total number of threads initialized is " << threadstable.size() << "...\n"; for(auto e: threadstable){ sinsp_threadinfo* tmp = e.second.get(); @@ -72,13 +74,30 @@ void printThreadTable(sinsp *inspector, int flag){ } for(auto e: threadstable){ sinsp_threadinfo* tmp = e.second.get(); - if(flag) - inspector->update_pagefaults_threads_number(tmp->m_tid, tmp->m_pfmajor); - cout << "pid is " << tmp->m_pid << " & tid is " << tmp->m_tid << " maj_flt: " << tmp->m_pfmajor << "\tmin_flt: " << tmp->m_pfminor << '\n'; + cout << "pid is " << tmp->m_pid << " & tid is " << tmp->m_tid << ": maj_flt: " << tmp->m_pfmajor << "\tmin_flt: " << tmp->m_pfminor << '\n'; } - if(flag) - inspector->update_pagefaults_threads_number(-1, threadstable.size()); + + //2. test eBPF pagefault_map + uint64_t last_time = 0; + pagefault_data *results = new pagefault_data[101000]; + int counts = 0; + for(int i = 0;i < 5;i++){ + chrono::nanoseconds ns = std::chrono::duration_cast< std::chrono::nanoseconds>( + std::chrono::system_clock::now().time_since_epoch() + ); + uint64_t cur = ns.count(); + inspector->get_page_faults_from_map(last_time, cur, results, &counts); + last_time = cur; + cout << "curtime: " << cur << "---catch " << counts << " pagefaults from 2 seconds before." 
<< endl; + for(int j = 0; j < counts;j++){ + cout << "pid: " << results[j].pid << " tid: " << results[j].tid << " major: " << results[j].maj_flt << " minor: " << results[j].min_flt + << " vmsize: " << results[j].vm_size << " vmrss: " << results[j].vm_rss << " vmswap: " << results[j].vm_swap << " timestamp: " << results[j].timestamp << endl << endl; + } + sleep(2); + } + + delete []results; } int main(int argc, char **argv) @@ -129,7 +148,8 @@ int main(int argc, char **argv) } } - int cnt = 0; + testPagefault(&inspector); + while(!g_interrupted) { sinsp_evt* ev = NULL; @@ -152,6 +172,21 @@ int main(int argc, char **argv) string cmdline; sinsp_threadinfo::populate_cmdline(cmdline, thread); + if(ev->get_type() == PPME_PAGE_FAULT_E) + { + cout << "[PAGEFAULTS]:[PID=" << thread->m_pid << "]:" + << "[TID=" << thread->m_tid << "]:" + << "[TYPE=" << get_event_type(ev->get_type()) << "]:" + << "[EXE=" << thread->get_exepath() << "]:" + << "[CMD=" << cmdline << "]" + << "[PAGE_FAULT_MAJOR=" << *((uint64_t *) (ev->get_param(0)->m_val)) << "]:" + << "[PAGE_FAULT_MINOR=" << *((uint64_t *) (ev->get_param(1)->m_val)) << "]" + << "[VMSIZE=" << *((uint32_t *) (ev->get_param(2)->m_val)) << "]" + << "[VMRSS=" << *((uint32_t *) (ev->get_param(3)->m_val)) << "]" + << "[VMSWAP=" << *((uint32_t *) (ev->get_param(4)->m_val)) << "]" + << endl << endl; + } + if(thread->is_main_thread()) { string date_time; @@ -205,8 +240,6 @@ int main(int argc, char **argv) << "[TYPE=" << get_event_type(ev->get_type()) << "]:" << "[EXE=" << thread->get_exepath() << "]:" << "[CMD=" << cmdline << "]" - << "[PAGE_FAULT_MAJOR=" << *((uint64_t *) (ev->get_param(0)->m_val)) << "]:" - << "[PAGE_FAULT_MINOR=" << *((uint64_t *) (ev->get_param(1)->m_val)) << "]" << endl << endl; } } diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index 38276b3388..8b1e02ccbe 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -248,15 +248,17 @@ void sinsp::enable_tracers_capture() 
#endif } -void sinsp::clear_page_faults_map() +int32_t sinsp::get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts) { #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) if(is_live() && m_h != NULL) { - if(scap_pagefaults_map_clear(m_h) != SCAP_SUCCESS) + int32_t ret = scap_get_page_faults_from_map(m_h, last_time, cur_time, results, counts); + if(ret == SCAP_FAILURE) { - throw sinsp_exception("error clearing page_faults map"); + throw sinsp_exception("error getting page_faults from map"); } + return ret; } #endif } @@ -274,33 +276,6 @@ void sinsp::enable_page_faults() #endif } -int sinsp::get_pagefault_threads_number(){ - #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) - if(is_live() && m_h != NULL) - { - int ret = scap_get_pagefaults_threads_number(m_h); - if(ret == -1) - { - throw sinsp_exception("error getting page_faults threads number"); - } - return ret; - } -#endif -} - -void sinsp::update_pagefaults_threads_number(int tid, unsigned long val) -{ -#if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! 
defined(_WIN32) - if(is_live() && m_h != NULL) - { - if(scap_update_pagefaults_thread_number(m_h, tid, val) != SCAP_SUCCESS) - { - throw sinsp_exception("error updating page_faults threads number"); - } - } -#endif -} - void sinsp::init() { diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index 5cb36a94a2..a2fd7c7c20 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -893,10 +893,8 @@ class SINSP_PUBLIC sinsp : public capture_stats_source, public wmi_handle_source void remove_meta_event_callback(); void filter_proc_table_when_saving(bool filter); void enable_tracers_capture(); - void clear_page_faults_map(); + int32_t get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts); void enable_page_faults(); - int get_pagefault_threads_number(); - void update_pagefaults_threads_number(int tid, unsigned long val); uint64_t get_bytes_read() { return scap_ftell(m_h); diff --git a/userspace/libsinsp/threadinfo.cpp b/userspace/libsinsp/threadinfo.cpp index 6c2edddfa8..30c24f569b 100644 --- a/userspace/libsinsp/threadinfo.cpp +++ b/userspace/libsinsp/threadinfo.cpp @@ -1782,7 +1782,8 @@ threadinfo_map_t::ptr_t sinsp_thread_manager::find_thread(int64_t tid, bool look if(tid == m_last_tid) { thr = m_last_tinfo.lock(); - if (thr) { + if(thr) + { #ifdef GATHER_INTERNAL_STATS m_cached_lookups->increment(); #endif @@ -1790,7 +1791,8 @@ threadinfo_map_t::ptr_t sinsp_thread_manager::find_thread(int64_t tid, bool look // for something that may not need to be precise thr->m_lastaccess_ts = m_inspector->get_lastevent_ts(); return thr; - } } + } + } // // Caching failed, do a real lookup From f4310ade03cff29e990231767f8ccf13551d8a6c Mon Sep 17 00:00:00 2001 From: yaofighting Date: Mon, 20 Mar 2023 16:41:08 +0800 Subject: [PATCH 3/3] add maxlen to avoid array out of bounds. 
Signed-off-by: yaofighting --- userspace/libscap/scap.c | 4 ++-- userspace/libscap/scap.h | 2 +- userspace/libscap/scap_bpf.c | 5 ++++- userspace/libscap/scap_bpf.h | 2 +- userspace/libsinsp/examples/test.cpp | 3 ++- userspace/libsinsp/sinsp.cpp | 4 ++-- userspace/libsinsp/sinsp.h | 2 +- 7 files changed, 13 insertions(+), 9 deletions(-) diff --git a/userspace/libscap/scap.c b/userspace/libscap/scap.c index 55e9325c85..034d12ca67 100644 --- a/userspace/libscap/scap.c +++ b/userspace/libscap/scap.c @@ -1921,7 +1921,7 @@ int32_t scap_enable_tracers_capture(scap_t* handle) #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! defined(_WIN32) -int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts) +int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int maxlen) { if(handle->m_mode != SCAP_MODE_LIVE) { @@ -1933,7 +1933,7 @@ int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64 { if(handle->m_bpf) { - return scap_bpf_get_page_faults_from_map(handle, last_time, cur_time, results, counts); + return scap_bpf_get_page_faults_from_map(handle, last_time, cur_time, results, counts, maxlen); } } } diff --git a/userspace/libscap/scap.h b/userspace/libscap/scap.h index a9affb4cdd..ecc460f56c 100644 --- a/userspace/libscap/scap.h +++ b/userspace/libscap/scap.h @@ -1068,7 +1068,7 @@ void scap_set_refresh_proc_table_when_saving(scap_t* handle, bool refresh); uint64_t scap_ftell(scap_t *handle); void scap_fseek(scap_t *handle, uint64_t off); int32_t scap_enable_tracers_capture(scap_t* handle); -int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts); +int32_t scap_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, 
int32_t maxlen); int32_t scap_enable_page_faults(scap_t *handle); int32_t scap_enable_skb_capture(scap_t *handle); int32_t scap_disable_skb_capture(scap_t *handle); diff --git a/userspace/libscap/scap_bpf.c b/userspace/libscap/scap_bpf.c index c0080e3745..58722f5d4d 100644 --- a/userspace/libscap/scap_bpf.c +++ b/userspace/libscap/scap_bpf.c @@ -1259,7 +1259,7 @@ int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle) A(ignore): |pagefault.timestamp| B(catch): |pagefault.timestamp| */ -int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts) +int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen) { int next_key, lookup_key; lookup_key = -1; @@ -1273,6 +1273,9 @@ int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, ui cnt++; } lookup_key = next_key; + if(cnt >= maxlen){ + break; + } } *counts = cnt; return SCAP_SUCCESS; diff --git a/userspace/libscap/scap_bpf.h b/userspace/libscap/scap_bpf.h index 7b85033800..36c27baf67 100644 --- a/userspace/libscap/scap_bpf.h +++ b/userspace/libscap/scap_bpf.h @@ -42,7 +42,7 @@ int32_t scap_bpf_set_statsd_port(scap_t* handle, uint16_t port); int32_t scap_bpf_enable_dynamic_snaplen(scap_t* handle); int32_t scap_bpf_disable_dynamic_snaplen(scap_t* handle); int32_t scap_bpf_enable_page_faults(scap_t* handle); -int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts); +int32_t scap_bpf_get_page_faults_from_map(scap_t* handle, uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen); int32_t scap_bpf_start_dropping_mode(scap_t* handle, uint32_t sampling_ratio); int32_t scap_bpf_stop_dropping_mode(scap_t* handle); int32_t scap_bpf_enable_tracers_capture(scap_t* handle); diff --git 
a/userspace/libsinsp/examples/test.cpp b/userspace/libsinsp/examples/test.cpp index 16757aa1a0..24fd01ef1f 100644 --- a/userspace/libsinsp/examples/test.cpp +++ b/userspace/libsinsp/examples/test.cpp @@ -81,13 +81,14 @@ void testPagefault(sinsp *inspector){ //2. test eBPF pagefault_map uint64_t last_time = 0; pagefault_data *results = new pagefault_data[101000]; + int32_t maxlen = 65535; int counts = 0; for(int i = 0;i < 5;i++){ chrono::nanoseconds ns = std::chrono::duration_cast< std::chrono::nanoseconds>( std::chrono::system_clock::now().time_since_epoch() ); uint64_t cur = ns.count(); - inspector->get_page_faults_from_map(last_time, cur, results, &counts); + inspector->get_page_faults_from_map(last_time, cur, results, &counts, maxlen); last_time = cur; cout << "curtime: " << cur << "---catch " << counts << " pagefaults from 2 seconds before." << endl; for(int j = 0; j < counts;j++){ diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index 8b1e02ccbe..78efa4291f 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -248,12 +248,12 @@ void sinsp::enable_tracers_capture() #endif } -int32_t sinsp::get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts) +int32_t sinsp::get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen) { #if defined(HAS_CAPTURE) && ! defined(CYGWING_AGENT) && ! 
defined(_WIN32) if(is_live() && m_h != NULL) { - int32_t ret = scap_get_page_faults_from_map(m_h, last_time, cur_time, results, counts); + int32_t ret = scap_get_page_faults_from_map(m_h, last_time, cur_time, results, counts, maxlen); if(ret == SCAP_FAILURE) { throw sinsp_exception("error getting page_faults from map"); diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index a2fd7c7c20..a575d0c7e8 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -893,7 +893,7 @@ class SINSP_PUBLIC sinsp : public capture_stats_source, public wmi_handle_source void remove_meta_event_callback(); void filter_proc_table_when_saving(bool filter); void enable_tracers_capture(); - int32_t get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts); + int32_t get_page_faults_from_map(uint64_t last_time, uint64_t cur_time, struct pagefault_data results[], int32_t *counts, int32_t maxlen); void enable_page_faults(); uint64_t get_bytes_read() {