From 9f18ca878a4dd065007279ec14b75ae6e890a027 Mon Sep 17 00:00:00 2001 From: Kenneth Giusti Date: Thu, 5 Oct 2023 11:58:22 -0400 Subject: [PATCH] Fixes #1247: Add router process RSS access via management --- include/qpid/dispatch/platform.h | 17 +++++++ include/qpid/dispatch/router.h | 7 ++- .../skupper_router/management/skrouter.json | 7 ++- src/dispatch.c | 47 ------------------ src/http-libwebsockets.c | 38 +++++++++++++- src/platform.c | 40 +++++++++++++++ src/router_core/agent_router.c | 12 ++++- src/router_core/agent_router.h | 2 +- src/server.c | 20 ++++---- tests/platform_test.c | 13 ++++- tools/skstat | 49 +++++++++++-------- 11 files changed, 167 insertions(+), 85 deletions(-) diff --git a/include/qpid/dispatch/platform.h b/include/qpid/dispatch/platform.h index 5f89c2b91..eb458ab57 100644 --- a/include/qpid/dispatch/platform.h +++ b/include/qpid/dispatch/platform.h @@ -45,4 +45,21 @@ uintmax_t qd_platform_memory_size(void); */ double normalize_memory_size(const uint64_t bytes, const char **suffix); +/** + * Return the total amount of virtual memory currently occupied by the router process. This includes data, stack, and + * code memory. On systems supporting virtual memory this value may be larger than the physical RAM available on the + * platform. Thread safe. + * + * Return 0 if the memory usage cannot be determined. + */ +uint64_t qd_router_virtual_memory_usage(void); + +/** + * Return the total amount of resident memory currently occupied by the router process. This is the portion of the + * router's virtual memory that is currently in RAM. Thread safe. + * + * Return 0 if the memory usage cannot be determined. 
+ */ +uint64_t qd_router_rss_memory_usage(void); + #endif diff --git a/include/qpid/dispatch/router.h b/include/qpid/dispatch/router.h index e1e8f1d30..a90880b8f 100644 --- a/include/qpid/dispatch/router.h +++ b/include/qpid/dispatch/router.h @@ -138,8 +138,11 @@ const char* qd_address_logstr(qd_address_t* address); /** Retrieve the proper forwarder for a given semantic */ qd_router_forwarder_t *qd_router_get_forwarder(qd_address_treatment_t t); -/** Retrieve the routers current memory usage (in bytes) */ -uint64_t qd_router_memory_usage(void); +/** Retrieve the router's current use of virtual memory (in bytes). Thread safe. */ +uint64_t qd_router_virtual_memory_usage(void); + +/** Retrieve the router's current use of resident memory (in bytes). Thread safe. */ +uint64_t qd_router_rss_memory_usage(void); /** Return true if the router is running in a test mode with various debug/test hooks enabled. This mode can only be * enabled by passing the '-T' command line option to the router on startup. It should not be used in production. diff --git a/python/skupper_router/management/skrouter.json b/python/skupper_router/management/skrouter.json index fbedd1038..044f2e9b4 100644 --- a/python/skupper_router/management/skrouter.json +++ b/python/skupper_router/management/skrouter.json @@ -592,7 +592,12 @@ "memoryUsage": { "type": "integer", "graph": true, - "description": "The current amount of memory in use by the router process in bytes. This includes memory provisioned for stack, data, and code (VmSize). This value is set to Null if the platform does not provide access to the process memory size." + "description": "The current amount of virtual memory (VmSize) in use by the router process in bytes. This includes memory provisioned for stack, data, and code. This value is set to Null if the platform does not provide access to the process virtual memory size." 
+ }, + "residentMemoryUsage": { + "type": "integer", + "graph": true, + "description": "The current amount of system memory in use by the router process in bytes (RSS: Resident Set Size). This is the portion of the process memory space that currently resides in RAM. This value is set to Null if the platform does not provide access to the process resident memory size." }, "dataConnectionCount": { "description": "The number of parallel data connections to carry streaming data between routers. Applies only to interior routers", diff --git a/src/dispatch.c b/src/dispatch.c index 0e23d94ae..a097d19f1 100644 --- a/src/dispatch.c +++ b/src/dispatch.c @@ -392,53 +392,6 @@ qdr_core_t* qd_dispatch_router_core(const qd_dispatch_t *qd) return qd->router->router_core; } - -/* qd_router_memory_usage - * - * Return the amount of memory currently provisioned by the router process. - * This includes data, stack, and code memory. On systems supporting virtual - * memory this value may be larger than the physical RAM available on the - * platform. - * - * Return 0 if the memory usage cannot be determined. - */ -uint64_t qd_router_memory_usage(void) -{ - // @TODO(kgiusti): only works for linux (what? doesn't everyone run linux?) - - // parse the VmSize value out of the /proc/[pid]/status file - const pid_t my_pid = getpid(); - const char *status_template = "/proc/%ld/status"; - char status_path[64]; - if (snprintf(status_path, 64, status_template, (long int)my_pid) >= 64) { - // huh, did not fit? 
// Write the router process memory use metrics (virtual memory size and resident set size) to the output buffer.
//
// start:     in/out cursor into the output buffer. On successful return (*start) is advanced to the terminating
//            null byte of the text written so far.
// available: number of octets that may be written at (*start), including room for the null terminator.
//
// Returns the total octets written (not including the null terminator) or zero on error.
//
// A zero return is reserved for errors: the caller closes the connection when it sees one. Each accessor returns 0
// when its value cannot be determined, so on a platform where neither value is available there is no gauge to emit.
// Rather than returning "zero octets written" (indistinguishable from an error) a single comment line is written.
//
static size_t _write_memory_metrics(uint8_t **start, size_t available)
{
    // Lines starting with '#' that are not HELP/TYPE are comments in the Prometheus text exposition format.
    // NOTE(review): assumes the preceding output ends with a newline - confirm against _write_metric().
    static const char unavailable[] = "# router process memory metrics are not available on this platform\n";

    const size_t   save   = available;
    const uint64_t vmsize = qd_router_virtual_memory_usage();
    const uint64_t rss    = qd_router_rss_memory_usage();

    if (vmsize > 0) {  // 0 means not available
        const size_t rc = _write_metric(start, available, "qdr_router_vmsize_bytes", "gauge", vmsize);
        if (rc == 0) {
            return 0;
        }
        available -= rc;
    }

    if (rss > 0) {  // 0 means not available
        const size_t rc = _write_metric(start, available, "qdr_router_rss_bytes", "gauge", rss);
        if (rc == 0) {
            return 0;
        }
        available -= rc;
    }

    if (available == save) {
        // Neither gauge was written: emit the comment so that success is never reported as zero octets.
        const size_t len = sizeof(unavailable) - 1;
        if (available <= len) {
            return 0;  // genuine error: no room for the comment and its null terminator
        }
        for (size_t i = 0; i <= len; i++) {  // '<=' copies the null terminator as well
            (*start)[i] = (uint8_t) unavailable[i];
        }
        *start += len;
        available -= len;
    }

    return save - available;
}
// @@ -710,7 +741,8 @@ static size_t _write_allocator_metrics(uint8_t **start, size_t available) static size_t _generate_metrics_response(stats_request_state_t *state, uint8_t **start, const uint8_t * const end) { if (_write_global_metrics(state, start, end - *start) == 0 - || _write_allocator_metrics(start, end - *start) == 0) { + || _write_allocator_metrics(start, end - *start) == 0 + || _write_memory_metrics(start, end - *start) == 0) { // error, close the connection return 0; } @@ -741,9 +773,11 @@ static int callback_metrics(struct lws *wsi, enum lws_callback_reasons reason, size_t buf_size = HTTP_HEADER_LEN // router global metrics: + (metrics_length * PER_METRIC_BUF_SIZE) - // alloc_pool metrics (+ 1 for alloc_pool_total_bytes): + // alloc_pool metrics (+ 1 for qdr_alloc_pool_bytes): + (DEQ_SIZE(allocator_metrics) * PER_METRIC_BUF_SIZE * PER_ALLOC_METRIC_COUNT) + PER_METRIC_BUF_SIZE + // qdr_router_vmsize_bytes and qdr_router_rss_bytes + + (2 * PER_METRIC_BUF_SIZE) // 1 terminating null + 1; stats->state = new_stats_request_state(buf_size); diff --git a/src/platform.c b/src/platform.c index b80c35b43..bc4d7ad60 100644 --- a/src/platform.c +++ b/src/platform.c @@ -157,3 +157,43 @@ double normalize_memory_size(const uint64_t bytes, const char **suffix) return value; } +// Parse the /proc/self/status file for the line matching sscanf_pattern and return the corresponding metric. +// Linux-specific. Thread safe. 
//
// Scan /proc/self/status one line at a time, applying sscanf_pattern to each line until one converts.
//
// sscanf_pattern: an sscanf() format containing exactly one uint64_t conversion (e.g. "VmSize: %" SCNu64).
//
// Returns the converted value exactly as it appears in the file, or 0 when the file cannot be opened or no line
// matches the pattern.
//
static uint64_t _parse_proc_memory_metric(const char *sscanf_pattern)
{
    uint64_t value = 0;

    // the format of the /proc/self/status file is documented in the linux man pages (man proc)
    FILE *status = fopen("/proc/self/status", "r");

    if (status) {  // the open fails when /proc is not present, e.g. when not on linux
        char  *line     = 0;  // allocated and grown by getline()
        size_t capacity = 0;
        int    matched  = 0;

        while (!matched && getline(&line, &capacity, status) != -1) {
            matched = (sscanf(line, sscanf_pattern, &value) == 1);
        }

        free(line);
        fclose(status);
    }

    return value;
}

// Total virtual memory of the router process in bytes, or 0 if it cannot be determined.
//
uint64_t qd_router_virtual_memory_usage(void)
{
    // the status file reports VmSize in kB
    const uint64_t vmsize_kb = _parse_proc_memory_metric("VmSize: %" SCNu64);
    return vmsize_kb * 1024;
}

// Resident (in RAM) memory of the router process in bytes, or 0 if it cannot be determined.
//
uint64_t qd_router_rss_memory_usage(void)
{
    // the status file reports VmRSS in kB
    const uint64_t rss_kb = _parse_proc_memory_metric("VmRSS: %" SCNu64);
    return rss_kb * 1024;
}
a/src/router_core/agent_router.h +++ b/src/router_core/agent_router.h @@ -21,7 +21,7 @@ #include "router_core_private.h" -#define QDR_ROUTER_COLUMN_COUNT 32 +#define QDR_ROUTER_COLUMN_COUNT 33 extern const char *qdr_router_columns[QDR_ROUTER_COLUMN_COUNT + 1]; diff --git a/src/server.c b/src/server.c index 980d3cd0e..3fd6bb47f 100644 --- a/src/server.c +++ b/src/server.c @@ -1497,15 +1497,17 @@ void qd_server_run(qd_dispatch_t *qd) qd_server->thread_count, (long) getpid()); // Log message is matched in system_tests const uintmax_t ram_size = qd_platform_memory_size(); - const uint64_t vm_size = qd_router_memory_usage(); - if (ram_size && vm_size) { - const char *suffix_vm = 0; - const char *suffix_ram = 0; - double vm = normalize_memory_size(vm_size, &suffix_vm); - double ram = normalize_memory_size(ram_size, &suffix_ram); - qd_log(LOG_ROUTER, QD_LOG_INFO, "Process VmSize %.2f %s (%.2f %s available memory)", vm, suffix_vm, - ram, suffix_ram); - } + const uint64_t vm_size = qd_router_virtual_memory_usage(); + const uint64_t rss_size = qd_router_rss_memory_usage(); + const char *suffix_vm = 0; + const char *suffix_rss = 0; + const char *suffix_ram = 0; + double vm = normalize_memory_size(vm_size, &suffix_vm); + double rss = normalize_memory_size(rss_size, &suffix_rss); + double ram = normalize_memory_size(ram_size, &suffix_ram); + qd_log(LOG_ROUTER, QD_LOG_INFO, + "Process VmSize %.2f %s RSS %.2f %s (%.2f %s available memory)", + vm, suffix_vm, rss, suffix_rss, ram, suffix_ram); #ifndef NDEBUG qd_log(LOG_ROUTER, QD_LOG_INFO, "Running in DEBUG Mode"); diff --git a/tests/platform_test.c b/tests/platform_test.c index 9df246d15..2c4727fe0 100644 --- a/tests/platform_test.c +++ b/tests/platform_test.c @@ -27,12 +27,21 @@ #include -// simple sanity check that qd_platform_memory_size() returns a meaningful value +// Simple sanity check that the various platform memory metrics return meaningful values. 
//
// Probe each platform memory metric in turn and report the first one that comes back as zero.
//
// context: unused.
//
// Returns 0 when every probe yields a non-zero value, otherwise a static error string naming the failing probe.
//
static char *test_memory_size(void *context)
{
    // NOTE WELL: these functions may not work on non-linux platforms. If these tests fail please consider providing a
    // patch to update the failing function to work properly on your platform.

    char *error = 0;

    // the else-if chain keeps the probe order and stops at the first failure
    if (qd_platform_memory_size() == 0) {
        error = "ERROR: qd_platform_memory_size() cannot detect memory size (NEED PORTING?)";
    } else if (qd_router_virtual_memory_usage() == 0) {
        error = "ERROR: qd_router_virtual_memory_usage() cannot detect VmSize (NEED PORTING?)";
    } else if (qd_router_rss_memory_usage() == 0) {
        error = "ERROR: qd_router_rss_memory_usage() cannot detect RSS (NEED PORTING?)";
    }

    return error;
}
AttributeError: + vm_size = None + try: + rss_size = router.residentMemoryUsage + except AttributeError: + rss_size = None + + headers = [] + values = [] + if vm_size is not None: + headers.append(Header("VmSize", Header.KiMiGi)) + values.append(vm_size) + + if rss_size is not None: + headers.append(Header("RSS", Header.KiMiGi)) + values.append(rss_size) + + headers.append(Header("Pooled", Header.KiMiGi)) + values.append(pooled_total) + disp.formattedTable("\nMemory Summary", headers, [values]) def displayPolicy(self, show_date_id=True): disp = Display(prefix=" ", bodyFormat=self.bodyFormat)