From 393a1812a60ad3fb466bdbf98b198341282b728c Mon Sep 17 00:00:00 2001
From: "Jonathan M. Henson"
Date: Tue, 17 Oct 2023 16:46:00 -0700
Subject: [PATCH] add nic affinity checks.

Record which CPU group (NUMA node) each network card is attached to.

On Linux, /sys/class/net/<device>/device/numa_node is read for every
AF_PACKET interface returned by getifaddrs(); the parsed value is pushed
onto a new u16_nic_to_cpu_group list in the same loop iteration that
pushes the device name onto str_list_network_cards. A card whose node
file cannot be read or parsed is recorded as group 0.

The value is exposed through
aws_system_environment_get_cpu_group_for_network_card() and printed by
the print_system_info tool.
---
 bin/system_info/print_system_info.c           |  6 ++-
 include/aws/common/private/system_info_priv.h |  1 +
 include/aws/common/system_info.h              |  3 ++
 source/linux/system_info.c                    | 37 ++++++++++++++++++-
 source/system_info.c                          | 10 +++++
 5 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/bin/system_info/print_system_info.c b/bin/system_info/print_system_info.c
index 3ceec31cd..cfeb23c75 100644
--- a/bin/system_info/print_system_info.c
+++ b/bin/system_info/print_system_info.c
@@ -46,8 +46,10 @@ int main(void) {
     const struct aws_string **nic_array = aws_system_environment_get_network_cards(env);
 
     for (size_t i = 0; i < nic_count; ++i) {
-        fprintf(stdout, " '%s'", aws_string_c_str(nic_array[i]));
-
+        fprintf(stdout, " {\n");
+        fprintf(stdout, " 'device_name': '%s',\n", aws_string_c_str(nic_array[i]));
+        fprintf(stdout, " 'numa_node': '%lu'\n", (unsigned long)aws_system_environment_get_cpu_group_for_network_card(env, i));
+        fprintf(stdout, " }\n");
         if (i != nic_count - 1) {
             fprintf(stdout, ",");
         }
diff --git a/include/aws/common/private/system_info_priv.h b/include/aws/common/private/system_info_priv.h
index b1cc2e7f9..67714206b 100644
--- a/include/aws/common/private/system_info_priv.h
+++ b/include/aws/common/private/system_info_priv.h
@@ -15,6 +15,7 @@ struct aws_system_environment {
     struct aws_byte_buf virtualization_vendor;
     struct aws_byte_buf product_name;
     struct aws_array_list str_list_network_cards;
+    struct aws_array_list u16_nic_to_cpu_group;
     enum aws_platform_os os;
     size_t cpu_count;
     size_t cpu_group_count;
diff --git a/include/aws/common/system_info.h b/include/aws/common/system_info.h
index 7203a4fd2..b8b7c8370 100644
--- a/include/aws/common/system_info.h
+++ b/include/aws/common/system_info.h
@@ -64,6 +64,9 @@ size_t aws_system_environment_get_network_card_count(const struct aws_system_env
 AWS_COMMON_API
 const struct aws_string **aws_system_environment_get_network_cards(const struct aws_system_environment *env);
 
+AWS_COMMON_API
+uint16_t aws_system_environment_get_cpu_group_for_network_card(const struct aws_system_environment *env, size_t card_index);
+
 /**
  * Returns the number of processors for the specified compute environment.
  */
diff --git a/source/linux/system_info.c b/source/linux/system_info.c
index f6bfb858d..afbe95d71 100644
--- a/source/linux/system_info.c
+++ b/source/linux/system_info.c
@@ -21,16 +21,51 @@ int aws_system_environment_load_platform_impl(struct aws_system_environment *env
     aws_byte_buf_init_from_file_with_size_hint(
         &env->product_name, env->allocator, "/sys/devices/virtual/dmi/id/product_name", 32 /*size_hint*/);
 
+    /* iterate over network devices. */
     struct ifaddrs *addrs = NULL;
     struct ifaddrs *iterator = NULL;
 
     getifaddrs(&addrs);
     iterator = addrs;
-
+    
     while(iterator) {
         if (iterator->ifa_addr && iterator->ifa_addr->sa_family == AF_PACKET) {
             struct aws_string *device_name = aws_string_new_from_c_str(env->allocator, iterator->ifa_name);
             aws_array_list_push_back(&env->str_list_network_cards, &device_name);
+
+            /* figure out what numa node if any the network card is on. */
+            uint16_t group_id = 0;
+
+            struct aws_byte_buf temp_numa_info;
+            aws_byte_buf_init(&temp_numa_info, env->allocator, 256);
+            struct aws_byte_cursor initial_path = aws_byte_cursor_from_c_str("/sys/class/net/");
+            aws_byte_buf_write_from_whole_cursor(&temp_numa_info, initial_path);
+            struct aws_byte_cursor device_name_cur = aws_byte_cursor_from_string(device_name);
+            aws_byte_buf_append_dynamic(&temp_numa_info, &device_name_cur);
+            struct aws_byte_cursor final_path_segment = aws_byte_cursor_from_c_str("/device/numa_node");
+            aws_byte_buf_append_dynamic(&temp_numa_info, &final_path_segment);
+            /* add a null terminator for sys-call land. */
+            aws_byte_buf_append_byte_dynamic(&temp_numa_info, 0);
+
+            /* fill in buffer and read it converting to int. */
+            struct aws_byte_buf node_file;
+            AWS_ZERO_STRUCT(node_file);
+
+            if (aws_byte_buf_init_from_file(&node_file, env->allocator, (const char *)temp_numa_info.buffer) == AWS_OP_SUCCESS) {
+                struct aws_byte_cursor file_cur = aws_byte_cursor_from_buf(&node_file);
+
+                uint64_t parsed_int = 0;
+                if (aws_byte_cursor_utf8_parse_u64(file_cur, &parsed_int) == AWS_OP_SUCCESS) {
+
+                    /* should always be true, but doesn't hurt to be safe. */
+                    if (parsed_int < UINT16_MAX) {
+                        group_id = (uint16_t)parsed_int;
+                    }
+                }
+                aws_byte_buf_clean_up(&node_file);
+            }
+            aws_byte_buf_clean_up(&temp_numa_info);
+            aws_array_list_push_back(&env->u16_nic_to_cpu_group, &group_id);
         }
         iterator = iterator->ifa_next;
     }
diff --git a/source/system_info.c b/source/system_info.c
index 3f33acd5b..2ab710451 100644
--- a/source/system_info.c
+++ b/source/system_info.c
@@ -12,6 +12,7 @@ void s_destroy_env(void *arg) {
     if (env) {
         aws_system_environment_destroy_platform_impl(env);
         aws_array_list_clean_up(&env->str_list_network_cards);
+        aws_array_list_clean_up(&env->u16_nic_to_cpu_group);
         aws_mem_release(env->allocator, env);
     }
 }
@@ -22,6 +23,7 @@ struct aws_system_environment *aws_system_environment_load(struct aws_allocator
     aws_ref_count_init(&env->ref_count, env, s_destroy_env);
 
     aws_array_list_init_dynamic(&env->str_list_network_cards, env->allocator, 2, sizeof(struct aws_string *));
+    aws_array_list_init_dynamic(&env->u16_nic_to_cpu_group, env->allocator, 2, sizeof(uint16_t));
 
     if (aws_system_environment_load_platform_impl(env)) {
         AWS_LOGF_ERROR(
@@ -89,3 +91,11 @@ size_t aws_system_environment_get_network_card_count(const struct aws_system_env
 const struct aws_string **aws_system_environment_get_network_cards(const struct aws_system_environment *env) {
     return env->str_list_network_cards.data;
 }
+
+uint16_t aws_system_environment_get_cpu_group_for_network_card(const struct aws_system_environment *env, size_t card_index) {
+    AWS_FATAL_ASSERT(card_index < aws_array_list_length(&env->u16_nic_to_cpu_group));
+
+    uint16_t value = 0;
+    aws_array_list_get_at(&env->u16_nic_to_cpu_group, &value, card_index);
+    return value;
+}