From 765afea3ff4b25a4686b04be0e98dc95b8a15a7e Mon Sep 17 00:00:00 2001 From: Lindsay Stewart Date: Wed, 13 Sep 2023 11:58:05 -0700 Subject: [PATCH] Reduce allocs in ktls app data send (#4181) --- tests/unit/s2n_ktls_io_test.c | 218 +++++++++++++++++++++++++++++----- tls/s2n_ktls_io.c | 99 ++++++++++----- 2 files changed, 258 insertions(+), 59 deletions(-) diff --git a/tests/unit/s2n_ktls_io_test.c b/tests/unit/s2n_ktls_io_test.c index 65dc4c8eca4..f58d89c361e 100644 --- a/tests/unit/s2n_ktls_io_test.c +++ b/tests/unit/s2n_ktls_io_test.c @@ -15,6 +15,7 @@ #include "s2n_test.h" #include "testlib/s2n_ktls_test_utils.h" +#include "testlib/s2n_mem_testlib.h" #include "testlib/s2n_testlib.h" #include "tls/s2n_ktls.h" #include "utils/s2n_random.h" @@ -558,6 +559,9 @@ int main(int argc, char **argv) EXPECT_FAILURE_WITH_ERRNO( s2n_ktls_sendv_with_offset(&conn, NULL, 1, 0, &blocked), S2N_ERR_NULL); + EXPECT_FAILURE_WITH_ERRNO( + s2n_ktls_sendv_with_offset(&conn, NULL, 1, 1, &blocked), + S2N_ERR_NULL); EXPECT_FAILURE_WITH_ERRNO( s2n_ktls_sendv_with_offset(&conn, &test_iovec, 1, 0, NULL), S2N_ERR_NULL); @@ -671,38 +675,20 @@ int main(int argc, char **argv) EXPECT_OK(s2n_test_validate_data(&out, test_data, sizeof(test_data))); }; - /* Test: Send with very large number of iovecs */ - { - DEFER_CLEANUP(struct s2n_connection *conn = s2n_connection_new(S2N_SERVER), - s2n_connection_ptr_free); - EXPECT_NOT_NULL(conn); - - DEFER_CLEANUP(struct s2n_test_ktls_io_stuffer out = { 0 }, - s2n_ktls_io_stuffer_free); - EXPECT_OK(s2n_test_init_ktls_io_stuffer_send(conn, &out)); - - size_t many_iov_lens[1000] = { 0 }; - DEFER_CLEANUP(struct s2n_test_iovecs test_iovecs = { 0 }, s2n_test_iovecs_free); - EXPECT_OK(s2n_test_new_iovecs(&test_iovecs, &test_data_blob, - many_iov_lens, s2n_array_len(many_iov_lens))); - - s2n_blocked_status blocked = S2N_NOT_BLOCKED; - ssize_t result = s2n_ktls_sendv_with_offset(conn, - test_iovecs.iovecs, test_iovecs.iovecs_count, 0, &blocked); - EXPECT_EQUAL(result, sizeof(test_data)); - - EXPECT_EQUAL(out.sendmsg_invoked_count, 1); - EXPECT_EQUAL(blocked, S2N_NOT_BLOCKED); - EXPECT_OK(s2n_test_validate_ancillary(&out, TLS_APPLICATION_DATA, sizeof(test_data))); - EXPECT_OK(s2n_test_validate_data(&out, test_data, sizeof(test_data))); - }; - /* Test: Send with offset */ { DEFER_CLEANUP(struct s2n_test_iovecs test_iovecs = { 0 }, s2n_test_iovecs_free); EXPECT_OK(s2n_test_new_iovecs(&test_iovecs, &test_data_blob, test_iov_lens, s2n_array_len(test_iov_lens))); + size_t large_test_iov_lens[100] = { 0 }; + EXPECT_MEMCPY_SUCCESS(large_test_iov_lens, test_iov_lens, sizeof(test_iov_lens)); + + DEFER_CLEANUP(struct s2n_test_iovecs large_test_iovecs = { 0 }, + s2n_test_iovecs_free); + EXPECT_OK(s2n_test_new_iovecs(&large_test_iovecs, &test_data_blob, + large_test_iov_lens, s2n_array_len(large_test_iov_lens))); + /* Test: Send with invalid / too large offset */ { const size_t bad_offset = sizeof(test_data) + 1; @@ -716,16 +702,38 @@ int main(int argc, char **argv) EXPECT_OK(s2n_test_init_ktls_io_stuffer_send(conn, &out)); s2n_blocked_status blocked = S2N_NOT_BLOCKED; - ssize_t result = s2n_ktls_sendv_with_offset(conn, - test_iovecs.iovecs, test_iovecs.iovecs_count, bad_offset, &blocked); + ssize_t result = s2n_ktls_sendv_with_offset(conn, large_test_iovecs.iovecs, + large_test_iovecs.iovecs_count, bad_offset, &blocked); EXPECT_FAILURE_WITH_ERRNO(result, S2N_ERR_INVALID_ARGUMENT); EXPECT_EQUAL(out.sendmsg_invoked_count, 0); EXPECT_EQUAL(blocked, S2N_NOT_BLOCKED); EXPECT_OK(s2n_test_records_in_ancillary(&out, 0)); - } + }; + + /* Test: Send with offset equal to total data size */ + { + const size_t offset = sizeof(test_data); + + DEFER_CLEANUP(struct s2n_connection *conn = s2n_connection_new(S2N_SERVER), + s2n_connection_ptr_free); + EXPECT_NOT_NULL(conn); - /* Test: Send with all possible valid offsets */ + DEFER_CLEANUP(struct s2n_test_ktls_io_stuffer out = { 0 }, + s2n_ktls_io_stuffer_free); + EXPECT_OK(s2n_test_init_ktls_io_stuffer_send(conn, &out)); + + s2n_blocked_status blocked = S2N_NOT_BLOCKED; + int written = s2n_ktls_sendv_with_offset(conn, large_test_iovecs.iovecs, + large_test_iovecs.iovecs_count, offset, &blocked); + EXPECT_EQUAL(written, 0); + + EXPECT_EQUAL(out.sendmsg_invoked_count, 1); + EXPECT_EQUAL(blocked, S2N_NOT_BLOCKED); + EXPECT_OK(s2n_test_records_in_ancillary(&out, 0)); + }; + + /* Test: Send with small iovecs array and all possible valid offsets */ for (size_t offset = 0; offset < sizeof(test_data); offset++) { DEFER_CLEANUP(struct s2n_connection *conn = s2n_connection_new(S2N_SERVER), s2n_connection_ptr_free); @@ -748,6 +756,30 @@ int main(int argc, char **argv) EXPECT_OK(s2n_test_validate_ancillary(&out, TLS_APPLICATION_DATA, expected_sent)); EXPECT_OK(s2n_test_validate_data(&out, test_data + offset, expected_sent)); } + + /* Test: Send with large iovecs array and all possible valid offsets */ + for (size_t offset = 0; offset < sizeof(test_data); offset++) { + DEFER_CLEANUP(struct s2n_connection *conn = s2n_connection_new(S2N_SERVER), + s2n_connection_ptr_free); + EXPECT_NOT_NULL(conn); + + DEFER_CLEANUP(struct s2n_test_ktls_io_stuffer out = { 0 }, + s2n_ktls_io_stuffer_free); + EXPECT_OK(s2n_test_init_ktls_io_stuffer_send(conn, &out)); + + const size_t expected_sent = sizeof(test_data) - offset; + EXPECT_TRUE(expected_sent > 0); + + s2n_blocked_status blocked = S2N_NOT_BLOCKED; + ssize_t result = s2n_ktls_sendv_with_offset(conn, large_test_iovecs.iovecs, + large_test_iovecs.iovecs_count, offset, &blocked); + EXPECT_EQUAL(result, expected_sent); + + EXPECT_EQUAL(out.sendmsg_invoked_count, 1); + EXPECT_EQUAL(blocked, S2N_NOT_BLOCKED); + EXPECT_OK(s2n_test_validate_ancillary(&out, TLS_APPLICATION_DATA, expected_sent)); + EXPECT_OK(s2n_test_validate_data(&out, test_data + offset, expected_sent)); + } }; /* Test: Partial write */ @@ -799,6 +831,128 @@ int main(int argc, char **argv) EXPECT_EQUAL(io_ctx.invoked_count, 1); EXPECT_EQUAL(blocked, S2N_BLOCKED_ON_WRITE); }; + + /* Test: Memory usage */ + { + const size_t iov_lens[100] = { 10, 5, 0, 1, 100, 100, 10 }; + const size_t small_iov_lens_count = 10; + const size_t large_iov_lens_count = s2n_array_len(iov_lens); + + DEFER_CLEANUP(struct s2n_test_iovecs small_iovecs = { 0 }, s2n_test_iovecs_free); + EXPECT_OK(s2n_test_new_iovecs(&small_iovecs, &test_data_blob, + iov_lens, small_iov_lens_count)); + + DEFER_CLEANUP(struct s2n_test_iovecs large_iovecs = { 0 }, s2n_test_iovecs_free); + EXPECT_OK(s2n_test_new_iovecs(&large_iovecs, &test_data_blob, + iov_lens, large_iov_lens_count)); + + const size_t one_iovec_size = sizeof(struct iovec); + const size_t large_iovecs_size = large_iovecs.iovecs_count * one_iovec_size; + + struct { + struct s2n_test_iovecs *iovecs; + size_t offset; + uint32_t expected_malloc; + uint32_t expected_malloc_count; + } test_cases[] = { + /* Small iovecs never require an allocation */ + { + .iovecs = &small_iovecs, + .offset = 1, + .expected_malloc_count = 0, + }, + { + .iovecs = &small_iovecs, + .offset = iov_lens[0], + .expected_malloc_count = 0, + }, + { + .iovecs = &small_iovecs, + .offset = iov_lens[0] + 1, + .expected_malloc_count = 0, + }, + /* Large iovecs with offset evenly divisible by the iov_lens do + * not require an alloc. + * Example: { x, y, z }, offset=x -> { y, z } + */ + { + .iovecs = &large_iovecs, + .offset = iov_lens[0], + .expected_malloc_count = 0, + }, + { + .iovecs = &large_iovecs, + .offset = iov_lens[0] + iov_lens[1], + .expected_malloc_count = 0, + }, + /* Large iovecs with offset not evenly divisible by the iov_lens + * modify an entry so require an alloc. + * Example: { x, y, z }, offset=1 -> { x-1, y, z } + */ + { + .iovecs = &large_iovecs, + .offset = 1, + .expected_malloc_count = 1, + .expected_malloc = large_iovecs_size, + }, + { + .iovecs = &large_iovecs, + .offset = iov_lens[0] + 1, + .expected_malloc_count = 1, + .expected_malloc = large_iovecs_size - one_iovec_size, + }, + /* Large iovecs that become small iovecs when the offset + * is applied do not require an alloc. + */ + { + .iovecs = &large_iovecs, + .offset = sizeof(test_data) - 1, + .expected_malloc_count = 0, + }, + /* No alloc if the entire large iovec is skipped */ + { + .iovecs = &large_iovecs, + .offset = sizeof(test_data), + .expected_malloc_count = 0, + }, + }; + + for (size_t i = 0; i < s2n_array_len(test_cases); i++) { + struct iovec *iovecs = test_cases[i].iovecs->iovecs; + const size_t iovecs_count = test_cases[i].iovecs->iovecs_count; + const size_t offset = test_cases[i].offset; + + const size_t expected_send = sizeof(test_data) - offset; + + DEFER_CLEANUP(struct s2n_connection *conn = s2n_connection_new(S2N_SERVER), + s2n_connection_ptr_free); + EXPECT_NOT_NULL(conn); + + DEFER_CLEANUP(struct s2n_test_ktls_io_stuffer out = { 0 }, + s2n_ktls_io_stuffer_free); + EXPECT_OK(s2n_test_init_ktls_io_stuffer_send(conn, &out)); + + /* Preemptively allocate sendmsg memory to avoid false positives */ + EXPECT_SUCCESS(s2n_stuffer_resize(&out.data_buffer, expected_send)); + EXPECT_SUCCESS(s2n_stuffer_resize(&out.ancillary_buffer, 100)); + + DEFER_CLEANUP(struct s2n_mem_test_cb_scope scope = { 0 }, + s2n_mem_test_free_callbacks); + EXPECT_OK(s2n_mem_test_init_callbacks(&scope)); + + s2n_blocked_status blocked = S2N_NOT_BLOCKED; + ssize_t result = s2n_ktls_sendv_with_offset(conn, iovecs, iovecs_count, + offset, &blocked); + EXPECT_EQUAL(result, sizeof(test_data) - offset); + + size_t malloc_count = test_cases[i].expected_malloc_count; + EXPECT_OK(s2n_mem_test_assert_malloc_count(malloc_count)); + if (malloc_count) { + EXPECT_OK(s2n_mem_test_assert_malloc(test_cases[i].expected_malloc)); + } + EXPECT_OK(s2n_mem_test_assert_all_freed()); + } + }; }; /* Test: s2n_ktls_send_cb */ @@ -814,8 +968,8 @@ int main(int argc, char **argv) /* Safety */ { struct s2n_test_ktls_io_stuffer ctx = { 0 }; - EXPECT_FAILURE_WITH_ERRNO(s2n_ktls_send_cb(NULL, test_data, 1), S2N_ERR_IO); - EXPECT_FAILURE_WITH_ERRNO(s2n_ktls_send_cb(&ctx, NULL, 1), S2N_ERR_IO); + EXPECT_FAILURE_WITH_ERRNO(s2n_ktls_send_cb(NULL, test_data, 1), S2N_ERR_NULL); + EXPECT_FAILURE_WITH_ERRNO(s2n_ktls_send_cb(&ctx, NULL, 1), S2N_ERR_NULL); }; /* Test: Basic write succeeds */ diff --git a/tls/s2n_ktls_io.c b/tls/s2n_ktls_io.c index b0448ff6303..04e210932aa 100644 --- a/tls/s2n_ktls_io.c +++ b/tls/s2n_ktls_io.c @@ -39,6 +39,9 @@ #define S2N_KTLS_RECORD_TYPE_SIZE (sizeof(uint8_t)) #define S2N_KTLS_CONTROL_BUFFER_SIZE (CMSG_SPACE(S2N_KTLS_RECORD_TYPE_SIZE)) +#define S2N_MAX_STACK_IOVECS 16 +#define S2N_MAX_STACK_IOVECS_MEM (S2N_MAX_STACK_IOVECS * sizeof(struct iovec)) + /* Used to override sendmsg and recvmsg for testing. */ static ssize_t s2n_ktls_default_sendmsg(void *io_context, const struct msghdr *msg); static ssize_t s2n_ktls_default_recvmsg(void *io_context, struct msghdr *msg); @@ -274,32 +277,73 @@ S2N_RESULT s2n_ktls_recvmsg(void *io_context, uint8_t *record_type, uint8_t *buf return S2N_RESULT_OK; } -static S2N_RESULT s2n_ktls_new_iovecs_with_offset(const struct iovec *bufs, - size_t count, size_t offs, struct s2n_blob *mem) +/* The iovec array `bufs` is constant and owned by the application. + * + * However, we need to apply the given offset to `bufs`. That may involve + * updating the iov_base and iov_len of entries in `bufs` to reflect the bytes + * already sent. Because `bufs` is constant, we need to instead copy `bufs` and + * modify the copy. + * + * Since one of the primary benefits of kTLS is that we avoid buffering application + * data and can pass application data as-is to the kernel, we try to limit the + * situations where we need to copy `bufs` and use stack memory where possible. + * + * Note: We are copying an array of iovecs here, NOT the scattered application + * data the iovecs reference. On Linux, the maximum data copied would be + * 1024 (IOV_MAX on Linux) * 16 (sizeof(struct iovec)) = ~16KB. + * + * To avoid any copies when using a large number of iovecs, applications should + * call s2n_sendv instead of s2n_sendv_with_offset. + */ +static S2N_RESULT s2n_ktls_update_bufs_with_offset(const struct iovec **bufs, size_t *count, + size_t offs, struct s2n_blob *mem) { - RESULT_ENSURE(bufs != NULL || count == 0, S2N_ERR_NULL); + RESULT_ENSURE_REF(bufs); + RESULT_ENSURE_REF(count); + RESULT_ENSURE(*bufs != NULL || *count == 0, S2N_ERR_NULL); RESULT_ENSURE_REF(mem); - RESULT_GUARD_POSIX(s2n_realloc(mem, sizeof(struct iovec) * count)); - struct iovec *new_bufs = (struct iovec *) (void *) mem->data; - RESULT_ENSURE_REF(new_bufs); - - for (size_t i = 0; i < count; i++) { - size_t old_len = bufs[i].iov_len; - if (offs < old_len) { - new_bufs[i].iov_base = (uint8_t *) bufs[i].iov_base + offs; - new_bufs[i].iov_len = old_len - offs; - offs = 0; - } else { - /* Zero any iovec skipped by the offset. - * We could change the count of the copy instead, but this is simpler. */ - new_bufs[i].iov_base = NULL; - new_bufs[i].iov_len = 0; - offs -= old_len; + size_t skipped = 0; + while (offs > 0) { + /* If we need to skip more iovecs than actually exist, + * then the offset is too large and therefore invalid. + */ + RESULT_ENSURE(skipped < *count, S2N_ERR_INVALID_ARGUMENT); + + size_t iov_len = (*bufs)[skipped].iov_len; + + /* This is the last iovec affected by the offset. */ + if (offs < iov_len) { + break; } + + offs -= iov_len; + skipped++; } - /* The offset cannot be greater than the total size of all iovecs */ - RESULT_ENSURE(offs == 0, S2N_ERR_INVALID_ARGUMENT); + + *count = (*count) - skipped; + if (*count == 0) { + return S2N_RESULT_OK; + } + + *bufs = &(*bufs)[skipped]; + if (offs == 0) { + return S2N_RESULT_OK; + } + + size_t size = (*count) * (sizeof(struct iovec)); + /* If possible, use the existing stack memory in `mem` for the copy. + * Otherwise, we need to allocate sufficient new heap memory. */ + if (size > mem->size) { + RESULT_GUARD_POSIX(s2n_alloc(mem, size)); + } + + struct iovec *new_bufs = (struct iovec *) (void *) mem->data; + RESULT_CHECKED_MEMCPY(new_bufs, *bufs, size); + new_bufs[0].iov_base = (uint8_t *) new_bufs[0].iov_base + offs; + new_bufs[0].iov_len = new_bufs[0].iov_len - offs; + *bufs = new_bufs; + return S2N_RESULT_OK; } @@ -312,13 +356,11 @@ ssize_t s2n_ktls_sendv_with_offset(struct s2n_connection *conn, const struct iov POSIX_ENSURE(offs_in >= 0, S2N_ERR_INVALID_ARGUMENT); size_t offs = offs_in; - DEFER_CLEANUP(struct s2n_blob new_bufs = { 0 }, s2n_free); + DEFER_CLEANUP(struct s2n_blob new_bufs = { 0 }, s2n_free_or_wipe); + uint8_t new_bufs_mem[S2N_MAX_STACK_IOVECS_MEM] = { 0 }; + POSIX_GUARD(s2n_blob_init(&new_bufs, new_bufs_mem, sizeof(new_bufs_mem))); if (offs > 0) { - /* We can't modify the application-owned iovecs to reflect the offset. - * Therefore, we must alloc and modify a copy. - */ - POSIX_GUARD_RESULT(s2n_ktls_new_iovecs_with_offset(bufs, count, offs, &new_bufs)); - bufs = (const struct iovec *) (void *) new_bufs.data; + POSIX_GUARD_RESULT(s2n_ktls_update_bufs_with_offset(&bufs, &count, offs, &new_bufs)); } size_t bytes_written = 0; @@ -329,6 +371,9 @@ ssize_t s2n_ktls_sendv_with_offset(struct s2n_connection *conn, const struct iov int s2n_ktls_send_cb(void *io_context, const uint8_t *buf, uint32_t len) { + POSIX_ENSURE_REF(io_context); + POSIX_ENSURE_REF(buf); + /* For now, all control records are assumed to be alerts. * We can set the record_type on the io_context in the future. */