diff --git a/src/flb_utils.c b/src/flb_utils.c index 2ccd3a935f8..97819867840 100644 --- a/src/flb_utils.c +++ b/src/flb_utils.c @@ -47,21 +47,14 @@ extern struct flb_aws_error_reporter *error_reporter; #include #endif -#define FLB_UTILS_REPLACE_FRAGMENT_START_LEN 31 -#define FLB_UTILS_REPLACE_FRAGMENT_END_LEN 4 - /* * The following block descriptor describes the private use unicode character range - * used for denoting invalid unicode fragments. Invalid fragment 0xCE would become + * used for denoting invalid utf-8 fragments. Invalid fragment 0xCE would become * utf-8 codepoint U+E0CE if FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR is set to * E0 since U+E0CE = U+ */ #define FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR 0xE0 -static char *flb_utils_replace_fragment_start - = "\xEF\xBF\xBD"" corrupted utf-8 sequence : "; -static char *flb_utils_replace_fragment_end = " \xEF\xBF\xBD"; - void flb_utils_error(int err) { char *msg = NULL; @@ -644,7 +637,6 @@ static inline void encoded_to_buf(char *out, const char *in, int len) int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_t str_len) { - static const char int2hex[] = "0123456789abcdef"; int i; int b; int ret; @@ -752,9 +744,6 @@ int flb_utils_write_str(char *buf, int *off, size_t size, } else if (c > 0xFFFF) { utf_sequence_length = flb_utf8_len(str + i); - if (available - written < 6) { - return FLB_FALSE; - } if (i + utf_sequence_length > str_len) { break; /* skip truncated UTF-8 */ @@ -791,17 +780,28 @@ int flb_utils_write_str(char *buf, int *off, size_t size, --i; if (is_valid) { + if (available - written < utf_sequence_length) { + return FLB_FALSE; + } + encoded_to_buf(p, tmp, utf_sequence_length); p += utf_sequence_length; } else { - /* utf sequence is invalid. Print fragments out using private block - * codepoint range 0xE000 to 0xE0FF + if (available - written < utf_sequence_length * 3) { + return FLB_FALSE; + } + + /* + * Utf-8 sequence is invalid. 
Map fragments to private use area + * codepoints in range: + * 0xE000 to + * 0xE0FF */ for (b = 0; b < utf_sequence_length; ++b) { /* - * Utf-8 private block invalid hex formatting - * Format unicode charpoint in the following format: + * Utf-8 private block invalid hex mapping. Format unicode codepoint + * in the following format: * * +--------+--------+--------+ * |1110PPPP|10PPPPHH|10HHHHHH| @@ -809,7 +809,7 @@ int flb_utils_write_str(char *buf, int *off, size_t size, * * Where: * P is FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR bits (1 byte) - * H is Hex fragment bits (1 byte) + * H is Utf-8 fragment hex bits (1 byte) * 1 is bit 1 * 0 is bit 0 */ @@ -825,10 +825,10 @@ int flb_utils_write_str(char *buf, int *off, size_t size, *p = 0x80; /* print end of unicode private block header last 4 bits */ - *p |= (FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f; + *p |= ((FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f); /* print hex fragment first 2 bits */ - *p |= tmp[b] >> 6; + *p |= (tmp[b] >> 6) & 0x03; *p++; /* unicode codepoint middle */ diff --git a/tests/internal/utils.c b/tests/internal/utils.c index a6f7a2a20d1..4e9e4604abf 100644 --- a/tests/internal/utils.c +++ b/tests/internal/utils.c @@ -6,9 +6,6 @@ #include "flb_tests_internal.h" -#define FLB_UTILS_TEST_FRAGMENT_START "\xEF\xBF\xBD"" corrupted utf-8 sequence : " -#define FLB_UTILS_TEST_FRAGMENT_END " \xEF\xBF\xBD" - struct url_check { int ret; @@ -23,6 +20,7 @@ struct write_str_case { char *input; int input_len; char *output; + int ret; }; struct url_check url_checks[] = { @@ -122,9 +120,15 @@ void test_url_split() } /* test case loop for flb_utils_write_str */ +static void write_str_test_cases_w_buf_size(struct write_str_case *cases, int buf_size); static void write_str_test_cases(struct write_str_case *cases) { - char buf[100] = {0}; - int size = sizeof(buf); + write_str_test_cases_w_buf_size(cases, 100); +} + +/* test case loop for flb_utils_write_str */ +static void
write_str_test_cases_w_buf_size(struct write_str_case *cases, int buf_size) { + char *buf = flb_calloc(buf_size + 1, sizeof(char)); + int size = buf_size + 1; int off; int ret; @@ -132,8 +136,15 @@ static void write_str_test_cases(struct write_str_case *cases) { while (!(tcase->input == 0 && tcase->output == 0)) { memset(buf, 0, size); off = 0; - ret = flb_utils_write_str(buf, &off, size, tcase->input, tcase->input_len); - TEST_CHECK(ret == FLB_TRUE); + ret = flb_utils_write_str(buf, &off, buf_size, tcase->input, tcase->input_len); + + if(!TEST_CHECK(ret == tcase->ret)) { + TEST_MSG("Input string: %s", tcase->input); + TEST_MSG("| Expected return value: %s", (tcase->ret == FLB_TRUE) ? "FLB_TRUE" + : "FLB_FALSE"); + TEST_MSG("| Produced return value: %s", (ret == FLB_TRUE) ? "FLB_TRUE" + : "FLB_FALSE"); + } if(!TEST_CHECK(memcmp(buf, tcase->output, off) == 0)) { TEST_MSG("Input string: %s", tcase->input); TEST_MSG("| Expected output: %s", tcase->output); @@ -141,13 +152,19 @@ static void write_str_test_cases(struct write_str_case *cases) { } if (!TEST_CHECK(strlen(buf) == strlen(tcase->output))) { TEST_MSG("Input string: %s", tcase->input); - TEST_MSG("| Expected length: %d", strlen(tcase->output)); - TEST_MSG("| Produced length: %d", strlen(buf)); + TEST_MSG("| Expected length: %zu", strlen(tcase->output)); + TEST_MSG("| Produced length: %zu", strlen(buf)); TEST_MSG("| Expected output: %s", tcase->output); TEST_MSG("| Produced output: %s", buf); } + if (!TEST_CHECK(buf[size-1] == 0)) { + TEST_MSG("Out buffer overwrite detected '%c'", buf[size-1]); + } + ++tcase; } + + flb_free(buf); } void test_write_str() @@ -188,16 +205,22 @@ void test_write_str() void test_write_str_invalid_trailing_bytes() { struct write_str_case cases[] = { - /* Invalid unicode (one bad trailing byte) */ + /* Invalid unicode (one bad trailing bytes) */ { "\xe3\x81\x01""abc", 6, /* note that 0x01 is an invalid byte */ - "\\u0001abc" /* replace invalid unicode */ + "\xee\x83\xa3" /* e3 
fragment */ /* replace invalid unicode */ + "\xee\x82\x81" /* 81 fragment */ + "\\u0001abc", + FLB_TRUE }, - - /* Invalid unicode (two bad trailing byte) */ + /* + * Invalid unicode (two bad trailing bytes) + */ { "\xe3\x01\x01""abc", 6, - "\\u0001\\u0001abc" + "\xee\x83\xa3" /* e3 fragment */ + "\\u0001\\u0001abc", + FLB_TRUE }, { 0 } }; @@ -214,7 +237,8 @@ void test_write_str_invalid_leading_byte() */ { "\x00\x01\xe3\x81\x82""abc", 8, /* note that 0x01 is an invalid byte */ - "\\u0000\\u0001""\xe3\x81\x82""abc" /* escape hex */ + "\\u0000\\u0001""\xe3\x81\x82""abc", /* escape hex */ + FLB_TRUE }, /* * Invalid unicode fragment (two byte fragment) @@ -224,16 +248,22 @@ void test_write_str_invalid_leading_byte() */ { "\xf3\x81\x81\xe3\x81\x82""abc", 9, /* note that 0xf3 0x81 0x81 is an invalid fragment */ - FLB_UTILS_TEST_FRAGMENT_START"f3 81 81"FLB_UTILS_TEST_FRAGMENT_END"\xe3\x81\x82""abc" /* replace invalid unicode */ + "\xee\x83\xb3" /* replace invalid unicode */ + "\xee\x82\x81" + "\xee\x82\x81" + "\xe3\x81\x82""abc", + FLB_TRUE }, - /* * Invalid unicode (one bad leading byte + one bad trailing byte) * note that 0xf3 is a leading byte with 3 trailing bytes. 
0x01 is an invalid byte */ { "\xf3\x81\x01\xe3\x81\x82""abc", 9, /* note that 0x01 is an invalid byte */ - FLB_UTILS_TEST_FRAGMENT_START"f3 81"FLB_UTILS_TEST_FRAGMENT_END"\\u0001""\xe3\x81\x82""abc" /* replace invalid unicode */ + "\xee\x83\xb3" /* f3 fragment */ /* replace invalid unicode */ + "\xee\x82\x81" /* 81 fragment */ + "\\u0001""\xe3\x81\x82""abc", /* valid unicode */ + FLB_TRUE }, { 0 } }; @@ -248,10 +278,11 @@ void test_write_str_invalid_leading_byte_case_2() /* Invalid leading bytes */ { "\x81\x82""abc", 5, /* note that 0x81 & 0x82 are invalid leading bytes */ - FLB_UTILS_TEST_FRAGMENT_START"81"FLB_UTILS_TEST_FRAGMENT_END - FLB_UTILS_TEST_FRAGMENT_START"82"FLB_UTILS_TEST_FRAGMENT_END"abc" /* replace invalid unicode */ + "\xee\x82\x81" /* 81 fragment */ /* replace invalid unicode */ + "\xee\x82\x82" /* 82 fragment */ + "abc", + FLB_TRUE }, - /* * Invalid unicode (one bad leading byte + one bad trailing byte + one bad leading byte) * note that 0xf3 is a leading byte with 3 trailing bytes. 0x01 is an invalid byte @@ -259,8 +290,13 @@ void test_write_str_invalid_leading_byte_case_2() */ { "\xf3\x81\x01\x81\x82""abc", 8, /* note that 0x81 & 0x82 are invalid leading bytes */ - FLB_UTILS_TEST_FRAGMENT_START"f3 81"FLB_UTILS_TEST_FRAGMENT_END - "\\u0001""\xEF\xBF\xBD\xEF\xBF\xBD""abc" /* replace invalid unicode */ + "\xee\x83\xb3" /* f3 fragment */ /* replace invalid unicode */ + "\xee\x82\x81" /* 81 fragment */ + "\\u0001" /* 0x01 hex escape */ + "\xee\x82\x81" /* 81 fragment */ + "\xee\x82\x82" /* 82 fragment */ + "abc", + FLB_TRUE }, { 0 } }; @@ -273,8 +309,9 @@ void test_write_str_edge_cases() struct write_str_case cases[] = { /* Invalid unicode (one bad leading byte) */ { - "\xe3", 1, /* will this buffer overrun? */ - "" /* discard invalid unicode */ + "\xf3", 1, /* will this buffer overrun? 
*/ + "", /* discard invalid unicode */ + FLB_TRUE }, { 0 } }; @@ -282,6 +319,39 @@ void test_write_str_edge_cases() write_str_test_cases(cases); } +void test_write_str_buffer_overrun() +{ + struct write_str_case cases[] = { + { + "\x81" + "\xe3\x81\x82", 4, + "\xee\x82\x81", /* 81 fragment */ + /* Not enough space for valid unicode fragment "\xe3\x81\x82" */ + FLB_FALSE + }, + { + "aa""\x81", 3, + "aa" + "\xee\x82\x81", /* 81 fragment */ + FLB_TRUE + }, + { + "aaa""\x81", 4, /* out buffer size: 5, needed bytes: 2 + 3 + 3 = 8 */ + "aaa", + /* "\xee\x82\x81", */ /* 81 fragment -- would overrun */ + FLB_FALSE + }, + { + "aaa" + "\xe3\x81\x82", 6, /* required is already greater than buffer */ + "", + FLB_FALSE + }, + { 0 } + }; + write_str_test_cases_w_buf_size(cases, 5); +} + struct proxy_url_check { int ret; char *url; /* full URL */ @@ -403,6 +473,7 @@ TEST_LIST = { { "test_write_str_invalid_leading_byte", test_write_str_invalid_leading_byte }, { "test_write_str_edge_cases", test_write_str_edge_cases }, { "test_write_str_invalid_leading_byte_case_2", test_write_str_invalid_leading_byte_case_2 }, + { "test_write_str_buffer_overrun", test_write_str_buffer_overrun }, { "proxy_url_split", test_proxy_url_split }, { 0 } };