Skip to content

Commit

Permalink
utils: add private block hex range tests and buffer overrun sentinels
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewfala committed Nov 17, 2021
1 parent 692e912 commit f097f40
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 44 deletions.
38 changes: 19 additions & 19 deletions src/flb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,14 @@ extern struct flb_aws_error_reporter *error_reporter;
#include <openssl/rand.h>
#endif

#define FLB_UTILS_REPLACE_FRAGMENT_START_LEN 31
#define FLB_UTILS_REPLACE_FRAGMENT_END_LEN 4

/*
* The following block descriptor describes the private use unicode character range
* used for denoting invalid unicode fragments. Invalid fragment 0xCE would become
* used for denoting invalid utf-8 fragments. Invalid fragment 0xCE would become
* Unicode codepoint U+E0CE if FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR is set to
* 0xE0 since U+E0CE = U+<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR><HEX_FRAGMENT>
*/
#define FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR 0xE0

static char *flb_utils_replace_fragment_start
= "\xEF\xBF\xBD"" corrupted utf-8 sequence : ";
static char *flb_utils_replace_fragment_end = " \xEF\xBF\xBD";

void flb_utils_error(int err)
{
char *msg = NULL;
Expand Down Expand Up @@ -644,7 +637,6 @@ static inline void encoded_to_buf(char *out, const char *in, int len)
int flb_utils_write_str(char *buf, int *off, size_t size,
const char *str, size_t str_len)
{
static const char int2hex[] = "0123456789abcdef";
int i;
int b;
int ret;
Expand Down Expand Up @@ -752,9 +744,6 @@ int flb_utils_write_str(char *buf, int *off, size_t size,
}
else if (c > 0xFFFF) {
utf_sequence_length = flb_utf8_len(str + i);
if (available - written < 6) {
return FLB_FALSE;
}

if (i + utf_sequence_length > str_len) {
break; /* skip truncated UTF-8 */
Expand Down Expand Up @@ -791,25 +780,36 @@ int flb_utils_write_str(char *buf, int *off, size_t size,
--i;

if (is_valid) {
if (available - written < utf_sequence_length) {
return FLB_FALSE;
}

encoded_to_buf(p, tmp, utf_sequence_length);
p += utf_sequence_length;
}
else {
/* utf sequence is invalid. Print fragments out using private block
* codepoint range 0xE000 to 0xE0FF
if (available - written < utf_sequence_length * 3) {
return FLB_FALSE;
}

/*
* Utf-8 sequence is invalid. Map fragments to private use area
* codepoints in range:
* 0x<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR>00 to
* 0x<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR>FF
*/
for (b = 0; b < utf_sequence_length; ++b) {
/*
* Utf-8 private block invalid hex formatting
* Format unicode charpoint in the following format:
* Utf-8 private block invalid hex mapping. Format unicode charpoint
* in the following format:
*
* +--------+--------+--------+
* |1110PPPP|10PPPPHH|10HHHHHH|
* +--------+--------+--------+
*
* Where:
* P is FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR bits (1 byte)
* H is Hex fragment bits (1 byte)
* H is Utf-8 fragment hex bits (1 byte)
* 1 is bit 1
* 0 is bit 0
*/
Expand All @@ -825,10 +825,10 @@ int flb_utils_write_str(char *buf, int *off, size_t size,
*p = 0x80;

/* print end of unicode private block header last 4 bits */
*p |= (FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f;
*p |= ((FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f);

/* print hex fragment first 2 bits */
*p |= tmp[b] >> 6;
*p |= (tmp[b] >> 6) & 0x03;
*p++;

/* unicode codepoint middle */
Expand Down
121 changes: 96 additions & 25 deletions tests/internal/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

#include "flb_tests_internal.h"

#define FLB_UTILS_TEST_FRAGMENT_START "\xEF\xBF\xBD"" corrupted utf-8 sequence : "
#define FLB_UTILS_TEST_FRAGMENT_END " \xEF\xBF\xBD"


struct url_check {
int ret;
Expand All @@ -23,6 +20,7 @@ struct write_str_case {
char *input;
int input_len;
char *output;
int ret;
};

struct url_check url_checks[] = {
Expand Down Expand Up @@ -122,32 +120,51 @@ void test_url_split()
}

/*
 * Run the flb_utils_write_str test cases with the default 100-byte output
 * buffer. All the work is delegated to write_str_test_cases_w_buf_size(),
 * which owns the buffer, so no local buffer is needed here.
 */
static void write_str_test_cases_w_buf_size(struct write_str_case *cases, int buf_size);
static void write_str_test_cases(struct write_str_case *cases) {
    write_str_test_cases_w_buf_size(cases, 100);
}

/*
 * Run every case in `cases` through flb_utils_write_str() while reporting an
 * output buffer of `buf_size` bytes.
 *
 * The buffer is allocated one byte larger than the size passed to
 * flb_utils_write_str(). That extra byte is zeroed before each case and acts
 * as a sentinel: if it is non-zero afterwards, the function wrote past the
 * size it was given.
 *
 * The case list must end with an entry whose input and output are both NULL.
 */
static void write_str_test_cases_w_buf_size(struct write_str_case *cases, int buf_size) {
    int size = buf_size + 1; /* allocation size: buf_size plus the sentinel byte */
    int off;
    int ret;
    char *buf;
    struct write_str_case *tcase;

    buf = flb_calloc(size, sizeof(char));
    if (!TEST_CHECK(buf != NULL)) {
        TEST_MSG("Could not allocate %d bytes for the output buffer", size);
        return;
    }

    for (tcase = cases; !(tcase->input == NULL && tcase->output == NULL); ++tcase) {
        memset(buf, 0, size);
        off = 0;

        /*
         * Call exactly once per case and pass buf_size (not size), so the
         * sentinel byte is never a legal write target.
         */
        ret = flb_utils_write_str(buf, &off, buf_size, tcase->input, tcase->input_len);

        /* Each case states its own expected return value */
        if (!TEST_CHECK(ret == tcase->ret)) {
            TEST_MSG("Input string: %s", tcase->input);
            TEST_MSG("| Expected return value: %s", (tcase->ret == FLB_TRUE) ? "FLB_TRUE"
                                                                             : "FLB_FALSE");
            TEST_MSG("| Produced return value: %s", (ret == FLB_TRUE) ? "FLB_TRUE"
                                                                      : "FLB_FALSE");
        }
        if (!TEST_CHECK(memcmp(buf, tcase->output, off) == 0)) {
            TEST_MSG("Input string: %s", tcase->input);
            TEST_MSG("| Expected output: %s", tcase->output);
            TEST_MSG("| Produced output: %s", buf);
        }
        if (!TEST_CHECK(strlen(buf) == strlen(tcase->output))) {
            TEST_MSG("Input string: %s", tcase->input);
            /* strlen() returns size_t, hence %zu */
            TEST_MSG("| Expected length: %zu", strlen(tcase->output));
            TEST_MSG("| Produced length: %zu", strlen(buf));
            TEST_MSG("| Expected output: %s", tcase->output);
            TEST_MSG("| Produced output: %s", buf);
        }
        /* Sentinel byte must still be zero, otherwise the buffer was overrun */
        if (!TEST_CHECK(buf[size-1] == 0)) {
            TEST_MSG("Out buffer overwrite detected '%c'", buf[size-1]);
        }
    }

    flb_free(buf);
}

void test_write_str()
Expand Down Expand Up @@ -188,16 +205,22 @@ void test_write_str()
void test_write_str_invalid_trailing_bytes()
{
struct write_str_case cases[] = {
/* Invalid unicode (one bad trailing byte) */
/* Invalid unicode (one bad trailing byte) */
{
"\xe3\x81\x01""abc", 6, /* note that 0x01 is an invalid byte */
"\\u0001abc" /* replace invalid unicode */
"\xee\x83\xa3" /* e3 fragment */ /* replace invalid unicode */
"\xee\x82\x81" /* 81 fragment */
"\\u0001abc",
FLB_TRUE
},

/* Invalid unicode (two bad trailing byte) */
/*
* Invalid unicode (two bad trailing bytes)
*/
{
"\xe3\x01\x01""abc", 6,
"\\u0001\\u0001abc"
"\xee\x83\xa3" /* e3 fragment */
"\\u0001\\u0001abc",
FLB_TRUE
},
{ 0 }
};
Expand All @@ -214,7 +237,8 @@ void test_write_str_invalid_leading_byte()
*/
{
"\x00\x01\xe3\x81\x82""abc", 8, /* note that 0x01 is an invalid byte */
"\\u0000\\u0001""\xe3\x81\x82""abc" /* escape hex */
"\\u0000\\u0001""\xe3\x81\x82""abc", /* escape hex */
FLB_TRUE
},
/*
* Invalid unicode fragment (two byte fragment)
Expand All @@ -224,16 +248,22 @@ void test_write_str_invalid_leading_byte()
*/
{
"\xf3\x81\x81\xe3\x81\x82""abc", 9, /* note that 0xf3 0x81 0x81 is an invalid fragment */
FLB_UTILS_TEST_FRAGMENT_START"f3 81 81"FLB_UTILS_TEST_FRAGMENT_END"\xe3\x81\x82""abc" /* replace invalid unicode */
"\xee\x83\xb3" /* replace invalid unicode */
"\xee\x82\x81"
"\xee\x82\x81"
"\xe3\x81\x82""abc",
FLB_TRUE
},

/*
* Invalid unicode (one bad leading byte + one bad trailing byte)
* note that 0xf3 is a leading byte with 3 trailing bytes. 0x01 is an invalid byte
*/
{
"\xf3\x81\x01\xe3\x81\x82""abc", 9, /* note that 0x01 is an invalid byte */
FLB_UTILS_TEST_FRAGMENT_START"f3 81"FLB_UTILS_TEST_FRAGMENT_END"\\u0001""\xe3\x81\x82""abc" /* replace invalid unicode */
"\xee\x83\xb3" /* f3 fragment */ /* replace invalid unicode */
"\xee\x82\x81" /* 81 fragment */
"\\u0001""\xe3\x81\x82""abc", /* valid unicode */
FLB_TRUE
},
{ 0 }
};
Expand All @@ -248,19 +278,25 @@ void test_write_str_invalid_leading_byte_case_2()
/* Invalid leading bytes */
{
"\x81\x82""abc", 5, /* note that 0x81 & 0x82 are invalid leading bytes */
FLB_UTILS_TEST_FRAGMENT_START"81"FLB_UTILS_TEST_FRAGMENT_END
FLB_UTILS_TEST_FRAGMENT_START"82"FLB_UTILS_TEST_FRAGMENT_END"abc" /* replace invalid unicode */
"\xee\x82\x81" /* 81 fragment */ /* replace invalid unicode */
"\xee\x82\x82" /* 82 fragment */
"abc",
FLB_TRUE
},

/*
* Invalid unicode (one bad leading byte + one bad trailing byte + one bad leading byte)
* note that 0xf3 is a leading byte with 3 trailing bytes. 0x01 is an invalid byte
* 0x81 & 0x82 are invalid leading bytes
*/
{
"\xf3\x81\x01\x81\x82""abc", 8, /* note that 0x81 & 0x82 are invalid leading bytes */
FLB_UTILS_TEST_FRAGMENT_START"f3 81"FLB_UTILS_TEST_FRAGMENT_END
"\\u0001""\xEF\xBF\xBD\xEF\xBF\xBD""abc" /* replace invalid unicode */
"\xee\x83\xb3" /* f3 fragment */ /* replace invalid unicode */
"\xee\x82\x81" /* 81 fragment */
"\\u0001" /* 0x01 hex escape */
"\xee\x82\x81" /* 81 fragment */
"\xee\x82\x82" /* 82 fragment */
"abc",
FLB_TRUE
},
{ 0 }
};
Expand All @@ -273,15 +309,49 @@ void test_write_str_edge_cases()
struct write_str_case cases[] = {
/* Invalid unicode (one bad leading byte) */
{
"\xe3", 1, /* will this buffer overrun? */
"" /* discard invalid unicode */
"\xf3", 1, /* will this buffer overrun? */
"", /* discard invalid unicode */
FLB_TRUE
},
{ 0 }
};

write_str_test_cases(cases);
}

/*
 * Check how flb_utils_write_str() behaves when the output buffer is too small.
 * Every case runs against a 5-byte output buffer. An invalid byte such as
 * 0x81 is expected to come out as a 3-byte private-use fragment.
 */
void test_write_str_buffer_overrun()
{
    enum { OUT_BUF_SIZE = 5 };

    struct write_str_case cases[] = {
        /*
         * The 81 fragment (3 bytes) fits, but there is not enough space left
         * for the valid sequence "\xe3\x81\x82" that follows it.
         */
        {
            .input     = "\x81"
                         "\xe3\x81\x82",
            .input_len = 4,
            .output    = "\xee\x82\x81", /* 81 fragment */
            .ret       = FLB_FALSE
        },
        /* Exact fit: 2 plain bytes + 3-byte fragment = 5 bytes */
        {
            .input     = "aa""\x81",
            .input_len = 3,
            .output    = "aa"
                         "\xee\x82\x81", /* 81 fragment */
            .ret       = FLB_TRUE
        },
        /*
         * One byte too many: 3 plain bytes + 3-byte fragment = 6 bytes, which
         * exceeds the 5-byte buffer. Only "aaa" is expected in the output;
         * the "\xee\x82\x81" fragment would overrun.
         */
        {
            .input     = "aaa""\x81",
            .input_len = 4,
            .output    = "aaa",
            .ret       = FLB_FALSE
        },
        /*
         * The input alone (6 bytes) is already greater than the buffer, so
         * no output is expected at all.
         */
        {
            .input     = "aaa"
                         "\xe3\x81\x82",
            .input_len = 6,
            .output    = "",
            .ret       = FLB_FALSE
        },
        { 0 }
    };

    write_str_test_cases_w_buf_size(cases, OUT_BUF_SIZE);
}

struct proxy_url_check {
int ret;
char *url; /* full URL */
Expand Down Expand Up @@ -403,6 +473,7 @@ TEST_LIST = {
{ "test_write_str_invalid_leading_byte", test_write_str_invalid_leading_byte },
{ "test_write_str_edge_cases", test_write_str_edge_cases },
{ "test_write_str_invalid_leading_byte_case_2", test_write_str_invalid_leading_byte_case_2 },
{ "test_write_str_buffer_overrun", test_write_str_buffer_overrun },
{ "proxy_url_split", test_proxy_url_split },
{ 0 }
};

0 comments on commit f097f40

Please sign in to comment.