Skip to content

Commit

Permalink
flb_utils_write_str: replacement character
Browse files Browse the repository at this point in the history
Signed-off-by: Matthew Fala <[email protected]>
  • Loading branch information
matthewfala committed Nov 12, 2021
1 parent d588bee commit 184a49b
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 7 deletions.
31 changes: 24 additions & 7 deletions src/flb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ extern struct flb_aws_error_reporter *error_reporter;
#include <openssl/rand.h>
#endif

static char *flb_utils_replacement_character = "\xEF\xBF\xBD";

void flb_utils_error(int err)
{
char *msg = NULL;
Expand Down Expand Up @@ -744,23 +746,38 @@ int flb_utils_write_str(char *buf, int *off, size_t size,

is_valid = FLB_TRUE;
for (b = 0; b < hex_bytes; b++) {
/* Trailing characters must start with bit 1 */
if (b > 0 && !(str[i+b] & 0x80)) {
/* Invalid unicode character. skip */
flb_warn("[pack] unexpected UTF-8 continuation byte, "
"omitting a character");
/* Leading characters must start with bits 11 */
if (b == 0 && ((str[i] & 0xC0) != 0xC0)) {
/* Invalid unicode character. replace */
flb_debug("[pack] unexpected UTF-8 leading byte, "
"substituting character with replacement character");
++i; /* Consume invalid leading byte */
is_valid = FLB_FALSE;
break;
}
/* Trailing characters must start with bits 10 */
else if (b > 0 && ((str[i] & 0xC0) != 0x80)) {
/* Invalid unicode character. replace */
flb_debug("[pack] unexpected UTF-8 continuation byte, "
"substituting character with replacement character");
/* This byte, i, is the start of the next unicode character */
is_valid = FLB_FALSE;
break;
}

tmp[b] = str[i+b];
tmp[b] = str[i];
++i;
}
--i;

if (is_valid) {
encoded_to_buf(p, tmp, hex_bytes);
p += hex_bytes;
}
i += (hex_bytes - 1);
else {
encoded_to_buf(p, flb_utils_replacement_character, 3);
p += 3;
}
}
else {
*p++ = c;
Expand Down
127 changes: 127 additions & 0 deletions tests/internal/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ struct url_check {
char *uri; /* expected uri */
};

/* One table entry for the flb_utils_write_str() test loop */
struct write_str_case {
char *input; /* raw bytes handed to flb_utils_write_str(); may contain 0x00 bytes */
int input_len; /* number of bytes of input to process (given explicitly, not via strlen) */
char *output; /* expected content of the destination buffer */
};

struct url_check url_checks[] = {
{0, "https://fluentbit.io/something",
"https", "fluentbit.io", "443", "/something"},
Expand Down Expand Up @@ -112,6 +118,28 @@ void test_url_split()
}
}

/*
 * Table-driven test loop for flb_utils_write_str().
 *
 * 'cases' is an array terminated by an entry whose input and output are both
 * zero. Every input is written into a zeroed scratch buffer; the call has to
 * return FLB_TRUE and the produced bytes have to match the expected output
 * exactly.
 *
 * The produced length ('off') is checked against the expected length before
 * the bytes are compared. Comparing only 'off' bytes would accept any output
 * that is merely a prefix of the expectation (including an empty output) and
 * would read past the end of the expected string when too much was produced.
 */
static void write_str_test_cases(struct write_str_case *cases) {
    char buf[100] = {0};
    int size = sizeof(buf);
    int off;
    int ret;
    int expected_len;
    struct write_str_case *tcase;

    for (tcase = cases; !(tcase->input == 0 && tcase->output == 0); ++tcase) {
        memset(buf, 0, size);
        off = 0;
        ret = flb_utils_write_str(buf, &off, size, tcase->input, tcase->input_len);
        TEST_CHECK(ret == FLB_TRUE);

        /* Expected outputs are plain C strings (NUL bytes appear escaped) */
        expected_len = (int) strlen(tcase->output);

        /* Length first: keeps memcmp() inside both buffers */
        if (!TEST_CHECK(off == expected_len &&
                        memcmp(buf, tcase->output, expected_len) == 0)) {
            TEST_MSG("Input string: %s", tcase->input);
            TEST_MSG("| Expected output: %s", tcase->output);
            TEST_MSG("| Produced output: %s", buf);
            TEST_MSG("| Expected length: %d, produced length: %d",
                     expected_len, off);
        }
    }
}

void test_write_str()
{
char buf[10];
Expand Down Expand Up @@ -147,6 +175,101 @@ void test_write_str()
TEST_CHECK(ret == FLB_FALSE);
}

/*
 * Multi-byte sequences whose trailing bytes are invalid: the broken sequence
 * is expected to be replaced, and the offending 0x01 bytes are expected to
 * show up escaped on their own.
 */
void test_write_str_invalid_trailing_bytes()
{
    struct write_str_case table[] = {
        /* 0xe3 + one good trailing byte + 0x01, which is not a trailing byte */
        {
            .input     = "\xe3\x81\x01""abc",
            .input_len = 6,
            .output    = "�""\\u0001abc"
        },

        /* 0xe3 followed directly by two bad trailing bytes */
        {
            .input     = "\xe3\x01\x01""abc",
            .input_len = 6,
            .output    = "�""\\u0001\\u0001abc"
        },

        /* end-of-table marker */
        { .input = 0, .input_len = 0, .output = 0 }
    };

    write_str_test_cases(table);
}

/*
 * Inputs in which a new leading byte (0xe3) shows up before the previous
 * sequence is complete, plus a control-byte prefix that must only be escaped.
 */
void test_write_str_invalid_leading_byte()
{
    struct write_str_case table[] = {
        /*
         * Two control bytes (0x00, 0x01) in front of one valid three-byte
         * character: the control bytes are escaped, the character is kept.
         */
        {
            .input     = "\x00\x01\xe3\x81\x82""abc",
            .input_len = 8,
            .output    = "\\u0000\\u0001""\xe3\x81\x82""abc"
        },

        /*
         * 0xf3 announces three trailing bytes but only two follow before
         * 0xe3 starts a new, valid character. The 0xf3 0x81 0x81 fragment
         * must be replaced without swallowing the 0xe3 sequence.
         */
        {
            .input     = "\xf3\x81\x81\xe3\x81\x82""abc",
            .input_len = 9,
            .output    = "�""\xe3\x81\x82""abc"
        },

        /*
         * 0xf3 followed by one trailing byte and then 0x01, which is not a
         * trailing byte: the fragment is replaced, 0x01 is escaped and the
         * following 0xe3 sequence is kept.
         */
        {
            .input     = "\xf3\x81\x01\xe3\x81\x82""abc",
            .input_len = 9,
            .output    = "�""\\u0001""\xe3\x81\x82""abc"
        },

        /* end-of-table marker */
        { .input = 0, .input_len = 0, .output = 0 }
    };

    write_str_test_cases(table);
}

/*
 * Bytes that can never start a sequence (0x81, 0x82) used in leading
 * position: each one is expected to yield its own replacement character.
 */
void test_write_str_invalid_leading_byte_case_2()
{
    struct write_str_case table[] = {
        /* Two stray bytes, 0x81 and 0x82, directly in front of plain text */
        {
            .input     = "\x81\x82""abc",
            .input_len = 5,
            .output    = "��abc"
        },

        /*
         * 0xf3 (expects three trailing bytes) + one trailing byte + 0x01,
         * then the stray bytes 0x81 and 0x82: one replacement for the broken
         * 0xf3 fragment, an escaped 0x01, and one replacement per stray byte.
         */
        {
            .input     = "\xf3\x81\x01\x81\x82""abc",
            .input_len = 8,
            .output    = "�\\u0001��abc"
        },

        /* end-of-table marker */
        { .input = 0, .input_len = 0, .output = 0 }
    };

    write_str_test_cases(table);
}

/*
 * Boundary inputs for flb_utils_write_str().
 */
void test_write_str_edge_cases()
{
    struct write_str_case table[] = {
        /*
         * The input is a single 0xe3 byte with nothing after it. This guards
         * against reading past the end of the input; the expected output is
         * empty, i.e. the incomplete character is discarded.
         */
        {
            .input     = "\xe3",
            .input_len = 1,
            .output    = ""
        },

        /* end-of-table marker */
        { .input = 0, .input_len = 0, .output = 0 }
    };

    write_str_test_cases(table);
}

struct proxy_url_check {
int ret;
char *url; /* full URL */
Expand Down Expand Up @@ -264,6 +387,10 @@ TEST_LIST = {
/* JSON maps iteration */
{ "url_split", test_url_split },
{ "write_str", test_write_str },
{ "test_write_str_invalid_trailing_bytes", test_write_str_invalid_trailing_bytes },
{ "test_write_str_invalid_leading_byte", test_write_str_invalid_leading_byte },
{ "test_write_str_edge_cases", test_write_str_edge_cases },
{ "test_write_str_invalid_leading_byte_case_2", test_write_str_invalid_leading_byte_case_2 },
{ "proxy_url_split", test_proxy_url_split },
{ 0 }
};

0 comments on commit 184a49b

Please sign in to comment.