Skip to content

Commit

Permalink
utils: hex fragment tests
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewfala committed Nov 17, 2021
1 parent a006b7b commit 692e912
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 17 deletions.
65 changes: 50 additions & 15 deletions src/flb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ extern struct flb_aws_error_reporter *error_reporter;
/* Byte lengths of the two replacement-fragment marker strings defined below */
#define FLB_UTILS_REPLACE_FRAGMENT_START_LEN 31
#define FLB_UTILS_REPLACE_FRAGMENT_END_LEN 4

/*
* High byte of the Unicode private-use code points used to denote invalid
* UTF-8 fragments. Each invalid byte is reported as the code point
* U+<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR><HEX_FRAGMENT>.
* Example: with the descriptor set to 0xE0, the invalid byte 0xCE becomes
* code point U+E0CE, which is the UTF-8 byte sequence EE 83 8E.
*/
#define FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR 0xE0

/*
* Marker strings for reporting a corrupted UTF-8 sequence. Both contain the
* bytes EF BF BD, the UTF-8 encoding of U+FFFD (REPLACEMENT CHARACTER). Their
* byte lengths must stay in sync with FLB_UTILS_REPLACE_FRAGMENT_START_LEN
* and FLB_UTILS_REPLACE_FRAGMENT_END_LEN.
*/
static char *flb_utils_replace_fragment_start
= "\xEF\xBF\xBD"" corrupted utf-8 sequence : ";
static char *flb_utils_replace_fragment_end = " \xEF\xBF\xBD";
Expand Down Expand Up @@ -787,22 +795,49 @@ int flb_utils_write_str(char *buf, int *off, size_t size,
p += utf_sequence_length;
}
else {
encoded_to_buf(p, flb_utils_replace_fragment_start,
FLB_UTILS_REPLACE_FRAGMENT_START_LEN);
p += FLB_UTILS_REPLACE_FRAGMENT_START_LEN;

for (utf_sequence_number = 0; utf_sequence_number < utf_sequence_length;
++utf_sequence_number) {
if (utf_sequence_number > 0) {
*p++ = ' ';
}
*p++ = int2hex[(tmp[utf_sequence_number] & 0xff) / 0x10];
*p++ = int2hex[(tmp[utf_sequence_number] & 0xff) % 0x10];
/* The UTF-8 sequence is invalid. Emit each of its bytes as a private-use
* code point in the range U+E000 to U+E0FF.
*/
for (b = 0; b < utf_sequence_length; ++b) {
/*
* UTF-8 encoding of the private-use code point for one invalid byte.
* The code point is written as a 3-byte UTF-8 sequence laid out as:
*
* +--------+--------+--------+
* |1110PPPP|10PPPPHH|10HHHHHH|
* +--------+--------+--------+
*
* Where:
* P is a bit of FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR (8 bits in total)
* H is a bit of the invalid byte, i.e. the hex fragment (8 bits in total)
* 1 is bit 1
* 0 is bit 0
*/

/* unicode codepoint start */
*p = 0xE0;

/* print unicode private block header first 4 bits */
*p |= FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR >> 4;
*p++;

/* unicode codepoint middle */
*p = 0x80;

/* print end of unicode private block header last 4 bits */
*p |= (FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f;

/* print hex fragment first 2 bits */
*p |= tmp[b] >> 6;
*p++;

/* unicode codepoint end (last continuation byte) */
*p = 0x80;

/* print hex fragment last 6 bits */
*p |= tmp[b] & 0x3f;
*p++;
}

encoded_to_buf(p, flb_utils_replace_fragment_end,
FLB_UTILS_REPLACE_FRAGMENT_END_LEN);
p += FLB_UTILS_REPLACE_FRAGMENT_END_LEN;
}
}
else {
Expand Down
11 changes: 9 additions & 2 deletions tests/internal/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,13 @@ static void write_str_test_cases(struct write_str_case *cases) {
TEST_MSG("| Expected output: %s", tcase->output);
TEST_MSG("| Produced output: %s", buf);
}
if (!TEST_CHECK(strlen(buf) == strlen(tcase->output))) {
TEST_MSG("Input string: %s", tcase->input);
TEST_MSG("| Expected length: %d", strlen(tcase->output));
TEST_MSG("| Produced length: %d", strlen(buf));
TEST_MSG("| Expected output: %s", tcase->output);
TEST_MSG("| Produced output: %s", buf);
}
++tcase;
}
}
Expand Down Expand Up @@ -184,13 +191,13 @@ void test_write_str_invalid_trailing_bytes()
/* Invalid unicode (one bad trailing byte) */
{
"\xe3\x81\x01""abc", 6, /* note that 0x01 is an invalid byte */
FLB_UTILS_TEST_FRAGMENT_START"e3 81"FLB_UTILS_TEST_FRAGMENT_END"\\u0001abc" /* replace invalid unicode */
"\\u0001abc" /* replace invalid unicode */
},

/* Invalid unicode (two bad trailing byte) */
{
"\xe3\x01\x01""abc", 6,
FLB_UTILS_TEST_FRAGMENT_START"e3"FLB_UTILS_TEST_FRAGMENT_END"\\u0001\\u0001abc"
"\\u0001\\u0001abc"
},
{ 0 }
};
Expand Down

0 comments on commit 692e912

Please sign in to comment.