Skip to content

Commit

Permalink
utf16_literal_to_utf8: Eliminate Duff's Device
Browse files Browse the repository at this point in the history
This fixes -Wimplicit-fallthrough warnings with GCC7.
  • Loading branch information
FSMaxB committed Mar 2, 2017
1 parent ad5abf4 commit 9d07917
Showing 1 changed file with 20 additions and 31 deletions.
51 changes: 20 additions & 31 deletions cJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -452,21 +452,13 @@ static unsigned parse_hex4(const unsigned char * const input)
* A literal can be one or two sequences of the form \uXXXX */
static unsigned char utf16_literal_to_utf8(const unsigned char * const input_pointer, const unsigned char * const input_end, unsigned char **output_pointer, const unsigned char **error_pointer)
{
/* first bytes of UTF8 encoding for a given length in bytes */
static const unsigned char firstByteMark[5] =
{
0x00, /* should never happen */
0x00, /* 0xxxxxxx */
0xC0, /* 110xxxxx */
0xE0, /* 1110xxxx */
0xF0 /* 11110xxx */
};

long unsigned int codepoint = 0;
unsigned int first_code = 0;
const unsigned char *first_sequence = input_pointer;
unsigned char utf8_length = 0;
unsigned char utf8_position = 0;
unsigned char sequence_length = 0;
unsigned char first_byte_mark = 0;

/* get the first utf16 sequence */
first_code = parse_hex4(first_sequence + 2);
Expand Down Expand Up @@ -537,16 +529,19 @@ static unsigned char utf16_literal_to_utf8(const unsigned char * const input_poi
{
/* two bytes, encoding 110xxxxx 10xxxxxx */
utf8_length = 2;
first_byte_mark = 0xC0; /* 11000000 */
}
else if (codepoint < 0x10000)
{
/* three bytes, encoding 1110xxxx 10xxxxxx 10xxxxxx */
utf8_length = 3;
first_byte_mark = 0xE0; /* 11100000 */
}
else if (codepoint <= 0x10FFFF)
{
/* four bytes, encoding 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
utf8_length = 4;
first_byte_mark = 0xF0; /* 11110000 */
}
else
{
Expand All @@ -556,28 +551,22 @@ static unsigned char utf16_literal_to_utf8(const unsigned char * const input_poi
}

/* encode as utf8 */
switch (utf8_length)
{
case 4:
/* 10xxxxxx */
(*output_pointer)[3] = (unsigned char)((codepoint | 0x80) & 0xBF);
codepoint >>= 6;
case 3:
/* 10xxxxxx */
(*output_pointer)[2] = (unsigned char)((codepoint | 0x80) & 0xBF);
codepoint >>= 6;
case 2:
(*output_pointer)[1] = (unsigned char)((codepoint | 0x80) & 0xBF);
codepoint >>= 6;
case 1:
/* depending on the length in bytes this determines the
encoding of the first UTF8 byte */
(*output_pointer)[0] = (unsigned char)((codepoint | firstByteMark[utf8_length]) & 0xFF);
break;
default:
*error_pointer = first_sequence;
goto fail;
for (utf8_position = (unsigned char)(utf8_length - 1); utf8_position > 0; utf8_position--)
{
/* 10xxxxxx */
(*output_pointer)[utf8_position] = (unsigned char)((codepoint | 0x80) & 0xBF);
codepoint >>= 6;
}
/* encode first byte */
if (utf8_length > 1)
{
(*output_pointer)[0] = (unsigned char)((codepoint | first_byte_mark) & 0xFF);
}
else
{
(*output_pointer)[0] = (unsigned char)(codepoint & 0x7F);
}

*output_pointer += utf8_length;

return sequence_length;
Expand Down

0 comments on commit 9d07917

Please sign in to comment.