Skip to content

Commit

Permalink
NeonDecoder: implemented support for Unicode surrogate pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
JanTvrdik authored and dg committed Dec 30, 2014
1 parent eb8a490 commit 6b2b821
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/Neon/Decoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ private function parse($indent, $result = NULL, $key = NULL, $hasKey = FALSE)
'false' => FALSE, 'False' => FALSE, 'FALSE' => FALSE, 'no' => FALSE, 'No' => FALSE, 'NO' => FALSE, 'off' => FALSE, 'Off' => FALSE, 'OFF' => FALSE,
);
if ($t[0] === '"') {
$value = preg_replace_callback('#\\\\(?:u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i', array($this, 'cbString'), substr($t, 1, -1));
$value = preg_replace_callback('#\\\\(?:ud[89ab][0-9a-f]{2}\\\\ud[c-f][0-9a-f]{2}|u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i', array($this, 'cbString'), substr($t, 1, -1));
} elseif ($t[0] === "'") {
$value = substr($t, 1, -1);
} elseif (isset($consts[$t]) && (!isset($tokens[$n+1][0]) || ($tokens[$n+1][0] !== ':' && $tokens[$n+1][0] !== '='))) {
Expand Down Expand Up @@ -296,8 +296,14 @@ private function cbString($m)
$sq = $m[0];
if (isset($mapping[$sq[1]])) {
return $mapping[$sq[1]];
} elseif ($sq[1] === 'u' && strlen($sq) === 6) {
return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', hexdec(substr($sq, 2))));
} elseif ($sq[1] === 'u' && strlen($sq) >= 6) {
$lead = hexdec(substr($sq, 2, 4));
$tail = hexdec(substr($sq, 8, 4));
$code = $tail ? (0x2400 + (($lead - 0xD800) << 10) + $tail) : $lead;
if ($code >= 0xD800 && $code <= 0xDFFF) {
$this->error("Invalid UTF-8 (lone surrogate) $sq");
}
return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
} elseif ($sq[1] === 'x' && strlen($sq) === 4) {
return chr(hexdec(substr($sq, 2)));
} else {
Expand Down
5 changes: 5 additions & 0 deletions tests/Neon/Decoder.errors.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ Assert::exception(function() {
}, 'Nette\Neon\Exception', "Unexpected 'World' on line 2, column 1." );


// A \uD801 escape (a high surrogate) that is not followed by a low-surrogate
// escape must be rejected with a Neon exception naming the offending sequence.
Assert::exception(function() {
Neon::decode('"\uD801"');
}, 'Nette\Neon\Exception', "Invalid UTF-8 (lone surrogate) \\uD801 on line 1, column 1." );


// A comma after a "- item" list entry is reported as unexpected at the
// position of the first such comma (line 1, column 7).
Assert::exception(function() {
Neon::decode("- Dave,\n- Rimmer,\n- Kryten,\n");
}, 'Nette\Neon\Exception', "Unexpected ',' on line 1, column 7." );
Expand Down
3 changes: 3 additions & 0 deletions tests/Neon/Decoder.scalar.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ Assert::same( 'the"string', Neon::decode('the"string #literal') );
// Quoted strings: a '#' inside the quotes belongs to the value, the other kind
// of quote is kept verbatim, and \" inside a double-quoted string yields ".
Assert::same( "the'string #literal", Neon::decode('"the\'string #literal"') );
Assert::same( 'the"string #literal', Neon::decode("'the\"string #literal'") );
Assert::same( 'the"string #literal', Neon::decode('"the\\"string #literal"') );
// \uXXXX escapes in double-quoted strings decode to UTF-8 bytes:
// an ASCII code point, a two-byte code point (U+011B), and a surrogate pair.
Assert::same( '@', Neon::decode('"\u0040"') );
Assert::same( "\xC4\x9B", Neon::decode('"\u011B"') );
Assert::same( "\xf0\x90\x90\x81", Neon::decode('"\uD801\uDC01"') ); // U+10401 encoded as surrogate pair
// This unquoted input is returned unchanged.
Assert::same( '<literal> <literal>', Neon::decode('<literal> <literal>') );
// Empty single- and double-quoted strings both decode to an empty string.
Assert::same( "", Neon::decode("''") );
Assert::same( "", Neon::decode('""') );
Expand Down

0 comments on commit 6b2b821

Please sign in to comment.