From fc05fc155d45f1fd983894a56d5095854934d96d Mon Sep 17 00:00:00 2001 From: Thomas Punt Date: Mon, 28 Dec 2015 15:57:58 +0000 Subject: [PATCH 1/3] Add support for a digit separator in PHP --- Zend/zend_language_scanner.l | 68 +++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index c2a3a4519871a..1b9a5b3b13b6b 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -112,6 +112,22 @@ do { \ } \ } +#define STRIP_UNDERSCORES(n, len) \ +{ \ + int i, old_len = len; \ + char *new_n, *old_n; \ + for (i = 0, new_n = old_n = n; i < old_len; ++i, ++old_n) { \ + if (*old_n != '_') { \ + *new_n++ = *old_n; \ + } else { \ + --len; \ + } \ + } \ + if (old_len > len) { \ + *new_n = '\0'; \ + } \ +} + /* To save initial string length after scanning to first variable */ #define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len) #define GET_DOUBLE_QUOTES_SCANNED_LENGTH() SCNG(scanned_string_len) @@ -1092,11 +1108,11 @@ restart: /*!re2c re2c:yyfill:check = 0; -LNUM [0-9]+ -DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*) +LNUM [0-9]+(_[0-9]+)* +DNUM (([0-9]+(_[0-9]+)*)*"."([0-9]+(_[0-9]+)*)+)|(([0-9]+(_[0-9]+)*)+"."([0-9]+(_[0-9]+)*)*) EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM}) -HNUM "0x"[0-9a-fA-F]+ -BNUM "0b"[01]+ +HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)* +BNUM "0b"[01]+(_[01]+)* LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]* WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* @@ -1600,9 +1616,11 @@ NEWLINE ("\r"|"\n"|"\r\n") } {BNUM} { - char *bin = yytext + 2; /* Skip "0b" */ + /* The +/- 2 skips "0b" */ int len = yyleng - 2; - char *end; + char *end, *bin = strndup(yytext + 2, len); + + STRIP_UNDERSCORES(bin, len) /* Skip any leading 0s */ while (*bin == '0') { @@ -1628,31 +1646,35 @@ NEWLINE ("\r"|"\n"|"\r\n") } {LNUM} { - char *end; - if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ + int len = yyleng; + char *end, *lnum = 
strndup(yytext, yyleng); + + STRIP_UNDERSCORES(lnum, len) + + if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ errno = 0; - ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0)); + ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0)); /* This isn't an assert, we need to ensure 019 isn't valid octal * Because the lexing itself doesn't do that for us */ - if (end != yytext + yyleng) { + if (end != lnum + len) { zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0); ZVAL_UNDEF(zendlval); RETURN_TOKEN(T_LNUMBER); } } else { errno = 0; - ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0)); + ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0)); if (errno == ERANGE) { /* Overflow */ errno = 0; - if (yytext[0] == '0') { /* octal overflow */ + if (lnum[0] == '0') { /* octal overflow */ errno = 0; - ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end)); + ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end)); } else { - ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end)); + ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end)); } /* Also not an assert for the same reason */ - if (end != yytext + yyleng) { + if (end != lnum + len) { zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0); ZVAL_UNDEF(zendlval); @@ -1662,7 +1684,7 @@ NEWLINE ("\r"|"\n"|"\r\n") RETURN_TOKEN(T_DNUMBER); } /* Also not an assert for the same reason */ - if (end != yytext + yyleng) { + if (end != lnum + len) { zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0); ZVAL_UNDEF(zendlval); RETURN_TOKEN(T_DNUMBER); @@ -1673,9 +1695,11 @@ NEWLINE ("\r"|"\n"|"\r\n") } {HNUM} { - char *hex = yytext + 2; /* Skip "0x" */ + /* The +/- 2 skips "0x" */ int len = yyleng - 2; - char *end; + char *end, *hex = strndup(yytext + 2, len); + + STRIP_UNDERSCORES(hex, len) /* Skip any leading 0s */ while (*hex == '0') { @@ -1723,10 +1747,14 @@ string: {DNUM}|{EXPONENT_DNUM} { const char *end; + int len = yyleng; + char *dnum = 
strndup(yytext, yyleng); + + STRIP_UNDERSCORES(dnum, len) - ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end)); + ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end)); /* errno isn't checked since we allow HUGE_VAL/INF overflow */ - ZEND_ASSERT(end == yytext + yyleng); + ZEND_ASSERT(end == dnum + len); RETURN_TOKEN(T_DNUMBER); } From 7435a317b01103881c2edf34af39468addc83e65 Mon Sep 17 00:00:00 2001 From: Thomas Punt Date: Mon, 28 Dec 2015 15:58:29 +0000 Subject: [PATCH 2/3] Add tests for the digit separator --- Zend/tests/digit_separator_001.phpt | 18 ++++++++++++++++++ Zend/tests/digit_separator_002.phpt | 8 ++++++++ Zend/tests/digit_separator_003.phpt | 8 ++++++++ Zend/tests/digit_separator_004.phpt | 8 ++++++++ Zend/tests/digit_separator_005.phpt | 8 ++++++++ Zend/tests/digit_separator_006.phpt | 8 ++++++++ Zend/tests/digit_separator_007.phpt | 8 ++++++++ Zend/tests/digit_separator_008.phpt | 8 ++++++++ Zend/tests/digit_separator_009.phpt | 8 ++++++++ 9 files changed, 82 insertions(+) create mode 100644 Zend/tests/digit_separator_001.phpt create mode 100644 Zend/tests/digit_separator_002.phpt create mode 100644 Zend/tests/digit_separator_003.phpt create mode 100644 Zend/tests/digit_separator_004.phpt create mode 100644 Zend/tests/digit_separator_005.phpt create mode 100644 Zend/tests/digit_separator_006.phpt create mode 100644 Zend/tests/digit_separator_007.phpt create mode 100644 Zend/tests/digit_separator_008.phpt create mode 100644 Zend/tests/digit_separator_009.phpt diff --git a/Zend/tests/digit_separator_001.phpt b/Zend/tests/digit_separator_001.phpt new file mode 100644 index 0000000000000..07a0f7b4c0f93 --- /dev/null +++ b/Zend/tests/digit_separator_001.phpt @@ -0,0 +1,18 @@ +--TEST-- +Valid use of digit separator +--FILE-- + Date: Wed, 30 Dec 2015 10:04:48 +0000 Subject: [PATCH 3/3] Improve implementation Use estrndup() over strndup() Free allocated memory Only duplicate yytext when necessary --- Zend/zend_language_scanner.l | 82 
++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 1b9a5b3b13b6b..edfe77e9a7f1c 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -1617,10 +1617,17 @@ NEWLINE ("\r"|"\n"|"\r\n") {BNUM} { /* The +/- 2 skips "0b" */ - int len = yyleng - 2; - char *end, *bin = strndup(yytext + 2, len); + int len = yyleng - 2, contains_underscores, i; + char *end, *bin = yytext + 2; - STRIP_UNDERSCORES(bin, len) + for (i = 0; i < len && bin[i] != '_'; ++i); + + contains_underscores = i != len; + + if (contains_underscores) { + bin = estrndup(bin, len); + STRIP_UNDERSCORES(bin, len) + } /* Skip any leading 0s */ while (*bin == '0') { @@ -1636,20 +1643,33 @@ NEWLINE ("\r"|"\n"|"\r\n") ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2)); ZEND_ASSERT(!errno && end == yytext + yyleng); } + if (contains_underscores) { + efree(bin); + } RETURN_TOKEN(T_LNUMBER); } else { ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end)); /* errno isn't checked since we allow HUGE_VAL/INF overflow */ ZEND_ASSERT(end == yytext + yyleng); + if (contains_underscores) { + efree(bin); + } RETURN_TOKEN(T_DNUMBER); } } {LNUM} { - int len = yyleng; - char *end, *lnum = strndup(yytext, yyleng); + int len = yyleng, contains_underscores, i; + char *end, *lnum = yytext; + + for (i = 0; i < len && lnum[i] != '_'; ++i); - STRIP_UNDERSCORES(lnum, len) + contains_underscores = i != len; + + if (contains_underscores) { + lnum = estrndup(lnum, len); + STRIP_UNDERSCORES(lnum, len) + } if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */ errno = 0; @@ -1660,6 +1680,9 @@ NEWLINE ("\r"|"\n"|"\r\n") if (end != lnum + len) { zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0); ZVAL_UNDEF(zendlval); + if (contains_underscores) { + efree(lnum); + } RETURN_TOKEN(T_LNUMBER); } } else { @@ -1678,28 +1701,47 @@ NEWLINE ("\r"|"\n"|"\r\n") 
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0); ZVAL_UNDEF(zendlval); + if (contains_underscores) { + efree(lnum); + } RETURN_TOKEN(T_DNUMBER); } ZEND_ASSERT(!errno); + if (contains_underscores) { + efree(lnum); + } RETURN_TOKEN(T_DNUMBER); } /* Also not an assert for the same reason */ if (end != lnum + len) { zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0); ZVAL_UNDEF(zendlval); + if (contains_underscores) { + efree(lnum); + } RETURN_TOKEN(T_DNUMBER); } } ZEND_ASSERT(!errno); + if (contains_underscores) { + efree(lnum); + } RETURN_TOKEN(T_LNUMBER); } {HNUM} { /* The +/- 2 skips "0x" */ - int len = yyleng - 2; - char *end, *hex = strndup(yytext + 2, len); + int len = yyleng - 2, contains_underscores, i; + char *end, *hex = yytext + 2; + + for (i = 0; i < len && hex[i] != '_'; ++i); - STRIP_UNDERSCORES(hex, len) + contains_underscores = i != len; + + if (contains_underscores) { + hex = estrndup(hex, len); + STRIP_UNDERSCORES(hex, len) + } /* Skip any leading 0s */ while (*hex == '0') { @@ -1715,11 +1757,17 @@ NEWLINE ("\r"|"\n"|"\r\n") ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16)); ZEND_ASSERT(!errno && end == hex + len); } + if (contains_underscores) { + efree(hex); + } RETURN_TOKEN(T_LNUMBER); } else { ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end)); /* errno isn't checked since we allow HUGE_VAL/INF overflow */ ZEND_ASSERT(end == hex + len); + if (contains_underscores) { + efree(hex); + } RETURN_TOKEN(T_DNUMBER); } } @@ -1747,14 +1795,24 @@ string: {DNUM}|{EXPONENT_DNUM} { const char *end; - int len = yyleng; - char *dnum = strndup(yytext, yyleng); + int len = yyleng, contains_underscores, i; + char *dnum = yytext; + + for (i = 0; i < len && dnum[i] != '_'; ++i); - STRIP_UNDERSCORES(dnum, len) + contains_underscores = i != len; + + if (contains_underscores) { + dnum = estrndup(dnum, len); + STRIP_UNDERSCORES(dnum, len) + } ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end)); /* errno isn't checked 
since we allow HUGE_VAL/INF overflow */ ZEND_ASSERT(end == dnum + len); + if (contains_underscores) { + efree(dnum); + } RETURN_TOKEN(T_DNUMBER); }