Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for a digit separator #1699

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Zend/tests/digit_separator_001.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
--TEST--
Valid use of digit separator
--FILE--
<?php

var_dump(1_000_000 === 1000000);
var_dump(3.141_592 === 3.141592);
var_dump(0x02_56_12 === 0x025612);
var_dump(0b0010_1101 === 0b00101101);
var_dump(0267_3432 === 02673432);
var_dump(1_123.456_7e2 === 1123.4567e2);
--EXPECT--
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)
bool(true)
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_002.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: trailing underscore
--FILE--
<?php

100_;
--EXPECTF--
Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_003.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: adjacent underscores
--FILE--
<?php

10__0;
--EXPECTF--
Parse error: syntax error, unexpected '__0' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_004.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: underscore left of period
--FILE--
<?php

100_.0;
--EXPECTF--
Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_005.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: underscore right of period
--FILE--
<?php

100._0;
--EXPECTF--
Parse error: syntax error, unexpected '_0' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_006.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: underscore next to 0x
--FILE--
<?php

0x_0123;
--EXPECTF--
Parse error: syntax error, unexpected 'x_0123' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_007.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: underscore next to 0b
--FILE--
<?php

0b_0101;
--EXPECTF--
Parse error: syntax error, unexpected 'b_0101' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_008.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: underscore left of e
--FILE--
<?php

1_e2;
--EXPECTF--
Parse error: syntax error, unexpected '_e2' (T_STRING) in %s on line %d
8 changes: 8 additions & 0 deletions Zend/tests/digit_separator_009.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--TEST--
Invalid use: underscore right of e
--FILE--
<?php

1e_2;
--EXPECTF--
Parse error: syntax error, unexpected 'e_2' (T_STRING) in %s on line %d
142 changes: 114 additions & 28 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,22 @@ do { \
} \
}

/*
 * STRIP_UNDERSCORES(n, len): remove every '_' from the first `len` bytes of
 * the buffer `n`, compacting the remaining bytes in place.
 *
 *   n   - pointer to a writable char buffer; evaluated exactly once.
 *   len - modifiable integer lvalue holding the number of bytes to scan.
 *         On exit it has been reduced by the number of underscores removed.
 *
 * When at least one underscore was removed, a '\0' is stored immediately
 * after the compacted bytes (this always lands inside the scanned range).
 * When nothing was removed, neither the buffer nor `len` is written to.
 *
 * The expansion is a plain brace-enclosed block rather than
 * do { ... } while (0), because existing call sites invoke the macro without
 * a trailing semicolon. Internal locals carry a strip_..._ prefix/suffix so
 * that they cannot shadow an identifier passed as an argument (call sites
 * commonly have a variable named `i` in scope).
 */
#define STRIP_UNDERSCORES(n, len) \
{ \
	char *strip_dst_ = (n); \
	const char *strip_src_ = strip_dst_; \
	int strip_left_ = (int) (len); \
	int strip_removed_ = 0; \
	for (; strip_left_ > 0; --strip_left_, ++strip_src_) { \
		if (*strip_src_ == '_') { \
			++strip_removed_; \
		} else { \
			*strip_dst_++ = *strip_src_; \
		} \
	} \
	if (strip_removed_ > 0) { \
		(len) -= strip_removed_; \
		*strip_dst_ = '\0'; \
	} \
}

/* To save initial string length after scanning to first variable */
#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) SCNG(scanned_string_len) = (len)
#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() SCNG(scanned_string_len)
Expand Down Expand Up @@ -1092,11 +1108,11 @@ restart:

/*!re2c
re2c:yyfill:check = 0;
LNUM [0-9]+
DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
LNUM [0-9]+(_[0-9]+)*
DNUM (([0-9]+(_[0-9]+)*)*"."([0-9]+(_[0-9]+)*)+)|(([0-9]+(_[0-9]+)*)+"."([0-9]+(_[0-9]+)*)*)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+
BNUM "0b"[01]+
HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
BNUM "0b"[01]+(_[01]+)*
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
Expand Down Expand Up @@ -1600,87 +1616,137 @@ NEWLINE ("\r"|"\n"|"\r\n")
}

<ST_IN_SCRIPTING>{BNUM} {
char *bin = yytext + 2; /* Skip "0b" */
int len = yyleng - 2;
char *end;
/* The +/- 2 skips "0b" */
int len = yyleng - 2, contains_underscores, i;
char *end, *bin = yytext + 2;

/* Skip any leading 0s */
while (*bin == '0') {
while (*bin == '0' || *bin == '_') {
++bin;
--len;
}

for (i = 0; i < len && bin[i] != '_'; ++i);

contains_underscores = i != len;

if (contains_underscores) {
bin = estrndup(bin, len);
STRIP_UNDERSCORES(bin, len)
}

if (len < SIZEOF_ZEND_LONG * 8) {
if (len == 0) {
ZVAL_LONG(zendlval, 0);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
ZEND_ASSERT(!errno && end == yytext + yyleng);
ZEND_ASSERT(!errno && end == bin + len);
}
if (contains_underscores) {
efree(bin);
}
RETURN_TOKEN(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
ZEND_ASSERT(end == bin + len);
if (contains_underscores) {
efree(bin);
}
RETURN_TOKEN(T_DNUMBER);
}
}

<ST_IN_SCRIPTING>{LNUM} {
char *end;
if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
int len = yyleng, contains_underscores, i;
char *end, *lnum = yytext;

for (i = 0; i < len && lnum[i] != '_'; ++i);

contains_underscores = i != len;

if (contains_underscores) {
lnum = estrndup(lnum, len);
STRIP_UNDERSCORES(lnum, len)
}

if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
/* This isn't an assert, we need to ensure 019 isn't valid octal
* Because the lexing itself doesn't do that for us
*/
if (end != yytext + yyleng) {
if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN(T_LNUMBER);
}
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
if (errno == ERANGE) { /* Overflow */
errno = 0;
if (yytext[0] == '0') { /* octal overflow */
if (lnum[0] == '0') { /* octal overflow */
errno = 0;
ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
} else {
ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
}
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error,
"Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN(T_DNUMBER);
}
ZEND_ASSERT(!errno);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN(T_DNUMBER);
}
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN(T_DNUMBER);
}
}
ZEND_ASSERT(!errno);
if (contains_underscores) {
efree(lnum);
}
RETURN_TOKEN(T_LNUMBER);
}

<ST_IN_SCRIPTING>{HNUM} {
char *hex = yytext + 2; /* Skip "0x" */
int len = yyleng - 2;
char *end;
/* The +/- 2 skips "0x" */
int len = yyleng - 2, contains_underscores, i;
char *end, *hex = yytext + 2;

/* Skip any leading 0s */
while (*hex == '0') {
hex++;
len--;
while (*hex == '0' || *hex == '_') {
++hex;
--len;
}

for (i = 0; i < len && hex[i] != '_'; ++i);

contains_underscores = i != len;

if (contains_underscores) {
hex = estrndup(hex, len);
STRIP_UNDERSCORES(hex, len)
}

if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
Expand All @@ -1691,11 +1757,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
ZEND_ASSERT(!errno && end == hex + len);
}
if (contains_underscores) {
efree(hex);
}
RETURN_TOKEN(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == hex + len);
if (contains_underscores) {
efree(hex);
}
RETURN_TOKEN(T_DNUMBER);
}
}
Expand Down Expand Up @@ -1723,10 +1795,24 @@ string:

<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
const char *end;
int len = yyleng, contains_underscores, i;
char *dnum = yytext;

for (i = 0; i < len && dnum[i] != '_'; ++i);

ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
contains_underscores = i != len;

if (contains_underscores) {
dnum = estrndup(dnum, len);
STRIP_UNDERSCORES(dnum, len)
}

ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
ZEND_ASSERT(end == dnum + len);
if (contains_underscores) {
efree(dnum);
}
RETURN_TOKEN(T_DNUMBER);
}

Expand Down