From 119a702ca3d0da39ab603c0aba67f4294c246451 Mon Sep 17 00:00:00 2001 From: Greg Sherwood Date: Tue, 19 Nov 2019 14:29:27 +1100 Subject: [PATCH] PHP 7.4 numeric separators are now tokenized in the same way when using older PHP versions (ref #2546) --- package.xml | 10 ++ src/Tokenizers/PHP.php | 60 +++++++++++ .../BackfillNumericSeparatorTest.inc | 22 ++++ .../BackfillNumericSeparatorTest.php | 101 ++++++++++++++++++ 4 files changed, 193 insertions(+) create mode 100644 tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc create mode 100644 tests/Core/Tokenizer/BackfillNumericSeparatorTest.php diff --git a/package.xml b/package.xml index a64a75c71b..96b8a74bee 100644 --- a/package.xml +++ b/package.xml @@ -32,6 +32,10 @@ http://pear.php.net/dtd/package-2.0.xsd"> -- The token after the statement (normally a semicolon) becomes the scope closer -- The token is also associated with the opening and closing parenthesis of the statement -- Any functions named "fn" will cause have a T_FN token for the function name, but have no scope information + - PHP 7.4 numeric separators are now tokenized in the same way when using older PHP versions + -- Previously, a number like 1_000 would tokenize as T_LNUMBER (1), T_STRING (_000) + -- Now, the number tokenizes as T_LNUMBER (1_000) + -- Sniff developers should consider how numbers with underscores impact their custom sniffs - The PHPCS file cache now takes file permissions into account -- The cache is now invalidated for a file when its permissions are changed - File::getMethodParameters() now supports arrow functions @@ -125,6 +129,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -1960,6 +1966,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2001,6 +2009,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 9d67fdb819..2c6364dd97 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -968,6 +968,66 @@ protected function tokenize($string) 
continue; }
+            /*
+                Before PHP 7.4, an underscore in a number split it into a number
+                token plus T_STRING tokens (1, _000). Rejoin them into one token.
+            */
+
+            if ($tokenIsArray === true
+                && ($token[0] === T_LNUMBER
+                || $token[0] === T_DNUMBER)
+                && isset($tokens[($stackPtr + 1)]) === true
+                && is_array($tokens[($stackPtr + 1)]) === true
+                && $tokens[($stackPtr + 1)][0] === T_STRING
+                && $tokens[($stackPtr + 1)][1][0] === '_'
+            ) {
+                $newContent = $token[1];
+                $newType    = $token[0];
+                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
+                    if (is_array($tokens[$i]) === false) {
+                        break;
+                    }
+
+                    if ($tokens[$i][0] === T_LNUMBER
+                        || $tokens[$i][0] === T_DNUMBER
+                        || ($tokens[$i][0] === T_STRING
+                        && $tokens[$i][1][0] === '_')
+                    ) {
+                        $newContent .= $tokens[$i][1];
+
+                        // One T_DNUMBER part makes the whole number a T_DNUMBER.
+                        if ($tokens[$i][0] === T_DNUMBER) {
+                            $newType = T_DNUMBER;
+                        }
+
+                        // Keep the minus sign of a negative exponent (1_0e-5).
+                        if ($tokens[$i][0] === T_STRING
+                            && substr(strtolower($tokens[$i][1]), -1) === 'e'
+                            && isset($tokens[($i + 1)]) === true
+                            && $tokens[($i + 1)] === '-'
+                        ) {
+                            $newContent .= '-';
+                            $i++;
+                        }
+
+                        continue;
+                    }
+
+                    break;
+                }//end for
+
+                // Name the token after the merged type, not the first part's type.
+                $newToken            = [];
+                $newToken['code']    = $newType;
+                $newToken['type']    = Util\Tokens::tokenName($newType);
+                $newToken['content'] = $newContent;
+                $finalTokens[$newStackPtr] = $newToken;
+
+                $newStackPtr++;
+                $stackPtr = ($i - 1);
+                continue;
+            }//end if
+
/* Convert ?
to T_NULLABLE OR T_INLINE_THEN */
diff --git a/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc b/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc
new file mode 100644
index 0000000000..73656f2b62
--- /dev/null
+++ b/tests/Core/Tokenizer/BackfillNumericSeparatorTest.inc
@@ -0,0 +1,22 @@
+
+ * @copyright 2019 Squiz Pty Ltd (ABN 77 084 670 600)
+ * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
+ */
+
+namespace PHP_CodeSniffer\Tests\Core\Tokenizer;
+
+use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest;
+
+class BackfillNumericSeparatorTest extends AbstractMethodUnitTest
+{
+
+
+    /**
+     * Test that numbers using numeric separators are tokenized correctly.
+     *
+     * @param array $testData The 'marker', 'type' and expected 'value' for the test case.
+     *
+     * @dataProvider dataTestBackfill
+     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
+     *
+     * @return void
+     */
+    public function testBackfill($testData)
+    {
+        $tokens = self::$phpcsFile->getTokens();
+        $number = $this->getTargetToken($testData['marker'], $testData['type']);
+        $this->assertSame($testData['value'], $tokens[$number]['content']);
+
+    }//end testBackfill()
+
+
+    /**
+     * Data provider: a marker comment, token type and expected content per case.
+     *
+     * @see testBackfill()
+     *
+     * @return array
+     */
+    public function dataTestBackfill()
+    {
+        return [
+            [
+                [
+                    'marker' => '/* testSimpleLNumber */',
+                    'type'   => T_LNUMBER,
+                    'value'  => '1_000_000_000',
+                ],
+            ],
+            [
+                [
+                    'marker' => '/* testSimpleDNumber */',
+                    'type'   => T_DNUMBER,
+                    'value'  => '107_925_284.88',
+                ],
+            ],
+            [
+                [
+                    'marker' => '/* testFloat */',
+                    'type'   => T_DNUMBER,
+                    'value'  => '6.674_083e-11',
+                ],
+            ],
+            [
+                [
+                    'marker' => '/* testHex */',
+                    'type'   => T_LNUMBER,
+                    'value'  => '0xCAFE_F00D',
+                ],
+            ],
+            [
+                [
+                    'marker' => '/* testHexMultiple */',
+                    'type'   => T_LNUMBER,
+                    'value'  => '0x42_72_6F_77_6E',
+                ],
+            ],
+            [
+                [
+                    'marker' => '/* testBinary */',
+                    'type'   => T_LNUMBER,
+                    'value'  => '0b0101_1111',
+                ],
+            ],
+            [
+                [
+                    'marker' => '/* testOctal */',
+                    'type'   => T_LNUMBER,
+                    'value'  => '0137_041',
+                ],
+            ],
+        ];
+
+    }//end dataTestBackfill()
+
+
+}//end class