From fc4970b75e912827d4ab54e2e7c0c3b6fc5f6f9d Mon Sep 17 00:00:00 2001 From: jrfnl Date: Thu, 25 Mar 2021 14:34:21 +0100 Subject: [PATCH] PHP 8.0 | Tokenizer/PHP: improve retokenization of T_DEFAULT This commit improves the tokenization of "default" keywords, by making sure that the keyword is not used as a constant or property. If it is, it will never be either `T_DEFAULT` or `T_MATCH_DEFAULT`, so it will be retokenized to `T_STRING` early. Fixes 3280 --- src/Tokenizers/PHP.php | 73 +++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 06263166e9..31ab77613a 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -1356,40 +1356,63 @@ protected function tokenize($string) if ($tokenIsArray === true && $token[0] === T_DEFAULT ) { - for ($x = ($stackPtr + 1); $x < $numTokens; $x++) { - if ($tokens[$x] === ',') { - // Skip over potential trailing comma (supported in PHP). - continue; + $ignoreContext = [ + T_OBJECT_OPERATOR => true, + T_NULLSAFE_OBJECT_OPERATOR => true, + T_NS_SEPARATOR => true, + T_PAAMAYIM_NEKUDOTAYIM => true, + ]; + + if (isset($ignoreContext[$finalTokens[$lastNotEmptyToken]['code']]) === false) { + for ($x = ($stackPtr + 1); $x < $numTokens; $x++) { + if ($tokens[$x] === ',') { + // Skip over potential trailing comma (supported in PHP). + continue; + } + + if (is_array($tokens[$x]) === false + || isset(Util\Tokens::$emptyTokens[$tokens[$x][0]]) === false + ) { + // Non-empty, non-comma content. + break; + } } - if (is_array($tokens[$x]) === false - || isset(Util\Tokens::$emptyTokens[$tokens[$x][0]]) === false + if (isset($tokens[$x]) === true + && is_array($tokens[$x]) === true + && $tokens[$x][0] === T_DOUBLE_ARROW ) { - // Non-empty, non-comma content. - break; - } - } + // Modify the original token stack for the double arrow so that + // future checks can disregard the double arrow token more easily. + // For match expression "case" statements, this is handled + // in PHP::processAdditional(). + $tokens[$x][0] = T_MATCH_ARROW; + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t\t* token $x changed from T_DOUBLE_ARROW to T_MATCH_ARROW".PHP_EOL; + } - if (isset($tokens[$x]) === true - && is_array($tokens[$x]) === true - && $tokens[$x][0] === T_DOUBLE_ARROW - ) { - // Modify the original token stack for the double arrow so that - // future checks can disregard the double arrow token more easily. - // For match expression "case" statements, this is handled - // in PHP::processAdditional(). - $tokens[$x][0] = T_MATCH_ARROW; - if (PHP_CODESNIFFER_VERBOSITY > 1) { - echo "\t\t* token $x changed from T_DOUBLE_ARROW to T_MATCH_ARROW".PHP_EOL; - } + $newToken = []; + $newToken['code'] = T_MATCH_DEFAULT; + $newToken['type'] = 'T_MATCH_DEFAULT'; + $newToken['content'] = $token[1]; + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t\t* token $stackPtr changed from T_DEFAULT to T_MATCH_DEFAULT".PHP_EOL; + } + + $finalTokens[$newStackPtr] = $newToken; + $newStackPtr++; + continue; + }//end if + } else { + // Definitely not the "default" keyword. $newToken = []; - $newToken['code'] = T_MATCH_DEFAULT; - $newToken['type'] = 'T_MATCH_DEFAULT'; + $newToken['code'] = T_STRING; + $newToken['type'] = 'T_STRING'; $newToken['content'] = $token[1]; if (PHP_CODESNIFFER_VERBOSITY > 1) { - echo "\t\t* token $stackPtr changed from T_DEFAULT to T_MATCH_DEFAULT".PHP_EOL; + echo "\t\t* token $stackPtr changed from T_DEFAULT to T_STRING".PHP_EOL; } $finalTokens[$newStackPtr] = $newToken;