From 835343649c1cba2c2880afe314bf50408aa50b62 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Sat, 20 Feb 2021 16:11:00 +0100 Subject: [PATCH] Tokenizer/PHP: retokenize the match double arrow to T_MATCH_ARROW The double arrow in PHP is used in a number of contexts: * Long/short arrays with keys; * Long/short lists with keys; * In the `as` part of `foreach()` statements with keys; * For `yield` statements with keys; * For arrow functions as the scope opener; * And now for `match` expressions to separate the cases from the return value (body). As most of these constructs can be nested in each other - an arrow function in an array value, a match expression in a list key -, every sniff handling any of these constructs has to take a lot of care when searching for the double arrow for the construct they are handling, to prevent matching a double arrow belonging to another type of construct nested in the target construct. This type of detection and special handling has to be done in each individual sniff which in one way or another has to deal with the `T_DOUBLE_ARROW` token and can cause quite some processing overhead. With that in mind, the double arrow as a scope opener for arrow functions has previously already been retokenized to `T_FN_ARROW`. Following the same reasoning, I'm proposing to retokenize the double arrow which separates `match` case expressions from the body expression to `T_MATCH_ARROW`. This should make life easier for any sniff dealing with any of the above constructs and will prevent potential false positives being introduced for sniffs currently handling any of these constructs, but not yet updated to allow for match expressions. Includes a set of dedicated unit tests verifying the tokenization of the double arrow operator in all currently supported contexts, including in combined (nested) contexts. --- package.xml | 6 + src/Tokenizers/PHP.php | 51 ++++++ src/Util/Tokens.php | 1 + tests/Core/Tokenizer/DoubleArrowTest.inc | 221 ++++++++++++++++++++++ tests/Core/Tokenizer/DoubleArrowTest.php | 223 +++++++++++++++++++++++ 5 files changed, 502 insertions(+) create mode 100644 tests/Core/Tokenizer/DoubleArrowTest.inc create mode 100644 tests/Core/Tokenizer/DoubleArrowTest.php diff --git a/package.xml b/package.xml index d46ce4c7ff..794f7463db 100644 --- a/package.xml +++ b/package.xml @@ -204,6 +204,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2112,6 +2114,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2192,6 +2196,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 01171ed434..92427059b7 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -373,6 +373,7 @@ class PHP extends Tokenizer T_LOGICAL_OR => 2, T_LOGICAL_XOR => 3, T_MATCH => 5, + T_MATCH_ARROW => 2, T_MATCH_DEFAULT => 7, T_METHOD_C => 10, T_MINUS_EQUAL => 2, @@ -1371,6 +1372,15 @@ protected function tokenize($string) && is_array($tokens[$x]) === true && $tokens[$x][0] === T_DOUBLE_ARROW ) { + // Modify the original token stack for the double arrow so that + // future checks can disregard the double arrow token more easily. + // For match expression "case" statements, this is handled + // in PHP::processAdditional(). + $tokens[$x][0] = T_MATCH_ARROW; + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t\t* token $x changed from T_DOUBLE_ARROW to T_MATCH_ARROW".PHP_EOL; + } + $newToken = []; $newToken['code'] = T_MATCH_DEFAULT; $newToken['type'] = 'T_MATCH_DEFAULT'; @@ -2450,6 +2460,47 @@ protected function processAdditional() echo "\t\t* cleaned parenthesis of token $i *".PHP_EOL; } } + } else { + // Retokenize the double arrows for match expression cases to `T_MATCH_ARROW`. + $searchFor = [ + T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, + T_OPEN_SQUARE_BRACKET => T_OPEN_SQUARE_BRACKET, + T_OPEN_PARENTHESIS => T_OPEN_PARENTHESIS, + T_OPEN_SHORT_ARRAY => T_OPEN_SHORT_ARRAY, + T_DOUBLE_ARROW => T_DOUBLE_ARROW, + ]; + $searchFor += Util\Tokens::$scopeOpeners; + + for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) { + if (isset($searchFor[$this->tokens[$x]['code']]) === false) { + continue; + } + + if (isset($this->tokens[$x]['scope_closer']) === true) { + $x = $this->tokens[$x]['scope_closer']; + continue; + } + + if (isset($this->tokens[$x]['parenthesis_closer']) === true) { + $x = $this->tokens[$x]['parenthesis_closer']; + continue; + } + + if (isset($this->tokens[$x]['bracket_closer']) === true) { + $x = $this->tokens[$x]['bracket_closer']; + continue; + } + + // This must be a double arrow, but make sure anyhow. + if ($this->tokens[$x]['code'] === T_DOUBLE_ARROW) { + $this->tokens[$x]['code'] = T_MATCH_ARROW; + $this->tokens[$x]['type'] = 'T_MATCH_ARROW'; + + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t\t* token $x changed from T_DOUBLE_ARROW to T_MATCH_ARROW".PHP_EOL; + } + } + }//end for }//end if continue; diff --git a/src/Util/Tokens.php b/src/Util/Tokens.php index ae47bc86f1..7071259b71 100644 --- a/src/Util/Tokens.php +++ b/src/Util/Tokens.php @@ -77,6 +77,7 @@ define('T_FN_ARROW', 'PHPCS_T_FN_ARROW'); define('T_TYPE_UNION', 'PHPCS_T_TYPE_UNION'); define('T_PARAM_NAME', 'PHPCS_T_PARAM_NAME'); +define('T_MATCH_ARROW', 'PHPCS_T_MATCH_ARROW'); define('T_MATCH_DEFAULT', 'PHPCS_T_MATCH_DEFAULT'); // Some PHP 5.5 tokens, replicated for lower versions. diff --git a/tests/Core/Tokenizer/DoubleArrowTest.inc b/tests/Core/Tokenizer/DoubleArrowTest.inc new file mode 100644 index 0000000000..ad5e5798d9 --- /dev/null +++ b/tests/Core/Tokenizer/DoubleArrowTest.inc @@ -0,0 +1,221 @@ + 'Zero', + ); +} + +function simpleShortArray($x) { + return [ + /* testShortArrayArrowSimple */ + 0 => 'Zero', + ]; +} + +function simpleLongList($x) { + list( + /* testLongListArrowSimple */ + 0 => $a, + ) = $x; +} + +function simpleShortList($x) { + [ + /* testShortListArrowSimple */ + 0 => $a, + ] = $x; +} + +function simpleYield($x) { + $i = 0; + foreach (explode("\n", $x) as $line) { + /* testYieldArrowSimple */ + yield ++$i => $line; + } +} + +function simpleForeach($x) { + /* testForeachArrowSimple */ + foreach ($x as $k => $value) {} +} + +function simpleMatch($x) { + return match ($x) { + /* testMatchArrowSimpleSingleCase */ + 0 => 'Zero', + /* testMatchArrowSimpleMultiCase */ + 2, 4, 6 => 'Zero', + /* testMatchArrowSimpleSingleCaseWithTrailingComma */ + 1, => 'Zero', + /* testMatchArrowSimpleMultiCaseWithTrailingComma */ + 3, 5, => 'Zero', + }; +} + +function simpleArrowFunction($y) { + /* testFnArrowSimple */ + return fn ($y) => callMe($y); +} + +function matchNestedInMatch() { + $x = match ($y) { + /* testMatchArrowNestedMatchOuter */ + default, => match ($z) { + /* testMatchArrowNestedMatchInner */ + 1 => 1 + }, + }; +} + +function matchNestedInLongArrayValue() { + $array = array( + /* testLongArrayArrowWithNestedMatchValue1 */ + 'a' => match ($test) { + /* testMatchArrowInLongArrayValue1 */ + 1 => 'a', + /* testMatchArrowInLongArrayValue2 */ + 2 => 'b' + }, + /* testLongArrayArrowWithNestedMatchValue2 */ + $i => match ($test) { + /* testMatchArrowInLongArrayValue3 */ + 1 => 'a', + }, + ); +} + +function matchNestedInShortArrayValue() { + $array = [ + /* testShortArrayArrowWithNestedMatchValue1 */ + 'a' => match ($test) { + /* testMatchArrowInShortArrayValue1 */ + 1 => 'a', + /* testMatchArrowInShortArrayValue2 */ + 2 => 'b' + }, + /* testShortArrayArrowWithNestedMatchValue2 */ + $i => match ($test) { + /* testMatchArrowInShortArrayValue3 */ + 1 => 'a', + }, + ]; +} + +function matchNestedInLongArrayKey() { + $array = array( + match ($test) { /* testMatchArrowInLongArrayKey1 */ 1 => 'a', /* testMatchArrowInLongArrayKey2 */ 2 => 'b' } + /* testLongArrayArrowWithMatchKey */ + => 'dynamic keys, woho!', + ); +} + +function matchNestedInShortArrayKey() { + $array = [ + match ($test) { /* testMatchArrowInShortArrayKey1 */ 1 => 'a', /* testMatchArrowInShortArrayKey2 */ 2 => 'b' } + /* testShortArrayArrowWithMatchKey */ + => 'dynamic keys, woho!', + ]; +} + +function arraysNestedInMatch() { + $matcher = match ($x) { + /* testMatchArrowWithLongArrayBodyWithKeys */ + 0 => array( + /* testLongArrayArrowInMatchBody1 */ + 0 => 1, + /* testLongArrayArrowInMatchBody2 */ + 'a' => 2, + /* testLongArrayArrowInMatchBody3 */ + 'b' => 3 + ), + /* testMatchArrowWithShortArrayBodyWithoutKeys */ + 1 => [1, 2, 3], + /* testMatchArrowWithLongArrayBodyWithoutKeys */ + 2 => array( 1, [1, 2, 3], 2, 3), + /* testMatchArrowWithShortArrayBodyWithKeys */ + 3 => [ + /* testShortArrayArrowInMatchBody1 */ + 0 => 1, + /* testShortArrayArrowInMatchBody2 */ + 'a' => array(1, 2, 3), + /* testShortArrayArrowInMatchBody3 */ + 'b' => 2, + 3 + ], + /* testShortArrayArrowinMatchCase1 */ + [4 => 'a', /* testShortArrayArrowinMatchCase2 */ 5 => 6] + /* testMatchArrowWithShortArrayWithKeysAsCase */ + => 'match with array as case value', + /* testShortArrayArrowinMatchCase3 */ + [4 => 'a'], /* testLongArrayArrowinMatchCase4 */ array(5 => 6), + /* testMatchArrowWithMultipleArraysWithKeysAsCase */ + => 'match with multiple arrays as case value', + }; +} + +function matchNestedInArrowFunction($x) { + /* testFnArrowWithMatchInValue */ + $fn = fn($x) => match(true) { + /* testMatchArrowInFnBody1 */ + 1, 2, 3, 4, 5 => 'foo', + /* testMatchArrowInFnBody2 */ + default => 'bar', + }; +} + +function arrowFunctionsNestedInMatch($x) { + return match ($x) { + /* testMatchArrowWithFnBody1 */ + 1 => /* testFnArrowInMatchBody1 */ fn($y) => callMe($y), + /* testMatchArrowWithFnBody2 */ + default => /* testFnArrowInMatchBody2 */ fn($y) => callThem($y) + }; +} + +function matchShortArrayMismash() { + $array = [ + match ($test) { + /* testMatchArrowInComplexShortArrayKey1 */ + 1 => [ /* testShortArrayArrowInComplexMatchValueinShortArrayKey */ 1 => 'a'], + /* testMatchArrowInComplexShortArrayKey2 */ + 2 => 'b' + /* testShortArrayArrowInComplexMatchArrayMismash */ + } => match ($test) { + /* testMatchArrowInComplexShortArrayValue1 */ + 1 => [ /* testShortArrayArrowInComplexMatchValueinShortArrayValue */ 1 => 'a'], + /* testMatchArrowInComplexShortArrayValue1 */ + 2 => /* testFnArrowInComplexMatchValueInShortArrayValue */ fn($y) => callMe($y) + }, + ]; +} + + +function longListInMatch($x, $y) { + return match($x) { + /* testMatchArrowWithLongListBody */ + 1 => list('a' => $a, /* testLongListArrowInMatchBody */ 'b' => $b, 'c' => list('d' => $c)) = $y, + /* testLongListArrowInMatchCase */ + list('a' => $a, 'b' => $b) = $y /* testMatchArrowWithLongListInCase */ => 'something' + }; +} + +function shortListInMatch($x, $y) { + return match($x) { + /* testMatchArrowWithShortListBody */ + 1 => ['a' => $a, 'b' => $b, 'c' => /* testShortListArrowInMatchBody */ ['d' => $c]] = $y, + /* testShortListArrowInMatchCase */ + ['a' => $a, 'b' => $b] = $y /* testMatchArrowWithShortListInCase */ => 'something' + }; +} + +function matchInLongList() { + /* testMatchArrowInLongListKey */ + list(match($x) {1 => 1, 2 => 2} /* testLongListArrowWithMatchInKey */ => $a) = $array; +} + +function matchInShortList() { + /* testMatchArrowInShortListKey */ + [match($x) {1 => 1, 2 => 2} /* testShortListArrowWithMatchInKey */ => $a] = $array; +} diff --git a/tests/Core/Tokenizer/DoubleArrowTest.php b/tests/Core/Tokenizer/DoubleArrowTest.php new file mode 100644 index 0000000000..c5c7b04cc8 --- /dev/null +++ b/tests/Core/Tokenizer/DoubleArrowTest.php @@ -0,0 +1,223 @@ + + * @copyright 2020-2021 Squiz Pty Ltd (ABN 77 084 670 600) + * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizer; + +use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest; + +class DoubleArrowTest extends AbstractMethodUnitTest +{ + + + /** + * Test that "normal" double arrows are correctly tokenized as `T_DOUBLE_ARROW`. + * + * @param string $testMarker The comment prefacing the target token. + * + * @dataProvider dataDoubleArrow + * @coversNothing + * + * @return void + */ + public function testDoubleArrow($testMarker) + { + $tokens = self::$phpcsFile->getTokens(); + + $token = $this->getTargetToken($testMarker, [T_DOUBLE_ARROW, T_MATCH_ARROW, T_FN_ARROW]); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_DOUBLE_ARROW, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_DOUBLE_ARROW (code)'); + $this->assertSame('T_DOUBLE_ARROW', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_DOUBLE_ARROW (type)'); + + }//end testDoubleArrow() + + + /** + * Data provider. + * + * @see testDoubleArrow() + * + * @return array + */ + public function dataDoubleArrow() + { + return [ + 'simple_long_array' => ['/* testLongArrayArrowSimple */'], + 'simple_short_array' => ['/* testShortArrayArrowSimple */'], + 'simple_long_list' => ['/* testLongListArrowSimple */'], + 'simple_short_list' => ['/* testShortListArrowSimple */'], + 'simple_yield' => ['/* testYieldArrowSimple */'], + 'simple_foreach' => ['/* testForeachArrowSimple */'], + + 'long_array_with_match_value_1' => ['/* testLongArrayArrowWithNestedMatchValue1 */'], + 'long_array_with_match_value_2' => ['/* testLongArrayArrowWithNestedMatchValue2 */'], + 'short_array_with_match_value_1' => ['/* testShortArrayArrowWithNestedMatchValue1 */'], + 'short_array_with_match_value_2' => ['/* testShortArrayArrowWithNestedMatchValue2 */'], + + 'long_array_with_match_key' => ['/* testLongArrayArrowWithMatchKey */'], + 'short_array_with_match_key' => ['/* testShortArrayArrowWithMatchKey */'], + + 'long_array_in_match_body_1' => ['/* testLongArrayArrowInMatchBody1 */'], + 'long_array_in_match_body_2' => ['/* testLongArrayArrowInMatchBody2 */'], + 'long_array_in_match_body_2' => ['/* testLongArrayArrowInMatchBody3 */'], + 'short_array_in_match_body_1' => ['/* testShortArrayArrowInMatchBody1 */'], + 'short_array_in_match_body_2' => ['/* testShortArrayArrowInMatchBody2 */'], + 'short_array_in_match_body_2' => ['/* testShortArrayArrowInMatchBody3 */'], + + 'short_array_in_match_case_1' => ['/* testShortArrayArrowinMatchCase1 */'], + 'short_array_in_match_case_2' => ['/* testShortArrayArrowinMatchCase2 */'], + 'short_array_in_match_case_3' => ['/* testShortArrayArrowinMatchCase3 */'], + 'long_array_in_match_case_4' => ['/* testLongArrayArrowinMatchCase4 */'], + + 'in_complex_short_array_key_match_value' => ['/* testShortArrayArrowInComplexMatchValueinShortArrayKey */'], + 'in_complex_short_array_toplevel' => ['/* testShortArrayArrowInComplexMatchArrayMismash */'], + 'in_complex_short_array_value_match_value' => ['/* testShortArrayArrowInComplexMatchValueinShortArrayValue */'], + + 'long_list_in_match_body' => ['/* testLongListArrowInMatchBody */'], + 'long_list_in_match_case' => ['/* testLongListArrowInMatchCase */'], + 'short_list_in_match_body' => ['/* testShortListArrowInMatchBody */'], + 'short_list_in_match_case' => ['/* testShortListArrowInMatchCase */'], + 'long_list_with_match_in_key' => ['/* testLongListArrowWithMatchInKey */'], + 'short_list_with_match_in_key' => ['/* testShortListArrowWithMatchInKey */'], + ]; + + }//end dataDoubleArrow() + + + /** + * Test that double arrows in match expressions which are the demarkation between a case and the return value + * are correctly tokenized as `T_MATCH_ARROW`. + * + * @param string $testMarker The comment prefacing the target token. + * + * @dataProvider dataMatchArrow + * @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional + * + * @return void + */ + public function testMatchArrow($testMarker) + { + $tokens = self::$phpcsFile->getTokens(); + + $token = $this->getTargetToken($testMarker, [T_DOUBLE_ARROW, T_MATCH_ARROW, T_FN_ARROW]); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_MATCH_ARROW, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_MATCH_ARROW (code)'); + $this->assertSame('T_MATCH_ARROW', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_MATCH_ARROW (type)'); + + }//end testMatchArrow() + + + /** + * Data provider. + * + * @see testMatchArrow() + * + * @return array + */ + public function dataMatchArrow() + { + return [ + 'single_case' => ['/* testMatchArrowSimpleSingleCase */'], + 'multi_case' => ['/* testMatchArrowSimpleMultiCase */'], + 'single_case_with_trailing_comma' => ['/* testMatchArrowSimpleSingleCaseWithTrailingComma */'], + 'multi_case_with_trailing_comma' => ['/* testMatchArrowSimpleMultiCaseWithTrailingComma */'], + 'match_nested_outer' => ['/* testMatchArrowNestedMatchOuter */'], + 'match_nested_inner' => ['/* testMatchArrowNestedMatchInner */'], + + 'in_long_array_value_1' => ['/* testMatchArrowInLongArrayValue1 */'], + 'in_long_array_value_2' => ['/* testMatchArrowInLongArrayValue2 */'], + 'in_long_array_value_3' => ['/* testMatchArrowInLongArrayValue3 */'], + 'in_short_array_value_1' => ['/* testMatchArrowInShortArrayValue1 */'], + 'in_short_array_value_2' => ['/* testMatchArrowInShortArrayValue2 */'], + 'in_short_array_value_3' => ['/* testMatchArrowInShortArrayValue3 */'], + + 'in_long_array_key_1' => ['/* testMatchArrowInLongArrayKey1 */'], + 'in_long_array_key_2' => ['/* testMatchArrowInLongArrayKey2 */'], + 'in_short_array_key_1' => ['/* testMatchArrowInShortArrayKey1 */'], + 'in_short_array_key_2' => ['/* testMatchArrowInShortArrayKey2 */'], + + 'with_long_array_value_with_keys' => ['/* testMatchArrowWithLongArrayBodyWithKeys */'], + 'with_short_array_value_without_keys' => ['/* testMatchArrowWithShortArrayBodyWithoutKeys */'], + 'with_long_array_value_without_keys' => ['/* testMatchArrowWithLongArrayBodyWithoutKeys */'], + 'with_short_array_value_with_keys' => ['/* testMatchArrowWithShortArrayBodyWithKeys */'], + + 'with_short_array_with_keys_as_case' => ['/* testMatchArrowWithShortArrayWithKeysAsCase */'], + 'with_multiple_arrays_with_keys_as_case' => ['/* testMatchArrowWithMultipleArraysWithKeysAsCase */'], + + 'in_fn_body_case' => ['/* testMatchArrowInFnBody1 */'], + 'in_fn_body_default' => ['/* testMatchArrowInFnBody2 */'], + 'with_fn_body_case' => ['/* testMatchArrowWithFnBody1 */'], + 'with_fn_body_default' => ['/* testMatchArrowWithFnBody2 */'], + + 'in_complex_short_array_key_1' => ['/* testMatchArrowInComplexShortArrayKey1 */'], + 'in_complex_short_array_key_2' => ['/* testMatchArrowInComplexShortArrayKey2 */'], + 'in_complex_short_array_value_1' => ['/* testMatchArrowInComplexShortArrayValue1 */'], + 'in_complex_short_array_key_2' => ['/* testMatchArrowInComplexShortArrayValue1 */'], + + 'with_long_list_in_body' => ['/* testMatchArrowWithLongListBody */'], + 'with_long_list_in_case' => ['/* testMatchArrowWithLongListInCase */'], + 'with_short_list_in_body' => ['/* testMatchArrowWithShortListBody */'], + 'with_short_list_in_case' => ['/* testMatchArrowWithShortListInCase */'], + 'in_long_list_key' => ['/* testMatchArrowInLongListKey */'], + 'in_short_list_key' => ['/* testMatchArrowInShortListKey */'], + ]; + + }//end dataMatchArrow() + + + /** + * Test that double arrows used as the scope opener for an arrow function + * are correctly tokenized as `T_FN_ARROW`. + * + * @param string $testMarker The comment prefacing the target token. + * + * @dataProvider dataFnArrow + * @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional + * + * @return void + */ + public function testFnArrow($testMarker) + { + $tokens = self::$phpcsFile->getTokens(); + + $token = $this->getTargetToken($testMarker, [T_DOUBLE_ARROW, T_MATCH_ARROW, T_FN_ARROW]); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_FN_ARROW, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_FN_ARROW (code)'); + $this->assertSame('T_FN_ARROW', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_FN_ARROW (type)'); + + }//end testFnArrow() + + + /** + * Data provider. + * + * @see testFnArrow() + * + * @return array + */ + public function dataFnArrow() + { + return [ + 'simple_fn' => ['/* testFnArrowSimple */'], + + 'with_match_as_value' => ['/* testFnArrowWithMatchInValue */'], + 'in_match_value_case' => ['/* testFnArrowInMatchBody1 */'], + 'in_match_value_default' => ['/* testFnArrowInMatchBody2 */'], + + 'in_complex_match_value_in_short_array' => ['/* testFnArrowInComplexMatchValueInShortArrayValue */'], + ]; + + }//end dataFnArrow() + + +}//end class