-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tokenizer/PHP: bug fix - fix performance issue
Happened to come across this while investigating something else. As it was, as long as no open parenthesis or variable was encountered, this snippet would loop to the end of the file for each `T_ARRAY` token encountered as it would only `break` in the `if/elseif` and there was no `else` clause. Basically, we only want the `array` keyword to be tokenized as `T_ARRAY` if it is an actual array declaration. In all other cases, it should be tokenized as `T_STRING`. This fixes the performance issue by only looping to the first non-empty token after the keyword, checking if it's an open parenthesis and retokenizing the `T_ARRAY` to `T_STRING` in all other cases. It also removes the need for the separate _return type_ retokenization of the array keyword. Includes adding unit tests specifically for the array keyword.
- Loading branch information
Showing
4 changed files
with
225 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
<?php | ||
// Fixture for the array keyword tokenizer tests. | ||
// Each test marker comment below precedes the `array` token that a test case looks up. | ||
|
||
// Array declarations: the test data expects these keywords to be tokenized as T_ARRAY. | ||
/* testEmptyArray */ | ||
$var = array(); | ||
|
||
/* testArrayWithSpace */ | ||
$var = array (1 => 10); | ||
|
||
/* testArrayWithComment */ | ||
$var = Array /*comment*/ (1 => 10); | ||
|
||
/* testNestingArray */ | ||
$var = array( | ||
/* testNestedArray */ | ||
array( | ||
'key' => 'value', | ||
|
||
/* testClosureReturnType */ | ||
'closure' => function($a) use($global) : Array {}, | ||
), | ||
); | ||
|
||
// Type declarations: the test data expects `array` to be tokenized as T_STRING here. | ||
/* testFunctionDeclarationParamType */ | ||
function foo(array $a) {} | ||
|
||
/* testFunctionDeclarationReturnType */ | ||
function foo($a) : int|array|null {} | ||
|
||
// `array` used as a constant name and as a method name: expected to be tokenized as T_STRING. | ||
class Bar { | ||
/* testClassConst */ | ||
const ARRAY = []; | ||
|
||
/* testClassMethod */ | ||
public function array() {} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
<?php | ||
/** | ||
* Tests that the array keyword is tokenized correctly. | ||
* | ||
* @author Juliette Reinders Folmer <[email protected]> | ||
* @copyright 2021 Squiz Pty Ltd (ABN 77 084 670 600) | ||
* @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence | ||
*/ | ||
|
||
namespace PHP_CodeSniffer\Tests\Core\Tokenizer; | ||
|
||
use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest; | ||
|
||
class ArrayKeywordTest extends AbstractMethodUnitTest
{


    /**
     * Test that the array keyword is correctly tokenized as `T_ARRAY`.
     *
     * Also verifies that the parenthesis owner/opener/closer indexes are set on the token.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataArrayKeyword
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testArrayKeyword($testMarker, $testContent='array')
    {
        $tokens = self::$phpcsFile->getTokens();

        $token      = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent);
        $tokenArray = $tokens[$token];

        $this->assertSame(T_ARRAY, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_ARRAY (code)');
        $this->assertSame('T_ARRAY', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_ARRAY (type)');

        $this->assertArrayHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is not set');
        $this->assertArrayHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is not set');
        $this->assertArrayHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is not set');

    }//end testArrayKeyword()


    /**
     * Data provider.
     *
     * Declared static so it can be used with PHPUnit versions which require static data providers.
     *
     * @see testArrayKeyword()
     *
     * @return array
     */
    public static function dataArrayKeyword()
    {
        return [
            'empty array'                           => ['/* testEmptyArray */'],
            'array with space before parenthesis'   => ['/* testArrayWithSpace */'],
            'array with comment before parenthesis' => [
                '/* testArrayWithComment */',
                'Array',
            ],
            'nested: outer array'                   => ['/* testNestingArray */'],
            'nested: inner array'                   => ['/* testNestedArray */'],
        ];

    }//end dataArrayKeyword()


    /**
     * Test that the array keyword when used in a type declaration is correctly tokenized as `T_STRING`.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataArrayType
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testArrayType($testMarker, $testContent='array')
    {
        $this->assertTokenizedAsString($testMarker, $testContent);

    }//end testArrayType()


    /**
     * Data provider.
     *
     * Declared static so it can be used with PHPUnit versions which require static data providers.
     *
     * @see testArrayType()
     *
     * @return array
     */
    public static function dataArrayType()
    {
        return [
            'closure return type'        => [
                '/* testClosureReturnType */',
                'Array',
            ],
            'function param type'        => ['/* testFunctionDeclarationParamType */'],
            'function union return type' => ['/* testFunctionDeclarationReturnType */'],
        ];

    }//end dataArrayType()


    /**
     * Verify that the retokenization of `T_ARRAY` tokens to `T_STRING` is handled correctly
     * for tokens with the contents 'array' which aren't in actual fact the array keyword.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataNotArrayKeyword
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testNotArrayKeyword($testMarker, $testContent='array')
    {
        $this->assertTokenizedAsString($testMarker, $testContent);

    }//end testNotArrayKeyword()


    /**
     * Data provider.
     *
     * Declared static so it can be used with PHPUnit versions which require static data providers.
     *
     * @see testNotArrayKeyword()
     *
     * @return array
     */
    public static function dataNotArrayKeyword()
    {
        return [
            'class-constant-name' => [
                '/* testClassConst */',
                'ARRAY',
            ],
            'class-method-name'   => ['/* testClassMethod */'],
        ];

    }//end dataNotArrayKeyword()


    /**
     * Assert that the target token is tokenized as `T_STRING` and carries no parenthesis indexes.
     *
     * Shared by the tests which expect the `array` text *not* to be treated as the array keyword.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent The token content to look for.
     *
     * @return void
     */
    private function assertTokenizedAsString($testMarker, $testContent)
    {
        $tokens = self::$phpcsFile->getTokens();

        // The token may currently be either type; the assertions below pin the expected one.
        $token      = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent);
        $tokenArray = $tokens[$token];

        $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)');
        $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)');

        $this->assertArrayNotHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is set');
        $this->assertArrayNotHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is set');
        $this->assertArrayNotHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is set');

    }//end assertTokenizedAsString()


}//end class