Skip to content

Commit

Permalink
Tokenizer/PHP: bug fix - fix performance issue
Browse files Browse the repository at this point in the history
Happened to come across this while investigating something else.

Previously, for each `T_ARRAY` token encountered, this snippet would loop all the way to the end of the file unless it came across an open parenthesis or a variable, as it would only `break` in the `if/elseif` branches and there was no `else` clause.

Basically, we only want the `array` keyword to be tokenized as `T_ARRAY` if it is an actual array declaration. In all other cases, it should be tokenized as `T_STRING`.

This fixes the performance issue by only looping to the first non-empty token after the keyword, checking whether it is an open parenthesis, and retokenizing the `T_ARRAY` to `T_STRING` in all other cases.

It also removes the need for the separate _return type_ retokenization of the array keyword.

Includes adding unit tests specifically for the array keyword.
  • Loading branch information
jrfnl committed May 27, 2021
1 parent d2574b9 commit 964a38c
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 26 deletions.
6 changes: 6 additions & 0 deletions package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
<dir name="Tokenizer">
<file baseinstalldir="" name="AnonClassParenthesisOwnerTest.inc" role="test" />
<file baseinstalldir="" name="AnonClassParenthesisOwnerTest.php" role="test" />
<file baseinstalldir="" name="ArrayKeywordTest.inc" role="test" />
<file baseinstalldir="" name="ArrayKeywordTest.php" role="test" />
<file baseinstalldir="" name="AttributesTest.inc" role="test" />
<file baseinstalldir="" name="AttributesTest.php" role="test" />
<file baseinstalldir="" name="BackfillFnTokenTest.inc" role="test" />
Expand Down Expand Up @@ -2098,6 +2100,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
<install as="CodeSniffer/Core/Sniffs/AbstractArraySniffTestable.php" name="tests/Core/Sniffs/AbstractArraySniffTestable.php" />
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" />
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" />
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.php" name="tests/Core/Tokenizer/ArrayKeywordTest.php" />
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.inc" name="tests/Core/Tokenizer/ArrayKeywordTest.inc" />
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.php" name="tests/Core/Tokenizer/AttributesTest.php" />
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.inc" name="tests/Core/Tokenizer/AttributesTest.inc" />
<install as="CodeSniffer/Core/Tokenizer/BackfillFnTokenTest.php" name="tests/Core/Tokenizer/BackfillFnTokenTest.php" />
Expand Down Expand Up @@ -2186,6 +2190,8 @@ http://pear.php.net/dtd/package-2.0.xsd">
<install as="CodeSniffer/Core/Sniffs/AbstractArraySniffTestable.php" name="tests/Core/Sniffs/AbstractArraySniffTestable.php" />
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.php" />
<install as="CodeSniffer/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" name="tests/Core/Tokenizer/AnonClassParenthesisOwnerTest.inc" />
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.php" name="tests/Core/Tokenizer/ArrayKeywordTest.php" />
<install as="CodeSniffer/Core/Tokenizer/ArrayKeywordTest.inc" name="tests/Core/Tokenizer/ArrayKeywordTest.inc" />
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.php" name="tests/Core/Tokenizer/AttributesTest.php" />
<install as="CodeSniffer/Core/Tokenizer/AttributesTest.inc" name="tests/Core/Tokenizer/AttributesTest.inc" />
<install as="CodeSniffer/Core/Tokenizer/BackfillFnTokenTest.php" name="tests/Core/Tokenizer/BackfillFnTokenTest.php" />
Expand Down
40 changes: 14 additions & 26 deletions src/Tokenizers/PHP.php
Original file line number Diff line number Diff line change
Expand Up @@ -1769,23 +1769,6 @@ function return types. We want to keep the parenthesis map clean,

break;
}//end for

// Any T_ARRAY tokens we find between here and the next
// token that can't be part of the return type, need to be
// converted to T_STRING tokens.
for ($x; $x < $numTokens; $x++) {
if ((is_array($tokens[$x]) === false && $tokens[$x] !== '|')
|| (is_array($tokens[$x]) === true && isset($allowed[$tokens[$x][0]]) === false)
) {
break;
} else if (is_array($tokens[$x]) === true && $tokens[$x][0] === T_ARRAY) {
$tokens[$x][0] = T_STRING;

if (PHP_CODESNIFFER_VERBOSITY > 1) {
echo "\t\t* token $x changed from T_ARRAY to T_STRING".PHP_EOL;
}
}
}
}//end if
}//end if
}//end if
Expand Down Expand Up @@ -2066,20 +2049,25 @@ function return types. We want to keep the parenthesis map clean,
}
}//end if

// This is a special condition for T_ARRAY tokens used for
// type hinting function arguments as being arrays. We want to keep
// the parenthesis map clean, so let's tag these tokens as
// This is a special condition for T_ARRAY tokens used for anything else
// but array declarations, like type hinting function arguments as
// being arrays.
// We want to keep the parenthesis map clean, so let's tag these tokens as
// T_STRING.
if ($newToken['code'] === T_ARRAY) {
for ($i = $stackPtr; $i < $numTokens; $i++) {
if ($tokens[$i] === '(') {
break;
} else if ($tokens[$i][0] === T_VARIABLE) {
$newToken['code'] = T_STRING;
$newToken['type'] = 'T_STRING';
for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
if (is_array($tokens[$i]) === false
|| isset(Util\Tokens::$emptyTokens[$tokens[$i][0]]) === false
) {
// Non-empty content.
break;
}
}

if ($tokens[$i] !== '(' && $i !== $numTokens) {
$newToken['code'] = T_STRING;
$newToken['type'] = 'T_STRING';
}
}

// This is a special case when checking PHP 5.5+ code in PHP < 5.5
Expand Down
35 changes: 35 additions & 0 deletions tests/Core/Tokenizer/ArrayKeywordTest.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

// Test case file for the array keyword tokenizer tests (ArrayKeywordTest).
// Each `test...` marker comment below is passed to the test class as the
// "comment prefacing the target token", so the markers and the code that
// directly follows them must stay together and keep their exact text.
// The code is only meant to be tokenized, not executed.

/* testEmptyArray */
$var = array();

/* testArrayWithSpace */
$var = array (1 => 10);

/* testArrayWithComment */
$var = Array /*comment*/ (1 => 10);

/* testNestingArray */
$var = array(
/* testNestedArray */
array(
'key' => 'value',

/* testClosureReturnType */
'closure' => function($a) use($global) : Array {},
),
);

/* testFunctionDeclarationParamType */
function foo(array $a) {}

/* testFunctionDeclarationReturnType */
function foo($a) : int|array|null {}

class Bar {
/* testClassConst */
const ARRAY = [];

/* testClassMethod */
public function array() {}
}
170 changes: 170 additions & 0 deletions tests/Core/Tokenizer/ArrayKeywordTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
<?php
/**
* Tests that the array keyword is tokenized correctly.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2021 Squiz Pty Ltd (ABN 77 084 670 600)
* @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer;

use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest;

class ArrayKeywordTest extends AbstractMethodUnitTest
{


    /**
     * Test that the array keyword is correctly tokenized as `T_ARRAY`.
     *
     * Besides the token code and type, this verifies that the token carries
     * the `parenthesis_owner`, `parenthesis_opener` and `parenthesis_closer` keys.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataArrayKeyword
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testArrayKeyword($testMarker, $testContent='array')
    {
        $tokens = self::$phpcsFile->getTokens();

        // The target may have been (re)tokenized either way, so search for both token types.
        $token      = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent);
        $tokenArray = $tokens[$token];

        $this->assertSame(T_ARRAY, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_ARRAY (code)');
        $this->assertSame('T_ARRAY', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_ARRAY (type)');

        $this->assertArrayHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is not set');
        $this->assertArrayHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is not set');
        $this->assertArrayHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is not set');

    }//end testArrayKeyword()


    /**
     * Data provider.
     *
     * Static, as non-static data providers are not supported by newer PHPUnit versions.
     *
     * @see testArrayKeyword()
     *
     * @return array
     */
    public static function dataArrayKeyword()
    {
        return [
            'empty array'                           => ['/* testEmptyArray */'],
            'array with space before parenthesis'   => ['/* testArrayWithSpace */'],
            'array with comment before parenthesis' => [
                '/* testArrayWithComment */',
                'Array',
            ],
            'nested: outer array'                   => ['/* testNestingArray */'],
            'nested: inner array'                   => ['/* testNestedArray */'],
        ];

    }//end dataArrayKeyword()


    /**
     * Test that the array keyword when used in a type declaration is correctly tokenized as `T_STRING`.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataArrayType
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testArrayType($testMarker, $testContent='array')
    {
        $this->verifyTokenizedAsString($testMarker, $testContent);

    }//end testArrayType()


    /**
     * Data provider.
     *
     * Static, as non-static data providers are not supported by newer PHPUnit versions.
     *
     * @see testArrayType()
     *
     * @return array
     */
    public static function dataArrayType()
    {
        return [
            'closure return type'        => [
                '/* testClosureReturnType */',
                'Array',
            ],
            'function param type'        => ['/* testFunctionDeclarationParamType */'],
            'function union return type' => ['/* testFunctionDeclarationReturnType */'],
        ];

    }//end dataArrayType()


    /**
     * Verify that the retokenization of `T_ARRAY` tokens to `T_STRING` is handled correctly
     * for tokens with the contents 'array' which aren't in actual fact the array keyword.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent Optional. The token content to look for.
     *
     * @dataProvider dataNotArrayKeyword
     * @covers       PHP_CodeSniffer\Tokenizers\PHP::tokenize
     * @covers       PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap
     *
     * @return void
     */
    public function testNotArrayKeyword($testMarker, $testContent='array')
    {
        $this->verifyTokenizedAsString($testMarker, $testContent);

    }//end testNotArrayKeyword()


    /**
     * Data provider.
     *
     * Static, as non-static data providers are not supported by newer PHPUnit versions.
     *
     * @see testNotArrayKeyword()
     *
     * @return array
     */
    public static function dataNotArrayKeyword()
    {
        return [
            'class-constant-name' => [
                '/* testClassConst */',
                'ARRAY',
            ],
            'class-method-name'   => ['/* testClassMethod */'],
        ];

    }//end dataNotArrayKeyword()


    /**
     * Assert that the target token is tokenized as `T_STRING` and has no parenthesis keys set.
     *
     * Shared by testArrayType() and testNotArrayKeyword(), which have identical expectations
     * and only differ in the test cases they are fed.
     *
     * @param string $testMarker  The comment prefacing the target token.
     * @param string $testContent The token content to look for.
     *
     * @return void
     */
    private function verifyTokenizedAsString($testMarker, $testContent)
    {
        $tokens = self::$phpcsFile->getTokens();

        // The target may have been (re)tokenized either way, so search for both token types.
        $token      = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent);
        $tokenArray = $tokens[$token];

        $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)');
        $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)');

        $this->assertArrayNotHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is set');
        $this->assertArrayNotHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is set');
        $this->assertArrayNotHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is set');

    }//end verifyTokenizedAsString()


}//end class

0 comments on commit 964a38c

Please sign in to comment.