Skip to content

Commit

Permalink
Tokenizer/PHP: don't retokenize tokens in a broken type DNF declaration
Browse files Browse the repository at this point in the history
A DNF type always needs to have matching open and close parentheses. If the parentheses are unbalanced, we can't be sure this is a DNF type and run the risk of retokenizing non-type tokens.

Even if the code under scan was intended as a DNF type, it will be invalid code/contain a parse error if the parentheses are unbalanced.
In that case, retokenizing to the type specific tokens is likely to make the effect of this parse error on sniffs _worse_.

With that in mind, I'm adding extra hardening to the type handling layer in the PHP tokenizer, which will ensure that the retokenization to type specific tokens _only_ happens when there are balanced pairs of DNF parentheses.

Includes tests safeguarding the behaviour of the type handling layer for this type of invalid/parse error types.

Safe for two, all these tests would previously fail on containing at least _some_ type specific tokens.
  • Loading branch information
jrfnl committed May 21, 2024
1 parent cac9038 commit 07477a7
Show file tree
Hide file tree
Showing 3 changed files with 280 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/Tokenizers/PHP.php
Original file line number Diff line number Diff line change
Expand Up @@ -3132,9 +3132,14 @@ protected function processAdditional()

$typeTokenCountBefore = 0;
$typeOperators = [$i];
$parenthesesCount = 0;
$confirmed = false;
$maybeNullable = null;

if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
++$parenthesesCount;
}

for ($x = ($i - 1); $x >= 0; $x--) {
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
continue;
Expand Down Expand Up @@ -3201,11 +3206,13 @@ protected function processAdditional()
continue;
}

if ($this->tokens[$x]['code'] === T_BITWISE_OR
|| $this->tokens[$x]['code'] === T_BITWISE_AND
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
) {
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
$typeOperators[] = $x;
continue;
}

if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
++$parenthesesCount;
$typeOperators[] = $x;
continue;
}
Expand Down Expand Up @@ -3287,8 +3294,8 @@ protected function processAdditional()
unset($parens, $last);
}//end if

if ($confirmed === false) {
// Not a union or intersection type after all, move on.
if ($confirmed === false || ($parenthesesCount % 2) !== 0) {
// Not a (valid) union, intersection or DNF type after all, move on.
continue;
}

Expand Down
48 changes: 48 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
//
// Also note that the order of these tests is deliberate to try and trick the parentheses handling
// in the Tokenizer class into matching parentheses pairs, even though the parentheses do
// no belong together.

class UnmatchedParentheses {
/* testBrokenConstDNFTypeParensMissingClose */
const A|(B&C PARSE_ERROR_1 = null;

/* testBrokenConstDNFTypeParensMissingOpen */
const A|B&C) PARSE_ERROR_2 = null;

/* testBrokenPropertyDNFTypeParensMissingClose */
private A|(B&C $parseError1;

/* testBrokenPropertyDNFTypeParensMissingOpen */
protected A|B&C) $parseError2;

function unmatchedParens1 (
/* testBrokenParamDNFTypeParensMissingClose */
A|(B&C $parseError,
/* testBrokenReturnDNFTypeParensMissingOpen */
) : A|B&C) {}

function unmatchedParens2 (
/* testBrokenParamDNFTypeParensMissingOpen */
A|B&C) $parseError
/* testBrokenReturnDNFTypeParensMissingClose */
) : A|(B&C {}
}

class MatchedAndUnmatchedParentheses {
/* testBrokenConstDNFTypeParensMissingOneClose */
const (A&B)|(B&C PARSE_ERROR = null;

/* testBrokenPropertyDNFTypeParensMissingOneOpen */
protected (A&B)|B&C) $parseError;

function unmatchedParens (
/* testBrokenParamDNFTypeParensMissingOneClose */
(A&B)|(B&C $parseError,
/* testBrokenReturnDNFTypeParensMissingOneOpen */
) : (A&B)|B&C) {}
}
218 changes: 218 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
<?php
/**
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;

use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
use PHP_CodeSniffer\Util\Tokens;

final class DNFTypesParseError2Test extends AbstractTokenizerTestCase
{


/**
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis.
*
* @param string $testMarker The comment prefacing the target token.
*
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
*
* @return void
*/
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens($testMarker)
{
$tokens = $this->phpcsFile->getTokens();

// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
// is not recognized as a valid type declaration.
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
$token = $tokens[$unionPtr];

$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');

// Verify that the unmatched open parenthesis is tokenized as a normal parenthesis.
$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
$token = $tokens[$openPtr];

$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');

// Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration
// is not recognized as a valid type declaration.
$intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&');
$token = $tokens[$intersectPtr];

$this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)');
$this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)');

}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()


/**
* Data provider.
*
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
*
* @return array<string, array<string, string>>
*/
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
{
return [
'OO const type' => ['/* testBrokenConstDNFTypeParensMissingClose */'],
'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingClose */'],
'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingClose */'],
'Return type' => ['/* testBrokenReturnDNFTypeParensMissingClose */'],
];

}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()


/**
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis.
*
* @param string $testMarker The comment prefacing the target token.
*
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
*
* @return void
*/
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens($testMarker)
{
$tokens = $this->phpcsFile->getTokens();

// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
// is not recognized as a valid type declaration.
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
$token = $tokens[$unionPtr];

$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');

// Verify that the unmatched open parenthesis is tokenized as a normal parenthesis.
$closePtr = $this->getTargetToken($testMarker, [T_CLOSE_PARENTHESIS, T_TYPE_CLOSE_PARENTHESIS], ')');
$token = $tokens[$closePtr];

$this->assertSame(T_CLOSE_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (code)');
$this->assertSame('T_CLOSE_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (type)');

// Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration
// is not recognized as a valid type declaration.
$intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&');
$token = $tokens[$intersectPtr];

$this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)');
$this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)');

}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()


/**
* Data provider.
*
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
*
* @return array<string, array<string, string>>
*/
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
{
return [
'OO const type' => ['/* testBrokenConstDNFTypeParensMissingOpen */'],
'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingOpen */'],
'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingOpen */'],
'Return type' => ['/* testBrokenReturnDNFTypeParensMissingOpen */'],
];

}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()


/**
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis,
* but also contains a set of matched parentheses.
*
* @param string $testMarker The comment prefacing the target token.
*
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
*
* @return void
*/
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched($testMarker)
{
$tokens = $this->phpcsFile->getTokens();
$startPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');

for ($i = $startPtr; $i < $this->phpcsFile->numTokens; $i++) {
if (isset(Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
continue;
}

if ($tokens[$i]['code'] === T_EQUAL
|| $tokens[$i]['code'] === T_VARIABLE
|| $tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
) {
// Reached the end of the type.
break;
}

$errorPrefix = 'Token tokenized as '.$tokens[$i]['type'];

// Verify that type tokens have not been retokenized to `T_TYPE_*` tokens for broken type declarations.
switch ($tokens[$i]['content']) {
case '|':
$this->assertSame(T_BITWISE_OR, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_OR (code)');
$this->assertSame('T_BITWISE_OR', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_OR (type)');
break;

case '&':
$this->assertSame(T_BITWISE_AND, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_AND (code)');
$this->assertSame('T_BITWISE_AND', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_AND (type)');
break;

case '(':
// Verify that the open parenthesis is tokenized as a normal parenthesis.
$this->assertSame(T_OPEN_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_OPEN_PARENTHESIS (code)');
$this->assertSame('T_OPEN_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_OPEN_PARENTHESIS (type)');
break;

case ')':
$this->assertSame(T_CLOSE_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_CLOSE_PARENTHESIS (code)');
$this->assertSame('T_CLOSE_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_CLOSE_PARENTHESIS (type)');
break;

default:
break;
}//end switch
}//end for

}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()


/**
* Data provider.
*
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
*
* @return array<string, array<string, string>>
*/
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
{
return [
'OO const type - missing one close parenthesis' => ['/* testBrokenConstDNFTypeParensMissingOneClose */'],
'OO property type - missing one open parenthesis' => ['/* testBrokenPropertyDNFTypeParensMissingOneOpen */'],
'Parameter type - missing one close parenthesis' => ['/* testBrokenParamDNFTypeParensMissingOneClose */'],
'Return type - missing one open parenthesis' => ['/* testBrokenReturnDNFTypeParensMissingOneOpen */'],
];

}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()


}//end class

0 comments on commit 07477a7

Please sign in to comment.