Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RegexArrayShapeMatcher - More precise non-empty-string and numeric-string #3249

Merged
merged 18 commits into from
Jul 17, 2024
149 changes: 140 additions & 9 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@
use Hoa\Compiler\Llk\TreeNode;
use Hoa\Exception\Exception;
use Hoa\File\Read;
use Nette\Utils\Strings;
use PhpParser\Node\Expr;
use PhpParser\Node\Name;
use PHPStan\Analyser\Scope;
use PHPStan\Php\PhpVersion;
use PHPStan\ShouldNotHappenException;
use PHPStan\TrinaryLogic;
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType;
use PHPStan\Type\Accessory\AccessoryNumericStringType;
use PHPStan\Type\Constant\ConstantArrayType;
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
use PHPStan\Type\Constant\ConstantIntegerType;
use PHPStan\Type\Constant\ConstantStringType;
use PHPStan\Type\IntegerRangeType;
use PHPStan\Type\IntersectionType;
use PHPStan\Type\StringType;
use PHPStan\Type\Type;
use PHPStan\Type\TypeCombinator;
Expand Down Expand Up @@ -126,7 +130,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
$trailingOptionals++;
}

$valueType = $this->getValueType($flags ?? 0);
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
$onlyTopLevelAlternationId = $this->getOnlyTopLevelAlternationId($groupList);

Expand All @@ -141,7 +144,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

$combiType = $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
$flags ?? 0,
Expand Down Expand Up @@ -179,7 +181,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

$combiType = $this->buildArrayType(
$comboList,
$valueType,
$wasMatched,
$trailingOptionals,
$flags ?? 0,
Expand All @@ -202,7 +203,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

return $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
$flags ?? 0,
Expand Down Expand Up @@ -264,7 +264,6 @@ private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
*/
private function buildArrayType(
array $captureGroups,
Type $valueType,
TrinaryLogic $wasMatched,
int $trailingOptionals,
int $flags,
Expand All @@ -275,14 +274,14 @@ private function buildArrayType(
// first item in matches contains the overall match.
$builder->setOffsetValueType(
$this->getKeyType(0),
TypeCombinator::removeNull($valueType),
TypeCombinator::removeNull($this->getValueType(new StringType(), $flags)),
!$wasMatched->yes(),
);

$countGroups = count($captureGroups);
$i = 0;
foreach ($captureGroups as $captureGroup) {
$groupValueType = $valueType;
$groupValueType = $this->getValueType($captureGroup->getType(), $flags);

if (!$wasMatched->yes()) {
$optional = true;
Expand All @@ -299,6 +298,10 @@ private function buildArrayType(
}
}

if (!$optional && $captureGroup->isOptional() && !$this->containsUnmatchedAsNull($flags)) {
$groupValueType = TypeCombinator::union($groupValueType, new ConstantStringType(''));
}

if ($captureGroup->isNamed()) {
$builder->setOffsetValueType(
$this->getKeyType($captureGroup->getName()),
Expand Down Expand Up @@ -333,9 +336,10 @@ private function getKeyType(int|string $key): Type
return new ConstantIntegerType($key);
}

private function getValueType(int $flags): Type
private function getValueType(Type $baseType, int $flags): Type
{
$valueType = new StringType();
$valueType = $baseType;

$offsetType = IntegerRangeType::fromInterval(0, null);
if ($this->containsUnmatchedAsNull($flags)) {
$valueType = TypeCombinator::addNull($valueType);
Expand Down Expand Up @@ -420,6 +424,7 @@ private function walkRegexAst(
$inAlternation ? $alternationId : null,
$inOptionalQuantification,
$parentGroup,
$this->createGroupType($ast),
);
$parentGroup = $group;
} elseif ($ast->getId() === '#namedcapturing') {
Expand All @@ -430,6 +435,7 @@ private function walkRegexAst(
$inAlternation ? $alternationId : null,
$inOptionalQuantification,
$parentGroup,
$this->createGroupType($ast),
);
$parentGroup = $group;
} elseif ($ast->getId() === '#noncapturing') {
Expand Down Expand Up @@ -534,6 +540,131 @@ private function getQuantificationRange(TreeNode $node): array
return [$min, $max];
}

private function createGroupType(TreeNode $group): Type
{
$isNonEmpty = TrinaryLogic::createMaybe();
$isNumeric = TrinaryLogic::createMaybe();
$inOptionalQuantification = false;

$this->walkGroupAst($group, $isNonEmpty, $isNumeric, $inOptionalQuantification);

$accessories = [];
if ($isNumeric->yes()) {
$accessories[] = new AccessoryNumericStringType();
} elseif ($isNonEmpty->yes()) {
$accessories[] = new AccessoryNonEmptyStringType();
}

if ($accessories !== []) {
$accessories[] = new StringType();
return new IntersectionType($accessories);
}

return new StringType();
}

private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryLogic &$isNumeric, bool &$inOptionalQuantification): void
{
$children = $ast->getChildren();

if (
$ast->getId() === '#concatenation'
&& count($children) > 0
) {
$isNonEmpty = TrinaryLogic::createYes();
}

if ($ast->getId() === '#quantification') {
[$min] = $this->getQuantificationRange($ast);

if ($min === 0) {
$inOptionalQuantification = true;
}
if ($min >= 1) {
$isNonEmpty = TrinaryLogic::createYes();
$inOptionalQuantification = false;
}
}

if ($ast->getId() === 'token') {
$literalValue = $this->getLiteralValue($ast);
if ($literalValue !== null) {
if (Strings::match($literalValue, '/^\d+$/') === null) {
$isNumeric = TrinaryLogic::createNo();
}

if (!$inOptionalQuantification) {
$isNonEmpty = TrinaryLogic::createYes();
}
}

if ($ast->getValueToken() === 'character_type') {
if ($ast->getValueValue() === '\d') {
if ($isNumeric->maybe()) {
$isNumeric = TrinaryLogic::createYes();
}
} else {
$isNumeric = TrinaryLogic::createNo();
}

if (!$inOptionalQuantification) {
$isNonEmpty = TrinaryLogic::createYes();
}
}
}

if ($ast->getId() === '#range' || $ast->getId() === '#class') {
if ($isNumeric->maybe()) {
$allNumeric = null;
foreach ($children as $child) {
$literalValue = $this->getLiteralValue($child);

if ($literalValue === null) {
break;
}

if (Strings::match($literalValue, '/^\d+$/') === null) {
$allNumeric = false;
break;
}

$allNumeric = true;
}

if ($allNumeric === true) {
$isNumeric = TrinaryLogic::createYes();
}
}

if (!$inOptionalQuantification) {
$isNonEmpty = TrinaryLogic::createYes();
}
}

foreach ($children as $child) {
$this->walkGroupAst(
$child,
$isNonEmpty,
$isNumeric,
$inOptionalQuantification,
);
}
}

private function getLiteralValue(TreeNode $node): ?string
{
if ($node->getId() === 'token' && $node->getValueToken() === 'literal') {
return $node->getValueValue();
}

// literal "-" outside of a character class like '~^((\\d{1,6})-)$~'
if ($node->getId() === 'token' && $node->getValueToken() === 'range') {
return $node->getValueValue();
}

return null;
}

private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
if ($patternExpr instanceof Expr\BinaryOp\Concat) {
Expand Down
8 changes: 8 additions & 0 deletions src/Type/Php/RegexCapturingGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace PHPStan\Type\Php;

use PHPStan\Type\Type;

class RegexCapturingGroup
{

Expand All @@ -13,6 +15,7 @@ public function __construct(
private ?int $alternationId,
private bool $inOptionalQuantification,
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
private Type $type,
)
{
}
Expand Down Expand Up @@ -92,4 +95,9 @@ public function getName(): ?string
return $this->name;
}

public function getType(): Type
{
return $this->type;
}

}
10 changes: 5 additions & 5 deletions tests/PHPStan/Analyser/nsrt/bug-11311-php72.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@

function doFoo(string $s) {
if (1 === preg_match('/(?<major>\d+)\.(?<minor>\d+)(?:\.(?<patch>\d+))?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType('array{0: string, major: string, 1: string, minor: string, 2: string, patch?: string, 3?: string}', $matches);
assertType('array{0: string, major: numeric-string, 1: numeric-string, minor: numeric-string, 2: numeric-string, patch?: numeric-string, 3?: numeric-string}', $matches);
}
}

function doUnmatchedAsNull(string $s): void {
if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType('array{0: string, 1?: string, 2?: string, 3?: string}', $matches);
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches);
}
assertType('array{}|array{0: string, 1?: string, 2?: string, 3?: string}', $matches);
assertType('array{}|array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches);
}

// see https://3v4l.org/VeDob#veol
function unmatchedAsNullWithOptionalGroup(string $s): void {
if (preg_match('/Price: (£|€)?\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType('array{0: string, 1?: string}', $matches);
assertType('array{0: string, 1?: non-empty-string}', $matches);
Comment on lines 24 to +25
Copy link
Contributor

@Seldaek Seldaek Jul 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given unmatched as null this should be 1: non-empty-string|null IMO.

preg_match('/Price: (£|€)?\d+/', 'Price: 3', $matches, PREG_UNMATCHED_AS_NULL) for ex yields:

array (size=2)
  0 => string 'Price: 3' (length=8)
  1 => null

Copy link
Contributor Author

@staabm staabm Jul 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this file is the php72 variant, which does not "correctly support" nulls in the PREG_UNMATCHED_AS_NULL case (because php-src behaves differently).

see https://3v4l.org/No3Tf#v7.3.26

there is the identical test but for php7.4+ with the expectations you just shared:
https://github.com/phpstan/phpstan-src/pull/3249/files#diff-587efc95b2825fa3f959191a2c1dae92edda5ad901653ae174e58bc723152fa9R25

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sorry I missed that 🤦🏻‍♂️

} else {
assertType('array{}', $matches);
}
assertType('array{}|array{0: string, 1?: string}', $matches);
assertType('array{}|array{0: string, 1?: non-empty-string}', $matches);
}
Loading
Loading