Skip to content

Commit

Permalink
RegexArrayShapeMatcher - More precise non-empty-string and numeric-st…
Browse files Browse the repository at this point in the history
…ring
  • Loading branch information
staabm authored Jul 17, 2024
1 parent 924f173 commit 3d9b04c
Show file tree
Hide file tree
Showing 7 changed files with 388 additions and 111 deletions.
149 changes: 140 additions & 9 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,21 @@
use Hoa\Compiler\Llk\TreeNode;
use Hoa\Exception\Exception;
use Hoa\File\Read;
use Nette\Utils\Strings;
use PhpParser\Node\Expr;
use PhpParser\Node\Name;
use PHPStan\Analyser\Scope;
use PHPStan\Php\PhpVersion;
use PHPStan\ShouldNotHappenException;
use PHPStan\TrinaryLogic;
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType;
use PHPStan\Type\Accessory\AccessoryNumericStringType;
use PHPStan\Type\Constant\ConstantArrayType;
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
use PHPStan\Type\Constant\ConstantIntegerType;
use PHPStan\Type\Constant\ConstantStringType;
use PHPStan\Type\IntegerRangeType;
use PHPStan\Type\IntersectionType;
use PHPStan\Type\StringType;
use PHPStan\Type\Type;
use PHPStan\Type\TypeCombinator;
Expand Down Expand Up @@ -126,7 +130,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
$trailingOptionals++;
}

$valueType = $this->getValueType($flags ?? 0);
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
$onlyTopLevelAlternationId = $this->getOnlyTopLevelAlternationId($groupList);

Expand All @@ -141,7 +144,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

$combiType = $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
$flags ?? 0,
Expand Down Expand Up @@ -179,7 +181,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

$combiType = $this->buildArrayType(
$comboList,
$valueType,
$wasMatched,
$trailingOptionals,
$flags ?? 0,
Expand All @@ -202,7 +203,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

return $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
$flags ?? 0,
Expand Down Expand Up @@ -264,7 +264,6 @@ private function getOnlyTopLevelAlternationId(array $captureGroups): ?int
*/
private function buildArrayType(
array $captureGroups,
Type $valueType,
TrinaryLogic $wasMatched,
int $trailingOptionals,
int $flags,
Expand All @@ -275,14 +274,14 @@ private function buildArrayType(
// first item in matches contains the overall match.
$builder->setOffsetValueType(
$this->getKeyType(0),
TypeCombinator::removeNull($valueType),
TypeCombinator::removeNull($this->getValueType(new StringType(), $flags)),
!$wasMatched->yes(),
);

$countGroups = count($captureGroups);
$i = 0;
foreach ($captureGroups as $captureGroup) {
$groupValueType = $valueType;
$groupValueType = $this->getValueType($captureGroup->getType(), $flags);

if (!$wasMatched->yes()) {
$optional = true;
Expand All @@ -299,6 +298,10 @@ private function buildArrayType(
}
}

if (!$optional && $captureGroup->isOptional() && !$this->containsUnmatchedAsNull($flags)) {
$groupValueType = TypeCombinator::union($groupValueType, new ConstantStringType(''));
}

if ($captureGroup->isNamed()) {
$builder->setOffsetValueType(
$this->getKeyType($captureGroup->getName()),
Expand Down Expand Up @@ -333,9 +336,10 @@ private function getKeyType(int|string $key): Type
return new ConstantIntegerType($key);
}

private function getValueType(int $flags): Type
private function getValueType(Type $baseType, int $flags): Type
{
$valueType = new StringType();
$valueType = $baseType;

$offsetType = IntegerRangeType::fromInterval(0, null);
if ($this->containsUnmatchedAsNull($flags)) {
$valueType = TypeCombinator::addNull($valueType);
Expand Down Expand Up @@ -420,6 +424,7 @@ private function walkRegexAst(
$inAlternation ? $alternationId : null,
$inOptionalQuantification,
$parentGroup,
$this->createGroupType($ast),
);
$parentGroup = $group;
} elseif ($ast->getId() === '#namedcapturing') {
Expand All @@ -430,6 +435,7 @@ private function walkRegexAst(
$inAlternation ? $alternationId : null,
$inOptionalQuantification,
$parentGroup,
$this->createGroupType($ast),
);
$parentGroup = $group;
} elseif ($ast->getId() === '#noncapturing') {
Expand Down Expand Up @@ -534,6 +540,131 @@ private function getQuantificationRange(TreeNode $node): array
return [$min, $max];
}

private function createGroupType(TreeNode $group): Type
{
$isNonEmpty = TrinaryLogic::createMaybe();
$isNumeric = TrinaryLogic::createMaybe();
$inOptionalQuantification = false;

$this->walkGroupAst($group, $isNonEmpty, $isNumeric, $inOptionalQuantification);

$accessories = [];
if ($isNumeric->yes()) {
$accessories[] = new AccessoryNumericStringType();
} elseif ($isNonEmpty->yes()) {
$accessories[] = new AccessoryNonEmptyStringType();
}

if ($accessories !== []) {
$accessories[] = new StringType();
return new IntersectionType($accessories);
}

return new StringType();
}

private function walkGroupAst(TreeNode $ast, TrinaryLogic &$isNonEmpty, TrinaryLogic &$isNumeric, bool &$inOptionalQuantification): void
{
$children = $ast->getChildren();

if (
$ast->getId() === '#concatenation'
&& count($children) > 0
) {
$isNonEmpty = TrinaryLogic::createYes();
}

if ($ast->getId() === '#quantification') {
[$min] = $this->getQuantificationRange($ast);

if ($min === 0) {
$inOptionalQuantification = true;
}
if ($min >= 1) {
$isNonEmpty = TrinaryLogic::createYes();
$inOptionalQuantification = false;
}
}

if ($ast->getId() === 'token') {
$literalValue = $this->getLiteralValue($ast);
if ($literalValue !== null) {
if (Strings::match($literalValue, '/^\d+$/') === null) {
$isNumeric = TrinaryLogic::createNo();
}

if (!$inOptionalQuantification) {
$isNonEmpty = TrinaryLogic::createYes();
}
}

if ($ast->getValueToken() === 'character_type') {
if ($ast->getValueValue() === '\d') {
if ($isNumeric->maybe()) {
$isNumeric = TrinaryLogic::createYes();
}
} else {
$isNumeric = TrinaryLogic::createNo();
}

if (!$inOptionalQuantification) {
$isNonEmpty = TrinaryLogic::createYes();
}
}
}

if ($ast->getId() === '#range' || $ast->getId() === '#class') {
if ($isNumeric->maybe()) {
$allNumeric = null;
foreach ($children as $child) {
$literalValue = $this->getLiteralValue($child);

if ($literalValue === null) {
break;
}

if (Strings::match($literalValue, '/^\d+$/') === null) {
$allNumeric = false;
break;
}

$allNumeric = true;
}

if ($allNumeric === true) {
$isNumeric = TrinaryLogic::createYes();
}
}

if (!$inOptionalQuantification) {
$isNonEmpty = TrinaryLogic::createYes();
}
}

foreach ($children as $child) {
$this->walkGroupAst(
$child,
$isNonEmpty,
$isNumeric,
$inOptionalQuantification,
);
}
}

private function getLiteralValue(TreeNode $node): ?string
{
if ($node->getId() === 'token' && $node->getValueToken() === 'literal') {
return $node->getValueValue();
}

// literal "-" outside of a character class like '~^((\\d{1,6})-)$~'
if ($node->getId() === 'token' && $node->getValueToken() === 'range') {
return $node->getValueValue();
}

return null;
}

private function getPatternType(Expr $patternExpr, Scope $scope): Type
{
if ($patternExpr instanceof Expr\BinaryOp\Concat) {
Expand Down
8 changes: 8 additions & 0 deletions src/Type/Php/RegexCapturingGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace PHPStan\Type\Php;

use PHPStan\Type\Type;

class RegexCapturingGroup
{

Expand All @@ -13,6 +15,7 @@ public function __construct(
private ?int $alternationId,
private bool $inOptionalQuantification,
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
private Type $type,
)
{
}
Expand Down Expand Up @@ -92,4 +95,9 @@ public function getName(): ?string
return $this->name;
}

public function getType(): Type
{
return $this->type;
}

}
10 changes: 5 additions & 5 deletions tests/PHPStan/Analyser/nsrt/bug-11311-php72.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@

function doFoo(string $s) {
if (1 === preg_match('/(?<major>\d+)\.(?<minor>\d+)(?:\.(?<patch>\d+))?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType('array{0: string, major: string, 1: string, minor: string, 2: string, patch?: string, 3?: string}', $matches);
assertType('array{0: string, major: numeric-string, 1: numeric-string, minor: numeric-string, 2: numeric-string, patch?: numeric-string, 3?: numeric-string}', $matches);
}
}

function doUnmatchedAsNull(string $s): void {
if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType('array{0: string, 1?: string, 2?: string, 3?: string}', $matches);
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches);
}
assertType('array{}|array{0: string, 1?: string, 2?: string, 3?: string}', $matches);
assertType('array{}|array{0: string, 1?: non-empty-string, 2?: non-empty-string, 3?: non-empty-string}', $matches);
}

// see https://3v4l.org/VeDob#veol
function unmatchedAsNullWithOptionalGroup(string $s): void {
if (preg_match('/Price: (£|€)?\d+/', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType('array{0: string, 1?: string}', $matches);
assertType('array{0: string, 1?: non-empty-string}', $matches);
} else {
assertType('array{}', $matches);
}
assertType('array{}|array{0: string, 1?: string}', $matches);
assertType('array{}|array{0: string, 1?: non-empty-string}', $matches);
}
Loading

0 comments on commit 3d9b04c

Please sign in to comment.