Skip to content

Commit

Permalink
RegexArrayShapeMatcher - when all groups are optional return a more p…
Browse files Browse the repository at this point in the history
…recise union
  • Loading branch information
staabm authored Jun 30, 2024
1 parent 877ff0a commit 271766e
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 35 deletions.
163 changes: 134 additions & 29 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,88 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
*/
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
{
// NOTE(review): this listing appears to interleave the previous and the updated
// version of the method (e.g. the two parseGroups() calls below and the stray
// $builder assignment) - confirm against the actual file.
$captureGroups = $this->parseGroups($regex);
if ($captureGroups === null) {
$groupList = $this->parseGroups($regex);
if ($groupList === null) {
// regex could not be parsed by Hoa/Regex
return null;
}

$builder = ConstantArrayTypeBuilder::createEmpty();
// Count how many groups at the end of the list are optional.
// This is computed before removeOptionalQualification() is called below,
// so the count still reflects the groups as they were parsed.
$trailingOptionals = 0;
foreach (array_reverse($groupList) as $captureGroup) {
if (!$captureGroup->isOptional()) {
break;
}
$trailingOptionals++;
}

$valueType = $this->getValueType($flags ?? 0);
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
if (
$wasMatched->yes()
&& $onlyOptionalTopLevelGroup !== null
) {
// If exactly one top-level capturing group exists and it is optional,
// we build a more precise constant union of an empty match and a match with the group.

// Mutates the group object in place: its own optional-quantification flag is cleared,
// so the array type built next describes the branch in which the group took part.
$onlyOptionalTopLevelGroup->removeOptionalQualification();

$combiType = $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
);

// First union member: an array holding only offset 0.
// NOTE(review): this member hard-codes StringType instead of using $valueType -
// confirm this is intended for flags that change the value type.
return TypeCombinator::union(
new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]),
$combiType,
);
}

// Otherwise a single array type is built from the unmodified group list.
return $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
);
}

/**
 * Returns the one optional capturing group sitting at the top level of the pattern.
 *
 * Groups that are not top-level are ignored. The result is null when there is no
 * top-level group at all, when any top-level group is not optional, or when more
 * than one top-level group exists.
 *
 * @param list<RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
	$candidate = null;

	foreach ($captureGroups as $current) {
		if ($current->isTopLevel()) {
			// A mandatory top-level group, or a second top-level group, rules out a single candidate.
			if (!$current->isOptional() || $candidate !== null) {
				return null;
			}

			$candidate = $current;
		}
	}

	return $candidate;
}

/**
* @param list<RegexCapturingGroup> $captureGroups
*/
private function buildArrayType(
array $captureGroups,
Type $valueType,
TrinaryLogic $wasMatched,
int $trailingOptionals,
): Type
{
$builder = ConstantArrayTypeBuilder::createEmpty();

// first item in matches contains the overall match.
$builder->setOffsetValueType(
Expand All @@ -89,21 +163,14 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
!$wasMatched->yes(),
);

$trailingOptionals = 0;
foreach (array_reverse($captureGroups) as $captureGroup) {
if (!$captureGroup->isOptional()) {
break;
}
$trailingOptionals++;
}

for ($i = 0; $i < count($captureGroups); $i++) {
$countGroups = count($captureGroups);
for ($i = 0; $i < $countGroups; $i++) {
$captureGroup = $captureGroups[$i];

if (!$wasMatched->yes()) {
$optional = true;
} else {
if ($i < count($captureGroups) - $trailingOptionals) {
if ($i < $countGroups - $trailingOptionals) {
$optional = false;
} else {
$optional = $captureGroup->isOptional();
Expand Down Expand Up @@ -181,46 +248,84 @@ private function parseGroups(string $regex): ?array
return null;
}

$capturings = [];
$this->walkRegexAst($ast, 0, 0, $capturings);
$capturingGroups = [];
$this->walkRegexAst(
$ast,
false,
false,
null,
$capturingGroups,
);

return $capturings;
return $capturingGroups;
}

/**
* Recursively walks the regex AST and appends every capturing group it finds to
* the by-reference list, in the order the nodes are visited (a node before its children).
*
* Each created group records whether it was reached through an alternation, whether its
* direct parent node is an optional quantification, and which group encloses it.
*
* NOTE(review): this listing appears to interleave the previous and the updated version
* of the method (two signatures, two @param lines, `++` next to `= true`) - confirm
* against the actual file.
*
* @param list<RegexCapturingGroup> $capturings
* @param list<RegexCapturingGroup> $capturingGroups
*/
private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, array &$capturings): void
private function walkRegexAst(
TreeNode $ast,
bool $inAlternation,
bool $inOptionalQuantification,
RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
array &$capturingGroups,
): void
{
// A group object is created for capturing, named-capturing and non-capturing nodes;
// it then becomes the parent handed to all descendants of this node.
$group = null;
if ($ast->getId() === '#capturing') {
$capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0 || $inOptionalQuantification > 0);
$group = RegexCapturingGroup::unnamed(
$inAlternation,
$inOptionalQuantification,
$parentGroup,
);
$parentGroup = $group;
} elseif ($ast->getId() === '#namedcapturing') {
// The group name is read from the value of the node's first child.
$name = $ast->getChild(0)->getValue()['value'];
$capturings[] = RegexCapturingGroup::named(
$group = RegexCapturingGroup::named(
$name,
$inAlternation > 0 || $inOptionalQuantification > 0,
$inAlternation,
$inOptionalQuantification,
$parentGroup,
);
$parentGroup = $group;
} elseif ($ast->getId() === '#noncapturing') {
$group = RegexNonCapturingGroup::create(
$inOptionalQuantification,
$parentGroup,
);
$parentGroup = $group;
}

if ($ast->getId() === '#alternation') {
$inAlternation++;
}

// The flag received from the caller has been consumed by the group created above.
// It is reset here and only set again when this node itself is an optional
// quantification, so it is passed on to this node's direct children only.
$inOptionalQuantification = false;
if ($ast->getId() === '#quantification') {
// The quantifier token is taken from the last child of the quantification node.
$lastChild = $ast->getChild($ast->getChildrenNumber() - 1);
$value = $lastChild->getValue();

// Treated as optional: an n_to_m quantifier whose text contains '{0,',
// and the zero_or_one / zero_or_more tokens.
if ($value['token'] === 'n_to_m' && str_contains($value['value'], '{0,')) {
$inOptionalQuantification++;
$inOptionalQuantification = true;
} elseif ($value['token'] === 'zero_or_one') {
$inOptionalQuantification++;
$inOptionalQuantification = true;
} elseif ($value['token'] === 'zero_or_more') {
$inOptionalQuantification++;
$inOptionalQuantification = true;
}
}

// Unlike the quantification flag, this one is never cleared in this method:
// once set, every descendant of the alternation node receives true.
if ($ast->getId() === '#alternation') {
$inAlternation = true;
}

// Only capturing groups are collected; non-capturing groups merely serve as parents.
if ($group instanceof RegexCapturingGroup) {
$capturingGroups[] = $group;
}

foreach ($ast->getChildren() as $child) {
$this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $capturings);
$this->walkRegexAst(
$child,
$inAlternation,
$inOptionalQuantification,
$parentGroup,
$capturingGroups,
);
}
}

Expand Down
38 changes: 32 additions & 6 deletions src/Type/Php/RegexCapturingGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,49 @@
class RegexCapturingGroup
{

private function __construct(private ?string $name, private bool $optional)
private function __construct(
private ?string $name,
private bool $inAlternation,
private bool $inOptionalQuantification,
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
)
{
}

public static function unnamed(bool $optional): self
public static function unnamed(
bool $inAlternation,
bool $inOptionalQuantification,
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
): self
{
return new self(null, $optional);
return new self(null, $inAlternation, $inOptionalQuantification, $parent);
}

public static function named(string $name, bool $optional): self
public static function named(
string $name,
bool $inAlternation,
bool $inOptionalQuantification,
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
): self
{
return new self($name, $optional);
return new self($name, $inAlternation, $inOptionalQuantification, $parent);
}

public function removeOptionalQualification(): void
{
$this->inOptionalQuantification = false;
}

public function isOptional(): bool
{
return $this->optional;
return $this->inAlternation
|| $this->inOptionalQuantification
|| ($this->parent !== null && $this->parent->isOptional());
}

public function isTopLevel(): bool
{
return $this->parent === null;
}

/** @phpstan-assert-if-true !null $this->getName() */
Expand Down
29 changes: 29 additions & 0 deletions src/Type/Php/RegexNonCapturingGroup.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php declare(strict_types = 1);

namespace PHPStan\Type\Php;

/**
 * Describes a non-capturing regex group together with the group that encloses it.
 *
 * Instances are created through {@see self::create()}; the constructor is private.
 */
class RegexNonCapturingGroup
{

	private function __construct(
		private bool $inOptionalQuantification,
		private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
	)
	{
	}

	/**
	 * @param bool $inOptionalQuantification whether the group itself is optionally quantified
	 * @param RegexCapturingGroup|RegexNonCapturingGroup|null $parent enclosing group, or null if there is none
	 */
	public static function create(
		bool $inOptionalQuantification,
		RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
	): self
	{
		return new self($inOptionalQuantification, $parent);
	}

	/**
	 * A group is optional when it is optionally quantified itself
	 * or when its enclosing group reports itself as optional.
	 */
	public function isOptional(): bool
	{
		if ($this->inOptionalQuantification) {
			return true;
		}

		if ($this->parent === null) {
			return false;
		}

		return $this->parent->isOptional();
	}

}
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,36 @@ function doFoo3(string $row): void

assertType('array{string, string, string, string, string, string, string}', $matches);
}

/**
 * Type-inference fixture: each case runs preg_match() with a different arrangement of
 * optional capturing groups and pins the inferred type of $matches after a successful match.
 */
function groupsOptional(string $size): void
{
// One optional top-level group containing three nested groups:
// expected is a union of "all five entries" and "only the overall match".
if (preg_match('~^a\.b(c(\d+)(\d+)(\s+))?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, string, string, string, string}|array{string}', $matches);

// One optional top-level group containing one nested group: same kind of union.
if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, string, string}|array{string}', $matches);

// The top-level group is mandatory and only the nested group is optional:
// expected is a single shape with an optional last key.
if (preg_match('~^a\.b(c(\d+)?)d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1: string, 2?: string}', $matches);

// Both the top-level group and its nested group are optional:
// expected is a single shape in which keys 1 and 2 are optional.
if (preg_match('~^a\.b(c(\d+)?)?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1?: string, 2?: string}', $matches);

// No optional groups at all: every key is expected to be present.
if (preg_match('~^a\.b(c(\d+))d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, string, string}', $matches);

// Two optional top-level groups: expected is a single shape with optional keys, not a union.
if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1?: string, 2?: string}', $matches);
}

0 comments on commit 271766e

Please sign in to comment.