From 963cd62d30aa37b5719566792364dba9a920bd14 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Wed, 16 Nov 2022 16:39:19 +0100 Subject: [PATCH] Add matchStrictGroups and other strict group operations to avoid nullable matches (#14) --- README.md | 11 +++ src/MatchAllResult.php | 4 +- src/MatchAllStrictGroupsResult.php | 46 ++++++++++ src/MatchAllWithOffsetsResult.php | 2 +- src/MatchResult.php | 2 +- src/MatchStrictGroupsResult.php | 39 ++++++++ src/MatchWithOffsetsResult.php | 2 +- src/Preg.php | 127 +++++++++++++++++++++++++-- src/Regex.php | 55 ++++++++++-- src/ReplaceResult.php | 3 +- src/UnexpectedNullMatchException.php | 20 +++++ tests/PregTests/MatchAllTest.php | 15 ++++ tests/PregTests/MatchTest.php | 15 ++++ tests/RegexTests/MatchTest.php | 14 +++ 14 files changed, 334 insertions(+), 21 deletions(-) create mode 100644 src/MatchAllStrictGroupsResult.php create mode 100644 src/MatchStrictGroupsResult.php create mode 100644 src/UnexpectedNullMatchException.php diff --git a/README.md b/README.md index 6cfdec1..03db398 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,17 @@ if (Preg::isMatch('{fo+}', $string, $matches)) // bool if (Preg::isMatchAll('{fo+}', $string, $matches)) // bool ``` +Finally the `Preg` class provides a few `*StrictGroups` method variants that ensure match groups +are always present and thus non-nullable, making it easier to write type-safe code: + +```php +use Composer\Pcre\Preg; + +// $matches is guaranteed to be an array of strings, if a subpattern does not match and produces a null it will throw +if (Preg::matchStrictGroups('{fo+}', $string, $matches)) +if (Preg::matchAllStrictGroups('{fo+}', $string, $matches)) +``` + If you would prefer a slightly more verbose usage, replacing by-ref arguments by result objects, you can use the `Regex` class: ```php diff --git a/src/MatchAllResult.php b/src/MatchAllResult.php index 4c367b0..4310c53 100644 --- a/src/MatchAllResult.php +++ b/src/MatchAllResult.php @@ -35,9 +35,9 @@ final class MatchAllResult /** * @param 0|positive-int $count - * @param array> $matches + * @param array> $matches */ - public function __construct($count, array $matches) + public function __construct(int $count, array $matches) { $this->matches = $matches; $this->matched = (bool) $count; diff --git a/src/MatchAllStrictGroupsResult.php b/src/MatchAllStrictGroupsResult.php new file mode 100644 index 0000000..69dcd06 --- /dev/null +++ b/src/MatchAllStrictGroupsResult.php @@ -0,0 +1,46 @@ + + * + * For the full copyright and license information, please view + * the LICENSE file that was distributed with this source code. + */ + +namespace Composer\Pcre; + +final class MatchAllStrictGroupsResult +{ + /** + * An array of match group => list of matched strings + * + * @readonly + * @var array> + */ + public $matches; + + /** + * @readonly + * @var 0|positive-int + */ + public $count; + + /** + * @readonly + * @var bool + */ + public $matched; + + /** + * @param 0|positive-int $count + * @param array> $matches + */ + public function __construct(int $count, array $matches) + { + $this->matches = $matches; + $this->matched = (bool) $count; + $this->count = $count; + } +} diff --git a/src/MatchAllWithOffsetsResult.php b/src/MatchAllWithOffsetsResult.php index 4e893fe..032a02c 100644 --- a/src/MatchAllWithOffsetsResult.php +++ b/src/MatchAllWithOffsetsResult.php @@ -39,7 +39,7 @@ final class MatchAllWithOffsetsResult * @param array> $matches * @phpstan-param array}>> $matches */ - public function __construct($count, array $matches) + public function __construct(int $count, array $matches) { $this->matches = $matches; $this->matched = (bool) $count; diff --git a/src/MatchResult.php b/src/MatchResult.php index d8e3460..e951a5e 100644 --- a/src/MatchResult.php +++ b/src/MatchResult.php @@ -31,7 +31,7 @@ final class MatchResult * @param 0|positive-int $count * @param array $matches */ - public function __construct($count, array $matches) + public function __construct(int $count, array $matches) { $this->matches = $matches; $this->matched = (bool) $count; diff --git a/src/MatchStrictGroupsResult.php b/src/MatchStrictGroupsResult.php new file mode 100644 index 0000000..126ee62 --- /dev/null +++ b/src/MatchStrictGroupsResult.php @@ -0,0 +1,39 @@ + + * + * For the full copyright and license information, please view + * the LICENSE file that was distributed with this source code. + */ + +namespace Composer\Pcre; + +final class MatchStrictGroupsResult +{ + /** + * An array of match group => string matched + * + * @readonly + * @var array + */ + public $matches; + + /** + * @readonly + * @var bool + */ + public $matched; + + /** + * @param 0|positive-int $count + * @param array $matches + */ + public function __construct(int $count, array $matches) + { + $this->matches = $matches; + $this->matched = (bool) $count; + } +} diff --git a/src/MatchWithOffsetsResult.php b/src/MatchWithOffsetsResult.php index 9bd813b..ba4d4bc 100644 --- a/src/MatchWithOffsetsResult.php +++ b/src/MatchWithOffsetsResult.php @@ -33,7 +33,7 @@ final class MatchWithOffsetsResult * @param array $matches * @phpstan-param array}> $matches */ - public function __construct($count, array $matches) + public function __construct(int $count, array $matches) { $this->matches = $matches; $this->matched = (bool) $count; diff --git a/src/Preg.php b/src/Preg.php index 3e4a1db..dbfea82 100644 --- a/src/Preg.php +++ b/src/Preg.php @@ -38,6 +38,25 @@ public static function match(string $pattern, string $subject, ?array &$matches return $result; } + /** + * Variant of `match()` which outputs non-null matches (or throws) + * + * @param non-empty-string $pattern + * @param array $matches Set by method + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @return 0|1 + * @throws UnexpectedNullMatchException + * + * @param-out array $matches + */ + public static function matchStrictGroups(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int + { + $result = self::match($pattern, $subject, $matchesInternal, $flags, $offset); + $matches = self::enforceNonNullMatches($pattern, $matchesInternal, 'match'); + + return $result; + } + /** * Runs preg_match with PREG_OFFSET_CAPTURE * @@ -61,7 +80,7 @@ public static function matchWithOffsets(string $pattern, string $subject, ?array /** * @param non-empty-string $pattern * @param array> $matches Set by method - * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported * @return 0|positive-int * * @param-out array> $matches @@ -69,10 +88,7 @@ public static function matchWithOffsets(string $pattern, string $subject, ?array public static function matchAll(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int { self::checkOffsetCapture($flags, 'matchAllWithOffsets'); - - if (($flags & PREG_SET_ORDER) !== 0) { - throw new \InvalidArgumentException('PREG_SET_ORDER is not supported as it changes the type of $matches'); - } + self::checkSetOrder($flags); $result = preg_match_all($pattern, $subject, $matches, $flags | PREG_UNMATCHED_AS_NULL, $offset); if (!is_int($result)) { // PHP < 8 may return null, 8+ returns int|false @@ -82,6 +98,25 @@ public static function matchAll(string $pattern, string $subject, ?array &$match return $result; } + /** + * Variant of `match()` which outputs non-null matches (or throws) + * + * @param non-empty-string $pattern + * @param array> $matches Set by method + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @return 0|positive-int + * @throws UnexpectedNullMatchException + * + * @param-out array> $matches + */ + public static function matchAllStrictGroups(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int + { + $result = self::matchAll($pattern, $subject, $matchesInternal, $flags, $offset); + $matches = self::enforceNonNullMatchAll($pattern, $matchesInternal, 'matchAll'); + + return $result; + } + /** * Runs preg_match_all with PREG_OFFSET_CAPTURE * @@ -94,6 +129,8 @@ public static function matchAll(string $pattern, string $subject, ?array &$match */ public static function matchAllWithOffsets(string $pattern, string $subject, ?array &$matches, int $flags = 0, int $offset = 0): int { + self::checkSetOrder($flags); + $result = preg_match_all($pattern, $subject, $matches, $flags | PREG_UNMATCHED_AS_NULL | PREG_OFFSET_CAPTURE, $offset); if (!is_int($result)) { // PHP < 8 may return null, 8+ returns int|false throw PcreException::fromFunction('preg_match_all', $pattern); @@ -233,6 +270,8 @@ public static function grep(string $pattern, array $array, int $flags = 0): arra } /** + * Variant of match() which returns a bool instead of int + * * @param non-empty-string $pattern * @param array $matches Set by method * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported @@ -245,6 +284,23 @@ public static function isMatch(string $pattern, string $subject, ?array &$matche } /** + * Variant of `isMatch()` which outputs non-null matches (or throws) + * + * @param non-empty-string $pattern + * @param array $matches Set by method + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @throws UnexpectedNullMatchException + * + * @param-out array $matches + */ + public static function isMatchStrictGroups(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): bool + { + return (bool) self::matchStrictGroups($pattern, $subject, $matches, $flags, $offset); + } + + /** + * Variant of matchAll() which returns a bool instead of int + * * @param non-empty-string $pattern * @param array> $matches Set by method * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported @@ -257,6 +313,22 @@ public static function isMatchAll(string $pattern, string $subject, ?array &$mat } /** + * Variant of `isMatchAll()` which outputs non-null matches (or throws) + * + * @param non-empty-string $pattern + * @param array> $matches Set by method + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * + * @param-out array> $matches + */ + public static function isMatchAllStrictGroups(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): bool + { + return (bool) self::matchAllStrictGroups($pattern, $subject, $matches, $flags, $offset); + } + + /** + * Variant of matchWithOffsets() which returns a bool instead of int + * * Runs preg_match with PREG_OFFSET_CAPTURE * * @param non-empty-string $pattern @@ -271,6 +343,8 @@ public static function isMatchWithOffsets(string $pattern, string $subject, ?arr } /** + * Variant of matchAllWithOffsets() which returns a bool instead of int + * * Runs preg_match_all with PREG_OFFSET_CAPTURE * * @param non-empty-string $pattern @@ -290,4 +364,47 @@ private static function checkOffsetCapture(int $flags, string $useFunctionName): throw new \InvalidArgumentException('PREG_OFFSET_CAPTURE is not supported as it changes the type of $matches, use ' . $useFunctionName . '() instead'); } } + + private static function checkSetOrder(int $flags): void + { + if (($flags & PREG_SET_ORDER) !== 0) { + throw new \InvalidArgumentException('PREG_SET_ORDER is not supported as it changes the type of $matches'); + } + } + + /** + * @param array $matches + * @return array + * @throws UnexpectedNullMatchException + */ + private static function enforceNonNullMatches(string $pattern, array $matches, string $variantMethod) + { + foreach ($matches as $group => $match) { + if (null === $match) { + throw new UnexpectedNullMatchException('Pattern "'.$pattern.'" had an unexpected unmatched group "'.$group.'", make sure the pattern always matches or use '.$variantMethod.'() instead.'); + } + } + + /** @var array */ + return $matches; + } + + /** + * @param array> $matches + * @return array> + * @throws UnexpectedNullMatchException + */ + private static function enforceNonNullMatchAll(string $pattern, array $matches, string $variantMethod) + { + foreach ($matches as $group => $groupMatches) { + foreach ($groupMatches as $match) { + if (null === $match) { + throw new UnexpectedNullMatchException('Pattern "'.$pattern.'" had an unexpected unmatched group "'.$group.'", make sure the pattern always matches or use '.$variantMethod.'() instead.'); + } + } + } + + /** @var array> */ + return $matches; + } } diff --git a/src/Regex.php b/src/Regex.php index 42c2cd9..82061ff 100644 --- a/src/Regex.php +++ b/src/Regex.php @@ -27,13 +27,27 @@ public static function isMatch(string $pattern, string $subject, int $offset = 0 */ public static function match(string $pattern, string $subject, int $flags = 0, int $offset = 0): MatchResult { - self::checkOffsetCapture($flags); + self::checkOffsetCapture($flags, 'matchWithOffsets'); $count = Preg::match($pattern, $subject, $matches, $flags, $offset); return new MatchResult($count, $matches); } + /** + * Variant of `match()` which returns non-null matches (or throws) + * + * @param non-empty-string $pattern + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @throws UnexpectedNullMatchException + */ + public static function matchStrictGroups(string $pattern, string $subject, int $flags = 0, int $offset = 0): MatchStrictGroupsResult + { + $count = Preg::matchStrictGroups($pattern, $subject, $matches, $flags, $offset); + + return new MatchStrictGroupsResult($count, $matches); + } + /** * Runs preg_match with PREG_OFFSET_CAPTURE * @@ -49,21 +63,35 @@ public static function matchWithOffsets(string $pattern, string $subject, int $f /** * @param non-empty-string $pattern - * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported */ public static function matchAll(string $pattern, string $subject, int $flags = 0, int $offset = 0): MatchAllResult { - self::checkOffsetCapture($flags); - - if (($flags & PREG_SET_ORDER) !== 0) { - throw new \InvalidArgumentException('PREG_SET_ORDER is not supported as it changes the return type'); - } + self::checkOffsetCapture($flags, 'matchAllWithOffsets'); + self::checkSetOrder($flags); $count = Preg::matchAll($pattern, $subject, $matches, $flags, $offset); return new MatchAllResult($count, $matches); } + /** + * Variant of `matchAll()` which returns non-null matches (or throws) + * + * @param non-empty-string $pattern + * @param int-mask $flags PREG_UNMATCHED_AS_NULL is always set, no other flags are supported + * @throws UnexpectedNullMatchException + */ + public static function matchAllStrictGroups(string $pattern, string $subject, int $flags = 0, int $offset = 0): MatchAllStrictGroupsResult + { + self::checkOffsetCapture($flags, 'matchAllWithOffsets'); + self::checkSetOrder($flags); + + $count = Preg::matchAllStrictGroups($pattern, $subject, $matches, $flags, $offset); + + return new MatchAllStrictGroupsResult($count, $matches); + } + /** * Runs preg_match_all with PREG_OFFSET_CAPTURE * @@ -72,6 +100,8 @@ public static function matchAll(string $pattern, string $subject, int $flags = 0 */ public static function matchAllWithOffsets(string $pattern, string $subject, int $flags = 0, int $offset = 0): MatchAllWithOffsetsResult { + self::checkSetOrder($flags); + $count = Preg::matchAllWithOffsets($pattern, $subject, $matches, $flags, $offset); return new MatchAllWithOffsetsResult($count, $matches); @@ -113,10 +143,17 @@ public static function replaceCallbackArray(array $pattern, $subject, int $limit return new ReplaceResult($count, $result); } - private static function checkOffsetCapture(int $flags): void + private static function checkOffsetCapture(int $flags, string $useFunctionName): void { if (($flags & PREG_OFFSET_CAPTURE) !== 0) { - throw new \InvalidArgumentException('PREG_OFFSET_CAPTURE is not supported as it changes the return type, use matchWithOffsets() instead'); + throw new \InvalidArgumentException('PREG_OFFSET_CAPTURE is not supported as it changes the return type, use '.$useFunctionName.'() instead'); + } + } + + private static function checkSetOrder(int $flags): void + { + if (($flags & PREG_SET_ORDER) !== 0) { + throw new \InvalidArgumentException('PREG_SET_ORDER is not supported as it changes the return type'); } } } diff --git a/src/ReplaceResult.php b/src/ReplaceResult.php index 0ac0840..3384771 100644 --- a/src/ReplaceResult.php +++ b/src/ReplaceResult.php @@ -33,9 +33,8 @@ final class ReplaceResult /** * @param 0|positive-int $count - * @param string $result */ - public function __construct($count, $result) + public function __construct(int $count, string $result) { $this->count = $count; $this->matched = (bool) $count; diff --git a/src/UnexpectedNullMatchException.php b/src/UnexpectedNullMatchException.php new file mode 100644 index 0000000..f123828 --- /dev/null +++ b/src/UnexpectedNullMatchException.php @@ -0,0 +1,20 @@ + + * + * For the full copyright and license information, please view + * the LICENSE file that was distributed with this source code. + */ + +namespace Composer\Pcre; + +class UnexpectedNullMatchException extends PcreException +{ + public static function fromFunction($function, $pattern) + { + throw new \LogicException('fromFunction should not be called on '.self::class.', use '.PcreException::class); + } +} diff --git a/tests/PregTests/MatchAllTest.php b/tests/PregTests/MatchAllTest.php index 3f5a91b..862d234 100644 --- a/tests/PregTests/MatchAllTest.php +++ b/tests/PregTests/MatchAllTest.php @@ -13,6 +13,7 @@ use Composer\Pcre\BaseTestCase; use Composer\Pcre\Preg; +use Composer\Pcre\UnexpectedNullMatchException; class MatchAllTest extends BaseTestCase { @@ -41,6 +42,20 @@ public function testFailure(): void self::assertSame(array(array()), $matches); } + public function testSuccessStrictGroups(): void + { + $count = Preg::matchAllStrictGroups('{(?P\d)(?a)?}', '3a', $matches); + self::assertSame(1, $count); + self::assertSame(array(0 => ['3a'], 'm' => ['3'], 1 => ['3'], 'matched' => ['a'], 2 => ['a']), $matches); + } + + public function testFailStrictGroups(): void + { + self::expectException(UnexpectedNullMatchException::class); + self::expectExceptionMessage('Pattern "{(?P\d)(?b)?}" had an unexpected unmatched group "unmatched", make sure the pattern always matches or use matchAll() instead.'); + Preg::matchAllStrictGroups('{(?P\d)(?b)?}', '123', $matches); + } + public function testBadPatternThrowsIfWarningsAreNotThrowing(): void { $this->expectPcreException($pattern = '{[aei]'); diff --git a/tests/PregTests/MatchTest.php b/tests/PregTests/MatchTest.php index b98579a..4038405 100644 --- a/tests/PregTests/MatchTest.php +++ b/tests/PregTests/MatchTest.php @@ -13,6 +13,7 @@ use Composer\Pcre\BaseTestCase; use Composer\Pcre\Preg; +use Composer\Pcre\UnexpectedNullMatchException; class MatchTest extends BaseTestCase { @@ -35,6 +36,20 @@ public function testSuccessWithInt(): void self::assertSame(array(0 => '1', 'm' => '1', 1 => '1'), $matches); } + public function testSuccessStrictGroups(): void + { + $count = Preg::matchStrictGroups('{(?P\d)(?a)?}', '3a', $matches); + self::assertSame(1, $count); + self::assertSame(array(0 => '3a', 'm' => '3', 1 => '3', 'matched' => 'a', 2 => 'a'), $matches); + } + + public function testFailStrictGroups(): void + { + self::expectException(UnexpectedNullMatchException::class); + self::expectExceptionMessage('Pattern "{(?P\d)(?b)?}" had an unexpected unmatched group "unmatched", make sure the pattern always matches or use match() instead.'); + Preg::matchStrictGroups('{(?P\d)(?b)?}', '123', $matches); + } + public function testTypeErrorWithNull(): void { $this->expectException('TypeError'); diff --git a/tests/RegexTests/MatchTest.php b/tests/RegexTests/MatchTest.php index be1da26..519e935 100644 --- a/tests/RegexTests/MatchTest.php +++ b/tests/RegexTests/MatchTest.php @@ -13,6 +13,7 @@ use Composer\Pcre\BaseTestCase; use Composer\Pcre\Regex; +use Composer\Pcre\UnexpectedNullMatchException; class MatchTest extends BaseTestCase { @@ -37,6 +38,19 @@ public function testFailure(): void self::assertSame(array(), $result->matches); } + public function testSuccessStrictGroups(): void + { + $result = Regex::matchStrictGroups('{(?P\d)(?a)?}', '3a'); + self::assertSame(array(0 => '3a', 'm' => '3', 1 => '3', 'matched' => 'a', 2 => 'a'), $result->matches); + } + + public function testFailStrictGroups(): void + { + self::expectException(UnexpectedNullMatchException::class); + self::expectExceptionMessage('Pattern "{(?P\d)(?b)?}" had an unexpected unmatched group "unmatched", make sure the pattern always matches or use match() instead.'); + Regex::matchStrictGroups('{(?P\d)(?b)?}', '123'); + } + public function testBadPatternThrowsIfWarningsAreNotThrowing(): void { $this->expectPcreException($pattern = '{abc');