From cadfa5c7a4a5cabfdce62cb3d8cf2d337172f9a9 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 16 Dec 2024 19:38:36 +0100 Subject: [PATCH 1/3] Support # comments in regex --- src/Type/Regex/RegexGroupParser.php | 23 ++++++++++++++++------- tests/PHPStan/Analyser/nsrt/bug-12242.php | 17 +++++++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 tests/PHPStan/Analyser/nsrt/bug-12242.php diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index c818426111..2029253a65 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -23,6 +23,7 @@ use function count; use function in_array; use function is_int; +use function preg_replace; use function rtrim; use function sscanf; use function str_contains; @@ -64,13 +65,6 @@ public function parseGroups(string $regex): ?array return null; } - $rawRegex = $this->regexExpressionHelper->removeDelimitersAndModifiers($regex); - try { - $ast = self::$parser->parse($rawRegex); - } catch (Exception) { - return null; - } - $modifiers = $this->regexExpressionHelper->getPatternModifiers($regex) ?? ''; foreach (self::NOT_SUPPORTED_MODIFIERS as $notSupportedModifier) { if (str_contains($modifiers, $notSupportedModifier)) { @@ -78,6 +72,21 @@ public function parseGroups(string $regex): ?array } } + // The regex engine ignores everything after the (?# until the first closing parenthesis + $regex = preg_replace('/\(\?#[^)]*\)/', '', $regex) ?? ''; + + if (str_contains($modifiers, 'x')) { + // in freespacing mode the # character starts a comment and runs until the end of the line + $regex = preg_replace('/#.*/', '', $regex) ?? 
''; + } + + $rawRegex = $this->regexExpressionHelper->removeDelimitersAndModifiers($regex); + try { + $ast = self::$parser->parse($rawRegex); + } catch (Exception) { + return null; + } + $captureOnlyNamed = false; if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) { $captureOnlyNamed = str_contains($modifiers, 'n'); diff --git a/tests/PHPStan/Analyser/nsrt/bug-12242.php b/tests/PHPStan/Analyser/nsrt/bug-12242.php new file mode 100644 index 0000000000..c8a9416249 --- /dev/null +++ b/tests/PHPStan/Analyser/nsrt/bug-12242.php @@ -0,0 +1,17 @@ +<?php // lint >= 7.4 + +namespace Bug12242; + +use function PHPStan\Testing\assertType; + +function foo(string $str): void +{ + $regexp = '/ + # ( + ([\d,]*) + # ) + /x'; + if (preg_match($regexp, $str, $match)) { + assertType('array{string, string}', $match); + } +} From 24a00b6eabbb5875bc1cfc455368b1eb493a5236 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 16 Dec 2024 20:25:21 +0100 Subject: [PATCH 2/3] simplify --- src/Type/Regex/RegexGroupParser.php | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index 2029253a65..a98fb20b42 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -72,12 +72,9 @@ public function parseGroups(string $regex): ?array } } - // The regex engine ignores everything after the (?# until the first closing parenthesis - $regex = preg_replace('/\(\?#[^)]*\)/', '', $regex) ?? ''; - if (str_contains($modifiers, 'x')) { // in freespacing mode the # character starts a comment and runs until the end of the line - $regex = preg_replace('/#.*/', '', $regex) ?? ''; + $regex = preg_replace('/[^?]#.*/', '', $regex) ?? 
''; } $rawRegex = $this->regexExpressionHelper->removeDelimitersAndModifiers($regex); From 5a62f84c7a0318bc2d4b48b9b35d230314a86668 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 17 Dec 2024 08:13:09 +0100 Subject: [PATCH 3/3] another less reduced test --- tests/PHPStan/Analyser/nsrt/bug-12242.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/PHPStan/Analyser/nsrt/bug-12242.php b/tests/PHPStan/Analyser/nsrt/bug-12242.php index c8a9416249..cb6d424567 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-12242.php +++ b/tests/PHPStan/Analyser/nsrt/bug-12242.php @@ -15,3 +15,18 @@ function foo(string $str): void assertType('array{string, string}', $match); } } + +function bar(string $str): void +{ + $regexp = '/^ + (\w+) # column type [1] + [\(] # ( + ?([\d,]*) # size or size, precision [2] + [\)] # ) + ?\s* # whitespace + (\w*) # extra description (UNSIGNED, CHARACTER SET, ...) [3] + $/x'; + if (preg_match($regexp, $str, $matches)) { + assertType('array{string, non-empty-string, string, string}', $matches); + } +}