From c053dbc01983f6dd78a78b8154a00afb64088b33 Mon Sep 17 00:00:00 2001
From: Markus Staab
Date: Tue, 17 Dec 2024 11:35:44 +0100
Subject: [PATCH] Support `#` comments in regex with `x` modifier

---
 src/Type/Regex/RegexGroupParser.php       | 22 ++++++++----
 tests/PHPStan/Analyser/nsrt/bug-12242.php | 42 +++++++++++++++++++++++
 2 files changed, 57 insertions(+), 7 deletions(-)
 create mode 100644 tests/PHPStan/Analyser/nsrt/bug-12242.php

diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php
index c818426111..a98fb20b42 100644
--- a/src/Type/Regex/RegexGroupParser.php
+++ b/src/Type/Regex/RegexGroupParser.php
@@ -23,6 +23,7 @@ use function count;
 use function in_array;
 use function is_int;
+use function preg_replace;
 use function rtrim;
 use function sscanf;
 use function str_contains;
@@ -64,13 +65,6 @@ public function parseGroups(string $regex): ?array
 			return null;
 		}
 
-		$rawRegex = $this->regexExpressionHelper->removeDelimitersAndModifiers($regex);
-		try {
-			$ast = self::$parser->parse($rawRegex);
-		} catch (Exception) {
-			return null;
-		}
-
 		$modifiers = $this->regexExpressionHelper->getPatternModifiers($regex) ?? '';
 		foreach (self::NOT_SUPPORTED_MODIFIERS as $notSupportedModifier) {
 			if (str_contains($modifiers, $notSupportedModifier)) {
@@ -78,6 +72,20 @@ public function parseGroups(string $regex): ?array
 			}
 		}
 
+		if (str_contains($modifiers, 'x')) {
+			// In free-spacing mode the # character starts a comment that runs until the end of the line.
+			// The lookbehind skips the # of inline `(?#...)` comments without consuming the character
+			// in front of a real comment, so `(\w+)# note` keeps its closing parenthesis.
+			$regex = preg_replace('/(?<!\?)#.*/', '', $regex) ?? '';
+		}
+
+		$rawRegex = $this->regexExpressionHelper->removeDelimitersAndModifiers($regex);
+		try {
+			$ast = self::$parser->parse($rawRegex);
+		} catch (Exception) {
+			return null;
+		}
+
 		$captureOnlyNamed = false;
 		if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) {
 			$captureOnlyNamed = str_contains($modifiers, 'n');
diff --git a/tests/PHPStan/Analyser/nsrt/bug-12242.php b/tests/PHPStan/Analyser/nsrt/bug-12242.php
new file mode 100644
index 0000000000..cb6d424567
--- /dev/null
+++ b/tests/PHPStan/Analyser/nsrt/bug-12242.php
@@ -0,0 +1,42 @@
+<?php // lint >= 7.4
+
+namespace Bug12242;
+
+use function PHPStan\Testing\assertType;
+
+function foo(string $str): void
+{
+	$regexp = '/
+		# (
+		([\d,]*)
+		# )
+	/x';
+	if (preg_match($regexp, $str, $match)) {
+		assertType('array{string, string}', $match);
+	}
+}
+
+function bar(string $str): void
+{
+	$regexp = '/^
+		(\w+) # column type [1]
+		[\(] # (
+		?([\d,]*) # size or size, precision [2]
+		[\)] # )
+		?\s* # whitespace
+		(\w*) # extra description (UNSIGNED, CHARACTER SET, ...) [3]
+	$/x';
+	if (preg_match($regexp, $str, $matches)) {
+		assertType('array{string, non-empty-string, string, string}', $matches);
+	}
+}
+
+function baz(string $str): void
+{
+	$regexp = '/
+		(\w+)# word characters, directly followed by a comment
+	/x';
+	if (preg_match($regexp, $str, $match)) {
+		assertType('array{string, non-empty-string}', $match);
+	}
+}