diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d9c08637ef..1546d4f67e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -109,6 +109,24 @@ jobs: - run: vendor/bin/psalm --no-progress --stats --threads=$(nproc) --output-format=github --shepherd + pathological: + name: Pathological Tests + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - uses: shivammathur/setup-php@v2 + with: + php-version: 8.1 + extensions: curl, mbstring, yaml + coverage: none + tools: composer:v2 + + - run: composer update --no-progress + + - run: php tests/pathological/test.php + docs-lint: permissions: contents: read # for actions/checkout to fetch code diff --git a/.phpstorm.meta.php b/.phpstorm.meta.php index ca1bec7ee1..5eb9270da0 100644 --- a/.phpstorm.meta.php +++ b/.phpstorm.meta.php @@ -31,6 +31,7 @@ 'html_input', 'allow_unsafe_links', 'max_nesting_level', + 'max_delimiters_per_line', 'renderer', 'renderer/block_separator', 'renderer/inner_separator', @@ -89,6 +90,7 @@ 'table/alignment_attributes/left', 'table/alignment_attributes/center', 'table/alignment_attributes/right', + 'table/max_autocompleted_cells', 'table_of_contents', 'table_of_contents/html_class', 'table_of_contents/max_heading_level', diff --git a/CHANGELOG.md b/CHANGELOG.md index 917e5e80c7..9846fe0c97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,31 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi ## [Unreleased][unreleased] +This is a **security release** to address potential denial of service attacks when parsing specially crafted, +malicious input from untrusted sources (like user input). + +### Added + +- Added `max_delimiters_per_line` config option to prevent denial of service attacks when parsing malicious input +- Added `table/max_autocompleted_cells` config option to prevent denial of service attacks when parsing large tables +- The `AttributesExtension` now supports attributes without values (#985, #986) +- The `AutolinkExtension` exposes two new configuration options to override the default behavior (#969, #987): + - `autolink/allowed_protocols` - an array of protocols to allow autolinking for + - `autolink/default_protocol` - the default protocol to use when none is specified +- Added `RegexHelper::isWhitespace()` method to check if a given character is an ASCII whitespace character +- Added `CacheableDelimiterProcessorInterface` to ensure linear complexity for dynamic delimiter processing +- Added `Bracket` delimiter type to optimize bracket parsing + +### Changed + +- `[` and `]` are no longer added as `Delimiter` objects on the stack; a new `Bracket` type with its own stack is used instead +- `UrlAutolinkParser` no longer parses URLs with more than 127 subdomains +- Expanded reference links can no longer exceed 100kb, or the size of the input document (whichever is greater) +- Delimiters should always provide a non-null value via `DelimiterInterface::getIndex()` + - We'll attempt to infer the index based on surrounding delimiters where possible +- The `DelimiterStack` now accepts integer positions for any `$stackBottom` argument +- Several small performance optimizations + ## [2.5.3] - 2024-08-16 ### Changed @@ -77,6 +102,25 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi - Fixed declaration parser being too strict - `FencedCodeRenderer`: don't add `language-` to class if already prefixed +### Deprecated + +- Returning dynamic values from `DelimiterProcessorInterface::getDelimiterUse()` is deprecated + - You should instead implement `CacheableDelimiterProcessorInterface` to help the engine perform caching to avoid performance issues. +- Failing to set a delimiter's index (or returning `null` from `DelimiterInterface::getIndex()`) is deprecated and will not be supported in 3.0 +- Deprecated `DelimiterInterface::isActive()` and `DelimiterInterface::setActive()`, as these are no longer used by the engine +- Deprecated `DelimiterStack::removeEarlierMatches()` and `DelimiterStack::searchByCharacter()`, as these are no longer used by the engine +- Passing a `DelimiterInterface` as the `$stackBottom` argument to `DelimiterStack::processDelimiters()` or `::removeAll()` is deprecated and will not be supported in 3.0; pass the integer position instead. + +### Fixed + +- Fixed NUL characters not being replaced in the input +- Fixed quadratic complexity parsing unclosed inline links +- Fixed quadratic complexity parsing emphasis and strikethrough delimiters +- Fixed issue where having 500,000+ delimiters could trigger a [known segmentation fault issue in PHP's garbage collection](https://bugs.php.net/bug.php?id=68606) +- Fixed quadratic complexity deactivating link openers +- Fixed quadratic complexity parsing long backtick code spans with no matching closers +- Fixed catastrophic backtracking when parsing link labels/titles + ## [2.4.1] - 2023-08-30 ### Fixed diff --git a/composer.json b/composer.json index 9b90662034..d74b0b4b11 100644 --- a/composer.json +++ b/composer.json @@ -42,8 +42,9 @@ "phpstan/phpstan": "^1.8.2", "phpunit/phpunit": "^9.5.21 || ^10.5.9 || ^11.0.0", "scrutinizer/ocular": "^1.8.1", - "symfony/finder": "^5.3 | ^6.0 || ^7.0", - "symfony/yaml": "^2.3 | ^3.0 | ^4.0 | ^5.0 | ^6.0 || ^7.0", + "symfony/finder": "^5.3 | ^6.0 | ^7.0", + "symfony/process": "^5.4 | ^6.0 | ^7.0", + "symfony/yaml": "^2.3 | ^3.0 | ^4.0 | ^5.0 | ^6.0 | ^7.0", "unleashedtech/php-coding-standard": "^3.1.1", "vimeo/psalm": "^4.24.0 || ^5.0.0" }, @@ -103,11 +104,13 @@ "phpstan": "phpstan analyse", "phpunit": "phpunit --no-coverage", "psalm": "psalm --stats", + "pathological": "tests/pathological/test.php", "test": [ "@phpcs", "@phpstan", "@psalm", - "@phpunit" + "@phpunit", + "@pathological" ] }, "extra": { diff --git a/docs/2.5/configuration.md b/docs/2.5/configuration.md index 472b99ff56..5991f9e1cf 100644 --- a/docs/2.5/configuration.md +++ b/docs/2.5/configuration.md @@ -27,6 +27,7 @@ $config = [ 'html_input' => 'escape', 'allow_unsafe_links' => false, 'max_nesting_level' => PHP_INT_MAX, + 'max_delimiters_per_line' => PHP_INT_MAX, 'slug_normalizer' => [ 'max_length' => 255, ], @@ -73,6 +74,7 @@ Here's a list of the core configuration options available: - `escape` - Escape all HTML - `allow_unsafe_links` - Remove risky link and image URLs by setting this to `false` (default: `true`) - `max_nesting_level` - The maximum nesting level for blocks (default: `PHP_INT_MAX`). Setting this to a positive integer can help protect against long parse times and/or segfaults if blocks are too deeply-nested. +- `max_delimiters_per_line` - The maximum number of delimiters (e.g. `*` or `_`) allowed in a single line (default: `PHP_INT_MAX`). Setting this to a positive integer can help protect against long parse times and/or segfaults if lines are too long. - `slug_normalizer` - Array of options for configuring how URL-safe slugs are created; see [the slug normalizer docs](/2.5/customization/slug-normalizer/#configuration) for more details - `instance` - An alternative normalizer to use (defaults to the included `SlugNormalizer`) - `max_length` - Limits the size of generated slugs (defaults to 255 characters) diff --git a/docs/2.5/customization/delimiter-processing.md b/docs/2.5/customization/delimiter-processing.md index 9d172b256c..8748d4ef08 100644 --- a/docs/2.5/customization/delimiter-processing.md +++ b/docs/2.5/customization/delimiter-processing.md @@ -48,6 +48,8 @@ public function getDelimiterUse(DelimiterInterface $opener, DelimiterInterface $ This method is used to tell the engine how many characters from the matching delimiters should be consumed. For simple processors you'll likely return `1` (or whatever your minimum length is). In more advanced cases, you can examine the opening and closing delimiters and perform additional logic to determine whether they should be fully or partially consumed. You can also return `0` if you'd like. +**Note:** Unless you're returning a hard-coded value, you should probably implement `CacheableDelimiterProcessorInterface` instead of `DelimiterProcessorInterface` - this will allow the engine to perform additional caching for better performance. + ### `process()` ```php diff --git a/docs/2.5/extensions/tables.md b/docs/2.5/extensions/tables.md index c98bd07264..5f838a675e 100644 --- a/docs/2.5/extensions/tables.md +++ b/docs/2.5/extensions/tables.md @@ -44,6 +44,7 @@ $config = [ 'center' => ['align' => 'center'], 'right' => ['align' => 'right'], ], + 'max_autocompleted_cells' => 10_000, ], ]; @@ -159,6 +160,14 @@ $config = [ Or any other HTML attributes you'd like! +### Limiting Auto-Completed Cells + +The GFM specification says that the: + +> table’s rows may vary in the number of cells. If there are a number of cells fewer than the number of cells in the header row, empty cells are inserted. + +This feature could be abused to create very large tables. To prevent this, you can configure the `max_autocompleted_cells` option to limit the number of empty cells that will be autocompleted. If the limit is reached, further parsing of the table will be aborted. + ## Credits The Table functionality was originally built by [Martin Hasoň](https://github.com/hason) and [Webuni s.r.o.](https://www.webuni.cz) before it was merged into the core parser. diff --git a/docs/2.5/security.md b/docs/2.5/security.md index b4bd8ab598..8573e69352 100644 --- a/docs/2.5/security.md +++ b/docs/2.5/security.md @@ -11,7 +11,8 @@ In order to be fully compliant with the CommonMark spec, certain security settin - `html_input`: How to handle raw HTML - `allow_unsafe_links`: Whether unsafe links are permitted -- `max_nesting_level`: Protected against long render times or segfaults +- `max_nesting_level`: Protect against long render times or segfaults +- `max_delimiters_per_line`: Protect against long parse times or rendering segfaults Further information about each option can be found below. @@ -88,6 +89,25 @@ echo $converter->convert($markdown); See the [configuration](/2.5/configuration/) section for more information. +## Max Delimiters Per Line + +Similarly to the maximum nesting level, **no maximum number of delimiters per line is enforced by default.** Delimiters can be nested (like `*a **b** c*`) or un-nested (like `*a* *b* *c*`) - in either case, having too many in a single line can result in long parse times. We therefore have a separate option to limit the number of delimiters per line. + +If you need to parse untrusted input, consider setting a reasonable `max_delimiters_per_line` (perhaps 100-1000) depending on your needs. Once this level is hit, any subsequent delimiters on that line will be rendered as plain text. + +### Example - Prevent too many delimiters + +```php +use League\CommonMark\CommonMarkConverter; + +$markdown = '*a* **b *c **d** c* b**'; // 8 delimiters (* and **) + +$converter = new CommonMarkConverter(['max_delimiters_per_line' => 6]); +echo $converter->convert($markdown); + +//

a **b *c d c* b**

+``` + ## Additional Filtering Although this library does offer these security features out-of-the-box, some users may opt to also run the HTML output through additional filtering layers (like HTMLPurifier). If you do this, make sure you **thoroughly** test your additional post-processing steps and configure them to work properly with the types of HTML elements and attributes that converted Markdown might produce, otherwise, you may end up with weird behavior like missing images, broken links, mismatched HTML tags, etc. diff --git a/docs/2.6/upgrading.md b/docs/2.6/upgrading.md index 4e84686028..5d6e91c3c6 100644 --- a/docs/2.6/upgrading.md +++ b/docs/2.6/upgrading.md @@ -6,3 +6,27 @@ redirect_from: /upgrading/ --- # Upgrading from 2.5 to 2.6 + +## `max_delimiters_per_line` Configuration Option + +The `max_delimiters_per_line` configuration option was added in 2.6 to help protect against malicious input that could +cause excessive memory usage or denial of service attacks. It defaults to `PHP_INT_MAX` (no limit) for backwards +compatibility, which is safe when parsing trusted input. However, if you're parsing untrusted input from users, you +should probably set this to a reasonable value (somewhere between `100` and `1000`) to protect against malicious inputs. + +## Custom Delimiter Processors + +If you're implementing a custom delimiter processor, and `getDelimiterUse()` has more logic than just a +simple `return` statement, you should implement `CacheableDelimiterProcessorInterface` instead of +`DelimiterProcessorInterface` to improve performance and avoid possible quadratic performance issues. + +`DelimiterProcessorInterface` has a `getDelimiterUse()` method that tells the engine how many characters from the +matching delimiters should be consumed. Simple processors usually always return a hard-coded integer like `1` or `2`. +However, some more advanced processors may need to examine the opening and closing delimiters and perform additional +logic to determine whether they should be fully or partially consumed. Previously, these results could not be safely +cached, resulting in possible quadratic performance issues. + +In 2.6, the `CacheableDelimiterProcessorInterface` was introduced to allow these "dynamic" processors to be safely +cached. It requires a new `getCacheKey()` method that returns a string that uniquely identifies the combination of +factors considered when determining the delimiter use. This key is then used to cache the results of the search for +a matching delimiter. diff --git a/phpstan.neon.dist b/phpstan.neon.dist index e25d706246..54d7b4b6f5 100644 --- a/phpstan.neon.dist +++ b/phpstan.neon.dist @@ -7,6 +7,8 @@ parameters: message: '#Parameter .+ of class .+Reference constructor expects string, string\|null given#' - path: src/Util/RegexHelper.php message: '#Method .+RegexHelper::unescape\(\) should return string but returns string\|null#' + - path: src/Delimiter/DelimiterStack.php + message: '#unknown class WeakMap#' exceptions: uncheckedExceptionClasses: # Exceptions caused by bad developer logic that should always bubble up: diff --git a/src/Delimiter/Bracket.php b/src/Delimiter/Bracket.php new file mode 100644 index 0000000000..3a86859c71 --- /dev/null +++ b/src/Delimiter/Bracket.php @@ -0,0 +1,83 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Delimiter; + +use League\CommonMark\Node\Node; + +final class Bracket +{ + private Node $node; + private ?Bracket $previous; + private bool $hasNext = false; + private int $position; + private bool $image; + private bool $active = true; + + public function __construct(Node $node, ?Bracket $previous, int $position, bool $image) + { + $this->node = $node; + $this->previous = $previous; + $this->position = $position; + $this->image = $image; + } + + public function getNode(): Node + { + return $this->node; + } + + public function getPrevious(): ?Bracket + { + return $this->previous; + } + + public function hasNext(): bool + { + return $this->hasNext; + } + + public function getPosition(): int + { + return $this->position; + } + + public function isImage(): bool + { + return $this->image; + } + + /** + * Only valid in the context of non-images (links) + */ + public function isActive(): bool + { + return $this->active; + } + + /** + * @internal + */ + public function setHasNext(bool $hasNext): void + { + $this->hasNext = $hasNext; + } + + /** + * @internal + */ + public function setActive(bool $active): void + { + $this->active = $active; + } +} diff --git a/src/Delimiter/DelimiterInterface.php b/src/Delimiter/DelimiterInterface.php index 6bfa32e48e..0cefba7e60 100644 --- a/src/Delimiter/DelimiterInterface.php +++ b/src/Delimiter/DelimiterInterface.php @@ -24,8 +24,14 @@ public function canClose(): bool; public function canOpen(): bool; + /** + * @deprecated This method is no longer used internally and will be removed in 3.0 + */ public function isActive(): bool; + /** + * @deprecated This method is no longer used internally and will be removed in 3.0 + */ public function setActive(bool $active): void; public function getChar(): string; diff --git a/src/Delimiter/DelimiterParser.php b/src/Delimiter/DelimiterParser.php index 3f96addf85..fdfe093c90 100644 --- a/src/Delimiter/DelimiterParser.php +++ b/src/Delimiter/DelimiterParser.php @@ -62,16 +62,20 @@ public function parse(InlineParserContext $inlineContext): bool [$canOpen, $canClose] = self::determineCanOpenOrClose($charBefore, $charAfter, $character, $processor); + if (! ($canOpen || $canClose)) { + $inlineContext->getContainer()->appendChild(new Text(\str_repeat($character, $numDelims))); + + return true; + } + $node = new Text(\str_repeat($character, $numDelims), [ 'delim' => true, ]); $inlineContext->getContainer()->appendChild($node); // Add entry to stack to this opener - if ($canOpen || $canClose) { - $delimiter = new Delimiter($character, $numDelims, $node, $canOpen, $canClose); - $inlineContext->getDelimiterStack()->push($delimiter); - } + $delimiter = new Delimiter($character, $numDelims, $node, $canOpen, $canClose, $inlineContext->getCursor()->getPosition()); + $inlineContext->getDelimiterStack()->push($delimiter); return true; } diff --git a/src/Delimiter/DelimiterStack.php b/src/Delimiter/DelimiterStack.php index fb95b907cf..cf2a41e524 100644 --- a/src/Delimiter/DelimiterStack.php +++ b/src/Delimiter/DelimiterStack.php @@ -19,16 +19,47 @@ namespace League\CommonMark\Delimiter; +use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface; use League\CommonMark\Delimiter\Processor\DelimiterProcessorCollection; use League\CommonMark\Node\Inline\AdjacentTextMerger; +use League\CommonMark\Node\Node; final class DelimiterStack { /** @psalm-readonly-allow-private-mutation */ private ?DelimiterInterface $top = null; + /** @psalm-readonly-allow-private-mutation */ + private ?Bracket $brackets = null; + + /** + * @deprecated This property will be removed in 3.0 once all delimiters MUST have an index/position + * + * @var \SplObjectStorage|\WeakMap + */ + private $missingIndexCache; + + + private int $remainingDelimiters = 0; + + public function __construct(int $maximumStackSize = PHP_INT_MAX) + { + $this->remainingDelimiters = $maximumStackSize; + + if (\PHP_VERSION_ID >= 80000) { + /** @psalm-suppress PropertyTypeCoercion */ + $this->missingIndexCache = new \WeakMap(); // @phpstan-ignore-line + } else { + $this->missingIndexCache = new \SplObjectStorage(); // @phpstan-ignore-line + } + } + public function push(DelimiterInterface $newDelimiter): void { + if ($this->remainingDelimiters-- <= 0) { + return; + } + $newDelimiter->setPrevious($this->top); if ($this->top !== null) { @@ -38,14 +69,54 @@ public function push(DelimiterInterface $newDelimiter): void $this->top = $newDelimiter; } - private function findEarliest(?DelimiterInterface $stackBottom = null): ?DelimiterInterface + /** + * @internal + */ + public function addBracket(Node $node, int $index, bool $image): void { - $delimiter = $this->top; - while ($delimiter !== null && $delimiter->getPrevious() !== $stackBottom) { - $delimiter = $delimiter->getPrevious(); + if ($this->brackets !== null) { + $this->brackets->setHasNext(true); } - return $delimiter; + $this->brackets = new Bracket($node, $this->brackets, $index, $image); + } + + /** + * @psalm-immutable + */ + public function getLastBracket(): ?Bracket + { + return $this->brackets; + } + + private function findEarliest(int $stackBottom): ?DelimiterInterface + { + // Move back to first relevant delim. + $delimiter = $this->top; + $lastChecked = null; + + while ($delimiter !== null && self::getIndex($delimiter) > $stackBottom) { + $lastChecked = $delimiter; + $delimiter = $delimiter->getPrevious(); + } + + return $lastChecked; + } + + /** + * @internal + */ + public function removeBracket(): void + { + if ($this->brackets === null) { + return; + } + + $this->brackets = $this->brackets->getPrevious(); + + if ($this->brackets !== null) { + $this->brackets->setHasNext(false); + } } public function removeDelimiter(DelimiterInterface $delimiter): void @@ -62,6 +133,19 @@ public function removeDelimiter(DelimiterInterface $delimiter): void /** @psalm-suppress PossiblyNullReference */ $delimiter->getNext()->setPrevious($delimiter->getPrevious()); } + + // Nullify all references from the removed delimiter to other delimiters. + // All references to this particular delimiter in the linked list should be gone, + // but it's possible we're still hanging on to other references to things that + // have been (or soon will be) removed, which may interfere with efficient + // garbage collection by the PHP runtime. + // Explicitly releasing these references should help to avoid possible + // segfaults like in https://bugs.php.net/bug.php?id=68606. + $delimiter->setPrevious(null); + $delimiter->setNext(null); + + // TODO: Remove the line below once PHP 7.4 support is dropped, as WeakMap won't hold onto the reference, making this unnecessary + unset($this->missingIndexCache[$delimiter]); } private function removeDelimiterAndNode(DelimiterInterface $delimiter): void @@ -72,21 +156,30 @@ private function removeDelimiterAndNode(DelimiterInterface $delimiter): void private function removeDelimitersBetween(DelimiterInterface $opener, DelimiterInterface $closer): void { - $delimiter = $closer->getPrevious(); - while ($delimiter !== null && $delimiter !== $opener) { + $delimiter = $closer->getPrevious(); + $openerPosition = self::getIndex($opener); + while ($delimiter !== null && self::getIndex($delimiter) > $openerPosition) { $previous = $delimiter->getPrevious(); $this->removeDelimiter($delimiter); $delimiter = $previous; } } - public function removeAll(?DelimiterInterface $stackBottom = null): void + /** + * @param DelimiterInterface|int|null $stackBottom + */ + public function removeAll($stackBottom = null): void { - while ($this->top && $this->top !== $stackBottom) { + $stackBottomPosition = \is_int($stackBottom) ? $stackBottom : self::getIndex($stackBottom); + + while ($this->top && $this->getIndex($this->top) > $stackBottomPosition) { $this->removeDelimiter($this->top); } } + /** + * @deprecated This method is no longer used internally and will be removed in 3.0 + */ public function removeEarlierMatches(string $character): void { $opener = $this->top; @@ -100,6 +193,20 @@ public function removeEarlierMatches(string $character): void } /** + * @internal + */ + public function deactivateLinkOpeners(): void + { + $opener = $this->brackets; + while ($opener !== null && $opener->isActive()) { + $opener->setActive(false); + $opener = $opener->getPrevious(); + } + } + + /** + * @deprecated This method is no longer used internally and will be removed in 3.0 + * * @param string|string[] $characters */ public function searchByCharacter($characters): ?DelimiterInterface @@ -120,30 +227,44 @@ public function searchByCharacter($characters): ?DelimiterInterface return $opener; } - public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterProcessorCollection $processors): void + /** + * @param DelimiterInterface|int|null $stackBottom + * + * @todo change $stackBottom to an int in 3.0 + */ + public function processDelimiters($stackBottom, DelimiterProcessorCollection $processors): void { + /** @var array $openersBottom */ $openersBottom = []; + $stackBottomPosition = \is_int($stackBottom) ? $stackBottom : self::getIndex($stackBottom); + // Find first closer above stackBottom - $closer = $this->findEarliest($stackBottom); + $closer = $this->findEarliest($stackBottomPosition); // Move forward, looking for closers, and handling each while ($closer !== null) { - $delimiterChar = $closer->getChar(); + $closingDelimiterChar = $closer->getChar(); - $delimiterProcessor = $processors->getDelimiterProcessor($delimiterChar); + $delimiterProcessor = $processors->getDelimiterProcessor($closingDelimiterChar); if (! $closer->canClose() || $delimiterProcessor === null) { $closer = $closer->getNext(); continue; } + if ($delimiterProcessor instanceof CacheableDelimiterProcessorInterface) { + $openersBottomCacheKey = $delimiterProcessor->getCacheKey($closer); + } else { + $openersBottomCacheKey = $closingDelimiterChar; + } + $openingDelimiterChar = $delimiterProcessor->getOpeningCharacter(); $useDelims = 0; $openerFound = false; $potentialOpenerFound = false; $opener = $closer->getPrevious(); - while ($opener !== null && $opener !== $stackBottom && $opener !== ($openersBottom[$delimiterChar] ?? null)) { + while ($opener !== null && ($openerPosition = self::getIndex($opener)) > $stackBottomPosition && $openerPosition >= ($openersBottom[$openersBottomCacheKey] ?? 0)) { if ($opener->canOpen() && $opener->getChar() === $openingDelimiterChar) { $potentialOpenerFound = true; $useDelims = $delimiterProcessor->getDelimiterUse($opener, $closer); @@ -157,23 +278,22 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro } if (! $openerFound) { - if (! $potentialOpenerFound) { - // Only do this when we didn't even have a potential - // opener (one that matches the character and can open). - // If an opener was rejected because of the number of - // delimiters (e.g. because of the "multiple of 3" - // Set lower bound for future searches for openersrule), - // we want to consider it next time because the number - // of delimiters can change as we continue processing. - $openersBottom[$delimiterChar] = $closer->getPrevious(); - if (! $closer->canOpen()) { - // We can remove a closer that can't be an opener, - // once we've seen there's no matching opener. - $this->removeDelimiter($closer); - } + // Set lower bound for future searches + // TODO: Remove this conditional check in 3.0. It only exists to prevent behavioral BC breaks in 2.x. + if ($potentialOpenerFound === false || $delimiterProcessor instanceof CacheableDelimiterProcessorInterface) { + $openersBottom[$openersBottomCacheKey] = self::getIndex($closer); + } + + if (! $potentialOpenerFound && ! $closer->canOpen()) { + // We can remove a closer that can't be an opener, + // once we've seen there's no matching opener. + $next = $closer->getNext(); + $this->removeDelimiter($closer); + $closer = $next; + } else { + $closer = $closer->getNext(); } - $closer = $closer->getNext(); continue; } @@ -209,6 +329,68 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro } // Remove all delimiters - $this->removeAll($stackBottom); + $this->removeAll($stackBottomPosition); + } + + /** + * @internal + */ + public function __destruct() + { + while ($this->top) { + $this->removeDelimiter($this->top); + } + + while ($this->brackets) { + $this->removeBracket(); + } + } + + /** + * @deprecated This method will be dropped in 3.0 once all delimiters MUST have an index/position + */ + private function getIndex(?DelimiterInterface $delimiter): int + { + if ($delimiter === null) { + return -1; + } + + if (($index = $delimiter->getIndex()) !== null) { + return $index; + } + + if (isset($this->missingIndexCache[$delimiter])) { + return $this->missingIndexCache[$delimiter]; + } + + $prev = $delimiter->getPrevious(); + $next = $delimiter->getNext(); + + $i = 0; + do { + $i++; + if ($prev === null) { + break; + } + + if ($prev->getIndex() !== null) { + return $this->missingIndexCache[$delimiter] = $prev->getIndex() + $i; + } + } while ($prev = $prev->getPrevious()); + + $j = 0; + do { + $j++; + if ($next === null) { + break; + } + + if ($next->getIndex() !== null) { + return $this->missingIndexCache[$delimiter] = $next->getIndex() - $j; + } + } while ($next = $next->getNext()); + + // No index was defined on this delimiter, and none could be guesstimated based on the stack. + return $this->missingIndexCache[$delimiter] = $this->getIndex($delimiter->getPrevious()) + 1; } } diff --git a/src/Delimiter/Processor/CacheableDelimiterProcessorInterface.php b/src/Delimiter/Processor/CacheableDelimiterProcessorInterface.php new file mode 100644 index 0000000000..a2a9b7ef39 --- /dev/null +++ b/src/Delimiter/Processor/CacheableDelimiterProcessorInterface.php @@ -0,0 +1,46 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Delimiter\Processor; + +use League\CommonMark\Delimiter\DelimiterInterface; + +/** + * Special marker interface for delimiter processors that return dynamic values from getDelimiterUse() + * + * In order to guarantee linear performance of delimiter processing, the delimiter stack must be able to + * cache the lower bound when searching for a matching opener. This gets complicated for delimiter processors + * that use a dynamic number of characters (like with emphasis and its "multiple of 3" rule). + */ +interface CacheableDelimiterProcessorInterface extends DelimiterProcessorInterface +{ + /** + * Returns a cache key of the factors that determine the number of characters to use. + * + * In order to guarantee linear performance of delimiter processing, the delimiter stack must be able to + * cache the lower bound when searching for a matching opener. This lower bound is usually quite simple; + * for example, with quotes, it's just the last opener with that characted. However, this gets complicated + * for delimiter processors that use a dynamic number of characters (like with emphasis and its "multiple + * of 3" rule), because the delimiter length being considered may change during processing because of that + * dynamic logic in getDelimiterUse(). Therefore, we cannot safely cache the lower bound for these dynamic + * processors without knowing the factors that determine the number of characters to use. + * + * At a minimum, this should include the delimiter character, plus any other factors used to determine + * the result of getDelimiterUse(). The format of the string is not important so long as it is unique + * (compared to other processors) and consistent for a given set of factors. + * + * If getDelimiterUse() always returns the same hard-coded value, this method should return just + * the delimiter character. + */ + public function getCacheKey(DelimiterInterface $closer): string; +} diff --git a/src/Delimiter/Processor/DelimiterProcessorInterface.php b/src/Delimiter/Processor/DelimiterProcessorInterface.php index 465378c390..5e88ddc7ad 100644 --- a/src/Delimiter/Processor/DelimiterProcessorInterface.php +++ b/src/Delimiter/Processor/DelimiterProcessorInterface.php @@ -58,6 +58,9 @@ public function getMinLength(): int; * return 0 when it doesn't want to allow this particular combination of * delimiter runs. * + * IMPORTANT: Unless this method returns the same hard-coded value in all cases, + * you MUST implement the CacheableDelimiterProcessorInterface interface instead. + * * @param DelimiterInterface $opener The opening delimiter run * @param DelimiterInterface $closer The closing delimiter run */ diff --git a/src/Environment/Environment.php b/src/Environment/Environment.php index 3c24749bbb..a8112967e4 100644 --- a/src/Environment/Environment.php +++ b/src/Environment/Environment.php @@ -432,6 +432,7 @@ public static function createDefaultConfiguration(): Configuration 'html_input' => Expect::anyOf(HtmlFilter::STRIP, HtmlFilter::ALLOW, HtmlFilter::ESCAPE)->default(HtmlFilter::ALLOW), 'allow_unsafe_links' => Expect::bool(true), 'max_nesting_level' => Expect::type('int')->default(PHP_INT_MAX), + 'max_delimiters_per_line' => Expect::type('int')->default(PHP_INT_MAX), 'renderer' => Expect::structure([ 'block_separator' => Expect::string("\n"), 'inner_separator' => Expect::string("\n"), diff --git a/src/Extension/Autolink/UrlAutolinkParser.php b/src/Extension/Autolink/UrlAutolinkParser.php index 1ef270fe85..f487616552 100644 --- a/src/Extension/Autolink/UrlAutolinkParser.php +++ b/src/Extension/Autolink/UrlAutolinkParser.php @@ -34,7 +34,7 @@ final class UrlAutolinkParser implements InlineParserInterface (?: (?:xn--[a-z0-9-]++\.)*+xn--[a-z0-9-]++ # a domain name using punycode | - (?:[\pL\pN\pS\pM\-\_]++\.)+[\pL\pN\pM]++ # a multi-level domain name + (?:[\pL\pN\pS\pM\-\_]++\.){1,127}[\pL\pN\pM]++ # a multi-level domain name; total length must be 253 bytes or less | [a-z0-9\-\_]++ # a single-level domain name )\.? diff --git a/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php b/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php index 84b46ee6d9..9a6be13491 100644 --- a/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php +++ b/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php @@ -20,14 +20,14 @@ namespace League\CommonMark\Extension\CommonMark\Delimiter\Processor; use League\CommonMark\Delimiter\DelimiterInterface; -use League\CommonMark\Delimiter\Processor\DelimiterProcessorInterface; +use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface; use League\CommonMark\Extension\CommonMark\Node\Inline\Emphasis; use League\CommonMark\Extension\CommonMark\Node\Inline\Strong; use League\CommonMark\Node\Inline\AbstractStringContainer; use League\Config\ConfigurationAwareInterface; use League\Config\ConfigurationInterface; -final class EmphasisDelimiterProcessor implements DelimiterProcessorInterface, ConfigurationAwareInterface +final class EmphasisDelimiterProcessor implements CacheableDelimiterProcessorInterface, ConfigurationAwareInterface { /** @psalm-readonly */ private string $char; @@ -105,4 +105,15 @@ public function setConfiguration(ConfigurationInterface $configuration): void { $this->config = $configuration; } + + public function getCacheKey(DelimiterInterface $closer): string + { + return \sprintf( + '%s-%s-%d-%d', + $this->char, + $closer->canOpen() ? 'canOpen' : 'cannotOpen', + $closer->getOriginalLength() % 3, + $closer->getLength(), + ); + } } diff --git a/src/Extension/CommonMark/Parser/Inline/BacktickParser.php b/src/Extension/CommonMark/Parser/Inline/BacktickParser.php index 9618f2e676..3324fe39d0 100644 --- a/src/Extension/CommonMark/Parser/Inline/BacktickParser.php +++ b/src/Extension/CommonMark/Parser/Inline/BacktickParser.php @@ -18,12 +18,27 @@ use League\CommonMark\Extension\CommonMark\Node\Inline\Code; use League\CommonMark\Node\Inline\Text; +use League\CommonMark\Parser\Cursor; use League\CommonMark\Parser\Inline\InlineParserInterface; use League\CommonMark\Parser\Inline\InlineParserMatch; use League\CommonMark\Parser\InlineParserContext; final class BacktickParser implements InlineParserInterface { + /** + * Max bound for backtick code span delimiters. + * + * @see https://github.com/commonmark/cmark/commit/8ed5c9d + */ + private const MAX_BACKTICKS = 1000; + + /** @var \WeakReference|null */ + private ?\WeakReference $lastCursor = null; + private bool $lastCursorScanned = false; + + /** @var array backtick count => position of known ender */ + private array $seenBackticks = []; + public function getMatchDefinition(): InlineParserMatch { return InlineParserMatch::regex('`+'); @@ -38,11 +53,7 @@ public function parse(InlineParserContext $inlineContext): bool $currentPosition = $cursor->getPosition(); $previousState = $cursor->saveState(); - while ($matchingTicks = $cursor->match('/`+/m')) { - if ($matchingTicks !== $ticks) { - continue; - } - + if ($this->findMatchingTicks(\strlen($ticks), $cursor)) { $code = $cursor->getSubstring($currentPosition, $cursor->getPosition() - $currentPosition - \strlen($ticks)); $c = \preg_replace('/\n/m', ' ', $code) ?? ''; @@ -67,4 +78,55 @@ public function parse(InlineParserContext $inlineContext): bool return true; } + + /** + * Locates the matching closer for a backtick code span. + * + * Leverages some caching to avoid traversing the same cursor multiple times when + * we've already seen all the potential backtick closers. + * + * @see https://github.com/commonmark/cmark/commit/8ed5c9d + * + * @param int $openTickLength Number of backticks in the opening sequence + * @param Cursor $cursor Cursor to scan + * + * @return bool True if a matching closer was found, false otherwise + */ + private function findMatchingTicks(int $openTickLength, Cursor $cursor): bool + { + // Reset the seenBackticks cache if this is a new cursor + if ($this->lastCursor === null || $this->lastCursor->get() !== $cursor) { + $this->seenBackticks = []; + $this->lastCursor = \WeakReference::create($cursor); + $this->lastCursorScanned = false; + } + + if ($openTickLength > self::MAX_BACKTICKS) { + return false; + } + + // Return if we already know there's no closer + if ($this->lastCursorScanned && isset($this->seenBackticks[$openTickLength]) && $this->seenBackticks[$openTickLength] <= $cursor->getPosition()) { + return false; + } + + while ($ticks = $cursor->match('/`{1,' . self::MAX_BACKTICKS . '}/m')) { + $numTicks = \strlen($ticks); + + // Did we find the closer? + if ($numTicks === $openTickLength) { + return true; + } + + // Store position of closer + if ($numTicks <= self::MAX_BACKTICKS) { + $this->seenBackticks[$numTicks] = $cursor->getPosition() - $numTicks; + } + } + + // Got through whole input without finding closer + $this->lastCursorScanned = true; + + return false; + } } diff --git a/src/Extension/CommonMark/Parser/Inline/BangParser.php b/src/Extension/CommonMark/Parser/Inline/BangParser.php index 8a9e1bd65c..cbf6ca3828 100644 --- a/src/Extension/CommonMark/Parser/Inline/BangParser.php +++ b/src/Extension/CommonMark/Parser/Inline/BangParser.php @@ -16,7 +16,6 @@ namespace League\CommonMark\Extension\CommonMark\Parser\Inline; -use League\CommonMark\Delimiter\Delimiter; use League\CommonMark\Node\Inline\Text; use League\CommonMark\Parser\Inline\InlineParserInterface; use League\CommonMark\Parser\Inline\InlineParserMatch; @@ -38,8 +37,7 @@ public function parse(InlineParserContext $inlineContext): bool $inlineContext->getContainer()->appendChild($node); // Add entry to stack for this opener - $delimiter = new Delimiter('!', 1, $node, true, false, $cursor->getPosition()); - $inlineContext->getDelimiterStack()->push($delimiter); + $inlineContext->getDelimiterStack()->addBracket($node, $cursor->getPosition(), true); return true; } diff --git a/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php b/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php index 16f24dc2aa..f3b83fd129 100644 --- a/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php +++ b/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php @@ -16,6 +16,7 @@ namespace League\CommonMark\Extension\CommonMark\Parser\Inline; +use League\CommonMark\Delimiter\Bracket; use League\CommonMark\Environment\EnvironmentAwareInterface; use League\CommonMark\Environment\EnvironmentInterface; use League\CommonMark\Extension\CommonMark\Node\Inline\AbstractWebResource; @@ -46,14 +47,14 @@ public function getMatchDefinition(): InlineParserMatch public function parse(InlineParserContext $inlineContext): bool { // Look through stack of delimiters for a [ or ! - $opener = $inlineContext->getDelimiterStack()->searchByCharacter(['[', '!']); + $opener = $inlineContext->getDelimiterStack()->getLastBracket(); if ($opener === null) { return false; } - if (! $opener->isActive()) { - // no matched opener; remove from emphasis stack - $inlineContext->getDelimiterStack()->removeDelimiter($opener); + if (! $opener->isImage() && ! $opener->isActive()) { + // no matched opener; remove from stack + $inlineContext->getDelimiterStack()->removeBracket(); return false; } @@ -70,21 +71,19 @@ public function parse(InlineParserContext $inlineContext): bool // Inline link? if ($result = $this->tryParseInlineLinkAndTitle($cursor)) { $link = $result; - } elseif ($link = $this->tryParseReference($cursor, $inlineContext->getReferenceMap(), $opener->getIndex(), $startPos)) { + } elseif ($link = $this->tryParseReference($cursor, $inlineContext->getReferenceMap(), $opener, $startPos)) { $reference = $link; $link = ['url' => $link->getDestination(), 'title' => $link->getTitle()]; } else { - // No match - $inlineContext->getDelimiterStack()->removeDelimiter($opener); // Remove this opener from stack + // No match; remove this opener from stack + $inlineContext->getDelimiterStack()->removeBracket(); $cursor->restoreState($previousState); return false; } - $isImage = $opener->getChar() === '!'; - - $inline = $this->createInline($link['url'], $link['title'], $isImage, $reference ?? null); - $opener->getInlineNode()->replaceWith($inline); + $inline = $this->createInline($link['url'], $link['title'], $opener->isImage(), $reference ?? null); + $opener->getNode()->replaceWith($inline); while (($label = $inline->next()) !== null) { // Is there a Mention or Link contained within this link? // CommonMark does not allow nested links, so we'll restore the original text. @@ -104,8 +103,9 @@ public function parse(InlineParserContext $inlineContext): bool // Process delimiters such as emphasis inside link/image $delimiterStack = $inlineContext->getDelimiterStack(); - $stackBottom = $opener->getPrevious(); + $stackBottom = $opener->getPosition(); $delimiterStack->processDelimiters($stackBottom, $this->environment->getDelimiterProcessors()); + $delimiterStack->removeBracket(); $delimiterStack->removeAll($stackBottom); // Merge any adjacent Text nodes together @@ -113,8 +113,8 @@ public function parse(InlineParserContext $inlineContext): bool // processEmphasis will remove this and later delimiters. // Now, for a link, we also remove earlier link openers (no links in links) - if (! $isImage) { - $inlineContext->getDelimiterStack()->removeEarlierMatches('['); + if (! $opener->isImage()) { + $inlineContext->getDelimiterStack()->deactivateLinkOpeners(); } return true; @@ -168,21 +168,23 @@ private function tryParseInlineLinkAndTitle(Cursor $cursor): ?array return ['url' => $dest, 'title' => $title]; } - private function tryParseReference(Cursor $cursor, ReferenceMapInterface $referenceMap, ?int $openerIndex, int $startPos): ?ReferenceInterface + private function tryParseReference(Cursor $cursor, ReferenceMapInterface $referenceMap, Bracket $opener, int $startPos): ?ReferenceInterface { - if ($openerIndex === null) { - return null; - } - $savePos = $cursor->saveState(); $beforeLabel = $cursor->getPosition(); $n = LinkParserHelper::parseLinkLabel($cursor); - if ($n === 0 || $n === 2) { - $start = $openerIndex; - $length = $startPos - $openerIndex; - } else { + if ($n > 2) { $start = $beforeLabel + 1; $length = $n - 2; + } elseif (! $opener->hasNext()) { + // Empty or missing second label means to use the first label as the reference. + // The reference must not contain a bracket. If we know there's a bracket, we don't even bother checking it. + $start = $opener->getPosition(); + $length = $startPos - $start; + } else { + $cursor->restoreState($savePos); + + return null; } $referenceLabel = $cursor->getSubstring($start, $length); diff --git a/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php b/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php index 2b52d1cdc6..1ba8c133ab 100644 --- a/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php +++ b/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php @@ -16,7 +16,6 @@ namespace League\CommonMark\Extension\CommonMark\Parser\Inline; -use League\CommonMark\Delimiter\Delimiter; use League\CommonMark\Node\Inline\Text; use League\CommonMark\Parser\Inline\InlineParserInterface; use League\CommonMark\Parser\Inline\InlineParserMatch; @@ -36,8 +35,7 @@ public function parse(InlineParserContext $inlineContext): bool $inlineContext->getContainer()->appendChild($node); // Add entry to stack for this opener - $delimiter = new Delimiter('[', 1, $node, true, false, $inlineContext->getCursor()->getPosition()); - $inlineContext->getDelimiterStack()->push($delimiter); + $inlineContext->getDelimiterStack()->addBracket($node, $inlineContext->getCursor()->getPosition(), false); return true; } diff --git a/src/Extension/SmartPunct/QuoteParser.php b/src/Extension/SmartPunct/QuoteParser.php index 959930b3ee..31ba8c7738 100644 --- a/src/Extension/SmartPunct/QuoteParser.php +++ b/src/Extension/SmartPunct/QuoteParser.php @@ -46,6 +46,7 @@ public function parse(InlineParserContext $inlineContext): bool { $char = $inlineContext->getFullMatch(); $cursor = $inlineContext->getCursor(); + $index = $cursor->getPosition(); $charBefore = $cursor->peek(-1); if ($charBefore === null) { @@ -67,7 +68,7 @@ public function parse(InlineParserContext $inlineContext): bool $inlineContext->getContainer()->appendChild($node); // Add entry to stack to this opener - $inlineContext->getDelimiterStack()->push(new Delimiter($char, 1, $node, $canOpen, $canClose)); + $inlineContext->getDelimiterStack()->push(new Delimiter($char, 1, $node, $canOpen, $canClose, $index)); return true; } diff --git a/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php b/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php index 978e75a43f..a6c8d3889f 100644 --- a/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php +++ b/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php @@ -14,10 +14,10 @@ namespace League\CommonMark\Extension\Strikethrough; use League\CommonMark\Delimiter\DelimiterInterface; -use League\CommonMark\Delimiter\Processor\DelimiterProcessorInterface; +use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface; use League\CommonMark\Node\Inline\AbstractStringContainer; -final class StrikethroughDelimiterProcessor implements DelimiterProcessorInterface +final class StrikethroughDelimiterProcessor implements CacheableDelimiterProcessorInterface { public function getOpeningCharacter(): string { @@ -61,4 +61,9 @@ public function process(AbstractStringContainer $opener, AbstractStringContainer $opener->insertAfter($strikethrough); } + + public function getCacheKey(DelimiterInterface $closer): string + { + return '~' . $closer->getLength(); + } } diff --git a/src/Extension/Table/TableExtension.php b/src/Extension/Table/TableExtension.php index 27a58bbdc6..0a8db3ed2a 100644 --- a/src/Extension/Table/TableExtension.php +++ b/src/Extension/Table/TableExtension.php @@ -41,6 +41,7 @@ public function configureSchema(ConfigurationBuilderInterface $builder): void 'center' => (clone $attributeArraySchema)->default(['align' => 'center']), 'right' => (clone $attributeArraySchema)->default(['align' => 'right']), ]), + 'max_autocompleted_cells' => Expect::int()->min(0)->default(TableParser::DEFAULT_MAX_AUTOCOMPLETED_CELLS), ])); } @@ -52,7 +53,7 @@ public function register(EnvironmentBuilderInterface $environment): void } $environment - ->addBlockStartParser(new TableStartParser()) + ->addBlockStartParser(new TableStartParser($environment->getConfiguration()->get('table/max_autocompleted_cells'))) ->addRenderer(Table::class, $tableRenderer) ->addRenderer(TableSection::class, new TableSectionRenderer()) diff --git a/src/Extension/Table/TableParser.php b/src/Extension/Table/TableParser.php index ca340a31f2..a005f8a97e 100644 --- a/src/Extension/Table/TableParser.php +++ b/src/Extension/Table/TableParser.php @@ -25,6 +25,11 @@ final class TableParser extends AbstractBlockContinueParser implements BlockContinueParserWithInlinesInterface { + /** + * @internal + */ + public const DEFAULT_MAX_AUTOCOMPLETED_CELLS = 10_000; + /** @psalm-readonly */ private Table $block; @@ -54,6 +59,8 @@ final class TableParser extends AbstractBlockContinueParser implements BlockCont /** @psalm-readonly-allow-private-mutation */ private bool $nextIsSeparatorLine = true; + private int $remainingAutocompletedCells; + /** * @param array $columns * @param array $headerCells @@ -62,12 +69,13 @@ final class TableParser extends AbstractBlockContinueParser implements BlockCont * * @phpstan-param array $columns */ - public function __construct(array $columns, array $headerCells) + public function __construct(array $columns, array $headerCells, int $remainingAutocompletedCells = self::DEFAULT_MAX_AUTOCOMPLETED_CELLS) { - $this->block = new Table(); - $this->bodyLines = new ArrayCollection(); - $this->columns = $columns; - $this->headerCells = $headerCells; + $this->block = new Table(); + $this->bodyLines = new ArrayCollection(); + $this->columns = $columns; + $this->headerCells = $headerCells; + $this->remainingAutocompletedCells = $remainingAutocompletedCells; } public function canHaveLazyContinuationLines(): bool @@ -121,6 +129,12 @@ public function parseInlines(InlineParserEngineInterface $inlineParser): void // Body can not have more columns than head for ($i = 0; $i < $headerColumns; $i++) { + // It can have less columns though, in which case we'll autocomplete the empty ones (up to some limit) + if (! isset($cells[$i]) && $this->remainingAutocompletedCells-- <= 0) { + // Too many cells were auto-completed, so we'll just stop here + return; + } + $cell = $cells[$i] ?? ''; $tableCell = $this->parseCell($cell, $i, $inlineParser); $row->appendChild($tableCell); @@ -138,14 +152,12 @@ public function parseInlines(InlineParserEngineInterface $inlineParser): void private function parseCell(string $cell, int $column, InlineParserEngineInterface $inlineParser): TableCell { - $tableCell = new TableCell(); + $tableCell = new TableCell(TableCell::TYPE_DATA, $this->columns[$column] ?? null); - if ($column < \count($this->columns)) { - $tableCell->setAlign($this->columns[$column]); + if ($cell !== '') { + $inlineParser->parse(\trim($cell), $tableCell); } - $inlineParser->parse(\trim($cell), $tableCell); - return $tableCell; } diff --git a/src/Extension/Table/TableStartParser.php b/src/Extension/Table/TableStartParser.php index 12206d289a..7411951c3d 100644 --- a/src/Extension/Table/TableStartParser.php +++ b/src/Extension/Table/TableStartParser.php @@ -23,6 +23,13 @@ final class TableStartParser implements BlockStartParserInterface { + private int $maxAutocompletedCells; + + public function __construct(int $maxAutocompletedCells = TableParser::DEFAULT_MAX_AUTOCOMPLETED_CELLS) + { + $this->maxAutocompletedCells = $maxAutocompletedCells; + } + public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserState): ?BlockStart { $paragraph = $parserState->getParagraphContent(); @@ -35,8 +42,8 @@ public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserSta return BlockStart::none(); } - $lines = \explode("\n", $paragraph); - $lastLine = \array_pop($lines); + $lastLineBreak = \strrpos($paragraph, "\n"); + $lastLine = $lastLineBreak === false ? $paragraph : \substr($paragraph, $lastLineBreak + 1); $headerCells = TableParser::split($lastLine); if (\count($headerCells) > \count($columns)) { @@ -47,13 +54,13 @@ public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserSta $parsers = []; - if (\count($lines) > 0) { + if ($lastLineBreak !== false) { $p = new ParagraphParser(); - $p->addLine(\implode("\n", $lines)); + $p->addLine(\substr($paragraph, 0, $lastLineBreak)); $parsers[] = $p; } - $parsers[] = new TableParser($columns, $headerCells); + $parsers[] = new TableParser($columns, $headerCells, $this->maxAutocompletedCells); return BlockStart::of(...$parsers) ->at($cursor) diff --git a/src/Normalizer/TextNormalizer.php b/src/Normalizer/TextNormalizer.php index 7860f1b939..43eb117453 100644 --- a/src/Normalizer/TextNormalizer.php +++ b/src/Normalizer/TextNormalizer.php @@ -34,6 +34,11 @@ public function normalize(string $text, array $context = []): string $text = \preg_replace('/[ \t\r\n]+/', ' ', \trim($text)); \assert(\is_string($text)); + // Is it strictly ASCII? If so, we can use strtolower() instead (faster) + if (\mb_check_encoding($text, 'ASCII')) { + return \strtolower($text); + } + return \mb_convert_case($text, \MB_CASE_FOLD, 'UTF-8'); } } diff --git a/src/Parser/Cursor.php b/src/Parser/Cursor.php index faae75bc1d..598cd75b52 100644 --- a/src/Parser/Cursor.php +++ b/src/Parser/Cursor.php @@ -322,17 +322,17 @@ public function advanceToNextNonSpaceOrTab(): int */ public function advanceToNextNonSpaceOrNewline(): int { - $remainder = $this->getRemainder(); + $currentCharacter = $this->getCurrentCharacter(); // Optimization: Avoid the regex if we know there are no spaces or newlines - if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) { + if ($currentCharacter !== ' ' && $currentCharacter !== "\n") { $this->previousPosition = $this->currentPosition; return 0; } $matches = []; - \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE); + \preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, \PREG_OFFSET_CAPTURE); // [0][0] contains the matched text // [0][1] contains the index of that match diff --git a/src/Parser/InlineParserContext.php b/src/Parser/InlineParserContext.php index 796f2f388c..9372904281 100644 --- a/src/Parser/InlineParserContext.php +++ b/src/Parser/InlineParserContext.php @@ -42,12 +42,12 @@ final class InlineParserContext */ private array $matches; - public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap) + public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap, int $maxDelimitersPerLine = PHP_INT_MAX) { $this->referenceMap = $referenceMap; $this->container = $container; $this->cursor = $contents; - $this->delimiterStack = new DelimiterStack(); + $this->delimiterStack = new DelimiterStack($maxDelimitersPerLine); } public function getContainer(): AbstractBlock diff --git a/src/Parser/InlineParserEngine.php b/src/Parser/InlineParserEngine.php index b91a63f72f..6a26979329 100644 --- a/src/Parser/InlineParserEngine.php +++ b/src/Parser/InlineParserEngine.php @@ -59,7 +59,7 @@ public function parse(string $contents, AbstractBlock $block): void $contents = \trim($contents); $cursor = new Cursor($contents); - $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap); + $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap, $this->environment->getConfiguration()->get('max_delimiters_per_line')); // Have all parsers look at the line to determine what they might want to parse and what positions they exist at foreach ($this->matchParsers($contents) as $matchPosition => $parsers) { diff --git a/src/Parser/MarkdownParser.php b/src/Parser/MarkdownParser.php index 2fecb9ba47..904c7c45b4 100644 --- a/src/Parser/MarkdownParser.php +++ b/src/Parser/MarkdownParser.php @@ -32,6 +32,7 @@ use League\CommonMark\Parser\Block\BlockStartParserInterface; use League\CommonMark\Parser\Block\DocumentBlockParser; use League\CommonMark\Parser\Block\ParagraphParser; +use League\CommonMark\Reference\MemoryLimitedReferenceMap; use League\CommonMark\Reference\ReferenceInterface; use League\CommonMark\Reference\ReferenceMap; @@ -102,7 +103,7 @@ public function parse(string $input): Document // finalizeAndProcess $this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber); - $this->processInlines(); + $this->processInlines(\strlen($input)); $this->environment->dispatch(new DocumentParsedEvent($documentParser->getBlock())); @@ -115,6 +116,9 @@ public function parse(string $input): Document */ private function parseLine(string $line): void { + // replace NUL characters for security + $line = \str_replace("\0", "\u{FFFD}", $line); + $this->cursor = new Cursor($line); $matches = $this->parseBlockContinuation(); @@ -263,9 +267,9 @@ private function finalize(BlockContinueParserInterface $blockParser, int $endLin /** * Walk through a block & children recursively, parsing string content into inline content where appropriate. */ - private function processInlines(): void + private function processInlines(int $inputSize): void { - $p = new InlineParserEngine($this->environment, $this->referenceMap); + $p = new InlineParserEngine($this->environment, new MemoryLimitedReferenceMap($this->referenceMap, $inputSize)); foreach ($this->closedBlockParsers as $blockParser) { $blockParser->parseInlines($p); diff --git a/src/Reference/MemoryLimitedReferenceMap.php b/src/Reference/MemoryLimitedReferenceMap.php new file mode 100644 index 0000000000..d47bd6a6de --- /dev/null +++ b/src/Reference/MemoryLimitedReferenceMap.php @@ -0,0 +1,68 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Reference; + +final class MemoryLimitedReferenceMap implements ReferenceMapInterface +{ + private ReferenceMapInterface $decorated; + + private const MINIMUM_SIZE = 100_000; + + private int $remaining; + + public function __construct(ReferenceMapInterface $decorated, int $maxSize) + { + $this->decorated = $decorated; + $this->remaining = \max(self::MINIMUM_SIZE, $maxSize); + } + + public function add(ReferenceInterface $reference): void + { + $this->decorated->add($reference); + } + + public function contains(string $label): bool + { + return $this->decorated->contains($label); + } + + public function get(string $label): ?ReferenceInterface + { + $reference = $this->decorated->get($label); + if ($reference === null) { + return null; + } + + // Check for expansion limit + $this->remaining -= \strlen($reference->getDestination()) + \strlen($reference->getTitle()); + if ($this->remaining < 0) { + return null; + } + + return $reference; + } + + /** + * @return \Traversable + */ + public function getIterator(): \Traversable + { + return $this->decorated->getIterator(); + } + + public function count(): int + { + return $this->decorated->count(); + } +} diff --git a/src/Reference/ReferenceMap.php b/src/Reference/ReferenceMap.php index 982cb1253a..97a167dcf2 100644 --- a/src/Reference/ReferenceMap.php +++ b/src/Reference/ReferenceMap.php @@ -48,6 +48,10 @@ public function add(ReferenceInterface $reference): void public function contains(string $label): bool { + if ($this->references === []) { + return false; + } + $label = $this->normalizer->normalize($label); return isset($this->references[$label]); @@ -55,6 +59,10 @@ public function contains(string $label): bool public function get(string $label): ?ReferenceInterface { + if ($this->references === []) { + return null; + } + $label = $this->normalizer->normalize($label); return $this->references[$label] ?? null; diff --git a/src/Util/LinkParserHelper.php b/src/Util/LinkParserHelper.php index e329669bdd..3e76c28faa 100644 --- a/src/Util/LinkParserHelper.php +++ b/src/Util/LinkParserHelper.php @@ -30,15 +30,8 @@ final class LinkParserHelper */ public static function parseLinkDestination(Cursor $cursor): ?string { - if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) { - // Chop off surrounding <..>: - return UrlEncoder::unescapeAndEncode( - RegexHelper::unescape(\substr($res, 1, -1)) - ); - } - if ($cursor->getCurrentCharacter() === '<') { - return null; + return self::parseDestinationBraces($cursor); } $destination = self::manuallyParseLinkDestination($cursor); @@ -69,7 +62,7 @@ public static function parseLinkLabel(Cursor $cursor): int public static function parsePartialLinkLabel(Cursor $cursor): ?string { - return $cursor->match('/^(?:[^\\\\\[\]]+|\\\\.?)*/'); + return $cursor->match('/^(?:[^\\\\\[\]]++|\\\\.?)*+/'); } /** @@ -100,27 +93,27 @@ public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimite private static function manuallyParseLinkDestination(Cursor $cursor): ?string { - $oldPosition = $cursor->getPosition(); - $oldState = $cursor->saveState(); - + $remainder = $cursor->getRemainder(); $openParens = 0; - while (($c = $cursor->getCurrentCharacter()) !== null) { - if ($c === '\\' && ($peek = $cursor->peek()) !== null && RegexHelper::isEscapable($peek)) { - $cursor->advanceBy(2); + $len = \strlen($remainder); + for ($i = 0; $i < $len; $i++) { + $c = $remainder[$i]; + if ($c === '\\' && $i + 1 < $len && RegexHelper::isEscapable($remainder[$i + 1])) { + $i++; } elseif ($c === '(') { - $cursor->advanceBy(1); $openParens++; + // Limit to 32 nested parens for pathological cases + if ($openParens > 32) { + return null; + } } elseif ($c === ')') { if ($openParens < 1) { break; } - $cursor->advanceBy(1); $openParens--; - } elseif (\preg_match(RegexHelper::REGEX_WHITESPACE_CHAR, $c)) { + } elseif (\ord($c) <= 32 && RegexHelper::isWhitespace($c)) { break; - } else { - $cursor->advanceBy(1); } } @@ -128,15 +121,45 @@ private static function manuallyParseLinkDestination(Cursor $cursor): ?string return null; } - if ($cursor->getPosition() === $oldPosition && (! isset($c) || $c !== ')')) { + if ($i === 0 && (! isset($c) || $c !== ')')) { return null; } - $newPos = $cursor->getPosition(); - $cursor->restoreState($oldState); + $destination = \substr($remainder, 0, $i); + $cursor->advanceBy(\mb_strlen($destination, 'UTF-8')); + + return $destination; + } + + /** @var \WeakReference|null */ + private static ?\WeakReference $lastCursor = null; + private static bool $lastCursorLacksClosingBrace = false; + + private static function parseDestinationBraces(Cursor $cursor): ?string + { + // Optimization: If we've previously parsed this cursor and returned `null`, we know + // that no closing brace exists, so we can skip the regex entirely. This helps avoid + // certain pathological cases where the regex engine can take a very long time to + // determine that no match exists. + if (self::$lastCursor !== null && self::$lastCursor->get() === $cursor) { + if (self::$lastCursorLacksClosingBrace) { + return null; + } + } else { + self::$lastCursor = \WeakReference::create($cursor); + } + + if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) { + self::$lastCursorLacksClosingBrace = false; - $cursor->advanceBy($newPos - $cursor->getPosition()); + // Chop off surrounding <..>: + return UrlEncoder::unescapeAndEncode( + RegexHelper::unescape(\substr($res, 1, -1)) + ); + } + + self::$lastCursorLacksClosingBrace = true; - return $cursor->getPreviousText(); + return null; } } diff --git a/src/Util/RegexHelper.php b/src/Util/RegexHelper.php index a89e7bda66..603631f294 100644 --- a/src/Util/RegexHelper.php +++ b/src/Util/RegexHelper.php @@ -61,9 +61,9 @@ final class RegexHelper self::PARTIAL_PROCESSINGINSTRUCTION . '|' . self::PARTIAL_DECLARATION . '|' . self::PARTIAL_CDATA . ')'; public const PARTIAL_HTMLBLOCKOPEN = '<(?:' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s\/>]|$)' . '|' . '\/' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s>]|$)' . '|' . '[?!])'; - public const PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"' . - '|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\'' . - '|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*\))'; + public const PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*+"' . + '|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*+\'' . + '|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*+\))'; public const REGEX_PUNCTUATION = '/^[!"#$%&\'()*+,\-.\\/:;<=>?@\\[\\]\\\\^_`{|}~\p{P}\p{S}]/u'; public const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i'; @@ -83,6 +83,12 @@ public static function isEscapable(string $character): bool return \preg_match('/' . self::PARTIAL_ESCAPABLE . '/', $character) === 1; } + public static function isWhitespace(string $character): bool + { + /** @psalm-suppress InvalidLiteralArgument */ + return $character !== '' && \strpos(" \t\n\x0b\x0c\x0d", $character) !== false; + } + /** * @psalm-pure */ diff --git a/tests/functional/MaxDelimitersPerLineTest.php b/tests/functional/MaxDelimitersPerLineTest.php new file mode 100644 index 0000000000..1de9697dc5 --- /dev/null +++ b/tests/functional/MaxDelimitersPerLineTest.php @@ -0,0 +1,50 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Tests\Functional; + +use League\CommonMark\CommonMarkConverter; +use PHPUnit\Framework\TestCase; + +final class MaxDelimitersPerLineTest extends TestCase +{ + /** + * @dataProvider provideTestCases + */ + public function testIt(string $input, int $maxDelimsPerLine, string $expectedOutput): void + { + $converter = new CommonMarkConverter(['max_delimiters_per_line' => $maxDelimsPerLine]); + + $this->assertEquals($expectedOutput, \trim($converter->convert($input)->getContent())); + } + + /** + * @return iterable> + */ + public function provideTestCases(): iterable + { + yield ['*a* **b *c* b**', 6, '

a b c b

']; + + yield ['*a* **b *c **d** c* b**', 0, '

*a* **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 1, '

*a* **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 2, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 3, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 4, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 5, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 6, '

a **b *c d c* b**

']; + yield ['*a* **b *c **d** c* b**', 7, '

a **b c d c b**

']; + yield ['*a* **b *c **d** c* b**', 8, '

a b c d c b

']; + yield ['*a* **b *c **d** c* b**', 9, '

a b c d c b

']; + yield ['*a* **b *c **d** c* b**', 100, '

a b c d c b

']; + } +} diff --git a/tests/pathological/convert.php b/tests/pathological/convert.php new file mode 100755 index 0000000000..51c6d42330 --- /dev/null +++ b/tests/pathological/convert.php @@ -0,0 +1,62 @@ +#!/usr/bin/env php + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +ini_set('memory_limit', '1024M'); + +use League\CommonMark\Environment\Environment; +use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension; +use League\CommonMark\Extension\Footnote\FootnoteExtension; +use League\CommonMark\Extension\GithubFlavoredMarkdownExtension; +use League\CommonMark\Extension\Table\TableExtension; +use League\CommonMark\MarkdownConverter; + +require_once __DIR__ . '/../../vendor/autoload.php'; + +ini_set('display_errors', 'stderr'); +ini_set('xdebug.max_nesting_level', '999999'); + +$stdin = fopen('php://stdin', 'r'); +if (stream_set_blocking($stdin, true)) { + $markdown = stream_get_contents($stdin); +} +fclose($stdin); + +if (empty($markdown)) { + fwrite(STDERR, "No input provided\n"); + exit(1); +} + +$config = []; +if (isset($argv[1])) { + $config = \json_decode($argv[1], true); +} + +$environment = new Environment($config); +$environment->addExtension(new CommonMarkCoreExtension()); + +// Enable additional extensions if requested +switch ($argv[2] ?? null) { + case 'table': + $environment->addExtension(new TableExtension()); + break; + case 'footnotes': + $environment->addExtension(new FootnoteExtension()); + break; + case 'gfm': + default: + $environment->addExtension(new GithubFlavoredMarkdownExtension()); + break; +} + +$converter = new MarkdownConverter($environment); + +echo $converter->convert($markdown)->getContent(); diff --git a/tests/pathological/test.php b/tests/pathological/test.php new file mode 100755 index 0000000000..fea326a449 --- /dev/null +++ b/tests/pathological/test.php @@ -0,0 +1,386 @@ +#!/usr/bin/env php + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +use Symfony\Component\Process\Exception\ProcessSignaledException; +use Symfony\Component\Process\Exception\ProcessTimedOutException; +use Symfony\Component\Process\Process; + +require_once __DIR__ . '/../../vendor/autoload.php'; + +$cases = [ + 'U+0000 in input' => [ + 'sizes' => [1], + 'input' => static fn($n) => "abc\u{0000}def\u{0000}\n", + 'expected' => static fn($n) => "

abc\u{FFFD}def\u{FFFD}

", + ], + 'Alternate line endings' => [ + 'sizes' => [1], + 'input' => static fn($n) => "- a\n- b\r- c\r\n- d", + 'expected' => static fn($n) => "
    \n
  • a
  • \n
  • b
  • \n
  • c
  • \n
  • d
  • \n
\n", + ], + 'Nested strong emphasis' => [ + 'sizes' => [50, 500], + 'input' => static fn($n) => \str_repeat('*a **a ', $n) . 'b' . \str_repeat(' a** a*', $n), + 'expected' => static fn($n) => '

' . \str_repeat('a a ', $n) . 'b' . \str_repeat(' a a', $n) . '

', + ], + 'Emphasis closers with no openers' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('a_ ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('a_ ', $n - 1) . 'a_

', + ], + 'Emphasis openers with no closers' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('_a ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('_a ', $n - 1) . '_a

', + ], + 'Openers and closers multiple of 3' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => 'a**b' . \str_repeat('c* ', $n), + 'expected' => static fn($n) => '

a**b' . \str_repeat('c* ', $n - 1) . 'c*

', + ], + 'Delimiters that cannot open or close' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/172', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('*_* _ ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('_ _ ', $n - 1) . '_ _

', + ], + 'Link closers with no openers' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('a] ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('a] ', $n - 1) . 'a]

', + ], + 'Link openers with no closers' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('[a ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('[a ', $n - 1) . '[a

', + ], + 'Link openers and emphasis closers' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('[ a_ ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('[ a_ ', $n - 1) . '[ a_

', + ], + 'Mismatched openers and closers' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('*a_ ', $n), + 'expected' => static fn($n) => '

' . \str_repeat('*a_ ', $n - 1) . '*a_

', + ], + 'Pattern [ (](' => [ + 'sizes' => [500, 5_000, 50_000], + 'input' => static fn($n) => \str_repeat('[ (](', $n), + 'expected' => static fn($n) => '

' . \str_repeat('[ (](', $n) . '

', + ], + 'Nested brackets' => [ + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('[', $n) . 'a' . \str_repeat(']', $n), + 'expected' => static fn($n) => '

' . \str_repeat('[', $n) . 'a' . \str_repeat(']', $n) . '

', + ], + 'Backslash in link' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/157', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => '[' . \str_repeat('\\', $n) . "\n", + 'expected' => static fn($n) => '

[' . \str_repeat('\\', $n / 2) . '

', + ], + 'Backslash in unclosed link title' => [ + 'sizes' => [10, 100, 1_000], + 'input' => static fn($n) => '[test](\\url "' . \str_repeat('\\', $n) . "\n", + 'expected' => static fn($n) => '

[test](\\url "' . \str_repeat('\\', $n / 2) . '

', + ], + 'Unclosed inline links (1)' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('[](', $n), + 'expected' => static fn($n) => '

' . \str_repeat('[](', $n) . '

', + ], + 'Unclosed inline links (2)' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('[a](b', $n), + 'expected' => static fn($n) => '

' . \str_repeat('[a](b', $n) . '

', + ], + 'Unclosed inline links (3)' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('[a]( static fn($n) => '

' . \str_repeat('[a](<b', $n) . '

', + ], + 'Nested blockquotes' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129', + 'sizes' => [100, 1_000], + 'input' => static fn($n) => \str_repeat('> ', $n) . "a\n", + 'expected' => static fn($n) => \str_repeat("
\n", $n) . "

a

\n" . \str_repeat("
\n", $n), + ], + 'Backticks' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129', + 'sizes' => [500, 1_000, 2_000, 4_000], + 'input' => static fn($n) => \implode('', \array_map(static fn($i) => 'e' . \str_repeat('`', $i), \range(1, $n))), + ], + 'Many ref. definitions' => [ + 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat("[a]: u\n", $n), + ], + 'Huge horizontal rule' => [ + 'sizes' => [500, 5_000], + 'input' => static fn($n) => \str_repeat('*', $n) . "\n", + 'expected' => static fn($n) => '
', + ], + 'CVE-2022-39209' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-cgh3-p57x-9q7q', + 'extension' => 'autolink', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('![l', $n) . "\n", + 'expected' => static fn($n) => '

' . \str_repeat('![l', $n) . '

', + ], + 'CVE-2023-22486' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-r572-jvj2-3m8p', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('![[]()', $n) . "\n", + 'expected' => static fn($n) => '

' . \str_repeat('![', $n) . '

', + ], + 'CVE-2023-22484' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-24f7-9frr-5h2r', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => ' static fn($n) => '

</' . \str_repeat('<!--', $n) . '

', + ], + 'CVE-2023-22483 test 1' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => "-1" . \str_repeat(" static fn($n) => '

-1' . \str_repeat('<?x', $n) . 'y

', + ], + 'CVE-2023-22483 test 2 (tables)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'extension' => 'table', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat("|-\nt>|-\n", $n) . "y\n", + 'expected' => static fn($n) => '

' . \str_repeat("|-\nt>|-\n", $n) . 'y

', + ], + 'CVE-2023-22483 test 3' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [1_000, 10_000, 20_000, 40_000], + 'input' => static fn($n) => \str_repeat('*t ', $n) . \str_repeat('_t*_ ', $n) . "\n", + 'expected' => static fn($n) => '

' . \str_repeat('t ', $n) . \str_repeat('_t_ ', $n - 1) . '_t_

', + ], + 'CVE-2023-22483 test 4 (tables)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'extension' => 'table', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => "x\n| - |\n" . \str_repeat("|", $n) . "y\n", + 'expected' => static fn($n) => "

x\n| - |\n" . \str_repeat('|', $n) . 'y

', + ], + 'CVE-2023-22483 test 5 (autolink)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat('z_www.', $n) . "\n", + ], + 'CVE-2023-22483 test 6' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [100, 1_000, 10_000, 100_000], + 'input' => static fn($n) => "[f]:u\n\"\n" . \str_repeat("[f]\n", $n) . "[f]:u \"t\n", + ], + 'CVE-2023-22483 test 7 (autolink)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => "www." . \str_repeat(")", $n) . "\n", + 'expected' => static fn($n) => '

www.' . \str_repeat(')', $n) . '

', + ], + 'CVE-2023-22483 test 8 (tables)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'extension' => 'table', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => ":-\n" . \str_repeat(":\n", $n) . ":-\n", + 'expected' => static fn($n) => "

:-\n" . \str_repeat(":\n", $n) . ':-

', + ], + 'CVE-2023-22483 test 9 (autolink)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat(" static fn($n) => '

' . \str_repeat('<http://s', $n) . '

', + ], + 'CVE-2023-22483 test 10 (strikethrough)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => '~e' . \str_repeat(',z~~', $n) . "\n", + 'expected' => static fn($n) => '

~e' . \str_repeat(',z~~', $n) . '

', + ], + 'CVE-2023-22483 test 11 (deeply-nested blockquotes)' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c', + 'sizes' => [100, 1_000, 2_000, 3_000], + 'input' => static fn($n) => \str_repeat(">", $n) . \str_repeat(".", $n) . "\n", + 'expected' => static fn($n) => \str_repeat("
\n", $n) . '

' . \str_repeat('.', $n) . "

\n" . \str_repeat("
\n", $n), + ], + 'CVE-2023-24824 test 1' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat(">", $n) . \str_repeat("a*", $n) . "\n", + 'configuration' => [ + 'max_nesting_level' => 1_000, + ], + ], + 'CVE-2023-24824 test 2' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh', + 'sizes' => [500, 5_000, 50_000], + 'input' => static fn($n) => \str_repeat(" -", $n) . 'x' . \str_repeat("\n", $n), + 'configuration' => [ + 'max_nesting_level' => 500, + ], + ], + 'CVE-2023-26485 test 1' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-r8vr-c48j-fcc5', + 'sizes' => [50, 500, 5_000], // ideally should be 1000, 10_000, 100_000 but recursive rendering makes large sizes fail + 'input' => static fn($n) => \str_repeat('_', $n) . '.' . \str_repeat('_', $n) . "\n", + 'expected' => static fn($n) => '

' . \str_repeat('', $n/2) . '.' . \str_repeat('', $n/2) . '

', + ], + 'CVE-2023-26485 test 2' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-r8vr-c48j-fcc5', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => "1.\n" . \str_repeat(" 2.\n", $n), + 'expected' => static fn($n) => "
    \n
  1. \n" . \str_repeat("
  2. \n", $n) . "
\n", + ], + 'CVE-2023-26485 test 3' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-r8vr-c48j-fcc5', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat(" -\n", $n) . "x\n", + 'expected' => static fn($n) => "
    \n" . \str_repeat("
  • \n", $n) . "
\n

x

", + ], + 'CVE-2023-37463 test 1' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-w4qg-3vf7-m9x5', + 'extension' => 'table', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => '|' . \str_repeat('x|', $n) . "\n|" . \str_repeat('-|', $n) . "\n", + 'expected' => static fn($n) => "\n\n\n" . \str_repeat("\n", $n) . "\n\n
x
\n", + ], + 'CVE-2023-37463 test 2' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-w4qg-3vf7-m9x5', + 'extension' => 'table', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => '|' . \str_repeat('x|', $n) . "\n|" . \str_repeat('-|', $n) . "\n" . \str_repeat("a\n", $n), + ], + 'CVE-2023-37463 test 3' => [ + 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-w4qg-3vf7-m9x5', + 'extension' => 'footnotes', + 'sizes' => [1_000, 10_000, 100_000], + 'input' => static fn($n) => \str_repeat("[^1]:\n", $n) . \str_repeat("\n", $n), + 'expected' => static fn($n) => '', + ], +]; + +print("Running " . \count($cases) . " pathological test cases\n\n"); + +$failures = []; +foreach ($cases as $name => $case) { + print("\033[1m$name\033[0m"); + if (isset($case['ref'])) { + print(" (\033[4;34m{$case['ref']}\033[0m)"); + } + print("\n"); + + $lastRunTime = null; + $lastInputSize = null; + $lastOutputSize = null; + foreach ($case['sizes'] as $size) { + printf(' - %s: ', number_format($size)); + + $input = $case['input']($size); + $inputSize = \strlen($input); + + if ($lastInputSize === null) { + $timeout = 5; // 5 seconds + } else { + // Ideally, these cases should run in linear time or better, + // but we'll allow a 50% margin of error. + $timeout = \ceil($lastRunTime * $inputSize / $lastInputSize * 1.5); + // But regardless of this, we always want to wait at least 5 seconds, + // and at most 60 seconds. + $timeout = \max(5, \min(60, $timeout)); + } + + if (isset($_ENV['CI']) || isset($_SERVER['CI'])) { + $command = ['php', 'convert.php']; + } else { + $command = ['php', '-n', 'convert.php', \json_encode($case['configuration'] ?? [])]; + } + + if (isset($case['extension'])) { + $command[] = $case['extension']; + } + + $process = new Process($command, __DIR__, null, $input, $timeout); + + $startTime = \microtime(true); + try { + $result = $process->run(); + $executionTime = \round(\microtime(true) - $startTime, 3); + + if ($lastRunTime !== null) { + $relativeDifference = \round($executionTime / $lastRunTime, 1); + printf("%.3f seconds (%sx slower); ", $executionTime, $relativeDifference); + } else { + printf("%.3f seconds; ", $executionTime); + } + + $lastRunTime = $executionTime; + + $actual = $process->getOutput(); + $actualOutputSize = strlen($actual); + + if ($lastOutputSize !== null && $lastOutputSize !== 0 && $lastInputSize !== null) { + $relativeDifference = \round($actualOutputSize / $lastOutputSize, 1); + printf("%.1fkb output (%sx larger)", $actualOutputSize / 1024, $relativeDifference); + if ($actualOutputSize > ($lastOutputSize * $inputSize / $lastInputSize * 1.5)) { + + printf(" \033[31;31m%s\033[0m", 'Max allowed size exceeded'); + $failures[$name] = 'Max allowed size exceeded'; + } + } else { + printf("%.1fkb output", $actualOutputSize / 1024); + } + echo "\n"; + + $lastOutputSize = $actualOutputSize; + + if (isset($case['expected'])) { + $expected = $case['expected']($size); + if (\trim($expected) !== \trim($actual)) { + printf(" \033[31;31m%s\033[0m\n", 'Unexpected output'); + $failures[$name] = 'Unexpected output'; + } + } + + if ($result !== 0) { + printf(" \033[31;31m%s\033[0m\n", 'Process failed: ' . $process->getErrorOutput()); + $failures[$name] = 'Process failed'; + } + } catch (ProcessTimedOutException $e) { + printf("\033[31;31m%s\033[0m\n", 'Max execution time ('.$timeout.'s) exceeded'); + $failures[$name] = 'Max execution time exceeded'; + } catch (ProcessSignaledException $e) { + printf("\033[31;31m%s\033[0m\n", 'Process signaled: ' . $e->getSignal()); + $failures[$name] = 'Process signaled'; + } finally { + $lastInputSize = \strlen($input); + } + } + print("\n"); +} + +print("\n"); +print("--------------------\n"); +print("\n"); +print(\count($failures) . " failure(s) out of " . \count($cases) . " test(s)\n"); + +if (\count($failures) > 0) { + exit(1); +} diff --git a/tests/unit/Delimiter/DelimiterStackTest.php b/tests/unit/Delimiter/DelimiterStackTest.php new file mode 100644 index 0000000000..8c7804425e --- /dev/null +++ b/tests/unit/Delimiter/DelimiterStackTest.php @@ -0,0 +1,354 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Tests\Unit\Delimiter; + +use League\CommonMark\Delimiter\Delimiter; +use League\CommonMark\Delimiter\DelimiterStack; +use League\CommonMark\Node\Inline\Text; +use PHPUnit\Framework\TestCase; + +final class DelimiterStackTest extends TestCase +{ + public function testPush(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + + $delim1 = new Delimiter('*', 1, $text1, true, false); + $delim2 = new Delimiter('_', 1, $text2, false, false); + $delim3 = new Delimiter('*', 1, $text3, false, true); + + $stack = new DelimiterStack(); + + $this->assertNull($stack->searchByCharacter('*')); + $this->assertNull($stack->searchByCharacter('_')); + + $stack->push($delim1); + + $this->assertSame($delim1, $stack->searchByCharacter('*')); + $this->assertNull($stack->searchByCharacter('_')); + $this->assertNull($delim1->getPrevious()); + $this->assertNull($delim1->getNext()); + $this->assertNull($delim2->getPrevious()); + $this->assertNull($delim2->getNext()); + $this->assertNull($delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + + $stack->push($delim2); + + $this->assertSame($delim1, $stack->searchByCharacter('*')); + $this->assertSame($delim2, $stack->searchByCharacter('_')); + $this->assertNull($delim1->getPrevious()); + $this->assertSame($delim2, $delim1->getNext()); + $this->assertSame($delim1, $delim2->getPrevious()); + $this->assertNull($delim2->getNext()); + $this->assertNull($delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + + $stack->push($delim3); + + $this->assertSame($delim3, $stack->searchByCharacter('*')); + $this->assertSame($delim2, $stack->searchByCharacter('_')); + $this->assertNull($delim1->getPrevious()); + $this->assertSame($delim2, $delim1->getNext()); + $this->assertSame($delim1, $delim2->getPrevious()); + $this->assertSame($delim3, $delim2->getNext()); + $this->assertSame($delim2, $delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + } + + public function testRemoveDelimiter(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + + $stack = new DelimiterStack(); + $stack->push($delim1 = new Delimiter('*', 1, $text1, true, false)); + $stack->push($delim2 = new Delimiter('_', 1, $text2, false, false)); + $stack->push($delim3 = new Delimiter('*', 1, $text3, false, true)); + + $this->assertNull($delim1->getPrevious()); + $this->assertSame($delim2, $delim1->getNext()); + + $this->assertSame($delim1, $delim2->getPrevious()); + $this->assertSame($delim3, $delim2->getNext()); + + $this->assertSame($delim2, $delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + + $stack->removeDelimiter($delim2); + + $this->assertNull($delim1->getPrevious()); + $this->assertSame($delim3, $delim1->getNext()); + + $this->assertNull($delim2->getPrevious()); + $this->assertNull($delim2->getNext()); + + $this->assertSame($delim1, $delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + + $stack->removeDelimiter($delim3); + + $this->assertNull($delim1->getPrevious()); + $this->assertNull($delim1->getNext()); + + $this->assertNull($delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + } + + public function testRemoveDelimitersBetween(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + $text4 = new Text('_'); + $text5 = new Text('*'); + + $stack = new DelimiterStack(); + $stack->push($delim1 = new Delimiter('*', 1, $text1, false, false, 0)); + $stack->push($delim2 = new Delimiter('_', 1, $text2, false, false, 1)); + $stack->push($delim3 = new Delimiter('*', 1, $text3, false, false, 2)); + $stack->push($delim4 = new Delimiter('_', 1, $text4, false, false, 3)); + $stack->push($delim5 = new Delimiter('*', 1, $text5, false, false, 4)); + + // Make removeDelimiterBetween() callable using reflection + $reflection = new \ReflectionClass($stack); + $method = $reflection->getMethod('removeDelimitersBetween'); + $method->setAccessible(true); + + $method->invoke($stack, $delim1, $delim5); + + $this->assertNull($delim1->getPrevious()); + $this->assertSame($delim5, $delim1->getNext()); + + $this->assertNull($delim2->getPrevious()); + $this->assertNull($delim2->getNext()); + + $this->assertNull($delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + + $this->assertNull($delim4->getPrevious()); + $this->assertNull($delim4->getNext()); + + $this->assertSame($delim1, $delim5->getPrevious()); + $this->assertNull($delim5->getNext()); + } + + public function testFindEarliest(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + + $stack = new DelimiterStack(); + $stack->push($delim1 = new Delimiter('*', 1, $text1, true, false, 0)); + $stack->push($delim2 = new Delimiter('_', 1, $text2, false, false, 17)); + $stack->push($delim3 = new Delimiter('*', 1, $text3, false, true, 24)); + + // Use reflection to make findEarliest() callable + $reflection = new \ReflectionClass($stack); + $method = $reflection->getMethod('findEarliest'); + $method->setAccessible(true); + + $this->assertSame($delim1, $method->invoke($stack, -1)); + $this->assertSame($delim2, $method->invoke($stack, $delim1->getIndex())); + $this->assertSame($delim3, $method->invoke($stack, $delim2->getIndex())); + $this->assertNull($method->invoke($stack, $delim3->getIndex())); + } + + public function testRemoveAll(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + + $stack = new DelimiterStack(); + $stack->push($delim1 = new Delimiter('*', 1, $text1, true, false)); + $stack->push($delim2 = new Delimiter('_', 1, $text2, false, false)); + $stack->push($delim3 = new Delimiter('*', 1, $text3, false, true)); + + $stack->removeAll(); + + $this->assertNull($delim1->getPrevious()); + $this->assertNull($delim1->getNext()); + + $this->assertNull($delim2->getPrevious()); + $this->assertNull($delim2->getNext()); + + $this->assertNull($delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + + $this->assertNull($stack->searchByCharacter('*')); + $this->assertNull($stack->searchByCharacter('_')); + } + + public function testRemoveAllWithStackBottomGiven(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + + $stack = new DelimiterStack(); + $stack->push($delim1 = new Delimiter('*', 1, $text1, true, false)); + $stack->push($delim2 = new Delimiter('_', 1, $text2, false, false)); + $stack->push($delim3 = new Delimiter('*', 1, $text3, false, true)); + + $stack->removeAll($delim2); + + $this->assertSame($delim1, $stack->searchByCharacter('*')); + $this->assertSame($delim2, $stack->searchByCharacter('_')); + + $this->assertNull($delim1->getPrevious()); + $this->assertSame($delim2, $delim1->getNext()); + + $this->assertSame($delim1, $delim2->getPrevious()); + $this->assertNull($delim2->getNext()); + + $this->assertNull($delim3->getPrevious()); + $this->assertNull($delim3->getNext()); + } + + public function testRemoveEarlierMatches(): void + { + $text1 = new Text('*'); + $text2 = new Text('_'); + $text3 = new Text('*'); + + $stack = new DelimiterStack(); + $stack->push($delim1 = new Delimiter('*', 1, $text1, true, false)); + $stack->push($delim2 = new Delimiter('_', 1, $text2, false, false)); + $stack->push($delim3 = new Delimiter('*', 1, $text3, false, true)); + + $this->assertTrue($delim1->isActive()); + $this->assertTrue($delim2->isActive()); + $this->assertTrue($delim3->isActive()); + + $stack->removeEarlierMatches('*'); + + $this->assertFalse($delim1->isActive()); + $this->assertTrue($delim2->isActive()); + $this->assertFalse($delim3->isActive()); + + $stack->removeEarlierMatches('_'); + $this->assertFalse($delim2->isActive()); + } + + public function testAddBracket(): void + { + $text1 = new Text('['); + $text2 = new Text('*'); + $text3 = new Text(']'); + + $stack = new DelimiterStack(); + + $stack->addBracket($text1, 0, false); + + $firstBracket = $stack->getLastBracket(); + $this->assertSame($text1, $firstBracket->getNode()); + $this->assertSame(0, $firstBracket->getPosition()); + $this->assertFalse($firstBracket->isImage()); + $this->assertNull($firstBracket->getPrevious()); + $this->assertFalse($firstBracket->hasNext()); + + $stack->push(new Delimiter('*', 1, $text2, true, false)); + $stack->addBracket($text3, 2, true); + + $this->assertSame($text3, $stack->getLastBracket()->getNode()); + $this->assertSame(2, $stack->getLastBracket()->getPosition()); + $this->assertTrue($stack->getLastBracket()->isImage()); + $this->assertSame($firstBracket, $stack->getLastBracket()->getPrevious()); + $this->assertFalse($stack->getLastBracket()->hasNext()); + + $this->assertTrue($firstBracket->hasNext()); + } + + public function testRemoveBracket(): void + { + $text1 = new Text('['); + $text2 = new Text(']'); + + $stack = new DelimiterStack(); + + $stack->addBracket($text1, 0, false); + $firstBracket = $stack->getLastBracket(); + + $stack->addBracket($text2, 1, false); + + $this->assertSame($text2, $stack->getLastBracket()->getNode()); + $this->assertTrue($firstBracket->hasNext()); + + $stack->removeBracket(); + + $this->assertSame($text1, $stack->getLastBracket()->getNode()); + $this->assertFalse($firstBracket->hasNext()); + } + + public function testDeactivateLinkOpeners(): void + { + $text1 = new Text('['); + $text2 = new Text('['); + $text3 = new Text('['); + + $stack = new DelimiterStack(); + + $stack->addBracket($text1, 0, false); + $bracket1 = $stack->getLastBracket(); + $this->assertTrue($bracket1->isActive()); + + $stack->addBracket($text2, 1, true); + $bracket2 = $stack->getLastBracket(); + $this->assertTrue($bracket2->isActive()); + + $stack->addBracket($text3, 2, false); + $bracket3 = $stack->getLastBracket(); + $this->assertTrue($bracket3->isActive()); + + $stack->deactivateLinkOpeners(); + + $this->assertSame($bracket3, $stack->getLastBracket()); + + $this->assertFalse($bracket1->isActive()); + $this->assertFalse($bracket2->isActive()); + $this->assertFalse($bracket3->isActive()); + } + + public function testGetIndex(): void + { + $delim1 = new Delimiter('*', 1, new Text('*'), true, false); + $delim2 = new Delimiter('_', 1, new Text('_'), true, false, 10); + $delim3 = new Delimiter('*', 1, new Text('*'), true, true, 20); + $delim4 = new Delimiter('_', 1, new Text('_'), false, true); + $delim5 = new Delimiter('*', 1, new Text('*'), false, true); + + $stack = new DelimiterStack(); + + $stack->push($delim1); + $stack->push($delim2); + $stack->push($delim3); + $stack->push($delim4); + $stack->push($delim5); + + $reflection = new \ReflectionClass($stack); + $method = $reflection->getMethod('getIndex'); + $method->setAccessible(true); + + $this->assertSame(9, $method->invoke($stack, $delim1)); + $this->assertSame(10, $method->invoke($stack, $delim2)); + $this->assertSame(20, $method->invoke($stack, $delim3)); + $this->assertSame(21, $method->invoke($stack, $delim4)); + $this->assertSame(22, $method->invoke($stack, $delim5)); + } +} diff --git a/tests/unit/Util/RegexHelperTest.php b/tests/unit/Util/RegexHelperTest.php index c71a6e079c..ed9c5af6c6 100644 --- a/tests/unit/Util/RegexHelperTest.php +++ b/tests/unit/Util/RegexHelperTest.php @@ -281,6 +281,16 @@ public function testIsEscapable(): void $this->assertTrue(RegexHelper::isEscapable('\\')); } + public function testIsWhitespace(): void + { + $this->assertFalse(RegexHelper::isWhitespace('')); + $this->assertFalse(RegexHelper::isWhitespace('A')); + $this->assertTrue(RegexHelper::isWhitespace(' ')); + $this->assertTrue(RegexHelper::isWhitespace("\t")); + $this->assertTrue(RegexHelper::isWhitespace("\n")); + $this->assertTrue(RegexHelper::isWhitespace("\r")); + } + /** * @dataProvider dataForTestMatchAt */