diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d9c08637ef..1546d4f67e 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -109,6 +109,24 @@ jobs:
- run: vendor/bin/psalm --no-progress --stats --threads=$(nproc) --output-format=github --shepherd
+  # Runs the pathological-input test script as its own CI job.
+  pathological:
+    name: Pathological Tests
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read  # for actions/checkout to fetch code
+
+    steps:
+      - uses: actions/checkout@v3
+
+      # The version is quoted so YAML keeps it a string instead of parsing a float.
+      - uses: shivammathur/setup-php@v2
+        with:
+          php-version: '8.1'
+          extensions: curl, mbstring, yaml
+          coverage: none
+          tools: composer:v2
+
+      - run: composer update --no-progress
+
+      - run: php tests/pathological/test.php
+
docs-lint:
permissions:
contents: read # for actions/checkout to fetch code
diff --git a/.phpstorm.meta.php b/.phpstorm.meta.php
index ca1bec7ee1..5eb9270da0 100644
--- a/.phpstorm.meta.php
+++ b/.phpstorm.meta.php
@@ -31,6 +31,7 @@
'html_input',
'allow_unsafe_links',
'max_nesting_level',
+ 'max_delimiters_per_line',
'renderer',
'renderer/block_separator',
'renderer/inner_separator',
@@ -89,6 +90,7 @@
'table/alignment_attributes/left',
'table/alignment_attributes/center',
'table/alignment_attributes/right',
+ 'table/max_autocompleted_cells',
'table_of_contents',
'table_of_contents/html_class',
'table_of_contents/max_heading_level',
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 917e5e80c7..9846fe0c97 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,31 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi
## [Unreleased][unreleased]
+This is a **security release** to address potential denial of service attacks when parsing specially crafted,
+malicious input from untrusted sources (like user input).
+
+### Added
+
+- Added `max_delimiters_per_line` config option to prevent denial of service attacks when parsing malicious input
+- Added `table/max_autocompleted_cells` config option to prevent denial of service attacks when parsing large tables
+- The `AttributesExtension` now supports attributes without values (#985, #986)
+- The `AutolinkExtension` exposes two new configuration options to override the default behavior (#969, #987):
+ - `autolink/allowed_protocols` - an array of protocols to allow autolinking for
+ - `autolink/default_protocol` - the default protocol to use when none is specified
+- Added `RegexHelper::isWhitespace()` method to check if a given character is an ASCII whitespace character
+- Added `CacheableDelimiterProcessorInterface` to ensure linear complexity for dynamic delimiter processing
+- Added `Bracket` delimiter type to optimize bracket parsing
+
+### Changed
+
+- `[` and `]` are no longer added as `Delimiter` objects on the stack; a new `Bracket` type with its own stack is used instead
+- `UrlAutolinkParser` no longer parses URLs with more than 127 subdomains
+- Expanded reference links can no longer exceed 100kb, or the size of the input document (whichever is greater)
+- Delimiters should always provide a non-null value via `DelimiterInterface::getIndex()`
+ - We'll attempt to infer the index based on surrounding delimiters where possible
+- The `DelimiterStack` now accepts integer positions for any `$stackBottom` argument
+- Several small performance optimizations
+
## [2.5.3] - 2024-08-16
### Changed
@@ -77,6 +102,25 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi
- Fixed declaration parser being too strict
- `FencedCodeRenderer`: don't add `language-` to class if already prefixed
+### Deprecated
+
+- Returning dynamic values from `DelimiterProcessorInterface::getDelimiterUse()` is deprecated
+ - You should instead implement `CacheableDelimiterProcessorInterface` to help the engine perform caching to avoid performance issues.
+- Failing to set a delimiter's index (or returning `null` from `DelimiterInterface::getIndex()`) is deprecated and will not be supported in 3.0
+- Deprecated `DelimiterInterface::isActive()` and `DelimiterInterface::setActive()`, as these are no longer used by the engine
+- Deprecated `DelimiterStack::removeEarlierMatches()` and `DelimiterStack::searchByCharacter()`, as these are no longer used by the engine
+- Passing a `DelimiterInterface` as the `$stackBottom` argument to `DelimiterStack::processDelimiters()` or `::removeAll()` is deprecated and will not be supported in 3.0; pass the integer position instead.
+
+### Fixed
+
+- Fixed NUL characters not being replaced in the input
+- Fixed quadratic complexity parsing unclosed inline links
+- Fixed quadratic complexity parsing emphasis and strikethrough delimiters
+- Fixed issue where having 500,000+ delimiters could trigger a [known segmentation fault issue in PHP's garbage collection](https://bugs.php.net/bug.php?id=68606)
+- Fixed quadratic complexity deactivating link openers
+- Fixed quadratic complexity parsing long backtick code spans with no matching closers
+- Fixed catastrophic backtracking when parsing link labels/titles
+
## [2.4.1] - 2023-08-30
### Fixed
diff --git a/composer.json b/composer.json
index 9b90662034..d74b0b4b11 100644
--- a/composer.json
+++ b/composer.json
@@ -42,8 +42,9 @@
"phpstan/phpstan": "^1.8.2",
"phpunit/phpunit": "^9.5.21 || ^10.5.9 || ^11.0.0",
"scrutinizer/ocular": "^1.8.1",
- "symfony/finder": "^5.3 | ^6.0 || ^7.0",
- "symfony/yaml": "^2.3 | ^3.0 | ^4.0 | ^5.0 | ^6.0 || ^7.0",
+ "symfony/finder": "^5.3 | ^6.0 | ^7.0",
+ "symfony/process": "^5.4 | ^6.0 | ^7.0",
+ "symfony/yaml": "^2.3 | ^3.0 | ^4.0 | ^5.0 | ^6.0 | ^7.0",
"unleashedtech/php-coding-standard": "^3.1.1",
"vimeo/psalm": "^4.24.0 || ^5.0.0"
},
@@ -103,11 +104,13 @@
"phpstan": "phpstan analyse",
"phpunit": "phpunit --no-coverage",
"psalm": "psalm --stats",
+        "pathological": "@php tests/pathological/test.php",
"test": [
"@phpcs",
"@phpstan",
"@psalm",
- "@phpunit"
+ "@phpunit",
+ "@pathological"
]
},
"extra": {
diff --git a/docs/2.5/configuration.md b/docs/2.5/configuration.md
index 472b99ff56..5991f9e1cf 100644
--- a/docs/2.5/configuration.md
+++ b/docs/2.5/configuration.md
@@ -27,6 +27,7 @@ $config = [
'html_input' => 'escape',
'allow_unsafe_links' => false,
'max_nesting_level' => PHP_INT_MAX,
+ 'max_delimiters_per_line' => PHP_INT_MAX,
'slug_normalizer' => [
'max_length' => 255,
],
@@ -73,6 +74,7 @@ Here's a list of the core configuration options available:
- `escape` - Escape all HTML
- `allow_unsafe_links` - Remove risky link and image URLs by setting this to `false` (default: `true`)
- `max_nesting_level` - The maximum nesting level for blocks (default: `PHP_INT_MAX`). Setting this to a positive integer can help protect against long parse times and/or segfaults if blocks are too deeply-nested.
+- `max_delimiters_per_line` - The maximum number of delimiters (e.g. `*` or `_`) allowed in a single line (default: `PHP_INT_MAX`). Setting this to a positive integer can help protect against long parse times and/or segfaults if lines are too long.
- `slug_normalizer` - Array of options for configuring how URL-safe slugs are created; see [the slug normalizer docs](/2.5/customization/slug-normalizer/#configuration) for more details
- `instance` - An alternative normalizer to use (defaults to the included `SlugNormalizer`)
- `max_length` - Limits the size of generated slugs (defaults to 255 characters)
diff --git a/docs/2.5/customization/delimiter-processing.md b/docs/2.5/customization/delimiter-processing.md
index 9d172b256c..8748d4ef08 100644
--- a/docs/2.5/customization/delimiter-processing.md
+++ b/docs/2.5/customization/delimiter-processing.md
@@ -48,6 +48,8 @@ public function getDelimiterUse(DelimiterInterface $opener, DelimiterInterface $
This method is used to tell the engine how many characters from the matching delimiters should be consumed. For simple processors you'll likely return `1` (or whatever your minimum length is). In more advanced cases, you can examine the opening and closing delimiters and perform additional logic to determine whether they should be fully or partially consumed. You can also return `0` if you'd like.
+**Note:** Unless you're returning a hard-coded value, you should probably implement `CacheableDelimiterProcessorInterface` instead of `DelimiterProcessorInterface` - this will allow the engine to perform additional caching for better performance.
+
### `process()`
```php
diff --git a/docs/2.5/extensions/tables.md b/docs/2.5/extensions/tables.md
index c98bd07264..5f838a675e 100644
--- a/docs/2.5/extensions/tables.md
+++ b/docs/2.5/extensions/tables.md
@@ -44,6 +44,7 @@ $config = [
'center' => ['align' => 'center'],
'right' => ['align' => 'right'],
],
+ 'max_autocompleted_cells' => 10_000,
],
];
@@ -159,6 +160,14 @@ $config = [
Or any other HTML attributes you'd like!
+### Limiting Auto-Completed Cells
+
+The GFM specification says that the:
+
+> table’s rows may vary in the number of cells. If there are a number of cells fewer than the number of cells in the header row, empty cells are inserted.
+
+This feature could be abused to create very large tables. To prevent this, you can configure the `max_autocompleted_cells` option to limit the number of empty cells that will be autocompleted. If the limit is reached, further parsing of the table will be aborted.
+
## Credits
The Table functionality was originally built by [Martin Hasoň](https://github.com/hason) and [Webuni s.r.o.](https://www.webuni.cz) before it was merged into the core parser.
diff --git a/docs/2.5/security.md b/docs/2.5/security.md
index b4bd8ab598..8573e69352 100644
--- a/docs/2.5/security.md
+++ b/docs/2.5/security.md
@@ -11,7 +11,8 @@ In order to be fully compliant with the CommonMark spec, certain security settin
- `html_input`: How to handle raw HTML
- `allow_unsafe_links`: Whether unsafe links are permitted
-- `max_nesting_level`: Protected against long render times or segfaults
+- `max_nesting_level`: Protect against long render times or segfaults
+- `max_delimiters_per_line`: Protect against long parse times or rendering segfaults
Further information about each option can be found below.
@@ -88,6 +89,25 @@ echo $converter->convert($markdown);
See the [configuration](/2.5/configuration/) section for more information.
+## Max Delimiters Per Line
+
+Similarly to the maximum nesting level, **no maximum number of delimiters per line is enforced by default.** Delimiters can be nested (like `*a **b** c*`) or un-nested (like `*a* *b* *c*`) - in either case, having too many in a single line can result in long parse times. We therefore have a separate option to limit the number of delimiters per line.
+
+If you need to parse untrusted input, consider setting a reasonable `max_delimiters_per_line` (perhaps 100-1000) depending on your needs. Once this limit is reached, any subsequent delimiters on that line will be rendered as plain text.
+
+### Example - Prevent too many delimiters
+
+```php
+use League\CommonMark\CommonMarkConverter;
+
+$markdown = '*a* **b *c **d** c* b**'; // 8 delimiters (* and **)
+
+$converter = new CommonMarkConverter(['max_delimiters_per_line' => 6]);
+echo $converter->convert($markdown);
+
+// <p><em>a</em> **b *c <strong>d</strong> c* b**</p>
+```
+
## Additional Filtering
Although this library does offer these security features out-of-the-box, some users may opt to also run the HTML output through additional filtering layers (like HTMLPurifier). If you do this, make sure you **thoroughly** test your additional post-processing steps and configure them to work properly with the types of HTML elements and attributes that converted Markdown might produce, otherwise, you may end up with weird behavior like missing images, broken links, mismatched HTML tags, etc.
diff --git a/docs/2.6/upgrading.md b/docs/2.6/upgrading.md
index 4e84686028..5d6e91c3c6 100644
--- a/docs/2.6/upgrading.md
+++ b/docs/2.6/upgrading.md
@@ -6,3 +6,27 @@ redirect_from: /upgrading/
---
# Upgrading from 2.5 to 2.6
+
+## `max_delimiters_per_line` Configuration Option
+
+The `max_delimiters_per_line` configuration option was added in 2.6 to help protect against malicious input that could
+cause excessive memory usage or denial of service attacks. It defaults to `PHP_INT_MAX` (no limit) for backwards
+compatibility, which is safe when parsing trusted input. However, if you're parsing untrusted input from users, you
+should probably set this to a reasonable value (somewhere between `100` and `1000`) to protect against malicious inputs.
+
+## Custom Delimiter Processors
+
+If you're implementing a custom delimiter processor, and `getDelimiterUse()` has more logic than just a
+simple `return` statement, you should implement `CacheableDelimiterProcessorInterface` instead of
+`DelimiterProcessorInterface` to improve performance and avoid possible quadratic performance issues.
+
+`DelimiterProcessorInterface` has a `getDelimiterUse()` method that tells the engine how many characters from the
+matching delimiters should be consumed. Simple processors usually always return a hard-coded integer like `1` or `2`.
+However, some more advanced processors may need to examine the opening and closing delimiters and perform additional
+logic to determine whether they should be fully or partially consumed. Previously, these results could not be safely
+cached, resulting in possible quadratic performance issues.
+
+In 2.6, the `CacheableDelimiterProcessorInterface` was introduced to allow these "dynamic" processors to be safely
+cached. It requires a new `getCacheKey()` method that returns a string that uniquely identifies the combination of
+factors considered when determining the delimiter use. This key is then used to cache the results of the search for
+a matching delimiter.
diff --git a/phpstan.neon.dist b/phpstan.neon.dist
index e25d706246..54d7b4b6f5 100644
--- a/phpstan.neon.dist
+++ b/phpstan.neon.dist
@@ -7,6 +7,8 @@ parameters:
message: '#Parameter .+ of class .+Reference constructor expects string, string\|null given#'
- path: src/Util/RegexHelper.php
message: '#Method .+RegexHelper::unescape\(\) should return string but returns string\|null#'
+ - path: src/Delimiter/DelimiterStack.php
+ message: '#unknown class WeakMap#'
exceptions:
uncheckedExceptionClasses:
# Exceptions caused by bad developer logic that should always bubble up:
diff --git a/src/Delimiter/Bracket.php b/src/Delimiter/Bracket.php
new file mode 100644
index 0000000000..3a86859c71
--- /dev/null
+++ b/src/Delimiter/Bracket.php
@@ -0,0 +1,83 @@
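+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of the league/commonmark package.
+ *
+ * (c) Colin O'Dell <colinodell@gmail.com>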
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace League\CommonMark\Delimiter;
+
+use League\CommonMark\Node\Node;
+
+/**
+ * One entry in the bracket stack kept by the DelimiterStack.
+ *
+ * Each bracket remembers the bracket that was on top of the stack when it was
+ * created, so the entries form a backwards-linked list.
+ */
+final class Bracket
+{
+    /** Node supplied when the bracket was recorded */
+    private Node $node;
+
+    /** Bracket that was on top of the stack before this one, if any */
+    private ?Bracket $previous;
+
+    /** Position supplied when the bracket was recorded */
+    private int $position;
+
+    /** Whether this bracket belongs to an image rather than a link */
+    private bool $image;
+
+    /** Set once a newer bracket has been stacked on top of this one */
+    private bool $hasNext = false;
+
+    /** Cleared when the bracket is deactivated; only meaningful for links */
+    private bool $active = true;
+
+    /**
+     * @param Node         $node     Node to associate with this bracket
+     * @param Bracket|null $previous Bracket currently on top of the stack, or null if the stack is empty
+     * @param int          $position Position to record for this bracket
+     * @param bool         $image    True for an image bracket, false for a link bracket
+     */
+    public function __construct(Node $node, ?Bracket $previous, int $position, bool $image)
+    {
+        $this->image    = $image;
+        $this->position = $position;
+        $this->previous = $previous;
+        $this->node     = $node;
+    }
+
+    /**
+     * Returns the node associated with this bracket.
+     */
+    public function getNode(): Node
+    {
+        return $this->node;
+    }
+
+    /**
+     * Returns the bracket below this one on the stack, or null at the bottom.
+     */
+    public function getPrevious(): ?Bracket
+    {
+        return $this->previous;
+    }
+
+    /**
+     * Tells whether a newer bracket has been stacked on top of this one.
+     */
+    public function hasNext(): bool
+    {
+        return $this->hasNext;
+    }
+
+    /**
+     * Returns the position recorded for this bracket.
+     */
+    public function getPosition(): int
+    {
+        return $this->position;
+    }
+
+    /**
+     * Tells whether this bracket belongs to an image.
+     */
+    public function isImage(): bool
+    {
+        return $this->image;
+    }
+
+    /**
+     * Tells whether this bracket is still active.
+     *
+     * Only valid in the context of non-images (links)
+     */
+    public function isActive(): bool
+    {
+        return $this->active;
+    }
+
+    /**
+     * Records whether a newer bracket sits on top of this one.
+     *
+     * @internal
+     */
+    public function setHasNext(bool $hasNext): void
+    {
+        $this->hasNext = $hasNext;
+    }
+
+    /**
+     * Activates or deactivates this bracket.
+     *
+     * @internal
+     */
+    public function setActive(bool $active): void
+    {
+        $this->active = $active;
+    }
+}
diff --git a/src/Delimiter/DelimiterInterface.php b/src/Delimiter/DelimiterInterface.php
index 6bfa32e48e..0cefba7e60 100644
--- a/src/Delimiter/DelimiterInterface.php
+++ b/src/Delimiter/DelimiterInterface.php
@@ -24,8 +24,14 @@ public function canClose(): bool;
public function canOpen(): bool;
+ /**
+ * @deprecated This method is no longer used internally and will be removed in 3.0
+ */
public function isActive(): bool;
+ /**
+ * @deprecated This method is no longer used internally and will be removed in 3.0
+ */
public function setActive(bool $active): void;
public function getChar(): string;
diff --git a/src/Delimiter/DelimiterParser.php b/src/Delimiter/DelimiterParser.php
index 3f96addf85..fdfe093c90 100644
--- a/src/Delimiter/DelimiterParser.php
+++ b/src/Delimiter/DelimiterParser.php
@@ -62,16 +62,20 @@ public function parse(InlineParserContext $inlineContext): bool
[$canOpen, $canClose] = self::determineCanOpenOrClose($charBefore, $charAfter, $character, $processor);
+ if (! ($canOpen || $canClose)) {
+ $inlineContext->getContainer()->appendChild(new Text(\str_repeat($character, $numDelims)));
+
+ return true;
+ }
+
$node = new Text(\str_repeat($character, $numDelims), [
'delim' => true,
]);
$inlineContext->getContainer()->appendChild($node);
// Add entry to stack to this opener
- if ($canOpen || $canClose) {
- $delimiter = new Delimiter($character, $numDelims, $node, $canOpen, $canClose);
- $inlineContext->getDelimiterStack()->push($delimiter);
- }
+ $delimiter = new Delimiter($character, $numDelims, $node, $canOpen, $canClose, $inlineContext->getCursor()->getPosition());
+ $inlineContext->getDelimiterStack()->push($delimiter);
return true;
}
diff --git a/src/Delimiter/DelimiterStack.php b/src/Delimiter/DelimiterStack.php
index fb95b907cf..cf2a41e524 100644
--- a/src/Delimiter/DelimiterStack.php
+++ b/src/Delimiter/DelimiterStack.php
@@ -19,16 +19,47 @@
namespace League\CommonMark\Delimiter;
+use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface;
use League\CommonMark\Delimiter\Processor\DelimiterProcessorCollection;
use League\CommonMark\Node\Inline\AdjacentTextMerger;
+use League\CommonMark\Node\Node;
final class DelimiterStack
{
/** @psalm-readonly-allow-private-mutation */
private ?DelimiterInterface $top = null;
+ /** @psalm-readonly-allow-private-mutation */
+ private ?Bracket $brackets = null;
+
+ /**
+ * @deprecated This property will be removed in 3.0 once all delimiters MUST have an index/position
+ *
+ * @var \SplObjectStorage<DelimiterInterface, int>|\WeakMap<DelimiterInterface, int>
+ */
+ private $missingIndexCache;
+
+
+ private int $remainingDelimiters = 0;
+
+    /**
+     * @param int $maximumStackSize How many delimiters push() will accept before it starts ignoring new ones
+     */
+    public function __construct(int $maximumStackSize = PHP_INT_MAX)
+    {
+        $this->remainingDelimiters = $maximumStackSize;
+
+        // Runtimes older than PHP 8.0 get an SplObjectStorage for the inferred-index cache
+        if (\PHP_VERSION_ID < 80000) {
+            $this->missingIndexCache = new \SplObjectStorage(); // @phpstan-ignore-line
+
+            return;
+        }
+
+        /** @psalm-suppress PropertyTypeCoercion */
+        $this->missingIndexCache = new \WeakMap(); // @phpstan-ignore-line
+    }
+
public function push(DelimiterInterface $newDelimiter): void
{
+ if ($this->remainingDelimiters-- <= 0) {
+ return;
+ }
+
$newDelimiter->setPrevious($this->top);
if ($this->top !== null) {
@@ -38,14 +69,54 @@ public function push(DelimiterInterface $newDelimiter): void
$this->top = $newDelimiter;
}
- private function findEarliest(?DelimiterInterface $stackBottom = null): ?DelimiterInterface
+ /**
+ * Pushes a new bracket onto the bracket stack, linked to the bracket that was on top before it.
+ *
+ * @param Node $node  Node to associate with the new bracket
+ * @param int  $index Position to record for the new bracket
+ * @param bool $image True for an image bracket, false for a link bracket
+ *
+ * @internal
+ */
+ public function addBracket(Node $node, int $index, bool $image): void
{
- $delimiter = $this->top;
- while ($delimiter !== null && $delimiter->getPrevious() !== $stackBottom) {
- $delimiter = $delimiter->getPrevious();
+ if ($this->brackets !== null) {
+ $this->brackets->setHasNext(true); // the current top is about to get a newer bracket above it
}
- return $delimiter;
+ $this->brackets = new Bracket($node, $this->brackets, $index, $image);
+ }
+
+    /**
+     * Returns the bracket currently on top of the bracket stack, or null when the stack is empty.
+     *
+     * @psalm-immutable
+     */
+    public function getLastBracket(): ?Bracket
+    {
+        return $this->brackets;
+    }
+
+    /**
+     * Finds the lowest delimiter on the stack whose index is still above $stackBottom.
+     *
+     * The search starts at the top and moves down until a delimiter at or below
+     * $stackBottom is met. Returns null when no delimiter lies above that position.
+     */
+    private function findEarliest(int $stackBottom): ?DelimiterInterface
+    {
+        $earliest = null;
+
+        for ($current = $this->top; $current !== null; $current = $current->getPrevious()) {
+            if (self::getIndex($current) <= $stackBottom) {
+                break;
+            }
+
+            $earliest = $current;
+        }
+
+        return $earliest;
+    }
+
+ /**
+ * Pops the top bracket off the bracket stack; does nothing when the stack is empty.
+ *
+ * @internal
+ */
+ public function removeBracket(): void
+ {
+ if ($this->brackets === null) {
+ return;
+ }
+
+ $this->brackets = $this->brackets->getPrevious();
+
+ if ($this->brackets !== null) {
+ $this->brackets->setHasNext(false); // the new top no longer has anything above it
+ }
}
public function removeDelimiter(DelimiterInterface $delimiter): void
@@ -62,6 +133,19 @@ public function removeDelimiter(DelimiterInterface $delimiter): void
/** @psalm-suppress PossiblyNullReference */
$delimiter->getNext()->setPrevious($delimiter->getPrevious());
}
+
+ // Nullify all references from the removed delimiter to other delimiters.
+ // All references to this particular delimiter in the linked list should be gone,
+ // but it's possible we're still hanging on to other references to things that
+ // have been (or soon will be) removed, which may interfere with efficient
+ // garbage collection by the PHP runtime.
+ // Explicitly releasing these references should help to avoid possible
+ // segfaults like in https://bugs.php.net/bug.php?id=68606.
+ $delimiter->setPrevious(null);
+ $delimiter->setNext(null);
+
+ // TODO: Remove the line below once PHP 7.4 support is dropped, as WeakMap won't hold onto the reference, making this unnecessary
+ unset($this->missingIndexCache[$delimiter]);
}
private function removeDelimiterAndNode(DelimiterInterface $delimiter): void
@@ -72,21 +156,30 @@ private function removeDelimiterAndNode(DelimiterInterface $delimiter): void
private function removeDelimitersBetween(DelimiterInterface $opener, DelimiterInterface $closer): void
{
- $delimiter = $closer->getPrevious();
- while ($delimiter !== null && $delimiter !== $opener) {
+ $delimiter = $closer->getPrevious();
+ $openerPosition = self::getIndex($opener); // the bound is the opener's index rather than its object identity
+ while ($delimiter !== null && self::getIndex($delimiter) > $openerPosition) { // walk back from the closer, stopping at the opener's position
$previous = $delimiter->getPrevious();
$this->removeDelimiter($delimiter);
$delimiter = $previous;
}
}
- public function removeAll(?DelimiterInterface $stackBottom = null): void
+ /**
+ * Removes every delimiter on the stack whose index is greater than the stack bottom.
+ *
+ * @param DelimiterInterface|int|null $stackBottom Integer position, or a delimiter whose index is used instead; null resolves to -1
+ */
+ public function removeAll($stackBottom = null): void
{
- while ($this->top && $this->top !== $stackBottom) {
+ $stackBottomPosition = \is_int($stackBottom) ? $stackBottom : self::getIndex($stackBottom); // normalise both accepted argument forms to a position
+
+ while ($this->top && $this->getIndex($this->top) > $stackBottomPosition) {
$this->removeDelimiter($this->top);
}
}
+ /**
+ * @deprecated This method is no longer used internally and will be removed in 3.0
+ */
public function removeEarlierMatches(string $character): void
{
$opener = $this->top;
@@ -100,6 +193,20 @@ public function removeEarlierMatches(string $character): void
}
/**
+ * Marks brackets inactive, starting at the top of the bracket stack and moving down.
+ *
+ * The walk stops at the first bracket that is already inactive.
+ *
+ * @internal
+ */
+ public function deactivateLinkOpeners(): void
+ {
+ $opener = $this->brackets;
+ while ($opener !== null && $opener->isActive()) {
+ $opener->setActive(false);
+ $opener = $opener->getPrevious();
+ }
+ }
+
+ /**
+ * @deprecated This method is no longer used internally and will be removed in 3.0
+ *
* @param string|string[] $characters
*/
public function searchByCharacter($characters): ?DelimiterInterface
@@ -120,30 +227,44 @@ public function searchByCharacter($characters): ?DelimiterInterface
return $opener;
}
- public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterProcessorCollection $processors): void
+ /**
+ * @param DelimiterInterface|int|null $stackBottom
+ *
+ * @todo change $stackBottom to an int in 3.0
+ */
+ public function processDelimiters($stackBottom, DelimiterProcessorCollection $processors): void
{
+ /** @var array<string, int> $openersBottom */
$openersBottom = [];
+ $stackBottomPosition = \is_int($stackBottom) ? $stackBottom : self::getIndex($stackBottom);
+
// Find first closer above stackBottom
- $closer = $this->findEarliest($stackBottom);
+ $closer = $this->findEarliest($stackBottomPosition);
// Move forward, looking for closers, and handling each
while ($closer !== null) {
- $delimiterChar = $closer->getChar();
+ $closingDelimiterChar = $closer->getChar();
- $delimiterProcessor = $processors->getDelimiterProcessor($delimiterChar);
+ $delimiterProcessor = $processors->getDelimiterProcessor($closingDelimiterChar);
if (! $closer->canClose() || $delimiterProcessor === null) {
$closer = $closer->getNext();
continue;
}
+ if ($delimiterProcessor instanceof CacheableDelimiterProcessorInterface) {
+ $openersBottomCacheKey = $delimiterProcessor->getCacheKey($closer);
+ } else {
+ $openersBottomCacheKey = $closingDelimiterChar;
+ }
+
$openingDelimiterChar = $delimiterProcessor->getOpeningCharacter();
$useDelims = 0;
$openerFound = false;
$potentialOpenerFound = false;
$opener = $closer->getPrevious();
- while ($opener !== null && $opener !== $stackBottom && $opener !== ($openersBottom[$delimiterChar] ?? null)) {
+ while ($opener !== null && ($openerPosition = self::getIndex($opener)) > $stackBottomPosition && $openerPosition >= ($openersBottom[$openersBottomCacheKey] ?? 0)) {
if ($opener->canOpen() && $opener->getChar() === $openingDelimiterChar) {
$potentialOpenerFound = true;
$useDelims = $delimiterProcessor->getDelimiterUse($opener, $closer);
@@ -157,23 +278,22 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro
}
if (! $openerFound) {
- if (! $potentialOpenerFound) {
- // Only do this when we didn't even have a potential
- // opener (one that matches the character and can open).
- // If an opener was rejected because of the number of
- // delimiters (e.g. because of the "multiple of 3"
- // Set lower bound for future searches for openersrule),
- // we want to consider it next time because the number
- // of delimiters can change as we continue processing.
- $openersBottom[$delimiterChar] = $closer->getPrevious();
- if (! $closer->canOpen()) {
- // We can remove a closer that can't be an opener,
- // once we've seen there's no matching opener.
- $this->removeDelimiter($closer);
- }
+ // Set lower bound for future searches
+ // TODO: Remove this conditional check in 3.0. It only exists to prevent behavioral BC breaks in 2.x.
+ if ($potentialOpenerFound === false || $delimiterProcessor instanceof CacheableDelimiterProcessorInterface) {
+ $openersBottom[$openersBottomCacheKey] = self::getIndex($closer);
+ }
+
+ if (! $potentialOpenerFound && ! $closer->canOpen()) {
+ // We can remove a closer that can't be an opener,
+ // once we've seen there's no matching opener.
+ $next = $closer->getNext();
+ $this->removeDelimiter($closer);
+ $closer = $next;
+ } else {
+ $closer = $closer->getNext();
}
- $closer = $closer->getNext();
continue;
}
@@ -209,6 +329,68 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro
}
// Remove all delimiters
- $this->removeAll($stackBottom);
+ $this->removeAll($stackBottomPosition);
+ }
+
+    /**
+     * Empties the delimiter list and the bracket stack when this stack is destroyed.
+     *
+     * Entries are removed one at a time through removeDelimiter() and removeBracket().
+     *
+     * @internal
+     */
+    public function __destruct()
+    {
+        while ($this->top !== null) {
+            $this->removeDelimiter($this->top);
+        }
+
+        while ($this->brackets !== null) {
+            $this->removeBracket();
+        }
+    }
+
+ /**
+ * Resolves the position of a delimiter, inferring one when the delimiter does not report its own index.
+ *
+ * Resolution order: -1 for null; the delimiter's own index; a previously cached inference;
+ * the nearest earlier delimiter's index plus the distance to it; the nearest later delimiter's
+ * index minus the distance to it; finally the resolved position of the previous delimiter plus one.
+ * Every inferred value is stored in $missingIndexCache.
+ *
+ * @deprecated This method will be dropped in 3.0 once all delimiters MUST have an index/position
+ */
+ private function getIndex(?DelimiterInterface $delimiter): int
+ {
+ if ($delimiter === null) {
+ return -1; // sorts below any inferred or cursor-based position
+ }
+
+ if (($index = $delimiter->getIndex()) !== null) {
+ return $index;
+ }
+
+ if (isset($this->missingIndexCache[$delimiter])) {
+ return $this->missingIndexCache[$delimiter];
+ }
+
+ $prev = $delimiter->getPrevious();
+ $next = $delimiter->getNext();
+
+ $i = 0; // number of steps taken backwards from $delimiter
+ do {
+ $i++;
+ if ($prev === null) {
+ break;
+ }
+
+ if ($prev->getIndex() !== null) {
+ return $this->missingIndexCache[$delimiter] = $prev->getIndex() + $i;
+ }
+ } while ($prev = $prev->getPrevious());
+
+ $j = 0; // number of steps taken forwards from $delimiter
+ do {
+ $j++;
+ if ($next === null) {
+ break;
+ }
+
+ if ($next->getIndex() !== null) {
+ return $this->missingIndexCache[$delimiter] = $next->getIndex() - $j;
+ }
+ } while ($next = $next->getNext());
+
+ // No index was defined on this delimiter, and none could be guesstimated based on the stack.
+ return $this->missingIndexCache[$delimiter] = $this->getIndex($delimiter->getPrevious()) + 1; // recurses down the stack; a null previous yields -1 + 1 = 0
+ }
}
diff --git a/src/Delimiter/Processor/CacheableDelimiterProcessorInterface.php b/src/Delimiter/Processor/CacheableDelimiterProcessorInterface.php
new file mode 100644
index 0000000000..a2a9b7ef39
--- /dev/null
+++ b/src/Delimiter/Processor/CacheableDelimiterProcessorInterface.php
@@ -0,0 +1,46 @@
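+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of the league/commonmark package.
+ *
+ * (c) Colin O'Dell <colinodell@gmail.com>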
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace League\CommonMark\Delimiter\Processor;
+
+use League\CommonMark\Delimiter\DelimiterInterface;
+
+/**
+ * Special marker interface for delimiter processors that return dynamic values from getDelimiterUse()
+ *
+ * In order to guarantee linear performance of delimiter processing, the delimiter stack must be able to
+ * cache the lower bound when searching for a matching opener. This gets complicated for delimiter processors
+ * that use a dynamic number of characters (like with emphasis and its "multiple of 3" rule).
+ */
+interface CacheableDelimiterProcessorInterface extends DelimiterProcessorInterface
+{
+ /**
+ * Returns a cache key of the factors that determine the number of characters to use.
+ *
+ * In order to guarantee linear performance of delimiter processing, the delimiter stack must be able to
+ * cache the lower bound when searching for a matching opener. This lower bound is usually quite simple;
+ * for example, with quotes, it's just the last opener with that character. However, this gets complicated
+ * for delimiter processors that use a dynamic number of characters (like with emphasis and its "multiple
+ * of 3" rule), because the delimiter length being considered may change during processing because of that
+ * dynamic logic in getDelimiterUse(). Therefore, we cannot safely cache the lower bound for these dynamic
+ * processors without knowing the factors that determine the number of characters to use.
+ *
+ * At a minimum, this should include the delimiter character, plus any other factors used to determine
+ * the result of getDelimiterUse(). The format of the string is not important so long as it is unique
+ * (compared to other processors) and consistent for a given set of factors.
+ *
+ * If getDelimiterUse() always returns the same hard-coded value, this method should return just
+ * the delimiter character.
+ *
+ * @param DelimiterInterface $closer The closing delimiter run for which an opener is being searched
+ *
+ * @return string Key under which the opener-search lower bound is cached
+ */
+ public function getCacheKey(DelimiterInterface $closer): string;
+}
diff --git a/src/Delimiter/Processor/DelimiterProcessorInterface.php b/src/Delimiter/Processor/DelimiterProcessorInterface.php
index 465378c390..5e88ddc7ad 100644
--- a/src/Delimiter/Processor/DelimiterProcessorInterface.php
+++ b/src/Delimiter/Processor/DelimiterProcessorInterface.php
@@ -58,6 +58,9 @@ public function getMinLength(): int;
* return 0 when it doesn't want to allow this particular combination of
* delimiter runs.
*
+ * IMPORTANT: Unless this method returns the same hard-coded value in all cases,
+ * you MUST implement the CacheableDelimiterProcessorInterface interface instead.
+ *
* @param DelimiterInterface $opener The opening delimiter run
* @param DelimiterInterface $closer The closing delimiter run
*/
diff --git a/src/Environment/Environment.php b/src/Environment/Environment.php
index 3c24749bbb..a8112967e4 100644
--- a/src/Environment/Environment.php
+++ b/src/Environment/Environment.php
@@ -432,6 +432,7 @@ public static function createDefaultConfiguration(): Configuration
'html_input' => Expect::anyOf(HtmlFilter::STRIP, HtmlFilter::ALLOW, HtmlFilter::ESCAPE)->default(HtmlFilter::ALLOW),
'allow_unsafe_links' => Expect::bool(true),
'max_nesting_level' => Expect::type('int')->default(PHP_INT_MAX),
+ 'max_delimiters_per_line' => Expect::type('int')->default(PHP_INT_MAX),
'renderer' => Expect::structure([
'block_separator' => Expect::string("\n"),
'inner_separator' => Expect::string("\n"),
diff --git a/src/Extension/Autolink/UrlAutolinkParser.php b/src/Extension/Autolink/UrlAutolinkParser.php
index 1ef270fe85..f487616552 100644
--- a/src/Extension/Autolink/UrlAutolinkParser.php
+++ b/src/Extension/Autolink/UrlAutolinkParser.php
@@ -34,7 +34,7 @@ final class UrlAutolinkParser implements InlineParserInterface
(?:
(?:xn--[a-z0-9-]++\.)*+xn--[a-z0-9-]++ # a domain name using punycode
|
- (?:[\pL\pN\pS\pM\-\_]++\.)+[\pL\pN\pM]++ # a multi-level domain name
+ (?:[\pL\pN\pS\pM\-\_]++\.){1,127}[\pL\pN\pM]++ # a multi-level domain name; total length must be 253 bytes or less
|
[a-z0-9\-\_]++ # a single-level domain name
)\.?
diff --git a/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php b/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php
index 84b46ee6d9..9a6be13491 100644
--- a/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php
+++ b/src/Extension/CommonMark/Delimiter/Processor/EmphasisDelimiterProcessor.php
@@ -20,14 +20,14 @@
namespace League\CommonMark\Extension\CommonMark\Delimiter\Processor;
use League\CommonMark\Delimiter\DelimiterInterface;
-use League\CommonMark\Delimiter\Processor\DelimiterProcessorInterface;
+use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface;
use League\CommonMark\Extension\CommonMark\Node\Inline\Emphasis;
use League\CommonMark\Extension\CommonMark\Node\Inline\Strong;
use League\CommonMark\Node\Inline\AbstractStringContainer;
use League\Config\ConfigurationAwareInterface;
use League\Config\ConfigurationInterface;
-final class EmphasisDelimiterProcessor implements DelimiterProcessorInterface, ConfigurationAwareInterface
+final class EmphasisDelimiterProcessor implements CacheableDelimiterProcessorInterface, ConfigurationAwareInterface
{
/** @psalm-readonly */
private string $char;
@@ -105,4 +105,15 @@ public function setConfiguration(ConfigurationInterface $configuration): void
{
$this->config = $configuration;
}
+
+ /**
+  * Builds the cache key for the given closer.
+  *
+  * The key combines this processor's delimiter character, whether the closer can also
+  * act as an opener, the closer's original length modulo 3, and its current length.
+  */
+ public function getCacheKey(DelimiterInterface $closer): string
+ {
+     $openability = $closer->canOpen() ? 'canOpen' : 'cannotOpen';
+     $lengthMod3  = $closer->getOriginalLength() % 3;
+
+     return \sprintf('%s-%s-%d-%d', $this->char, $openability, $lengthMod3, $closer->getLength());
+ }
}
diff --git a/src/Extension/CommonMark/Parser/Inline/BacktickParser.php b/src/Extension/CommonMark/Parser/Inline/BacktickParser.php
index 9618f2e676..3324fe39d0 100644
--- a/src/Extension/CommonMark/Parser/Inline/BacktickParser.php
+++ b/src/Extension/CommonMark/Parser/Inline/BacktickParser.php
@@ -18,12 +18,27 @@
use League\CommonMark\Extension\CommonMark\Node\Inline\Code;
use League\CommonMark\Node\Inline\Text;
+use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Parser\Inline\InlineParserMatch;
use League\CommonMark\Parser\InlineParserContext;
final class BacktickParser implements InlineParserInterface
{
+ /**
+ * Max bound for backtick code span delimiters.
+ *
+ * @see https://github.com/commonmark/cmark/commit/8ed5c9d
+ */
+ private const MAX_BACKTICKS = 1000;
+
+ /** @var \WeakReference<Cursor>|null */
+ private ?\WeakReference $lastCursor = null;
+ private bool $lastCursorScanned = false;
+
+ /** @var array<int, int> backtick count => position of known ender */
+ private array $seenBackticks = [];
+
public function getMatchDefinition(): InlineParserMatch
{
return InlineParserMatch::regex('`+');
@@ -38,11 +53,7 @@ public function parse(InlineParserContext $inlineContext): bool
$currentPosition = $cursor->getPosition();
$previousState = $cursor->saveState();
- while ($matchingTicks = $cursor->match('/`+/m')) {
- if ($matchingTicks !== $ticks) {
- continue;
- }
-
+ if ($this->findMatchingTicks(\strlen($ticks), $cursor)) {
$code = $cursor->getSubstring($currentPosition, $cursor->getPosition() - $currentPosition - \strlen($ticks));
$c = \preg_replace('/\n/m', ' ', $code) ?? '';
@@ -67,4 +78,55 @@ public function parse(InlineParserContext $inlineContext): bool
return true;
}
+
+ /**
+  * Locates the matching closer for a backtick code span.
+  *
+  * Leverages some caching to avoid traversing the same cursor multiple times when
+  * we've already seen all the potential backtick closers.
+  *
+  * Backtick runs are always consumed whole: a run longer than MAX_BACKTICKS is skipped
+  * instead of being split into smaller pieces, so its tail can never be mistaken for a closer.
+  *
+  * @see https://github.com/commonmark/cmark/commit/8ed5c9d
+  *
+  * @param int    $openTickLength Number of backticks in the opening sequence
+  * @param Cursor $cursor         Cursor to scan
+  *
+  * @return bool True if a matching closer was found, false otherwise
+  */
+ private function findMatchingTicks(int $openTickLength, Cursor $cursor): bool
+ {
+     // Reset the seenBackticks cache if this is a new cursor
+     if ($this->lastCursor === null || $this->lastCursor->get() !== $cursor) {
+         $this->seenBackticks     = [];
+         $this->lastCursor        = \WeakReference::create($cursor);
+         $this->lastCursorScanned = false;
+     }
+
+     if ($openTickLength > self::MAX_BACKTICKS) {
+         return false;
+     }
+
+     // Return if we already know there's no closer
+     if ($this->lastCursorScanned && isset($this->seenBackticks[$openTickLength]) && $this->seenBackticks[$openTickLength] <= $cursor->getPosition()) {
+         return false;
+     }
+
+     // Match each run in full; a bounded quantifier would chop an over-long run into
+     // several shorter "runs", one of which could wrongly equal the opener's length.
+     while ($ticks = $cursor->match('/`+/m')) {
+         $numTicks = \strlen($ticks);
+
+         // Over-long runs can never close a span (openers that long are rejected above),
+         // so they are neither compared nor cached
+         if ($numTicks > self::MAX_BACKTICKS) {
+             continue;
+         }
+
+         // Did we find the closer?
+         if ($numTicks === $openTickLength) {
+             return true;
+         }
+
+         // Store position of closer
+         $this->seenBackticks[$numTicks] = $cursor->getPosition() - $numTicks;
+     }
+
+     // Got through whole input without finding closer
+     $this->lastCursorScanned = true;
+
+     return false;
+ }
}
diff --git a/src/Extension/CommonMark/Parser/Inline/BangParser.php b/src/Extension/CommonMark/Parser/Inline/BangParser.php
index 8a9e1bd65c..cbf6ca3828 100644
--- a/src/Extension/CommonMark/Parser/Inline/BangParser.php
+++ b/src/Extension/CommonMark/Parser/Inline/BangParser.php
@@ -16,7 +16,6 @@
namespace League\CommonMark\Extension\CommonMark\Parser\Inline;
-use League\CommonMark\Delimiter\Delimiter;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Parser\Inline\InlineParserMatch;
@@ -38,8 +37,7 @@ public function parse(InlineParserContext $inlineContext): bool
$inlineContext->getContainer()->appendChild($node);
// Add entry to stack for this opener
- $delimiter = new Delimiter('!', 1, $node, true, false, $cursor->getPosition());
- $inlineContext->getDelimiterStack()->push($delimiter);
+ $inlineContext->getDelimiterStack()->addBracket($node, $cursor->getPosition(), true);
return true;
}
diff --git a/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php b/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php
index 16f24dc2aa..f3b83fd129 100644
--- a/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php
+++ b/src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php
@@ -16,6 +16,7 @@
namespace League\CommonMark\Extension\CommonMark\Parser\Inline;
+use League\CommonMark\Delimiter\Bracket;
use League\CommonMark\Environment\EnvironmentAwareInterface;
use League\CommonMark\Environment\EnvironmentInterface;
use League\CommonMark\Extension\CommonMark\Node\Inline\AbstractWebResource;
@@ -46,14 +47,14 @@ public function getMatchDefinition(): InlineParserMatch
public function parse(InlineParserContext $inlineContext): bool
{
// Look through stack of delimiters for a [ or !
- $opener = $inlineContext->getDelimiterStack()->searchByCharacter(['[', '!']);
+ $opener = $inlineContext->getDelimiterStack()->getLastBracket();
if ($opener === null) {
return false;
}
- if (! $opener->isActive()) {
- // no matched opener; remove from emphasis stack
- $inlineContext->getDelimiterStack()->removeDelimiter($opener);
+ if (! $opener->isImage() && ! $opener->isActive()) {
+ // no matched opener; remove from stack
+ $inlineContext->getDelimiterStack()->removeBracket();
return false;
}
@@ -70,21 +71,19 @@ public function parse(InlineParserContext $inlineContext): bool
// Inline link?
if ($result = $this->tryParseInlineLinkAndTitle($cursor)) {
$link = $result;
- } elseif ($link = $this->tryParseReference($cursor, $inlineContext->getReferenceMap(), $opener->getIndex(), $startPos)) {
+ } elseif ($link = $this->tryParseReference($cursor, $inlineContext->getReferenceMap(), $opener, $startPos)) {
$reference = $link;
$link = ['url' => $link->getDestination(), 'title' => $link->getTitle()];
} else {
- // No match
- $inlineContext->getDelimiterStack()->removeDelimiter($opener); // Remove this opener from stack
+ // No match; remove this opener from stack
+ $inlineContext->getDelimiterStack()->removeBracket();
$cursor->restoreState($previousState);
return false;
}
- $isImage = $opener->getChar() === '!';
-
- $inline = $this->createInline($link['url'], $link['title'], $isImage, $reference ?? null);
- $opener->getInlineNode()->replaceWith($inline);
+ $inline = $this->createInline($link['url'], $link['title'], $opener->isImage(), $reference ?? null);
+ $opener->getNode()->replaceWith($inline);
while (($label = $inline->next()) !== null) {
// Is there a Mention or Link contained within this link?
// CommonMark does not allow nested links, so we'll restore the original text.
@@ -104,8 +103,9 @@ public function parse(InlineParserContext $inlineContext): bool
// Process delimiters such as emphasis inside link/image
$delimiterStack = $inlineContext->getDelimiterStack();
- $stackBottom = $opener->getPrevious();
+ $stackBottom = $opener->getPosition();
$delimiterStack->processDelimiters($stackBottom, $this->environment->getDelimiterProcessors());
+ $delimiterStack->removeBracket();
$delimiterStack->removeAll($stackBottom);
// Merge any adjacent Text nodes together
@@ -113,8 +113,8 @@ public function parse(InlineParserContext $inlineContext): bool
// processEmphasis will remove this and later delimiters.
// Now, for a link, we also remove earlier link openers (no links in links)
- if (! $isImage) {
- $inlineContext->getDelimiterStack()->removeEarlierMatches('[');
+ if (! $opener->isImage()) {
+ $inlineContext->getDelimiterStack()->deactivateLinkOpeners();
}
return true;
@@ -168,21 +168,23 @@ private function tryParseInlineLinkAndTitle(Cursor $cursor): ?array
return ['url' => $dest, 'title' => $title];
}
- private function tryParseReference(Cursor $cursor, ReferenceMapInterface $referenceMap, ?int $openerIndex, int $startPos): ?ReferenceInterface
+ private function tryParseReference(Cursor $cursor, ReferenceMapInterface $referenceMap, Bracket $opener, int $startPos): ?ReferenceInterface
{
- if ($openerIndex === null) {
- return null;
- }
-
$savePos = $cursor->saveState();
$beforeLabel = $cursor->getPosition();
$n = LinkParserHelper::parseLinkLabel($cursor);
- if ($n === 0 || $n === 2) {
- $start = $openerIndex;
- $length = $startPos - $openerIndex;
- } else {
+ if ($n > 2) {
$start = $beforeLabel + 1;
$length = $n - 2;
+ } elseif (! $opener->hasNext()) {
+ // Empty or missing second label means to use the first label as the reference.
+ // The reference must not contain a bracket. If we know there's a bracket, we don't even bother checking it.
+ $start = $opener->getPosition();
+ $length = $startPos - $start;
+ } else {
+ $cursor->restoreState($savePos);
+
+ return null;
}
$referenceLabel = $cursor->getSubstring($start, $length);
diff --git a/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php b/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php
index 2b52d1cdc6..1ba8c133ab 100644
--- a/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php
+++ b/src/Extension/CommonMark/Parser/Inline/OpenBracketParser.php
@@ -16,7 +16,6 @@
namespace League\CommonMark\Extension\CommonMark\Parser\Inline;
-use League\CommonMark\Delimiter\Delimiter;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Parser\Inline\InlineParserMatch;
@@ -36,8 +35,7 @@ public function parse(InlineParserContext $inlineContext): bool
$inlineContext->getContainer()->appendChild($node);
// Add entry to stack for this opener
- $delimiter = new Delimiter('[', 1, $node, true, false, $inlineContext->getCursor()->getPosition());
- $inlineContext->getDelimiterStack()->push($delimiter);
+ $inlineContext->getDelimiterStack()->addBracket($node, $inlineContext->getCursor()->getPosition(), false);
return true;
}
diff --git a/src/Extension/SmartPunct/QuoteParser.php b/src/Extension/SmartPunct/QuoteParser.php
index 959930b3ee..31ba8c7738 100644
--- a/src/Extension/SmartPunct/QuoteParser.php
+++ b/src/Extension/SmartPunct/QuoteParser.php
@@ -46,6 +46,7 @@ public function parse(InlineParserContext $inlineContext): bool
{
$char = $inlineContext->getFullMatch();
$cursor = $inlineContext->getCursor();
+ $index = $cursor->getPosition();
$charBefore = $cursor->peek(-1);
if ($charBefore === null) {
@@ -67,7 +68,7 @@ public function parse(InlineParserContext $inlineContext): bool
$inlineContext->getContainer()->appendChild($node);
// Add entry to stack to this opener
- $inlineContext->getDelimiterStack()->push(new Delimiter($char, 1, $node, $canOpen, $canClose));
+ $inlineContext->getDelimiterStack()->push(new Delimiter($char, 1, $node, $canOpen, $canClose, $index));
return true;
}
diff --git a/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php b/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php
index 978e75a43f..a6c8d3889f 100644
--- a/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php
+++ b/src/Extension/Strikethrough/StrikethroughDelimiterProcessor.php
@@ -14,10 +14,10 @@
namespace League\CommonMark\Extension\Strikethrough;
use League\CommonMark\Delimiter\DelimiterInterface;
-use League\CommonMark\Delimiter\Processor\DelimiterProcessorInterface;
+use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface;
use League\CommonMark\Node\Inline\AbstractStringContainer;
-final class StrikethroughDelimiterProcessor implements DelimiterProcessorInterface
+final class StrikethroughDelimiterProcessor implements CacheableDelimiterProcessorInterface
{
public function getOpeningCharacter(): string
{
@@ -61,4 +61,9 @@ public function process(AbstractStringContainer $opener, AbstractStringContainer
$opener->insertAfter($strikethrough);
}
+
+ /**
+  * Builds the cache key for the given closer: a tilde followed by the closer's current length.
+  */
+ public function getCacheKey(DelimiterInterface $closer): string
+ {
+     return \sprintf('~%d', $closer->getLength());
+ }
}
diff --git a/src/Extension/Table/TableExtension.php b/src/Extension/Table/TableExtension.php
index 27a58bbdc6..0a8db3ed2a 100644
--- a/src/Extension/Table/TableExtension.php
+++ b/src/Extension/Table/TableExtension.php
@@ -41,6 +41,7 @@ public function configureSchema(ConfigurationBuilderInterface $builder): void
'center' => (clone $attributeArraySchema)->default(['align' => 'center']),
'right' => (clone $attributeArraySchema)->default(['align' => 'right']),
]),
+ 'max_autocompleted_cells' => Expect::int()->min(0)->default(TableParser::DEFAULT_MAX_AUTOCOMPLETED_CELLS),
]));
}
@@ -52,7 +53,7 @@ public function register(EnvironmentBuilderInterface $environment): void
}
$environment
- ->addBlockStartParser(new TableStartParser())
+ ->addBlockStartParser(new TableStartParser($environment->getConfiguration()->get('table/max_autocompleted_cells')))
->addRenderer(Table::class, $tableRenderer)
->addRenderer(TableSection::class, new TableSectionRenderer())
diff --git a/src/Extension/Table/TableParser.php b/src/Extension/Table/TableParser.php
index ca340a31f2..a005f8a97e 100644
--- a/src/Extension/Table/TableParser.php
+++ b/src/Extension/Table/TableParser.php
@@ -25,6 +25,11 @@
final class TableParser extends AbstractBlockContinueParser implements BlockContinueParserWithInlinesInterface
{
+ /**
+ * @internal
+ */
+ public const DEFAULT_MAX_AUTOCOMPLETED_CELLS = 10_000;
+
/** @psalm-readonly */
private Table $block;
@@ -54,6 +59,8 @@ final class TableParser extends AbstractBlockContinueParser implements BlockCont
/** @psalm-readonly-allow-private-mutation */
private bool $nextIsSeparatorLine = true;
+ private int $remainingAutocompletedCells;
+
/**
* @param array $columns
* @param array $headerCells
@@ -62,12 +69,13 @@ final class TableParser extends AbstractBlockContinueParser implements BlockCont
*
* @phpstan-param array $columns
*/
- public function __construct(array $columns, array $headerCells)
+ public function __construct(array $columns, array $headerCells, int $remainingAutocompletedCells = self::DEFAULT_MAX_AUTOCOMPLETED_CELLS)
{
- $this->block = new Table();
- $this->bodyLines = new ArrayCollection();
- $this->columns = $columns;
- $this->headerCells = $headerCells;
+ $this->block = new Table();
+ $this->bodyLines = new ArrayCollection();
+ $this->columns = $columns;
+ $this->headerCells = $headerCells;
+ $this->remainingAutocompletedCells = $remainingAutocompletedCells;
}
public function canHaveLazyContinuationLines(): bool
@@ -121,6 +129,12 @@ public function parseInlines(InlineParserEngineInterface $inlineParser): void
// Body can not have more columns than head
for ($i = 0; $i < $headerColumns; $i++) {
+ // It can have fewer columns though, in which case we'll autocomplete the empty ones (up to some limit)
+ if (! isset($cells[$i]) && $this->remainingAutocompletedCells-- <= 0) {
+ // Too many cells were auto-completed, so we'll just stop here
+ return;
+ }
+
$cell = $cells[$i] ?? '';
$tableCell = $this->parseCell($cell, $i, $inlineParser);
$row->appendChild($tableCell);
@@ -138,14 +152,12 @@ public function parseInlines(InlineParserEngineInterface $inlineParser): void
private function parseCell(string $cell, int $column, InlineParserEngineInterface $inlineParser): TableCell
{
- $tableCell = new TableCell();
+ $tableCell = new TableCell(TableCell::TYPE_DATA, $this->columns[$column] ?? null);
- if ($column < \count($this->columns)) {
- $tableCell->setAlign($this->columns[$column]);
+ if ($cell !== '') {
+ $inlineParser->parse(\trim($cell), $tableCell);
}
- $inlineParser->parse(\trim($cell), $tableCell);
-
return $tableCell;
}
diff --git a/src/Extension/Table/TableStartParser.php b/src/Extension/Table/TableStartParser.php
index 12206d289a..7411951c3d 100644
--- a/src/Extension/Table/TableStartParser.php
+++ b/src/Extension/Table/TableStartParser.php
@@ -23,6 +23,13 @@
final class TableStartParser implements BlockStartParserInterface
{
+ /** Autocompleted-cell budget handed to every TableParser this start parser creates */
+ private int $maxAutocompletedCells;
+
+ /**
+  * @param int $maxAutocompletedCells Budget of autocompleted cells passed on to each new TableParser
+  */
+ public function __construct(int $maxAutocompletedCells = TableParser::DEFAULT_MAX_AUTOCOMPLETED_CELLS)
+ {
+ $this->maxAutocompletedCells = $maxAutocompletedCells;
+ }
+
public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserState): ?BlockStart
{
$paragraph = $parserState->getParagraphContent();
@@ -35,8 +42,8 @@ public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserSta
return BlockStart::none();
}
- $lines = \explode("\n", $paragraph);
- $lastLine = \array_pop($lines);
+ $lastLineBreak = \strrpos($paragraph, "\n");
+ $lastLine = $lastLineBreak === false ? $paragraph : \substr($paragraph, $lastLineBreak + 1);
$headerCells = TableParser::split($lastLine);
if (\count($headerCells) > \count($columns)) {
@@ -47,13 +54,13 @@ public function tryStart(Cursor $cursor, MarkdownParserStateInterface $parserSta
$parsers = [];
- if (\count($lines) > 0) {
+ if ($lastLineBreak !== false) {
$p = new ParagraphParser();
- $p->addLine(\implode("\n", $lines));
+ $p->addLine(\substr($paragraph, 0, $lastLineBreak));
$parsers[] = $p;
}
- $parsers[] = new TableParser($columns, $headerCells);
+ $parsers[] = new TableParser($columns, $headerCells, $this->maxAutocompletedCells);
return BlockStart::of(...$parsers)
->at($cursor)
diff --git a/src/Normalizer/TextNormalizer.php b/src/Normalizer/TextNormalizer.php
index 7860f1b939..43eb117453 100644
--- a/src/Normalizer/TextNormalizer.php
+++ b/src/Normalizer/TextNormalizer.php
@@ -34,6 +34,11 @@ public function normalize(string $text, array $context = []): string
$text = \preg_replace('/[ \t\r\n]+/', ' ', \trim($text));
\assert(\is_string($text));
+ // Is it strictly ASCII? If so, we can use strtolower() instead (faster)
+ if (\mb_check_encoding($text, 'ASCII')) {
+ return \strtolower($text);
+ }
+
return \mb_convert_case($text, \MB_CASE_FOLD, 'UTF-8');
}
}
diff --git a/src/Parser/Cursor.php b/src/Parser/Cursor.php
index faae75bc1d..598cd75b52 100644
--- a/src/Parser/Cursor.php
+++ b/src/Parser/Cursor.php
@@ -322,17 +322,17 @@ public function advanceToNextNonSpaceOrTab(): int
*/
public function advanceToNextNonSpaceOrNewline(): int
{
- $remainder = $this->getRemainder();
+ $currentCharacter = $this->getCurrentCharacter();
// Optimization: Avoid the regex if we know there are no spaces or newlines
- if ($remainder === '' || ($remainder[0] !== ' ' && $remainder[0] !== "\n")) {
+ if ($currentCharacter !== ' ' && $currentCharacter !== "\n") {
$this->previousPosition = $this->currentPosition;
return 0;
}
$matches = [];
- \preg_match('/^ *(?:\n *)?/', $remainder, $matches, \PREG_OFFSET_CAPTURE);
+ \preg_match('/^ *(?:\n *)?/', $this->getRemainder(), $matches, \PREG_OFFSET_CAPTURE);
// [0][0] contains the matched text
// [0][1] contains the index of that match
diff --git a/src/Parser/InlineParserContext.php b/src/Parser/InlineParserContext.php
index 796f2f388c..9372904281 100644
--- a/src/Parser/InlineParserContext.php
+++ b/src/Parser/InlineParserContext.php
@@ -42,12 +42,12 @@ final class InlineParserContext
*/
private array $matches;
- public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap)
+ public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap, int $maxDelimitersPerLine = PHP_INT_MAX)
{
$this->referenceMap = $referenceMap;
$this->container = $container;
$this->cursor = $contents;
- $this->delimiterStack = new DelimiterStack();
+ $this->delimiterStack = new DelimiterStack($maxDelimitersPerLine);
}
public function getContainer(): AbstractBlock
diff --git a/src/Parser/InlineParserEngine.php b/src/Parser/InlineParserEngine.php
index b91a63f72f..6a26979329 100644
--- a/src/Parser/InlineParserEngine.php
+++ b/src/Parser/InlineParserEngine.php
@@ -59,7 +59,7 @@ public function parse(string $contents, AbstractBlock $block): void
$contents = \trim($contents);
$cursor = new Cursor($contents);
- $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap);
+ $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap, $this->environment->getConfiguration()->get('max_delimiters_per_line'));
// Have all parsers look at the line to determine what they might want to parse and what positions they exist at
foreach ($this->matchParsers($contents) as $matchPosition => $parsers) {
diff --git a/src/Parser/MarkdownParser.php b/src/Parser/MarkdownParser.php
index 2fecb9ba47..904c7c45b4 100644
--- a/src/Parser/MarkdownParser.php
+++ b/src/Parser/MarkdownParser.php
@@ -32,6 +32,7 @@
use League\CommonMark\Parser\Block\BlockStartParserInterface;
use League\CommonMark\Parser\Block\DocumentBlockParser;
use League\CommonMark\Parser\Block\ParagraphParser;
+use League\CommonMark\Reference\MemoryLimitedReferenceMap;
use League\CommonMark\Reference\ReferenceInterface;
use League\CommonMark\Reference\ReferenceMap;
@@ -102,7 +103,7 @@ public function parse(string $input): Document
// finalizeAndProcess
$this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber);
- $this->processInlines();
+ $this->processInlines(\strlen($input));
$this->environment->dispatch(new DocumentParsedEvent($documentParser->getBlock()));
@@ -115,6 +116,9 @@ public function parse(string $input): Document
*/
private function parseLine(string $line): void
{
+ // replace NUL characters for security
+ $line = \str_replace("\0", "\u{FFFD}", $line);
+
$this->cursor = new Cursor($line);
$matches = $this->parseBlockContinuation();
@@ -263,9 +267,9 @@ private function finalize(BlockContinueParserInterface $blockParser, int $endLin
/**
* Walk through a block & children recursively, parsing string content into inline content where appropriate.
*/
- private function processInlines(): void
+ private function processInlines(int $inputSize): void
{
- $p = new InlineParserEngine($this->environment, $this->referenceMap);
+ $p = new InlineParserEngine($this->environment, new MemoryLimitedReferenceMap($this->referenceMap, $inputSize));
foreach ($this->closedBlockParsers as $blockParser) {
$blockParser->parseInlines($p);
diff --git a/src/Reference/MemoryLimitedReferenceMap.php b/src/Reference/MemoryLimitedReferenceMap.php
new file mode 100644
index 0000000000..d47bd6a6de
--- /dev/null
+++ b/src/Reference/MemoryLimitedReferenceMap.php
@@ -0,0 +1,68 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace League\CommonMark\Reference;
+
+/**
+ * Decorates a reference map and enforces a budget on how much reference content get() may hand out.
+ *
+ * Every successful lookup is charged the byte length of the reference's destination plus its title;
+ * once the budget is overdrawn, get() returns null even for labels the decorated map knows.
+ * All other operations are forwarded to the decorated map unchanged.
+ */
+final class MemoryLimitedReferenceMap implements ReferenceMapInterface
+{
+    /** Smallest budget ever used, regardless of the requested maximum */
+    private const MINIMUM_SIZE = 100_000;
+
+    private ReferenceMapInterface $decorated;
+
+    /** Budget still available to get(); it only ever decreases */
+    private int $remaining;
+
+    /**
+     * @param ReferenceMapInterface $decorated Map that actually stores the references
+     * @param int                   $maxSize   Requested budget; raised to MINIMUM_SIZE when smaller
+     */
+    public function __construct(ReferenceMapInterface $decorated, int $maxSize)
+    {
+        $this->decorated = $decorated;
+        $this->remaining = $maxSize < self::MINIMUM_SIZE ? self::MINIMUM_SIZE : $maxSize;
+    }
+
+    public function add(ReferenceInterface $reference): void
+    {
+        $this->decorated->add($reference);
+    }
+
+    public function contains(string $label): bool
+    {
+        return $this->decorated->contains($label);
+    }
+
+    /**
+     * Looks up a reference and charges its size against the remaining budget.
+     *
+     * @return ReferenceInterface|null Null when the label is unknown or the budget is overdrawn
+     */
+    public function get(string $label): ?ReferenceInterface
+    {
+        $found = $this->decorated->get($label);
+        if ($found === null) {
+            return null;
+        }
+
+        // Check for expansion limit
+        $cost             = \strlen($found->getDestination()) + \strlen($found->getTitle());
+        $this->remaining -= $cost;
+
+        return $this->remaining >= 0 ? $found : null;
+    }
+
+    /**
+     * @return \Traversable
+     */
+    public function getIterator(): \Traversable
+    {
+        return $this->decorated->getIterator();
+    }
+
+    public function count(): int
+    {
+        return $this->decorated->count();
+    }
+}
diff --git a/src/Reference/ReferenceMap.php b/src/Reference/ReferenceMap.php
index 982cb1253a..97a167dcf2 100644
--- a/src/Reference/ReferenceMap.php
+++ b/src/Reference/ReferenceMap.php
@@ -48,6 +48,10 @@ public function add(ReferenceInterface $reference): void
public function contains(string $label): bool
{
+ if ($this->references === []) {
+ return false;
+ }
+
$label = $this->normalizer->normalize($label);
return isset($this->references[$label]);
@@ -55,6 +59,10 @@ public function contains(string $label): bool
public function get(string $label): ?ReferenceInterface
{
+ if ($this->references === []) {
+ return null;
+ }
+
$label = $this->normalizer->normalize($label);
return $this->references[$label] ?? null;
diff --git a/src/Util/LinkParserHelper.php b/src/Util/LinkParserHelper.php
index e329669bdd..3e76c28faa 100644
--- a/src/Util/LinkParserHelper.php
+++ b/src/Util/LinkParserHelper.php
@@ -30,15 +30,8 @@ final class LinkParserHelper
*/
public static function parseLinkDestination(Cursor $cursor): ?string
{
- if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) {
- // Chop off surrounding <..>:
- return UrlEncoder::unescapeAndEncode(
- RegexHelper::unescape(\substr($res, 1, -1))
- );
- }
-
if ($cursor->getCurrentCharacter() === '<') {
- return null;
+ return self::parseDestinationBraces($cursor);
}
$destination = self::manuallyParseLinkDestination($cursor);
@@ -69,7 +62,7 @@ public static function parseLinkLabel(Cursor $cursor): int
public static function parsePartialLinkLabel(Cursor $cursor): ?string
{
- return $cursor->match('/^(?:[^\\\\\[\]]+|\\\\.?)*/');
+ return $cursor->match('/^(?:[^\\\\\[\]]++|\\\\.?)*+/');
}
/**
@@ -100,27 +93,27 @@ public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimite
private static function manuallyParseLinkDestination(Cursor $cursor): ?string
{
- $oldPosition = $cursor->getPosition();
- $oldState = $cursor->saveState();
-
+ $remainder = $cursor->getRemainder();
$openParens = 0;
- while (($c = $cursor->getCurrentCharacter()) !== null) {
- if ($c === '\\' && ($peek = $cursor->peek()) !== null && RegexHelper::isEscapable($peek)) {
- $cursor->advanceBy(2);
+ $len = \strlen($remainder);
+ for ($i = 0; $i < $len; $i++) {
+ $c = $remainder[$i];
+ if ($c === '\\' && $i + 1 < $len && RegexHelper::isEscapable($remainder[$i + 1])) {
+ $i++;
} elseif ($c === '(') {
- $cursor->advanceBy(1);
$openParens++;
+ // Limit to 32 nested parens for pathological cases
+ if ($openParens > 32) {
+ return null;
+ }
} elseif ($c === ')') {
if ($openParens < 1) {
break;
}
- $cursor->advanceBy(1);
$openParens--;
- } elseif (\preg_match(RegexHelper::REGEX_WHITESPACE_CHAR, $c)) {
+ } elseif (\ord($c) <= 32 && RegexHelper::isWhitespace($c)) {
break;
- } else {
- $cursor->advanceBy(1);
}
}
@@ -128,15 +121,45 @@ private static function manuallyParseLinkDestination(Cursor $cursor): ?string
return null;
}
- if ($cursor->getPosition() === $oldPosition && (! isset($c) || $c !== ')')) {
+ if ($i === 0 && (! isset($c) || $c !== ')')) {
return null;
}
- $newPos = $cursor->getPosition();
- $cursor->restoreState($oldState);
+ $destination = \substr($remainder, 0, $i);
+ $cursor->advanceBy(\mb_strlen($destination, 'UTF-8'));
+
+ return $destination;
+ }
+
+ /** @var \WeakReference<Cursor>|null Cursor most recently passed to parseDestinationBraces() */
+ private static ?\WeakReference $lastCursor = null;
+ /** Whether the latest attempt on $lastCursor failed to match a braced destination */
+ private static bool $lastCursorLacksClosingBrace = false;
+
+ /**
+  * Parses a link destination wrapped in angle brackets (`<...>`) from the given cursor.
+  *
+  * @return string|null The destination without its surrounding brackets, passed through
+  *                     RegexHelper::unescape() and UrlEncoder::unescapeAndEncode(); null if nothing matched
+  */
+ private static function parseDestinationBraces(Cursor $cursor): ?string
+ {
+ // Optimization: If we've previously parsed this cursor and returned `null`, we know
+ // that no closing brace exists, so we can skip the regex entirely. This helps avoid
+ // certain pathological cases where the regex engine can take a very long time to
+ // determine that no match exists.
+ if (self::$lastCursor !== null && self::$lastCursor->get() === $cursor) {
+ if (self::$lastCursorLacksClosingBrace) {
+ return null;
+ }
+ } else {
+ self::$lastCursor = \WeakReference::create($cursor);
+ }
+
+ if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) {
+ self::$lastCursorLacksClosingBrace = false;
- $cursor->advanceBy($newPos - $cursor->getPosition());
+ // Chop off surrounding <..>:
+ return UrlEncoder::unescapeAndEncode(
+ RegexHelper::unescape(\substr($res, 1, -1))
+ );
+ }
+
+ // NOTE(review): the flag is set whenever the regex fails, whatever the reason. Confirm that
+ // REGEX_LINK_DESTINATION_BRACES can only fail when no closing brace remains; otherwise a later,
+ // valid `<...>` destination on the same cursor would be rejected by the shortcut above.
+ self::$lastCursorLacksClosingBrace = true;
- return $cursor->getPreviousText();
+ return null;
+ }
}
diff --git a/src/Util/RegexHelper.php b/src/Util/RegexHelper.php
index a89e7bda66..603631f294 100644
--- a/src/Util/RegexHelper.php
+++ b/src/Util/RegexHelper.php
@@ -61,9 +61,9 @@ final class RegexHelper
self::PARTIAL_PROCESSINGINSTRUCTION . '|' . self::PARTIAL_DECLARATION . '|' . self::PARTIAL_CDATA . ')';
public const PARTIAL_HTMLBLOCKOPEN = '<(?:' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s\/>]|$)' . '|' .
'\/' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s>]|$)' . '|' . '[?!])';
- public const PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"' .
- '|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\'' .
- '|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*\))';
+ public const PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*+"' .
+ '|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*+\'' .
+ '|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*+\))';
public const REGEX_PUNCTUATION = '/^[!"#$%&\'()*+,\-.\\/:;<=>?@\\[\\]\\\\^_`{|}~\p{P}\p{S}]/u';
public const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i';
@@ -83,6 +83,12 @@ public static function isEscapable(string $character): bool
return \preg_match('/' . self::PARTIAL_ESCAPABLE . '/', $character) === 1;
}
+ /**
+  * Tells whether the given character is a space, tab, newline, vertical tab,
+  * form feed or carriage return.
+  *
+  * An empty string is never considered whitespace.
+  */
+ public static function isWhitespace(string $character): bool
+ {
+     if ($character === '') {
+         return false;
+     }
+
+     /** @psalm-suppress InvalidLiteralArgument */
+     return \strpos(" \t\n\x0b\x0c\x0d", $character) !== false;
+ }
+
/**
* @psalm-pure
*/
diff --git a/tests/functional/MaxDelimitersPerLineTest.php b/tests/functional/MaxDelimitersPerLineTest.php
new file mode 100644
index 0000000000..1de9697dc5
--- /dev/null
+++ b/tests/functional/MaxDelimitersPerLineTest.php
@@ -0,0 +1,50 @@
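+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of the league/commonmark package.
+ *
+ * (c) Colin O'Dell <colinodell@gmail.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */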
+
+namespace League\CommonMark\Tests\Functional;
+
+use League\CommonMark\CommonMarkConverter;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Functional test for the `max_delimiters_per_line` configuration option.
+ *
+ * The same Markdown line is converted under increasing limits; each expected
+ * string shows which delimiter runs were turned into emphasis and which were
+ * left as literal text.
+ */
+final class MaxDelimitersPerLineTest extends TestCase
+{
+    /**
+     * Converts $input with the given limit and compares the trimmed HTML output.
+     *
+     * @dataProvider provideTestCases
+     */
+    public function testIt(string $input, int $maxDelimsPerLine, string $expectedOutput): void
+    {
+        $converter = new CommonMarkConverter(['max_delimiters_per_line' => $maxDelimsPerLine]);
+
+        // The converter ends block output with a newline; trim it so cases stay single-line.
+        $this->assertEquals($expectedOutput, \trim($converter->convert($input)->getContent()));
+    }
+
+    /**
+     * Each case is [Markdown input, max_delimiters_per_line, expected HTML].
+     *
+     * @return iterable<array<mixed>>
+     */
+    public function provideTestCases(): iterable
+    {
+        // Limit equal to the number of delimiter runs on the line: everything is parsed.
+        yield ['*a* **b *c* b**', 6, '<p><em>a</em> <strong>b <em>c</em> b</strong></p>'];
+
+        // Same input under growing limits: the line contains eight delimiter runs in total.
+        yield ['*a* **b *c **d** c* b**', 0, '<p>*a* **b *c **d** c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 1, '<p>*a* **b *c **d** c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 2, '<p><em>a</em> **b *c **d** c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 3, '<p><em>a</em> **b *c **d** c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 4, '<p><em>a</em> **b *c **d** c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 5, '<p><em>a</em> **b *c **d** c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 6, '<p><em>a</em> **b *c <strong>d</strong> c* b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 7, '<p><em>a</em> **b <em>c <strong>d</strong> c</em> b**</p>'];
+        yield ['*a* **b *c **d** c* b**', 8, '<p><em>a</em> <strong>b <em>c <strong>d</strong> c</em> b</strong></p>'];
+        yield ['*a* **b *c **d** c* b**', 9, '<p><em>a</em> <strong>b <em>c <strong>d</strong> c</em> b</strong></p>'];
+        yield ['*a* **b *c **d** c* b**', 100, '<p><em>a</em> <strong>b <em>c <strong>d</strong> c</em> b</strong></p>'];
+    }
+}
diff --git a/tests/pathological/convert.php b/tests/pathological/convert.php
new file mode 100755
index 0000000000..51c6d42330
--- /dev/null
+++ b/tests/pathological/convert.php
@@ -0,0 +1,62 @@
+#!/usr/bin/env php
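+<?php
+
+/*
+ * This file is part of the league/commonmark package.
+ *
+ * (c) Colin O'Dell <colinodell@gmail.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */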
+
+ini_set('memory_limit', '1024M');
+
+use League\CommonMark\Environment\Environment;
+use League\CommonMark\Extension\CommonMark\CommonMarkCoreExtension;
+use League\CommonMark\Extension\Footnote\FootnoteExtension;
+use League\CommonMark\Extension\GithubFlavoredMarkdownExtension;
+use League\CommonMark\Extension\Table\TableExtension;
+use League\CommonMark\MarkdownConverter;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+// Send PHP diagnostics to STDERR so they never mix with the HTML written to STDOUT.
+ini_set('display_errors', 'stderr');
+ini_set('xdebug.max_nesting_level', '999999');
+
+// Read the complete Markdown document from STDIN in blocking mode.
+// $markdown stays false if the stream cannot be opened or switched to blocking.
+$markdown = false;
+$stdin = fopen('php://stdin', 'r');
+if ($stdin !== false) {
+    if (stream_set_blocking($stdin, true)) {
+        $markdown = stream_get_contents($stdin);
+    }
+
+    fclose($stdin);
+}
+
+// Compare explicitly instead of using empty(): empty('0') is true, which would
+// wrongly reject the valid one-character document "0".
+if ($markdown === false || $markdown === '') {
+    fwrite(STDERR, "No input provided\n");
+    exit(1);
+}
+
+// Optional first CLI argument: the converter configuration as a JSON object.
+$config = [];
+if (isset($argv[1])) {
+    $config = \json_decode($argv[1], true);
+
+    // json_decode() yields null on malformed JSON and a scalar for JSON scalars;
+    // neither is a usable configuration array, so fail with a readable message.
+    if (! \is_array($config)) {
+        fwrite(STDERR, 'Invalid configuration, expected a JSON object: ' . \json_last_error_msg() . "\n");
+        exit(1);
+    }
+}
+
+// Every run starts from the core CommonMark extension.
+$environment = new Environment($config);
+$environment->addExtension(new CommonMarkCoreExtension());
+
+// The optional second CLI argument selects one additional extension.
+// 'gfm', any unrecognised value, and a missing argument all select GitHub Flavored Markdown.
+$requestedExtension = $argv[2] ?? null;
+if ($requestedExtension === 'table') {
+    $additionalExtension = new TableExtension();
+} elseif ($requestedExtension === 'footnotes') {
+    $additionalExtension = new FootnoteExtension();
+} else {
+    $additionalExtension = new GithubFlavoredMarkdownExtension();
+}
+
+$environment->addExtension($additionalExtension);
+
+// Convert the document read from STDIN and write the resulting HTML to STDOUT.
+$html = (new MarkdownConverter($environment))->convert($markdown)->getContent();
+
+echo $html;
diff --git a/tests/pathological/test.php b/tests/pathological/test.php
new file mode 100755
index 0000000000..fea326a449
--- /dev/null
+++ b/tests/pathological/test.php
@@ -0,0 +1,386 @@
+#!/usr/bin/env php
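+<?php
+
+/*
+ * This file is part of the league/commonmark package.
+ *
+ * (c) Colin O'Dell <colinodell@gmail.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */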
+
+use Symfony\Component\Process\Exception\ProcessSignaledException;
+use Symfony\Component\Process\Exception\ProcessTimedOutException;
+use Symfony\Component\Process\Process;
+
+require_once __DIR__ . '/../../vendor/autoload.php';
+
+$cases = [
+ 'U+0000 in input' => [
+ 'sizes' => [1],
+ 'input' => static fn($n) => "abc\u{0000}def\u{0000}\n",
+ 'expected' => static fn($n) => "abc\u{FFFD}def\u{FFFD}
",
+ ],
+ 'Alternate line endings' => [
+ 'sizes' => [1],
+ 'input' => static fn($n) => "- a\n- b\r- c\r\n- d",
+ 'expected' => static fn($n) => "\n",
+ ],
+ 'Nested strong emphasis' => [
+ 'sizes' => [50, 500],
+ 'input' => static fn($n) => \str_repeat('*a **a ', $n) . 'b' . \str_repeat(' a** a*', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('a a ', $n) . 'b' . \str_repeat(' a a', $n) . '
',
+ ],
+ 'Emphasis closers with no openers' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('a_ ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('a_ ', $n - 1) . 'a_
',
+ ],
+ 'Emphasis openers with no closers' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('_a ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('_a ', $n - 1) . '_a
',
+ ],
+ 'Openers and closers multiple of 3' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => 'a**b' . \str_repeat('c* ', $n),
+ 'expected' => static fn($n) => 'a**b' . \str_repeat('c* ', $n - 1) . 'c*
',
+ ],
+ 'Delimiters that cannot open or close' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/172',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('*_* _ ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('_ _ ', $n - 1) . '_ _
',
+ ],
+ 'Link closers with no openers' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('a] ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('a] ', $n - 1) . 'a]
',
+ ],
+ 'Link openers with no closers' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('[a ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('[a ', $n - 1) . '[a
',
+ ],
+ 'Link openers and emphasis closers' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('[ a_ ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('[ a_ ', $n - 1) . '[ a_
',
+ ],
+ 'Mismatched openers and closers' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('*a_ ', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('*a_ ', $n - 1) . '*a_
',
+ ],
+ 'Pattern [ (](' => [
+ 'sizes' => [500, 5_000, 50_000],
+ 'input' => static fn($n) => \str_repeat('[ (](', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('[ (](', $n) . '
',
+ ],
+ 'Nested brackets' => [
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('[', $n) . 'a' . \str_repeat(']', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('[', $n) . 'a' . \str_repeat(']', $n) . '
',
+ ],
+ 'Backslash in link' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/157',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => '[' . \str_repeat('\\', $n) . "\n",
+ 'expected' => static fn($n) => '[' . \str_repeat('\\', $n / 2) . '
',
+ ],
+ 'Backslash in unclosed link title' => [
+ 'sizes' => [10, 100, 1_000],
+ 'input' => static fn($n) => '[test](\\url "' . \str_repeat('\\', $n) . "\n",
+ 'expected' => static fn($n) => '[test](\\url "' . \str_repeat('\\', $n / 2) . '
',
+ ],
+ 'Unclosed inline links (1)' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('[](', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('[](', $n) . '
',
+ ],
+ 'Unclosed inline links (2)' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('[a](b', $n),
+ 'expected' => static fn($n) => '' . \str_repeat('[a](b', $n) . '
',
+ ],
+ 'Unclosed inline links (3)' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('[a]( static fn($n) => '' . \str_repeat('[a](<b', $n) . '
',
+ ],
+ 'Nested blockquotes' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129',
+ 'sizes' => [100, 1_000],
+ 'input' => static fn($n) => \str_repeat('> ', $n) . "a\n",
+ 'expected' => static fn($n) => \str_repeat("\n", $n) . "a
\n" . \str_repeat("
\n", $n),
+ ],
+ 'Backticks' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129',
+ 'sizes' => [500, 1_000, 2_000, 4_000],
+ 'input' => static fn($n) => \implode('', \array_map(static fn($i) => 'e' . \str_repeat('`', $i), \range(1, $n))),
+ ],
+ 'Many ref. definitions' => [
+ 'ref' => 'https://github.com/commonmark/commonmark.js/issues/129',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat("[a]: u\n", $n),
+ ],
+ 'Huge horizontal rule' => [
+ 'sizes' => [500, 5_000],
+ 'input' => static fn($n) => \str_repeat('*', $n) . "\n",
+ 'expected' => static fn($n) => '
',
+ ],
+ 'CVE-2022-39209' => [
+ 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-cgh3-p57x-9q7q',
+ 'extension' => 'autolink',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('![l', $n) . "\n",
+ 'expected' => static fn($n) => '' . \str_repeat('![l', $n) . '
',
+ ],
+ 'CVE-2023-22486' => [
+ 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-r572-jvj2-3m8p',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => \str_repeat('![[]()', $n) . "\n",
+ 'expected' => static fn($n) => '' . \str_repeat('![', $n) . '
',
+ ],
+ 'CVE-2023-22484' => [
+ 'ref' => 'https://github.com/github/cmark-gfm/security/advisories/GHSA-24f7-9frr-5h2r',
+ 'sizes' => [1_000, 10_000, 100_000],
+ 'input' => static fn($n) => '' . \str_repeat('