From c0e4b1b5bc4f80295c823de56ca5b5791c6c09e8 Mon Sep 17 00:00:00 2001 From: Wouter de Jong Date: Sat, 16 Dec 2023 16:07:07 +0100 Subject: [PATCH] Fix using < or > in a reference name These characters are valid and only the last `<...>` must be treated as the embedded URL. This stops using the inline lexer inside references, reST doesn't allow nested inline nodes and the only thing required is getting the embedded URL (if there is any). --- .../Parser/EmbeddedUriParser.php | 28 ++++++++ .../RestructuredText/Parser/InlineLexer.php | 6 -- .../InlineRules/AnonymousPhraseRule.php | 29 ++++---- .../InlineRules/NamedPhraseRule.php | 28 ++++---- .../Productions/InlineRules/ReferenceRule.php | 32 --------- .../TextRoles/AbstractReferenceTextRole.php | 71 ++----------------- .../TextRoles/DocReferenceTextRole.php | 7 -- .../TextRoles/GenericReferenceTextRole.php | 3 - .../unit/Parser/InlineTokenParserTest.php | 15 ++-- .../TextRoles/DocReferenceTextRoleTest.php | 5 +- .../unit/TextRoles/ReferenceTextRoleTest.php | 5 +- tests/Functional/tests/links/links.html | 2 + tests/Functional/tests/links/links.rst | 4 ++ 13 files changed, 77 insertions(+), 158 deletions(-) create mode 100644 packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php b/packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php new file mode 100644 index 000000000..4c7cc6b90 --- /dev/null +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php @@ -0,0 +1,28 @@ +)?$/s', $text, $matches); + + $text = $matches[1] === '' ? 
null : $matches[1]; + $uri = $matches[1]; + + if (isset($matches[2])) { + // there is an embedded URI, text and URI are different + $uri = $matches[2]; + } else { + $text = null; + } + + return ['text' => $text, 'uri' => $uri]; + } +} diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php index 42b253f2f..992625026 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php @@ -23,8 +23,6 @@ final class InlineLexer extends AbstractLexer public const ANONYMOUS_END = 3; public const LITERAL = 5; public const BACKTICK = 6; - public const EMBEDED_URL_START = 9; - public const EMBEDED_URL_END = 10; public const NAMED_REFERENCE = 11; public const ANONYMOUSE_REFERENCE = 12; public const COLON = 13; @@ -62,8 +60,6 @@ protected function getCatchablePatterns(): array '``.+?``(?!`)', '_{2}', '_', - '<', - '>', '`', ':', '|', @@ -138,8 +134,6 @@ protected function getType(string &$value) '**' => self::STRONG_DELIMITER, '*' => self::EMPHASIS_DELIMITER, '|' => self::VARIABLE_DELIMITER, - '<' => self::EMBEDED_URL_START, - '>' => self::EMBEDED_URL_END, '_' => self::UNDERSCORE, '__' => self::ANONYMOUS_END, ':' => self::COLON, diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php index d0b3d0c9f..ddc721c86 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php @@ -6,6 +6,7 @@ use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode; use phpDocumentor\Guides\RestructuredText\Parser\BlockContext; 
+use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser; use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer; /** @@ -20,6 +21,8 @@ */ class AnonymousPhraseRule extends ReferenceRule { + use EmbeddedUriParser; + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::BACKTICK; @@ -27,8 +30,7 @@ public function applies(InlineLexer $lexer): bool public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractLinkInlineNode|null { - $text = ''; - $embeddedUrl = null; + $value = ''; $initialPosition = $lexer->token?->position; $lexer->moveNext(); while ($lexer->token !== null) { @@ -43,17 +45,10 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL $lexer->moveNext(); - return $this->createAnonymousReference($blockContext, $text, $embeddedUrl); - - case InlineLexer::EMBEDED_URL_START: - $embeddedUrl = $this->parseEmbeddedUrl($lexer); - if ($embeddedUrl === null) { - $text .= '<'; - } + return $this->createAnonymousReference($blockContext, $value); - break; default: - $text .= $lexer->token->value; + $value .= $lexer->token->value; } $lexer->moveNext(); @@ -64,9 +59,17 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL return null; } - private function createAnonymousReference(BlockContext $blockContext, string $link, string|null $embeddedUrl): AbstractLinkInlineNode + private function createAnonymousReference(BlockContext $blockContext, string $value): AbstractLinkInlineNode { - $node = $this->createReference($blockContext, $link, $embeddedUrl, false); + $parsed = $this->extractEmbeddedUri($value); + $link = $parsed['text']; + $uri = $parsed['uri']; + if ($link === null) { + $link = $uri; + $uri = null; + } + + $node = $this->createReference($blockContext, $link, $uri, false); $blockContext->getDocumentParserContext()->pushAnonymous($link); return $node; diff --git 
a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php index 98fb5555d..dcadf66aa 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php @@ -6,6 +6,7 @@ use phpDocumentor\Guides\Nodes\Inline\InlineNode; use phpDocumentor\Guides\RestructuredText\Parser\BlockContext; +use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser; use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer; /** @@ -20,6 +21,8 @@ */ class NamedPhraseRule extends ReferenceRule { + use EmbeddedUriParser; + public function applies(InlineLexer $lexer): bool { return $lexer->token?->type === InlineLexer::BACKTICK; @@ -27,8 +30,7 @@ public function applies(InlineLexer $lexer): bool public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNode|null { - $text = ''; - $embeddedUrl = null; + $value = ''; $initialPosition = $lexer->token?->position; $lexer->moveNext(); while ($lexer->token !== null) { @@ -42,25 +44,23 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod } $lexer->moveNext(); - if ($text === '') { - $text = $embeddedUrl ?? 
''; - } - - return $this->createReference($blockContext, $text, $embeddedUrl); - case InlineLexer::EMBEDED_URL_START: - $embeddedUrl = $this->parseEmbeddedUrl($lexer); - if ($embeddedUrl === null) { - $text .= '<'; + $parsed = $this->extractEmbeddedUri($value); + $text = $parsed['text']; + $uri = $parsed['uri']; + if ($text === null) { + $text = $uri; + $uri = null; } - break; + return $this->createReference($blockContext, $text, $uri); + case InlineLexer::WHITESPACE: - $text .= ' '; + $value .= ' '; break; default: - $text .= $lexer->token->value; + $value .= $lexer->token->value; } $lexer->moveNext(); diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php index 2c1bfc013..7398ba614 100644 --- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php +++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php @@ -8,7 +8,6 @@ use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode; use phpDocumentor\Guides\Nodes\Inline\HyperLinkNode; use phpDocumentor\Guides\RestructuredText\Parser\BlockContext; -use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer; use function filter_var; use function preg_replace; @@ -41,35 +40,4 @@ protected function createReference(BlockContext $blockContext, string $link, str return new HyperLinkNode($link, $targetLink); } - - protected function parseEmbeddedUrl(InlineLexer $lexer): string|null - { - if ($lexer->token === null) { - return null; - } - - $startPosition = $lexer->token->position; - $text = ''; - - while ($lexer->moveNext()) { - $token = $lexer->token; - switch ($token->type) { - case InlineLexer::BACKTICK: - //We did not find the expected SpanLexer::EMBEDED_URL_END - $this->rollback($lexer, $startPosition); - - return null; - - case 
InlineLexer::EMBEDED_URL_END: - return $text; - - default: - $text .= $token->value; - } - } - - $this->rollback($lexer, $startPosition); - - return null; - } } diff --git a/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php b/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php index e43890cbb..3b7e2a2f5 100644 --- a/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php +++ b/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php @@ -6,25 +6,15 @@ use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode; use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext; -use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer; -use Psr\Log\LoggerInterface; - -use function sprintf; -use function trim; +use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser; /** @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases */ abstract class AbstractReferenceTextRole implements TextRole { + use EmbeddedUriParser; + /** @see https://regex101.com/r/htMn5p/1 */ public const INTERLINK_REGEX = '/^([a-zA-Z0-9-_]+):(.*$)/'; - private readonly InlineLexer $lexer; - - public function __construct( - private readonly LoggerInterface $logger, - ) { - // Do not inject the $lexer. It contains a state. 
- $this->lexer = new InlineLexer(); - } public function processNode( DocumentParserContext $documentParserContext, @@ -32,60 +22,9 @@ public function processNode( string $content, string $rawContent, ): AbstractLinkInlineNode { - $referenceTarget = null; - $value = null; - - $part = ''; - $this->lexer->setInput($content); - $this->lexer->moveNext(); - $this->lexer->moveNext(); - while ($this->lexer->token !== null) { - $token = $this->lexer->token; - switch ($token->type) { - case InlineLexer::EMBEDED_URL_START: - $value = trim($part); - $part = ''; - - break; - case InlineLexer::EMBEDED_URL_END: - if ($value === null) { - // not inside the embedded URL - $part .= $token->value; - break; - } - - if ($this->lexer->peek() !== null) { - $this->logger->debug( - sprintf( - 'Reference contains unexpected content after closing `>`, treating it as text like sphinx does: "%s"', - $rawContent, - ), - $documentParserContext->getLoggerInformation(), - ); - $part = $value . '<' . $part . '>'; - $value = null; - break; - } - - $referenceTarget = $part; - $part = ''; - - break 2; - default: - $part .= $token->value; - } - - $this->lexer->moveNext(); - } - - $value .= trim($part); - - if ($referenceTarget === null) { - $referenceTarget = $value; - $value = null; - } + $parsed = $this->extractEmbeddedUri($content); - return $this->createNode($referenceTarget, $value, $role); + return $this->createNode($parsed['uri'], $parsed['text'], $role); } abstract protected function createNode(string $referenceTarget, string|null $referenceName, string $role): AbstractLinkInlineNode; diff --git a/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php b/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php index 4cf9c9e0d..25f1f1612 100644 --- a/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php +++ 
b/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php @@ -6,18 +6,11 @@ use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode; use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode; -use Psr\Log\LoggerInterface; use function preg_match; class DocReferenceTextRole extends AbstractReferenceTextRole { - public function __construct( - protected readonly LoggerInterface $logger, - ) { - parent::__construct($this->logger); - } - final public const NAME = 'doc'; public function getName(): string diff --git a/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php b/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php index 05db85099..43f9c47f0 100644 --- a/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php +++ b/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php @@ -7,7 +7,6 @@ use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode; use phpDocumentor\Guides\Nodes\Inline\ReferenceNode; use phpDocumentor\Guides\ReferenceResolvers\AnchorReducer; -use Psr\Log\LoggerInterface; use function array_keys; use function preg_match; @@ -15,11 +14,9 @@ class GenericReferenceTextRole extends AbstractReferenceTextRole { public function __construct( - protected readonly LoggerInterface $logger, private readonly GenericLinkProvider $genericLinkProvider, private readonly AnchorReducer $anchorReducer, ) { - parent::__construct($this->logger); } public function getName(): string diff --git a/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php b/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php index fde2e723b..9516dde2c 100644 --- a/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php +++ b/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php @@ -4,7 +4,6 @@ namespace 
phpDocumentor\Guides\RestructuredText\Parser; -use Monolog\Logger; use phpDocumentor\Guides\Nodes\Inline\CitationInlineNode; use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode; use phpDocumentor\Guides\Nodes\Inline\EmphasisInlineNode; @@ -43,20 +42,18 @@ final class InlineTokenParserTest extends TestCase { - public Logger $logger; private DocumentParserContext $documentParserContext; private InlineParser $inlineTokenParser; private DefaultTextRoleFactory $textRoleFactory; public function setUp(): void { - $this->logger = new Logger('test'); $this->textRoleFactory = new DefaultTextRoleFactory( new GenericTextRole(), new LiteralTextRole(), [ - new ReferenceTextRole($this->logger), - new DocReferenceTextRole($this->logger), + new ReferenceTextRole(), + new DocReferenceTextRole(), ], ); $this->documentParserContext = new DocumentParserContext( @@ -167,19 +164,19 @@ public static function inlineNodeProvider(): array new InlineCompoundNode([new HyperLinkNode('myref', 'myref')]), ], 'Named Reference, Phrased, With URL' => [ - '`myref`_', + '`myref `_', new InlineCompoundNode([new HyperLinkNode('myref', 'https://test.com')]), ], 'Named Reference, Phrased, With URL not ended' => [ - '`myref [ '`myref`__', new InlineCompoundNode([new HyperLinkNode('myref', 'myref')]), ], 'Anonymous Reference, Phrased, With URL' => [ - '`myref`__', + '`myref `__', new InlineCompoundNode([new HyperLinkNode('myref', 'https://test.com')]), ], 'Footnote' => [ diff --git a/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php b/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php index 8c88afec3..700a53856 100644 --- a/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php +++ b/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php @@ -4,7 +4,6 @@ namespace phpDocumentor\Guides\RestructuredText\TextRoles; -use Monolog\Logger; use 
phpDocumentor\Guides\Nodes\Inline\DocReferenceNode; use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext; use PHPUnit\Framework\Attributes\DataProvider; @@ -13,15 +12,13 @@ class DocReferenceTextRoleTest extends TestCase { - private Logger $logger; private DocReferenceTextRole $docReferenceTextRole; private DocumentParserContext&MockObject $documentParserContext; public function setUp(): void { - $this->logger = new Logger('test'); $this->documentParserContext = $this->createMock(DocumentParserContext::class); - $this->docReferenceTextRole = new DocReferenceTextRole($this->logger); + $this->docReferenceTextRole = new DocReferenceTextRole(); } #[DataProvider('docReferenceProvider')] diff --git a/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php b/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php index 24aef400b..ca3f6150a 100644 --- a/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php +++ b/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php @@ -4,7 +4,6 @@ namespace phpDocumentor\Guides\RestructuredText\TextRoles; -use Monolog\Logger; use phpDocumentor\Guides\Nodes\Inline\ReferenceNode; use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext; use PHPUnit\Framework\Attributes\DataProvider; @@ -13,15 +12,13 @@ class ReferenceTextRoleTest extends TestCase { - private Logger $logger; private ReferenceTextRole $referenceTextRole; private DocumentParserContext&MockObject $documentParserContext; public function setUp(): void { - $this->logger = new Logger('test'); $this->documentParserContext = $this->createMock(DocumentParserContext::class); - $this->referenceTextRole = new ReferenceTextRole($this->logger); + $this->referenceTextRole = new ReferenceTextRole(); } #[DataProvider('referenceProvider')] diff --git a/tests/Functional/tests/links/links.html b/tests/Functional/tests/links/links.html index 9ed0537b7..e1b52c707 
100644 --- a/tests/Functional/tests/links/links.html +++ b/tests/Functional/tests/links/links.html @@ -10,3 +10,5 @@ http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING.

You can read more on the features of Embeddables objects in the documentation.

Doctrine has a few links that are between brackets (such as here)

+

Links may use reserved characters as name like this one linking to the <head> element. +But this < is not a link >.

diff --git a/tests/Functional/tests/links/links.rst b/tests/Functional/tests/links/links.rst index 4277353ae..ae0736ddf 100644 --- a/tests/Functional/tests/links/links.rst +++ b/tests/Functional/tests/links/links.rst @@ -25,5 +25,9 @@ You can read more on the features of Embeddables objects `in the documentation Doctrine has a few links that are between brackets (`such as here `_) +Links may use reserved characters as name like `this one linking to the element`_. +But this :code:`<` is not a link :code:`>`. + .. _`xkcd`: http://xkcd.com/ .. _something: http://something.com/ +.. _`this one linking to the element`: https://html.spec.whatwg.org/#the-head-element