From 9440e98a9bbfe539d3feaeba4e303c5649aac8bf Mon Sep 17 00:00:00 2001
From: Wouter de Jong
Date: Sat, 16 Dec 2023 16:07:07 +0100
Subject: [PATCH] Fix using < or > in a reference name
These characters are valid and only the last `<...>` must be treated as
the embedded URL. This stops using the inline lexer inside references:
reST doesn't allow nested inline nodes, and the only thing required is
getting the embedded URL (if there is any).
---
.../Parser/EmbeddedUriParser.php | 28 ++++++++
.../RestructuredText/Parser/InlineLexer.php | 6 --
.../InlineRules/AnonymousPhraseRule.php | 29 ++++----
.../InlineRules/NamedPhraseRule.php | 28 ++++----
.../Productions/InlineRules/ReferenceRule.php | 32 ---------
.../TextRoles/AbstractReferenceTextRole.php | 71 ++-----------------
.../TextRoles/DocReferenceTextRole.php | 7 --
.../TextRoles/GenericReferenceTextRole.php | 3 -
.../unit/Parser/InlineTokenParserTest.php | 15 ++--
.../TextRoles/DocReferenceTextRoleTest.php | 5 +-
.../unit/TextRoles/ReferenceTextRoleTest.php | 5 +-
tests/Functional/tests/links/links.html | 2 +
tests/Functional/tests/links/links.rst | 4 ++
13 files changed, 77 insertions(+), 158 deletions(-)
create mode 100644 packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php
diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php b/packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php
new file mode 100644
index 000000000..4c7cc6b90
--- /dev/null
+++ b/packages/guides-restructured-text/src/RestructuredText/Parser/EmbeddedUriParser.php
@@ -0,0 +1,28 @@
+)?$/s', $text, $matches);
+
+ $text = $matches[1] === '' ? null : $matches[1];
+ $uri = $matches[1];
+
+ if (isset($matches[2])) {
+ // there is an embedded URI, text and URI are different
+ $uri = $matches[2];
+ } else {
+ $text = null;
+ }
+
+ return ['text' => $text, 'uri' => $uri];
+ }
+}
diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php
index 42b253f2f..992625026 100644
--- a/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php
+++ b/packages/guides-restructured-text/src/RestructuredText/Parser/InlineLexer.php
@@ -23,8 +23,6 @@ final class InlineLexer extends AbstractLexer
public const ANONYMOUS_END = 3;
public const LITERAL = 5;
public const BACKTICK = 6;
- public const EMBEDED_URL_START = 9;
- public const EMBEDED_URL_END = 10;
public const NAMED_REFERENCE = 11;
public const ANONYMOUSE_REFERENCE = 12;
public const COLON = 13;
@@ -62,8 +60,6 @@ protected function getCatchablePatterns(): array
'``.+?``(?!`)',
'_{2}',
'_',
- '<',
- '>',
'`',
':',
'|',
@@ -138,8 +134,6 @@ protected function getType(string &$value)
'**' => self::STRONG_DELIMITER,
'*' => self::EMPHASIS_DELIMITER,
'|' => self::VARIABLE_DELIMITER,
- '<' => self::EMBEDED_URL_START,
- '>' => self::EMBEDED_URL_END,
'_' => self::UNDERSCORE,
'__' => self::ANONYMOUS_END,
':' => self::COLON,
diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php
index d0b3d0c9f..ddc721c86 100644
--- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php
+++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/AnonymousPhraseRule.php
@@ -6,6 +6,7 @@
use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
+use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
/**
@@ -20,6 +21,8 @@
*/
class AnonymousPhraseRule extends ReferenceRule
{
+ use EmbeddedUriParser;
+
public function applies(InlineLexer $lexer): bool
{
return $lexer->token?->type === InlineLexer::BACKTICK;
@@ -27,8 +30,7 @@ public function applies(InlineLexer $lexer): bool
public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractLinkInlineNode|null
{
- $text = '';
- $embeddedUrl = null;
+ $value = '';
$initialPosition = $lexer->token?->position;
$lexer->moveNext();
while ($lexer->token !== null) {
@@ -43,17 +45,10 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL
$lexer->moveNext();
- return $this->createAnonymousReference($blockContext, $text, $embeddedUrl);
-
- case InlineLexer::EMBEDED_URL_START:
- $embeddedUrl = $this->parseEmbeddedUrl($lexer);
- if ($embeddedUrl === null) {
- $text .= '<';
- }
+ return $this->createAnonymousReference($blockContext, $value);
- break;
default:
- $text .= $lexer->token->value;
+ $value .= $lexer->token->value;
}
$lexer->moveNext();
@@ -64,9 +59,17 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL
return null;
}
- private function createAnonymousReference(BlockContext $blockContext, string $link, string|null $embeddedUrl): AbstractLinkInlineNode
+ private function createAnonymousReference(BlockContext $blockContext, string $value): AbstractLinkInlineNode
{
- $node = $this->createReference($blockContext, $link, $embeddedUrl, false);
+ $parsed = $this->extractEmbeddedUri($value);
+ $link = $parsed['text'];
+ $uri = $parsed['uri'];
+ if ($link === null) {
+ $link = $uri;
+ $uri = null;
+ }
+
+ $node = $this->createReference($blockContext, $link, $uri, false);
$blockContext->getDocumentParserContext()->pushAnonymous($link);
return $node;
diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php
index 98fb5555d..dcadf66aa 100644
--- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php
+++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/NamedPhraseRule.php
@@ -6,6 +6,7 @@
use phpDocumentor\Guides\Nodes\Inline\InlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
+use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
/**
@@ -20,6 +21,8 @@
*/
class NamedPhraseRule extends ReferenceRule
{
+ use EmbeddedUriParser;
+
public function applies(InlineLexer $lexer): bool
{
return $lexer->token?->type === InlineLexer::BACKTICK;
@@ -27,8 +30,7 @@ public function applies(InlineLexer $lexer): bool
public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNode|null
{
- $text = '';
- $embeddedUrl = null;
+ $value = '';
$initialPosition = $lexer->token?->position;
$lexer->moveNext();
while ($lexer->token !== null) {
@@ -42,25 +44,23 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod
}
$lexer->moveNext();
- if ($text === '') {
- $text = $embeddedUrl ?? '';
- }
-
- return $this->createReference($blockContext, $text, $embeddedUrl);
- case InlineLexer::EMBEDED_URL_START:
- $embeddedUrl = $this->parseEmbeddedUrl($lexer);
- if ($embeddedUrl === null) {
- $text .= '<';
+ $parsed = $this->extractEmbeddedUri($value);
+ $text = $parsed['text'];
+ $uri = $parsed['uri'];
+ if ($text === null) {
+ $text = $uri;
+ $uri = null;
}
- break;
+ return $this->createReference($blockContext, $text, $uri);
+
case InlineLexer::WHITESPACE:
- $text .= ' ';
+ $value .= ' ';
break;
default:
- $text .= $lexer->token->value;
+ $value .= $lexer->token->value;
}
$lexer->moveNext();
diff --git a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php
index 2c1bfc013..7398ba614 100644
--- a/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php
+++ b/packages/guides-restructured-text/src/RestructuredText/Parser/Productions/InlineRules/ReferenceRule.php
@@ -8,7 +8,6 @@
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\Nodes\Inline\HyperLinkNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
-use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
use function filter_var;
use function preg_replace;
@@ -41,35 +40,4 @@ protected function createReference(BlockContext $blockContext, string $link, str
return new HyperLinkNode($link, $targetLink);
}
-
- protected function parseEmbeddedUrl(InlineLexer $lexer): string|null
- {
- if ($lexer->token === null) {
- return null;
- }
-
- $startPosition = $lexer->token->position;
- $text = '';
-
- while ($lexer->moveNext()) {
- $token = $lexer->token;
- switch ($token->type) {
- case InlineLexer::BACKTICK:
- //We did not find the expected SpanLexer::EMBEDED_URL_END
- $this->rollback($lexer, $startPosition);
-
- return null;
-
- case InlineLexer::EMBEDED_URL_END:
- return $text;
-
- default:
- $text .= $token->value;
- }
- }
-
- $this->rollback($lexer, $startPosition);
-
- return null;
- }
}
diff --git a/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php b/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php
index e43890cbb..3b7e2a2f5 100644
--- a/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php
+++ b/packages/guides-restructured-text/src/RestructuredText/TextRoles/AbstractReferenceTextRole.php
@@ -6,25 +6,15 @@
use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
-use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
-use Psr\Log\LoggerInterface;
-
-use function sprintf;
-use function trim;
+use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser;
/** @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases */
abstract class AbstractReferenceTextRole implements TextRole
{
+ use EmbeddedUriParser;
+
/** @see https://regex101.com/r/htMn5p/1 */
public const INTERLINK_REGEX = '/^([a-zA-Z0-9-_]+):(.*$)/';
- private readonly InlineLexer $lexer;
-
- public function __construct(
- private readonly LoggerInterface $logger,
- ) {
- // Do not inject the $lexer. It contains a state.
- $this->lexer = new InlineLexer();
- }
public function processNode(
DocumentParserContext $documentParserContext,
@@ -32,60 +22,9 @@ public function processNode(
string $content,
string $rawContent,
): AbstractLinkInlineNode {
- $referenceTarget = null;
- $value = null;
-
- $part = '';
- $this->lexer->setInput($content);
- $this->lexer->moveNext();
- $this->lexer->moveNext();
- while ($this->lexer->token !== null) {
- $token = $this->lexer->token;
- switch ($token->type) {
- case InlineLexer::EMBEDED_URL_START:
- $value = trim($part);
- $part = '';
-
- break;
- case InlineLexer::EMBEDED_URL_END:
- if ($value === null) {
- // not inside the embedded URL
- $part .= $token->value;
- break;
- }
-
- if ($this->lexer->peek() !== null) {
- $this->logger->debug(
- sprintf(
- 'Reference contains unexpected content after closing `>`, treating it as text like sphinx does: "%s"',
- $rawContent,
- ),
- $documentParserContext->getLoggerInformation(),
- );
- $part = $value . '<' . $part . '>';
- $value = null;
- break;
- }
-
- $referenceTarget = $part;
- $part = '';
-
- break 2;
- default:
- $part .= $token->value;
- }
-
- $this->lexer->moveNext();
- }
-
- $value .= trim($part);
-
- if ($referenceTarget === null) {
- $referenceTarget = $value;
- $value = null;
- }
+ $parsed = $this->extractEmbeddedUri($content);
- return $this->createNode($referenceTarget, $value, $role);
+ return $this->createNode($parsed['uri'], $parsed['text'], $role);
}
abstract protected function createNode(string $referenceTarget, string|null $referenceName, string $role): AbstractLinkInlineNode;
diff --git a/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php b/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php
index 4cf9c9e0d..25f1f1612 100644
--- a/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php
+++ b/packages/guides-restructured-text/src/RestructuredText/TextRoles/DocReferenceTextRole.php
@@ -6,18 +6,11 @@
use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
-use Psr\Log\LoggerInterface;
use function preg_match;
class DocReferenceTextRole extends AbstractReferenceTextRole
{
- public function __construct(
- protected readonly LoggerInterface $logger,
- ) {
- parent::__construct($this->logger);
- }
-
final public const NAME = 'doc';
public function getName(): string
diff --git a/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php b/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php
index 05db85099..43f9c47f0 100644
--- a/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php
+++ b/packages/guides-restructured-text/src/RestructuredText/TextRoles/GenericReferenceTextRole.php
@@ -7,7 +7,6 @@
use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\Nodes\Inline\ReferenceNode;
use phpDocumentor\Guides\ReferenceResolvers\AnchorReducer;
-use Psr\Log\LoggerInterface;
use function array_keys;
use function preg_match;
@@ -15,11 +14,9 @@
class GenericReferenceTextRole extends AbstractReferenceTextRole
{
public function __construct(
- protected readonly LoggerInterface $logger,
private readonly GenericLinkProvider $genericLinkProvider,
private readonly AnchorReducer $anchorReducer,
) {
- parent::__construct($this->logger);
}
public function getName(): string
diff --git a/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php b/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php
index fde2e723b..9516dde2c 100644
--- a/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php
+++ b/packages/guides-restructured-text/tests/unit/Parser/InlineTokenParserTest.php
@@ -4,7 +4,6 @@
namespace phpDocumentor\Guides\RestructuredText\Parser;
-use Monolog\Logger;
use phpDocumentor\Guides\Nodes\Inline\CitationInlineNode;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\Nodes\Inline\EmphasisInlineNode;
@@ -43,20 +42,18 @@
final class InlineTokenParserTest extends TestCase
{
- public Logger $logger;
private DocumentParserContext $documentParserContext;
private InlineParser $inlineTokenParser;
private DefaultTextRoleFactory $textRoleFactory;
public function setUp(): void
{
- $this->logger = new Logger('test');
$this->textRoleFactory = new DefaultTextRoleFactory(
new GenericTextRole(),
new LiteralTextRole(),
[
- new ReferenceTextRole($this->logger),
- new DocReferenceTextRole($this->logger),
+ new ReferenceTextRole(),
+ new DocReferenceTextRole(),
],
);
$this->documentParserContext = new DocumentParserContext(
@@ -167,19 +164,19 @@ public static function inlineNodeProvider(): array
new InlineCompoundNode([new HyperLinkNode('myref', 'myref')]),
],
'Named Reference, Phrased, With URL' => [
- '`myref`_',
+ '`myref `_',
new InlineCompoundNode([new HyperLinkNode('myref', 'https://test.com')]),
],
'Named Reference, Phrased, With URL not ended' => [
- '`myref [
'`myref`__',
new InlineCompoundNode([new HyperLinkNode('myref', 'myref')]),
],
'Anonymous Reference, Phrased, With URL' => [
- '`myref`__',
+ '`myref `__',
new InlineCompoundNode([new HyperLinkNode('myref', 'https://test.com')]),
],
'Footnote' => [
diff --git a/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php b/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php
index 8c88afec3..700a53856 100644
--- a/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php
+++ b/packages/guides-restructured-text/tests/unit/TextRoles/DocReferenceTextRoleTest.php
@@ -4,7 +4,6 @@
namespace phpDocumentor\Guides\RestructuredText\TextRoles;
-use Monolog\Logger;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
use PHPUnit\Framework\Attributes\DataProvider;
@@ -13,15 +12,13 @@
class DocReferenceTextRoleTest extends TestCase
{
- private Logger $logger;
private DocReferenceTextRole $docReferenceTextRole;
private DocumentParserContext&MockObject $documentParserContext;
public function setUp(): void
{
- $this->logger = new Logger('test');
$this->documentParserContext = $this->createMock(DocumentParserContext::class);
- $this->docReferenceTextRole = new DocReferenceTextRole($this->logger);
+ $this->docReferenceTextRole = new DocReferenceTextRole();
}
#[DataProvider('docReferenceProvider')]
diff --git a/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php b/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php
index 24aef400b..ca3f6150a 100644
--- a/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php
+++ b/packages/guides-restructured-text/tests/unit/TextRoles/ReferenceTextRoleTest.php
@@ -4,7 +4,6 @@
namespace phpDocumentor\Guides\RestructuredText\TextRoles;
-use Monolog\Logger;
use phpDocumentor\Guides\Nodes\Inline\ReferenceNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
use PHPUnit\Framework\Attributes\DataProvider;
@@ -13,15 +12,13 @@
class ReferenceTextRoleTest extends TestCase
{
- private Logger $logger;
private ReferenceTextRole $referenceTextRole;
private DocumentParserContext&MockObject $documentParserContext;
public function setUp(): void
{
- $this->logger = new Logger('test');
$this->documentParserContext = $this->createMock(DocumentParserContext::class);
- $this->referenceTextRole = new ReferenceTextRole($this->logger);
+ $this->referenceTextRole = new ReferenceTextRole();
}
#[DataProvider('referenceProvider')]
diff --git a/tests/Functional/tests/links/links.html b/tests/Functional/tests/links/links.html
index 9ed0537b7..e1b52c707 100644
--- a/tests/Functional/tests/links/links.html
+++ b/tests/Functional/tests/links/links.html
@@ -10,3 +10,5 @@
http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING.
You can read more on the features of Embeddables objects in the documentation.
Doctrine has a few links that are between brackets (such as here)
+Links may use reserved characters as name like this one linking to the <head> element.
+But this <
is not a link >
.
diff --git a/tests/Functional/tests/links/links.rst b/tests/Functional/tests/links/links.rst
index 4277353ae..ae0736ddf 100644
--- a/tests/Functional/tests/links/links.rst
+++ b/tests/Functional/tests/links/links.rst
@@ -25,5 +25,9 @@ You can read more on the features of Embeddables objects `in the documentation
Doctrine has a few links that are between brackets (`such as here
`_)
+Links may use reserved characters as name like `this one linking to the element`_.
+But this :code:`<` is not a link :code:`>`.
+
.. _`xkcd`: http://xkcd.com/
.. _something: http://something.com/
+.. _`this one linking to the element`: https://html.spec.whatwg.org/#the-head-element