Skip to content

Commit

Permalink
Fix using < or > in a reference name
Browse files Browse the repository at this point in the history
These characters are valid, and only the last `<...>` must be treated as
the embedded URL. This stops using the inline lexer inside references:
reST doesn't allow nested inline nodes, and the only thing required is
getting the embedded URL (if there is one).
  • Loading branch information
wouterj committed Dec 16, 2023
1 parent b42e3e1 commit cc2e28c
Show file tree
Hide file tree
Showing 12 changed files with 29 additions and 127 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use function array_flip;
use function parse_url;
use function preg_match;
use function strlen;

use const PHP_URL_SCHEME;

Expand All @@ -23,8 +24,7 @@ final class InlineLexer extends AbstractLexer
public const ANONYMOUS_END = 3;
public const LITERAL = 5;
public const BACKTICK = 6;
public const EMBEDED_URL_START = 9;
public const EMBEDED_URL_END = 10;
public const EMBEDED_URL = 9;
public const NAMED_REFERENCE = 11;
public const ANONYMOUSE_REFERENCE = 12;
public const COLON = 13;
Expand Down Expand Up @@ -60,10 +60,9 @@ protected function getCatchablePatterns(): array
'(?<=^|\s)[a-z0-9-]+_{2}', //Inline href.
'(?<=^|\s)[a-z0-9-]+_{1}(?=[\s\.+]|$)', //Inline href.
'``.+?``(?!`)',
'<[^<]+?>(?=`)',
'_{2}',
'_',
'<',
'>',
'`',
':',
'|',
Expand Down Expand Up @@ -133,13 +132,15 @@ protected function getType(string &$value)
return self::WHITESPACE;
}

if ($value[0] === '<' && strlen($value) > 1) {
return self::EMBEDED_URL;
}

return match ($value) {
'`' => self::BACKTICK,
'**' => self::STRONG_DELIMITER,
'*' => self::EMPHASIS_DELIMITER,
'|' => self::VARIABLE_DELIMITER,
'<' => self::EMBEDED_URL_START,
'>' => self::EMBEDED_URL_END,
'_' => self::UNDERSCORE,
'__' => self::ANONYMOUS_END,
':' => self::COLON,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;

use function substr;

/**
* Rule to parse for anonymous references
*
Expand Down Expand Up @@ -45,11 +47,8 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL

return $this->createAnonymousReference($blockContext, $text, $embeddedUrl);

case InlineLexer::EMBEDED_URL_START:
$embeddedUrl = $this->parseEmbeddedUrl($lexer);
if ($embeddedUrl === null) {
$text .= '<';
}
case InlineLexer::EMBEDED_URL:
$embeddedUrl = substr($lexer->token->value, 1, -1);

break;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;

use function substr;

/**
* Rule to parse for named references
*
Expand Down Expand Up @@ -48,11 +50,8 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod

return $this->createReference($blockContext, $text, $embeddedUrl);

case InlineLexer::EMBEDED_URL_START:
$embeddedUrl = $this->parseEmbeddedUrl($lexer);
if ($embeddedUrl === null) {
$text .= '<';
}
case InlineLexer::EMBEDED_URL:
$embeddedUrl = substr($lexer->token->value, 1, -1);

break;
case InlineLexer::WHITESPACE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\Nodes\Inline\HyperLinkNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;

use function filter_var;
use function preg_replace;
Expand Down Expand Up @@ -41,35 +40,4 @@ protected function createReference(BlockContext $blockContext, string $link, str

return new HyperLinkNode($link, $targetLink);
}

protected function parseEmbeddedUrl(InlineLexer $lexer): string|null
{
if ($lexer->token === null) {
return null;
}

$startPosition = $lexer->token->position;
$text = '';

while ($lexer->moveNext()) {
$token = $lexer->token;
switch ($token->type) {
case InlineLexer::BACKTICK:
//We did not find the expected SpanLexer::EMBEDED_URL_END
$this->rollback($lexer, $startPosition);

return null;

case InlineLexer::EMBEDED_URL_END:
return $text;

default:
$text .= $token->value;
}
}

$this->rollback($lexer, $startPosition);

return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,14 @@

use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
use Psr\Log\LoggerInterface;

use function sprintf;
use function trim;
use function preg_match;

/** @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases */
abstract class AbstractReferenceTextRole implements TextRole
{
/** @see https://regex101.com/r/htMn5p/1 */
public const INTERLINK_REGEX = '/^([a-zA-Z0-9-_]+):(.*$)/';
private readonly InlineLexer $lexer;

public function __construct(
private readonly LoggerInterface $logger,
) {
// Do not inject the $lexer. It contains a state.
$this->lexer = new InlineLexer();
}

public function processNode(
DocumentParserContext $documentParserContext,
Expand All @@ -35,54 +24,15 @@ public function processNode(
$referenceTarget = null;
$value = null;

$part = '';
$this->lexer->setInput($content);
$this->lexer->moveNext();
$this->lexer->moveNext();
while ($this->lexer->token !== null) {
$token = $this->lexer->token;
switch ($token->type) {
case InlineLexer::EMBEDED_URL_START:
$value = trim($part);
$part = '';

break;
case InlineLexer::EMBEDED_URL_END:
if ($value === null) {
// not inside the embedded URL
$part .= $token->value;
break;
}

if ($this->lexer->peek() !== null) {
$this->logger->debug(
sprintf(
'Reference contains unexpected content after closing `>`, treating it as text like sphinx does: "%s"',
$rawContent,
),
$documentParserContext->getLoggerInformation(),
);
$part = $value . '<' . $part . '>';
$value = null;
break;
}

$referenceTarget = $part;
$part = '';

break 2;
default:
$part .= $token->value;
}

$this->lexer->moveNext();
}
preg_match('/^(.+?)(?:\s*<(.+)>)?$/s', $content, $matches);

$value .= trim($part);
$value = $matches[1];

if ($referenceTarget === null) {
if (!isset($matches[2])) {
$referenceTarget = $value;
$value = null;
} else {
$referenceTarget = $matches[2];
}

return $this->createNode($referenceTarget, $value, $role);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,11 @@

use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use Psr\Log\LoggerInterface;

use function preg_match;

class DocReferenceTextRole extends AbstractReferenceTextRole
{
public function __construct(
protected readonly LoggerInterface $logger,
) {
parent::__construct($this->logger);
}

final public const NAME = 'doc';

public function getName(): string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,16 @@
use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\Nodes\Inline\ReferenceNode;
use phpDocumentor\Guides\ReferenceResolvers\AnchorReducer;
use Psr\Log\LoggerInterface;

use function array_keys;
use function preg_match;

class GenericReferenceTextRole extends AbstractReferenceTextRole
{
public function __construct(
protected readonly LoggerInterface $logger,
private readonly GenericLinkProvider $genericLinkProvider,
private readonly AnchorReducer $anchorReducer,
) {
parent::__construct($this->logger);
}

public function getName(): string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

namespace phpDocumentor\Guides\RestructuredText\Parser;

use Monolog\Logger;
use phpDocumentor\Guides\Nodes\Inline\CitationInlineNode;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\Nodes\Inline\EmphasisInlineNode;
Expand Down Expand Up @@ -43,20 +42,18 @@

final class InlineTokenParserTest extends TestCase
{
public Logger $logger;
private DocumentParserContext $documentParserContext;
private InlineParser $inlineTokenParser;
private DefaultTextRoleFactory $textRoleFactory;

public function setUp(): void
{
$this->logger = new Logger('test');
$this->textRoleFactory = new DefaultTextRoleFactory(
new GenericTextRole(),
new LiteralTextRole(),
[
new ReferenceTextRole($this->logger),
new DocReferenceTextRole($this->logger),
new ReferenceTextRole(),
new DocReferenceTextRole(),
],
);
$this->documentParserContext = new DocumentParserContext(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

namespace phpDocumentor\Guides\RestructuredText\TextRoles;

use Monolog\Logger;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
use PHPUnit\Framework\Attributes\DataProvider;
Expand All @@ -13,15 +12,13 @@

class DocReferenceTextRoleTest extends TestCase
{
private Logger $logger;
private DocReferenceTextRole $docReferenceTextRole;
private DocumentParserContext&MockObject $documentParserContext;

public function setUp(): void
{
$this->logger = new Logger('test');
$this->documentParserContext = $this->createMock(DocumentParserContext::class);
$this->docReferenceTextRole = new DocReferenceTextRole($this->logger);
$this->docReferenceTextRole = new DocReferenceTextRole();
}

#[DataProvider('docReferenceProvider')]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

namespace phpDocumentor\Guides\RestructuredText\TextRoles;

use Monolog\Logger;
use phpDocumentor\Guides\Nodes\Inline\ReferenceNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
use PHPUnit\Framework\Attributes\DataProvider;
Expand All @@ -13,15 +12,13 @@

class ReferenceTextRoleTest extends TestCase
{
private Logger $logger;
private ReferenceTextRole $referenceTextRole;
private DocumentParserContext&MockObject $documentParserContext;

public function setUp(): void
{
$this->logger = new Logger('test');
$this->documentParserContext = $this->createMock(DocumentParserContext::class);
$this->referenceTextRole = new ReferenceTextRole($this->logger);
$this->referenceTextRole = new ReferenceTextRole();
}

#[DataProvider('referenceProvider')]
Expand Down
1 change: 1 addition & 0 deletions tests/Functional/tests/links/links.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
<a href="http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING">http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING</a>.</p>
<p>You can read more on the features of Embeddables objects <a href="http://docs.doctrine-project.org/en/latest/tutorials/embeddables.html">in the documentation</a>.</p>
<p>Doctrine has a few links that are between brackets (<a href="https://www.doctrine-project.org/projects/doctrine-orm/en/2.16/reference/dql-doctrine-query-language.html#query-hints">such as here</a>)</p>
<p>Links may use reserved characters as name like <a href="https://html.spec.whatwg.org/#the-head-element">this one linking to the &lt;head&gt; element</a>.</p>
3 changes: 3 additions & 0 deletions tests/Functional/tests/links/links.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,8 @@ You can read more on the features of Embeddables objects `in the documentation
Doctrine has a few links that are between brackets (`such as here
<https://www.doctrine-project.org/projects/doctrine-orm/en/2.16/reference/dql-doctrine-query-language.html#query-hints>`_)

Links may use reserved characters as name like `this one linking to the <head> element`_.

.. _`xkcd`: http://xkcd.com/
.. _something: http://something.com/
.. _`this one linking to the <head> element`: https://html.spec.whatwg.org/#the-head-element

0 comments on commit cc2e28c

Please sign in to comment.