Skip to content

Commit

Permalink
Merge pull request #761 from wouterj/html-tags-in-reference
Browse files Browse the repository at this point in the history
Fix using < or > in a reference name
  • Loading branch information
jaapio authored Dec 18, 2023
2 parents 4c6eb16 + c0e4b1b commit e647551
Show file tree
Hide file tree
Showing 13 changed files with 77 additions and 158 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

declare(strict_types=1);

namespace phpDocumentor\Guides\RestructuredText\Parser;

use function preg_match;

/**
 * Splits a reference value of the form `text <uri>` into its text and URI parts.
 *
 * @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases
 */
trait EmbeddedUriParser
{
    /**
     * Extracts a trailing `<...>` part from a reference value.
     *
     * The angle-bracket part is only recognised when it closes the value and is
     * either preceded by a whitespace character or starts the value. Any `<` or `>`
     * that does not form such a trailing part stays in the returned string untouched.
     *
     * - With an embedded URI: `uri` is the content between the brackets and `text`
     *   is whatever precedes it (`null` when nothing precedes it).
     * - Without an embedded URI: `text` is `null` and `uri` is the matched value.
     *
     * @param string $text raw reference content, e.g. `Link text <target>`
     *
     * @return array{text:?string,uri:string}
     */
    private function extractEmbeddedUri(string $text): array
    {
        $matched = preg_match('/^(.*?)(?:(?:\s|^)<([^<]+)>)?$/s', $text, $matches);

        if ($matched !== 1) {
            // preg_match() returns false on a PCRE error (for example when the backtrack
            // limit is exceeded). $matches is empty in that case, so reading it would
            // emit warnings and yield a null URI; treat the whole input as the URI.
            return ['text' => null, 'uri' => $text];
        }

        if (!isset($matches[2])) {
            // No embedded URI: the value itself is the target and there is no separate text.
            return ['text' => null, 'uri' => $matches[1]];
        }

        // There is an embedded URI, text and URI are different.
        return [
            'text' => $matches[1] === '' ? null : $matches[1],
            'uri' => $matches[2],
        ];
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ final class InlineLexer extends AbstractLexer
public const ANONYMOUS_END = 3;
public const LITERAL = 5;
public const BACKTICK = 6;
public const EMBEDED_URL_START = 9;
public const EMBEDED_URL_END = 10;
public const NAMED_REFERENCE = 11;
public const ANONYMOUSE_REFERENCE = 12;
public const COLON = 13;
Expand Down Expand Up @@ -62,8 +60,6 @@ protected function getCatchablePatterns(): array
'``.+?``(?!`)',
'_{2}',
'_',
'<',
'>',
'`',
':',
'|',
Expand Down Expand Up @@ -138,8 +134,6 @@ protected function getType(string &$value)
'**' => self::STRONG_DELIMITER,
'*' => self::EMPHASIS_DELIMITER,
'|' => self::VARIABLE_DELIMITER,
'<' => self::EMBEDED_URL_START,
'>' => self::EMBEDED_URL_END,
'_' => self::UNDERSCORE,
'__' => self::ANONYMOUS_END,
':' => self::COLON,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;

/**
Expand All @@ -20,15 +21,16 @@
*/
class AnonymousPhraseRule extends ReferenceRule
{
use EmbeddedUriParser;

public function applies(InlineLexer $lexer): bool
{
return $lexer->token?->type === InlineLexer::BACKTICK;
}

public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractLinkInlineNode|null
{
$text = '';
$embeddedUrl = null;
$value = '';
$initialPosition = $lexer->token?->position;
$lexer->moveNext();
while ($lexer->token !== null) {
Expand All @@ -43,17 +45,10 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL

$lexer->moveNext();

return $this->createAnonymousReference($blockContext, $text, $embeddedUrl);

case InlineLexer::EMBEDED_URL_START:
$embeddedUrl = $this->parseEmbeddedUrl($lexer);
if ($embeddedUrl === null) {
$text .= '<';
}
return $this->createAnonymousReference($blockContext, $value);

break;
default:
$text .= $lexer->token->value;
$value .= $lexer->token->value;
}

$lexer->moveNext();
Expand All @@ -64,9 +59,17 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): AbstractL
return null;
}

private function createAnonymousReference(BlockContext $blockContext, string $link, string|null $embeddedUrl): AbstractLinkInlineNode
private function createAnonymousReference(BlockContext $blockContext, string $value): AbstractLinkInlineNode
{
$node = $this->createReference($blockContext, $link, $embeddedUrl, false);
$parsed = $this->extractEmbeddedUri($value);
$link = $parsed['text'];
$uri = $parsed['uri'];
if ($link === null) {
$link = $uri;
$uri = null;
}

$node = $this->createReference($blockContext, $link, $uri, false);
$blockContext->getDocumentParserContext()->pushAnonymous($link);

return $node;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use phpDocumentor\Guides\Nodes\Inline\InlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;

/**
Expand All @@ -20,15 +21,16 @@
*/
class NamedPhraseRule extends ReferenceRule
{
use EmbeddedUriParser;

public function applies(InlineLexer $lexer): bool
{
return $lexer->token?->type === InlineLexer::BACKTICK;
}

public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNode|null
{
$text = '';
$embeddedUrl = null;
$value = '';
$initialPosition = $lexer->token?->position;
$lexer->moveNext();
while ($lexer->token !== null) {
Expand All @@ -42,25 +44,23 @@ public function apply(BlockContext $blockContext, InlineLexer $lexer): InlineNod
}

$lexer->moveNext();
if ($text === '') {
$text = $embeddedUrl ?? '';
}

return $this->createReference($blockContext, $text, $embeddedUrl);

case InlineLexer::EMBEDED_URL_START:
$embeddedUrl = $this->parseEmbeddedUrl($lexer);
if ($embeddedUrl === null) {
$text .= '<';
$parsed = $this->extractEmbeddedUri($value);
$text = $parsed['text'];
$uri = $parsed['uri'];
if ($text === null) {
$text = $uri;
$uri = null;
}

break;
return $this->createReference($blockContext, $text, $uri);

case InlineLexer::WHITESPACE:
$text .= ' ';
$value .= ' ';

break;
default:
$text .= $lexer->token->value;
$value .= $lexer->token->value;
}

$lexer->moveNext();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use phpDocumentor\Guides\Nodes\Inline\HyperLinkNode;
use phpDocumentor\Guides\RestructuredText\Parser\BlockContext;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;

use function filter_var;
use function preg_replace;
Expand Down Expand Up @@ -41,35 +40,4 @@ protected function createReference(BlockContext $blockContext, string $link, str

return new HyperLinkNode($link, $targetLink);
}

protected function parseEmbeddedUrl(InlineLexer $lexer): string|null
{
if ($lexer->token === null) {
return null;
}

$startPosition = $lexer->token->position;
$text = '';

while ($lexer->moveNext()) {
$token = $lexer->token;
switch ($token->type) {
case InlineLexer::BACKTICK:
//We did not find the expected SpanLexer::EMBEDED_URL_END
$this->rollback($lexer, $startPosition);

return null;

case InlineLexer::EMBEDED_URL_END:
return $text;

default:
$text .= $token->value;
}
}

$this->rollback($lexer, $startPosition);

return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,86 +6,25 @@

use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\RestructuredText\Parser\DocumentParserContext;
use phpDocumentor\Guides\RestructuredText\Parser\InlineLexer;
use Psr\Log\LoggerInterface;

use function sprintf;
use function trim;
use phpDocumentor\Guides\RestructuredText\Parser\EmbeddedUriParser;

/** @see https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases */
abstract class AbstractReferenceTextRole implements TextRole
{
use EmbeddedUriParser;

/** @see https://regex101.com/r/htMn5p/1 */
public const INTERLINK_REGEX = '/^([a-zA-Z0-9-_]+):(.*$)/';
private readonly InlineLexer $lexer;

public function __construct(
private readonly LoggerInterface $logger,
) {
// Do not inject the $lexer. It contains a state.
$this->lexer = new InlineLexer();
}

public function processNode(
DocumentParserContext $documentParserContext,
string $role,
string $content,
string $rawContent,
): AbstractLinkInlineNode {
$referenceTarget = null;
$value = null;

$part = '';
$this->lexer->setInput($content);
$this->lexer->moveNext();
$this->lexer->moveNext();
while ($this->lexer->token !== null) {
$token = $this->lexer->token;
switch ($token->type) {
case InlineLexer::EMBEDED_URL_START:
$value = trim($part);
$part = '';

break;
case InlineLexer::EMBEDED_URL_END:
if ($value === null) {
// not inside the embedded URL
$part .= $token->value;
break;
}

if ($this->lexer->peek() !== null) {
$this->logger->debug(
sprintf(
'Reference contains unexpected content after closing `>`, treating it as text like sphinx does: "%s"',
$rawContent,
),
$documentParserContext->getLoggerInformation(),
);
$part = $value . '<' . $part . '>';
$value = null;
break;
}

$referenceTarget = $part;
$part = '';

break 2;
default:
$part .= $token->value;
}

$this->lexer->moveNext();
}

$value .= trim($part);

if ($referenceTarget === null) {
$referenceTarget = $value;
$value = null;
}
$parsed = $this->extractEmbeddedUri($content);

return $this->createNode($referenceTarget, $value, $role);
return $this->createNode($parsed['uri'], $parsed['text'], $role);
}

abstract protected function createNode(string $referenceTarget, string|null $referenceName, string $role): AbstractLinkInlineNode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,11 @@

use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\Nodes\Inline\DocReferenceNode;
use Psr\Log\LoggerInterface;

use function preg_match;

class DocReferenceTextRole extends AbstractReferenceTextRole
{
public function __construct(
protected readonly LoggerInterface $logger,
) {
parent::__construct($this->logger);
}

final public const NAME = 'doc';

public function getName(): string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,16 @@
use phpDocumentor\Guides\Nodes\Inline\AbstractLinkInlineNode;
use phpDocumentor\Guides\Nodes\Inline\ReferenceNode;
use phpDocumentor\Guides\ReferenceResolvers\AnchorReducer;
use Psr\Log\LoggerInterface;

use function array_keys;
use function preg_match;

class GenericReferenceTextRole extends AbstractReferenceTextRole
{
public function __construct(
protected readonly LoggerInterface $logger,
private readonly GenericLinkProvider $genericLinkProvider,
private readonly AnchorReducer $anchorReducer,
) {
parent::__construct($this->logger);
}

public function getName(): string
Expand Down
Loading

0 comments on commit e647551

Please sign in to comment.