Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement RST & Markdown quote blocks #19147

Merged
merged 3 commits into from
Nov 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion config/nimdoc.tex.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,17 @@ doc.file = """
\usepackage[most]{tcolorbox} % boxes around admonitions, code blocks, doc.item

\newtcolorbox{rstadmonition}[1][]{blanker, breakable,
left=3mm, right=3mm, top=1mm, bottom=1mm,
left=3mm, right=0mm, top=1mm, bottom=1mm,
before upper=\indent, parbox=false, #1}

\newtcolorbox{rstquote}[1][]{blanker, breakable,
left=3mm, right=3mm, top=1mm, bottom=1mm,
parbox=false,
borderline west={0.3em}{0pt}{lightgray},
borderline north={0.05em}{0pt}{lightgray},
borderline east={0.05em}{0pt}{lightgray},
borderline south={0.05em}{0pt}{lightgray}}

\definecolor{rstframecolor}{rgb}{0.85, 0.8, 0.6}

\newtcolorbox{rstprebox}[1][]{blanker, breakable,
Expand Down
5 changes: 5 additions & 0 deletions doc/nimdoc.css
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,11 @@ blockquote {
border-left: 5px solid #bbc;
}

blockquote.markdown-quote {
font-size: 0.9rem; /* use rem to avoid recursion */
font-style: normal;
}

.pre, span.tok {
font-family: "Source Code Pro", Monaco, Menlo, Consolas, "Courier New", monospace;
font-weight: 500;
Expand Down
210 changes: 202 additions & 8 deletions lib/packages/docutils/rst.nim
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
## + field lists
## + option lists
## + indented literal blocks
## + quoted literal blocks
## + line blocks
## + simple tables
## + directives (see official documentation in `RST directives list`_):
## - ``image``, ``figure`` for including images and videos
Expand Down Expand Up @@ -121,6 +123,7 @@
## * Markdown code blocks
## * Markdown links
## * Markdown headlines
## * Markdown block quotes
## * using ``1`` as auto-enumerator in enumerated lists like RST ``#``
## (auto-enumerator ``1`` can not be used with ``#`` in the same list)
##
Expand All @@ -145,7 +148,7 @@
## 2) Compatibility mode which is RST rules.
##
## .. Note:: in both modes the parser interprets text between single
## backticks (code) identically:
## backticks (code) identically:
## backslash does not escape; the only exception: ``\`` followed by `
## does escape so that we can always input a single backtick ` in
## inline code. However that makes it impossible to input code with
Expand All @@ -156,13 +159,35 @@
## ``\`` -- GOOD
## So single backticks can always be input: `\`` will turn to ` code
##
## .. Attention::
## We don't support some obviously poor design choices of Markdown (or RST).
##
## - no support for the rule of 2 spaces causing a line break in Markdown
## (use RST "line blocks" syntax for making line breaks)
##
## - interpretation of Markdown block quotes is also slightly different,
## e.g. case
##
## ::
##
## >>> foo
## > bar
## >>baz
##
## is a single 3rd-level quote `foo bar baz` in original Markdown, while
## in Nim we naturally see it as 3rd-level quote `foo` + 1st level `bar` +
## 2nd level `baz`:
##
## >>> foo
## > bar
## >>baz
##
## Limitations
## -----------
##
## * no Unicode support in character width calculations
## * body elements
## - no roman numerals in enumerated lists
## - no quoted literal blocks
## - no doctest blocks
## - no grid tables
## - some directives are missing (check official `RST directives list`_):
Expand Down Expand Up @@ -472,6 +497,10 @@ type
line: int # the last line of this style occurrence
# (for error message)
hasPeers: bool # has headings on the same level of hierarchy?
LiteralBlockKind = enum # RST-style literal blocks after `::`
  lbNone,                 # `::` does not introduce a literal block
  lbIndentedLiteralBlock, # the text after `::` is indented deeper than the
                          # current indentation
  lbQuotedLiteralBlock    # the text after `::` stays at the current
                          # indentation and starts with a quoting
                          # punctuation character
LevelMap = seq[LevelInfo] # Saves for each possible title adornment
# style its level in the current document.
SubstitutionKind = enum
Expand Down Expand Up @@ -1953,6 +1982,44 @@ proc parseLiteralBlock(p: var RstParser): PRstNode =
inc p.idx
result.add(n)

proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  ## Parses an RST quoted literal block: consecutive lines at the current
  ## indentation that all begin with the same quoting character.
  ##
  ## Expects `p` to be positioned on the `tkIndent` that follows `::`.
  ## Returns an `rnLiteralBlock` node with a single leaf that holds the raw
  ## text of the block; the quoting characters are kept, lines are joined
  ## with "\n".
  ##
  ## The block ends at end of input, at a blank line, or at a line indented
  ## less than the current indentation. A line at the same indentation that
  ## does not start with the quoting character, or a line indented deeper,
  ## ends the block with an `mwRstStyle` warning.
  result = newRstNodeA(p, rnLiteralBlock)
  var n = newLeaf("")
  if currentTok(p).kind == tkIndent:
    let indent = currInd(p)
    while currentTok(p).kind == tkIndent: inc p.idx  # skip blank lines
    # the first character of the first line defines the quoting character
    let quoteSym = currentTok(p).symbol[0]
    while true:
      case currentTok(p).kind
      of tkEof:
        break
      of tkIndent:
        if currentTok(p).ival < indent:
          break
        elif currentTok(p).ival == indent:
          # Accept the same token kinds for continuation lines as are
          # accepted for the first line of a quoted literal block
          # (punctuation or adornment), so that a line consisting only of
          # quoting characters does not terminate the block prematurely.
          if nextTok(p).kind in {tkPunct, tkAdornment} and
              nextTok(p).symbol[0] == quoteSym:
            n.text.add("\n")
            inc p.idx
          elif nextTok(p).kind == tkIndent:
            break  # blank line: normal end of the block
          else:
            rstMessage(p, mwRstStyle, "no newline after quoted literal block")
            break
        else:
          rstMessage(p, mwRstStyle,
                     "unexpected indentation in quoted literal block")
          break
      else:
        # any other token is copied verbatim into the literal text
        n.text.add(currentTok(p).symbol)
        inc p.idx
  result.add(n)

proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  ## Dispatches to the literal block parser that corresponds to `kind`:
  ## indented blocks go to `parseLiteralBlock`, every other kind goes to
  ## `parseQuotedLiteralBlock`.
  case kind
  of lbIndentedLiteralBlock:
    result = parseLiteralBlock(p)
  of lbNone, lbQuotedLiteralBlock:
    result = parseQuotedLiteralBlock(p)

proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
## Returns (preliminary) heading level corresponding to `c` and
## `hasOverline`. If level does not exist, add it first.
Expand Down Expand Up @@ -2023,6 +2090,33 @@ proc isLineBlock(p: RstParser): bool =
p.tok[j].col > currentTok(p).col or
p.tok[j].symbol == "\n"

proc isMarkdownBlockQuote(p: RstParser): bool =
  ## Tells whether the text of the current token begins with `>`,
  ## i.e. whether a Markdown block quote may start here.
  currentTok(p).symbol[0] == '>'

proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  ## Classifies what follows a `::` at the current token:
  ##
  ## * `lbIndentedLiteralBlock` when the next line is indented deeper than
  ##   the current indentation;
  ## * `lbQuotedLiteralBlock` when the next non-blank line keeps the current
  ##   indentation and starts with a valid quoting character (this is not
  ##   quite the same thing as a Markdown quote block);
  ## * `lbNone` in every other case, including when the current token is not
  ##   `::` or is not followed by a newline.
  ##
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  const validQuotingCharacters = {'!'..'/', ':'..'@', '['..'`', '{'..'~'}
  result = lbNone
  if currentTok(p).symbol == "::" and nextTok(p).kind == tkIndent:
    let
      ownIndent = currInd(p)
      followingIndent = nextTok(p).ival
    if ownIndent < followingIndent:
      result = lbIndentedLiteralBlock
    elif ownIndent == followingIndent:
      # look past blank lines at the first token of the candidate block
      var first = p.idx + 1
      while p.tok[first].kind == tkIndent: inc first
      if p.tok[first].kind in {tkPunct, tkAdornment} and
          p.tok[first].symbol[0] in validQuotingCharacters:
        result = lbQuotedLiteralBlock

proc predNL(p: RstParser): bool =
result = true
if p.idx > 0:
Expand Down Expand Up @@ -2078,6 +2172,8 @@ proc whichSection(p: RstParser): RstNodeKind =
elif match(p, p.idx + 1, " a"): result = rnTable
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
result = rnMarkdownBlockQuote
elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
result = rnOverline
else:
Expand All @@ -2090,6 +2186,8 @@ proc whichSection(p: RstParser): RstNodeKind =
result = rnMarkdownTable
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
result = rnMarkdownBlockQuote
elif match(p, tokenAfterNewline(p), "aI") and
isAdornmentHeadline(p, tokenAfterNewline(p)):
result = rnHeadline
Expand Down Expand Up @@ -2143,6 +2241,102 @@ proc parseLineBlock(p: var RstParser): PRstNode =
else:
break

proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}

proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
  ## Gathers the Markdown quote prefix that begins at token `idx`.
  ##
  ## The token at `idx` is always taken. After it, each pair of a whitespace
  ## token followed by a punctuation token starting with `>` is appended, so
  ## a prefix written with spaces (like `> > >`) is collected as a whole.
  ##
  ## Returns:
  ## * `sym`: text of the prefix, inner whitespace included;
  ## * `depth`: length of the first token plus the lengths of the appended
  ##   `>` tokens (whitespace is not counted);
  ## * `tokens`: how many tokens the prefix occupies.
  let head = p.tok[idx].symbol
  result = (sym: head, depth: head.len, tokens: 1)
  var pos = idx + 1
  while p.tok[pos].kind == tkWhite and pos + 1 < p.tok.len and
      p.tok[pos + 1].kind == tkPunct and p.tok[pos + 1].symbol[0] == '>':
    let marker = p.tok[pos + 1].symbol
    result.sym.add p.tok[pos].symbol
    result.sym.add marker
    inc result.depth, marker.len
    inc result.tokens, 2
    inc pos, 2

proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
                              PRstNode =
  ## We define *segment* as a group of lines that starts with exactly the
  ## same quote symbol. If the following lines don't contain any `>` (*lazy*
  ## continuation) they are considered as continuation of the current segment.
  ##
  ## * `curSym` is the quote prefix shared by all lines of the segment.
  ## * `col` is the indentation threshold: an unquoted line indented less
  ##   than `col` terminates the segment.
  ##
  ## The tokens of the segment are moved from `p` into a fresh parser with
  ## the quote prefixes removed, and the result of `parseDoc` on that parser
  ## is returned. On return `p` is positioned at the token that terminated
  ## the segment.
  var q: RstParser  # to delete `>` at a start of line and then parse normally
  initParser(q, p.s)
  q.col = p.col
  q.line = p.line
  var minCol = int.high  # minimum column num in the segment
  while true:  # move tokens of segment from `p` to `q` skipping `curSym`
    case currentTok(p).kind
    of tkEof:
      break
    of tkIndent:
      if nextTok(p).kind in {tkIndent, tkEof}:
        break  # blank line or end of input finishes the segment
      else:
        if nextTok(p).symbol[0] == '>':
          let (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
          if quoteSym == curSym:  # the segment continues
            let iTok = tokenAfterNewline(p, p.idx+1)
            if p.tok[iTok].kind notin {tkEof, tkIndent} and
                p.tok[iTok].symbol[0] != '>':
              rstMessage(p, mwRstStyle,
                  "two or more quoted lines are followed by unquoted line " &
                  $(curLine(p) + 1))
              break
            q.tok.add currentTok(p)
            var ival = currentTok(p).ival + quoteSym.len
            inc p.idx, (1 + quoteTokens)  # skip newline and > > >
            if currentTok(p).kind == tkWhite:
              ival += currentTok(p).symbol.len
              inc p.idx
            # fix up previous `tkIndent`s to ival (as if >>> were not there)
            var j = q.tok.len - 1
            while j >= 0 and q.tok[j].kind == tkIndent:
              q.tok[j].ival = ival
              dec j
          else:  # next segment started
            break
        elif currentTok(p).ival < col:
          break
        else:  # the segment continues, a case like:
               # > beginning
               # continuation
          q.tok.add currentTok(p)
          inc p.idx
    else:
      if currentTok(p).col < minCol: minCol = currentTok(p).col
      q.tok.add currentTok(p)
      inc p.idx
  q.indentStack = @[minCol]
  # if initial indentation `minCol` is > 0 then final newlines
  # should be omitted so that parseDoc could advance to the end of tokens:
  var j = q.tok.len - 1
  # `j >= 0` guards the case when the segment is empty or consists only of
  # newlines (e.g. a lone `>`), which would otherwise index `q.tok[-1]`
  while j >= 0 and q.tok[j].kind == tkIndent: dec j
  q.tok.setLen (j+1)
  q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  result = parseDoc(q)

proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  ## Parses a Markdown block quote that starts at the current token.
  ##
  ## The result is an `rnMarkdownBlockQuote` node with one
  ## `rnMarkdownBlockQuoteItem` child per segment (run of lines with the same
  ## quote prefix). Each item records the depth of its prefix in
  ## `quotationDepth`. Segments are consumed for as long as the next line
  ## starts at the column of the first `>` and begins with `>` itself.
  let startCol = currentTok(p).col
  var (prefix, depth, prefixTokens) = getQuoteSymbol(p, p.idx)
  result = newRstNodeA(p, rnMarkdownBlockQuote)
  p.idx += prefixTokens  # step over the first quote prefix
  while true:
    let segment = newRstNode(rnMarkdownBlockQuoteItem)
    segment.quotationDepth = depth
    if currentTok(p).kind == tkWhite:
      inc p.idx
    segment.add parseMarkdownQuoteSegment(p, prefix, startCol)
    result.add segment
    let anotherSegmentFollows =
      currentTok(p).kind == tkIndent and currentTok(p).ival == startCol and
      nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>'
    if not anotherSegmentFollows:
      break
    (prefix, depth, prefixTokens) = getQuoteSymbol(p, p.idx + 1)
    p.idx += 1 + prefixTokens  # step over the newline and the `> > >` prefix

proc parseParagraph(p: var RstParser, result: PRstNode) =
while true:
case currentTok(p).kind
Expand All @@ -2158,16 +2352,17 @@ proc parseParagraph(p: var RstParser, result: PRstNode) =
result.add newLeaf(" ")
of rnLineBlock:
result.addIfNotNil(parseLineBlock(p))
of rnMarkdownBlockQuote:
result.addIfNotNil(parseMarkdownBlockQuote(p))
else: break
else:
break
of tkPunct:
if currentTok(p).symbol == "::" and
nextTok(p).kind == tkIndent and
currInd(p) < nextTok(p).ival:
if (let literalBlockKind = whichRstLiteralBlock(p);
literalBlockKind != lbNone):
result.add newLeaf(":")
inc p.idx # skip '::'
result.add(parseLiteralBlock(p))
result.add(parseRstLiteralBlock(p, literalBlockKind))
break
else:
parseInline(p, result)
Expand Down Expand Up @@ -2257,8 +2452,6 @@ proc getColumns(p: var RstParser, cols: var IntSeq) =
# last column has no limit:
cols[L - 1] = 32000

proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}

proc parseSimpleTable(p: var RstParser): PRstNode =
var
cols: IntSeq
Expand Down Expand Up @@ -2585,6 +2778,7 @@ proc parseSection(p: var RstParser, result: PRstNode) =
a = parseLiteralBlock(p)
of rnBulletList: a = parseBulletList(p)
of rnLineBlock: a = parseLineBlock(p)
of rnMarkdownBlockQuote: a = parseMarkdownBlockQuote(p)
of rnDirective: a = parseDotDot(p)
of rnEnumList: a = parseEnumList(p)
of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
Expand Down
9 changes: 8 additions & 1 deletion lib/packages/docutils/rstast.nim
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ type
rnFieldName, # consisting of a field name ...
rnFieldBody, # ... and a field body
rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString,
rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock,
rnOptionArgument, rnDescription, rnLiteralBlock,
rnMarkdownBlockQuote, # a quote starting from punctuation like >>>
rnMarkdownBlockQuoteItem, # a quotation block, quote lines starting with
# the same number of chars
rnLineBlock, # the | thingie
rnLineBlockItem, # a son of rnLineBlock - one line inside it.
# When `RstNode` lineIndent="\n" the line's empty
Expand Down Expand Up @@ -101,6 +104,8 @@ type
of rnFootnote, rnCitation, rnOptionListItem:
order*: int ## footnote order (for auto-symbol footnotes and
## auto-numbered ones without a label)
of rnMarkdownBlockQuoteItem:
quotationDepth*: int ## number of characters in line prefix
of rnRef, rnSubstitutionReferences,
rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
info*: TLineInfo ## To have line/column info for warnings at
Expand Down Expand Up @@ -409,6 +414,8 @@ proc treeRepr*(node: PRstNode, indent=0): string =
result.add " level=" & $node.level
of rnFootnote, rnCitation, rnOptionListItem:
result.add (if node.order == 0: "" else: " order=" & $node.order)
of rnMarkdownBlockQuoteItem:
result.add " quotationDepth=" & $node.quotationDepth
else:
discard
result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'")
Expand Down
Loading