From 5993630910be672354dee4ba206aa9889dbea911 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 2 Mar 2024 19:22:49 +1100 Subject: [PATCH 1/5] Update spec to 0.31.2 --- .../src/main/resources/gfm-spec.txt | 66 +++---- .../src/main/resources/spec.txt | 170 +++++++++--------- 2 files changed, 111 insertions(+), 125 deletions(-) diff --git a/commonmark-test-util/src/main/resources/gfm-spec.txt b/commonmark-test-util/src/main/resources/gfm-spec.txt index 17027615..d42f3369 100644 --- a/commonmark-test-util/src/main/resources/gfm-spec.txt +++ b/commonmark-test-util/src/main/resources/gfm-spec.txt @@ -130,7 +130,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -138,7 +138,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -171,7 +171,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? 
Can ordered list markers be right-aligned? @@ -1001,10 +1001,7 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces indentation and any number of trailing spaces. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +spaces of indentation and any number of trailing spaces or tabs. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1638,7 +1635,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2068,7 +2065,7 @@ followed by an uppercase ASCII letter.\ ``. -6. **Start condition:** line begins the string `<` or `foo, bar, baz

+

foo, bar, baz

```````````````````````````````` @@ -7200,7 +7197,7 @@ foo***bar***baz ```````````````````````````````` example foo******bar*********baz . -

foobar***baz

+

foobar***baz

```````````````````````````````` @@ -7271,21 +7268,21 @@ __foo _bar_ baz__ ```````````````````````````````` example __foo __bar__ baz__ . -

foo bar baz

+

foo bar baz

```````````````````````````````` ```````````````````````````````` example ____foo__ bar__ . -

foo bar

+

foo bar

```````````````````````````````` ```````````````````````````````` example **foo **bar**** . -

foo bar

+

foo bar

```````````````````````````````` @@ -7570,14 +7567,14 @@ switching delimiters: ```````````````````````````````` example ****foo**** . -

foo

+

foo

```````````````````````````````` ```````````````````````````````` example ____foo____ . -

foo

+

foo

```````````````````````````````` @@ -7588,7 +7585,7 @@ delimiters: ```````````````````````````````` example ******foo****** . -

foo

+

foo

```````````````````````````````` @@ -7604,7 +7601,7 @@ Rule 14: ```````````````````````````````` example _____foo_____ . -

foo

+

foo

```````````````````````````````` @@ -9410,10 +9407,9 @@ character, and a `>` character. A [closing tag](@) consists of the string ``. -An [HTML comment](@) consists of ``, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) +An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). A [processing instruction](@) consists of the string ` +foo . -

foo

+

foo

```````````````````````````````` - -```````````````````````````````` example -foo -. -

foo <!-- not a comment -- two hyphens -->

-```````````````````````````````` - - -Not comments: - ```````````````````````````````` example foo foo --> -foo +foo foo --> . -

foo <!--> foo -->

-

foo <!-- foo--->

+

foo foo -->

+

foo foo -->

```````````````````````````````` diff --git a/commonmark-test-util/src/main/resources/spec.txt b/commonmark-test-util/src/main/resources/spec.txt index e6f31375..f1fab281 100644 --- a/commonmark-test-util/src/main/resources/spec.txt +++ b/commonmark-test-util/src/main/resources/spec.txt @@ -1,9 +1,9 @@ --- title: CommonMark Spec author: John MacFarlane -version: 0.30 -date: '2021-06-19' -license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +version: '0.31.2' +date: '2024-01-28' +license: '[CC-BY-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)' ... # Introduction @@ -14,7 +14,7 @@ Markdown is a plain text format for writing structured documents, based on conventions for indicating formatting in email and usenet posts. It was developed by John Gruber (with help from Aaron Swartz) and released in 2004 in the form of a -[syntax description](http://daringfireball.net/projects/markdown/syntax) +[syntax description](https://daringfireball.net/projects/markdown/syntax) and a Perl script (`Markdown.pl`) for converting Markdown to HTML. In the next decade, dozens of implementations were developed in many languages. Some extended the original @@ -34,10 +34,10 @@ As Gruber writes: > Markdown-formatted document should be publishable as-is, as > plain text, without looking like it's been marked up with tags > or formatting instructions. -> () +> () The point can be illustrated by comparing a sample of -[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +[AsciiDoc](https://asciidoc.org/) with an equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc manual: @@ -103,7 +103,7 @@ source, not just in the processed document. ## Why is a spec needed? John Gruber's [canonical description of Markdown's -syntax](http://daringfireball.net/projects/markdown/syntax) +syntax](https://daringfireball.net/projects/markdown/syntax) does not specify the syntax unambiguously. 
Here are some examples of questions it does not answer: @@ -114,7 +114,7 @@ questions it does not answer: not require that. This is hardly a "corner case," and divergences between implementations on this issue often lead to surprises for users in real documents. (See [this comment by John - Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) 2. Is a blank line needed before a block quote or heading? Most implementations do not require the blank line. However, @@ -122,7 +122,7 @@ questions it does not answer: also to ambiguities in parsing (note that some implementations put the heading inside the blockquote, while others do not). (John Gruber has also spoken [in favor of requiring the blank - lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) 3. Is a blank line needed before an indented code block? (`Markdown.pl` requires it, but this is not mentioned in the @@ -155,7 +155,7 @@ questions it does not answer: ``` (There are some relevant comments by John Gruber - [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) 5. Can list markers be indented? Can ordered list markers be right-aligned? @@ -316,9 +316,9 @@ A line containing no characters, or a line containing only spaces The following definitions of character classes will be used in this spec: -A [Unicode whitespace character](@) is -any code point in the Unicode `Zs` general category, or a tab (`U+0009`), -line feed (`U+000A`), form feed (`U+000C`), or carriage return (`U+000D`). 
+A [Unicode whitespace character](@) is a character in the Unicode `Zs` general +category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or +carriage return (`U+000D`). [Unicode whitespace](@) is a sequence of one or more [Unicode whitespace characters]. @@ -337,9 +337,8 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), `{`, `|`, `}`, or `~` (U+007B–007E). -A [Unicode punctuation character](@) is an [ASCII -punctuation character] or anything in -the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. +A [Unicode punctuation character](@) is a character in the Unicode `P` +(punctuation) or `S` (symbol) general categories. ## Tabs @@ -579,9 +578,9 @@ raw HTML: ```````````````````````````````` example - + . -

http://example.com?find=\*

+

https://example.com?find=\*

```````````````````````````````` @@ -1330,10 +1329,7 @@ interpretable as a [code fence], [ATX heading][ATX headings], A [setext heading underline](@) is a sequence of `=` characters or a sequence of `-` characters, with no more than 3 -spaces of indentation and any number of trailing spaces or tabs. If a line -containing a single `-` can be interpreted as an -empty [list items], it should be interpreted this way -and not as a [setext heading underline]. +spaces of indentation and any number of trailing spaces or tabs. The heading is a level 1 heading if `=` characters are used in the [setext heading underline], and a level 2 heading if `-` @@ -1967,7 +1963,7 @@ has been found, the code block contains all of the lines after the opening code fence until the end of the containing block (or document). (An alternative spec would require backtracking in the event that a closing code fence is not found. But this makes parsing -much less efficient, and there seems to be no real down side to the +much less efficient, and there seems to be no real downside to the behavior described here.) A fenced code block may interrupt a paragraph, and does not require @@ -2397,7 +2393,7 @@ followed by an ASCII letter.\ ``. -6. **Start condition:** line begins the string `<` or ``, or the string `/>`.\ @@ -4118,7 +4114,7 @@ The following rules define [list items]: blocks *Bs* starting with a character other than a space or tab, and *M* is a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation, then the result of prepending *M* and the following spaces to the first line - of Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a list item with *Bs* as its contents. The type of the list item (bullet or ordered) is determined by the type of its list marker. 
If the list item is ordered, then it is also assigned a start @@ -4533,7 +4529,7 @@ inside the code block: Note that rules #1 and #2 only apply to two cases: (a) cases in which the lines to be included in a list item begin with a -characer other than a space or tab, and (b) cases in which +character other than a space or tab, and (b) cases in which they begin with an indented code block. In a case like the following, where the first block begins with three spaces of indentation, the rules do not allow us to form a list item by @@ -5353,11 +5349,11 @@ by itself should be a paragraph followed by a nested sublist. Since it is well established Markdown practice to allow lists to interrupt paragraphs inside list items, the [principle of uniformity] requires us to allow this outside list items as -well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +well. ([reStructuredText](https://docutils.sourceforge.net/rst.html) takes a different approach, requiring blank lines before lists even inside other list items.) -In order to solve of unwanted lists in paragraphs with +In order to solve the problem of unwanted lists in paragraphs with hard-wrapped numerals, we allow only lists starting with `1` to interrupt paragraphs. Thus, @@ -6058,18 +6054,18 @@ But this is an HTML tag: And this is code: ```````````````````````````````` example -`` +`` . -

<http://foo.bar.baz>`

+

<https://foo.bar.baz>`

```````````````````````````````` But this is an autolink: ```````````````````````````````` example -` +` . -

http://foo.bar.`baz`

+

https://foo.bar.`baz`

```````````````````````````````` @@ -6102,7 +6098,7 @@ closing backtick strings to be equal in length: ## Emphasis and strong emphasis John Gruber's original [Markdown syntax -description](http://daringfireball.net/projects/markdown/syntax#em) says: +description](https://daringfireball.net/projects/markdown/syntax#em) says: > Markdown treats asterisks (`*`) and underscores (`_`) as indicators of > emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML @@ -6204,7 +6200,7 @@ Here are some examples of delimiter runs. (The idea of distinguishing left-flanking and right-flanking delimiter runs based on the character before and the character after comes from Roopesh Chander's -[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). vfmd uses the terminology "emphasis indicator string" instead of "delimiter run," and its rules for distinguishing left- and right-flanking runs are a bit more complex than the ones given here.) @@ -6346,6 +6342,21 @@ Unicode nonbreaking spaces count as whitespace, too: ```````````````````````````````` +Unicode symbols count as punctuation, too: + +```````````````````````````````` example +*$*alpha. + +*£*bravo. + +*€*charlie. +. +

*$*alpha.

+

*£*bravo.

+

*€*charlie.

+```````````````````````````````` + + Intraword emphasis with `*` is permitted: ```````````````````````````````` example @@ -7431,16 +7442,16 @@ _a `_`_ ```````````````````````````````` example -**a +**a . -

**ahttp://foo.bar/?q=**

+

**ahttps://foo.bar/?q=**

```````````````````````````````` ```````````````````````````````` example -__a +__a . -

__ahttp://foo.bar/?q=__

+

__ahttps://foo.bar/?q=__

```````````````````````````````` @@ -7688,13 +7699,13 @@ A link can contain fragment identifiers and queries: ```````````````````````````````` example [link](#fragment) -[link](http://example.com#fragment) +[link](https://example.com#fragment) -[link](http://example.com?foo=3#frag) +[link](https://example.com?foo=3#frag) .

link

-

link

-

link

+

link

+

link

```````````````````````````````` @@ -7938,9 +7949,9 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo . -

[foohttp://example.com/?search=](uri)

+

[foohttps://example.com/?search=](uri)

```````````````````````````````` @@ -8094,11 +8105,11 @@ and autolinks over link grouping: ```````````````````````````````` example -[foo +[foo [ref]: /uri . -

[foohttp://example.com/?search=][ref]

+

[foohttps://example.com/?search=][ref]

```````````````````````````````` @@ -8298,7 +8309,7 @@ A [collapsed reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document, followed by the string `[]`. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`. @@ -8351,7 +8362,7 @@ A [shortcut reference link](@) consists of a [link label] that [matches] a [link reference definition] elsewhere in the document and is not followed by `[]` or a link label. -The contents of the first link label are parsed as inlines, +The contents of the link label are parsed as inlines, which are used as the link's text. The link's URI and title are provided by the matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`. @@ -8438,7 +8449,7 @@ following closing bracket: ```````````````````````````````` -Full and compact references take precedence over shortcut +Full and collapsed references take precedence over shortcut references: ```````````````````````````````` example @@ -8754,7 +8765,7 @@ a link to the URI, with the URI as the link's label. An [absolute URI](@), for these purposes, consists of a [scheme] followed by a colon (`:`) -followed by zero or more characters other [ASCII control +followed by zero or more characters other than [ASCII control characters][ASCII control character], [space], `<`, and `>`. If the URI includes these characters, they must be percent-encoded (e.g. `%20` for a space). @@ -8774,9 +8785,9 @@ Here are some valid autolinks: ```````````````````````````````` example - + . -

http://foo.bar.baz/test?q=hello&id=22&boolean

+

https://foo.bar.baz/test?q=hello&id=22&boolean

```````````````````````````````` @@ -8816,9 +8827,9 @@ with their syntax: ```````````````````````````````` example - + . -

http://../

+

https://../

```````````````````````````````` @@ -8832,18 +8843,18 @@ with their syntax: Spaces are not allowed in autolinks: ```````````````````````````````` example - + . -

<http://foo.bar/baz bim>

+

<https://foo.bar/baz bim>

```````````````````````````````` Backslash-escapes do not work inside autolinks: ```````````````````````````````` example - + . -

http://example.com/\[\

+

https://example.com/\[\

```````````````````````````````` @@ -8895,9 +8906,9 @@ These are not autolinks: ```````````````````````````````` example -< http://foo.bar > +< https://foo.bar > . -

< http://foo.bar >

+

< https://foo.bar >

```````````````````````````````` @@ -8916,9 +8927,9 @@ These are not autolinks: ```````````````````````````````` example -http://example.com +https://example.com . -

http://example.com

+

https://example.com

```````````````````````````````` @@ -8980,10 +8991,9 @@ A [closing tag](@) consists of the string ``. -An [HTML comment](@) consists of ``, -where *text* does not start with `>` or `->`, does not end with `-`, -and does not contain `--`. (See the -[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) +An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). A [processing instruction](@) consists of the string ` +foo . -

foo

+

foo

```````````````````````````````` - -```````````````````````````````` example -foo -. -

foo <!-- not a comment -- two hyphens -->

-```````````````````````````````` - - -Not comments: - ```````````````````````````````` example foo foo --> -foo +foo foo --> . -

foo <!--> foo -->

-

foo <!-- foo--->

+

foo foo -->

+

foo foo -->

```````````````````````````````` @@ -9674,7 +9674,7 @@ through the stack for an opening `[` or `![` delimiter. delimiter from the stack, and return a literal text node `]`. - If we find one and it's active, then we parse ahead to see if - we have an inline link/image, reference link/image, compact reference + we have an inline link/image, reference link/image, collapsed reference link/image, or shortcut reference link/image. + If we don't, then we remove the opening delimiter from the From 414ba56a17b545601b5360d2f7ddeb7fca30b20b Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 2 Mar 2024 21:03:01 +1100 Subject: [PATCH 2/5] Update Unicode punctuation to include symbols --- .../java/org/commonmark/text/Characters.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/text/Characters.java b/commonmark/src/main/java/org/commonmark/text/Characters.java index 4d953232..ee56ca67 100644 --- a/commonmark/src/main/java/org/commonmark/text/Characters.java +++ b/commonmark/src/main/java/org/commonmark/text/Characters.java @@ -57,17 +57,23 @@ public static boolean isSpaceOrTab(CharSequence s, int index) { } /** - * @see punctuation character + * @see Unicode punctuation character */ public static boolean isPunctuationCodePoint(int codePoint) { switch (Character.getType(codePoint)) { - case Character.CONNECTOR_PUNCTUATION: + // General category "P" (punctuation) case Character.DASH_PUNCTUATION: + case Character.START_PUNCTUATION: case Character.END_PUNCTUATION: - case Character.FINAL_QUOTE_PUNCTUATION: - case Character.INITIAL_QUOTE_PUNCTUATION: + case Character.CONNECTOR_PUNCTUATION: case Character.OTHER_PUNCTUATION: - case Character.START_PUNCTUATION: + case Character.INITIAL_QUOTE_PUNCTUATION: + case Character.FINAL_QUOTE_PUNCTUATION: + // General category "S" (symbol) + case Character.MATH_SYMBOL: + case Character.CURRENCY_SYMBOL: + case Character.MODIFIER_SYMBOL: + case Character.OTHER_SYMBOL: 
return true; default: switch (codePoint) { From 058e2f0ed4331553e44a77f348983ef7b4d524e4 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 2 Mar 2024 21:05:47 +1100 Subject: [PATCH 3/5] HTML blocks: Add search, remove source --- .../src/main/java/org/commonmark/internal/HtmlBlockParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java b/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java index ce66c20d..123d9ec1 100644 --- a/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/HtmlBlockParser.java @@ -61,7 +61,7 @@ public class HtmlBlockParser extends AbstractBlockParser { "nav|noframes|" + "ol|optgroup|option|" + "p|param|" + - "section|source|summary|" + + "search|section|summary|" + "table|tbody|td|tfoot|th|thead|title|tr|track|" + "ul" + ")(?:\\s|[/]?[>]|$)", Pattern.CASE_INSENSITIVE), From 203eeb24c2ef84472fa8b795a29bf07e550b9d70 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 2 Mar 2024 21:26:42 +1100 Subject: [PATCH 4/5] Update HTML comment scanning Looks like commonmark.js has a bug in its handling: https://github.com/commonmark/commonmark.js/issues/285 --- .../commonmark/internal/inline/HtmlInlineParser.java | 11 ++++++----- .../org/commonmark/test/HtmlInlineParserTest.java | 6 +++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java index c85ae9d7..6dc525cb 100644 --- a/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java +++ b/commonmark/src/main/java/org/commonmark/internal/inline/HtmlInlineParser.java @@ -142,8 +142,9 @@ private static boolean tryProcessingInstruction(Scanner scanner) { } private static boolean tryComment(Scanner scanner) { - // spec: An HTML comment 
consists of <!-- + text + -->, where text does not start with > or ->, does not end - // with -, and does not contain --. (See the HTML5 spec.) + // spec: An [HTML comment](@) consists of `<!-->`, `<!--->`, or `<!--`, a string of characters not including the string `-->`, and `-->` (see the + // [HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). // Skip first `-` scanner.next(); @@ -152,12 +153,12 @@ private static boolean tryComment(Scanner scanner) { } if (scanner.next('>') || scanner.next("->")) { - return false; + return true; } while (scanner.find('-') >= 0) { - if (scanner.next("--")) { - return scanner.next('>'); + if (scanner.next("-->")) { + return true; } else { scanner.next(); } diff --git a/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java b/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java index 0172ca43..965a2f18 100644 --- a/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java +++ b/commonmark/src/test/java/org/commonmark/test/HtmlInlineParserTest.java @@ -8,7 +8,11 @@ public class HtmlInlineParserTest extends CoreRenderingTestCase { public void comment() { assertRendering("inline ", "

inline

\n"); assertRendering("inline ", "

inline

\n"); - assertRendering("inline -->", "

inline <!--->-->

\n"); + assertRendering("inline ", "

inline

\n"); + assertRendering("inline ", "

inline

\n"); + assertRendering("inline ", "

inline

\n"); + assertRendering("inline -->", "

inline -->

\n"); + assertRendering("inline -->", "

inline -->

\n"); } @Test From a8af670b8ac5212b9a658b14c6a2a0d1170f83b7 Mon Sep 17 00:00:00 2001 From: Robin Stocker Date: Sat, 2 Mar 2024 22:34:31 +1100 Subject: [PATCH 5/5] Update overrides in SpecIntegrationTest --- .../commonmark/integration/Extensions.java | 25 +++++++++++++ .../ExtensionsIntegrationTest.java | 37 +++++++++++++++++++ .../SourceSpanIntegrationTest.java | 2 +- .../integration/SpecIntegrationTest.java | 34 +++-------------- 4 files changed, 68 insertions(+), 30 deletions(-) create mode 100644 commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java create mode 100644 commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java new file mode 100644 index 00000000..5eddcc57 --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/Extensions.java @@ -0,0 +1,25 @@ +package org.commonmark.integration; + +import org.commonmark.Extension; +import org.commonmark.ext.autolink.AutolinkExtension; +import org.commonmark.ext.front.matter.YamlFrontMatterExtension; +import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; +import org.commonmark.ext.gfm.tables.TablesExtension; +import org.commonmark.ext.image.attributes.ImageAttributesExtension; +import org.commonmark.ext.ins.InsExtension; +import org.commonmark.ext.task.list.items.TaskListItemsExtension; + +import java.util.Arrays; +import java.util.List; + +public class Extensions { + + static final List ALL_EXTENSIONS = Arrays.asList( + AutolinkExtension.create(), + ImageAttributesExtension.create(), + InsExtension.create(), + StrikethroughExtension.create(), + TablesExtension.create(), + TaskListItemsExtension.create(), + YamlFrontMatterExtension.create()); +} diff --git 
a/commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java new file mode 100644 index 00000000..f5d84b5a --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/ExtensionsIntegrationTest.java @@ -0,0 +1,37 @@ +package org.commonmark.integration; + +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.RenderingTestCase; +import org.junit.Test; + +/** + * Tests to ensure all extensions work well together. + */ +public class ExtensionsIntegrationTest extends RenderingTestCase { + + protected static final Parser PARSER = Parser.builder() + .extensions(Extensions.ALL_EXTENSIONS) + .build(); + protected static final HtmlRenderer RENDERER = HtmlRenderer.builder() + .extensions(Extensions.ALL_EXTENSIONS) + .percentEncodeUrls(true) + .build(); + + @Test + public void testImageAttributes() { + assertRendering("![text](/url.png){height=5 width=6}", "

\"text\"

\n"); + } + + @Test + public void testTaskListItems() { + assertRendering("- [ ] task to do\n- [x] task done\n", + "
    \n
  • task to do
  • \n" + + "
  • task done
  • \n
\n"); + + } + + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } +} diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java index b6fa4922..a0649f53 100644 --- a/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/SourceSpanIntegrationTest.java @@ -10,7 +10,7 @@ public class SourceSpanIntegrationTest extends SpecIntegrationTest { protected static final Parser PARSER = Parser.builder() - .extensions(EXTENSIONS) + .extensions(Extensions.ALL_EXTENSIONS) .includeSourceSpans(IncludeSourceSpans.BLOCKS) .build(); diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java index f434f65d..2b615aa4 100644 --- a/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/SpecIntegrationTest.java @@ -1,18 +1,9 @@ package org.commonmark.integration; -import org.commonmark.Extension; -import org.commonmark.ext.autolink.AutolinkExtension; -import org.commonmark.ext.image.attributes.ImageAttributesExtension; -import org.commonmark.ext.ins.InsExtension; -import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; -import org.commonmark.ext.gfm.tables.TablesExtension; -import org.commonmark.ext.front.matter.YamlFrontMatterExtension; -import org.commonmark.ext.task.list.items.TaskListItemsExtension; import org.commonmark.renderer.html.HtmlRenderer; import org.commonmark.parser.Parser; import org.commonmark.testutil.example.Example; import org.commonmark.testutil.SpecTestCase; -import org.junit.Assert; import 
org.junit.Test; import java.util.*; @@ -24,17 +15,9 @@ */ public class SpecIntegrationTest extends SpecTestCase { - protected static final List EXTENSIONS = Arrays.asList( - AutolinkExtension.create(), - ImageAttributesExtension.create(), - InsExtension.create(), - StrikethroughExtension.create(), - TablesExtension.create(), - TaskListItemsExtension.create(), - YamlFrontMatterExtension.create()); - protected static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + protected static final Parser PARSER = Parser.builder().extensions(Extensions.ALL_EXTENSIONS).build(); // The spec says URL-escaping is optional, but the examples assume that it's enabled. - protected static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(EXTENSIONS).percentEncodeUrls(true).build(); + protected static final HtmlRenderer RENDERER = HtmlRenderer.builder().extensions(Extensions.ALL_EXTENSIONS).percentEncodeUrls(true).build(); protected static final Map OVERRIDDEN_EXAMPLES = getOverriddenExamples(); public SpecIntegrationTest(Example example) { @@ -59,7 +42,7 @@ private static Map getOverriddenExamples() { Map m = new HashMap<>(); // Not a spec autolink because of space, but the resulting text contains a valid URL - m.put("\n", "

<http://foo.bar/baz bim>

\n"); + m.put("\n", "

<https://foo.bar/baz bim>

\n"); // Not a spec autolink, but the resulting text contains a valid email m.put("\n", "

<foo+@bar.example.com>

\n"); @@ -68,10 +51,10 @@ private static Map getOverriddenExamples() { m.put("\n", "

<heck://bing.bong>

\n"); // Not a spec autolink because of spaces, but autolink extension doesn't limit schemes - m.put("< http://foo.bar >\n", "

< http://foo.bar >

\n"); + m.put("< https://foo.bar >\n", "

< https://foo.bar >

\n"); // Plain autolink - m.put("http://example.com\n", "

http://example.com

\n"); + m.put("https://example.com\n", "

https://example.com

\n"); // Plain autolink m.put("foo@bar.example.com\n", "

foo@bar.example.com

\n"); @@ -80,13 +63,6 @@ private static Map getOverriddenExamples() { m.put("---\nFoo\n---\nBar\n---\nBaz\n", "

Bar

\n

Baz

\n"); m.put("---\n---\n", ""); - // Image attributes - m.put("![text](/url.png){height=5 width=6}", "\"text\""); - - // Task list items - m.put("- [ ] task to do\n- [x] task done\n", "
    \n
  • task to do
  • \n" + - "
  • task done
  • \n
\n"); - return m; }