From 70ee29c02b6812ecc185b8dbe6d3ff1b06e7ff4d Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Thu, 10 Dec 2020 10:28:58 -0600 Subject: [PATCH] fix: fix atx heading and make regex safe (#1853) --- lib/marked.esm.js | 19 ++++++++++++++++--- lib/marked.js | 19 ++++++++++++++++--- src/Tokenizer.js | 15 ++++++++++++++- src/rules.js | 4 ++-- test/specs/commonmark/commonmark.0.29.json | 3 +-- test/specs/gfm/commonmark.0.29.json | 3 +-- test/specs/new/pedantic_heading.html | 9 +++++++++ test/specs/new/pedantic_heading.md | 12 ++++++++++++ test/specs/redos/quadratic_heading.js | 7 +++++++ 9 files changed, 78 insertions(+), 13 deletions(-) create mode 100644 test/specs/new/pedantic_heading.html create mode 100644 test/specs/new/pedantic_heading.md create mode 100644 test/specs/redos/quadratic_heading.js diff --git a/lib/marked.esm.js b/lib/marked.esm.js index 1beab3500e..afac44dc3d 100644 --- a/lib/marked.esm.js +++ b/lib/marked.esm.js @@ -435,11 +435,24 @@ var Tokenizer_1 = class Tokenizer { heading(src) { const cap = this.rules.block.heading.exec(src); if (cap) { + let text = cap[2].trim(); + + // remove trailing #s + if (text.endsWith('#')) { + const trimmed = rtrim$1(text, '#'); + if (this.options.pedantic) { + text = trimmed.trim(); + } else if (!trimmed || trimmed.endsWith(' ')) { + // CommonMark requires space before trailing #s + text = trimmed.trim(); + } + } + return { type: 'heading', raw: cap[0], depth: cap[1].length, - text: cap[2] + text: text }; } } @@ -1003,7 +1016,7 @@ const block = { code: /^( {4}[^\n]+\n*)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, - heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/, + heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! 
{0,3}bull )\n*|\s*$)/, html: '^ {0,3}(?:' // optional indentation @@ -1134,7 +1147,7 @@ block.pedantic = merge$1({}, block.normal, { + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, - heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, + heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest$1, // fences not supported paragraph: edit$1(block.normal._paragraph) .replace('hr', block.hr) diff --git a/lib/marked.js b/lib/marked.js index 4ff1fa6f4d..3e623b8944 100644 --- a/lib/marked.js +++ b/lib/marked.js @@ -532,11 +532,24 @@ var cap = this.rules.block.heading.exec(src); if (cap) { + var text = cap[2].trim(); // remove trailing #s + + if (text.endsWith('#')) { + var trimmed = rtrim$1(text, '#'); + + if (this.options.pedantic) { + text = trimmed.trim(); + } else if (!trimmed || trimmed.endsWith(' ')) { + // CommonMark requires space before trailing #s + text = trimmed.trim(); + } + } + return { type: 'heading', raw: cap[0], depth: cap[1].length, - text: cap[2] + text: text }; } }; @@ -1122,7 +1135,7 @@ code: /^( {4}[^\n]+\n*)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, - heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/, + heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/, html: '^ {0,3}(?:' // optional indentation @@ -1193,7 +1206,7 @@ html: edit$1('^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+? 
*(?:\\n{2,}|\\s*$)' // closed tag + '|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))').replace('comment', block._comment).replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b').getRegex(), def: /^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, - heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, + heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest$1, // fences not supported paragraph: edit$1(block.normal._paragraph).replace('hr', block.hr).replace('heading', ' *#{1,6} *[^\n]').replace('lheading', block.lheading).replace('blockquote', ' {0,3}>').replace('|fences', '').replace('|list', '').replace('|html', '').getRegex() diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 972c5b9ea3..d78202fac1 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -121,11 +121,24 @@ module.exports = class Tokenizer { heading(src) { const cap = this.rules.block.heading.exec(src); if (cap) { + let text = cap[2].trim(); + + // remove trailing #s + if (text.endsWith('#')) { + const trimmed = rtrim(text, '#'); + if (this.options.pedantic) { + text = trimmed.trim(); + } else if (!trimmed || trimmed.endsWith(' ')) { + // CommonMark requires space before trailing #s + text = trimmed.trim(); + } + } + return { type: 'heading', raw: cap[0], depth: cap[1].length, - text: cap[2] + text: text }; } } diff --git a/src/rules.js b/src/rules.js index df14188275..69975e189a 100644 --- a/src/rules.js +++ b/src/rules.js @@ -12,7 +12,7 @@ const block = { code: /^( {4}[^\n]+\n*)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, - heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/, + heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! 
{0,3}bull )\n*|\s*$)/, html: '^ {0,3}(?:' // optional indentation @@ -143,7 +143,7 @@ block.pedantic = merge({}, block.normal, { + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, - heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, + heading: /^(#{1,6})(.*)(?:\n+|$)/, fences: noopTest, // fences not supported paragraph: edit(block.normal._paragraph) .replace('hr', block.hr) diff --git a/test/specs/commonmark/commonmark.0.29.json b/test/specs/commonmark/commonmark.0.29.json index 7d9e16f58a..b40ee785c1 100644 --- a/test/specs/commonmark/commonmark.0.29.json +++ b/test/specs/commonmark/commonmark.0.29.json @@ -389,8 +389,7 @@ "example": 49, "start_line": 963, "end_line": 971, - "section": "ATX headings", - "shouldFail": true + "section": "ATX headings" }, { "markdown": "Foo *bar*\n=========\n\nFoo *bar*\n---------\n", diff --git a/test/specs/gfm/commonmark.0.29.json b/test/specs/gfm/commonmark.0.29.json index 0526dbf1d3..5c4eba25bd 100644 --- a/test/specs/gfm/commonmark.0.29.json +++ b/test/specs/gfm/commonmark.0.29.json @@ -389,8 +389,7 @@ "example": 49, "start_line": 963, "end_line": 971, - "section": "ATX headings", - "shouldFail": true + "section": "ATX headings" }, { "markdown": "Foo *bar*\n=========\n\nFoo *bar*\n---------\n", diff --git a/test/specs/new/pedantic_heading.html b/test/specs/new/pedantic_heading.html new file mode 100644 index 0000000000..7795a3bccc --- /dev/null +++ b/test/specs/new/pedantic_heading.html @@ -0,0 +1,9 @@ +

<h1 id="h1">h1</h1>

+ +

<h1 id="h1-1">h1</h1>

+ +

<h1 id="h1-">h1 #</h1>

+ +

<h1 id="h1-2">h1</h1>

+ +

<p># h1</p>

diff --git a/test/specs/new/pedantic_heading.md b/test/specs/new/pedantic_heading.md new file mode 100644 index 0000000000..8f7a1e8eb2 --- /dev/null +++ b/test/specs/new/pedantic_heading.md @@ -0,0 +1,12 @@ +--- +pedantic: true +--- +#h1 + +#h1# + +#h1 # # + +#h1#### + + # h1 diff --git a/test/specs/redos/quadratic_heading.js b/test/specs/redos/quadratic_heading.js new file mode 100644 index 0000000000..ae8a20880a --- /dev/null +++ b/test/specs/redos/quadratic_heading.js @@ -0,0 +1,7 @@ +module.exports = { + markdown: `# #${' '.repeat(50000)}a`, + html: '<h1># a</h1>', + options: { + headerIds: false + } +};