Skip to content

Commit

Permalink
Fix quadratic behavior with inline HTML
Browse files Browse the repository at this point in the history
Repeated starting sequences like `<?`, `<!DECL ` or `<![CDATA` could
lead to quadratic behavior if no matching ending sequence was found.
Separate the inline HTML scanners. Remember if scanning the whole input
for a specific ending sequence failed and skip subsequent scans.

Fixes commonmark#299.
  • Loading branch information
nwellnhof committed Mar 22, 2021
1 parent 8a02328 commit 1944fe7
Show file tree
Hide file tree
Showing 4 changed files with 4,282 additions and 4,427 deletions.
53 changes: 52 additions & 1 deletion src/inlines.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,14 @@ typedef struct bracket {
bool bracket_after;
} bracket;

// Bit flags kept in the subject's `flags` field (cleared to 0 when the
// subject is initialized). Each one records that a scan for the end of a
// particular inline HTML construct ran past the end of the input, so the
// same kind of scan is skipped for the rest of this subject instead of being
// repeated at every later '<' (which could lead to quadratic behavior).

// Set after a "<![" (CDATA) scan fails to fit within the input.
#define FLAG_SKIP_HTML_CDATA (1u << 0)
// Set after a "<!" declaration scan fails to fit within the input.
#define FLAG_SKIP_HTML_DECLARATION (1u << 1)
// Set after a "<?" processing-instruction scan fails to fit within the input.
#define FLAG_SKIP_HTML_PI (1u << 2)

typedef struct {
cmark_mem *mem;
cmark_chunk input;
unsigned flags;
int line;
bufsize_t pos;
int block_offset;
Expand Down Expand Up @@ -190,6 +195,7 @@ static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset,
int i;
e->mem = mem;
e->input = *chunk;
e->flags = 0;
e->line = line_number;
e->pos = 0;
e->block_offset = block_offset;
Expand Down Expand Up @@ -885,7 +891,52 @@ static cmark_node *handle_pointy_brace(subject *subj, int options) {
}

// finally, try to match an html tag
matchlen = scan_html_tag(&subj->input, subj->pos);
if (subj->pos + 2 <= subj->input.len) {
int c = subj->input.data[subj->pos];
if (c == '!') {
c = subj->input.data[subj->pos+1];
if (c == '-') {
matchlen = scan_html_comment(&subj->input, subj->pos + 2);
if (matchlen > 0)
matchlen += 2;
} else if (c == '[') {
if ((subj->flags & FLAG_SKIP_HTML_CDATA) == 0) {
matchlen = scan_html_cdata(&subj->input, subj->pos + 2);
if (matchlen > 0) {
// The regex doesn't require the final "]]>". But if we're not at
// the end of input, it must come after the match. Otherwise,
// disable subsequent scans to avoid quadratic behavior.
matchlen += 5; // prefix "![", suffix "]]>"
if (subj->pos + matchlen > subj->input.len) {
subj->flags |= FLAG_SKIP_HTML_CDATA;
matchlen = 0;
}
}
}
} else if ((subj->flags & FLAG_SKIP_HTML_DECLARATION) == 0) {
matchlen = scan_html_declaration(&subj->input, subj->pos + 1);
if (matchlen > 0) {
matchlen += 2; // prefix "!", suffix ">"
if (subj->pos + matchlen > subj->input.len) {
subj->flags |= FLAG_SKIP_HTML_DECLARATION;
matchlen = 0;
}
}
}
} else if (c == '?') {
if ((subj->flags & FLAG_SKIP_HTML_PI) == 0) {
// Note that we allow an empty match.
matchlen = scan_html_pi(&subj->input, subj->pos + 1);
matchlen += 3; // prefix "?", suffix "?>"
if (subj->pos + matchlen > subj->input.len) {
subj->flags |= FLAG_SKIP_HTML_PI;
matchlen = 0;
}
}
} else {
matchlen = scan_html_tag(&subj->input, subj->pos);
}
}
if (matchlen > 0) {
const unsigned char *src = subj->input.data + subj->pos - 1;
bufsize_t len = matchlen + 1;
Expand Down
Loading

0 comments on commit 1944fe7

Please sign in to comment.