Skip to content

Commit

Permalink
Merge pull request #343 from digitalmoksha/bw-sync-with-0.29.0.gfm.12
Browse files Browse the repository at this point in the history
Sync with cmark-gfm-0.29.0.gfm.12
  • Loading branch information
charlottia authored Sep 30, 2023
2 parents 8f3e665 + e5bab5a commit 8ac8845
Show file tree
Hide file tree
Showing 11 changed files with 166 additions and 33 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Collective](https://opencollective.com/comrak/all/badge.svg?label=financial+cont
[![crates.io version](https://img.shields.io/crates/v/comrak.svg)](https://crates.io/crates/comrak)
[![docs.rs](https://docs.rs/comrak/badge.svg)](https://docs.rs/comrak)

Rust port of [github's `cmark-gfm`](https://github.com/github/cmark). *Currently synced with release `0.29.0.gfm.11`*.
Rust port of [GitHub's `cmark-gfm`](https://github.com/github/cmark). *Currently synced with release `0.29.0.gfm.12`*.

- [Installation](#installation)
- [Usage](#usage)
Expand Down
4 changes: 2 additions & 2 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::ctype::{isalpha, isdigit, ispunct, isspace};
use crate::nodes::TableAlignment;
use crate::nodes::{
AstNode, ListDelimType, ListType, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink,
NodeValue,
NodeTable, NodeValue,
};
#[cfg(feature = "shortcodes")]
use crate::parser::shortcodes::NodeShortCode;
Expand Down Expand Up @@ -732,7 +732,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
if in_header && node.next_sibling().is_none() {
let table = &node.parent().unwrap().parent().unwrap().data.borrow().value;
let alignments = match *table {
NodeValue::Table(ref alignments) => alignments,
NodeValue::Table(NodeTable { ref alignments, .. }) => alignments,
_ => panic!(),
};

Expand Down
4 changes: 2 additions & 2 deletions src/html.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! The HTML renderer for the CommonMark AST, as well as helper functions.
use crate::ctype::isspace;
use crate::nodes::{
AstNode, ListType, NodeCode, NodeFootnoteDefinition, NodeValue, TableAlignment,
AstNode, ListType, NodeCode, NodeFootnoteDefinition, NodeTable, NodeValue, TableAlignment,
};
use crate::parser::{Options, Plugins};
use crate::scanners;
Expand Down Expand Up @@ -893,7 +893,7 @@ impl<'o> HtmlFormatter<'o> {

let table = &node.parent().unwrap().parent().unwrap().data.borrow().value;
let alignments = match *table {
NodeValue::Table(ref alignments) => alignments,
NodeValue::Table(NodeTable { ref alignments, .. }) => alignments,
_ => panic!(),
};

Expand Down
18 changes: 17 additions & 1 deletion src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pub enum NodeValue {

/// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec.
/// Contains table rows.
Table(Vec<TableAlignment>),
Table(NodeTable),

/// **Block**. A table row. The `bool` represents whether the row is the header row or not.
/// Contains table cells.
Expand Down Expand Up @@ -180,6 +180,22 @@ impl TableAlignment {
}
}

/// The metadata of a table, carried by the `NodeValue::Table` variant.
///
/// Besides the column alignments it holds the counters that the table parser
/// maintains in order to work out how many cells were autocompleted.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct NodeTable {
/// The table alignments, indexed by column position.
pub alignments: Vec<TableAlignment>,

/// Number of columns of the table. The parser initialises it to the number
/// of cells found in the header row.
pub num_columns: usize,

/// Number of rows of the table. Starts at zero and is incremented by the
/// parser as rows are recorded.
pub num_rows: usize,

/// Number of non-empty, non-autocompleted cells, i.e. cells that were
/// parsed from the input rather than added to pad a row that was too short.
pub num_nonempty_cells: usize,
}

/// An inline [code span](https://github.github.com/gfm/#code-spans).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NodeCode {
Expand Down
1 change: 1 addition & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> {
self.advance_offset(line, adv, false);
} else if !indented
&& self.options.extension.footnotes
&& depth < MAX_LIST_DEPTH
&& unwrap_into(
scanners::footnote_definition(&line[self.first_nonspace..]),
&mut matched,
Expand Down
68 changes: 64 additions & 4 deletions src/parser/table.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::arena_tree::Node;
use crate::nodes;
use crate::nodes::{Ast, AstNode, NodeValue, TableAlignment};
use crate::nodes::{Ast, AstNode, NodeTable, NodeValue, TableAlignment};
use crate::parser::Parser;
use crate::scanners;
use crate::strings::trim;
Expand All @@ -9,14 +9,17 @@ use std::cmp::min;

use super::inlines::count_newlines;

// Limit to prevent a malicious input from causing a denial of service.
// `try_opening_row` compares the number of autocompleted cells of the current
// table against this value and declines to open another row once it has been
// exceeded.
const MAX_AUTOCOMPLETED_CELLS: usize = 500_000;

pub fn try_opening_block<'a>(
parser: &mut Parser<'a, '_, '_>,
container: &'a AstNode<'a>,
line: &[u8],
) -> Option<(&'a AstNode<'a>, bool, bool)> {
let aligns = match container.data.borrow().value {
NodeValue::Paragraph => None,
NodeValue::Table(ref aligns) => Some(aligns.clone()),
NodeValue::Table(NodeTable { ref alignments, .. }) => Some(alignments.clone()),
_ => return None,
};

Expand Down Expand Up @@ -74,7 +77,15 @@ fn try_opening_header<'a>(
}

let start = container.data.borrow().sourcepos.start;
let child = Ast::new(NodeValue::Table(alignments), start);
let child = Ast::new(
NodeValue::Table(NodeTable {
alignments,
num_columns: header_row.cells.len(),
num_rows: 0,
num_nonempty_cells: 0,
}),
start,
);
let table = parser.arena.alloc(Node::new(RefCell::new(child)));
container.append(table);

Expand All @@ -88,7 +99,10 @@ fn try_opening_header<'a>(
);
}

for cell in header_row.cells {
let mut i = 0;

while i < header_row.cells.len() {
let cell = &header_row.cells[i];
let ast_cell = parser.add_child(
header,
NodeValue::TableCell,
Expand All @@ -100,8 +114,12 @@ fn try_opening_header<'a>(
start.column_add((cell.end_offset - header_row.paragraph_offset) as isize);
ast.internal_offset = cell.internal_offset;
ast.content = cell.content.clone();

i += 1;
}

incr_table_row_count(container, i);

let offset = line.len() - 1 - parser.offset;
parser.advance_offset(line, offset, false);

Expand All @@ -117,6 +135,11 @@ fn try_opening_row<'a>(
if parser.blank {
return None;
}

if get_num_autocompleted_cells(container) > MAX_AUTOCOMPLETED_CELLS {
return None;
}

let sourcepos = container.data.borrow().sourcepos;
let this_row = match row(&line[parser.first_nonspace..]) {
Some(this_row) => this_row,
Expand Down Expand Up @@ -148,9 +171,12 @@ fn try_opening_row<'a>(
cell_ast.content = cell.content.clone();

last_column = cell_ast.sourcepos.end.column;

i += 1;
}

incr_table_row_count(container, i);

while i < alignments.len() {
parser.add_child(new_row, NodeValue::TableCell, last_column);
i += 1;
Expand Down Expand Up @@ -305,6 +331,40 @@ fn unescape_pipes(string: &[u8]) -> Vec<u8> {
v
}

// Record one more row on the table held by `container`, and add `i` to its
// `num_nonempty_cells` counter. `i` is the number of cells of that row which
// were parsed from the input file. (If a row is too short, its trailing cells
// are autocompleted; autocompleted cells are not counted in
// `num_nonempty_cells`.)
//
// Keeping these counters makes it possible to tell how many cells were
// autocompleted, so that a malicious input cannot generate a very large number
// of them and cause a denial of service.
//
// Returns `true` when `container` is a table and its counters were updated,
// `false` (with nothing modified) for any other kind of node.
fn incr_table_row_count<'a>(container: &'a AstNode<'a>, i: usize) -> bool {
    let mut ast = container.data.borrow_mut();

    if let NodeValue::Table(ref mut table) = ast.value {
        table.num_rows += 1;
        table.num_nonempty_cells += i;
        true
    } else {
        false
    }
}

// Calculate the number of autocompleted cells of the table held by
// `container`: the full grid size (`num_columns * num_rows`) minus the cells
// that were parsed from the input (`num_nonempty_cells`), clamped at zero.
// Any node that is not a table yields 0.
fn get_num_autocompleted_cells<'a>(container: &'a AstNode<'a>) -> usize {
    let ast = container.data.borrow();

    match ast.value {
        NodeValue::Table(ref table) => {
            let total_cells = table.num_columns * table.num_rows;

            // Clamps to 0 when more cells were parsed than the grid holds.
            total_cells.saturating_sub(table.num_nonempty_cells)
        }
        _ => 0,
    }
}

/// Reports whether `line` is accepted by `row`, i.e. whether `row(line)`
/// yields `Some` parsed table row rather than `None`.
pub fn matches(line: &[u8]) -> bool {
row(line).is_some()
}
1 change: 1 addition & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod footnotes;
mod fuzz;
mod header_ids;
mod options;
mod pathological;
mod plugins;
mod propfuzz;
mod regressions;
Expand Down
8 changes: 5 additions & 3 deletions src/tests/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,11 @@ fn exercise_full_api() {
let _: &String = &nfd.name;
let _: u32 = nfd.total_references;
}
nodes::NodeValue::Table(aligns) => {
let _: &Vec<nodes::TableAlignment> = aligns;
match aligns[0] {
nodes::NodeValue::Table(nt) => {
let _: &Vec<nodes::TableAlignment> = &nt.alignments;
let _: usize = nt.num_nonempty_cells;
let _: usize = nt.num_rows;
match nt.alignments[0] {
nodes::TableAlignment::None => {}
nodes::TableAlignment::Left => {}
nodes::TableAlignment::Center => {}
Expand Down
67 changes: 67 additions & 0 deletions src/tests/pathological.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use super::*;
use ntest::timeout;

// input: python3 -c 'n = 50000; print("*a_ " * n)'
//
// Many unmatched emphasis delimiters must render within the timeout. The
// expected output is the input itself wrapped in a single paragraph.
#[test]
#[timeout(4000)]
fn pathological_emphases() {
    let n = 50_000;
    // `repeat` already yields a `String`; no `format!` wrapper is needed.
    let input = "*a_ ".repeat(n);
    // Right-most space is trimmed in output.
    let exp = format!("<p>{}</p>\n", &input[..input.len() - 1]);

    html(&input, &exp);
}
// input: python3 -c 'n = 100000; print("|" + "x|" * n + "\n|" + "-|" * n)'
//
// A header line and a delimiter line with `n` columns each. The expected
// output is the raw input wrapped in a paragraph.
#[test]
#[timeout(4000)]
fn pathological_table_columns_1() {
    let n = 100_000;
    let header_cells = "x|".repeat(n);
    let delimiter_cells = "-|".repeat(n);
    let input = format!("|{}\n|{}", header_cells, delimiter_cells);
    let exp = format!("<p>{}</p>\n", input);

    html_opts!([extension.table], &input, &exp);
}

// input: python3 -c 'n = 100000; print("|" + "x|" * n + "\n|" + "-|" * n + "\n" + "a\n" * n)'
//
// Same `n`-column header and delimiter as above, followed by `n` one-cell
// lines.
#[test]
#[timeout(4000)]
fn pathological_table_columns_2() {
    let n = 100_000;
    let input = format!(
        "|{}\n|{}\n{}",
        "x|".repeat(n),
        "-|".repeat(n),
        "a\n".repeat(n)
    );

    let mut extension = ExtensionOptions::default();
    extension.table = true;

    let options = Options {
        extension,
        parse: Default::default(),
        render: RenderOptions::default(),
    };

    // Not interested in the actual html, just that we don't timeout
    markdown_to_html(&input, &options);
}

// input: python3 -c 'n = 10000; print("[^1]:" * n + "\n" * n)'
//
// `n` footnote-definition markers on one line followed by `n` newlines.
// The expected rendered output is empty.
#[test]
#[timeout(4000)]
fn pathological_footnotes() {
    let n = 10_000;
    let mut input = "[^1]:".repeat(n);
    input.push_str(&"\n".repeat(n));
    let exp = "";

    html_opts!([extension.footnotes], &input, &exp);
}
16 changes: 0 additions & 16 deletions src/tests/regressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,6 @@ fn regression_back_to_back_ranges() {
);
}

#[test]
#[timeout(4000)]
fn pathological_emphases() {
    // 50000 repetitions of an unmatched emphasis opener/closer pair.
    let s = "*a_ ".repeat(50000);

    // Right-most space is trimmed in output.
    let exp = format!("<p>{}</p>\n", &s[..s.len() - 1]);

    html(&s, &exp);
}

#[test]
fn no_panic_on_empty_bookended_atx_headers() {
html("# #", "<h1></h1>\n");
Expand Down
10 changes: 6 additions & 4 deletions src/xml.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::nodes::{AstNode, ListType, NodeCode, NodeValue};
use crate::nodes::{AstNode, ListType, NodeCode, NodeTable, NodeValue};
use crate::parser::{Options, Plugins};
use once_cell::sync::Lazy;
use std::cmp;
Expand Down Expand Up @@ -218,11 +218,13 @@ impl<'o> XmlFormatter<'o> {
let header_row = &ancestors.next().unwrap().data.borrow().value;
let table = &ancestors.next().unwrap().data.borrow().value;

if let (NodeValue::TableRow(true), NodeValue::Table(aligns)) =
(header_row, table)
if let (
NodeValue::TableRow(true),
NodeValue::Table(NodeTable { alignments, .. }),
) = (header_row, table)
{
let ix = node.preceding_siblings().count() - 1;
if let Some(xml_align) = aligns[ix].xml_name() {
if let Some(xml_align) = alignments[ix].xml_name() {
write!(self.output, " align=\"{}\"", xml_align)?;
}
}
Expand Down

0 comments on commit 8ac8845

Please sign in to comment.