Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle unknown markdown #133

Merged
merged 4 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ pub struct Cli {
/// Quiet: do not print anything to stdout. The exit code will still be 0 if-and-only-iff any elements match.
#[arg(long, short)]
pub(crate) quiet: bool,

// See: tree.rs > Lookups::unknown_markdown.
#[arg(long, hide = true)]
pub(crate) allow_unknown_markdown: bool,
}

impl Cli {
Expand Down
12 changes: 11 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,17 @@ where
W: Write,
{
let ast = markdown::to_mdast(&contents, &markdown::ParseOptions::gfm()).unwrap();
let mdqs = MdElem::read(ast, &ReadOptions::default()).unwrap();
let read_options = ReadOptions {
validate_no_conflicting_links: false,
allow_unknown_markdown: cli.allow_unknown_markdown,
};
let mdqs = match MdElem::read(ast, &read_options) {
Ok(mdqs) => mdqs,
Err(err) => {
eprintln!("error: {}", err);
return false;
}
};

let selectors_str = cli.selector_string();
let selectors = match MdqRefSelector::parse(&selectors_str) {
Expand Down
2 changes: 1 addition & 1 deletion src/select/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ impl MdqRefSelector {
children.iter().map(|child| MdElemRef::Inline(child)).collect()
}
Inline::Footnote(footnote) => vec![MdElemRef::Doc(&footnote.text)],
Inline::Link(link) => vec![MdElemRef::Link(link)], // TODO find a test case that hits this to make sure it doesn't infinite-loop!
Inline::Link(link) => vec![MdElemRef::Link(link)], // issue #84: find a test case that hits this to make sure it doesn't infinite-loop!
Inline::Image(image) => vec![MdElemRef::Image(image)],
Inline::Text(Text { .. }) => Vec::new(),
},
Expand Down
118 changes: 96 additions & 22 deletions src/tree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::backtrace::Backtrace;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::Debug;
use std::fmt::{Debug, Display, Formatter, Write};
use std::hash::{Hash, Hasher};
use std::vec::IntoIter;

Expand Down Expand Up @@ -88,14 +89,8 @@ pub struct ReadOptions {
/// [1]: https://example.com/one
/// ```
pub validate_no_conflicting_links: bool,
}

impl Default for ReadOptions {
fn default() -> Self {
Self {
validate_no_conflicting_links: false,
}
}
pub allow_unknown_markdown: bool,
}

pub type TableRow = Vec<Line>;
Expand Down Expand Up @@ -200,10 +195,57 @@ pub enum InvalidMd {
Unsupported(mdast::Node),
NonListItemDirectlyUnderList(mdast::Node),
NonRowDirectlyUnderTable(mdast::Node),
NonInlineWhereInlineExpected,
NonInlineWhereInlineExpected(MdElem),
MissingReferenceDefinition(String),
ConflictingReferenceDefinition(String),
InternalError,
InternalError(PartialEqBacktrace),
UnknownMarkdown(&'static str),
}

/// A wrapper for [Backtrace] that implements [PartialEq] to always return `true`. This lets us use it in a struct
/// while still letting us use `#[derive(PartialEq)]`
#[derive(Debug)]
pub struct PartialEqBacktrace(Backtrace);

impl PartialEq for PartialEqBacktrace {
fn eq(&self, _other: &Self) -> bool {
true
}
}

impl Display for InvalidMd {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
InvalidMd::Unsupported(node) => {
write!(f, "unsupported node: {:?}", node)
}
InvalidMd::NonListItemDirectlyUnderList(node) => {
write!(f, "expected a list item, but found: {:?}", node)
}
InvalidMd::NonRowDirectlyUnderTable(node) => {
write!(f, "expected a row, but found: {:?}", node)
}
InvalidMd::NonInlineWhereInlineExpected(node) => {
write!(f, "expected an inline element, but found: {:?}", node)
}
InvalidMd::MissingReferenceDefinition(id) => {
write!(f, "couldn't find definition for link/image/footnote: {}", id)
}
InvalidMd::ConflictingReferenceDefinition(id) => {
write!(f, "found multiple definitions for link/image/footnote: {}", id)
}
InvalidMd::InternalError(backtrace) => {
f.write_str("internal error\n")?;
std::fmt::Display::fmt(&backtrace.0, f)
}
InvalidMd::UnknownMarkdown(description) => {
write!(f, "encountered unknown markdown: {}\n\n", description)?;
f.write_str("* Please consider reporting this at https://github.com/yshavit/mdq/issues\n")?;
f.write_str("* You can suppress this error by using --allow-unknown-markdown.")
}
}?;
f.write_char('\n')
}
}

#[derive(Debug, PartialEq, Hash)]
Expand Down Expand Up @@ -316,7 +358,7 @@ impl MdElem {
})),
mdast::Node::ImageReference(node) => MdElem::Inline(Inline::Image(Image {
alt: node.alt,
link: lookups.resolve_link(node.identifier, node.label, node.reference_kind)?,
link: lookups.resolve_link(node.identifier, node.label, node.reference_kind, lookups)?,
})),
mdast::Node::Link(node) => MdElem::Inline(Inline::Link(Link {
text: MdElem::inlines(node.children, lookups)?,
Expand All @@ -328,10 +370,10 @@ impl MdElem {
})),
mdast::Node::LinkReference(node) => MdElem::Inline(Inline::Link(Link {
text: MdElem::inlines(node.children, lookups)?,
link_definition: lookups.resolve_link(node.identifier, node.label, node.reference_kind)?,
link_definition: lookups.resolve_link(node.identifier, node.label, node.reference_kind, lookups)?,
})),
mdast::Node::FootnoteReference(node) => {
let definition = lookups.resolve_footnote(&node.identifier, &node.label)?;
let definition = lookups.resolve_footnote(&node.identifier, &node.label, lookups)?;
MdElem::Inline(Inline::Footnote(Footnote {
label: node.label.unwrap_or(node.identifier),
text: MdElem::all(definition.children.clone(), lookups)?,
Expand Down Expand Up @@ -383,7 +425,7 @@ impl MdElem {
let mut column = Vec::with_capacity(cell_nodes.len());
for cell_node in cell_nodes {
let mdast::Node::TableCell(table_cell) = cell_node else {
return Err(InvalidMd::InternalError);
return Err(InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture())));
};
let cell_contents = Self::inlines(table_cell.children, lookups)?;
column.push(cell_contents);
Expand All @@ -397,7 +439,8 @@ impl MdElem {
}
mdast::Node::ThematicBreak(_) => m_node!(MdElem::ThematicBreak),
mdast::Node::TableRow(_) | mdast::Node::TableCell(_) | mdast::Node::ListItem(_) => {
return Err(InvalidMd::InternalError); // should have been handled by Node::Table
// should have been handled by Node::Table
return Err(InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture())));
}
mdast::Node::Definition(_) => return Ok(Vec::new()),
mdast::Node::Paragraph(node) => m_node!(MdElem::Paragraph {
Expand Down Expand Up @@ -532,7 +575,7 @@ impl MdElem {
let mut result = Vec::with_capacity(mdq_children.len());
for child in mdq_children {
let MdElem::Inline(inline) = child else {
return Err(InvalidMd::NonInlineWhereInlineExpected);
return Err(InvalidMd::NonInlineWhereInlineExpected(child));
};
// If both this and the previous were plain text, then just combine the texts. This can happen if there was
// a Node::Break between them.
Expand Down Expand Up @@ -595,6 +638,7 @@ where
struct Lookups {
link_definitions: HashMap<String, mdast::Definition>,
footnote_definitions: HashMap<String, mdast::FootnoteDefinition>,
allow_unknown_markdown: bool,
}

impl Lookups {
Expand All @@ -604,21 +648,31 @@ impl Lookups {
let mut result = Self {
link_definitions: HashMap::with_capacity(DEFAULT_CAPACITY),
footnote_definitions: HashMap::with_capacity(DEFAULT_CAPACITY),
allow_unknown_markdown: read_opts.allow_unknown_markdown,
};

result.build_lookups(node, &read_opts)?;

Ok(result)
}

fn unknown_markdown(&self, description: &'static str) -> Result<(), InvalidMd> {
if self.allow_unknown_markdown {
Ok(())
} else {
Err(InvalidMd::UnknownMarkdown(description))
}
}

fn resolve_link(
&self,
identifier: String,
label: Option<String>,
reference_kind: mdast::ReferenceKind,
lookups: &Lookups,
) -> Result<LinkDefinition, InvalidMd> {
if let None = label {
todo!("What is this case???");
lookups.unknown_markdown("link label was None")?;
}
let Some(definition) = self.link_definitions.get(&identifier) else {
let human_visible_identifier = label.unwrap_or(identifier);
Expand All @@ -641,9 +695,10 @@ impl Lookups {
&self,
identifier: &String,
label: &Option<String>,
lookups: &Lookups,
) -> Result<&mdast::FootnoteDefinition, InvalidMd> {
if label.is_none() {
todo!("What is this case???");
lookups.unknown_markdown("footnote label was None")?;
}
let Some(definition) = self.footnote_definitions.get(identifier) else {
let human_visible_identifier = label.to_owned().unwrap_or_else(|| identifier.to_string());
Expand Down Expand Up @@ -836,7 +891,7 @@ mod tests {

check!(&root.children[0], Node::List(ul), lookups => m_node!(MdElem::List{starting_index, items}) = {
for child in &ul.children {
check!(error: child, Node::ListItem(_), lookups => InvalidMd::InternalError);
check!(error: child, Node::ListItem(_), lookups => internal_error());
}
assert_eq!(starting_index, None);
assert_eq!(items, vec![
Expand All @@ -856,7 +911,7 @@ mod tests {
});
check!(&root.children[1], Node::List(ol), lookups => m_node!(MdElem::List{starting_index, items}) = {
for child in &ol.children {
check!(error: child, Node::ListItem(_), lookups => InvalidMd::InternalError);
check!(error: child, Node::ListItem(_), lookups => internal_error());
}
assert_eq!(starting_index, Some(4));
assert_eq!(items, vec![
Expand Down Expand Up @@ -1681,9 +1736,9 @@ mod tests {
);
// Do a spot check for the rows and cells; mainly just so that we'll have called check! on them.
assert_eq!(table_node.children.len(), 2); // two rows
check!(error: &table_node.children[0], Node::TableRow(tr), lookups => InvalidMd::InternalError, {
check!(error: &table_node.children[0], Node::TableRow(tr), lookups => internal_error(), {
assert_eq!(tr.children.len(), 4); // four columns
check!(error: &tr.children[0], Node::TableCell(_), lookups => InvalidMd::InternalError);
check!(error: &tr.children[0], Node::TableCell(_), lookups => internal_error());
})
});
}
Expand Down Expand Up @@ -1750,6 +1805,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
indoc! {r#"
Hello [world][1]
Expand All @@ -1774,6 +1830,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
indoc! {r#"
This [looks like a link][^1], but mdast parses it as a footnote.
Expand Down Expand Up @@ -1804,6 +1861,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
md,
);
Expand All @@ -1822,6 +1880,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
md,
);
Expand All @@ -1839,6 +1898,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links: true,
allow_unknown_markdown: false,
},
indoc! {r#"
This [link is duplicated][1].
Expand All @@ -1859,6 +1919,7 @@ mod tests {
&ParseOptions::gfm(),
ReadOptions {
validate_no_conflicting_links,
allow_unknown_markdown: false,
},
indoc! {r#"
This [link is duplicated][1].
Expand Down Expand Up @@ -2203,4 +2264,17 @@ mod tests {
nodes.iter().for_each(|n| build(&mut s, n));
s
}

fn internal_error() -> InvalidMd {
InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture()))
}

impl Default for ReadOptions {
fn default() -> Self {
Self {
validate_no_conflicting_links: false,
allow_unknown_markdown: false,
}
}
}
}