diff --git a/src/cli.rs b/src/cli.rs index 0b37007..a5a9984 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -40,6 +40,10 @@ pub struct Cli { /// Quiet: do not print anything to stdout. The exit code will still be 0 if-and-only-iff any elements match. #[arg(long, short)] pub(crate) quiet: bool, + + // See: tree.rs > Lookups::unknown_markdown. + #[arg(long, hide = true)] + pub(crate) allow_unknown_markdown: bool, } impl Cli { diff --git a/src/lib.rs b/src/lib.rs index 425b17b..0659520 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,7 +49,17 @@ where W: Write, { let ast = markdown::to_mdast(&contents, &markdown::ParseOptions::gfm()).unwrap(); - let mdqs = MdElem::read(ast, &ReadOptions::default()).unwrap(); + let read_options = ReadOptions { + validate_no_conflicting_links: false, + allow_unknown_markdown: cli.allow_unknown_markdown, + }; + let mdqs = match MdElem::read(ast, &read_options) { + Ok(mdqs) => mdqs, + Err(err) => { + eprintln!("error: {}", err); + return false; + } + }; let selectors_str = cli.selector_string(); let selectors = match MdqRefSelector::parse(&selectors_str) { diff --git a/src/select/api.rs b/src/select/api.rs index 37ca003..a41beba 100644 --- a/src/select/api.rs +++ b/src/select/api.rs @@ -235,7 +235,7 @@ impl MdqRefSelector { children.iter().map(|child| MdElemRef::Inline(child)).collect() } Inline::Footnote(footnote) => vec![MdElemRef::Doc(&footnote.text)], - Inline::Link(link) => vec![MdElemRef::Link(link)], // TODO find a test case that hits this to make sure it doesn't infinite-loop! + Inline::Link(link) => vec![MdElemRef::Link(link)], // issue #84: find a test case that hits this to make sure it doesn't infinite-loop! Inline::Image(image) => vec![MdElemRef::Image(image)], Inline::Text(Text { .. }) => Vec::new(), }, diff --git a/src/tree.rs b/src/tree.rs index bdc23eb..0221f42 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,6 +1,7 @@ +use std::backtrace::Backtrace; use std::collections::hash_map::Entry; use std::collections::HashMap; -use std::fmt::Debug; +use std::fmt::{Debug, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::vec::IntoIter; @@ -88,14 +89,8 @@ pub struct ReadOptions { /// [1]: https://example.com/one /// ``` pub validate_no_conflicting_links: bool, -} -impl Default for ReadOptions { - fn default() -> Self { - Self { - validate_no_conflicting_links: false, - } - } + pub allow_unknown_markdown: bool, } pub type TableRow = Vec; @@ -200,10 +195,57 @@ pub enum InvalidMd { Unsupported(mdast::Node), NonListItemDirectlyUnderList(mdast::Node), NonRowDirectlyUnderTable(mdast::Node), - NonInlineWhereInlineExpected, + NonInlineWhereInlineExpected(MdElem), MissingReferenceDefinition(String), ConflictingReferenceDefinition(String), - InternalError, + InternalError(PartialEqBacktrace), + UnknownMarkdown(&'static str), +} + +/// A wrapper for [Backtrace] that implements [PartialEq] to always return `true`. This lets us use it in a struct +/// while still letting us use `#[derive(PartialEq)]` +#[derive(Debug)] +pub struct PartialEqBacktrace(Backtrace); + +impl PartialEq for PartialEqBacktrace { + fn eq(&self, _other: &Self) -> bool { + true + } +} + +impl Display for InvalidMd { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + InvalidMd::Unsupported(node) => { + write!(f, "unsupported node: {:?}", node) + } + InvalidMd::NonListItemDirectlyUnderList(node) => { + write!(f, "expected a list item, but found: {:?}", node) + } + InvalidMd::NonRowDirectlyUnderTable(node) => { + write!(f, "expected a row, but found: {:?}", node) + } + InvalidMd::NonInlineWhereInlineExpected(node) => { + write!(f, "expected an inline element, but found: {:?}", node) + } + InvalidMd::MissingReferenceDefinition(id) => { + write!(f, "couldn't find definition for link/image/footnote: {}", id) + } + InvalidMd::ConflictingReferenceDefinition(id) => { + write!(f, "found multiple definitions for link/image/footnote: {}", id) + } + InvalidMd::InternalError(backtrace) => { + f.write_str("internal error\n")?; + std::fmt::Display::fmt(&backtrace.0, f) + } + InvalidMd::UnknownMarkdown(description) => { + write!(f, "encountered unknown markdown: {}\n\n", description)?; + f.write_str("* Please consider reporting this at https://github.com/yshavit/mdq/issues\n")?; + f.write_str("* You can suppress this error by using --allow-unknown-markdown.") + } + }?; + f.write_char('\n') + } } #[derive(Debug, PartialEq, Hash)] @@ -316,7 +358,7 @@ impl MdElem { })), mdast::Node::ImageReference(node) => MdElem::Inline(Inline::Image(Image { alt: node.alt, - link: lookups.resolve_link(node.identifier, node.label, node.reference_kind)?, + link: lookups.resolve_link(node.identifier, node.label, node.reference_kind, lookups)?, })), mdast::Node::Link(node) => MdElem::Inline(Inline::Link(Link { text: MdElem::inlines(node.children, lookups)?, @@ -328,10 +370,10 @@ impl MdElem { })), mdast::Node::LinkReference(node) => MdElem::Inline(Inline::Link(Link { text: MdElem::inlines(node.children, lookups)?, - link_definition: lookups.resolve_link(node.identifier, node.label, node.reference_kind)?, + link_definition: lookups.resolve_link(node.identifier, node.label, node.reference_kind, lookups)?, })), mdast::Node::FootnoteReference(node) => { - let definition = lookups.resolve_footnote(&node.identifier, &node.label)?; + let definition = lookups.resolve_footnote(&node.identifier, &node.label, lookups)?; MdElem::Inline(Inline::Footnote(Footnote { label: node.label.unwrap_or(node.identifier), text: MdElem::all(definition.children.clone(), lookups)?, @@ -383,7 +425,7 @@ impl MdElem { let mut column = Vec::with_capacity(cell_nodes.len()); for cell_node in cell_nodes { let mdast::Node::TableCell(table_cell) = cell_node else { - return Err(InvalidMd::InternalError); + return Err(InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture()))); }; let cell_contents = Self::inlines(table_cell.children, lookups)?; column.push(cell_contents); @@ -397,7 +439,8 @@ impl MdElem { } mdast::Node::ThematicBreak(_) => m_node!(MdElem::ThematicBreak), mdast::Node::TableRow(_) | mdast::Node::TableCell(_) | mdast::Node::ListItem(_) => { - return Err(InvalidMd::InternalError); // should have been handled by Node::Table + // should have been handled by Node::Table + return Err(InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture()))); } mdast::Node::Definition(_) => return Ok(Vec::new()), mdast::Node::Paragraph(node) => m_node!(MdElem::Paragraph { @@ -532,7 +575,7 @@ impl MdElem { let mut result = Vec::with_capacity(mdq_children.len()); for child in mdq_children { let MdElem::Inline(inline) = child else { - return Err(InvalidMd::NonInlineWhereInlineExpected); + return Err(InvalidMd::NonInlineWhereInlineExpected(child)); }; // If both this and the previous were plain text, then just combine the texts. This can happen if there was // a Node::Break between them. @@ -595,6 +638,7 @@ where struct Lookups { link_definitions: HashMap, footnote_definitions: HashMap, + allow_unknown_markdown: bool, } impl Lookups { @@ -604,6 +648,7 @@ impl Lookups { let mut result = Self { link_definitions: HashMap::with_capacity(DEFAULT_CAPACITY), footnote_definitions: HashMap::with_capacity(DEFAULT_CAPACITY), + allow_unknown_markdown: read_opts.allow_unknown_markdown, }; result.build_lookups(node, &read_opts)?; @@ -611,14 +656,23 @@ impl Lookups { Ok(result) } + fn unknown_markdown(&self, description: &'static str) -> Result<(), InvalidMd> { + if self.allow_unknown_markdown { + Ok(()) + } else { + Err(InvalidMd::UnknownMarkdown(description)) + } + } + fn resolve_link( &self, identifier: String, label: Option, reference_kind: mdast::ReferenceKind, + lookups: &Lookups, ) -> Result { if let None = label { - todo!("What is this case???"); + lookups.unknown_markdown("link label was None")?; } let Some(definition) = self.link_definitions.get(&identifier) else { let human_visible_identifier = label.unwrap_or(identifier); @@ -641,9 +695,10 @@ impl Lookups { &self, identifier: &String, label: &Option, + lookups: &Lookups, ) -> Result<&mdast::FootnoteDefinition, InvalidMd> { if label.is_none() { - todo!("What is this case???"); + lookups.unknown_markdown("footnote label was None")?; } let Some(definition) = self.footnote_definitions.get(identifier) else { let human_visible_identifier = label.to_owned().unwrap_or_else(|| identifier.to_string()); @@ -836,7 +891,7 @@ mod tests { check!(&root.children[0], Node::List(ul), lookups => m_node!(MdElem::List{starting_index, items}) = { for child in &ul.children { - check!(error: child, Node::ListItem(_), lookups => InvalidMd::InternalError); + check!(error: child, Node::ListItem(_), lookups => internal_error()); } assert_eq!(starting_index, None); assert_eq!(items, vec![ @@ -856,7 +911,7 @@ mod tests { }); check!(&root.children[1], Node::List(ol), lookups => m_node!(MdElem::List{starting_index, items}) = { for child in &ol.children { - check!(error: child, Node::ListItem(_), lookups => InvalidMd::InternalError); + check!(error: child, Node::ListItem(_), lookups => internal_error()); } assert_eq!(starting_index, Some(4)); assert_eq!(items, vec![ @@ -1681,9 +1736,9 @@ mod tests { ); // Do a spot check for the rows and cells; mainly just so that we'll have called check! on them. assert_eq!(table_node.children.len(), 2); // two rows - check!(error: &table_node.children[0], Node::TableRow(tr), lookups => InvalidMd::InternalError, { + check!(error: &table_node.children[0], Node::TableRow(tr), lookups => internal_error(), { assert_eq!(tr.children.len(), 4); // four columns - check!(error: &tr.children[0], Node::TableCell(_), lookups => InvalidMd::InternalError); + check!(error: &tr.children[0], Node::TableCell(_), lookups => internal_error()); }) }); } @@ -1750,6 +1805,7 @@ mod tests { &ParseOptions::gfm(), ReadOptions { validate_no_conflicting_links: true, + allow_unknown_markdown: false, }, indoc! {r#" Hello [world][1] @@ -1774,6 +1830,7 @@ mod tests { &ParseOptions::gfm(), ReadOptions { validate_no_conflicting_links: true, + allow_unknown_markdown: false, }, indoc! {r#" This [looks like a link][^1], but mdast parses it as a footnote. @@ -1804,6 +1861,7 @@ mod tests { &ParseOptions::gfm(), ReadOptions { validate_no_conflicting_links: true, + allow_unknown_markdown: false, }, md, ); @@ -1822,6 +1880,7 @@ mod tests { &ParseOptions::gfm(), ReadOptions { validate_no_conflicting_links: true, + allow_unknown_markdown: false, }, md, ); @@ -1839,6 +1898,7 @@ mod tests { &ParseOptions::gfm(), ReadOptions { validate_no_conflicting_links: true, + allow_unknown_markdown: false, }, indoc! {r#" This [link is duplicated][1]. @@ -1859,6 +1919,7 @@ mod tests { &ParseOptions::gfm(), ReadOptions { validate_no_conflicting_links, + allow_unknown_markdown: false, }, indoc! {r#" This [link is duplicated][1]. @@ -2203,4 +2264,17 @@ mod tests { nodes.iter().for_each(|n| build(&mut s, n)); s } + + fn internal_error() -> InvalidMd { + InvalidMd::InternalError(PartialEqBacktrace(Backtrace::force_capture())) + } + + impl Default for ReadOptions { + fn default() -> Self { + Self { + validate_no_conflicting_links: false, + allow_unknown_markdown: false, + } + } + } }