Skip to content

Commit

Permalink
add ability to select tables (#163)
Browse files Browse the repository at this point in the history
At long last! Table selectors.

Syntax:

```
:-: headers matcher :-: rows matcher
```

- The `headers matcher` _must_ be specified explicitly: if you want "all
columns", use `*`
- Header rows are always matched, regardless of the `rows matcher`
- If the table is jagged (not all rows have the same number of columns),
it will be normalized by extending all short rows with empty columns.
This is a departure from the official spec, which says that the number
of columns is dictated by the header row, and all longer rows are
truncated. I'm making that departure intentionally, because I think it's
nicer, and the output I'll provide will always be valid markdown. I may
later provide a switch to control that behavior.

This resolves #141.
  • Loading branch information
yshavit authored Aug 7, 2024
1 parent 23de6d1 commit 2ff2d7d
Show file tree
Hide file tree
Showing 22 changed files with 1,076 additions and 56 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,14 @@ You can select...
```bash
$ cat example.md | mdq 'P: foo' # find paragraphs containing "foo"
```

- Tables

```bash
$ cat example.md | mdq ':-: "some headers" :-: "some rows"'
```
(Table selection differs from other selections in that you can actually select only certain headers and rows.
See the wiki for more.)

The `foo`s and `bar`s above can be:

Expand Down
53 changes: 44 additions & 9 deletions src/fmt_md.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::link_transform::LinkLabel;
use crate::output::{Block, Output, SimpleWrite};
use crate::str_utils::{pad_to, standard_align, CountingWriter};
use crate::tree::*;
use crate::tree_ref::{ListItemRef, MdElemRef};
use crate::tree_ref::{ListItemRef, MdElemRef, TableSlice};

pub struct MdOptions {
pub link_reference_placement: ReferencePlacement,
Expand Down Expand Up @@ -156,7 +156,8 @@ impl<'s, 'a> MdWriterState<'s, 'a> {
MdElemRef::Paragraph(para) => self.write_paragraph(out, para),
MdElemRef::BlockQuote(block) => self.write_block_quote(out, block),
MdElemRef::List(list) => self.write_list(out, list),
MdElemRef::Table(table) => self.write_table(out, table),
MdElemRef::Table(table) => self.write_table(out, table.into()), // TODO maybe have a generic table trait, so I don't need to do the copying?
MdElemRef::TableSlice(table) => self.write_table(out, table),
MdElemRef::Inline(inline) => {
self.inlines_writer.write_inline_element(out, inline);
}
Expand Down Expand Up @@ -193,15 +194,16 @@ impl<'s, 'a> MdWriterState<'s, 'a> {
});
}

fn write_table<W: SimpleWrite>(&mut self, out: &mut Output<W>, table: &'a Table) {
let Table { alignments, rows } = table;
fn write_table<W: SimpleWrite>(&mut self, out: &mut Output<W>, table: TableSlice<'a>) {
let alignments = table.alignments();
let rows = table.rows();

let mut row_strs = Vec::with_capacity(rows.len());
let mut row_strs = Vec::with_capacity(alignments.len());

let mut column_widths = [0].repeat(alignments.len());
if !alignments.is_empty() {
for (idx, alignment) in alignments.iter().enumerate() {
let width = match standard_align(alignment) {
let width = match standard_align(*alignment) {
Some(Alignment::Left | Alignment::Right) => 2,
Some(Alignment::Center) => 3,
None => 1,
Expand All @@ -213,8 +215,8 @@ impl<'s, 'a> MdWriterState<'s, 'a> {
// Pre-calculate all the cells, and also how wide each column needs to be
for row in rows {
let mut col_strs = Vec::with_capacity(row.len());
for (idx, col) in row.iter().enumerate() {
let col_str = self.line_to_string(col);
for (idx, &col) in row.iter().enumerate() {
let col_str = col.map(|ln| self.line_to_string(ln)).unwrap_or("".to_string());
// Extend column_widths if needed. This happens if we had fewer alignments than columns in any
// row. I'm not sure if that's possible, but it's easy to guard against.
while column_widths.len() <= idx {
Expand Down Expand Up @@ -266,7 +268,7 @@ impl<'s, 'a> MdWriterState<'s, 'a> {
// Headers
if !alignments.is_empty() {
out.write_char('|');
for (idx, align) in alignments.iter().enumerate() {
for (idx, &align) in alignments.iter().enumerate() {
let width = column_widths
.get(idx)
.unwrap_or_else(|| match standard_align(align) {
Expand Down Expand Up @@ -517,6 +519,7 @@ pub mod tests {
BlockQuote(_),
List(_),
Table(_),
TableSlice(_),
});

#[test]
Expand Down Expand Up @@ -982,6 +985,38 @@ pub mod tests {
| i | ii | iii |"#},
);
}

/// Smoke test: renders a table through `MdElemRef::TableSlice` rather than `MdElemRef::Table`.
///
/// Both variants end up in the same table writer (one forwards to the other), so this only
/// guards against the delegation being accidentally left unimplemented.
#[test]
fn slice() {
    // Each row is a list of columns; each column is a list of inlines.
    let header_row = vec![vec![mdq_inline!("Left")], vec![mdq_inline!("Right")]];
    let data_row = vec![vec![mdq_inline!("a")], vec![mdq_inline!("b")]];

    let table = Table {
        alignments: vec![mdast::AlignKind::Left, mdast::AlignKind::Right],
        rows: vec![header_row, data_row],
    };

    let expected = indoc! {r#"
| Left | Right |
|:-----|------:|
| a | b |"#};
    let refs = vec![MdElemRef::TableSlice((&table).into())];
    check_render_refs(refs, expected);
}
}

mod thematic_break {
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ mod tree_ref;
mod tree_ref_serde;
mod tree_test_utils;
mod utils_for_test;
mod vec_utils;

pub fn run_in_memory(cli: &Cli, contents: &str) -> (bool, String) {
let mut out = Vec::with_capacity(256); // just a guess
Expand Down
52 changes: 38 additions & 14 deletions src/select/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::select::sel_list_item::ListItemSelector;
use crate::select::sel_list_item::ListItemType;
use crate::select::sel_paragraph::ParagraphSelector;
use crate::select::sel_section::SectionSelector;
use crate::select::sel_table::TableSliceSelector;
use crate::tree::{Formatting, Inline, Link, Text, TextVariant};
use crate::tree_ref::{HtmlRef, ListItemRef, MdElemRef};
use std::fmt::{Display, Formatter};
Expand All @@ -17,7 +18,8 @@ pub type ParseResult<T> = Result<T, ParseErrorReason>;

pub const SELECTOR_SEPARATOR: char = '|';

pub trait Selector<'a, I: Copy + Into<MdElemRef<'a>>> {
/// A selector over items of type `I`, where `I` can be converted into an [MdElemRef].
pub trait Selector<'a, I: Into<MdElemRef<'a>>> {
// TODO I should really rename all these 'a to 'md
/// Tries to select `item`: `Some` carries the selected element, `None` means `item` was not selected.
fn try_select(&self, item: I) -> Option<MdElemRef<'a>>;
}

Expand Down Expand Up @@ -50,10 +52,13 @@ impl Display for ParseErrorReason {

macro_rules! selectors {
[
$($(#[$meta:meta])*
$({$($char:literal $(=>$($read_variant:ident)::+)? ),+})?
$(! {$($bang_char:literal $(=>$($bang_read_variant:ident)::+)? ),+})?
$name:ident),* $(,)?
$(
$(#[$meta:meta])*
$({$($char:literal $(=>$($read_variant:ident)::+)? ),+})?
$(! {$($bang_char:literal $(=>$($bang_read_variant:ident)::+)? ),+})?
$name:ident
$(| $alias:ident)?
),* $(,)?
] => {
#[derive(Debug, PartialEq)]
pub enum MdqRefSelector {
Expand All @@ -68,6 +73,7 @@ macro_rules! selectors {
match (self, node) {
$(
(Self::$name(selector), MdElemRef::$name(elem)) => selector.try_select(elem),
$( (Self::$name(selector), MdElemRef::$alias(elem)) => selector.try_select(elem.into()), )?
)*
_ => None
}
Expand Down Expand Up @@ -145,6 +151,8 @@ selectors![
{'`'} CodeBlock,

{'<'} Html,

{':'} TableSlice | Table,
];

impl MdqRefSelector {
Expand Down Expand Up @@ -192,7 +200,8 @@ impl MdqRefSelector {

fn build_output<'a>(&self, out: &mut Vec<MdElemRef<'a>>, node: MdElemRef<'a>) {
// try_select_node is defined in macro_helpers::selectors!
match self.try_select_node(node) {
match self.try_select_node(node.clone()) {
// TODO can we remove this? I don't think so, but let's follow up
Some(found) => out.push(found),
None => {
for child in Self::find_children(node) {
Expand All @@ -208,7 +217,7 @@ impl MdqRefSelector {
/// selector-specific. For example, an [MdqNode::Section] has child nodes both in its title and in its body, but
/// only the body nodes are relevant for select recursion. `MdqNode` shouldn't need to know about that oddity; it
/// belongs here.
fn find_children<'a>(node: MdElemRef) -> Vec<MdElemRef> {
fn find_children(node: MdElemRef) -> Vec<MdElemRef> {
match node {
MdElemRef::Doc(body) => {
let mut wrapped = Vec::with_capacity(body.len());
Expand All @@ -232,13 +241,18 @@ impl MdqRefSelector {
}
result
}
MdElemRef::Table(table) => {
let count_estimate = table.rows.len() * table.rows.first().map(|tr| tr.len()).unwrap_or(0);
MdElemRef::Table(table) => Self::find_children(MdElemRef::TableSlice(table.into())),
MdElemRef::TableSlice(table) => {
let table_rows_estimate = 8; // TODO expose this from the table.rows() trait
let first_row_cols = table.rows().next().map(Vec::len).unwrap_or(0);
let count_estimate = table_rows_estimate * first_row_cols;
let mut result = Vec::with_capacity(count_estimate);
for row in &table.rows {
for col in row {
for cell in col {
result.push(MdElemRef::Inline(cell));
for row in table.rows() {
for maybe_col in row {
if let Some(col) = maybe_col {
for cell in *col {
result.push(MdElemRef::Inline(cell));
}
}
}
}
Expand Down Expand Up @@ -363,6 +377,15 @@ mod test {
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::Paragraph(item_parsed));
}

/// Smoke test that a selector string starting with `:` parses to a table-slice selector.
/// See `mod sel_table::tests` for more extensive tests
#[test]
fn table_smoke() {
    let selector_text = ":-: * :-:";

    // The expected inner selector is read from everything after the leading ':' character.
    let mut inner_chars = ParsingIterator::new(&selector_text[1..]);
    let expected_inner = TableSliceSelector::read(&mut inner_chars).unwrap();

    let mut all_chars = ParsingIterator::new(selector_text);
    let actual = MdqRefSelector::parse_selector(&mut all_chars);

    expect_ok(actual, MdqRefSelector::TableSlice(expected_inner));
}

#[test]
fn unknown() {
let input = "\u{2603}";
Expand All @@ -387,6 +410,7 @@ mod test {
CodeBlock(_),
Html(_),
Paragraph(_),
TableSlice(_),
});
}

Expand Down Expand Up @@ -448,7 +472,7 @@ mod test {
let inline = Inline::Link(mk_link());
let node_ref = MdElemRef::Inline(&inline);
let children = MdqRefSelector::find_children(node_ref);
assert_eq!(children, vec![MdElemRef::Link(&mk_link()),]);
assert_eq!(children, vec![MdElemRef::Link(&mk_link())]);
}
}
}
4 changes: 2 additions & 2 deletions src/select/match_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ use crate::tree_ref::MdElemRef;

/// MatchSelector is a helper trait for implementing [Selector]. Simply provide the boolean predicate for whether a
/// given item matches, and MatchSelector will do the rest.
pub trait MatchSelector<'a, I: Copy + Into<MdElemRef<'a>>> {
pub trait MatchSelector<I> {
/// The boolean predicate: returns whether `item` matches this selector.
fn matches(&self, item: I) -> bool;
}

impl<'a, I, M> Selector<'a, I> for M
where
I: Copy + Into<MdElemRef<'a>>,
M: MatchSelector<'a, I>,
M: MatchSelector<I>,
{
fn try_select(&self, item: I) -> Option<MdElemRef<'a>> {
if self.matches(item) {
Expand Down
1 change: 1 addition & 0 deletions src/select/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ mod sel_link;
mod sel_list_item;
mod sel_paragraph;
mod sel_section;
mod sel_table;

pub use api::*;
4 changes: 2 additions & 2 deletions src/select/sel_block_quote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ impl BlockQuoteSelector {
}
}

impl<'a> MatchSelector<'a, &'a BlockQuote> for BlockQuoteSelector {
fn matches(&self, block_quote: &'a BlockQuote) -> bool {
// Block quotes are matched on their body, via the matcher's `matches_any`.
impl MatchSelector<&BlockQuote> for BlockQuoteSelector {
    fn matches(&self, block_quote: &BlockQuote) -> bool {
        let quoted_body = &block_quote.body;
        self.matcher.matches_any(quoted_body)
    }
}
4 changes: 2 additions & 2 deletions src/select/sel_code_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ impl CodeBlockSelector {
}
}

impl<'a> MatchSelector<'a, &'a CodeBlock> for CodeBlockSelector {
fn matches(&self, code_block: &'a CodeBlock) -> bool {
impl MatchSelector<&CodeBlock> for CodeBlockSelector {
fn matches(&self, code_block: &CodeBlock) -> bool {
let lang_matches = match &code_block.variant {
CodeVariant::Code(code_opts) => {
let actual_lang = match code_opts {
Expand Down
4 changes: 2 additions & 2 deletions src/select/sel_html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ impl HtmlSelector {
}
}

impl<'a> MatchSelector<'a, HtmlRef<'a>> for HtmlSelector {
fn matches(&self, html: HtmlRef<'a>) -> bool {
// HTML is matched on the value wrapped by `HtmlRef` (its `.0` field).
impl MatchSelector<HtmlRef<'_>> for HtmlSelector {
    fn matches(&self, html: HtmlRef) -> bool {
        let wrapped = html.0;
        self.matcher.matches(wrapped)
    }
}
4 changes: 2 additions & 2 deletions src/select/sel_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ impl ImageSelector {
}
}

impl<'a> MatchSelector<'a, &'a Image> for ImageSelector {
fn matches(&self, item: &'a Image) -> bool {
// An image matches only if both its alt text and its link URL match.
impl MatchSelector<&Image> for ImageSelector {
    fn matches(&self, item: &Image) -> bool {
        let matchers = &self.matchers;
        // Check the alt text first; the URL matcher is consulted only if this passes.
        if !matchers.display_matcher.matches(&item.alt) {
            return false;
        }
        matchers.url_matcher.matches(&item.link.url)
    }
}
4 changes: 2 additions & 2 deletions src/select/sel_link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ impl LinkSelector {
}
}

impl<'a> MatchSelector<'a, &'a Link> for LinkSelector {
fn matches(&self, item: &'a Link) -> bool {
impl MatchSelector<&Link> for LinkSelector {
fn matches(&self, item: &Link) -> bool {
self.matchers.display_matcher.matches_inlines(&item.text)
&& self.matchers.url_matcher.matches(&item.link_definition.url)
}
Expand Down
4 changes: 2 additions & 2 deletions src/select/sel_list_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ impl ListItemSelector {
}
}

impl<'a> MatchSelector<'a, ListItemRef<'a>> for ListItemSelector {
fn matches(&self, item: ListItemRef<'a>) -> bool {
impl MatchSelector<ListItemRef<'_>> for ListItemSelector {
fn matches(&self, item: ListItemRef) -> bool {
let ListItemRef(idx, li) = item;
self.li_type.matches(&idx) && self.checkbox.matches(&li.checked) && self.string_matcher.matches_any(&li.item)
}
Expand Down
4 changes: 2 additions & 2 deletions src/select/sel_paragraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ impl ParagraphSelector {
}
}

impl<'a> MatchSelector<'a, &'a Paragraph> for ParagraphSelector {
fn matches(&self, paragraph: &'a Paragraph) -> bool {
// Paragraphs are matched on their body, via the matcher's `matches_inlines`.
impl MatchSelector<&Paragraph> for ParagraphSelector {
    fn matches(&self, paragraph: &Paragraph) -> bool {
        let body = &paragraph.body;
        self.matcher.matches_inlines(body)
    }
}
4 changes: 2 additions & 2 deletions src/select/sel_section.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ impl SectionSelector {
}
}

impl<'a> MatchSelector<'a, &'a Section> for SectionSelector {
fn matches(&self, section: &'a Section) -> bool {
// Sections are matched on their title only, via the matcher's `matches_inlines`.
impl MatchSelector<&Section> for SectionSelector {
    fn matches(&self, section: &Section) -> bool {
        let title = &section.title;
        self.matcher.matches_inlines(title)
    }
}
Loading

0 comments on commit 2ff2d7d

Please sign in to comment.