Skip to content

Commit

Permalink
add paragraph selector
Browse files Browse the repository at this point in the history
resolves #140
  • Loading branch information
yshavit authored Jul 25, 2024
1 parent 3cfae6d commit b4b36c6
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 0 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,14 @@ You can select...
```bash
$ cat example.md | mdq '</> foo' # find html tags containing "foo"
```

- Paragraphs

```bash
$ cat example.md | mdq 'P: foo' # find paragraphs containing "foo"
```



The `foo`s and `bar`s above can be:

Expand Down
20 changes: 20 additions & 0 deletions src/select/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::select::sel_image::ImageSelector;
use crate::select::sel_link::LinkSelector;
use crate::select::sel_list_item::ListItemSelector;
use crate::select::sel_list_item::ListItemType;
use crate::select::sel_paragraph::ParagraphSelector;
use crate::select::sel_section::SectionSelector;
use crate::tree::{Formatting, Inline, Link, Text, TextVariant};
use crate::tree_ref::{HtmlRef, ListItemRef, MdElemRef};
Expand Down Expand Up @@ -131,6 +132,8 @@ selectors![
'-' => ListItemType::Unordered
} ListItem,

{'P'} Paragraph,

{'['} Link,
! {'['} Image,

Expand Down Expand Up @@ -341,6 +344,22 @@ mod test {
expect_ok(mdq_ref_sel_parsed, MdqRefSelector::Html(item_parsed));
}

/// `"P: foo"` parsed as a full selector must come back as `MdqRefSelector::Paragraph`, wrapping
/// whatever `ParagraphSelector::read` produces for the same text minus its first character.
#[test]
fn paragraph() {
    let input = "P: foo";
    let actual = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));

    // `read` is handed everything after the leading 'P'.
    let mut after_selector_char = ParsingIterator::new(&input[1..]);
    let expected = ParagraphSelector::read(&mut after_selector_char).unwrap();

    expect_ok(actual, MdqRefSelector::Paragraph(expected));
}

/// Same check as the plain paragraph test, but for `"P: |"`, where a `|` directly follows the
/// `P: ` prefix instead of matcher text.
#[test]
fn paragraph_no_matcher() {
    let input = "P: |";
    let actual = MdqRefSelector::parse_selector(&mut ParsingIterator::new(input));

    // `read` is handed everything after the leading 'P'.
    let mut after_selector_char = ParsingIterator::new(&input[1..]);
    let expected = ParagraphSelector::read(&mut after_selector_char).unwrap();

    expect_ok(actual, MdqRefSelector::Paragraph(expected));
}

#[test]
fn unknown() {
let input = "\u{2603}";
Expand All @@ -364,6 +383,7 @@ mod test {
BlockQuote(_),
CodeBlock(_),
Html(_),
Paragraph(_),
});
}

Expand Down
1 change: 1 addition & 0 deletions src/select/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod sel_html;
mod sel_image;
mod sel_link;
mod sel_list_item;
mod sel_paragraph;
mod sel_section;

pub use api::*;
25 changes: 25 additions & 0 deletions src/select/sel_paragraph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use crate::matcher::StringMatcher;
use crate::parsing_iter::ParsingIterator;
use crate::select::base::Selector;
use crate::select::{ParseResult, SELECTOR_SEPARATOR};
use crate::tree::Paragraph;

/// Selector that picks out paragraphs by testing their inline contents against a string matcher.
#[derive(Debug, PartialEq)]
pub struct ParagraphSelector {
/// Matcher applied to a paragraph's inline body.
matcher: StringMatcher,
}

impl ParagraphSelector {
    /// Parses the portion of a paragraph selector that follows its leading selector character.
    ///
    /// The input must start with `:`, followed by whitespace or the selector separator; the
    /// rest is read as a [`StringMatcher`] that ends at the selector separator.
    ///
    /// # Errors
    ///
    /// Propagates any error from the `:` check, the whitespace-or-separator check, or the
    /// matcher read.
    pub fn read(iter: &mut ParsingIterator) -> ParseResult<Self> {
        iter.require_char(':')?;
        // "P:" is the description handed to the iterator for this whitespace check.
        iter.require_whitespace_or(SELECTOR_SEPARATOR, "P:")?;
        Ok(Self {
            matcher: StringMatcher::read(iter, SELECTOR_SEPARATOR)?,
        })
    }
}

impl<'a> Selector<'a, &'a Paragraph> for ParagraphSelector {
    /// Returns whether this selector's matcher accepts the paragraph's inline body.
    fn matches(&self, paragraph: &'a Paragraph) -> bool {
        let inlines = &paragraph.body;
        self.matcher.matches_inlines(inlines)
    }
}
6 changes: 6 additions & 0 deletions src/tree_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ impl<'a> From<&'a Link> for MdElemRef<'a> {
}
}

impl<'a> From<&'a Paragraph> for MdElemRef<'a> {
fn from(value: &'a Paragraph) -> Self {
MdElemRef::Paragraph(value)
}
}

impl<'a> From<&'a Section> for MdElemRef<'a> {
fn from(value: &'a Section) -> Self {
MdElemRef::Section(value)
Expand Down
67 changes: 67 additions & 0 deletions tests/md_cases/select_paragraphs.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
[given]
md = '''
Hello, world.
> Paragraph within a block quote.
This paragraph has _inline_ **formatting**.
'''


[expect."all"]
cli_args = ["P:"]
output = '''
Hello, world.
-----
Paragraph within a block quote.
-----
This paragraph has _inline_ **formatting**.
'''


[expect."all but with explicit all-matcher"]
cli_args = ["P: *"]
output = '''
Hello, world.
-----
Paragraph within a block quote.
-----
This paragraph has _inline_ **formatting**.
'''


[expect."select within a block quote"]
cli_args = ["P: block"]
output = '''
Paragraph within a block quote.
'''


[expect."matcher ignores inline formatting"]
cli_args = ["P: has inline"]
# The markdown is "has _inline_", but the emphasis formatting is ignored for matching. It's still used for output.
output = '''
This paragraph has _inline_ **formatting**.
'''


[expect."no colon after p"]
cli_args = ["P *"]
expect_success = false
output = '''
'''


[expect."space before colon"]
cli_args = ["P : *"]
expect_success = false
output = '''
'''

0 comments on commit b4b36c6

Please sign in to comment.