From c8a32d07f9e506d9f715b91d574e5107567c5be5 Mon Sep 17 00:00:00 2001 From: Ben Brandt Date: Sat, 10 Feb 2024 22:17:26 +0100 Subject: [PATCH] Add tasklist markers --- src/unstable_markdown.rs | 45 ++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/unstable_markdown.rs b/src/unstable_markdown.rs index d348b41..999fb46 100644 --- a/src/unstable_markdown.rs +++ b/src/unstable_markdown.rs @@ -7,7 +7,7 @@ as possible, eventually falling back to the normal [`TextSplitter`] method. use std::ops::Range; use auto_enums::auto_enum; -use pulldown_cmark::{Event, Parser}; +use pulldown_cmark::{Event, Options, Parser}; use unicode_segmentation::UnicodeSegmentation; use crate::{ @@ -156,6 +156,10 @@ enum SemanticLevel { /// Single line break, which isn't necessarily a new element in Markdown /// Falls back to [`Self::Sentence`] SoftBreak, + /// An inline element that is within a larger element such as a paragraph, but + /// more specific than a sentence. 
+ /// Falls back to [`Self::Sentence`] + InlineElement, /// Hard line break (two newlines), which signifies a new element in Markdown /// Falls back to [`Self::SoftBreak`] HardBreak, @@ -184,16 +188,16 @@ impl SemanticSplit for Markdown { ]; fn new(text: &str) -> Self { - let ranges = Parser::new(text) + let ranges = Parser::new_ext(text, Options::all()) .into_offset_iter() - .filter_map(|(event, range)| match event { + .filter_map(|(event, range)| match dbg!(event) { Event::Start(_) | Event::End(_) | Event::Text(_) | Event::Code(_) | Event::Html(_) - | Event::FootnoteReference(_) - | Event::TaskListMarker(_) => None, + | Event::FootnoteReference(_) => None, + Event::TaskListMarker(_) => Some((SemanticLevel::InlineElement, range)), Event::SoftBreak => Some((SemanticLevel::SoftBreak, range)), Event::HardBreak => Some((SemanticLevel::HardBreak, range)), Event::Rule => Some((SemanticLevel::Rule, range)), @@ -242,14 +246,15 @@ impl SemanticSplit for Markdown { SemanticLevel::Sentence => text .split_sentence_bound_indices() .map(move |(i, str)| (offset + i, str)), - SemanticLevel::SoftBreak | SemanticLevel::HardBreak | SemanticLevel::Rule => { - split_str_by_separator( - text, - self.ranges_after_offset(offset, semantic_level) - .map(move |(_, sep)| sep.start - offset..sep.end - offset), - ) - .map(move |(i, str)| (offset + i, str)) - } + SemanticLevel::InlineElement + | SemanticLevel::SoftBreak + | SemanticLevel::HardBreak + | SemanticLevel::Rule => split_str_by_separator( + text, + self.ranges_after_offset(offset, semantic_level) + .map(move |(_, sep)| sep.start - offset..sep.end - offset), + ) + .map(move |(i, str)| (offset + i, str)), } } } @@ -445,6 +450,20 @@ mod tests { assert_eq!(SemanticLevel::Sentence, markdown.max_level()); } + #[test] + fn test_checklist() { + let markdown = Markdown::new("- [ ] incomplete task\n- [x] completed task"); + + assert_eq!( + vec![ + &(SemanticLevel::InlineElement, 2..5), + &(SemanticLevel::InlineElement, 24..27) + ], + 
markdown.ranges().collect::<Vec<_>>() + ); + assert_eq!(SemanticLevel::InlineElement, markdown.max_level()); + } + #[test] fn test_softbreak() { let markdown = Markdown::new("Some text\nwith a softbreak");