Skip to content

Commit

Permalink
fix: issue with nested Markdown lists
Browse files (browse the repository at this point in the history)
  • Loading branch information
elijah-potter committed Jul 6, 2024
1 parent 994cd6e commit f5a669d
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 81 deletions.
125 changes: 70 additions & 55 deletions harper-comments/src/comment_parsers/jsdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,76 +8,94 @@ pub struct JsDoc;

impl Parser for JsDoc {
fn parse(&mut self, source: &[char]) -> Vec<Token> {
dbg!();
let mut tokens = Vec::new();

let actual = without_initiators(source);
let mut chars_traversed = 0;

if actual.is_empty() {
return Vec::new();
for line in source.split(|c| *c == '\n') {
let mut new_tokens = parse_line(line);

new_tokens
.iter_mut()
.for_each(|t| t.span.push_by(chars_traversed));

chars_traversed += line.len() + 1;
tokens.append(&mut new_tokens);
}

let source = actual.get_content(source);
let mut tokens = Markdown.parse(source);
tokens
}
}

let mut cursor = 0;
fn parse_line(source: &[char]) -> Vec<Token> {
let actual_line = without_initiators(source);

// Handle inline tags
loop {
if cursor >= tokens.len() {
break;
}
if actual_line.is_empty() {
return vec![];
}

if let Some(new_cursor) = &tokens[cursor..]
.iter()
.position(|t| t.kind == TokenKind::Punctuation(Punctuation::OpenCurly))
.map(|i| i + cursor)
{
cursor = *new_cursor;
} else {
break;
}
let source_line = actual_line.get_content(source);

let parsers = [parse_link, parse_tutorial];
let mut new_tokens = Markdown.parse(source_line);

for parser in parsers {
if let Some(p) = parser(&tokens[cursor..], source) {
for tok in &mut tokens[cursor..cursor + p] {
tok.kind = TokenKind::Unlintable;
}
let mut cursor = 0;

cursor += p;
continue;
}
}
// Handle inline tags
loop {
if cursor >= new_tokens.len() {
break;
}

// Handle the block tag, if it exists
if let Some(tag_start) = tokens.iter().tuple_windows().position(|(a, b)| {
matches!(
(a, b),
(
Token {
kind: TokenKind::Punctuation(Punctuation::At),
..
},
Token {
kind: TokenKind::Word,
..
}
)
)
}) {
for token in &mut tokens[tag_start..] {
token.kind = TokenKind::Unlintable;
if let Some(new_cursor) = &new_tokens[cursor..]
.iter()
.position(|t| t.kind == TokenKind::Punctuation(Punctuation::OpenCurly))
.map(|i| i + cursor)
{
cursor = *new_cursor;
} else {
break;
}

let parsers = [parse_link, parse_tutorial];

for parser in parsers {
if let Some(p) = parser(&new_tokens[cursor..], source_line) {
for tok in &mut new_tokens[cursor..cursor + p] {
tok.kind = TokenKind::Unlintable;
}

cursor += p;
continue;
}
}
}

for token in tokens.iter_mut() {
token.span.push_by(actual.start);
// Handle the block tag, if it exists on the current line.
if let Some(tag_start) = new_tokens.iter().tuple_windows().position(|(a, b)| {
matches!(
(a, b),
(
Token {
kind: TokenKind::Punctuation(Punctuation::At),
..
},
Token {
kind: TokenKind::Word,
..
}
)
)
}) {
for token in &mut new_tokens[tag_start..] {
token.kind = TokenKind::Unlintable;
}
}

tokens
for token in new_tokens.iter_mut() {
token.span.push_by(actual_line.start);
}

new_tokens
}

fn parse_link(tokens: &[Token], source: &[char]) -> Option<usize> {
Expand Down Expand Up @@ -112,10 +130,7 @@ fn parse_inline_tag(tag_name: &[char], tokens: &[Token], source: &[char]) -> Opt
return None;
}

dbg!(tokens[2].span.get_content(source));

if tokens[2].span.get_content(source) != tag_name {
dbg!();
return None;
}

Expand Down
41 changes: 30 additions & 11 deletions harper-comments/src/comment_parsers/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,41 @@ pub struct Unit;

impl Parser for Unit {
fn parse(&mut self, source: &[char]) -> Vec<Token> {
let actual = without_initiators(source);
let mut tokens = Vec::new();

if actual.is_empty() {
return Vec::new();
}
let mut chars_traversed = 0;

for line in source.split(|c| *c == '\n') {
let mut new_tokens = parse_line(line);

let source = actual.get_content(source);
new_tokens
.iter_mut()
.for_each(|t| t.span.push_by(chars_traversed));

let mut markdown_parser = Markdown;
chars_traversed += line.len() + 1;
tokens.append(&mut new_tokens);
}

let mut new_tokens = markdown_parser.parse(source);
tokens
}
}

new_tokens
.iter_mut()
.for_each(|t| t.span.push_by(actual.start));
fn parse_line(source: &[char]) -> Vec<Token> {
let actual = without_initiators(source);

new_tokens
if actual.is_empty() {
return Vec::new();
}

let source = actual.get_content(source);

let mut markdown_parser = Markdown;

let mut new_tokens = markdown_parser.parse(source);

new_tokens
.iter_mut()
.for_each(|t| t.span.push_by(actual.start));

new_tokens
}
2 changes: 1 addition & 1 deletion harper-core/src/char_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ impl CharExt for char {
unicode_blocks::EMOTICONS,
unicode_blocks::MISCELLANEOUS_SYMBOLS,
unicode_blocks::VARIATION_SELECTORS,
unicode_blocks::SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
unicode_blocks::SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
];

blocks.contains(&block)
Expand Down
26 changes: 20 additions & 6 deletions harper-core/src/parsers/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,33 @@ impl Parser for Markdown {
}

match event {
pulldown_cmark::Event::SoftBreak | pulldown_cmark::Event::HardBreak => {
pulldown_cmark::Event::SoftBreak => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 1),
kind: TokenKind::Newline(1)
kind: TokenKind::Newline(1),
});
}
pulldown_cmark::Event::HardBreak => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 1),
kind: TokenKind::Newline(2),
});
}
pulldown_cmark::Event::Start(pulldown_cmark::Tag::List(v)) => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 0),
kind: TokenKind::Newline(2),
});
stack.push(pulldown_cmark::Tag::List(v));
}
pulldown_cmark::Event::Start(tag) => stack.push(tag),
pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Paragraph)
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Item)
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Heading(_))
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::TableCell) => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 0),
kind: TokenKind::Newline(2)
kind: TokenKind::Newline(2),
});
stack.pop();
}
Expand All @@ -57,7 +71,7 @@ impl Parser for Markdown {

tokens.push(Token {
span: Span::new_with_len(traversed_chars, chunk_len),
kind: TokenKind::Unlintable
kind: TokenKind::Unlintable,
});
}
pulldown_cmark::Event::Text(text) => {
Expand All @@ -69,7 +83,7 @@ impl Parser for Markdown {
if matches!(tag, Tag::CodeBlock(..)) {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, text.chars().count()),
kind: TokenKind::Unlintable
kind: TokenKind::Unlintable,
});
continue;
}
Expand All @@ -96,7 +110,7 @@ impl Parser for Markdown {

tokens.append(&mut new_tokens);
}
_ => ()
_ => (),
}
}

Expand Down
16 changes: 8 additions & 8 deletions harper-core/src/parsers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub trait StrParser {

impl<T> StrParser for T
where
T: Parser
T: Parser,
{
fn parse_str(&mut self, source: impl AsRef<str>) -> Vec<Token> {
let source: Vec<_> = source.as_ref().chars().collect();
Expand All @@ -33,7 +33,7 @@ mod tests {
fn assert_tokens_eq(
test_str: impl AsRef<str>,
expected: &[TokenKind],
parser: &mut impl Parser
parser: &mut impl Parser,
) {
let chars: Vec<_> = test_str.as_ref().chars().collect();
let tokens = parser.parse(&chars);
Expand Down Expand Up @@ -70,8 +70,8 @@ mod tests {
Space(1),
Word,
Space(1),
Word
]
Word,
],
)
}

Expand All @@ -87,8 +87,8 @@ mod tests {
Space(1),
Word,
Space(1),
Word
]
Word,
],
);
}

Expand All @@ -104,8 +104,8 @@ mod tests {
Newline(2),
Word,
Space(1),
Word
]
Word,
],
);
}

Expand Down
32 changes: 32 additions & 0 deletions harper-core/tests/run_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use harper_core::{Document, FullDictionary, LintGroup, LintGroupConfig, Linter};

/// Creates a unit test checking that the linting of a Markdown document (in `test_sources`)
/// produces the expected number of lints.
///
/// `$filename` is the fixture's file stem: `create_test!(foo.md, 3)` embeds
/// `./test_sources/foo.md` (resolved relative to this file) and generates a test named
/// `lints_foo_correctly`. `$correct_expected` is the exact number of lints the default
/// [`LintGroup`] must report for that document.
macro_rules! create_test {
    ($filename:ident.md, $correct_expected:expr) => {
        paste::paste! {
            #[test]
            fn [<lints_ $filename _correctly>]() {
                // `concat!` is variadic, so the path is assembled in a single call.
                let source = include_str!(
                    concat!("./test_sources/", stringify!($filename), ".md")
                );

                // `include_str!` already yields a `&str`; no extra borrow is needed.
                let document = Document::new_markdown(source);

                let mut linter = LintGroup::new(
                    LintGroupConfig::default(),
                    FullDictionary::create_from_curated()
                );
                let lints = linter.lint(&document);

                // Render the offending lints only when the count is wrong, instead of
                // unconditionally dumping them with `dbg!`.
                assert_eq!(
                    lints.len(),
                    $correct_expected,
                    "unexpected set of lints: {:#?}",
                    lints
                );
            }
        }
    };
}

// The `whack_bullets.md` fixture is expected to produce exactly one lint.
create_test!(whack_bullets.md, 1);
7 changes: 7 additions & 0 deletions harper-core/tests/test_sources/whack_bullets.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This is a big heading, with a lot of words

- New here's a list, this part doesn't have as many words
- But this part does, it has so many words, more words than you could ever dream of
Just look at all those words
- So does this part, I might be overwhelmed with all these words
- This is an test to make sure it isn't crashing

0 comments on commit f5a669d

Please sign in to comment.