Skip to content

Commit

Permalink
Added spaces, tests, and fixed issue in sentence parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jan 22, 2024
1 parent 55ef7b4 commit daea731
Show file tree
Hide file tree
Showing 12 changed files with 132 additions and 22 deletions.
39 changes: 33 additions & 6 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,33 @@ impl Document {
} = token
{
if is_sentence_terminator(punct) {
Some(index)
} else {
None
return Some(index);
}
} else {
None
}
None
})
}

/// Get the index of the last sentence terminator.
fn last_sentence_terminator(&self) -> Option<usize> {
self.tokens
.iter()
.enumerate()
.rev()
.find_map(|(index, token)| {
if let Token {
kind: TokenKind::Punctuation(punct),
..
} = token
{
if is_sentence_terminator(punct) {
return Some(index);
}
}
None
})
}

pub fn sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
let first_sentence = self
.sentence_terminators()
Expand All @@ -122,7 +139,17 @@ impl Document {
.tuple_windows()
.map(move |(a, b)| &self.tokens[a + 1..=b]);

first_sentence.into_iter().chain(rest)
let last = if let Some(last_i) = self.last_sentence_terminator() {
if last_i + 1 < self.tokens.len() {
Some(&self.tokens[last_i + 1..])
} else {
None
}
} else {
Some(self.tokens.as_slice())
};

first_sentence.into_iter().chain(rest).chain(last)
}

/** Returns all tokens whose `kind` is [`TokenKind::Word`] */
Expand Down
3 changes: 1 addition & 2 deletions harper-core/src/linting/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ pub struct Lint {
pub enum LintKind {
Spelling,
Capitalization,
UnmatchedQuote,
WrongQuotes,
Formatting,
Repetition,
Readability,
#[default]
Expand Down
8 changes: 5 additions & 3 deletions harper-core/src/linting/lint_set.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{Dictionary, Lint};

use super::Linter;
use super::{spaces::Spaces, Linter};
use paste::paste;

use super::{
Expand Down Expand Up @@ -39,7 +39,8 @@ impl LintSet {
.add_long_sentences()
.add_unclosed_quotes()
.add_sentence_capitalization()
.add_spell_check(dictionary);
.add_spell_check(dictionary)
.add_spaces();
self
}

Expand Down Expand Up @@ -91,5 +92,6 @@ create_simple_builder_methods!(
UnclosedQuotes,
WrongQuotes,
LongSentences,
RepeatedWords
RepeatedWords,
Spaces
);
12 changes: 12 additions & 0 deletions harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod lint_set;
mod long_sentences;
mod repeated_words;
mod sentence_capitalization;
mod spaces;
mod spell_check;
mod unclosed_quotes;
mod wrong_quotes;
Expand All @@ -15,3 +16,14 @@ use crate::Document;
pub trait Linter: Send + Sync {
fn lint(&mut self, document: &Document) -> Vec<Lint>;
}

#[cfg(test)]
mod tests {
    use crate::{Document, Linter};

    /// Shared test helper for linter unit tests.
    ///
    /// Builds a `Document` from `text`, runs `linter` over it, and asserts that
    /// exactly `count` lints were produced.
    pub fn assert_lint_count(text: &str, mut linter: impl Linter, count: usize) {
        let document = Document::new(text, false);
        let produced = linter.lint(&document).len();
        assert_eq!(produced, count);
    }
}
8 changes: 2 additions & 6 deletions harper-core/src/linting/repeated_words.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,11 @@ impl Linter for RepeatedWords {

#[cfg(test)]
mod tests {
use super::super::Linter;
use super::super::tests::assert_lint_count;
use super::RepeatedWords;
use crate::Document;

#[test]
fn catches_basic() {
let test = Document::new("I wanted the the banana.", false);
let mut linter = RepeatedWords::new();
let lints = linter.lint(&test);
assert!(lints.len() == 1);
assert_lint_count("I wanted the the banana.", RepeatedWords::new(), 1)
}
}
26 changes: 26 additions & 0 deletions harper-core/src/linting/sentence_capitalization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ impl Linter for SentenceCapitalization {
let mut lints = Vec::new();

for sentence in document.sentences() {
dbg!(sentence);
if let Some(first_word) = sentence.first_word() {
let letters = document.get_span_content(first_word.span);

Expand All @@ -36,3 +37,28 @@ impl Linter for SentenceCapitalization {
lints
}
}

#[cfg(test)]
mod tests {
    use super::{super::tests::assert_lint_count, SentenceCapitalization};

    // A single lowercase sentence that ends with a period: one lint expected.
    #[test]
    fn catches_basic() {
        let linter = SentenceCapitalization::default();
        assert_lint_count("there is no way.", linter, 1);
    }

    // The same sentence without a trailing period: still one lint expected.
    #[test]
    fn no_period() {
        let linter = SentenceCapitalization::default();
        assert_lint_count("there is no way", linter, 1);
    }

    // Two lowercase sentences, the second unterminated: two lints expected.
    #[test]
    fn two_sentence() {
        let linter = SentenceCapitalization::default();
        assert_lint_count("i have complete conviction. she is guilty", linter, 2);
    }
}
33 changes: 33 additions & 0 deletions harper-core/src/linting/spaces.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use super::{Lint, Linter};
use crate::{parsing::TokenStringExt, Document, LintKind, Suggestion, TokenKind};

/// Linter that reports space tokens containing more than one space.
#[derive(Debug, Default)]
pub struct Spaces;

impl Linter for Spaces {
    /// Scan every sentence of `document` and emit one `Formatting` lint for each
    /// space token whose count exceeds one, suggesting a single space instead.
    fn lint(&mut self, document: &Document) -> Vec<Lint> {
        document
            .sentences()
            .flat_map(|sentence| sentence.iter_spaces())
            .filter_map(|token| {
                // `iter_spaces` is expected to yield nothing but space tokens.
                let TokenKind::Space(width) = token.kind else {
                    panic!("The space iterator should only return spaces.")
                };

                // A single space is fine; only wider runs are reported.
                (width > 1).then(|| Lint {
                    span: token.span,
                    lint_kind: LintKind::Formatting,
                    suggestions: vec![Suggestion::ReplaceWith(vec![' '])],
                    message: format!(
                        "There are {} spaces where there should be only one.",
                        width
                    ),
                })
            })
            .collect()
    }
}
2 changes: 1 addition & 1 deletion harper-core/src/linting/unclosed_quotes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ impl Linter for UnclosedQuotes {
{
lints.push(Lint {
span: token.span,
lint_kind: LintKind::UnmatchedQuote,
lint_kind: LintKind::Formatting,
suggestions: vec![],
message: "This quote has no termination.".to_string(),
})
Expand Down
2 changes: 1 addition & 1 deletion harper-core/src/linting/wrong_quotes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ fn lint_quote(document: &Document, quote_idx: usize, quote_token: Token) -> Opti
if quote_char != should_be {
Some(Lint {
span: quote_token.span,
lint_kind: LintKind::WrongQuotes,
suggestions: vec![Suggestion::ReplaceWith(vec![should_be])],
message: "Use the better-formatted quote character.".to_string(),
..Default::default()
})
} else {
None
Expand Down
15 changes: 14 additions & 1 deletion harper-core/src/parsing/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ pub trait TokenStringExt {
fn first_word(&self) -> Option<Token>;
fn iter_word_indices(&self) -> impl Iterator<Item = usize> + '_;
fn iter_words(&self) -> impl Iterator<Item = &Token> + '_;
fn iter_space_indices(&self) -> impl Iterator<Item = usize> + '_;
fn iter_spaces(&self) -> impl Iterator<Item = &Token> + '_;
}

impl TokenStringExt for [Token] {
Expand All @@ -108,6 +110,17 @@ impl TokenStringExt for [Token] {
}

fn iter_words(&self) -> impl Iterator<Item = &Token> + '_ {
self.iter().filter(|t| t.kind.is_word())
self.iter_word_indices().map(|i| &self[i])
}

/// Yield, in order, the position within this slice of every token whose kind
/// reports `is_space()`.
fn iter_space_indices(&self) -> impl Iterator<Item = usize> + '_ {
    self.iter()
        .enumerate()
        .filter_map(|(index, token)| token.kind.is_space().then_some(index))
}

/// Yield, in order, a reference to every token in this slice whose kind
/// reports `is_space()`.
fn iter_spaces(&self) -> impl Iterator<Item = &Token> + '_ {
    self.iter().filter(|token| token.kind.is_space())
}
}
2 changes: 1 addition & 1 deletion harper-ls/src/backend.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{borrow::BorrowMut, ops::DerefMut};
use std::ops::DerefMut;

use harper_core::{Dictionary, LintSet};
use tokio::{sync::Mutex, time::Instant};
Expand Down
4 changes: 3 additions & 1 deletion web/src/lib/analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ export async function lintText(text: string, useWasm = defaultUseWasm): Promise<
const res: LintResponse = await req.json();

// We only want to show fixable errors.
return res.lints.filter((lint) => lint.suggestions.length > 0);
const lints = res.lints.filter((lint) => lint.suggestions.length > 0);
console.log(lints);
return lints;
}
}

Expand Down

0 comments on commit daea731

Please sign in to comment.