Skip to content

Commit

Permalink
feat: added linter for multiple sequential pronouns
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Jun 12, 2024
1 parent 7c4e1db commit 5b40b9c
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 8 deletions.
51 changes: 44 additions & 7 deletions harper-core/src/linting/an_a.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::borrow::Cow;

use itertools::Itertools;

use crate::{CharStringExt, Document, Lint, LintKind, Linter, Suggestion, TokenStringExt};
Expand All @@ -6,7 +8,7 @@ use crate::{CharStringExt, Document, Lint, LintKind, Linter, Suggestion, TokenSt
pub struct AnA;

impl Linter for AnA {
fn lint(&mut self, document: &Document) -> Vec<crate::Lint> {
fn lint(&mut self, document: &Document) -> Vec<Lint> {
let mut lints = Vec::new();

for (first, second) in document.iter_words().tuple_windows() {
Expand Down Expand Up @@ -45,12 +47,27 @@ impl Linter for AnA {
}
}

// Checks whether a provided word begins with a vowel _sound_.
//
// It was produced through trial and error.
// Matches with 99.71% and 99.77% of vowels and non-vowels in the
// Carnegie-Mellon University word -> pronunciation dataset.
/// Returns `word` with every character converted to lowercase.
///
/// When `word` contains no uppercase characters the input slice is handed
/// back as a borrow, so the common all-lowercase case does not allocate.
/// Otherwise a freshly allocated, lowercased copy is returned. A single
/// character may lowercase to more than one character, so the owned result
/// can be longer than the input.
fn to_lower_word(word: &[char]) -> Cow<'_, [char]> {
    // Fast path: nothing needs converting, so borrow the input untouched.
    if !word.iter().any(|c| c.is_uppercase()) {
        return Cow::Borrowed(word);
    }

    let mut lowered = Vec::new();
    for c in word {
        // `to_lowercase` yields an iterator because the mapping is not
        // always one-to-one.
        lowered.extend(c.to_lowercase());
    }

    Cow::Owned(lowered)
}

/// Checks whether a provided word begins with a vowel _sound_.
///
/// It was produced through trial and error.
/// Matches with 99.71% and 99.77% of vowels and non-vowels in the
/// Carnegie-Mellon University word -> pronunciation dataset.
fn starts_with_vowel(word: &[char]) -> bool {
let word = to_lower_word(word);
let word = word.as_ref();

if matches!(
word,
[] | ['u', 'k', ..] | ['e', 'u', 'p', 'h', ..] | ['e', 'u', 'g' | 'l' | 'c', ..]
Expand All @@ -60,7 +77,9 @@ fn starts_with_vowel(word: &[char]) -> bool {

if matches!(
word,
['S', 'V', 'G']
['s', 'v', 'g']
| ['h', 't', 'm', 'l']
| ['l', 'l', 'm']
| ['h', 'o', 'u', 'r', ..]
| ['h', 'o', 'n', ..]
| ['u', 'n', 'i', 'n' | 'm', ..]
Expand Down Expand Up @@ -128,3 +147,21 @@ fn starts_with_vowel(word: &[char]) -> bool {
['a', ..] | ['e', ..] | ['i', ..] | ['o', ..] | ['u', ..]
)
}

#[cfg(test)]
mod tests {
    use super::AnA;
    use crate::linting::tests::assert_lint_count;

    /// "a" in front of "HTML" must be flagged regardless of letter case.
    #[test]
    fn detects_html_as_vowel() {
        for text in ["Here is a HTML document.", "Here is a html document."] {
            assert_lint_count(text, AnA, 1);
        }
    }

    /// "a" in front of "LLM" must be flagged regardless of letter case.
    #[test]
    fn detects_llm_as_vowel() {
        for text in ["Here is a LLM document.", "Here is a llm document."] {
            assert_lint_count(text, AnA, 1);
        }
    }
}
4 changes: 3 additions & 1 deletion harper-core/src/linting/lint_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use super::an_a::AnA;
use super::correct_number_suffix::CorrectNumberSuffix;
use super::long_sentences::LongSentences;
use super::matcher::Matcher;
use super::multiple_sequential_pronouns::MultipleSequentialPronouns;
use super::number_suffix_capitalization::NumberSuffixCapitalization;
use super::repeated_words::RepeatedWords;
use super::sentence_capitalization::SentenceCapitalization;
Expand Down Expand Up @@ -101,7 +102,8 @@ create_lint_group_config!(
Spaces => true,
Matcher => true,
CorrectNumberSuffix => true,
NumberSuffixCapitalization => true
NumberSuffixCapitalization => true,
MultipleSequentialPronouns => true
);

impl<T: Dictionary + Default> Default for LintGroup<T> {
Expand Down
1 change: 1 addition & 0 deletions harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod lint;
mod lint_group;
mod long_sentences;
mod matcher;
mod multiple_sequential_pronouns;
mod number_suffix_capitalization;
mod repeated_words;
mod sentence_capitalization;
Expand Down
106 changes: 106 additions & 0 deletions harper-core/src/linting/multiple_sequential_pronouns.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
use crate::{CharString, Document, Lint, LintKind, Linter, Span, TokenStringExt};

/// Linter that checks if multiple pronouns are being used right after each
/// other. This is a common mistake to make during the revision process.
///
/// A run of two or more consecutive pronouns is reported as a single lint
/// covering the whole run.
#[derive(Debug)]
pub struct MultipleSequentialPronouns {
/// The pronouns this linter recognizes, kept sorted so that membership can
/// be tested with a binary search.
///
/// Since there aren't many pronouns, it's faster to store this as a vector.
pronouns: Vec<CharString>
}

impl MultipleSequentialPronouns {
    /// Builds the linter together with its sorted table of known pronouns.
    fn new() -> Self {
        let mut pronouns: Vec<CharString> = Vec::new();

        for text in [
            "me", "my", "I", "we", "you", "he", "him", "her", "she", "it", "they"
        ] {
            pronouns.push(text.chars().collect());
        }

        // `is_pronoun` relies on binary search, which requires sorted input.
        pronouns.sort();

        Self { pronouns }
    }

    /// Reports whether `word` exactly matches one of the known pronouns.
    ///
    /// The comparison is case-sensitive.
    fn is_pronoun(&self, word: &[char]) -> bool {
        let lookup = self
            .pronouns
            .binary_search_by(|candidate| candidate.as_slice().cmp(word));

        matches!(lookup, Ok(_))
    }
}

impl Linter for MultipleSequentialPronouns {
    /// Flags every run of two or more consecutive pronouns within a sentence.
    ///
    /// Each run produces one [`Lint`] whose span stretches from the start of
    /// the first pronoun in the run to the end of the last one. Runs are
    /// tracked per sentence: a run that ends a sentence is still reported,
    /// and pronouns on either side of a sentence boundary are never joined
    /// into a single run.
    fn lint(&mut self, document: &Document) -> Vec<Lint> {
        let mut lints = Vec::new();

        for sentence in document.sentences() {
            // Pronouns in the run currently being read. Declared per sentence
            // so that no state leaks from one sentence into the next.
            let mut found_pronouns = Vec::new();

            // The trailing `None` is an end-of-sentence sentinel. It closes
            // a run that reaches the last word of the sentence through the
            // same code path as a run interrupted by an ordinary word.
            let words = sentence
                .iter_words()
                .map(Some)
                .chain(std::iter::once(None));

            for entry in words {
                if let Some(word) = entry {
                    if self.is_pronoun(document.get_span_content(word.span)) {
                        found_pronouns.push(word);
                        continue;
                    }
                }

                // The run was broken by a non-pronoun or by the end of the
                // sentence. The slice pattern only matches runs of length
                // two or more; a lone pronoun is fine.
                if let [first, .., last] = found_pronouns.as_slice() {
                    lints.push(Lint {
                        span: Span::new(first.span.start, last.span.end),
                        lint_kind: LintKind::Repetition,
                        message: "There are too many personal pronouns in sequence here."
                            .to_owned(),
                        priority: 63,
                        ..Default::default()
                    });
                }

                found_pronouns.clear();
            }
        }

        lints
    }
}

impl Default for MultipleSequentialPronouns {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod tests {
    use super::MultipleSequentialPronouns;
    use crate::linting::tests::assert_lint_count;

    /// Two pronouns back to back produce exactly one lint.
    #[test]
    fn can_detect_two_pronouns() {
        let text = "...little bit about my I want to do.";
        assert_lint_count(text, MultipleSequentialPronouns::new(), 1);
    }

    /// A longer run is still reported as a single lint, not one per pair.
    #[test]
    fn can_detect_three_pronouns() {
        let text = "...little bit about my I you want to do.";
        assert_lint_count(text, MultipleSequentialPronouns::new(), 1);
    }

    /// A pronoun standing on its own must not be flagged.
    #[test]
    fn allows_single_pronouns() {
        let text = "...little bit about I want to do.";
        assert_lint_count(text, MultipleSequentialPronouns::new(), 0);
    }
}
1 change: 1 addition & 0 deletions harper-ls/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ lspconfig.harper_ls.setup {
matcher = true,
correct_number_suffix = true,
number_suffix_capitalization = true,
multiple_sequential_pronouns = true
}
}
},
Expand Down

0 comments on commit 5b40b9c

Please sign in to comment.