Skip to content

Commit

Permalink
feat: now lints for proper use of "a" vs "an"
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Feb 29, 2024
1 parent 1da200f commit 7a90f1a
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 19 deletions.
137 changes: 137 additions & 0 deletions harper-core/src/linting/an_a.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
use itertools::Itertools;

use crate::{Document, Lint, LintKind, Linter, Suggestion, TokenStringExt};

/// A linter that checks whether the indefinite articles "a" and "an" agree
/// with the sound that begins the word following them.
///
/// The type carries no state; all of the work happens in its `Linter`
/// implementation.
#[derive(Debug, Default)]
pub struct AnA;

impl Linter for AnA {
    /// Walks every pair of consecutive words in `document` and emits a lint
    /// whenever the first word is an indefinite article ("a" / "an") that
    /// disagrees with the leading sound of the second word.
    ///
    /// The article is recognised in both its lowercase and sentence-initial
    /// capitalised spellings ("a", "A", "an", "An"). The suggested
    /// replacement reuses the article's original first letter, so accepting
    /// the suggestion never changes capitalisation.
    fn lint(&mut self, document: &Document) -> Vec<crate::Lint> {
        let mut lints = Vec::new();

        for (first, second) in document.iter_words().tuple_windows() {
            let chars_first = document.get_span_content(first.span);
            let chars_second = document.get_span_content(second.span);

            // `true` when the author wrote "a", `false` when they wrote "an".
            // Any other word is not an indefinite article and is skipped.
            let wrote_a = match chars_first {
                ['a' | 'A'] => true,
                ['a' | 'A', 'n'] => false,
                _ => continue,
            };

            // "a" is correct exactly when the next word does NOT begin with
            // a vowel sound.
            let should_be_a = !starts_with_vowel(chars_second);

            if wrote_a != should_be_a {
                // The match above guarantees `chars_first` is non-empty, so
                // indexing the first character cannot panic. Reusing it
                // preserves the author's capitalisation ("A" -> "An").
                let leading = chars_first[0];
                let replacement = if wrote_a {
                    vec![leading, 'n']
                } else {
                    vec![leading]
                };

                lints.push(Lint {
                    span: first.span,
                    lint_kind: LintKind::Formatting,
                    suggestions: vec![Suggestion::ReplaceWith(replacement)],
                    message: "This is not vocally correct.".to_string(),
                    priority: 31,
                });
            }
        }

        lints
    }
}

/// Checks whether a provided word begins with a vowel _sound_.
///
/// The rule set was produced through trial and error. It matches 99.71% and
/// 99.77% of vowels and non-vowels in the Carnegie-Mellon University
/// word -> pronunciation dataset.
///
/// The comparison is case-insensitive: the word is lowercased (ASCII) before
/// any rule is applied, so "Apple" and "apple" give the same answer.
///
/// The rules are ordered from most to least specific and the first match
/// wins, so their order must not be changed casually.
///
/// Limitation: all-caps initialisms are judged by their spelling, not by the
/// names of their letters, so e.g. "FBI" is reported as consonant-initial.
///
/// Returns `false` for an empty word.
fn starts_with_vowel(word: &[char]) -> bool {
    if word.is_empty() {
        return false;
    }

    // Every pattern below is written in lowercase; normalise the input so
    // capitalised words (sentence starts, proper nouns) are classified the
    // same way as their lowercase spelling.
    let lowered: Vec<char> = word.iter().map(char::to_ascii_lowercase).collect();
    let word = lowered.as_slice();

    // "euphoria", "eugene", "eulogy", "eucalyptus": "eu" pronounced "you".
    if matches!(
        word,
        ['e', 'u', 'p', 'h', ..] | ['e', 'u', 'g' | 'l' | 'c', ..]
    ) {
        return false;
    }

    // "uk...", e.g. "ukulele", "ukraine".
    if matches!(word, ['u', 'k', ..]) {
        return false;
    }

    // Silent "h" ("hour", "honest", "herb") and "u" pronounced "uh"
    // ("uninformed", "unusual", "urban").
    if matches!(
        word,
        ['h', 'o', 'u', 'r', ..]
            | ['h', 'o', 'n', ..]
            | ['u', 'n', 'i', 'n' | 'm', ..]
            | ['u', 'n', 'a' | 'u', ..]
            | ['h', 'e', 'r', 'b', ..]
            | ['u', 'r', 'b', ..]
    ) {
        return true;
    }

    // "uni...", "usu...", "usa...": "u" pronounced "you".
    if matches!(word, ['u', 'n' | 's', 'i' | 'a' | 'u', ..]) {
        return false;
    }

    // Remaining "un..." words ("under", "unless").
    if matches!(word, ['u', 'n', ..]) {
        return true;
    }

    // "urge", "urgent".
    if matches!(word, ['u', 'r', 'g', ..]) {
        return true;
    }

    // "utility", "uranium", "european", "use".
    if matches!(
        word,
        ['u', 't' | 'r' | 'n', ..] | ['e', 'u', 'r', ..] | ['u', 'w', ..] | ['u', 's', 'e', ..]
    ) {
        return false;
    }

    if matches!(word, ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u', 'l' | 'd', ..]) {
        return true;
    }

    // "one" and "once" are pronounced with a leading "w" sound, as are
    // "ones", "one-..." and similar derivatives.
    if matches!(
        word,
        ['o', 'n', 'e']
            | ['o', 'n', 'c', 'e']
            | ['o', 'n', 'e', 'a' | 'e' | 'i' | 'u' | '-' | 's', ..]
    ) {
        return false;
    }

    // Consonant-spelled words that are nevertheless spoken with a leading
    // vowel sound.
    if matches!(
        word,
        ['s', 'o', 's']
            | ['r', 'z', ..]
            | ['n', 'g', ..]
            | ['n', 'v', ..]
            | ['x']
            | ['x', 'b', 'o', 'x']
            | ['h', 'e', 'i', 'r', ..]
            | ['h', 'o', 'n', 'o', 'r', ..]
    ) {
        return true;
    }

    if matches!(
        word,
        ['j', 'u' | 'o', 'n', ..] | ['j', 'u', 'r' | 'n', 'a' | 'i' | 'o', ..]
    ) {
        return false;
    }

    if matches!(word, ['x', '-' | '\'' | '.' | 'o' | 's', ..]) {
        return true;
    }

    // Fallback: classify by the first letter alone.
    matches!(
        word,
        ['a', ..] | ['e', ..] | ['i', ..] | ['o', ..] | ['u', ..]
    )
}
8 changes: 5 additions & 3 deletions harper-core/src/linting/lint_set.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use paste::paste;

use super::long_sentences::LongSentences;
use super::matcher::Matcher;
use super::repeated_words::RepeatedWords;
use super::sentence_capitalization::SentenceCapitalization;
Expand All @@ -9,10 +8,11 @@ use super::spell_check::SpellCheck;
use super::unclosed_quotes::UnclosedQuotes;
use super::wrong_quotes::WrongQuotes;
use super::Linter;
use super::{an_a::AnA, long_sentences::LongSentences};
use crate::{Dictionary, Document, Lint};

pub struct LintSet {
pub(super) linters: Vec<Box<dyn Linter>>
pub(super) linters: Vec<Box<dyn Linter>>,
}

impl Linter for LintSet {
Expand All @@ -32,12 +32,13 @@ impl Linter for LintSet {
impl LintSet {
pub fn new() -> Self {
Self {
linters: Vec::new()
linters: Vec::new(),
}
}

pub fn add_standard(&mut self, dictionary: impl Dictionary + 'static) -> &mut Self {
self.add_repeated_words()
.add_an_a()
.add_long_sentences()
.add_unclosed_quotes()
.add_sentence_capitalization()
Expand Down Expand Up @@ -97,6 +98,7 @@ macro_rules! create_simple_builder_methods {
}

create_simple_builder_methods!(
AnA,
SentenceCapitalization,
UnclosedQuotes,
WrongQuotes,
Expand Down
24 changes: 12 additions & 12 deletions harper-core/src/linting/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ use crate::{Document, Lint, LintKind, Linter, Punctuation, Span, Suggestion, Tok
#[derive(Debug, PartialEq, PartialOrd, Clone)]
struct PatternToken {
kind: TokenKind,
content: Option<DictWord>
content: Option<DictWord>,
}

impl PatternToken {
fn from_token(token: Token, document: &Document) -> Self {
if token.kind.is_word() {
Self {
kind: token.kind,
content: Some(document.get_span_content(token.span).into())
content: Some(document.get_span_content(token.span).into()),
}
} else {
Self {
kind: token.kind,
content: None
content: None,
}
}
}
Expand Down Expand Up @@ -85,13 +85,13 @@ macro_rules! pt {

struct Rule {
pattern: Vec<PatternToken>,
replace_with: Vec<char>
replace_with: Vec<char>,
}

/// A linter that uses a variety of curated pattern matches to find and fix
/// common grammatical issues.
pub struct Matcher {
triggers: Vec<Rule>
triggers: Vec<Rule>,
}

impl Matcher {
Expand Down Expand Up @@ -193,24 +193,24 @@ impl Matcher {
// We need to be more explicit that we are replacing with an Em dash
triggers.push(Rule {
pattern: vec![pt!(Hyphen), pt!(Hyphen), pt!(Hyphen)],
replace_with: vecword!("—")
replace_with: vecword!("—"),
});

// Same goes for this En dash
triggers.push(Rule {
pattern: vec![pt!(Hyphen), pt!(Hyphen)],
replace_with: vecword!("–")
replace_with: vecword!("–"),
});

// And this ellipsis
triggers.push(Rule {
pattern: vec![pt!(Period), pt!(Period), pt!(Period)],
replace_with: vecword!("…")
replace_with: vecword!("…"),
});

triggers.push(Rule {
pattern: vec![pt!("L"), pt!(Period), pt!("L"), pt!(Period), pt!("M")],
replace_with: vecword!("large language model")
replace_with: vecword!("large language model"),
});

triggers.push(Rule {
Expand All @@ -222,7 +222,7 @@ impl Matcher {
pt!("M"),
pt!(Period),
],
replace_with: vecword!("large language model")
replace_with: vecword!("large language model"),
});

Self { triggers }
Expand Down Expand Up @@ -262,7 +262,7 @@ impl Linter for Matcher {
if match_tokens.len() == trigger.pattern.len() && !match_tokens.is_empty() {
let span = Span::new(
match_tokens.first().unwrap().span.start,
match_tokens.last().unwrap().span.end
match_tokens.last().unwrap().span.end,
);

lints.push(Lint {
Expand All @@ -273,7 +273,7 @@ impl Linter for Matcher {
"Did you mean “{}”?",
trigger.replace_with.iter().collect::<String>()
),
priority: 15
priority: 15,
})
}
}
Expand Down
1 change: 1 addition & 0 deletions harper-core/src/linting/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod an_a;
mod lint;
mod lint_set;
mod long_sentences;
Expand Down
8 changes: 4 additions & 4 deletions harper-core/src/linting/spell_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@ use crate::Dictionary;

pub struct SpellCheck<T>
where
T: Dictionary
T: Dictionary,
{
dictionary: T,
word_cache: HashMap<Vec<char>, Vec<Vec<char>>>
word_cache: HashMap<Vec<char>, Vec<Vec<char>>>,
}

impl<T: Dictionary> SpellCheck<T> {
pub fn new(dictionary: T) -> Self {
Self {
dictionary,
word_cache: HashMap::new()
word_cache: HashMap::new(),
}
}
}
Expand Down Expand Up @@ -86,7 +86,7 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
"Did you mean to spell “{}” this way?",
document.get_span_content_str(word.span)
),
priority: 63
priority: 63,
})
}

Expand Down

0 comments on commit 7a90f1a

Please sign in to comment.