Skip to content

Commit

Permalink
fix: see full commit message
Browse files Browse the repository at this point in the history
- Cleaned up `hunspell` module.
- Fixed file URL issues
- Fixed omitted newlines in Markdown files.
  • Loading branch information
elijah-potter committed Mar 13, 2024
1 parent 3783af8 commit fc3064b
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 36 deletions.
2 changes: 1 addition & 1 deletion harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ fn is_sentence_terminator(token: &TokenKind) -> bool {
Punctuation::Question
]
.contains(punct),
TokenKind::Newline(_) => true,
TokenKind::Newline(count) => *count >= 2,
_ => false
}
}
Expand Down
2 changes: 1 addition & 1 deletion harper-core/src/parsers/markdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl Parser for Markdown {
}

match event {
pulldown_cmark::Event::HardBreak => {
pulldown_cmark::Event::SoftBreak | pulldown_cmark::Event::HardBreak => {
tokens.push(Token {
span: Span::new_with_len(traversed_chars, 1),
kind: TokenKind::Newline(1)
Expand Down
5 changes: 3 additions & 2 deletions harper-core/src/spell/full_dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ fn uncached_inner_new() -> FullDictionary {
let word_list = parse_default_word_list().unwrap();
let attr_list = parse_default_attribute_list().unwrap();

let words = attr_list.expand_marked_words(word_list).unwrap();
let mut words: Vec<DictWord> = words.into_iter().collect();
let mut words = Vec::new();

attr_list.expand_marked_words(word_list, &mut words);

FullDictionary {
word_set: HashSet::from_iter(words.iter().cloned()),
Expand Down
47 changes: 27 additions & 20 deletions harper-core/src/spell/hunspell/attributes.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::usize;

use hashbrown::HashMap;
use itertools::Itertools;
use smallvec::ToSmallVec;

use super::matcher::Matcher;
Expand All @@ -25,7 +24,7 @@ struct Expansion {
pub replacements: Vec<AffixReplacement>
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct AttributeList {
/// Key = Affix Flag
affixes: HashMap<char, Expansion>
Expand Down Expand Up @@ -109,9 +108,13 @@ impl AttributeList {

/// Expand [`MarkedWord`] into a list of full words, including itself.
///
/// Will append to the given `dest`.
///
/// In the future, I want to make this function cleaner and faster.
pub fn expand_marked_word(&self, word: MarkedWord) -> Result<Vec<DictWord>, Error> {
let mut words = Vec::with_capacity(word.attributes.len() + 1);
pub fn expand_marked_word(&self, word: MarkedWord, dest: &mut Vec<DictWord>) {
dest.reserve(word.attributes.len() + 1);

let start_len = dest.len();

for attr in &word.attributes {
let Some(expansion) = self.affixes.get(attr) else {
Expand Down Expand Up @@ -140,37 +143,41 @@ impl AttributeList {
}
}

let mut cross_product_words = Vec::new();
let cross_product_words = Vec::new();

for new_word in new_words {
cross_product_words.extend(self.expand_marked_word(MarkedWord {
letters: new_word,
attributes: opp_attr.clone()
})?)
self.expand_marked_word(
MarkedWord {
letters: new_word,
attributes: opp_attr.clone()
},
dest
);
}

words.extend_from_slice(&cross_product_words);
dest.extend_from_slice(&cross_product_words);
} else {
words.extend_from_slice(&new_words);
dest.extend_from_slice(&new_words);
}
}

words.push(word.letters);
dest.push(word.letters);

let mut split = dest.split_off(start_len);
split.sort();
split.dedup();

Ok(words)
dest.append(&mut split);
}

pub fn expand_marked_words(
&self,
words: impl IntoIterator<Item = MarkedWord>
) -> Result<Vec<DictWord>, Error> {
let mut output = Vec::new();

words: impl IntoIterator<Item = MarkedWord>,
dest: &mut Vec<DictWord>
) {
for word in words {
output.extend(self.expand_marked_word(word)?.into_iter().unique());
self.expand_marked_word(word, dest);
}

Ok(output)
}

fn apply_replacement(
Expand Down
17 changes: 13 additions & 4 deletions harper-core/src/spell/hunspell/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,17 @@ mod tests {
let words = parse_word_list(TEST_WORD_LIST).unwrap();
let attributes = AttributeList::parse(ATTR_LIST).unwrap();

let expanded = attributes.expand_marked_words(words).unwrap();
let mut expanded = Vec::new();

attributes.expand_marked_words(words, &mut expanded);
let expanded: Vec<String> = expanded
.into_iter()
.map(|v| v.into_iter().collect())
.collect();

assert_eq!(
expanded,
vec!["hello", "tried", "try", "reworked", "rework", "worked", "work"]
vec!["hello", "tried", "try", "rework", "reworked", "work", "worked",]
)
}

Expand All @@ -56,15 +58,22 @@ mod tests {
)
.unwrap();

let expanded = attributes.expand_marked_words(words).unwrap();
let mut expanded = Vec::new();

attributes.expand_marked_words(words, &mut expanded);

assert!(expanded.contains(&split("giants")))
}

fn build_expanded() -> Vec<DictWord> {
let words = parse_default_word_list().unwrap();
let attributes = parse_default_attribute_list().unwrap();

attributes.expand_marked_words(words).unwrap()
let mut expanded = Vec::new();

attributes.expand_marked_words(words, &mut expanded);

expanded
}

#[test]
Expand Down
3 changes: 2 additions & 1 deletion harper-core/src/spell/hunspell/word_list.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use super::Error;
use crate::spell::DictWord;

#[derive(Debug, Clone)]
pub struct MarkedWord {
pub letters: DictWord,
pub attributes: Vec<char>
}

/// Parse a hunspell word list
/// Parse a Hunspell word list
///
/// Returns an [`Error`] if the given string is invalid.
pub fn parse_word_list(source: &str) -> Result<Vec<MarkedWord>, Error> {
Expand Down
1 change: 1 addition & 0 deletions harper-core/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub enum TokenKind {
Newline(usize),
EmailAddress,
Url,
Hostname,
/// A special token used for things like inline code blocks that should be
/// ignored by all linters.
Unlintable
Expand Down
19 changes: 14 additions & 5 deletions harper-ls/src/backend.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::path::{Component, PathBuf};
use std::sync::Arc;

use harper_core::parsers::Markdown;
Expand Down Expand Up @@ -87,9 +87,11 @@ impl Backend {
let mut rewritten = String::new();

// We assume all URLs are local files and have a base
for seg in url.path_segments().unwrap() {
rewritten.push_str(seg);
rewritten.push('%');
for seg in url.to_file_path().unwrap().components() {
if !matches!(seg, Component::RootDir) {
rewritten.push_str(&seg.as_os_str().to_string_lossy());
rewritten.push('%');
}
}

rewritten.into()
Expand Down Expand Up @@ -117,6 +119,8 @@ impl Backend {

#[instrument(skip(self, dict))]
async fn save_file_dictionary(&self, url: &Url, dict: impl Dictionary) -> anyhow::Result<()> {
dbg!(self.get_file_dict_path(url).await);

Ok(save_dict(self.get_file_dict_path(url).await, dict).await?)
}

Expand Down Expand Up @@ -185,7 +189,12 @@ impl Backend {

#[instrument(skip(self))]
async fn update_document_from_file(&self, url: &Url) -> anyhow::Result<()> {
let content = match tokio::fs::read_to_string(url.path()).await {
let content = match tokio::fs::read_to_string(
url.to_file_path()
.map_err(|_| anyhow::format_err!("Could not extract file path."))?
)
.await
{
Ok(content) => content,
Err(err) => {
error!("Error updating document from file: {}", err);
Expand Down
2 changes: 0 additions & 2 deletions harper-ls/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ impl Config {
}

if let Some(v) = value.get("linters") {
dbg!(v);
base.lint_config = serde_json::from_value(v.clone())?;
dbg!(base.lint_config);
}

Ok(base)
Expand Down

0 comments on commit fc3064b

Please sign in to comment.