From fc3064baf584cab37df9bdf8545bf2daf320e8b1 Mon Sep 17 00:00:00 2001 From: Elijah Potter Date: Wed, 13 Mar 2024 14:11:01 -0600 Subject: [PATCH] fix: see full commit message - Cleaned up `hunspell` module. - Fixed file URL issues - Fixed omitted newlines in Markdown files. --- harper-core/src/document.rs | 2 +- harper-core/src/parsers/markdown.rs | 2 +- harper-core/src/spell/full_dictionary.rs | 5 ++- harper-core/src/spell/hunspell/attributes.rs | 47 +++++++++++--------- harper-core/src/spell/hunspell/mod.rs | 17 +++++-- harper-core/src/spell/hunspell/word_list.rs | 3 +- harper-core/src/token.rs | 1 + harper-ls/src/backend.rs | 19 +++++--- harper-ls/src/config.rs | 2 - 9 files changed, 62 insertions(+), 36 deletions(-) diff --git a/harper-core/src/document.rs b/harper-core/src/document.rs index 14e381f0..f2a52ae6 100644 --- a/harper-core/src/document.rs +++ b/harper-core/src/document.rs @@ -356,7 +356,7 @@ fn is_sentence_terminator(token: &TokenKind) -> bool { Punctuation::Question ] .contains(punct), - TokenKind::Newline(_) => true, + TokenKind::Newline(count) => *count >= 2, _ => false } } diff --git a/harper-core/src/parsers/markdown.rs b/harper-core/src/parsers/markdown.rs index 6b5ed955..46d1b640 100644 --- a/harper-core/src/parsers/markdown.rs +++ b/harper-core/src/parsers/markdown.rs @@ -31,7 +31,7 @@ impl Parser for Markdown { } match event { - pulldown_cmark::Event::HardBreak => { + pulldown_cmark::Event::SoftBreak | pulldown_cmark::Event::HardBreak => { tokens.push(Token { span: Span::new_with_len(traversed_chars, 1), kind: TokenKind::Newline(1) diff --git a/harper-core/src/spell/full_dictionary.rs b/harper-core/src/spell/full_dictionary.rs index ec83e81f..51008242 100644 --- a/harper-core/src/spell/full_dictionary.rs +++ b/harper-core/src/spell/full_dictionary.rs @@ -29,8 +29,9 @@ fn uncached_inner_new() -> FullDictionary { let word_list = parse_default_word_list().unwrap(); let attr_list = parse_default_attribute_list().unwrap(); - let words = attr_list.expand_marked_words(word_list).unwrap(); - let mut words: Vec = words.into_iter().collect(); + let mut words = Vec::new(); + + attr_list.expand_marked_words(word_list, &mut words); FullDictionary { word_set: HashSet::from_iter(words.iter().cloned()), diff --git a/harper-core/src/spell/hunspell/attributes.rs b/harper-core/src/spell/hunspell/attributes.rs index b90bd677..2b657a1c 100644 --- a/harper-core/src/spell/hunspell/attributes.rs +++ b/harper-core/src/spell/hunspell/attributes.rs @@ -1,7 +1,6 @@ use std::usize; use hashbrown::HashMap; -use itertools::Itertools; use smallvec::ToSmallVec; use super::matcher::Matcher; @@ -25,7 +24,7 @@ struct Expansion { pub replacements: Vec } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct AttributeList { /// Key = Affix Flag affixes: HashMap @@ -109,9 +108,13 @@ impl AttributeList { /// Expand [`MarkedWord`] into a list of full words, including itself. /// + /// Will append to the given `dest`; + /// /// In the future, I want to make this function cleaner and faster. - pub fn expand_marked_word(&self, word: MarkedWord) -> Result, Error> { - let mut words = Vec::with_capacity(word.attributes.len() + 1); + pub fn expand_marked_word(&self, word: MarkedWord, dest: &mut Vec) { + dest.reserve(word.attributes.len() + 1); + + let start_len = dest.len(); for attr in &word.attributes { let Some(expansion) = self.affixes.get(attr) else { @@ -140,37 +143,41 @@ impl AttributeList { } } - let mut cross_product_words = Vec::new(); + let cross_product_words = Vec::new(); for new_word in new_words { - cross_product_words.extend(self.expand_marked_word(MarkedWord { - letters: new_word, - attributes: opp_attr.clone() - })?) + self.expand_marked_word( + MarkedWord { + letters: new_word, + attributes: opp_attr.clone() + }, + dest + ); } - words.extend_from_slice(&cross_product_words); + dest.extend_from_slice(&cross_product_words); } else { - words.extend_from_slice(&new_words); + dest.extend_from_slice(&new_words); } } - words.push(word.letters); + dest.push(word.letters); + + let mut split = dest.split_off(start_len); + split.sort(); + split.dedup(); - Ok(words) + dest.append(&mut split); } pub fn expand_marked_words( &self, - words: impl IntoIterator - ) -> Result, Error> { - let mut output = Vec::new(); - + words: impl IntoIterator, + dest: &mut Vec + ) { for word in words { - output.extend(self.expand_marked_word(word)?.into_iter().unique()); + self.expand_marked_word(word, dest); } - - Ok(output) } fn apply_replacement( diff --git a/harper-core/src/spell/hunspell/mod.rs b/harper-core/src/spell/hunspell/mod.rs index e5aa34f7..3da32836 100644 --- a/harper-core/src/spell/hunspell/mod.rs +++ b/harper-core/src/spell/hunspell/mod.rs @@ -34,7 +34,9 @@ mod tests { let words = parse_word_list(TEST_WORD_LIST).unwrap(); let attributes = AttributeList::parse(ATTR_LIST).unwrap(); - let expanded = attributes.expand_marked_words(words).unwrap(); + let mut expanded = Vec::new(); + + attributes.expand_marked_words(words, &mut expanded); let expanded: Vec = expanded .into_iter() .map(|v| v.into_iter().collect()) @@ -42,7 +44,7 @@ mod tests { assert_eq!( expanded, - vec!["hello", "tried", "try", "reworked", "rework", "worked", "work"] + vec!["hello", "tried", "try", "rework", "reworked", "work", "worked",] ) } @@ -56,7 +58,10 @@ mod tests { ) .unwrap(); - let expanded = attributes.expand_marked_words(words).unwrap(); + let mut expanded = Vec::new(); + + attributes.expand_marked_words(words, &mut expanded); + assert!(expanded.contains(&split("giants"))) } @@ -64,7 +69,11 @@ mod tests { let words = parse_default_word_list().unwrap(); let attributes = parse_default_attribute_list().unwrap(); - attributes.expand_marked_words(words).unwrap() + let mut expanded = Vec::new(); + + attributes.expand_marked_words(words, &mut expanded); + + expanded } #[test] diff --git a/harper-core/src/spell/hunspell/word_list.rs b/harper-core/src/spell/hunspell/word_list.rs index 37d9f88c..b83204a0 100644 --- a/harper-core/src/spell/hunspell/word_list.rs +++ b/harper-core/src/spell/hunspell/word_list.rs @@ -1,12 +1,13 @@ use super::Error; use crate::spell::DictWord; +#[derive(Debug, Clone)] pub struct MarkedWord { pub letters: DictWord, pub attributes: Vec } -/// Parse a hunspell word list +/// Parse a Hunspell word list /// /// Returns [`None`] if the given string is invalid. pub fn parse_word_list(source: &str) -> Result, Error> { diff --git a/harper-core/src/token.rs b/harper-core/src/token.rs index 809fd033..dfac8ba8 100644 --- a/harper-core/src/token.rs +++ b/harper-core/src/token.rs @@ -43,6 +43,7 @@ pub enum TokenKind { Newline(usize), EmailAddress, Url, + Hostname, /// A special token used for things like inline code blocks that should be /// ignored by all linters. Unlintable diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index cedeb32b..cf4ee01a 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -1,5 +1,5 @@ use std::collections::HashMap; -use std::path::PathBuf; +use std::path::{Component, PathBuf}; use std::sync::Arc; use harper_core::parsers::Markdown; @@ -87,9 +87,11 @@ impl Backend { let mut rewritten = String::new(); // We assume all URLs are local files and have a base - for seg in url.path_segments().unwrap() { - rewritten.push_str(seg); - rewritten.push('%'); + for seg in url.to_file_path().unwrap().components() { + if !matches!(seg, Component::RootDir) { + rewritten.push_str(&seg.as_os_str().to_string_lossy()); + rewritten.push('%'); + } } rewritten.into() @@ -117,6 +119,8 @@ impl Backend { #[instrument(skip(self, dict))] async fn save_file_dictionary(&self, url: &Url, dict: impl Dictionary) -> anyhow::Result<()> { + dbg!(self.get_file_dict_path(url).await); + Ok(save_dict(self.get_file_dict_path(url).await, dict).await?) } @@ -185,7 +189,12 @@ impl Backend { #[instrument(skip(self))] async fn update_document_from_file(&self, url: &Url) -> anyhow::Result<()> { - let content = match tokio::fs::read_to_string(url.path()).await { + let content = match tokio::fs::read_to_string( + url.to_file_path() + .map_err(|_| anyhow::format_err!("Could not extract file path."))? + ) + .await + { Ok(content) => content, Err(err) => { error!("Error updating document from file: {}", err); diff --git a/harper-ls/src/config.rs b/harper-ls/src/config.rs index d5579160..19d01ea8 100644 --- a/harper-ls/src/config.rs +++ b/harper-ls/src/config.rs @@ -35,9 +35,7 @@ impl Config { } if let Some(v) = value.get("linters") { - dbg!(v); base.lint_config = serde_json::from_value(v.clone())?; - dbg!(base.lint_config); } Ok(base)