From 9211597c68c27a9252102a4118edbe14460676d2 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 25 Nov 2024 01:42:29 -0700 Subject: [PATCH 01/15] feat(#230): map basic typst expressions to tokens --- Cargo.lock | 190 +++++++++++++++++++++++++++++++ harper-core/Cargo.toml | 1 + harper-core/src/parsers/mod.rs | 1 + harper-core/src/parsers/typst.rs | 170 +++++++++++++++++++++++++++ 4 files changed, 362 insertions(+) create mode 100644 harper-core/src/parsers/typst.rs diff --git a/Cargo.lock b/Cargo.lock index f640be1a..029091c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -322,6 +322,31 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + [[package]] name = "crunchy" version = "0.2.2" @@ -393,6 +418,15 @@ dependencies = [ "syn", ] +[[package]] +name = "ecow" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e42fc0a93992b20c58b99e59d61eaf1635a25bfbe49e4275c34ba0aee98119ba" +dependencies = [ + "serde", +] + [[package]] name = "either" version = "1.13.0" @@ -610,6 +644,7 @@ dependencies = [ "serde_json", "smallvec", "thiserror 2.0.3", + "typst-syntax", "unicode-blocks", "unicode-width 0.2.0", ] @@ -850,6 +885,16 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown 0.15.1", +] + [[package]] name = "is-docker" version = "0.2.0" @@ -1161,6 +1206,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" + [[package]] name = "ppv-lite86" version = "0.2.20" @@ -1239,6 +1290,26 @@ dependencies = [ "serde", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -1378,6 +1449,15 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_spanned" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -1387,6 +1467,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.9" @@ -1449,6 +1535,12 @@ dependencies = [ "syn", ] +[[package]] +name = "thin-vec" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a38c90d48152c236a3ab59271da4f4ae63d678c5d7ad6b7714d7cb9760be5e4b" + [[package]] name = "thiserror" version = "1.0.69" @@ -1559,6 +1651,40 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "tower" version = "0.4.13" @@ -1857,6 +1983,37 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "typst-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b7be8b6ed6b2cb39ca495947d548a28d7db0ba244008e44c5a759120327693" +dependencies = [ + "ecow", + "once_cell", + "serde", + "toml", + "typst-utils", + "unicode-ident", + "unicode-math-class", + "unicode-script", + "unicode-segmentation", + "unscanny", +] + +[[package]] +name = "typst-utils" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f0305443ed97f0b658471487228f86bf835705e7525fbdcc671cebd864f7a40" +dependencies = [ + "once_cell", + "portable-atomic", + "rayon", + "siphasher", + "thin-vec", +] + [[package]] name = "unicase" version = "2.8.0" @@ -1875,6 +2032,24 @@ version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +[[package]] +name = "unicode-math-class" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d246cf599d5fae3c8d56e04b20eb519adb89a8af8d0b0fbcded369aa3647d65" + +[[package]] +name = "unicode-script" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb421b350c9aff471779e262955939f565ec18b86c15364e6bdf0d662ca7c1f" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.14" @@ -1887,6 +2062,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "unscanny" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9df2af067a7953e9c3831320f35c1cc0600c30d44d9f7a12b01db1cd88d6b47" + [[package]] name = "url" version = "2.5.3" @@ -2173,6 +2354,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +dependencies = [ + "memchr", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/harper-core/Cargo.toml b/harper-core/Cargo.toml index 0f520a7e..f218e034 100644 --- a/harper-core/Cargo.toml +++ b/harper-core/Cargo.toml @@ -24,6 +24,7 @@ thiserror = "2.0.3" unicode-blocks = "0.1.9" unicode-width = "0.2.0" levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } +typst-syntax = "0.12.0" [dev-dependencies] criterion = { version = "0.5.1", default-features = false } diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index f35f209b..08f53bbf 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -3,6 +3,7 @@ mod isolate_english; mod markdown; mod mask; mod plain_english; +mod typst; use blanket::blanket; pub use collapse_identifiers::CollapseIdentifiers; diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs new file mode 100644 index 00000000..9f5c6f85 --- /dev/null +++ b/harper-core/src/parsers/typst.rs @@ -0,0 +1,170 @@ +use itertools::Itertools; + +use typst_syntax::ast::{AstNode, Expr}; + +use super::{Parser, PlainEnglish}; +use crate::{parsers::StrParser, Token, TokenKind, WordMetadata}; + +/// A parser that wraps the [`PlainEnglish`] parser that allows one to parse +/// Typst files. +pub struct Typst; + +macro_rules! constant_token { + ($offset:ident, $doc:ident, $a:ident, $to:expr) => {{ + let range = $doc.range($a.span()).unwrap(); + *$offset += range.len(); + Some(vec![Token { + span: range.into(), + kind: $to, + }]) + }}; +} +macro_rules! recursive_env { + ($offset:ident, $expr:ident, $doc:ident, $parser:ident) => { + Some( + $expr + .body() + .exprs() + .filter_map(|e| map_token(e, $doc, $parser, $offset)) + .flatten() + .collect_vec(), + ) + }; +} + +fn map_token( + ex: typst_syntax::ast::Expr, + doc: &typst_syntax::Source, + parser: &mut PlainEnglish, + offset: &mut usize, +) -> Option> { + match ex { + Expr::Text(text) => Some( + parser + .parse_str(text.get()) + .into_iter() + .map(|mut t| { + t.span.push_by(*offset); + t + }) + .collect_vec(), + ), + Expr::Space(a) => constant_token!(offset, doc, a, TokenKind::Space(1)), + Expr::Linebreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(1)), + Expr::Parbreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(2)), + Expr::Escape(_) => None, + Expr::Shorthand(_) => None, + Expr::SmartQuote(_) => None, + Expr::Strong(strong) => recursive_env!(offset, strong, doc, parser), + Expr::Emph(emph) => recursive_env!(offset, emph, doc, parser), + Expr::Raw(_) => None, + Expr::Link(a) => constant_token!(offset, doc, a, TokenKind::Url), + Expr::Label(label) => Some( + parser + .parse_str(label.get()) + .into_iter() + .map(|mut t| { + t.span.push_by(*offset); + t + }) + .collect_vec(), + ), + Expr::Ref(a) => { + constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())) + } + Expr::Heading(heading) => recursive_env!(offset, heading, doc, parser), + Expr::List(list_item) => recursive_env!(offset, list_item, doc, parser), + Expr::Enum(enum_item) => recursive_env!(offset, enum_item, doc, parser), + Expr::Term(term_item) => Some( + term_item + .term() + .exprs() + .chain(term_item.description().exprs()) + .filter_map(|e| map_token(e, doc, parser, offset)) + .flatten() + .collect_vec(), + ), + Expr::Equation(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Math(_) => None, + Expr::MathIdent(_) => None, + Expr::MathShorthand(_) => None, + Expr::MathAlignPoint(_) => None, + Expr::MathDelimited(_) => None, + Expr::MathAttach(_) => None, + Expr::MathPrimes(_) => None, + Expr::MathFrac(_) => None, + Expr::MathRoot(_) => None, + Expr::Ident(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), + Expr::None(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), + Expr::Auto(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), + Expr::Bool(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), + Expr::Int(int) => todo!(), + Expr::Float(float) => todo!(), + Expr::Numeric(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Str(text) => Some( + parser + .parse_str(text.get()) + .into_iter() + .map(|mut t| { + t.span.push_by(*offset); + t + }) + .collect_vec(), + ), + Expr::Code(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Content(content_block) => recursive_env!(offset, content_block, doc, parser), + Expr::Parenthesized(parenthesized) => map_token(parenthesized.expr(), doc, parser, offset), + Expr::Array(array) => Some( + array + .items() + .filter_map(|i| { + if let typst_syntax::ast::ArrayItem::Pos(e) = i { + map_token(e, doc, parser, offset) + } else { + None + } + }) + .flatten() + .collect_vec(), + ), + Expr::Dict(dict) => todo!(), + Expr::Unary(unary) => todo!(), + Expr::Binary(binary) => todo!(), + Expr::FieldAccess(field_access) => todo!(), + Expr::FuncCall(func_call) => todo!(), + Expr::Closure(closure) => todo!(), + Expr::Let(let_binding) => todo!(), + Expr::DestructAssign(destruct_assignment) => todo!(), + Expr::Set(set_rule) => todo!(), + Expr::Show(show_rule) => todo!(), + Expr::Contextual(contextual) => todo!(), + Expr::Conditional(conditional) => todo!(), + Expr::While(while_loop) => todo!(), + Expr::For(for_loop) => todo!(), + Expr::Import(module_import) => todo!(), + Expr::Include(module_include) => todo!(), + Expr::Break(loop_break) => todo!(), + Expr::Continue(loop_continue) => todo!(), + Expr::Return(func_return) => todo!(), + } +} + +impl Parser for Typst { + fn parse(&mut self, source: &[char]) -> Vec { + let mut english_parser = PlainEnglish; + + let source_str: String = source.iter().collect(); + let typst_document = typst_syntax::Source::detached(source_str); + let typst_tree = typst_syntax::ast::Markup::from_untyped(typst_document.root()) + .expect("Unable to create typst document from parsed tree!"); + let mut offset = 0; + + // NOTE: the range spits out __byte__ indices, not char indices. + // This is why we keep track above. + typst_tree + .exprs() + .filter_map(|ex| map_token(ex, &typst_document, &mut english_parser, &mut offset)) + .flatten() + .collect_vec() + } +} From fd05b6a1ee2100d95c4cb9e282be18f2c840df29 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 25 Nov 2024 11:29:57 -0700 Subject: [PATCH 02/15] feat(#230): change recursive shorthand from macro to function --- harper-core/src/parsers/typst.rs | 54 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 9f5c6f85..4f7e27b9 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -1,6 +1,6 @@ use itertools::Itertools; -use typst_syntax::ast::{AstNode, Expr}; +use typst_syntax::ast::{AstNode, Expr, Markup}; use super::{Parser, PlainEnglish}; use crate::{parsers::StrParser, Token, TokenKind, WordMetadata}; @@ -19,17 +19,19 @@ macro_rules! constant_token { }]) }}; } -macro_rules! recursive_env { - ($offset:ident, $expr:ident, $doc:ident, $parser:ident) => { - Some( - $expr - .body() - .exprs() - .filter_map(|e| map_token(e, $doc, $parser, $offset)) - .flatten() - .collect_vec(), - ) - }; + +fn recursive_env( + exprs: &mut dyn Iterator, + doc: &typst_syntax::Source, + parser: &mut PlainEnglish, + offset: &mut usize, +) -> Option> { + Some( + exprs + .filter_map(|e| map_token(e, doc, parser, offset)) + .flatten() + .collect_vec(), + ) } fn map_token( @@ -55,8 +57,8 @@ fn map_token( Expr::Escape(_) => None, Expr::Shorthand(_) => None, Expr::SmartQuote(_) => None, - Expr::Strong(strong) => recursive_env!(offset, strong, doc, parser), - Expr::Emph(emph) => recursive_env!(offset, emph, doc, parser), + Expr::Strong(strong) => recursive_env(&mut strong.body().exprs(), doc, parser, offset), + Expr::Emph(emph) => recursive_env(&mut emph.body().exprs(), doc, parser, offset), Expr::Raw(_) => None, Expr::Link(a) => constant_token!(offset, doc, a, TokenKind::Url), Expr::Label(label) => Some( @@ -72,17 +74,17 @@ fn map_token( Expr::Ref(a) => { constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())) } - Expr::Heading(heading) => recursive_env!(offset, heading, doc, parser), - Expr::List(list_item) => recursive_env!(offset, list_item, doc, parser), - Expr::Enum(enum_item) => recursive_env!(offset, enum_item, doc, parser), - Expr::Term(term_item) => Some( - term_item + Expr::Heading(heading) => recursive_env(&mut heading.body().exprs(), doc, parser, offset), + Expr::List(list_item) => recursive_env(&mut list_item.body().exprs(), doc, parser, offset), + Expr::Enum(enum_item) => recursive_env(&mut enum_item.body().exprs(), doc, parser, offset), + Expr::Term(term_item) => recursive_env( + &mut term_item .term() .exprs() - .chain(term_item.description().exprs()) - .filter_map(|e| map_token(e, doc, parser, offset)) - .flatten() - .collect_vec(), + .chain(term_item.description().exprs()), + doc, + parser, + offset, ), Expr::Equation(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), Expr::Math(_) => None, @@ -112,7 +114,9 @@ fn map_token( .collect_vec(), ), Expr::Code(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Content(content_block) => recursive_env!(offset, content_block, doc, parser), + Expr::Content(content_block) => { + recursive_env(&mut content_block.body().exprs(), doc, parser, offset) + } Expr::Parenthesized(parenthesized) => map_token(parenthesized.expr(), doc, parser, offset), Expr::Array(array) => Some( array @@ -155,7 +159,7 @@ impl Parser for Typst { let source_str: String = source.iter().collect(); let typst_document = typst_syntax::Source::detached(source_str); - let typst_tree = typst_syntax::ast::Markup::from_untyped(typst_document.root()) + let typst_tree = Markup::from_untyped(typst_document.root()) .expect("Unable to create typst document from parsed tree!"); let mut offset = 0; From 920ee0a556bde53ff31b5cab634806f9826f13e7 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 25 Nov 2024 23:12:02 -0700 Subject: [PATCH 03/15] feat(#230): flesh out more complicated typst syntax parsing --- harper-core/src/parsers/typst.rs | 260 +++++++++++++++++++++++-------- 1 file changed, 195 insertions(+), 65 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 4f7e27b9..6c15494e 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -3,14 +3,14 @@ use itertools::Itertools; use typst_syntax::ast::{AstNode, Expr, Markup}; use super::{Parser, PlainEnglish}; -use crate::{parsers::StrParser, Token, TokenKind, WordMetadata}; +use crate::{parsers::StrParser, Punctuation, Token, TokenKind, WordMetadata}; /// A parser that wraps the [`PlainEnglish`] parser that allows one to parse /// Typst files. pub struct Typst; macro_rules! constant_token { - ($offset:ident, $doc:ident, $a:ident, $to:expr) => {{ + ($offset:ident, $doc:ident, $a:expr, $to:expr) => {{ let range = $doc.range($a.span()).unwrap(); *$offset += range.len(); Some(vec![Token { @@ -20,6 +20,18 @@ macro_rules! constant_token { }}; } +macro_rules! merge_expr { + ($($inner:expr),*) => { + Some( + [$($inner),*] + .into_iter() + .flatten() + .flatten() + .collect_vec(), + ) + }; +} + fn recursive_env( exprs: &mut dyn Iterator, doc: &typst_syntax::Source, @@ -28,12 +40,33 @@ fn recursive_env( ) -> Option> { Some( exprs - .filter_map(|e| map_token(e, doc, parser, offset)) + .filter_map(|e| { + let range = doc.range(e.span()).unwrap(); + *offset += range.len(); + map_token(e, doc, parser, offset) + }) .flatten() .collect_vec(), ) } +fn parse_english( + str: impl Into, + parser: &mut PlainEnglish, + offset: &mut usize, +) -> Option> { + let res = parser + .parse_str(str.into()) + .into_iter() + .map(|mut t| { + t.span.push_by(*offset); + t + }) + .collect_vec(); + *offset = res.last()?.span.end - 1; + Some(res) +} + fn map_token( ex: typst_syntax::ast::Expr, doc: &typst_syntax::Source, @@ -41,36 +74,34 @@ fn map_token( offset: &mut usize, ) -> Option> { match ex { - Expr::Text(text) => Some( - parser - .parse_str(text.get()) - .into_iter() - .map(|mut t| { - t.span.push_by(*offset); - t - }) - .collect_vec(), - ), + Expr::Text(text) => parse_english(text.get(), parser, offset), Expr::Space(a) => constant_token!(offset, doc, a, TokenKind::Space(1)), Expr::Linebreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(1)), Expr::Parbreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(2)), - Expr::Escape(_) => None, - Expr::Shorthand(_) => None, - Expr::SmartQuote(_) => None, + Expr::Escape(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Shorthand(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::SmartQuote(quote) => { + if quote.double() { + constant_token!( + offset, + doc, + quote, + TokenKind::Punctuation(Punctuation::Quote(crate::Quote { twin_loc: None })) + ) + } else { + constant_token!( + offset, + doc, + quote, + TokenKind::Punctuation(Punctuation::Apostrophe) + ) + } + } Expr::Strong(strong) => recursive_env(&mut strong.body().exprs(), doc, parser, offset), Expr::Emph(emph) => recursive_env(&mut emph.body().exprs(), doc, parser, offset), - Expr::Raw(_) => None, + Expr::Raw(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), Expr::Link(a) => constant_token!(offset, doc, a, TokenKind::Url), - Expr::Label(label) => Some( - parser - .parse_str(label.get()) - .into_iter() - .map(|mut t| { - t.span.push_by(*offset); - t - }) - .collect_vec(), - ), + Expr::Label(label) => parse_english(label.get(), parser, offset), Expr::Ref(a) => { constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())) } @@ -87,15 +118,15 @@ fn map_token( offset, ), Expr::Equation(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Math(_) => None, - Expr::MathIdent(_) => None, - Expr::MathShorthand(_) => None, - Expr::MathAlignPoint(_) => None, - Expr::MathDelimited(_) => None, - Expr::MathAttach(_) => None, - Expr::MathPrimes(_) => None, - Expr::MathFrac(_) => None, - Expr::MathRoot(_) => None, + Expr::Math(_) => panic!("Unexpected math outside equation environment."), + Expr::MathIdent(_) => panic!("Unexpected math outside equation environment."), + Expr::MathShorthand(_) => panic!("Unexpected math outside equation environment."), + Expr::MathAlignPoint(_) => panic!("Unexpected math outside equation environment."), + Expr::MathDelimited(_) => panic!("Unexpected math outside equation environment."), + Expr::MathAttach(_) => panic!("Unexpected math outside equation environment."), + Expr::MathPrimes(_) => panic!("Unexpected math outside equation environment."), + Expr::MathFrac(_) => panic!("Unexpected math outside equation environment."), + Expr::MathRoot(_) => panic!("Unexpected math outside equation environment."), Expr::Ident(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), Expr::None(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), Expr::Auto(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), @@ -103,16 +134,7 @@ fn map_token( Expr::Int(int) => todo!(), Expr::Float(float) => todo!(), Expr::Numeric(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Str(text) => Some( - parser - .parse_str(text.get()) - .into_iter() - .map(|mut t| { - t.span.push_by(*offset); - t - }) - .collect_vec(), - ), + Expr::Str(text) => parse_english(text.get(), parser, offset), Expr::Code(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), Expr::Content(content_block) => { recursive_env(&mut content_block.body().exprs(), doc, parser, offset) @@ -131,25 +153,64 @@ fn map_token( .flatten() .collect_vec(), ), - Expr::Dict(dict) => todo!(), - Expr::Unary(unary) => todo!(), - Expr::Binary(binary) => todo!(), - Expr::FieldAccess(field_access) => todo!(), - Expr::FuncCall(func_call) => todo!(), - Expr::Closure(closure) => todo!(), - Expr::Let(let_binding) => todo!(), - Expr::DestructAssign(destruct_assignment) => todo!(), - Expr::Set(set_rule) => todo!(), - Expr::Show(show_rule) => todo!(), - Expr::Contextual(contextual) => todo!(), - Expr::Conditional(conditional) => todo!(), - Expr::While(while_loop) => todo!(), - Expr::For(for_loop) => todo!(), - Expr::Import(module_import) => todo!(), - Expr::Include(module_include) => todo!(), - Expr::Break(loop_break) => todo!(), - Expr::Continue(loop_continue) => todo!(), - Expr::Return(func_return) => todo!(), + // TODO: actually parse dictionaries + Expr::Dict(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Unary(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Binary(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::FieldAccess(field_access) => merge_expr!( + map_token(field_access.target(), doc, parser, offset), + constant_token!( + offset, + doc, + field_access.field(), + TokenKind::Word(WordMetadata::default()) + ) + ), + Expr::FuncCall(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Closure(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Let(let_binding) => let_binding + .init() + .and_then(|e| map_token(e, doc, parser, offset)), + Expr::DestructAssign(destruct_assignment) => { + map_token(destruct_assignment.value(), doc, parser, offset) + } + Expr::Set(set_rule) => merge_expr!( + map_token(set_rule.target(), doc, parser, offset), + map_token(set_rule.condition()?, doc, parser, offset) + ), + Expr::Show(show_rule) => merge_expr!( + map_token(show_rule.transform(), doc, parser, offset), + map_token(show_rule.selector()?, doc, parser, offset) + ), + Expr::Contextual(contextual) => map_token(contextual.body(), doc, parser, offset), + Expr::Conditional(conditional) => merge_expr!( + map_token(conditional.condition(), doc, parser, offset), + map_token(conditional.if_body(), doc, parser, offset), + map_token(conditional.else_body()?, doc, parser, offset) + ), + Expr::While(while_loop) => merge_expr!( + map_token(while_loop.condition(), doc, parser, offset), + map_token(while_loop.body(), doc, parser, offset) + ), + Expr::For(for_loop) => merge_expr!( + map_token(for_loop.iterable(), doc, parser, offset), + map_token(for_loop.body(), doc, parser, offset) + ), + Expr::Import(module_import) => { + merge_expr!( + map_token(module_import.source(), doc, parser, offset), + constant_token!( + offset, + doc, + module_import.new_name()?, + TokenKind::Word(WordMetadata::default()) + ) + ) + } + Expr::Include(module_include) => map_token(module_include.source(), doc, parser, offset), + Expr::Break(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Continue(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Return(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), } } @@ -172,3 +233,72 @@ impl Parser for Typst { .collect_vec() } } + +#[cfg(test)] +mod tests { + use super::Typst; + use crate::{parsers::StrParser, Punctuation, TokenKind}; + + #[test] + fn conjunction() { + let source = r"doesn't"; + + let tokens = Typst.parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!(token_kinds.as_slice(), &[TokenKind::Word(_),])) + } + + #[test] + fn sentence() { + let source = r"This is a sentence, it does not have any particularly interesting elements of the typst syntax."; + + let tokens = Typst.parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!( + token_kinds.as_slice(), + &[ + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Punctuation(Punctuation::Comma), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Punctuation(Punctuation::Period), + ] + )) + } +} From e974fe6254e4e2407c54ed00fe3b1f7cef556160 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 25 Nov 2024 23:25:17 -0700 Subject: [PATCH 04/15] feat(#230): delegate typst files to parser in harper-cli and harper-ls --- harper-cli/src/main.rs | 4 +++- harper-core/src/parsers/mod.rs | 1 + harper-ls/src/backend.rs | 6 +++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index 73b7d45d..1a3e3639 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -7,7 +7,7 @@ use ariadne::{Color, Label, Report, ReportKind, Source}; use clap::Parser; use harper_comments::CommentParser; use harper_core::linting::{LintGroup, LintGroupConfig, Linter}; -use harper_core::parsers::Markdown; +use harper_core::parsers::{Markdown, Typst}; use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary}; #[derive(Debug, Parser)] @@ -107,6 +107,8 @@ fn load_file(file: &Path) -> anyhow::Result<(Document, String)> { let mut parser: Box = if let Some("md") = file.extension().map(|v| v.to_str().unwrap()) { Box::new(Markdown) + } else if let Some("typ") = file.extension().map(|v| v.to_str().unwrap()) { + Box::new(Typst) } else { Box::new( CommentParser::new_from_filename(file) diff --git a/harper-core/src/parsers/mod.rs b/harper-core/src/parsers/mod.rs index 08f53bbf..af742a3e 100644 --- a/harper-core/src/parsers/mod.rs +++ b/harper-core/src/parsers/mod.rs @@ -11,6 +11,7 @@ pub use isolate_english::IsolateEnglish; pub use markdown::Markdown; pub use mask::Mask; pub use plain_english::PlainEnglish; +pub use typst::Typst; pub use crate::token::{Token, TokenKind, TokenStringExt}; diff --git a/harper-ls/src/backend.rs b/harper-ls/src/backend.rs index 1cb4c670..af07991e 100644 --- a/harper-ls/src/backend.rs +++ b/harper-ls/src/backend.rs @@ -5,7 +5,9 @@ use std::sync::Arc; use anyhow::anyhow; use harper_comments::CommentParser; use harper_core::linting::{LintGroup, Linter}; -use harper_core::parsers::{CollapseIdentifiers, IsolateEnglish, Markdown, Parser, PlainEnglish}; +use harper_core::parsers::{ + CollapseIdentifiers, IsolateEnglish, Markdown, Parser, PlainEnglish, Typst, +}; use harper_core::{ Dictionary, Document, FstDictionary, FullDictionary, MergedDictionary, Token, TokenKind, WordMetadata, @@ -206,6 +208,8 @@ impl Backend { } } else if language_id == "markdown" { Some(Box::new(Markdown)) + } else if language_id == "typst" { + Some(Box::new(Typst)) } else if language_id == "git-commit" { Some(Box::new(GitCommitParser)) } else if language_id == "html" { From b283ce4dc8931f4015131aa56041b1eeed9892e3 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 25 Nov 2024 23:32:28 -0700 Subject: [PATCH 05/15] fix(#230): fix offset update after delegating parser --- harper-core/src/parsers/typst.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 6c15494e..3a894112 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -63,7 +63,7 @@ fn parse_english( t }) .collect_vec(); - *offset = res.last()?.span.end - 1; + *offset = res.last()?.span.end; Some(res) } From dac8c7c3158a83935ef9fb1ecf7f1f4612642c92 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Mon, 25 Nov 2024 23:58:30 -0700 Subject: [PATCH 06/15] fix(#230): ParBreak to ParBreak, not two Newlines --- harper-core/src/parsers/typst.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 3a894112..8d9b5e02 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -77,7 +77,7 @@ fn map_token( Expr::Text(text) => parse_english(text.get(), parser, offset), Expr::Space(a) => constant_token!(offset, doc, a, TokenKind::Space(1)), Expr::Linebreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(1)), - Expr::Parbreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(2)), + Expr::Parbreak(a) => constant_token!(offset, doc, a, TokenKind::ParagraphBreak), Expr::Escape(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), Expr::Shorthand(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), Expr::SmartQuote(quote) => { From 9798b8d6dc9d7d6be0efebf6d1e218a2f0362836 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Tue, 26 Nov 2024 16:58:26 -0700 Subject: [PATCH 07/15] feat(#230): remove offset variable, and just use the start of an environment's span --- harper-core/src/parsers/typst.rs | 138 +++++++++++++------------------ 1 file changed, 59 insertions(+), 79 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 8d9b5e02..8edfe427 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -10,11 +10,9 @@ use crate::{parsers::StrParser, Punctuation, Token, TokenKind, WordMetadata}; pub struct Typst; macro_rules! constant_token { - ($offset:ident, $doc:ident, $a:expr, $to:expr) => {{ - let range = $doc.range($a.span()).unwrap(); - *$offset += range.len(); + ($doc:ident, $a:expr, $to:expr) => {{ Some(vec![Token { - span: range.into(), + span: $doc.range($a.span()).unwrap().into(), kind: $to, }]) }}; @@ -36,15 +34,10 @@ fn recursive_env( exprs: &mut dyn Iterator, doc: &typst_syntax::Source, parser: &mut PlainEnglish, - offset: &mut usize, ) -> Option> { Some( exprs - .filter_map(|e| { - let range = doc.range(e.span()).unwrap(); - *offset += range.len(); - map_token(e, doc, parser, offset) - }) + .filter_map(|e| map_token(e, doc, parser)) .flatten() .collect_vec(), ) @@ -52,18 +45,18 @@ fn recursive_env( fn parse_english( str: impl Into, + doc: &typst_syntax::Source, parser: &mut PlainEnglish, - offset: &mut usize, + span: &typst_syntax::Span, ) -> Option> { let res = parser .parse_str(str.into()) .into_iter() .map(|mut t| { - t.span.push_by(*offset); + t.span.push_by(doc.range(*span).unwrap().start); t }) .collect_vec(); - *offset = res.last()?.span.end; Some(res) } @@ -71,43 +64,36 @@ fn map_token( ex: typst_syntax::ast::Expr, doc: &typst_syntax::Source, parser: &mut PlainEnglish, - offset: &mut usize, ) -> Option> { match ex { - Expr::Text(text) => parse_english(text.get(), parser, offset), - Expr::Space(a) => constant_token!(offset, doc, a, TokenKind::Space(1)), - Expr::Linebreak(a) => constant_token!(offset, doc, a, TokenKind::Newline(1)), - Expr::Parbreak(a) => constant_token!(offset, doc, a, TokenKind::ParagraphBreak), - Expr::Escape(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Shorthand(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Text(text) => parse_english(text.get(), doc, parser, &text.span()), + Expr::Space(a) => constant_token!(doc, a, TokenKind::Space(1)), + Expr::Linebreak(a) => constant_token!(doc, a, TokenKind::Newline(1)), + Expr::Parbreak(a) => constant_token!(doc, a, TokenKind::ParagraphBreak), + Expr::Escape(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Shorthand(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::SmartQuote(quote) => { if quote.double() { constant_token!( - offset, doc, quote, TokenKind::Punctuation(Punctuation::Quote(crate::Quote { twin_loc: None })) ) } else { - constant_token!( - offset, - doc, - quote, - TokenKind::Punctuation(Punctuation::Apostrophe) - ) + constant_token!(doc, quote, TokenKind::Punctuation(Punctuation::Apostrophe)) } } - Expr::Strong(strong) => recursive_env(&mut strong.body().exprs(), doc, parser, offset), - Expr::Emph(emph) => recursive_env(&mut emph.body().exprs(), doc, parser, offset), - Expr::Raw(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Link(a) => constant_token!(offset, doc, a, TokenKind::Url), - Expr::Label(label) => parse_english(label.get(), parser, offset), + Expr::Strong(strong) => recursive_env(&mut strong.body().exprs(), doc, parser), + Expr::Emph(emph) => recursive_env(&mut emph.body().exprs(), doc, parser), + Expr::Raw(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Link(a) => constant_token!(doc, a, TokenKind::Url), + Expr::Label(label) => parse_english(label.get(), doc, parser, &label.span()), Expr::Ref(a) => { - constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())) + constant_token!(doc, a, TokenKind::Word(WordMetadata::default())) } - Expr::Heading(heading) => recursive_env(&mut heading.body().exprs(), doc, parser, offset), - Expr::List(list_item) => recursive_env(&mut list_item.body().exprs(), doc, parser, offset), - Expr::Enum(enum_item) => recursive_env(&mut enum_item.body().exprs(), doc, parser, offset), + Expr::Heading(heading) => recursive_env(&mut heading.body().exprs(), doc, parser), + Expr::List(list_item) => recursive_env(&mut list_item.body().exprs(), doc, parser), + Expr::Enum(enum_item) => recursive_env(&mut enum_item.body().exprs(), doc, parser), Expr::Term(term_item) => recursive_env( &mut term_item .term() @@ -115,9 +101,8 @@ fn map_token( .chain(term_item.description().exprs()), doc, parser, - offset, ), - Expr::Equation(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Equation(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::Math(_) => panic!("Unexpected math outside equation environment."), Expr::MathIdent(_) => panic!("Unexpected math outside equation environment."), Expr::MathShorthand(_) => panic!("Unexpected math outside equation environment."), @@ -127,25 +112,25 @@ fn map_token( Expr::MathPrimes(_) => panic!("Unexpected math outside equation environment."), Expr::MathFrac(_) => panic!("Unexpected math outside equation environment."), Expr::MathRoot(_) => panic!("Unexpected math outside equation environment."), - Expr::Ident(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), - Expr::None(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), - Expr::Auto(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), - Expr::Bool(a) => constant_token!(offset, doc, a, TokenKind::Word(WordMetadata::default())), + Expr::Ident(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), + Expr::None(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), + Expr::Auto(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), + Expr::Bool(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), Expr::Int(int) => todo!(), Expr::Float(float) => todo!(), - Expr::Numeric(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Str(text) => parse_english(text.get(), parser, offset), - Expr::Code(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Numeric(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Str(text) => parse_english(text.get(), doc, parser, &text.span()), + Expr::Code(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::Content(content_block) => { - recursive_env(&mut content_block.body().exprs(), doc, parser, offset) + recursive_env(&mut content_block.body().exprs(), doc, parser) } - Expr::Parenthesized(parenthesized) => map_token(parenthesized.expr(), doc, parser, offset), + Expr::Parenthesized(parenthesized) => map_token(parenthesized.expr(), doc, parser), Expr::Array(array) => Some( array .items() .filter_map(|i| { if let typst_syntax::ast::ArrayItem::Pos(e) = i { - map_token(e, doc, parser, offset) + map_token(e, doc, parser) } else { None } @@ -154,63 +139,59 @@ fn map_token( .collect_vec(), ), // TODO: actually parse dictionaries - Expr::Dict(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Unary(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Binary(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Dict(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Unary(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Binary(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::FieldAccess(field_access) => merge_expr!( - map_token(field_access.target(), doc, parser, offset), + map_token(field_access.target(), doc, parser), constant_token!( - offset, doc, field_access.field(), TokenKind::Word(WordMetadata::default()) ) ), - Expr::FuncCall(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Closure(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Let(let_binding) => let_binding - .init() - .and_then(|e| map_token(e, doc, parser, offset)), + Expr::FuncCall(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Closure(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Let(let_binding) => let_binding.init().and_then(|e| map_token(e, doc, parser)), Expr::DestructAssign(destruct_assignment) => { - map_token(destruct_assignment.value(), doc, parser, offset) + map_token(destruct_assignment.value(), doc, parser) } Expr::Set(set_rule) => merge_expr!( - map_token(set_rule.target(), doc, parser, offset), - map_token(set_rule.condition()?, doc, parser, offset) + map_token(set_rule.target(), doc, parser), + map_token(set_rule.condition()?, doc, parser) ), Expr::Show(show_rule) => merge_expr!( - map_token(show_rule.transform(), doc, parser, offset), - map_token(show_rule.selector()?, doc, parser, offset) + map_token(show_rule.transform(), doc, parser), + map_token(show_rule.selector()?, doc, parser) ), - Expr::Contextual(contextual) => map_token(contextual.body(), doc, parser, offset), + Expr::Contextual(contextual) => map_token(contextual.body(), doc, parser), Expr::Conditional(conditional) => merge_expr!( - map_token(conditional.condition(), doc, parser, offset), - map_token(conditional.if_body(), doc, parser, offset), - map_token(conditional.else_body()?, doc, parser, offset) + map_token(conditional.condition(), doc, parser), + map_token(conditional.if_body(), doc, parser), + map_token(conditional.else_body()?, doc, parser) ), Expr::While(while_loop) => merge_expr!( - map_token(while_loop.condition(), doc, parser, offset), - map_token(while_loop.body(), doc, parser, offset) + map_token(while_loop.condition(), doc, parser), + map_token(while_loop.body(), doc, parser) ), Expr::For(for_loop) => merge_expr!( - map_token(for_loop.iterable(), doc, parser, offset), - map_token(for_loop.body(), doc, parser, offset) + map_token(for_loop.iterable(), doc, parser), + map_token(for_loop.body(), doc, parser) ), Expr::Import(module_import) => { merge_expr!( - map_token(module_import.source(), doc, parser, offset), + map_token(module_import.source(), doc, parser), constant_token!( - offset, doc, module_import.new_name()?, TokenKind::Word(WordMetadata::default()) ) ) } - Expr::Include(module_include) => map_token(module_include.source(), doc, parser, offset), - Expr::Break(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Continue(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), - Expr::Return(a) => constant_token!(offset, doc, a, TokenKind::Unlintable), + Expr::Include(module_include) => map_token(module_include.source(), doc, parser), + Expr::Break(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Continue(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Return(a) => constant_token!(doc, a, TokenKind::Unlintable), } } @@ -222,13 +203,12 @@ impl Parser for Typst { let typst_document = typst_syntax::Source::detached(source_str); let typst_tree = Markup::from_untyped(typst_document.root()) .expect("Unable to create typst document from parsed tree!"); - let mut offset = 0; // NOTE: the range spits out __byte__ indices, not char indices. // This is why we keep track above. typst_tree .exprs() - .filter_map(|ex| map_token(ex, &typst_document, &mut english_parser, &mut offset)) + .filter_map(|ex| map_token(ex, &typst_document, &mut english_parser)) .flatten() .collect_vec() } From ccab586dae1d38f0b96fcebc8fa5510ca1437840 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Wed, 27 Nov 2024 11:45:34 -0700 Subject: [PATCH 08/15] feat(#230): parse numbers properly and add test for numbers --- harper-core/src/parsers/typst.rs | 54 ++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 8edfe427..3621bc7c 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -116,8 +116,12 @@ fn map_token( Expr::None(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), Expr::Auto(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), Expr::Bool(a) => constant_token!(doc, a, TokenKind::Word(WordMetadata::default())), - Expr::Int(int) => todo!(), - Expr::Float(float) => todo!(), + Expr::Int(int) => { + constant_token!(doc, int, TokenKind::Number((int.get() as f64).into(), None)) + } + Expr::Float(float) => { + constant_token!(doc, float, TokenKind::Number(float.get().into(), None)) + } Expr::Numeric(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::Str(text) => parse_english(text.get(), doc, parser, &text.span()), Expr::Code(a) => constant_token!(doc, a, TokenKind::Unlintable), @@ -216,6 +220,8 @@ impl Parser for Typst { #[cfg(test)] mod tests { + use ordered_float::OrderedFloat; + use super::Typst; use crate::{parsers::StrParser, Punctuation, TokenKind}; @@ -232,6 +238,50 @@ mod tests { assert!(matches!(token_kinds.as_slice(), &[TokenKind::Word(_),])) } + #[test] + fn number() { + let source = r"The number 12 is larger than 11, but is much less than 11!"; + + let tokens = Typst.parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!( + token_kinds.as_slice(), + &[ + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Number(OrderedFloat(12.0), None), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Number(OrderedFloat(11.0), None), + TokenKind::Punctuation(Punctuation::Comma), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Word(_), + TokenKind::Space(1), + TokenKind::Number(OrderedFloat(11.0), None), + TokenKind::Punctuation(Punctuation::Bang), + ] + )) + } + #[test] fn sentence() { let source = r"This is a sentence, it does not have any particularly interesting elements of the typst syntax."; From c69446546e099ed980e6b8bd763fc5ea338b167d Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Wed, 27 Nov 2024 14:23:01 -0700 Subject: [PATCH 09/15] feat: add a `spans` command to harper-cli that shows spans visually --- harper-cli/src/main.rs | 28 ++++++++++++++++++++++++++++ justfile | 4 ++++ 2 files changed, 32 insertions(+) diff --git a/harper-cli/src/main.rs b/harper-cli/src/main.rs index 1a3e3639..2be82714 100644 --- a/harper-cli/src/main.rs +++ b/harper-cli/src/main.rs @@ -26,6 +26,11 @@ enum Args { /// The file you wish to parse. file: PathBuf, }, + /// Parse a provided document and show the spans of the detected tokens. + Spans { + /// The file you wish to display the spans. + file: PathBuf, + }, /// Emit decompressed, line-separated list of words in Harper's dictionary. Words, } @@ -84,6 +89,29 @@ fn main() -> anyhow::Result<()> { Ok(()) } + Args::Spans { file } => { + let (doc, source) = load_file(&file)?; + + let primary_color = Color::Blue; + let filename = file + .file_name() + .map(|s| s.to_string_lossy().into()) + .unwrap_or("".to_string()); + + let mut report_builder = Report::build(ReportKind::Advice, &filename, 0); + for token in doc.tokens() { + report_builder = report_builder.with_label( + Label::new((&filename, token.span.into())) + .with_message(format!("[{}, {})", token.span.start, token.span.end)) + .with_color(primary_color), + ); + } + + let report = report_builder.finish(); + report.print((&filename, Source::from(source)))?; + + std::process::exit(1); + } Args::Words => { let dict = FstDictionary::curated(); diff --git a/justfile b/justfile index 2bb49920..1593f377 100644 --- a/justfile +++ b/justfile @@ -173,6 +173,10 @@ parse file: lint file: cargo run --bin harper-cli -- lint {{file}} +# Show the spans of the parsed tokens overlapped on the file. +spans file: + cargo run --bin harper-cli -- spans {{file}} + # Add a noun to Harper's curated dictionary. addnoun noun: #! /bin/bash From 245fb3299353318efc30248213309289ff97d683 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Wed, 27 Nov 2024 14:23:52 -0700 Subject: [PATCH 10/15] feat(#230): consolidate words separated by apostrophes into possessives or conjunctions --- harper-core/src/parsers/typst.rs | 51 ++++++++++++++++++-- harper-core/src/patterns/sequence_pattern.rs | 1 + 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 3621bc7c..682413e8 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -1,9 +1,13 @@ use itertools::Itertools; - +use std::collections::VecDeque; use typst_syntax::ast::{AstNode, Expr, Markup}; use super::{Parser, PlainEnglish}; -use crate::{parsers::StrParser, Punctuation, Token, TokenKind, WordMetadata}; +use crate::{ + parsers::StrParser, + patterns::{PatternExt, SequencePattern}, + ConjunctionData, Lrc, Punctuation, Span, Token, TokenKind, VecExt, WordMetadata, +}; /// A parser that wraps the [`PlainEnglish`] parser that allows one to parse /// Typst files. @@ -199,6 +203,13 @@ fn map_token( } } +thread_local! { + static WORD_APOSTROPHE_WORD: Lrc = Lrc::new(SequencePattern::default() + .then_any_word() + .then_apostrophe() + .then_any_word()); +} + impl Parser for Typst { fn parse(&mut self, source: &[char]) -> Vec { let mut english_parser = PlainEnglish; @@ -210,11 +221,43 @@ impl Parser for Typst { // NOTE: the range spits out __byte__ indices, not char indices. // This is why we keep track above. - typst_tree + let mut tokens = typst_tree .exprs() .filter_map(|ex| map_token(ex, &typst_document, &mut english_parser)) .flatten() - .collect_vec() + .collect_vec(); + + // Consolidate conjunctions + let mut to_remove = VecDeque::default(); + for tok_span in WORD_APOSTROPHE_WORD + .with(|v| v.clone()) + .find_all_matches(&tokens, source) + { + let start_tok = &tokens[tok_span.start]; + let end_tok = &tokens[tok_span.end - 1]; + let char_span = Span::new(start_tok.span.start, end_tok.span.end); + + if let TokenKind::Word(metadata) = start_tok.kind { + if end_tok.span.get_content(source) == &['s'] { + if let Some(mut noun) = metadata.noun { + noun.is_possessive = Some(true); + } + } else { + tokens[tok_span.start].kind = TokenKind::Word(WordMetadata { + conjunction: Some(ConjunctionData {}), + ..metadata + }); + }; + + tokens[tok_span.start].span = char_span; + to_remove.extend(tok_span.start + 1..tok_span.end); + } else { + panic!("Apostrophe consolidation does not start with Word Token!") + } + } + tokens.remove_indices(to_remove.into_iter().sorted().unique().collect()); + + tokens } } diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index c26a2035..02a3788e 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -43,6 +43,7 @@ impl SequencePattern { gen_then_from_is!(case_separator); gen_then_from_is!(adverb); gen_then_from_is!(adjective); + gen_then_from_is!(apostrophe); pub fn then_exact_word(mut self, word: &'static str) -> Self { self.token_patterns From 819d7677c138f530bc7dab32c5c94c58d9d6dd53 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Wed, 27 Nov 2024 14:25:58 -0700 Subject: [PATCH 11/15] fix(clippy): satisfy clippy --- harper-core/src/parsers/typst.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 682413e8..1ce641ba 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -238,7 +238,7 @@ impl Parser for Typst { let char_span = Span::new(start_tok.span.start, end_tok.span.end); if let TokenKind::Word(metadata) = start_tok.kind { - if end_tok.span.get_content(source) == &['s'] { + if end_tok.span.get_content(source) == ['s'] { if let Some(mut noun) = metadata.noun { noun.is_possessive = Some(true); } From 6ff5fc002fdd55f00883bb9da306a255ce14b64d Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Wed, 27 Nov 2024 15:29:18 -0700 Subject: [PATCH 12/15] feat(#230): simplify possessive-conjunction logic and add respective tests --- harper-core/src/parsers/typst.rs | 89 +++++++++++++++++++++++++------- harper-core/src/word_metadata.rs | 10 ++-- 2 files changed, 75 insertions(+), 24 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 1ce641ba..92ba5bb5 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -6,7 +6,7 @@ use super::{Parser, PlainEnglish}; use crate::{ parsers::StrParser, patterns::{PatternExt, SequencePattern}, - ConjunctionData, Lrc, Punctuation, Span, Token, TokenKind, VecExt, WordMetadata, + ConjunctionData, Lrc, NounData, Punctuation, Span, Token, TokenKind, VecExt, WordMetadata, }; /// A parser that wraps the [`PlainEnglish`] parser that allows one to parse @@ -238,16 +238,26 @@ impl Parser for Typst { let char_span = Span::new(start_tok.span.start, end_tok.span.end); if let TokenKind::Word(metadata) = start_tok.kind { - if end_tok.span.get_content(source) == ['s'] { - if let Some(mut noun) = metadata.noun { - noun.is_possessive = Some(true); - } - } else { - tokens[tok_span.start].kind = TokenKind::Word(WordMetadata { - conjunction: Some(ConjunctionData {}), - ..metadata + tokens[tok_span.start].kind = + TokenKind::Word(if end_tok.span.get_content(source) == ['s'] { + WordMetadata { + noun: Some(NounData { + is_possessive: Some(true), + ..metadata.noun.unwrap_or_default() + }), + conjunction: None, + ..metadata + } + } else { + WordMetadata { + noun: metadata.noun.map(|noun| NounData { + is_possessive: Some(false), + ..noun + }), + conjunction: Some(ConjunctionData {}), + ..metadata + } }); - }; tokens[tok_span.start].span = char_span; to_remove.extend(tok_span.start + 1..tok_span.end); @@ -266,7 +276,7 @@ mod tests { use ordered_float::OrderedFloat; use super::Typst; - use crate::{parsers::StrParser, Punctuation, TokenKind}; + use crate::{parsers::StrParser, NounData, Punctuation, TokenKind, WordMetadata}; #[test] fn conjunction() { @@ -278,12 +288,38 @@ mod tests { dbg!(&token_kinds); - assert!(matches!(token_kinds.as_slice(), &[TokenKind::Word(_),])) + assert_eq!(token_kinds.len(), 1); + assert!(token_kinds.into_iter().all(|t| t.is_conjunction())) + } + + #[test] + fn possessive() { + let source = r"person's"; + + let tokens = Typst.parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert_eq!(token_kinds.len(), 1); + assert!(token_kinds.into_iter().all(|t| { + matches!( + t, + TokenKind::Word(WordMetadata { + noun: Some(NounData { + is_possessive: Some(true), + .. + }), + .. + }) + ) + })) } #[test] fn number() { - let source = r"The number 12 is larger than 11, but is much less than 11!"; + let source = r"12 is larger than 11, but much less than 11!"; let tokens = Typst.parse_str(source); @@ -294,10 +330,6 @@ mod tests { assert!(matches!( token_kinds.as_slice(), &[ - TokenKind::Word(_), - TokenKind::Space(1), - TokenKind::Word(_), - TokenKind::Space(1), TokenKind::Number(OrderedFloat(12.0), None), TokenKind::Space(1), TokenKind::Word(_), @@ -317,14 +349,33 @@ mod tests { TokenKind::Space(1), TokenKind::Word(_), TokenKind::Space(1), - TokenKind::Word(_), - TokenKind::Space(1), TokenKind::Number(OrderedFloat(11.0), None), TokenKind::Punctuation(Punctuation::Bang), ] )) } + #[test] + fn math_unlintable() { + let source = r"$12 > 11$, $12 << 11!$"; + + let tokens = Typst.parse_str(source); + + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + + dbg!(&token_kinds); + + assert!(matches!( + token_kinds.as_slice(), + &[ + TokenKind::Unlintable, + TokenKind::Punctuation(Punctuation::Comma), + TokenKind::Space(1), + TokenKind::Unlintable, + ] + )) + } + #[test] fn sentence() { let source = r"This is a sentence, it does not have any particularly interesting elements of the typst syntax."; diff --git a/harper-core/src/word_metadata.rs b/harper-core/src/word_metadata.rs index 314f855d..326a3572 100644 --- a/harper-core/src/word_metadata.rs +++ b/harper-core/src/word_metadata.rs @@ -120,7 +120,7 @@ pub enum Tense { Future, } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)] pub struct VerbData { pub is_linking: Option, pub tense: Option, @@ -136,7 +136,7 @@ impl VerbData { } } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)] pub struct NounData { pub is_proper: Option, pub is_plural: Option, @@ -156,7 +156,7 @@ impl NounData { } } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)] pub struct AdjectiveData {} impl AdjectiveData { @@ -166,7 +166,7 @@ impl AdjectiveData { } } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)] pub struct AdverbData {} impl AdverbData { @@ -176,7 +176,7 @@ impl AdverbData { } } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)] pub struct ConjunctionData {} impl ConjunctionData { From 0e73ebd81c4ef5e2ffef391a32f86d3f44889b5f Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Thu, 28 Nov 2024 17:24:32 -0700 Subject: [PATCH 13/15] fix: update cargo lock after merge from master --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b0efa2cc..cfb79fab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -892,7 +892,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.15.2", ] [[package]] From f5b006b3a6c56053e7ea0c67010a56f5b2e3f8fa Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Thu, 28 Nov 2024 21:20:06 -0700 Subject: [PATCH 14/15] feat(#230): create additional parsers for complex dictionary parsing --- harper-core/src/parsers/typst.rs | 85 +++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 92ba5bb5..5d2b235e 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -64,6 +64,88 @@ fn parse_english( Some(res) } +fn parse_dict( + dict: &mut dyn Iterator, + doc: &typst_syntax::Source, + parser: &mut PlainEnglish, +) -> Option> { + Some( + dict.filter_map(|di| match di { + typst_syntax::ast::DictItem::Named(named) => merge_expr!( + constant_token!(doc, named.name(), TokenKind::Word(WordMetadata::default())), + map_token(named.expr(), doc, parser), + parse_pattern(named.pattern(), doc, parser) + ), + typst_syntax::ast::DictItem::Keyed(keyed) => merge_expr!( + map_token(keyed.key(), doc, parser), + map_token(keyed.expr(), doc, parser) + ), + typst_syntax::ast::DictItem::Spread(spread) => spread.sink_ident().map_or_else( + || { + spread + .sink_expr() + .and_then(|expr| map_token(expr, doc, parser)) + }, + |ident| constant_token!(doc, ident, TokenKind::Word(WordMetadata::default())), + ), + }) + .flatten() + .collect(), + ) +} + +fn parse_pattern( + pat: typst_syntax::ast::Pattern, + doc: &typst_syntax::Source, + parser: &mut PlainEnglish, +) -> Option> { + match pat { + typst_syntax::ast::Pattern::Normal(expr) => map_token(expr, doc, parser), + typst_syntax::ast::Pattern::Placeholder(underscore) => { + constant_token!(doc, underscore, TokenKind::Unlintable) + } + typst_syntax::ast::Pattern::Parenthesized(parenthesized) => merge_expr!( + map_token(parenthesized.expr(), doc, parser), + parse_pattern(parenthesized.pattern(), doc, parser) + ), + typst_syntax::ast::Pattern::Destructuring(destructuring) => Some( + destructuring + .items() + .filter_map(|item| match item { + typst_syntax::ast::DestructuringItem::Pattern(pattern) => { + parse_pattern(pattern, doc, parser) + } + typst_syntax::ast::DestructuringItem::Named(named) => merge_expr!( + constant_token!( + doc, + named.name(), + TokenKind::Word(WordMetadata::default()) + ), + parse_pattern(named.pattern(), doc, parser) + ), + typst_syntax::ast::DestructuringItem::Spread(spread) => { + spread.sink_ident().map_or_else( + || { + spread + .sink_expr() + .and_then(|expr| map_token(expr, doc, parser)) + }, + |ident| { + constant_token!( + doc, + ident, + TokenKind::Word(WordMetadata::default()) + ) + }, + ) + } + }) + .flatten() + .collect(), + ), + } +} + fn map_token( ex: typst_syntax::ast::Expr, doc: &typst_syntax::Source, @@ -146,8 +228,7 @@ fn map_token( .flatten() .collect_vec(), ), - // TODO: actually parse dictionaries - Expr::Dict(a) => constant_token!(doc, a, TokenKind::Unlintable), + Expr::Dict(a) => parse_dict(&mut a.items(), doc, parser), Expr::Unary(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::Binary(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::FieldAccess(field_access) => merge_expr!( From 47be9e6bffd7a8027adaf4e58231c2b60742c9e7 Mon Sep 17 00:00:00 2001 From: Grant Lemons Date: Fri, 29 Nov 2024 12:14:26 -0700 Subject: [PATCH 15/15] feat(#230): add some tests for dictionary parsing, and improve dict parsing to fit better --- harper-core/src/parsers/typst.rs | 88 ++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/harper-core/src/parsers/typst.rs b/harper-core/src/parsers/typst.rs index 5d2b235e..2beac0af 100644 --- a/harper-core/src/parsers/typst.rs +++ b/harper-core/src/parsers/typst.rs @@ -53,15 +53,17 @@ fn parse_english( parser: &mut PlainEnglish, span: &typst_syntax::Span, ) -> Option> { - let res = parser - .parse_str(str.into()) - .into_iter() - .map(|mut t| { - t.span.push_by(doc.range(*span).unwrap().start); - t - }) - .collect_vec(); - Some(res) + let offset = doc.range(*span).unwrap().start; + Some( + parser + .parse_str(str.into()) + .into_iter() + .map(|mut t| { + t.span.push_by(offset); + t + }) + .collect_vec(), + ) } fn parse_dict( @@ -73,8 +75,7 @@ fn parse_dict( dict.filter_map(|di| match di { typst_syntax::ast::DictItem::Named(named) => merge_expr!( constant_token!(doc, named.name(), TokenKind::Word(WordMetadata::default())), - map_token(named.expr(), doc, parser), - parse_pattern(named.pattern(), doc, parser) + map_token(named.expr(), doc, parser) ), typst_syntax::ast::DictItem::Keyed(keyed) => merge_expr!( map_token(keyed.key(), doc, parser), @@ -241,7 +242,15 @@ fn map_token( ), Expr::FuncCall(a) => constant_token!(doc, a, TokenKind::Unlintable), Expr::Closure(a) => constant_token!(doc, a, TokenKind::Unlintable), - Expr::Let(let_binding) => let_binding.init().and_then(|e| map_token(e, doc, parser)), + Expr::Let(let_binding) => merge_expr!( + match let_binding.kind() { + typst_syntax::ast::LetBindingKind::Normal(pattern) => + parse_pattern(pattern, doc, parser), + typst_syntax::ast::LetBindingKind::Closure(ident) => + constant_token!(doc, ident, TokenKind::Word(WordMetadata::default())), + }, + let_binding.init().and_then(|e| map_token(e, doc, parser)) + ), Expr::DestructAssign(destruct_assignment) => { map_token(destruct_assignment.value(), doc, parser) } @@ -354,6 +363,7 @@ impl Parser for Typst { #[cfg(test)] mod tests { + use itertools::Itertools; use ordered_float::OrderedFloat; use super::Typst; @@ -361,12 +371,10 @@ mod tests { #[test] fn conjunction() { - let source = r"doesn't"; + let source = "doesn't"; let tokens = Typst.parse_str(source); - let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); - dbg!(&token_kinds); assert_eq!(token_kinds.len(), 1); @@ -375,12 +383,10 @@ mod tests { #[test] fn possessive() { - let source = r"person's"; + let source = "person's"; let tokens = Typst.parse_str(source); - let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); - dbg!(&token_kinds); assert_eq!(token_kinds.len(), 1); @@ -400,12 +406,10 @@ mod tests { #[test] fn number() { - let source = r"12 is larger than 11, but much less than 11!"; + let source = "12 is larger than 11, but much less than 11!"; let tokens = Typst.parse_str(source); - let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); - dbg!(&token_kinds); assert!(matches!( @@ -438,12 +442,10 @@ mod tests { #[test] fn math_unlintable() { - let source = r"$12 > 11$, $12 << 11!$"; + let source = "$12 > 11$, $12 << 11!$"; let tokens = Typst.parse_str(source); - let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); - dbg!(&token_kinds); assert!(matches!( @@ -458,13 +460,45 @@ mod tests { } #[test] - fn sentence() { - let source = r"This is a sentence, it does not have any particularly interesting elements of the typst syntax."; + fn dict_parsing() { + let source = r#"#let dict = ( + name: "Typst", + born: 2019, + )"#; let tokens = Typst.parse_str(source); - let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); + dbg!(&token_kinds); + + let typst_document = typst_syntax::Source::detached(source); + let typst_tree = ::from_untyped( + typst_document.root(), + ) + .expect("Unable to create typst document from parsed tree!"); + dbg!(typst_tree.exprs().collect_vec()); + let charslice = source.chars().collect_vec(); + assert_eq!(tokens[2].span.get_content_string(&charslice), "Typst"); + assert!(matches!( + token_kinds.as_slice(), + &[ + TokenKind::Word(_), + TokenKind::Word(_), + TokenKind::Punctuation(Punctuation::Quote { .. }), + TokenKind::Word(_), + TokenKind::Punctuation(Punctuation::Quote { .. }), + TokenKind::Word(_), + TokenKind::Number(OrderedFloat(2019.0), None), + ] + )) + } + + #[test] + fn sentence() { + let source = "This is a sentence, it does not have any particularly interesting elements of the typst syntax."; + + let tokens = Typst.parse_str(source); + let token_kinds = tokens.iter().map(|t| t.kind).collect::>(); dbg!(&token_kinds); assert!(matches!(